summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2012-04-14 13:18:27 +0200
committerIngo Molnar <mingo@kernel.org>2012-04-14 13:19:04 +0200
commit6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e (patch)
tree021cc9f6b477146fcebe6f3be4752abfa2ba18a9 /arch/x86
parent682968e0c425c60f0dde37977e5beb2b12ddc4cc (diff)
parenta385ec4f11bdcf81af094c03e2444ee9b7fad2e5 (diff)
downloadop-kernel-dev-6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e.zip
op-kernel-dev-6ac1ef482d7ae0c690f1640bf6eb818ff9a2d91e.tar.gz
Merge branch 'perf/core' into perf/uprobes
Merge in latest upstream (and the latest perf development tree), to prepare for tooling changes, and also to pick up v3.4 MM changes that the uprobes code needs to take care of. Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig43
-rw-r--r--arch/x86/Kconfig.cpu5
-rw-r--r--arch/x86/Makefile1
-rw-r--r--arch/x86/Makefile.um4
-rw-r--r--arch/x86/boot/Makefile3
-rw-r--r--arch/x86/boot/boot.h2
-rw-r--r--arch/x86/boot/compressed/Makefile1
-rw-r--r--arch/x86/boot/compressed/eboot.c8
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c11
-rw-r--r--arch/x86/boot/compressed/relocs.c6
-rw-r--r--arch/x86/boot/tools/build.c40
-rw-r--r--arch/x86/configs/i386_defconfig64
-rw-r--r--arch/x86/configs/x86_64_defconfig67
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c36
-rw-r--r--arch/x86/crypto/blowfish_glue.c191
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S520
-rw-r--r--arch/x86/crypto/camellia_glue.c1952
-rw-r--r--arch/x86/crypto/crc32c-intel.c11
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c12
-rw-r--r--arch/x86/crypto/serpent-sse2-i586-asm_32.S29
-rw-r--r--arch/x86/crypto/serpent-sse2-x86_64-asm_64.S29
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c394
-rw-r--r--arch/x86/crypto/twofish_glue.c2
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c269
-rw-r--r--arch/x86/ia32/ia32_aout.c19
-rw-r--r--arch/x86/ia32/ia32_signal.c3
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/apic.h7
-rw-r--r--arch/x86/include/asm/atomic64_32.h146
-rw-r--r--arch/x86/include/asm/auxvec.h7
-rw-r--r--arch/x86/include/asm/barrier.h116
-rw-r--r--arch/x86/include/asm/bug.h4
-rw-r--r--arch/x86/include/asm/cacheflush.h1
-rw-r--r--arch/x86/include/asm/cpu_device_id.h13
-rw-r--r--arch/x86/include/asm/cpufeature.h4
-rw-r--r--arch/x86/include/asm/debugreg.h67
-rw-r--r--arch/x86/include/asm/dma-mapping.h26
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/elf.h1
-rw-r--r--arch/x86/include/asm/exec.h1
-rw-r--r--arch/x86/include/asm/fpu-internal.h520
-rw-r--r--arch/x86/include/asm/futex.h1
-rw-r--r--arch/x86/include/asm/hardirq.h1
-rw-r--r--arch/x86/include/asm/highmem.h2
-rw-r--r--arch/x86/include/asm/i387.h589
-rw-r--r--arch/x86/include/asm/ia32.h4
-rw-r--r--arch/x86/include/asm/idle.h1
-rw-r--r--arch/x86/include/asm/io_apic.h9
-rw-r--r--arch/x86/include/asm/irq_controller.h12
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/kgdb.h10
-rw-r--r--arch/x86/include/asm/kvm.h4
-rw-r--r--arch/x86/include/asm/kvm_emulate.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h63
-rw-r--r--arch/x86/include/asm/local.h1
-rw-r--r--arch/x86/include/asm/mc146818rtc.h1
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mrst.h4
-rw-r--r--arch/x86/include/asm/msr-index.h7
-rw-r--r--arch/x86/include/asm/page_types.h1
-rw-r--r--arch/x86/include/asm/paravirt.h7
-rw-r--r--arch/x86/include/asm/perf_event.h1
-rw-r--r--arch/x86/include/asm/processor.h85
-rw-r--r--arch/x86/include/asm/prom.h10
-rw-r--r--arch/x86/include/asm/segment.h58
-rw-r--r--arch/x86/include/asm/special_insns.h199
-rw-r--r--arch/x86/include/asm/spinlock.h4
-rw-r--r--arch/x86/include/asm/spinlock_types.h1
-rw-r--r--arch/x86/include/asm/stackprotector.h1
-rw-r--r--arch/x86/include/asm/switch_to.h129
-rw-r--r--arch/x86/include/asm/system.h523
-rw-r--r--arch/x86/include/asm/timer.h8
-rw-r--r--arch/x86/include/asm/tlbflush.h2
-rw-r--r--arch/x86/include/asm/tsc.h4
-rw-r--r--arch/x86/include/asm/vgtod.h17
-rw-r--r--arch/x86/include/asm/virtext.h1
-rw-r--r--arch/x86/include/asm/word-at-a-time.h46
-rw-r--r--arch/x86/include/asm/x2apic.h5
-rw-r--r--arch/x86/include/asm/x86_init.h6
-rw-r--r--arch/x86/include/asm/xen/interface.h1
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c7
-rw-r--r--arch/x86/kernel/acpi/cstate.c1
-rw-r--r--arch/x86/kernel/amd_gart_64.c11
-rw-r--r--arch/x86/kernel/apic/apic.c13
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c2
-rw-r--r--arch/x86/kernel/apic/apic_noop.c1
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c7
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c1
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c199
-rw-r--r--arch/x86/kernel/apic/numaq_32.c1
-rw-r--r--arch/x86/kernel/apic/probe_32.c1
-rw-r--r--arch/x86/kernel/apic/summit_32.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c1
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c6
-rw-r--r--arch/x86/kernel/apm_32.c12
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/common.c18
-rw-r--r--arch/x86/kernel/cpu/match.c91
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c26
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c195
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c9
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event.c106
-rw-r--r--arch/x86/kernel/cpu/perf_event.h43
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c21
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c194
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c22
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c526
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c13
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c19
-rw-r--r--arch/x86/kernel/cpu/scattered.c1
-rw-r--r--arch/x86/kernel/cpuid.c1
-rw-r--r--arch/x86/kernel/crash_dump_32.c6
-rw-r--r--arch/x86/kernel/devicetree.c101
-rw-r--r--arch/x86/kernel/dumpstack.c7
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/entry_32.S17
-rw-r--r--arch/x86/kernel/entry_64.S73
-rw-r--r--arch/x86/kernel/i387.c83
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/irq.c7
-rw-r--r--arch/x86/kernel/irq_32.c11
-rw-r--r--arch/x86/kernel/irqinit.c7
-rw-r--r--arch/x86/kernel/kdebugfs.c9
-rw-r--r--arch/x86/kernel/kgdb.c67
-rw-r--r--arch/x86/kernel/kprobes-common.h102
-rw-r--r--arch/x86/kernel/kprobes-opt.c512
-rw-r--r--arch/x86/kernel/kprobes.c664
-rw-r--r--arch/x86/kernel/kvm.c8
-rw-r--r--arch/x86/kernel/kvmclock.c15
-rw-r--r--arch/x86/kernel/ldt.c1
-rw-r--r--arch/x86/kernel/machine_kexec_32.c1
-rw-r--r--arch/x86/kernel/mca_32.c1
-rw-r--r--arch/x86/kernel/microcode_core.c15
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/msr.c1
-rw-r--r--arch/x86/kernel/nmi_selftest.c37
-rw-r--r--arch/x86/kernel/paravirt.c6
-rw-r--r--arch/x86/kernel/pci-calgary_64.c10
-rw-r--r--arch/x86/kernel/pci-dma.c8
-rw-r--r--arch/x86/kernel/pci-nommu.c6
-rw-r--r--arch/x86/kernel/pci-swiotlb.c17
-rw-r--r--arch/x86/kernel/probe_roms.c1
-rw-r--r--arch/x86/kernel/process.c150
-rw-r--r--arch/x86/kernel/process_32.c63
-rw-r--r--arch/x86/kernel/process_64.c113
-rw-r--r--arch/x86/kernel/ptrace.c2
-rw-r--r--arch/x86/kernel/setup.c22
-rw-r--r--arch/x86/kernel/signal.c1
-rw-r--r--arch/x86/kernel/smpboot.c32
-rw-r--r--arch/x86/kernel/sys_x86_64.c34
-rw-r--r--arch/x86/kernel/tboot.c9
-rw-r--r--arch/x86/kernel/tce_64.c1
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/tls.c5
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/tsc.c17
-rw-r--r--arch/x86/kernel/tsc_sync.c29
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vsyscall_64.c25
-rw-r--r--arch/x86/kernel/x86_init.c5
-rw-r--r--arch/x86/kernel/xsave.c1
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c112
-rw-r--r--arch/x86/kvm/i8259.c1
-rw-r--r--arch/x86/kvm/lapic.c12
-rw-r--r--arch/x86/kvm/mmu.c85
-rw-r--r--arch/x86/kvm/mmu_audit.c12
-rw-r--r--arch/x86/kvm/paging_tmpl.h4
-rw-r--r--arch/x86/kvm/pmu.c12
-rw-r--r--arch/x86/kvm/svm.c119
-rw-r--r--arch/x86/kvm/vmx.c57
-rw-r--r--arch/x86/kvm/x86.c412
-rw-r--r--arch/x86/lib/atomic64_32.c59
-rw-r--r--arch/x86/lib/atomic64_386_32.S6
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S29
-rw-r--r--arch/x86/lib/copy_page_64.S12
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/lib/memcpy_64.S44
-rw-r--r--arch/x86/lib/memset_64.S33
-rw-r--r--arch/x86/lib/usercopy_32.c4
-rw-r--r--arch/x86/mm/highmem_32.c4
-rw-r--r--arch/x86/mm/hugetlbpage.c30
-rw-r--r--arch/x86/mm/init.c1
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/init_64.c1
-rw-r--r--arch/x86/mm/kmemcheck/selftest.c1
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/pgtable_32.c1
-rw-r--r--arch/x86/mm/srat.c2
-rw-r--r--arch/x86/net/bpf_jit.S122
-rw-r--r--arch/x86/net/bpf_jit_comp.c47
-rw-r--r--arch/x86/pci/acpi.c29
-rw-r--r--arch/x86/pci/fixup.c12
-rw-r--r--arch/x86/pci/i386.c85
-rw-r--r--arch/x86/pci/mrst.c40
-rw-r--r--arch/x86/pci/xen.c27
-rw-r--r--arch/x86/platform/ce4100/falconfalls.dts7
-rw-r--r--arch/x86/platform/efi/efi.c377
-rw-r--r--arch/x86/platform/geode/Makefile2
-rw-r--r--arch/x86/platform/geode/alix.c76
-rw-r--r--arch/x86/platform/geode/geos.c128
-rw-r--r--arch/x86/platform/geode/net5501.c154
-rw-r--r--arch/x86/platform/mrst/Makefile1
-rw-r--r--arch/x86/platform/mrst/mrst.c86
-rw-r--r--arch/x86/platform/mrst/pmu.c817
-rw-r--r--arch/x86/platform/mrst/pmu.h234
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c72
-rw-r--r--arch/x86/platform/olpc/olpc.c97
-rw-r--r--arch/x86/platform/scx200/scx200_32.c24
-rw-r--r--arch/x86/platform/uv/uv_time.c6
-rw-r--r--arch/x86/power/cpu.c5
-rw-r--r--arch/x86/power/hibernate_32.c1
-rw-r--r--arch/x86/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/um/Kconfig4
-rw-r--r--arch/x86/um/asm/processor.h10
-rw-r--r--arch/x86/um/asm/processor_32.h10
-rw-r--r--arch/x86/um/asm/processor_64.h10
-rw-r--r--arch/x86/um/bugs_32.c4
-rw-r--r--arch/x86/um/mem_32.c8
-rw-r--r--arch/x86/um/vdso/vma.c3
-rw-r--r--arch/x86/vdso/vclock_gettime.c135
-rw-r--r--arch/x86/vdso/vdso32-setup.c17
-rw-r--r--arch/x86/vdso/vma.c3
-rw-r--r--arch/x86/xen/enlighten.c101
-rw-r--r--arch/x86/xen/irq.c8
-rw-r--r--arch/x86/xen/mmu.c24
-rw-r--r--arch/x86/xen/multicalls.h2
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c4
-rw-r--r--arch/x86/xen/setup.c3
-rw-r--r--arch/x86/xen/smp.c10
239 files changed, 9047 insertions, 5177 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d2a540f..76f5a46 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -69,7 +69,6 @@ config X86
select HAVE_ARCH_JUMP_LABEL
select HAVE_TEXT_POKE_SMP
select HAVE_GENERIC_HARDIRQS
- select HAVE_SPARSE_IRQ
select SPARSE_IRQ
select GENERIC_FIND_FIRST_BIT
select GENERIC_IRQ_PROBE
@@ -82,6 +81,7 @@ config X86
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
+ select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS || UPROBES)
@@ -179,6 +179,9 @@ config ARCH_HAS_DEFAULT_IDLE
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
+config ARCH_HAS_CPU_AUTOPROBE
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool y
@@ -401,6 +404,7 @@ config X86_INTEL_CE
select X86_REBOOTFIXUPS
select OF
select OF_EARLY_FLATTREE
+ select IRQ_DOMAIN
---help---
Select for the Intel CE media processor (CE4100) SOC.
This option compiles in support for the CE4100 SOC for settop
@@ -420,27 +424,6 @@ if X86_WANT_INTEL_MID
config X86_INTEL_MID
bool
-config X86_MRST
- bool "Moorestown MID platform"
- depends on PCI
- depends on PCI_GOANY
- depends on X86_IO_APIC
- select X86_INTEL_MID
- select SFI
- select DW_APB_TIMER
- select APB_TIMER
- select I2C
- select SPI
- select INTEL_SCU_IPC
- select X86_PLATFORM_DEVICES
- ---help---
- Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
- Internet Device(MID) platform. Moorestown consists of two chips:
- Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
- Unlike standard x86 PCs, Moorestown does not have many legacy devices
- nor standard legacy replacement devices/features. e.g. Moorestown does
- not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
-
config X86_MDFLD
bool "Medfield MID platform"
depends on PCI
@@ -454,6 +437,7 @@ config X86_MDFLD
select SPI
select INTEL_SCU_IPC
select X86_PLATFORM_DEVICES
+ select MFD_INTEL_MSIC
---help---
Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
Internet Device(MID) platform.
@@ -2079,6 +2063,7 @@ config OLPC
select GPIOLIB
select OF
select OF_PROMTREE
+ select IRQ_DOMAIN
---help---
Add support for detecting the unique features of the OLPC
XO hardware.
@@ -2136,6 +2121,19 @@ config ALIX
Note: You have to set alix.force=1 for boards with Award BIOS.
+config NET5501
+ bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)"
+ select GPIOLIB
+ ---help---
+ This option enables system support for the Soekris Engineering net5501.
+
+config GEOS
+ bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)"
+ select GPIOLIB
+ depends on DMI
+ ---help---
+ This option enables system support for the Traverse Technologies GEOS.
+
endif # X86_32
config AMD_NB
@@ -2194,6 +2192,7 @@ config X86_X32
config COMPAT
def_bool y
depends on IA32_EMULATION || X86_X32
+ select ARCH_WANT_OLD_COMPAT_IPC
config COMPAT_FOR_U64_ALIGNMENT
def_bool COMPAT
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 3c57033..706e12e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -303,7 +303,6 @@ config X86_GENERIC
config X86_INTERNODE_CACHE_SHIFT
int
default "12" if X86_VSMP
- default "7" if NUMA
default X86_L1_CACHE_SHIFT
config X86_CMPXCHG
@@ -441,7 +440,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
- depends on !64BIT
+ depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors
@@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
- depends on !64BIT
+ depends on M386 || M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 968dbe2..41a7237 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -129,6 +129,7 @@ KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# prevent gcc from generating any FP code by mistake
KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um
index 36ddec6..4be406a 100644
--- a/arch/x86/Makefile.um
+++ b/arch/x86/Makefile.um
@@ -8,15 +8,11 @@ ELF_ARCH := i386
ELF_FORMAT := elf32-i386
CHECKFLAGS += -D__i386__
-ifeq ("$(origin SUBARCH)", "command line")
-ifneq ("$(shell uname -m | sed -e s/i.86/i386/)", "$(SUBARCH)")
KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS += $(call cc-option,-m32)
LINK-y += $(call cc-option,-m32)
export LDFLAGS
-endif
-endif
# First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y.
include $(srctree)/arch/x86/Makefile_32.cpu
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 95365a8..5a747dd 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,8 @@ setup-y += video-bios.o
targets += $(setup-y)
hostprogs-y := mkcpustr tools/build
-HOST_EXTRACFLAGS += $(LINUXINCLUDE)
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \
+ -D__EXPORTED_HEADERS__
$(obj)/cpu.o: $(obj)/cpustr.h
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index c7093bd..18997e5 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -67,7 +67,7 @@ static inline void outl(u32 v, u16 port)
{
asm volatile("outl %0,%1" : : "a" (v), "dN" (port));
}
-static inline u32 inl(u32 port)
+static inline u32 inl(u16 port)
{
u32 v;
asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index b123b9a..fd55a2f 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,6 +22,7 @@ LDFLAGS := -m elf_$(UTS_MACHINE)
LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index fec216f..0cdfc0d 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -539,7 +539,7 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
struct initrd *initrd;
efi_file_handle_t *h;
efi_file_info_t *info;
- efi_char16_t filename[256];
+ efi_char16_t filename_16[256];
unsigned long info_sz;
efi_guid_t info_guid = EFI_FILE_INFO_ID;
efi_char16_t *p;
@@ -552,14 +552,14 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
str += 7;
initrd = &initrds[i];
- p = filename;
+ p = filename_16;
/* Skip any leading slashes */
while (*str == '/' || *str == '\\')
str++;
while (*str && *str != ' ' && *str != '\n') {
- if (p >= filename + sizeof(filename))
+ if ((u8 *)p >= (u8 *)filename_16 + sizeof(filename_16))
break;
*p++ = *str++;
@@ -583,7 +583,7 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
goto free_initrds;
}
- status = efi_call_phys5(fh->open, fh, &h, filename,
+ status = efi_call_phys5(fh->open, fh, &h, filename_16,
EFI_FILE_MODE_READ, (u64)0);
if (status != EFI_SUCCESS)
goto close_handles;
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index 46a8238..958a641 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -29,14 +29,7 @@
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
-
-static uint32_t getle32(const void *p)
-{
- const uint8_t *cp = p;
-
- return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) +
- ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24);
-}
+#include <tools/le_byteshift.h>
int main(int argc, char *argv[])
{
@@ -69,7 +62,7 @@ int main(int argc, char *argv[])
}
ilen = ftell(f);
- olen = getle32(&olen);
+ olen = get_unaligned_le32(&olen);
fclose(f);
/*
diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
index 89bbf4e..d3c0b02 100644
--- a/arch/x86/boot/compressed/relocs.c
+++ b/arch/x86/boot/compressed/relocs.c
@@ -10,6 +10,7 @@
#define USE_BSD
#include <endian.h>
#include <regex.h>
+#include <tools/le_byteshift.h>
static void die(char *fmt, ...);
@@ -605,10 +606,7 @@ static void emit_relocs(int as_text)
fwrite("\0\0\0\0", 4, 1, stdout);
/* Now print each relocation */
for (i = 0; i < reloc_count; i++) {
- buf[0] = (relocs[i] >> 0) & 0xff;
- buf[1] = (relocs[i] >> 8) & 0xff;
- buf[2] = (relocs[i] >> 16) & 0xff;
- buf[3] = (relocs[i] >> 24) & 0xff;
+ put_unaligned_le32(relocs[i], buf);
fwrite(buf, 4, 1, stdout);
}
}
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 4e9bd6b..ed54976 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -29,18 +29,18 @@
#include <stdarg.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <sys/sysmacros.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
-#include <asm/boot.h>
+#include <tools/le_byteshift.h>
typedef unsigned char u8;
typedef unsigned short u16;
-typedef unsigned long u32;
+typedef unsigned int u32;
#define DEFAULT_MAJOR_ROOT 0
#define DEFAULT_MINOR_ROOT 0
+#define DEFAULT_ROOT_DEV (DEFAULT_MAJOR_ROOT << 8 | DEFAULT_MINOR_ROOT)
/* Minimal number of setup sectors */
#define SETUP_SECT_MIN 5
@@ -159,7 +159,7 @@ int main(int argc, char ** argv)
die("read-error on `setup'");
if (c < 1024)
die("The setup must be at least 1024 bytes");
- if (buf[510] != 0x55 || buf[511] != 0xaa)
+ if (get_unaligned_le16(&buf[510]) != 0xAA55)
die("Boot block hasn't got boot flag (0xAA55)");
fclose(file);
@@ -171,8 +171,7 @@ int main(int argc, char ** argv)
memset(buf+c, 0, i-c);
/* Set the default root device */
- buf[508] = DEFAULT_MINOR_ROOT;
- buf[509] = DEFAULT_MAJOR_ROOT;
+ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]);
fprintf(stderr, "Setup is %d bytes (padded to %d bytes).\n", c, i);
@@ -192,44 +191,42 @@ int main(int argc, char ** argv)
/* Patch the setup code with the appropriate size parameters */
buf[0x1f1] = setup_sectors-1;
- buf[0x1f4] = sys_size;
- buf[0x1f5] = sys_size >> 8;
- buf[0x1f6] = sys_size >> 16;
- buf[0x1f7] = sys_size >> 24;
+ put_unaligned_le32(sys_size, &buf[0x1f4]);
#ifdef CONFIG_EFI_STUB
file_sz = sz + i + ((sys_size * 16) - sz);
- pe_header = *(unsigned int *)&buf[0x3c];
+ pe_header = get_unaligned_le32(&buf[0x3c]);
/* Size of code */
- *(unsigned int *)&buf[pe_header + 0x1c] = file_sz;
+ put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]);
/* Size of image */
- *(unsigned int *)&buf[pe_header + 0x50] = file_sz;
+ put_unaligned_le32(file_sz, &buf[pe_header + 0x50]);
#ifdef CONFIG_X86_32
/* Address of entry point */
- *(unsigned int *)&buf[pe_header + 0x28] = i;
+ put_unaligned_le32(i, &buf[pe_header + 0x28]);
/* .text size */
- *(unsigned int *)&buf[pe_header + 0xb0] = file_sz;
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]);
/* .text size of initialised data */
- *(unsigned int *)&buf[pe_header + 0xb8] = file_sz;
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]);
#else
/*
* Address of entry point. startup_32 is at the beginning and
* the 64-bit entry point (startup_64) is always 512 bytes
* after.
*/
- *(unsigned int *)&buf[pe_header + 0x28] = i + 512;
+ put_unaligned_le32(i + 512, &buf[pe_header + 0x28]);
/* .text size */
- *(unsigned int *)&buf[pe_header + 0xc0] = file_sz;
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]);
/* .text size of initialised data */
- *(unsigned int *)&buf[pe_header + 0xc8] = file_sz;
+ put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]);
+
#endif /* CONFIG_X86_32 */
#endif /* CONFIG_EFI_STUB */
@@ -250,8 +247,9 @@ int main(int argc, char ** argv)
}
/* Write the CRC */
- fprintf(stderr, "CRC %lx\n", crc);
- if (fwrite(&crc, 1, 4, stdout) != 4)
+ fprintf(stderr, "CRC %x\n", crc);
+ put_unaligned_le32(crc, buf);
+ if (fwrite(buf, 1, 4, stdout) != 4)
die("Writing CRC failed");
close(fd);
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2bf18059f..119db67 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -15,23 +15,28 @@ CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
-CONFIG_UTS_NS=y
-CONFIG_IPC_NS=y
-CONFIG_USER_NS=y
-CONFIG_PID_NS=y
-CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
-CONFIG_SPARSE_IRQ=y
CONFIG_X86_GENERIC=y
CONFIG_HPET_TIMER=y
CONFIG_SCHED_SMT=y
@@ -51,14 +56,12 @@ CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_COMPAT_VDSO is not set
-CONFIG_PM=y
+CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
-CONFIG_HIBERNATION=y
CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_DEBUG=y
# CONFIG_CPU_FREQ_STAT is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
@@ -69,7 +72,6 @@ CONFIG_PCI_MSI=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
CONFIG_HOTPLUG_PCI=y
-CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_BINFMT_MISC=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -120,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_LOG=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
@@ -128,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
-CONFIG_IP6_NF_TARGET_LOG=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
@@ -169,25 +169,20 @@ CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_VENDOR_3COM=y
+CONFIG_NETCONSOLE=y
+CONFIG_BNX2=y
+CONFIG_TIGON3=y
CONFIG_NET_TULIP=y
-CONFIG_NET_PCI=y
-CONFIG_FORCEDETH=y
CONFIG_E100=y
+CONFIG_E1000=y
+CONFIG_E1000E=y
+CONFIG_SKY2=y
CONFIG_NE2K_PCI=y
+CONFIG_FORCEDETH=y
CONFIG_8139TOO=y
# CONFIG_8139TOO_PIO is not set
-CONFIG_E1000=y
-CONFIG_E1000E=y
CONFIG_R8169=y
-CONFIG_SKY2=y
-CONFIG_TIGON3=y
-CONFIG_BNX2=y
-CONFIG_TR=y
-CONFIG_NET_PCMCIA=y
CONFIG_FDDI=y
-CONFIG_NETCONSOLE=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
@@ -196,6 +191,7 @@ CONFIG_INPUT_TABLET=y
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_MISC=y
CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
@@ -205,7 +201,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
-# CONFIG_LEGACY_PTYS is not set
CONFIG_HW_RANDOM=y
CONFIG_NVRAM=y
CONFIG_HPET=y
@@ -220,7 +215,6 @@ CONFIG_DRM_I915=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
-CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
@@ -283,7 +277,6 @@ CONFIG_ZISOFS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
@@ -291,18 +284,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
@@ -317,13 +298,12 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+CONFIG_DEBUG_STACK_USAGE=y
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
CONFIG_DEBUG_BOOT_PARAMS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 058a35b..76eb290 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,4 +1,3 @@
-CONFIG_64BIT=y
CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
@@ -16,26 +15,29 @@ CONFIG_CPUSETS=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_RESOURCE_COUNTERS=y
CONFIG_CGROUP_SCHED=y
-CONFIG_UTS_NS=y
-CONFIG_IPC_NS=y
-CONFIG_USER_NS=y
-CONFIG_PID_NS=y
-CONFIG_NET_NS=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_EFI_PARTITION=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_SMP=y
-CONFIG_SPARSE_IRQ=y
CONFIG_CALGARY_IOMMU=y
-CONFIG_AMD_IOMMU=y
-CONFIG_AMD_IOMMU_STATS=y
CONFIG_NR_CPUS=64
CONFIG_SCHED_SMT=y
CONFIG_PREEMPT_VOLUNTARY=y
@@ -53,27 +55,22 @@ CONFIG_HZ_1000=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
# CONFIG_COMPAT_VDSO is not set
-CONFIG_PM=y
+CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_PM_TRACE_RTC=y
-CONFIG_HIBERNATION=y
CONFIG_ACPI_PROCFS=y
CONFIG_ACPI_DOCK=y
CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_DEBUG=y
# CONFIG_CPU_FREQ_STAT is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_PCI_MMCONFIG=y
-CONFIG_INTEL_IOMMU=y
-# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_PCIEPORTBUS=y
CONFIG_PCCARD=y
CONFIG_YENTA=y
CONFIG_HOTPLUG_PCI=y
-CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y
CONFIG_BINFMT_MISC=y
CONFIG_IA32_EMULATION=y
CONFIG_NET=y
@@ -125,7 +122,6 @@ CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
CONFIG_IP_NF_TARGET_REJECT=y
-CONFIG_IP_NF_TARGET_LOG=y
CONFIG_IP_NF_TARGET_ULOG=y
CONFIG_NF_NAT=y
CONFIG_IP_NF_TARGET_MASQUERADE=y
@@ -133,7 +129,6 @@ CONFIG_IP_NF_MANGLE=y
CONFIG_NF_CONNTRACK_IPV6=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_MATCH_IPV6HEADER=y
-CONFIG_IP6_NF_TARGET_LOG=y
CONFIG_IP6_NF_FILTER=y
CONFIG_IP6_NF_TARGET_REJECT=y
CONFIG_IP6_NF_MANGLE=y
@@ -172,20 +167,15 @@ CONFIG_DM_ZERO=y
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_MAC_EMUMOUSEBTN=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_VENDOR_3COM=y
+CONFIG_NETCONSOLE=y
+CONFIG_TIGON3=y
CONFIG_NET_TULIP=y
-CONFIG_NET_PCI=y
-CONFIG_FORCEDETH=y
CONFIG_E100=y
-CONFIG_8139TOO=y
CONFIG_E1000=y
CONFIG_SKY2=y
-CONFIG_TIGON3=y
-CONFIG_TR=y
-CONFIG_NET_PCMCIA=y
+CONFIG_FORCEDETH=y
+CONFIG_8139TOO=y
CONFIG_FDDI=y
-CONFIG_NETCONSOLE=y
CONFIG_INPUT_POLLDEV=y
# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
@@ -194,6 +184,7 @@ CONFIG_INPUT_TABLET=y
CONFIG_INPUT_TOUCHSCREEN=y
CONFIG_INPUT_MISC=y
CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
@@ -203,7 +194,6 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
-# CONFIG_LEGACY_PTYS is not set
CONFIG_HW_RANDOM=y
# CONFIG_HW_RANDOM_INTEL is not set
# CONFIG_HW_RANDOM_AMD is not set
@@ -221,7 +211,6 @@ CONFIG_DRM_I915_KMS=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_EFI=y
-CONFIG_BACKLIGHT_LCD_SUPPORT=y
# CONFIG_LCD_CLASS_DEVICE is not set
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
@@ -268,6 +257,10 @@ CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_DMADEVICES=y
CONFIG_EEEPC_LAPTOP=y
+CONFIG_AMD_IOMMU=y
+CONFIG_AMD_IOMMU_STATS=y
+CONFIG_INTEL_IOMMU=y
+# CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
CONFIG_EFI_VARS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
@@ -284,7 +277,6 @@ CONFIG_ZISOFS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
@@ -292,18 +284,6 @@ CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
@@ -317,13 +297,12 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+CONFIG_DEBUG_STACK_USAGE=y
CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
CONFIG_EARLY_PRINTK_DBGP=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_DEBUG_STACK_USAGE=y
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_DEBUG_NX_TEST=m
CONFIG_DEBUG_BOOT_PARAMS=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 2b0b963..e191ac0 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -25,6 +26,7 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
+camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 545d0ce..c799352 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -28,6 +28,7 @@
#include <crypto/aes.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
+#include <asm/cpu_device_id.h>
#include <asm/i387.h>
#include <asm/aes.h>
#include <crypto/scatterwalk.h>
@@ -1107,12 +1108,12 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
scatterwalk_start(&assoc_sg_walk, req->assoc);
- src = scatterwalk_map(&src_sg_walk, 0);
- assoc = scatterwalk_map(&assoc_sg_walk, 0);
+ src = scatterwalk_map(&src_sg_walk);
+ assoc = scatterwalk_map(&assoc_sg_walk);
dst = src;
if (unlikely(req->src != req->dst)) {
scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk, 0);
+ dst = scatterwalk_map(&dst_sg_walk);
}
} else {
@@ -1136,11 +1137,11 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
* back to the packet. */
if (one_entry_in_sg) {
if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst, 0);
+ scatterwalk_unmap(dst);
scatterwalk_done(&dst_sg_walk, 0, 0);
}
- scatterwalk_unmap(src, 0);
- scatterwalk_unmap(assoc, 0);
+ scatterwalk_unmap(src);
+ scatterwalk_unmap(assoc);
scatterwalk_done(&src_sg_walk, 0, 0);
scatterwalk_done(&assoc_sg_walk, 0, 0);
} else {
@@ -1189,12 +1190,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
one_entry_in_sg = 1;
scatterwalk_start(&src_sg_walk, req->src);
scatterwalk_start(&assoc_sg_walk, req->assoc);
- src = scatterwalk_map(&src_sg_walk, 0);
- assoc = scatterwalk_map(&assoc_sg_walk, 0);
+ src = scatterwalk_map(&src_sg_walk);
+ assoc = scatterwalk_map(&assoc_sg_walk);
dst = src;
if (unlikely(req->src != req->dst)) {
scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk, 0);
+ dst = scatterwalk_map(&dst_sg_walk);
}
} else {
@@ -1219,11 +1220,11 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
if (one_entry_in_sg) {
if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst, 0);
+ scatterwalk_unmap(dst);
scatterwalk_done(&dst_sg_walk, 0, 0);
}
- scatterwalk_unmap(src, 0);
- scatterwalk_unmap(assoc, 0);
+ scatterwalk_unmap(src);
+ scatterwalk_unmap(assoc);
scatterwalk_done(&src_sg_walk, 0, 0);
scatterwalk_done(&assoc_sg_walk, 0, 0);
} else {
@@ -1253,14 +1254,19 @@ static struct crypto_alg __rfc4106_alg = {
};
#endif
+
+static const struct x86_cpu_id aesni_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_AES),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
+
static int __init aesni_init(void)
{
int err;
- if (!cpu_has_aes) {
- printk(KERN_INFO "Intel AES-NI instructions are not detected.\n");
+ if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
- }
if ((err = crypto_fpu_init()))
goto fpu_err;
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index b05aa16..7967474 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -25,6 +25,7 @@
*
*/
+#include <asm/processor.h>
#include <crypto/blowfish.h>
#include <linux/crypto.h>
#include <linux/init.h>
@@ -76,27 +77,6 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
-static struct crypto_alg bf_alg = {
- .cra_name = "blowfish",
- .cra_driver_name = "blowfish-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 3,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(bf_alg.cra_list),
- .cra_u = {
- .cipher = {
- .cia_min_keysize = BF_MIN_KEY_SIZE,
- .cia_max_keysize = BF_MAX_KEY_SIZE,
- .cia_setkey = blowfish_setkey,
- .cia_encrypt = blowfish_encrypt,
- .cia_decrypt = blowfish_decrypt,
- }
- }
-};
-
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
@@ -160,28 +140,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
}
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "ecb(blowfish)",
- .cra_driver_name = "ecb-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
@@ -307,29 +265,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "cbc(blowfish)",
- .cra_driver_name = "cbc-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
@@ -423,7 +358,67 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_ctr_alg = {
+static struct crypto_alg bf_algs[4] = { {
+ .cra_name = "blowfish",
+ .cra_driver_name = "blowfish-asm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = BF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct bf_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = BF_MIN_KEY_SIZE,
+ .cia_max_keysize = BF_MAX_KEY_SIZE,
+ .cia_setkey = blowfish_setkey,
+ .cia_encrypt = blowfish_encrypt,
+ .cia_decrypt = blowfish_decrypt,
+ }
+ }
+}, {
+ .cra_name = "ecb(blowfish)",
+ .cra_driver_name = "ecb-blowfish-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = BF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct bf_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = BF_MIN_KEY_SIZE,
+ .max_keysize = BF_MAX_KEY_SIZE,
+ .setkey = blowfish_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(blowfish)",
+ .cra_driver_name = "cbc-blowfish-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = BF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct bf_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = BF_MIN_KEY_SIZE,
+ .max_keysize = BF_MAX_KEY_SIZE,
+ .ivsize = BF_BLOCK_SIZE,
+ .setkey = blowfish_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
.cra_name = "ctr(blowfish)",
.cra_driver_name = "ctr-blowfish-asm",
.cra_priority = 300,
@@ -433,7 +428,7 @@ static struct crypto_alg blk_ctr_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
+ .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
@@ -444,43 +439,45 @@ static struct crypto_alg blk_ctr_alg = {
.decrypt = ctr_crypt,
},
},
-};
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ if (boot_cpu_data.x86 == 0x0f) {
+ /*
+ * On Pentium 4, blowfish-x86_64 is slower than generic C
+ * implementation because use of 64bit rotates (which are really
+ * slow on P4). Therefore blacklist P4s.
+ */
+ return true;
+ }
+
+ return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
- int err;
+ if (!force && is_blacklisted_cpu()) {
+ printk(KERN_INFO
+ "blowfish-x86_64: performance on this CPU "
+ "would be suboptimal: disabling "
+ "blowfish-x86_64.\n");
+ return -ENODEV;
+ }
- err = crypto_register_alg(&bf_alg);
- if (err)
- goto bf_err;
- err = crypto_register_alg(&blk_ecb_alg);
- if (err)
- goto ecb_err;
- err = crypto_register_alg(&blk_cbc_alg);
- if (err)
- goto cbc_err;
- err = crypto_register_alg(&blk_ctr_alg);
- if (err)
- goto ctr_err;
-
- return 0;
-
-ctr_err:
- crypto_unregister_alg(&blk_cbc_alg);
-cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-ecb_err:
- crypto_unregister_alg(&bf_alg);
-bf_err:
- return err;
+ return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
static void __exit fini(void)
{
- crypto_unregister_alg(&blk_ctr_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
- crypto_unregister_alg(&bf_alg);
+ crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
module_init(init);
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
new file mode 100644
index 0000000..0b33743
--- /dev/null
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -0,0 +1,520 @@
+/*
+ * Camellia Cipher Algorithm (x86_64)
+ *
+ * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+.file "camellia-x86_64-asm_64.S"
+.text
+
+.extern camellia_sp10011110;
+.extern camellia_sp22000222;
+.extern camellia_sp03303033;
+.extern camellia_sp00444404;
+.extern camellia_sp02220222;
+.extern camellia_sp30333033;
+.extern camellia_sp44044404;
+.extern camellia_sp11101110;
+
+#define sp10011110 camellia_sp10011110
+#define sp22000222 camellia_sp22000222
+#define sp03303033 camellia_sp03303033
+#define sp00444404 camellia_sp00444404
+#define sp02220222 camellia_sp02220222
+#define sp30333033 camellia_sp30333033
+#define sp44044404 camellia_sp44044404
+#define sp11101110 camellia_sp11101110
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct camellia_ctx: */
+#define key_table 0
+#define key_length CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+#define RIOd %esi
+
+#define RAB0 %rax
+#define RCD0 %rcx
+#define RAB1 %rbx
+#define RCD1 %rdx
+
+#define RAB0d %eax
+#define RCD0d %ecx
+#define RAB1d %ebx
+#define RCD1d %edx
+
+#define RAB0bl %al
+#define RCD0bl %cl
+#define RAB1bl %bl
+#define RCD1bl %dl
+
+#define RAB0bh %ah
+#define RCD0bh %ch
+#define RAB1bh %bh
+#define RCD1bh %dh
+
+#define RT0 %rsi
+#define RT1 %rbp
+#define RT2 %r8
+
+#define RT0d %esi
+#define RT1d %ebp
+#define RT2d %r8d
+
+#define RT2bl %r8b
+
+#define RXOR %r9
+#define RRBP %r10
+#define RDST %r11
+
+#define RXORd %r9d
+#define RXORbl %r9b
+
+#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
+ movzbl ab ## bl, tmp2 ## d; \
+ movzbl ab ## bh, tmp1 ## d; \
+ rorq $16, ab; \
+ xorq T0(, tmp2, 8), dst; \
+ xorq T1(, tmp1, 8), dst;
+
+/**********************************************************************
+ 1-way camellia
+ **********************************************************************/
+#define roundsm(ab, subkey, cd) \
+ movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
+ \
+ xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
+ xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
+ \
+ xorq RT2, cd ## 0;
+
+#define fls(l, r, kl, kr) \
+ movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
+ andl l ## 0d, RT0d; \
+ roll $1, RT0d; \
+ shlq $32, RT0; \
+ xorq RT0, l ## 0; \
+ movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
+ orq r ## 0, RT1; \
+ shrq $32, RT1; \
+ xorq RT1, r ## 0; \
+ \
+ movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
+ orq l ## 0, RT2; \
+ shrq $32, RT2; \
+ xorq RT2, l ## 0; \
+ movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
+ andl r ## 0d, RT0d; \
+ roll $1, RT0d; \
+ shlq $32, RT0; \
+ xorq RT0, r ## 0;
+
+#define enc_rounds(i) \
+ roundsm(RAB, i + 2, RCD); \
+ roundsm(RCD, i + 3, RAB); \
+ roundsm(RAB, i + 4, RCD); \
+ roundsm(RCD, i + 5, RAB); \
+ roundsm(RAB, i + 6, RCD); \
+ roundsm(RCD, i + 7, RAB);
+
+#define enc_fls(i) \
+ fls(RAB, RCD, i + 0, i + 1);
+
+#define enc_inpack() \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rolq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rorq $32, RCD0; \
+ xorq key_table(CTX), RAB0;
+
+#define enc_outunpack(op, max) \
+ xorq key_table(CTX, max, 8), RCD0; \
+ rorq $32, RCD0; \
+ bswapq RCD0; \
+ op ## q RCD0, (RIO); \
+ rolq $32, RAB0; \
+ bswapq RAB0; \
+ op ## q RAB0, 4*2(RIO);
+
+#define dec_rounds(i) \
+ roundsm(RAB, i + 7, RCD); \
+ roundsm(RCD, i + 6, RAB); \
+ roundsm(RAB, i + 5, RCD); \
+ roundsm(RCD, i + 4, RAB); \
+ roundsm(RAB, i + 3, RCD); \
+ roundsm(RCD, i + 2, RAB);
+
+#define dec_fls(i) \
+ fls(RAB, RCD, i + 1, i + 0);
+
+#define dec_inpack(max) \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rolq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rorq $32, RCD0; \
+ xorq key_table(CTX, max, 8), RAB0;
+
+#define dec_outunpack() \
+ xorq key_table(CTX), RCD0; \
+ rorq $32, RCD0; \
+ bswapq RCD0; \
+ movq RCD0, (RIO); \
+ rolq $32, RAB0; \
+ bswapq RAB0; \
+ movq RAB0, 4*2(RIO);
+
+.global __camellia_enc_blk;
+.type __camellia_enc_blk,@function;
+
+__camellia_enc_blk:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: bool xor
+ */
+ movq %rbp, RRBP;
+
+ movq %rcx, RXOR;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ enc_inpack();
+
+ enc_rounds(0);
+ enc_fls(8);
+ enc_rounds(8);
+ enc_fls(16);
+ enc_rounds(16);
+ movl $24, RT1d; /* max */
+
+ cmpb $16, key_length(CTX);
+ je __enc_done;
+
+ enc_fls(24);
+ enc_rounds(24);
+ movl $32, RT1d; /* max */
+
+__enc_done:
+ testb RXORbl, RXORbl;
+ movq RDST, RIO;
+
+ jnz __enc_xor;
+
+ enc_outunpack(mov, RT1);
+
+ movq RRBP, %rbp;
+ ret;
+
+__enc_xor:
+ enc_outunpack(xor, RT1);
+
+ movq RRBP, %rbp;
+ ret;
+
+.global camellia_dec_blk;
+.type camellia_dec_blk,@function;
+
+camellia_dec_blk:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ cmpl $16, key_length(CTX);
+ movl $32, RT2d;
+ movl $24, RXORd;
+ cmovel RXORd, RT2d; /* max */
+
+ movq %rbp, RRBP;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ dec_inpack(RT2);
+
+ cmpb $24, RT2bl;
+ je __dec_rounds16;
+
+ dec_rounds(24);
+ dec_fls(24);
+
+__dec_rounds16:
+ dec_rounds(16);
+ dec_fls(16);
+ dec_rounds(8);
+ dec_fls(8);
+ dec_rounds(0);
+
+ movq RDST, RIO;
+
+ dec_outunpack();
+
+ movq RRBP, %rbp;
+ ret;
+
+/**********************************************************************
+ 2-way camellia
+ **********************************************************************/
+#define roundsm2(ab, subkey, cd) \
+ movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
+ xorq RT2, cd ## 1; \
+ \
+ xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
+ xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
+ xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
+ \
+ xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
+ xorq RT2, cd ## 0; \
+ xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
+ xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
+ xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
+
+#define fls2(l, r, kl, kr) \
+ movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
+ andl l ## 0d, RT0d; \
+ roll $1, RT0d; \
+ shlq $32, RT0; \
+ xorq RT0, l ## 0; \
+ movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
+ orq r ## 0, RT1; \
+ shrq $32, RT1; \
+ xorq RT1, r ## 0; \
+ \
+ movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
+ andl l ## 1d, RT2d; \
+ roll $1, RT2d; \
+ shlq $32, RT2; \
+ xorq RT2, l ## 1; \
+ movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
+ orq r ## 1, RT0; \
+ shrq $32, RT0; \
+ xorq RT0, r ## 1; \
+ \
+ movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
+ orq l ## 0, RT1; \
+ shrq $32, RT1; \
+ xorq RT1, l ## 0; \
+ movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
+ andl r ## 0d, RT2d; \
+ roll $1, RT2d; \
+ shlq $32, RT2; \
+ xorq RT2, r ## 0; \
+ \
+ movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
+ orq l ## 1, RT0; \
+ shrq $32, RT0; \
+ xorq RT0, l ## 1; \
+ movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
+ andl r ## 1d, RT1d; \
+ roll $1, RT1d; \
+ shlq $32, RT1; \
+ xorq RT1, r ## 1;
+
+#define enc_rounds2(i) \
+ roundsm2(RAB, i + 2, RCD); \
+ roundsm2(RCD, i + 3, RAB); \
+ roundsm2(RAB, i + 4, RCD); \
+ roundsm2(RCD, i + 5, RAB); \
+ roundsm2(RAB, i + 6, RCD); \
+ roundsm2(RCD, i + 7, RAB);
+
+#define enc_fls2(i) \
+ fls2(RAB, RCD, i + 0, i + 1);
+
+#define enc_inpack2() \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rorq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rolq $32, RCD0; \
+ xorq key_table(CTX), RAB0; \
+ \
+ movq 8*2(RIO), RAB1; \
+ bswapq RAB1; \
+ rorq $32, RAB1; \
+ movq 12*2(RIO), RCD1; \
+ bswapq RCD1; \
+ rolq $32, RCD1; \
+ xorq key_table(CTX), RAB1;
+
+#define enc_outunpack2(op, max) \
+ xorq key_table(CTX, max, 8), RCD0; \
+ rolq $32, RCD0; \
+ bswapq RCD0; \
+ op ## q RCD0, (RIO); \
+ rorq $32, RAB0; \
+ bswapq RAB0; \
+ op ## q RAB0, 4*2(RIO); \
+ \
+ xorq key_table(CTX, max, 8), RCD1; \
+ rolq $32, RCD1; \
+ bswapq RCD1; \
+ op ## q RCD1, 8*2(RIO); \
+ rorq $32, RAB1; \
+ bswapq RAB1; \
+ op ## q RAB1, 12*2(RIO);
+
+#define dec_rounds2(i) \
+ roundsm2(RAB, i + 7, RCD); \
+ roundsm2(RCD, i + 6, RAB); \
+ roundsm2(RAB, i + 5, RCD); \
+ roundsm2(RCD, i + 4, RAB); \
+ roundsm2(RAB, i + 3, RCD); \
+ roundsm2(RCD, i + 2, RAB);
+
+#define dec_fls2(i) \
+ fls2(RAB, RCD, i + 1, i + 0);
+
+#define dec_inpack2(max) \
+ movq (RIO), RAB0; \
+ bswapq RAB0; \
+ rorq $32, RAB0; \
+ movq 4*2(RIO), RCD0; \
+ bswapq RCD0; \
+ rolq $32, RCD0; \
+ xorq key_table(CTX, max, 8), RAB0; \
+ \
+ movq 8*2(RIO), RAB1; \
+ bswapq RAB1; \
+ rorq $32, RAB1; \
+ movq 12*2(RIO), RCD1; \
+ bswapq RCD1; \
+ rolq $32, RCD1; \
+ xorq key_table(CTX, max, 8), RAB1;
+
+#define dec_outunpack2() \
+ xorq key_table(CTX), RCD0; \
+ rolq $32, RCD0; \
+ bswapq RCD0; \
+ movq RCD0, (RIO); \
+ rorq $32, RAB0; \
+ bswapq RAB0; \
+ movq RAB0, 4*2(RIO); \
+ \
+ xorq key_table(CTX), RCD1; \
+ rolq $32, RCD1; \
+ bswapq RCD1; \
+ movq RCD1, 8*2(RIO); \
+ rorq $32, RAB1; \
+ bswapq RAB1; \
+ movq RAB1, 12*2(RIO);
+
+.global __camellia_enc_blk_2way;
+.type __camellia_enc_blk_2way,@function;
+
+__camellia_enc_blk_2way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: bool xor
+ */
+ pushq %rbx;
+
+ movq %rbp, RRBP;
+ movq %rcx, RXOR;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ enc_inpack2();
+
+ enc_rounds2(0);
+ enc_fls2(8);
+ enc_rounds2(8);
+ enc_fls2(16);
+ enc_rounds2(16);
+ movl $24, RT2d; /* max */
+
+ cmpb $16, key_length(CTX);
+ je __enc2_done;
+
+ enc_fls2(24);
+ enc_rounds2(24);
+ movl $32, RT2d; /* max */
+
+__enc2_done:
+ test RXORbl, RXORbl;
+ movq RDST, RIO;
+ jnz __enc2_xor;
+
+ enc_outunpack2(mov, RT2);
+
+ movq RRBP, %rbp;
+ popq %rbx;
+ ret;
+
+__enc2_xor:
+ enc_outunpack2(xor, RT2);
+
+ movq RRBP, %rbp;
+ popq %rbx;
+ ret;
+
+.global camellia_dec_blk_2way;
+.type camellia_dec_blk_2way,@function;
+
+camellia_dec_blk_2way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ cmpl $16, key_length(CTX);
+ movl $32, RT2d;
+ movl $24, RXORd;
+ cmovel RXORd, RT2d; /* max */
+
+ movq %rbx, RXOR;
+ movq %rbp, RRBP;
+ movq %rsi, RDST;
+ movq %rdx, RIO;
+
+ dec_inpack2(RT2);
+
+ cmpb $24, RT2bl;
+ je __dec2_rounds16;
+
+ dec_rounds2(24);
+ dec_fls2(24);
+
+__dec2_rounds16:
+ dec_rounds2(16);
+ dec_fls2(16);
+ dec_rounds2(8);
+ dec_fls2(8);
+ dec_rounds2(0);
+
+ movq RDST, RIO;
+
+ dec_outunpack2();
+
+ movq RRBP, %rbp;
+ movq RXOR, %rbx;
+ ret;
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
new file mode 100644
index 0000000..3306dc0
--- /dev/null
+++ b/arch/x86/crypto/camellia_glue.c
@@ -0,0 +1,1952 @@
+/*
+ * Glue Code for assembler optimized version of Camellia
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * Camellia parts based on code by:
+ * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation)
+ * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
+ * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
+ * CTR part based on code (crypto/ctr.c) by:
+ * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/unaligned.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+
+#define CAMELLIA_MIN_KEY_SIZE 16
+#define CAMELLIA_MAX_KEY_SIZE 32
+#define CAMELLIA_BLOCK_SIZE 16
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+struct camellia_ctx {
+ u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+ u32 key_length;
+};
+
+/* regular block cipher functions */
+asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+/* 2-way parallel cipher functions */
+asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, true);
+}
+
+static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, true);
+}
+
+static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+ camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src);
+}
+
+/* camellia sboxes */
+const u64 camellia_sp10011110[256] = {
+ 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
+ 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
+ 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
+ 0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
+ 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
+ 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
+ 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
+ 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
+ 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
+ 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
+ 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
+ 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
+ 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
+ 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
+ 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
+ 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
+ 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
+ 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
+ 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
+ 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
+ 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
+ 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
+ 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
+ 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
+ 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
+ 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
+ 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
+ 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
+ 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
+ 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
+ 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
+ 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
+ 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
+ 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
+ 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
+ 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
+ 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
+ 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
+ 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
+ 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
+ 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
+ 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
+ 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
+ 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
+ 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
+ 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
+ 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
+ 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
+ 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
+ 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
+ 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
+ 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
+ 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
+ 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
+ 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
+ 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
+ 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
+ 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
+ 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
+ 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
+ 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
+ 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
+ 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
+ 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
+ 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
+ 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
+ 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
+ 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
+ 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
+ 0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
+ 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
+ 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
+ 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
+ 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
+ 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
+ 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
+ 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
+ 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
+ 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
+ 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
+ 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
+ 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
+ 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
+ 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
+ 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
+ 0x9e00009e9e9e9e00,
+};
+
+const u64 camellia_sp22000222[256] = {
+ 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
+ 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
+ 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
+ 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
+ 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
+ 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
+ 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
+ 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
+ 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
+ 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
+ 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
+ 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
+ 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
+ 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
+ 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
+ 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
+ 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
+ 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
+ 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
+ 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
+ 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
+ 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
+ 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
+ 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
+ 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
+ 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
+ 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
+ 0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
+ 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
+ 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
+ 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
+ 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
+ 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
+ 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
+ 0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
+ 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
+ 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
+ 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
+ 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
+ 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
+ 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
+ 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
+ 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
+ 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
+ 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
+ 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
+ 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
+ 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
+ 0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
+ 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
+ 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
+ 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
+ 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
+ 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
+ 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
+ 0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
+ 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
+ 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
+ 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
+ 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
+ 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
+ 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
+ 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
+ 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
+ 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
+ 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
+ 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
+ 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
+ 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
+ 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
+ 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
+ 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
+ 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
+ 0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
+ 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
+ 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
+ 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
+ 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
+ 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
+ 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
+ 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
+ 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
+ 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
+ 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
+ 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
+ 0x3d3d0000003d3d3d,
+};
+
+const u64 camellia_sp03303033[256] = {
+ 0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
+ 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
+ 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
+ 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
+ 0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
+ 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
+ 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
+ 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
+ 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
+ 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
+ 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
+ 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
+ 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
+ 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
+ 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
+ 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
+ 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
+ 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
+ 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
+ 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
+ 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
+ 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
+ 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
+ 0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
+ 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
+ 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
+ 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
+ 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
+ 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
+ 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
+ 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
+ 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
+ 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
+ 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
+ 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
+ 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
+ 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
+ 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
+ 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
+ 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
+ 0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
+ 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
+ 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
+ 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
+ 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
+ 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
+ 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
+ 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
+ 0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
+ 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
+ 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
+ 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
+ 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
+ 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
+ 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
+ 0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
+ 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
+ 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
+ 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
+ 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
+ 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
+ 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
+ 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
+ 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
+ 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
+ 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
+ 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
+ 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
+ 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
+ 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
+ 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
+ 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
+ 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
+ 0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
+ 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
+ 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
+ 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
+ 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
+ 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
+ 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
+ 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
+ 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
+ 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
+ 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
+ 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
+ 0x004f4f004f004f4f,
+};
+
+const u64 camellia_sp00444404[256] = {
+ 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
+ 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
+ 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
+ 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
+ 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
+ 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
+ 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
+ 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
+ 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
+ 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
+ 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
+ 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
+ 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
+ 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
+ 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
+ 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
+ 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
+ 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
+ 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
+ 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
+ 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
+ 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
+ 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
+ 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
+ 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
+ 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
+ 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
+ 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
+ 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
+ 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
+ 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
+ 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
+ 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
+ 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
+ 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
+ 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
+ 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
+ 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
+ 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
+ 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
+ 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
+ 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
+ 0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
+ 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
+ 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
+ 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
+ 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
+ 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
+ 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
+ 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
+ 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
+ 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
+ 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
+ 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
+ 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
+ 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
+ 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
+ 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
+ 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
+ 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
+ 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
+ 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
+ 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
+ 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
+ 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
+ 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
+ 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
+ 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
+ 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
+ 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
+ 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
+ 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
+ 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
+ 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
+ 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
+ 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
+ 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
+ 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
+ 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
+ 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
+ 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
+ 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
+ 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
+ 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
+ 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
+ 0x00009e9e9e9e009e,
+};
+
+const u64 camellia_sp02220222[256] = {
+ 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
+ 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
+ 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
+ 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
+ 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
+ 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
+ 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
+ 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
+ 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
+ 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
+ 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
+ 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
+ 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
+ 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
+ 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
+ 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
+ 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
+ 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
+ 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
+ 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
+ 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
+ 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
+ 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
+ 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
+ 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
+ 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
+ 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
+ 0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
+ 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
+ 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
+ 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
+ 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
+ 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
+ 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
+ 0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
+ 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
+ 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
+ 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
+ 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
+ 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
+ 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
+ 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
+ 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
+ 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
+ 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
+ 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
+ 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
+ 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
+ 0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
+ 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
+ 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
+ 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
+ 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
+ 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
+ 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
+ 0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
+ 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
+ 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
+ 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
+ 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
+ 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
+ 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
+ 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
+ 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
+ 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
+ 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
+ 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
+ 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
+ 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
+ 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
+ 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
+ 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
+ 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
+ 0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
+ 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
+ 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
+ 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
+ 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
+ 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
+ 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
+ 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
+ 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
+ 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
+ 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
+ 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
+ 0x003d3d3d003d3d3d,
+};
+
+const u64 camellia_sp30333033[256] = {
+ 0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
+ 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
+ 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
+ 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
+ 0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
+ 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
+ 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
+ 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
+ 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
+ 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
+ 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
+ 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
+ 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
+ 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
+ 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
+ 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
+ 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
+ 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
+ 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
+ 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
+ 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
+ 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
+ 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
+ 0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
+ 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
+ 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
+ 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
+ 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
+ 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
+ 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
+ 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
+ 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
+ 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
+ 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
+ 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
+ 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
+ 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
+ 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
+ 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
+ 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
+ 0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
+ 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
+ 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
+ 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
+ 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
+ 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
+ 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
+ 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
+ 0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
+ 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
+ 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
+ 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
+ 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
+ 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
+ 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
+ 0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
+ 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
+ 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
+ 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
+ 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
+ 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
+ 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
+ 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
+ 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
+ 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
+ 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
+ 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
+ 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
+ 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
+ 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
+ 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
+ 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
+ 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
+ 0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
+ 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
+ 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
+ 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
+ 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
+ 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
+ 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
+ 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
+ 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
+ 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
+ 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
+ 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
+ 0x4f004f4f4f004f4f,
+};
+
+const u64 camellia_sp44044404[256] = {
+ 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
+ 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
+ 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
+ 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
+ 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
+ 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
+ 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
+ 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
+ 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
+ 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
+ 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
+ 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
+ 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
+ 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
+ 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
+ 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
+ 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
+ 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
+ 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
+ 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
+ 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
+ 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
+ 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
+ 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
+ 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
+ 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
+ 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
+ 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
+ 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
+ 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
+ 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
+ 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
+ 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
+ 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
+ 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
+ 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
+ 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
+ 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
+ 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
+ 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
+ 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
+ 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
+ 0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
+ 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
+ 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
+ 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
+ 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
+ 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
+ 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
+ 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
+ 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
+ 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
+ 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
+ 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
+ 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
+ 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
+ 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
+ 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
+ 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
+ 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
+ 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
+ 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
+ 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
+ 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
+ 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
+ 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
+ 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
+ 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
+ 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
+ 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
+ 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
+ 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
+ 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
+ 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
+ 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
+ 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
+ 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
+ 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
+ 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
+ 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
+ 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
+ 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
+ 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
+ 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
+ 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
+ 0x9e9e009e9e9e009e,
+};
+
+const u64 camellia_sp11101110[256] = {
+ 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
+ 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
+ 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
+ 0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
+ 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
+ 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
+ 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
+ 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
+ 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
+ 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
+ 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
+ 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
+ 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
+ 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
+ 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
+ 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
+ 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
+ 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
+ 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
+ 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
+ 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
+ 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
+ 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
+ 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
+ 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
+ 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
+ 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
+ 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
+ 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
+ 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
+ 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
+ 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
+ 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
+ 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
+ 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
+ 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
+ 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
+ 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
+ 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
+ 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
+ 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
+ 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
+ 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
+ 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
+ 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
+ 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
+ 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
+ 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
+ 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
+ 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
+ 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
+ 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
+ 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
+ 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
+ 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
+ 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
+ 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
+ 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
+ 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
+ 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
+ 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
+ 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
+ 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
+ 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
+ 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
+ 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
+ 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
+ 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
+ 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
+ 0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
+ 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
+ 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
+ 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
+ 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
+ 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
+ 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
+ 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
+ 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
+ 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
+ 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
+ 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
+ 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
+ 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
+ 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
+ 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
+ 0x9e9e9e009e9e9e00,
+};
+
+/* key constants */
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/* macros */
+#define ROLDQ(l, r, bits) ({ \
+ u64 t = l; \
+ l = (l << bits) | (r >> (64 - bits)); \
+ r = (r << bits) | (t >> (64 - bits)); \
+})
+
+#define CAMELLIA_F(x, kl, kr, y) ({ \
+ u64 ii = x ^ (((u64)kl << 32) | kr); \
+ y = camellia_sp11101110[(uint8_t)ii]; \
+ y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \
+ ii >>= 16; \
+ y ^= camellia_sp30333033[(uint8_t)ii]; \
+ y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \
+ ii >>= 16; \
+ y ^= camellia_sp00444404[(uint8_t)ii]; \
+ y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \
+ ii >>= 16; \
+ y ^= camellia_sp22000222[(uint8_t)ii]; \
+ y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \
+ y = ror64(y, 32); \
+})
+
+#define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32))
+
+static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max)
+{
+ u64 kw4, tt;
+ u32 dw, tl, tr;
+
+ /* absorb kw2 to other subkeys */
+ /* round 2 */
+ subRL[3] ^= subRL[1];
+ /* round 4 */
+ subRL[5] ^= subRL[1];
+ /* round 6 */
+ subRL[7] ^= subRL[1];
+
+ subRL[1] ^= (subRL[1] & ~subRL[9]) << 32;
+ /* modified for FLinv(kl2) */
+ dw = (subRL[1] & subRL[9]) >> 32,
+ subRL[1] ^= rol32(dw, 1);
+
+ /* round 8 */
+ subRL[11] ^= subRL[1];
+ /* round 10 */
+ subRL[13] ^= subRL[1];
+ /* round 12 */
+ subRL[15] ^= subRL[1];
+
+ subRL[1] ^= (subRL[1] & ~subRL[17]) << 32;
+ /* modified for FLinv(kl4) */
+ dw = (subRL[1] & subRL[17]) >> 32,
+ subRL[1] ^= rol32(dw, 1);
+
+ /* round 14 */
+ subRL[19] ^= subRL[1];
+ /* round 16 */
+ subRL[21] ^= subRL[1];
+ /* round 18 */
+ subRL[23] ^= subRL[1];
+
+ if (max == 24) {
+ /* kw3 */
+ subRL[24] ^= subRL[1];
+
+ /* absorb kw4 to other subkeys */
+ kw4 = subRL[25];
+ } else {
+ subRL[1] ^= (subRL[1] & ~subRL[25]) << 32;
+ /* modified for FLinv(kl6) */
+ dw = (subRL[1] & subRL[25]) >> 32,
+ subRL[1] ^= rol32(dw, 1);
+
+ /* round 20 */
+ subRL[27] ^= subRL[1];
+ /* round 22 */
+ subRL[29] ^= subRL[1];
+ /* round 24 */
+ subRL[31] ^= subRL[1];
+ /* kw3 */
+ subRL[32] ^= subRL[1];
+
+ /* absorb kw4 to other subkeys */
+ kw4 = subRL[33];
+ /* round 23 */
+ subRL[30] ^= kw4;
+ /* round 21 */
+ subRL[28] ^= kw4;
+ /* round 19 */
+ subRL[26] ^= kw4;
+
+ kw4 ^= (kw4 & ~subRL[24]) << 32;
+ /* modified for FL(kl5) */
+ dw = (kw4 & subRL[24]) >> 32,
+ kw4 ^= rol32(dw, 1);
+ }
+
+ /* round 17 */
+ subRL[22] ^= kw4;
+ /* round 15 */
+ subRL[20] ^= kw4;
+ /* round 13 */
+ subRL[18] ^= kw4;
+
+ kw4 ^= (kw4 & ~subRL[16]) << 32;
+ /* modified for FL(kl3) */
+ dw = (kw4 & subRL[16]) >> 32,
+ kw4 ^= rol32(dw, 1);
+
+ /* round 11 */
+ subRL[14] ^= kw4;
+ /* round 9 */
+ subRL[12] ^= kw4;
+ /* round 7 */
+ subRL[10] ^= kw4;
+
+ kw4 ^= (kw4 & ~subRL[8]) << 32;
+ /* modified for FL(kl1) */
+ dw = (kw4 & subRL[8]) >> 32,
+ kw4 ^= rol32(dw, 1);
+
+ /* round 5 */
+ subRL[6] ^= kw4;
+ /* round 3 */
+ subRL[4] ^= kw4;
+ /* round 1 */
+ subRL[2] ^= kw4;
+ /* kw1 */
+ subRL[0] ^= kw4;
+
+ /* key XOR is end of F-function */
+ SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */
+ SET_SUBKEY_LR(2, subRL[3]); /* round 1 */
+ SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */
+ SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */
+ SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */
+ SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */
+
+ tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]);
+ dw = tl & (subRL[8] >> 32), /* FL(kl1) */
+ tr = subRL[10] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */
+ SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */
+ SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */
+
+ tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]);
+ dw = tl & (subRL[9] >> 32), /* FLinv(kl2) */
+ tr = subRL[7] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */
+ SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */
+ SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */
+ SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */
+ SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */
+
+ tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]);
+ dw = tl & (subRL[16] >> 32), /* FL(kl3) */
+ tr = subRL[18] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */
+ SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */
+ SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */
+
+ tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]);
+ dw = tl & (subRL[17] >> 32), /* FLinv(kl4) */
+ tr = subRL[15] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */
+ SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */
+ SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */
+ SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */
+ SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */
+
+ if (max == 24) {
+ SET_SUBKEY_LR(23, subRL[22]); /* round 18 */
+ SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */
+ } else {
+ tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]);
+ dw = tl & (subRL[24] >> 32), /* FL(kl5) */
+ tr = subRL[26] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */
+ SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */
+ SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */
+
+ tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]);
+ dw = tl & (subRL[25] >> 32), /* FLinv(kl6) */
+ tr = subRL[23] ^ rol32(dw, 1);
+ tt = (tr | ((u64)tl << 32));
+
+ SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */
+ SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */
+ SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */
+ SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */
+ SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */
+ SET_SUBKEY_LR(31, subRL[30]); /* round 24 */
+ SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */
+ }
+}
+
+static void camellia_setup128(const unsigned char *key, u64 *subkey)
+{
+ u64 kl, kr, ww;
+ u64 subRL[26];
+
+ /**
+ * k == kl || kr (|| is concatenation)
+ */
+ kl = get_unaligned_be64(key);
+ kr = get_unaligned_be64(key + 8);
+
+ /* generate KL dependent subkeys */
+ /* kw1 */
+ subRL[0] = kl;
+ /* kw2 */
+ subRL[1] = kr;
+
+ /* rotation left shift 15bit */
+ ROLDQ(kl, kr, 15);
+
+ /* k3 */
+ subRL[4] = kl;
+ /* k4 */
+ subRL[5] = kr;
+
+ /* rotation left shift 15+30bit */
+ ROLDQ(kl, kr, 30);
+
+ /* k7 */
+ subRL[10] = kl;
+ /* k8 */
+ subRL[11] = kr;
+
+ /* rotation left shift 15+30+15bit */
+ ROLDQ(kl, kr, 15);
+
+ /* k10 */
+ subRL[13] = kr;
+ /* rotation left shift 15+30+15+17 bit */
+ ROLDQ(kl, kr, 17);
+
+ /* kl3 */
+ subRL[16] = kl;
+ /* kl4 */
+ subRL[17] = kr;
+
+ /* rotation left shift 15+30+15+17+17 bit */
+ ROLDQ(kl, kr, 17);
+
+ /* k13 */
+ subRL[18] = kl;
+ /* k14 */
+ subRL[19] = kr;
+
+ /* rotation left shift 15+30+15+17+17+17 bit */
+ ROLDQ(kl, kr, 17);
+
+ /* k17 */
+ subRL[22] = kl;
+ /* k18 */
+ subRL[23] = kr;
+
+ /* generate KA */
+ kl = subRL[0];
+ kr = subRL[1];
+ CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
+ kr ^= ww;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
+
+ /* current status == (kll, klr, w0, w1) */
+ CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
+ kr ^= ww;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
+ kl ^= ww;
+
+ /* generate KA dependent subkeys */
+ /* k1, k2 */
+ subRL[2] = kl;
+ subRL[3] = kr;
+ ROLDQ(kl, kr, 15);
+ /* k5,k6 */
+ subRL[6] = kl;
+ subRL[7] = kr;
+ ROLDQ(kl, kr, 15);
+ /* kl1, kl2 */
+ subRL[8] = kl;
+ subRL[9] = kr;
+ ROLDQ(kl, kr, 15);
+ /* k9 */
+ subRL[12] = kl;
+ ROLDQ(kl, kr, 15);
+ /* k11, k12 */
+ subRL[14] = kl;
+ subRL[15] = kr;
+ ROLDQ(kl, kr, 34);
+ /* k15, k16 */
+ subRL[20] = kl;
+ subRL[21] = kr;
+ ROLDQ(kl, kr, 17);
+ /* kw3, kw4 */
+ subRL[24] = kl;
+ subRL[25] = kr;
+
+ camellia_setup_tail(subkey, subRL, 24);
+}
+
+static void camellia_setup256(const unsigned char *key, u64 *subkey)
+{
+ u64 kl, kr; /* left half of key */
+ u64 krl, krr; /* right half of key */
+ u64 ww; /* temporary variables */
+ u64 subRL[34];
+
+ /**
+ * key = (kl || kr || krl || krr) (|| is concatenation)
+ */
+ kl = get_unaligned_be64(key);
+ kr = get_unaligned_be64(key + 8);
+ krl = get_unaligned_be64(key + 16);
+ krr = get_unaligned_be64(key + 24);
+
+ /* generate KL dependent subkeys */
+ /* kw1 */
+ subRL[0] = kl;
+ /* kw2 */
+ subRL[1] = kr;
+ ROLDQ(kl, kr, 45);
+ /* k9 */
+ subRL[12] = kl;
+ /* k10 */
+ subRL[13] = kr;
+ ROLDQ(kl, kr, 15);
+ /* kl3 */
+ subRL[16] = kl;
+ /* kl4 */
+ subRL[17] = kr;
+ ROLDQ(kl, kr, 17);
+ /* k17 */
+ subRL[22] = kl;
+ /* k18 */
+ subRL[23] = kr;
+ ROLDQ(kl, kr, 34);
+ /* k23 */
+ subRL[30] = kl;
+ /* k24 */
+ subRL[31] = kr;
+
+ /* generate KR dependent subkeys */
+ ROLDQ(krl, krr, 15);
+ /* k3 */
+ subRL[4] = krl;
+ /* k4 */
+ subRL[5] = krr;
+ ROLDQ(krl, krr, 15);
+ /* kl1 */
+ subRL[8] = krl;
+ /* kl2 */
+ subRL[9] = krr;
+ ROLDQ(krl, krr, 30);
+ /* k13 */
+ subRL[18] = krl;
+ /* k14 */
+ subRL[19] = krr;
+ ROLDQ(krl, krr, 34);
+ /* k19 */
+ subRL[26] = krl;
+ /* k20 */
+ subRL[27] = krr;
+ ROLDQ(krl, krr, 34);
+
+ /* generate KA */
+ kl = subRL[0] ^ krl;
+ kr = subRL[1] ^ krr;
+
+ CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww);
+ kr ^= ww;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl);
+ kl ^= krl;
+ CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr);
+ kr ^= ww ^ krr;
+ CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww);
+ kl ^= ww;
+
+ /* generate KB */
+ krl ^= kl;
+ krr ^= kr;
+ CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww);
+ krr ^= ww;
+ CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww);
+ krl ^= ww;
+
+ /* generate KA dependent subkeys */
+ ROLDQ(kl, kr, 15);
+ /* k5 */
+ subRL[6] = kl;
+ /* k6 */
+ subRL[7] = kr;
+ ROLDQ(kl, kr, 30);
+ /* k11 */
+ subRL[14] = kl;
+ /* k12 */
+ subRL[15] = kr;
+ /* rotation left shift 32bit */
+ ROLDQ(kl, kr, 32);
+ /* kl5 */
+ subRL[24] = kl;
+ /* kl6 */
+ subRL[25] = kr;
+ /* rotation left shift 17 from k11,k12 -> k21,k22 */
+ ROLDQ(kl, kr, 17);
+ /* k21 */
+ subRL[28] = kl;
+ /* k22 */
+ subRL[29] = kr;
+
+ /* generate KB dependent subkeys */
+ /* k1 */
+ subRL[2] = krl;
+ /* k2 */
+ subRL[3] = krr;
+ ROLDQ(krl, krr, 30);
+ /* k7 */
+ subRL[10] = krl;
+ /* k8 */
+ subRL[11] = krr;
+ ROLDQ(krl, krr, 30);
+ /* k15 */
+ subRL[20] = krl;
+ /* k16 */
+ subRL[21] = krr;
+ ROLDQ(krl, krr, 51);
+ /* kw3 */
+ subRL[32] = krl;
+ /* kw4 */
+ subRL[33] = krr;
+
+ camellia_setup_tail(subkey, subRL, 32);
+}
+
+static void camellia_setup192(const unsigned char *key, u64 *subkey)
+{
+ unsigned char kk[32];
+ u64 krl, krr;
+
+ memcpy(kk, key, 24);
+ memcpy((unsigned char *)&krl, key+16, 8);
+ krr = ~krl;
+ memcpy(kk+24, (unsigned char *)&krr, 8);
+ camellia_setup256(kk, subkey);
+}
+
+static int __camellia_setkey(struct camellia_ctx *cctx,
+ const unsigned char *key,
+ unsigned int key_len, u32 *flags)
+{
+ if (key_len != 16 && key_len != 24 && key_len != 32) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ cctx->key_length = key_len;
+
+ switch (key_len) {
+ case 16:
+ camellia_setup128(key, cctx->key_table);
+ break;
+ case 24:
+ camellia_setup192(key, cctx->key_table);
+ break;
+ case 32:
+ camellia_setup256(key, cctx->key_table);
+ break;
+ }
+
+ return 0;
+}
+
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
+ &tfm->crt_flags);
+}
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+ void (*fn)(struct camellia_ctx *, u8 *, const u8 *),
+ void (*fn_2way)(struct camellia_ctx *, u8 *, const u8 *))
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes;
+ int err;
+
+ err = blkcipher_walk_virt(desc, walk);
+
+ while ((nbytes = walk->nbytes)) {
+ u8 *wsrc = walk->src.virt.addr;
+ u8 *wdst = walk->dst.virt.addr;
+
+ /* Process two block batch */
+ if (nbytes >= bsize * 2) {
+ do {
+ fn_2way(ctx, wdst, wsrc);
+
+ wsrc += bsize * 2;
+ wdst += bsize * 2;
+ nbytes -= bsize * 2;
+ } while (nbytes >= bsize * 2);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+
+ /* Handle leftovers */
+ do {
+ fn(ctx, wdst, wsrc);
+
+ wsrc += bsize;
+ wdst += bsize;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+done:
+ err = blkcipher_walk_done(desc, walk, nbytes);
+ }
+
+ return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_crypt(desc, &walk, camellia_enc_blk, camellia_enc_blk_2way);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ return ecb_crypt(desc, &walk, camellia_dec_blk, camellia_dec_blk_2way);
+}
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 *iv = (u128 *)walk->iv;
+
+ do {
+ u128_xor(dst, src, iv);
+ camellia_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
+ iv = dst;
+
+ src += 1;
+ dst += 1;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+ u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+ return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ nbytes = __cbc_encrypt(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 ivs[2 - 1];
+ u128 last_iv;
+
+ /* Start of the last block. */
+ src += nbytes / bsize - 1;
+ dst += nbytes / bsize - 1;
+
+ last_iv = *src;
+
+ /* Process two block batch */
+ if (nbytes >= bsize * 2) {
+ do {
+ nbytes -= bsize * (2 - 1);
+ src -= 2 - 1;
+ dst -= 2 - 1;
+
+ ivs[0] = src[0];
+
+ camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
+
+ u128_xor(dst + 1, dst + 1, ivs + 0);
+
+ nbytes -= bsize;
+ if (nbytes < bsize)
+ goto done;
+
+ u128_xor(dst, dst, src - 1);
+ src -= 1;
+ dst -= 1;
+ } while (nbytes >= bsize * 2);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+
+ /* Handle leftovers */
+ for (;;) {
+ camellia_dec_blk(ctx, (u8 *)dst, (u8 *)src);
+
+ nbytes -= bsize;
+ if (nbytes < bsize)
+ break;
+
+ u128_xor(dst, dst, src - 1);
+ src -= 1;
+ dst -= 1;
+ }
+
+done:
+ u128_xor(dst, dst, (u128 *)walk->iv);
+ *(u128 *)walk->iv = last_iv;
+
+ return nbytes;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ nbytes = __cbc_decrypt(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ return err;
+}
+
+static inline void u128_to_be128(be128 *dst, const u128 *src)
+{
+ dst->a = cpu_to_be64(src->a);
+ dst->b = cpu_to_be64(src->b);
+}
+
+static inline void be128_to_u128(u128 *dst, const be128 *src)
+{
+ dst->a = be64_to_cpu(src->a);
+ dst->b = be64_to_cpu(src->b);
+}
+
+static inline void u128_inc(u128 *i)
+{
+ i->b++;
+ if (!i->b)
+ i->a++;
+}
+
+static void ctr_crypt_final(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ u8 keystream[CAMELLIA_BLOCK_SIZE];
+ u8 *src = walk->src.virt.addr;
+ u8 *dst = walk->dst.virt.addr;
+ unsigned int nbytes = walk->nbytes;
+ u128 ctrblk;
+
+ memcpy(keystream, src, nbytes);
+ camellia_enc_blk_xor(ctx, keystream, walk->iv);
+ memcpy(dst, keystream, nbytes);
+
+ be128_to_u128(&ctrblk, (be128 *)walk->iv);
+ u128_inc(&ctrblk);
+ u128_to_be128((be128 *)walk->iv, &ctrblk);
+}
+
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+ struct blkcipher_walk *walk)
+{
+ struct camellia_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ unsigned int nbytes = walk->nbytes;
+ u128 *src = (u128 *)walk->src.virt.addr;
+ u128 *dst = (u128 *)walk->dst.virt.addr;
+ u128 ctrblk;
+ be128 ctrblocks[2];
+
+ be128_to_u128(&ctrblk, (be128 *)walk->iv);
+
+ /* Process two block batch */
+ if (nbytes >= bsize * 2) {
+ do {
+ if (dst != src) {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ }
+
+ /* create ctrblks for parallel encrypt */
+ u128_to_be128(&ctrblocks[0], &ctrblk);
+ u128_inc(&ctrblk);
+ u128_to_be128(&ctrblocks[1], &ctrblk);
+ u128_inc(&ctrblk);
+
+ camellia_enc_blk_xor_2way(ctx, (u8 *)dst,
+ (u8 *)ctrblocks);
+
+ src += 2;
+ dst += 2;
+ nbytes -= bsize * 2;
+ } while (nbytes >= bsize * 2);
+
+ if (nbytes < bsize)
+ goto done;
+ }
+
+ /* Handle leftovers */
+ do {
+ if (dst != src)
+ *dst = *src;
+
+ u128_to_be128(&ctrblocks[0], &ctrblk);
+ u128_inc(&ctrblk);
+
+ camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
+
+ src += 1;
+ dst += 1;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+done:
+ u128_to_be128((be128 *)walk->iv, &ctrblk);
+ return nbytes;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct blkcipher_walk walk;
+ int err;
+
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, CAMELLIA_BLOCK_SIZE);
+
+ while ((nbytes = walk.nbytes) >= CAMELLIA_BLOCK_SIZE) {
+ nbytes = __ctr_crypt(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, nbytes);
+ }
+
+ if (walk.nbytes) {
+ ctr_crypt_final(desc, &walk);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+
+ return err;
+}
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ struct camellia_ctx *ctx = priv;
+ int i;
+
+ while (nbytes >= 2 * bsize) {
+ camellia_enc_blk_2way(ctx, srcdst, srcdst);
+ srcdst += bsize * 2;
+ nbytes -= bsize * 2;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ camellia_enc_blk(ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ struct camellia_ctx *ctx = priv;
+ int i;
+
+ while (nbytes >= 2 * bsize) {
+ camellia_dec_blk_2way(ctx, srcdst, srcdst);
+ srcdst += bsize * 2;
+ nbytes -= bsize * 2;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ camellia_dec_blk(ctx, srcdst, srcdst);
+}
+
+struct camellia_lrw_ctx {
+ struct lrw_table_ctx lrw_table;
+ struct camellia_ctx camellia_ctx;
+};
+
+static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+ int err;
+
+ err = __camellia_setkey(&ctx->camellia_ctx, key,
+ keylen - CAMELLIA_BLOCK_SIZE,
+ &tfm->crt_flags);
+ if (err)
+ return err;
+
+ return lrw_init_table(&ctx->lrw_table,
+ key + keylen - CAMELLIA_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &ctx->camellia_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+
+ return lrw_crypt(desc, dst, src, nbytes, &req);
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &ctx->camellia_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+
+ return lrw_crypt(desc, dst, src, nbytes, &req);
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ lrw_free_table(&ctx->lrw_table);
+}
+
+struct camellia_xts_ctx {
+ struct camellia_ctx tweak_ctx;
+ struct camellia_ctx crypt_ctx;
+};
+
+static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ u32 *flags = &tfm->crt_flags;
+ int err;
+
+ /* key consists of keys of equal size concatenated, therefore
+ * the length must be even
+ */
+ if (keylen % 2) {
+ *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ /* first half of xts-key is for crypt */
+ err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+ if (err)
+ return err;
+
+ /* second half of xts-key is for tweak */
+ return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+ flags);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
+ .crypt_ctx = &ctx->crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+
+ return xts_crypt(desc, dst, src, nbytes, &req);
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[2 * 4];
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
+ .crypt_ctx = &ctx->crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+
+ return xts_crypt(desc, dst, src, nbytes, &req);
+}
+
+static struct crypto_alg camellia_algs[6] = { {
+ .cra_name = "camellia",
+ .cra_driver_name = "camellia-asm",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list),
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .cia_setkey = camellia_setkey,
+ .cia_encrypt = camellia_encrypt,
+ .cia_decrypt = camellia_decrypt
+ }
+ }
+}, {
+ .cra_name = "ecb(camellia)",
+ .cra_driver_name = "ecb-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(camellia)",
+ .cra_driver_name = "cbc-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(camellia)",
+ .cra_driver_name = "ctr-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "lrw(camellia)",
+ .cra_driver_name = "lrw-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list),
+ .cra_exit = lrw_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = lrw_camellia_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
+ .cra_name = "xts(camellia)",
+ .cra_driver_name = "xts-camellia-asm",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = xts_camellia_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+ },
+} };
+
+static bool is_blacklisted_cpu(void)
+{
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ if (boot_cpu_data.x86 == 0x0f) {
+ /*
+ * On Pentium 4, camellia-asm is slower than original assembler
+ * implementation because excessive uses of 64bit rotate and
+ * left-shifts (which are really slow on P4) needed to store and
+ * handle 128bit block in two 64bit registers.
+ */
+ return true;
+ }
+
+ return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
+
+static int __init init(void)
+{
+ if (!force && is_blacklisted_cpu()) {
+ printk(KERN_INFO
+ "camellia-x86_64: performance on this CPU "
+ "would be suboptimal: disabling "
+ "camellia-x86_64.\n");
+ return -ENODEV;
+ }
+
+ return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+static void __exit fini(void)
+{
+ crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+}
+
+module_init(init);
+module_exit(fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized");
+MODULE_ALIAS("camellia");
+MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c
index b9d0026..493f959 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel.c
@@ -31,6 +31,7 @@
#include <crypto/internal/hash.h>
#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
@@ -173,13 +174,17 @@ static struct shash_alg alg = {
}
};
+static const struct x86_cpu_id crc32c_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_XMM4_2),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
static int __init crc32c_intel_mod_init(void)
{
- if (cpu_has_xmm4_2)
- return crypto_register_shash(&alg);
- else
+ if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
+ return crypto_register_shash(&alg);
}
static void __exit crc32c_intel_mod_fini(void)
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 976aa64..b4bf0a6 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -20,6 +20,7 @@
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
#include <asm/i387.h>
+#include <asm/cpu_device_id.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
@@ -294,15 +295,18 @@ static struct ahash_alg ghash_async_alg = {
},
};
+static const struct x86_cpu_id pcmul_cpu_id[] = {
+ X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), /* Pickle-Mickle-Duck */
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, pcmul_cpu_id);
+
static int __init ghash_pclmulqdqni_mod_init(void)
{
int err;
- if (!cpu_has_pclmulqdq) {
- printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
- " detected.\n");
+ if (!x86_match_cpu(pcmul_cpu_id))
return -ENODEV;
- }
err = crypto_register_shash(&ghash_alg);
if (err)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index 4e37677..c00053d 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -463,23 +463,20 @@
pand x0, x4; \
pxor x2, x4;
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
- movdqa x2, t3; \
- movdqa x0, t1; \
- unpcklps x3, t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
movdqa x0, t2; \
- unpcklps x1, t1; \
- unpckhps x1, t2; \
- movdqa t3, x1; \
- unpckhps x3, x2; \
- movdqa t1, x0; \
- movhlps t1, x1; \
- movdqa t2, t1; \
- movlhps t3, x0; \
- movlhps x2, t1; \
- movhlps t2, x2; \
- movdqa x2, x3; \
- movdqa t1, x2;
+ punpckldq x1, x0; \
+ punpckhdq x1, t2; \
+ movdqa x2, t1; \
+ punpckhdq x3, x2; \
+ punpckldq x3, t1; \
+ movdqa x0, x1; \
+ punpcklqdq t1, x0; \
+ punpckhqdq t1, x1; \
+ movdqa t2, x3; \
+ punpcklqdq x2, t2; \
+ punpckhqdq x2, x3; \
+ movdqa t2, x2;
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 7f24a15..3ee1ff0 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -585,23 +585,20 @@
get_key(i, 1, RK1); \
SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
-#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
- movdqa x2, t3; \
- movdqa x0, t1; \
- unpcklps x3, t3; \
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
movdqa x0, t2; \
- unpcklps x1, t1; \
- unpckhps x1, t2; \
- movdqa t3, x1; \
- unpckhps x3, x2; \
- movdqa t1, x0; \
- movhlps t1, x1; \
- movdqa t2, t1; \
- movlhps t3, x0; \
- movlhps x2, t1; \
- movhlps t2, x2; \
- movdqa x2, x3; \
- movdqa t1, x2;
+ punpckldq x1, x0; \
+ punpckhdq x1, t2; \
+ movdqa x2, t1; \
+ punpckhdq x3, x2; \
+ punpckldq x3, t1; \
+ movdqa x0, x1; \
+ punpcklqdq t1, x0; \
+ punpckhqdq t1, x1; \
+ movdqa t2, x3; \
+ punpcklqdq x2, t2; \
+ punpckhqdq x2, x3; \
+ movdqa t2, x2;
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
movdqu (0*4*4)(in), x0; \
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 7955a9b..4b21be8 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -145,28 +145,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ecb_crypt(desc, &walk, false);
}
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "__ecb-serpent-sse2",
- .cra_driver_name = "__driver-ecb-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
@@ -295,28 +273,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "__cbc-serpent-sse2",
- .cra_driver_name = "__driver-cbc-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
dst->a = cpu_to_be64(src->a);
@@ -439,29 +395,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_ctr_alg = {
- .cra_name = "__ctr-serpent-sse2",
- .cra_driver_name = "__driver-ctr-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-};
-
struct crypt_priv {
struct serpent_ctx *ctx;
bool fpu_enabled;
@@ -580,32 +513,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
lrw_free_table(&ctx->lrw_table);
}
-static struct crypto_alg blk_lrw_alg = {
- .cra_name = "__lrw-serpent-sse2",
- .cra_driver_name = "__driver-lrw-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = lrw_serpent_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-};
-
struct serpent_xts_ctx {
struct serpent_ctx tweak_ctx;
struct serpent_ctx crypt_ctx;
@@ -689,29 +596,6 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ret;
}
-static struct crypto_alg blk_xts_alg = {
- .cra_name = "__xts-serpent-sse2",
- .cra_driver_name = "__driver-xts-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-};
-
static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int key_len)
{
@@ -792,28 +676,133 @@ static void ablk_exit(struct crypto_tfm *tfm)
cryptd_free_ablkcipher(ctx->cryptd_tfm);
}
-static void ablk_init_common(struct crypto_tfm *tfm,
- struct cryptd_ablkcipher *cryptd_tfm)
+static int ablk_init(struct crypto_tfm *tfm)
{
struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct cryptd_ablkcipher *cryptd_tfm;
+ char drv_name[CRYPTO_MAX_ALG_NAME];
+
+ snprintf(drv_name, sizeof(drv_name), "__driver-%s",
+ crypto_tfm_alg_driver_name(tfm));
+
+ cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
crypto_ablkcipher_reqsize(&cryptd_tfm->base);
-}
-
-static int ablk_ecb_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
return 0;
}
-static struct crypto_alg ablk_ecb_alg = {
+static struct crypto_alg serpent_algs[10] = { {
+ .cra_name = "__ecb-serpent-sse2",
+ .cra_driver_name = "__driver-ecb-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__cbc-serpent-sse2",
+ .cra_driver_name = "__driver-cbc-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__ctr-serpent-sse2",
+ .cra_driver_name = "__driver-ctr-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct serpent_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "__lrw-serpent-sse2",
+ .cra_driver_name = "__driver-lrw-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list),
+ .cra_exit = lrw_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE +
+ SERPENT_BLOCK_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = lrw_serpent_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__xts-serpent-sse2",
+ .cra_driver_name = "__driver-xts-serpent-sse2",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = SERPENT_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct serpent_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
+ .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = xts_serpent_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+ },
+}, {
.cra_name = "ecb(serpent)",
.cra_driver_name = "ecb-serpent-sse2",
.cra_priority = 400,
@@ -823,8 +812,8 @@ static struct crypto_alg ablk_ecb_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
- .cra_init = ablk_ecb_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -835,20 +824,7 @@ static struct crypto_alg ablk_ecb_alg = {
.decrypt = ablk_decrypt,
},
},
-};
-
-static int ablk_cbc_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_cbc_alg = {
+}, {
.cra_name = "cbc(serpent)",
.cra_driver_name = "cbc-serpent-sse2",
.cra_priority = 400,
@@ -858,8 +834,8 @@ static struct crypto_alg ablk_cbc_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
- .cra_init = ablk_cbc_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -871,20 +847,7 @@ static struct crypto_alg ablk_cbc_alg = {
.decrypt = ablk_decrypt,
},
},
-};
-
-static int ablk_ctr_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_ctr_alg = {
+}, {
.cra_name = "ctr(serpent)",
.cra_driver_name = "ctr-serpent-sse2",
.cra_priority = 400,
@@ -894,8 +857,8 @@ static struct crypto_alg ablk_ctr_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
- .cra_init = ablk_ctr_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -908,20 +871,7 @@ static struct crypto_alg ablk_ctr_alg = {
.geniv = "chainiv",
},
},
-};
-
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_lrw_alg = {
+}, {
.cra_name = "lrw(serpent)",
.cra_driver_name = "lrw-serpent-sse2",
.cra_priority = 400,
@@ -931,8 +881,8 @@ static struct crypto_alg ablk_lrw_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
- .cra_init = ablk_lrw_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -946,20 +896,7 @@ static struct crypto_alg ablk_lrw_alg = {
.decrypt = ablk_decrypt,
},
},
-};
-
-static int ablk_xts_init(struct crypto_tfm *tfm)
-{
- struct cryptd_ablkcipher *cryptd_tfm;
-
- cryptd_tfm = cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
- ablk_init_common(tfm, cryptd_tfm);
- return 0;
-}
-
-static struct crypto_alg ablk_xts_alg = {
+}, {
.cra_name = "xts(serpent)",
.cra_driver_name = "xts-serpent-sse2",
.cra_priority = 400,
@@ -969,8 +906,8 @@ static struct crypto_alg ablk_xts_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list),
- .cra_init = ablk_xts_init,
+ .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list),
+ .cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
@@ -982,84 +919,21 @@ static struct crypto_alg ablk_xts_alg = {
.decrypt = ablk_decrypt,
},
},
-};
+} };
static int __init serpent_sse2_init(void)
{
- int err;
-
if (!cpu_has_xmm2) {
printk(KERN_INFO "SSE2 instructions are not detected.\n");
return -ENODEV;
}
- err = crypto_register_alg(&blk_ecb_alg);
- if (err)
- goto blk_ecb_err;
- err = crypto_register_alg(&blk_cbc_alg);
- if (err)
- goto blk_cbc_err;
- err = crypto_register_alg(&blk_ctr_alg);
- if (err)
- goto blk_ctr_err;
- err = crypto_register_alg(&ablk_ecb_alg);
- if (err)
- goto ablk_ecb_err;
- err = crypto_register_alg(&ablk_cbc_alg);
- if (err)
- goto ablk_cbc_err;
- err = crypto_register_alg(&ablk_ctr_alg);
- if (err)
- goto ablk_ctr_err;
- err = crypto_register_alg(&blk_lrw_alg);
- if (err)
- goto blk_lrw_err;
- err = crypto_register_alg(&ablk_lrw_alg);
- if (err)
- goto ablk_lrw_err;
- err = crypto_register_alg(&blk_xts_alg);
- if (err)
- goto blk_xts_err;
- err = crypto_register_alg(&ablk_xts_alg);
- if (err)
- goto ablk_xts_err;
- return err;
-
- crypto_unregister_alg(&ablk_xts_alg);
-ablk_xts_err:
- crypto_unregister_alg(&blk_xts_alg);
-blk_xts_err:
- crypto_unregister_alg(&ablk_lrw_alg);
-ablk_lrw_err:
- crypto_unregister_alg(&blk_lrw_alg);
-blk_lrw_err:
- crypto_unregister_alg(&ablk_ctr_alg);
-ablk_ctr_err:
- crypto_unregister_alg(&ablk_cbc_alg);
-ablk_cbc_err:
- crypto_unregister_alg(&ablk_ecb_alg);
-ablk_ecb_err:
- crypto_unregister_alg(&blk_ctr_alg);
-blk_ctr_err:
- crypto_unregister_alg(&blk_cbc_alg);
-blk_cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-blk_ecb_err:
- return err;
+ return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
}
static void __exit serpent_sse2_exit(void)
{
- crypto_unregister_alg(&ablk_xts_alg);
- crypto_unregister_alg(&blk_xts_alg);
- crypto_unregister_alg(&ablk_lrw_alg);
- crypto_unregister_alg(&blk_lrw_alg);
- crypto_unregister_alg(&ablk_ctr_alg);
- crypto_unregister_alg(&ablk_cbc_alg);
- crypto_unregister_alg(&ablk_ecb_alg);
- crypto_unregister_alg(&blk_ctr_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
+ crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
}
module_init(serpent_sse2_init);
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index dc6b3fb..359ae08 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -68,7 +68,7 @@ static struct crypto_alg alg = {
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 3,
+ .cra_alignmask = 0,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(alg.cra_list),
.cra_u = {
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 7fee8c1..922ab24 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -25,6 +25,7 @@
*
*/
+#include <asm/processor.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -122,28 +123,6 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return ecb_crypt(desc, &walk, twofish_dec_blk, twofish_dec_blk_3way);
}
-static struct crypto_alg blk_ecb_alg = {
- .cra_name = "ecb(twofish)",
- .cra_driver_name = "ecb-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-};
-
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
@@ -267,29 +246,6 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_cbc_alg = {
- .cra_name = "cbc(twofish)",
- .cra_driver_name = "cbc-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-};
-
static inline void u128_to_be128(be128 *dst, const u128 *src)
{
dst->a = cpu_to_be64(src->a);
@@ -411,29 +367,6 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return err;
}
-static struct crypto_alg blk_ctr_alg = {
- .cra_name = "ctr(twofish)",
- .cra_driver_name = "ctr-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_ctr_alg.cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-};
-
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = TF_BLOCK_SIZE;
@@ -524,30 +457,6 @@ static void lrw_exit_tfm(struct crypto_tfm *tfm)
lrw_free_table(&ctx->lrw_table);
}
-static struct crypto_alg blk_lrw_alg = {
- .cra_name = "lrw(twofish)",
- .cra_driver_name = "lrw-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_lrw_alg.cra_list),
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = lrw_twofish_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-};
-
struct twofish_xts_ctx {
struct twofish_ctx tweak_ctx;
struct twofish_ctx crypt_ctx;
@@ -614,7 +523,91 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return xts_crypt(desc, dst, src, nbytes, &req);
}
-static struct crypto_alg blk_xts_alg = {
+static struct crypto_alg tf_algs[5] = { {
+ .cra_name = "ecb(twofish)",
+ .cra_driver_name = "ecb-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(twofish)",
+ .cra_driver_name = "cbc-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(twofish)",
+ .cra_driver_name = "ctr-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct twofish_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list),
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "lrw(twofish)",
+ .cra_driver_name = "lrw-twofish-3way",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = TF_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list),
+ .cra_exit = lrw_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = lrw_twofish_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
.cra_name = "xts(twofish)",
.cra_driver_name = "xts-twofish-3way",
.cra_priority = 300,
@@ -624,7 +617,7 @@ static struct crypto_alg blk_xts_alg = {
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(blk_xts_alg.cra_list),
+ .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list),
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE * 2,
@@ -635,50 +628,62 @@ static struct crypto_alg blk_xts_alg = {
.decrypt = xts_decrypt,
},
},
-};
+} };
-int __init init(void)
+static bool is_blacklisted_cpu(void)
{
- int err;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+ return false;
+
+ if (boot_cpu_data.x86 == 0x06 &&
+ (boot_cpu_data.x86_model == 0x1c ||
+ boot_cpu_data.x86_model == 0x26 ||
+ boot_cpu_data.x86_model == 0x36)) {
+ /*
+ * On Atom, twofish-3way is slower than original assembler
+ * implementation. Twofish-3way trades off some performance in
+ * storing blocks in 64bit registers to allow three blocks to
+ * be processed parallel. Parallel operation then allows gaining
+ * more performance than was trade off, on out-of-order CPUs.
+ * However Atom does not benefit from this parallellism and
+ * should be blacklisted.
+ */
+ return true;
+ }
- err = crypto_register_alg(&blk_ecb_alg);
- if (err)
- goto ecb_err;
- err = crypto_register_alg(&blk_cbc_alg);
- if (err)
- goto cbc_err;
- err = crypto_register_alg(&blk_ctr_alg);
- if (err)
- goto ctr_err;
- err = crypto_register_alg(&blk_lrw_alg);
- if (err)
- goto blk_lrw_err;
- err = crypto_register_alg(&blk_xts_alg);
- if (err)
- goto blk_xts_err;
-
- return 0;
-
- crypto_unregister_alg(&blk_xts_alg);
-blk_xts_err:
- crypto_unregister_alg(&blk_lrw_alg);
-blk_lrw_err:
- crypto_unregister_alg(&blk_ctr_alg);
-ctr_err:
- crypto_unregister_alg(&blk_cbc_alg);
-cbc_err:
- crypto_unregister_alg(&blk_ecb_alg);
-ecb_err:
- return err;
+ if (boot_cpu_data.x86 == 0x0f) {
+ /*
+ * On Pentium 4, twofish-3way is slower than original assembler
+ * implementation because excessive uses of 64bit rotate and
+ * left-shifts (which are really slow on P4) needed to store and
+ * handle 128bit block in two 64bit registers.
+ */
+ return true;
+ }
+
+ return false;
+}
+
+static int force;
+module_param(force, int, 0);
+MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
+
+static int __init init(void)
+{
+ if (!force && is_blacklisted_cpu()) {
+ printk(KERN_INFO
+ "twofish-x86_64-3way: performance on this CPU "
+ "would be suboptimal: disabling "
+ "twofish-x86_64-3way.\n");
+ return -ENODEV;
+ }
+
+ return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
-void __exit fini(void)
+static void __exit fini(void)
{
- crypto_unregister_alg(&blk_xts_alg);
- crypto_unregister_alg(&blk_lrw_alg);
- crypto_unregister_alg(&blk_ctr_alg);
- crypto_unregister_alg(&blk_cbc_alg);
- crypto_unregister_alg(&blk_ecb_alg);
+ crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
module_init(init);
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index fd84387..d511d95 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -26,7 +26,6 @@
#include <linux/init.h>
#include <linux/jiffies.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
@@ -315,8 +314,14 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
current->mm->free_area_cache = TASK_UNMAPPED_BASE;
current->mm->cached_hole_size = 0;
+ retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
+ if (retval < 0) {
+ /* Someone check-me: is this error path enough? */
+ send_sig(SIGKILL, current, 0);
+ return retval;
+ }
+
install_exec_creds(bprm);
- current->flags &= ~PF_FORKNOEXEC;
if (N_MAGIC(ex) == OMAGIC) {
unsigned long text_addr, map_size;
@@ -410,13 +415,6 @@ beyond_if:
set_brk(current->mm->start_brk, current->mm->brk);
- retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
- if (retval < 0) {
- /* Someone check-me: is this error path enough? */
- send_sig(SIGKILL, current, 0);
- return retval;
- }
-
current->mm->start_stack =
(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
/* start thread */
@@ -519,7 +517,8 @@ out:
static int __init init_aout_binfmt(void)
{
- return register_binfmt(&aout_format);
+ register_binfmt(&aout_format);
+ return 0;
}
static void __exit exit_aout_binfmt(void)
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 45b4fdd..a69245b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -22,6 +22,7 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/ptrace.h>
#include <asm/ia32_unistd.h>
#include <asm/user32.h>
@@ -37,7 +38,7 @@
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
int err = 0;
- bool ia32 = !is_ia32_task();
+ bool ia32 = is_ia32_task();
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 37ad100..49331be 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -145,6 +145,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define ASM_OUTPUT2(a...) a
+/*
+ * use this macro if you need clobbers but no inputs in
+ * alternative_{input,io,call}()
+ */
+#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+
struct paravirt_patch_site;
#ifdef CONFIG_PARAVIRT
void apply_paravirt(struct paravirt_patch_site *start,
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 3ab9bdd..d854101 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -11,7 +11,6 @@
#include <linux/atomic.h>
#include <asm/fixmap.h>
#include <asm/mpspec.h>
-#include <asm/system.h>
#include <asm/msr.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -288,6 +287,7 @@ struct apic {
int (*probe)(void);
int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
+ int (*apic_id_valid)(int apicid);
int (*apic_id_registered)(void);
u32 irq_delivery_mode;
@@ -532,6 +532,11 @@ static inline unsigned int read_apic_id(void)
return apic->get_apic_id(reg);
}
+static inline int default_apic_id_valid(int apicid)
+{
+ return (apicid < 255);
+}
+
extern void default_setup_apic_routing(void);
extern struct apic apic_noop;
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index fa13f0e..1981199 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,13 +14,52 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
+#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
+#ifndef ATOMIC64_EXPORT
+#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
+#else
+#define ATOMIC64_DECL_ONE(sym) __ATOMIC64_DECL(sym); \
+ ATOMIC64_EXPORT(atomic64_##sym)
+#endif
+
#ifdef CONFIG_X86_CMPXCHG64
-#define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8"
+#define __alternative_atomic64(f, g, out, in...) \
+ asm volatile("call %P[func]" \
+ : out : [func] "i" (atomic64_##g##_cx8), ## in)
+
+#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
-#define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8)
+#define __alternative_atomic64(f, g, out, in...) \
+ alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
+ X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
+
+#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
+ ATOMIC64_DECL_ONE(sym##_386)
+
+ATOMIC64_DECL_ONE(add_386);
+ATOMIC64_DECL_ONE(sub_386);
+ATOMIC64_DECL_ONE(inc_386);
+ATOMIC64_DECL_ONE(dec_386);
#endif
-#define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f)
+#define alternative_atomic64(f, out, in...) \
+ __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
+
+ATOMIC64_DECL(read);
+ATOMIC64_DECL(set);
+ATOMIC64_DECL(xchg);
+ATOMIC64_DECL(add_return);
+ATOMIC64_DECL(sub_return);
+ATOMIC64_DECL(inc_return);
+ATOMIC64_DECL(dec_return);
+ATOMIC64_DECL(dec_if_positive);
+ATOMIC64_DECL(inc_not_zero);
+ATOMIC64_DECL(add_unless);
+
+#undef ATOMIC64_DECL
+#undef ATOMIC64_DECL_ONE
+#undef __ATOMIC64_DECL
+#undef ATOMIC64_EXPORT
/**
* atomic64_cmpxchg - cmpxchg atomic64 variable
@@ -50,11 +89,9 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
long long o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
- asm volatile(ATOMIC64_ALTERNATIVE(xchg)
- : "=A" (o), "+b" (low), "+c" (high)
- : "S" (v)
- : "memory"
- );
+ alternative_atomic64(xchg, "=&A" (o),
+ "S" (v), "b" (low), "c" (high)
+ : "memory");
return o;
}
@@ -69,11 +106,9 @@ static inline void atomic64_set(atomic64_t *v, long long i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
- asm volatile(ATOMIC64_ALTERNATIVE(set)
- : "+b" (low), "+c" (high)
- : "S" (v)
- : "eax", "edx", "memory"
- );
+ alternative_atomic64(set, /* no output */,
+ "S" (v), "b" (low), "c" (high)
+ : "eax", "edx", "memory");
}
/**
@@ -85,10 +120,7 @@ static inline void atomic64_set(atomic64_t *v, long long i)
static inline long long atomic64_read(const atomic64_t *v)
{
long long r;
- asm volatile(ATOMIC64_ALTERNATIVE(read)
- : "=A" (r), "+c" (v)
- : : "memory"
- );
+ alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
return r;
}
@@ -101,10 +133,9 @@ static inline long long atomic64_read(const atomic64_t *v)
*/
static inline long long atomic64_add_return(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE(add_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ alternative_atomic64(add_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
@@ -113,32 +144,25 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
*/
static inline long long atomic64_sub_return(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE(sub_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ alternative_atomic64(sub_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
static inline long long atomic64_inc_return(atomic64_t *v)
{
long long a;
- asm volatile(ATOMIC64_ALTERNATIVE(inc_return)
- : "=A" (a)
- : "S" (v)
- : "memory", "ecx"
- );
+ alternative_atomic64(inc_return, "=&A" (a),
+ "S" (v) : "memory", "ecx");
return a;
}
static inline long long atomic64_dec_return(atomic64_t *v)
{
long long a;
- asm volatile(ATOMIC64_ALTERNATIVE(dec_return)
- : "=A" (a)
- : "S" (v)
- : "memory", "ecx"
- );
+ alternative_atomic64(dec_return, "=&A" (a),
+ "S" (v) : "memory", "ecx");
return a;
}
@@ -151,10 +175,9 @@ static inline long long atomic64_dec_return(atomic64_t *v)
*/
static inline long long atomic64_add(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ __alternative_atomic64(add, add_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
@@ -167,10 +190,9 @@ static inline long long atomic64_add(long long i, atomic64_t *v)
*/
static inline long long atomic64_sub(long long i, atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return)
- : "+A" (i), "+c" (v)
- : : "memory"
- );
+ __alternative_atomic64(sub, sub_return,
+ ASM_OUTPUT2("+A" (i), "+c" (v)),
+ ASM_NO_INPUT_CLOBBER("memory"));
return i;
}
@@ -196,10 +218,8 @@ static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
*/
static inline void atomic64_inc(atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return)
- : : "S" (v)
- : "memory", "eax", "ecx", "edx"
- );
+ __alternative_atomic64(inc, inc_return, /* no output */,
+ "S" (v) : "memory", "eax", "ecx", "edx");
}
/**
@@ -210,10 +230,8 @@ static inline void atomic64_inc(atomic64_t *v)
*/
static inline void atomic64_dec(atomic64_t *v)
{
- asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return)
- : : "S" (v)
- : "memory", "eax", "ecx", "edx"
- );
+ __alternative_atomic64(dec, dec_return, /* no output */,
+ "S" (v) : "memory", "eax", "ecx", "edx");
}
/**
@@ -263,15 +281,15 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v)
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns non-zero if the add was done, zero otherwise.
*/
static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
- asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t"
- : "+A" (a), "+c" (v), "+S" (low), "+D" (high)
- : : "memory");
+ alternative_atomic64(add_unless,
+ ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
+ "S" (v) : "memory");
return (int)a;
}
@@ -279,26 +297,20 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
static inline int atomic64_inc_not_zero(atomic64_t *v)
{
int r;
- asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero)
- : "=a" (r)
- : "S" (v)
- : "ecx", "edx", "memory"
- );
+ alternative_atomic64(inc_not_zero, "=&a" (r),
+ "S" (v) : "ecx", "edx", "memory");
return r;
}
static inline long long atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
- asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive)
- : "=A" (r)
- : "S" (v)
- : "ecx", "memory"
- );
+ alternative_atomic64(dec_if_positive, "=&A" (r),
+ "S" (v) : "ecx", "memory");
return r;
}
-#undef ATOMIC64_ALTERNATIVE
-#undef ATOMIC64_ALTERNATIVE_
+#undef alternative_atomic64
+#undef __alternative_atomic64
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/asm/auxvec.h
index 1316b4c..77203ac 100644
--- a/arch/x86/include/asm/auxvec.h
+++ b/arch/x86/include/asm/auxvec.h
@@ -9,4 +9,11 @@
#endif
#define AT_SYSINFO_EHDR 33
+/* entries in ARCH_DLINFO: */
+#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
+# define AT_VECTOR_SIZE_ARCH 2
+#else /* else it's non-compat x86-64 */
+# define AT_VECTOR_SIZE_ARCH 1
+#endif
+
#endif /* _ASM_X86_AUXVEC_H */
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
new file mode 100644
index 0000000..c6cd358
--- /dev/null
+++ b/arch/x86/include/asm/barrier.h
@@ -0,0 +1,116 @@
+#ifndef _ASM_X86_BARRIER_H
+#define _ASM_X86_BARRIER_H
+
+#include <asm/alternative.h>
+#include <asm/nops.h>
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_X86_32
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
+#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+#else
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#endif
+
+/**
+ * read_barrier_depends - Flush all pending reads that subsequents reads
+ * depend on.
+ *
+ * No data-dependent reads from memory-like regions are ever reordered
+ * over this barrier. All reads preceding this primitive are guaranteed
+ * to access memory (but not necessarily other CPUs' caches) before any
+ * reads following this primitive that depend on the data return by
+ * any of the preceding reads. This primitive is much lighter weight than
+ * rmb() on most CPUs, and is never heavier weight than is
+ * rmb().
+ *
+ * These ordering constraints are respected by both the local CPU
+ * and the compiler.
+ *
+ * Ordering is not guaranteed by anything other than these primitives,
+ * not even by data dependencies. See the documentation for
+ * memory_barrier() for examples and URLs to more information.
+ *
+ * For example, the following code would force ordering (the initial
+ * value of "a" is zero, "b" is one, and "p" is "&a"):
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * b = 2;
+ * memory_barrier();
+ * p = &b; q = p;
+ * read_barrier_depends();
+ * d = *q;
+ * </programlisting>
+ *
+ * because the read of "*q" depends on the read of "p" and these
+ * two reads are separated by a read_barrier_depends(). However,
+ * the following code, with the same initial values for "a" and "b":
+ *
+ * <programlisting>
+ * CPU 0 CPU 1
+ *
+ * a = 2;
+ * memory_barrier();
+ * b = 3; y = b;
+ * read_barrier_depends();
+ * x = a;
+ * </programlisting>
+ *
+ * does not enforce ordering, since there is no data dependency between
+ * the read of "a" and the read of "b". Therefore, on some CPUs, such
+ * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
+ * in cases like this where there are no data dependencies.
+ **/
+
+#define read_barrier_depends() do { } while (0)
+
+#ifdef CONFIG_SMP
+#define smp_mb() mb()
+#ifdef CONFIG_X86_PPRO_FENCE
+# define smp_rmb() rmb()
+#else
+# define smp_rmb() barrier()
+#endif
+#ifdef CONFIG_X86_OOSTORE
+# define smp_wmb() wmb()
+#else
+# define smp_wmb() barrier()
+#endif
+#define smp_read_barrier_depends() read_barrier_depends()
+#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
+#else
+#define smp_mb() barrier()
+#define smp_rmb() barrier()
+#define smp_wmb() barrier()
+#define smp_read_barrier_depends() do { } while (0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+/*
+ * Stop RDTSC speculation. This is needed when you need to use RDTSC
+ * (or get_cycles or vread that possibly accesses the TSC) in a defined
+ * code region.
+ *
+ * (Could use an alternative three way for this if there was one.)
+ */
+static __always_inline void rdtsc_barrier(void)
+{
+ alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
+ alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+}
+
+#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index f654d1b..11e1152 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -36,4 +36,8 @@ do { \
#endif /* !CONFIG_BUG */
#include <asm-generic/bug.h>
+
+
+extern void show_regs_common(void);
+
#endif /* _ASM_X86_BUG_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 4e12668..9863ee3 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -3,6 +3,7 @@
/* Caches aren't brain-dead on the intel. */
#include <asm-generic/cacheflush.h>
+#include <asm/special_insns.h>
#ifdef CONFIG_X86_PAT
/*
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
new file mode 100644
index 0000000..ff501e5
--- /dev/null
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -0,0 +1,13 @@
+#ifndef _CPU_DEVICE_ID
+#define _CPU_DEVICE_ID 1
+
+/*
+ * Declare drivers belonging to specific x86 CPUs
+ * Similar in spirit to pci_device_id and related PCI functions
+ */
+
+#include <linux/mod_devicetable.h>
+
+extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
+
+#endif
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8d67d42..340ee49 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -177,6 +177,7 @@
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
@@ -199,10 +200,13 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP (9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2 (9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index b903d5e..2d91580 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -78,8 +78,75 @@
*/
#ifdef __KERNEL__
+#include <linux/bug.h>
+
DECLARE_PER_CPU(unsigned long, cpu_dr7);
+#ifndef CONFIG_PARAVIRT
+/*
+ * These special macros can be used to get or set a debugging register
+ */
+#define get_debugreg(var, register) \
+ (var) = native_get_debugreg(register)
+#define set_debugreg(value, register) \
+ native_set_debugreg(register, value)
+#endif
+
+static inline unsigned long native_get_debugreg(int regno)
+{
+ unsigned long val = 0; /* Damn you, gcc! */
+
+ switch (regno) {
+ case 0:
+ asm("mov %%db0, %0" :"=r" (val));
+ break;
+ case 1:
+ asm("mov %%db1, %0" :"=r" (val));
+ break;
+ case 2:
+ asm("mov %%db2, %0" :"=r" (val));
+ break;
+ case 3:
+ asm("mov %%db3, %0" :"=r" (val));
+ break;
+ case 6:
+ asm("mov %%db6, %0" :"=r" (val));
+ break;
+ case 7:
+ asm("mov %%db7, %0" :"=r" (val));
+ break;
+ default:
+ BUG();
+ }
+ return val;
+}
+
+static inline void native_set_debugreg(int regno, unsigned long value)
+{
+ switch (regno) {
+ case 0:
+ asm("mov %0, %%db0" ::"r" (value));
+ break;
+ case 1:
+ asm("mov %0, %%db1" ::"r" (value));
+ break;
+ case 2:
+ asm("mov %0, %%db2" ::"r" (value));
+ break;
+ case 3:
+ asm("mov %0, %%db3" ::"r" (value));
+ break;
+ case 6:
+ asm("mov %0, %%db6" ::"r" (value));
+ break;
+ case 7:
+ asm("mov %0, %%db7" ::"r" (value));
+ break;
+ default:
+ BUG();
+ }
+}
+
static inline void hw_breakpoint_disable(void)
{
/* Zero the control register for HW Breakpoint */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index ed3065f..4b4331d 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask);
extern int dma_set_mask(struct device *dev, u64 mask);
extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag);
+ dma_addr_t *dma_addr, gfp_t flag,
+ struct dma_attrs *attrs);
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
@@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
return gfp;
}
+#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
+
static inline void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp)
+dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, struct dma_attrs *attrs)
{
struct dma_map_ops *ops = get_dma_ops(dev);
void *memory;
@@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
if (!is_device_dma_capable(dev))
return NULL;
- if (!ops->alloc_coherent)
+ if (!ops->alloc)
return NULL;
- memory = ops->alloc_coherent(dev, size, dma_handle,
- dma_alloc_coherent_gfp_flags(dev, gfp));
+ memory = ops->alloc(dev, size, dma_handle,
+ dma_alloc_coherent_gfp_flags(dev, gfp), attrs);
debug_dma_alloc_coherent(dev, size, *dma_handle, memory);
return memory;
}
-static inline void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t bus)
+#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t bus,
+ struct dma_attrs *attrs)
{
struct dma_map_ops *ops = get_dma_ops(dev);
@@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size,
return;
debug_dma_free_coherent(dev, size, vaddr, bus);
- if (ops->free_coherent)
- ops->free_coherent(dev, size, vaddr, bus);
+ if (ops->free)
+ ops->free(dev, size, vaddr, bus, attrs);
}
#endif
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 844f735..c9dcc18 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -95,7 +95,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
extern int add_efi_memmap;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
-extern void efi_memblock_x86_reserve_range(void);
+extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1e40634..5939f44 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -84,7 +84,6 @@ extern unsigned int vdso_enabled;
(((x)->e_machine == EM_386) || ((x)->e_machine == EM_486))
#include <asm/processor.h>
-#include <asm/system.h>
#ifdef CONFIG_X86_32
#include <asm/desc.h>
diff --git a/arch/x86/include/asm/exec.h b/arch/x86/include/asm/exec.h
new file mode 100644
index 0000000..54c2e1d
--- /dev/null
+++ b/arch/x86/include/asm/exec.h
@@ -0,0 +1 @@
+/* define arch_align_stack() here */
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
new file mode 100644
index 0000000..4fa8815
--- /dev/null
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -0,0 +1,520 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _FPU_INTERNAL_H
+#define _FPU_INTERNAL_H
+
+#include <linux/kernel_stat.h>
+#include <linux/regset.h>
+#include <linux/slab.h>
+#include <asm/asm.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/uaccess.h>
+#include <asm/xsave.h>
+
+extern unsigned int sig_xstate_size;
+extern void fpu_init(void);
+
+DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
+
+extern user_regset_active_fn fpregs_active, xfpregs_active;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+ xstateregs_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
+ xstateregs_set;
+
+
+/*
+ * xstateregs_active == fpregs_active. Please refer to the comment
+ * at the definition of fpregs_active.
+ */
+#define xstateregs_active fpregs_active
+
+extern struct _fpx_sw_bytes fx_sw_reserved;
+#ifdef CONFIG_IA32_EMULATION
+extern unsigned int sig_xstate_ia32_size;
+extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
+struct _fpstate_ia32;
+struct _xstate_ia32;
+extern int save_i387_xstate_ia32(void __user *buf);
+extern int restore_i387_xstate_ia32(void __user *buf);
+#endif
+
+#ifdef CONFIG_MATH_EMULATION
+extern void finit_soft_fpu(struct i387_soft_struct *soft);
+#else
+static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
+#endif
+
+#define X87_FSW_ES (1 << 7) /* Exception Summary */
+
+static __always_inline __pure bool use_xsaveopt(void)
+{
+ return static_cpu_has(X86_FEATURE_XSAVEOPT);
+}
+
+static __always_inline __pure bool use_xsave(void)
+{
+ return static_cpu_has(X86_FEATURE_XSAVE);
+}
+
+static __always_inline __pure bool use_fxsr(void)
+{
+ return static_cpu_has(X86_FEATURE_FXSR);
+}
+
+extern void __sanitize_i387_state(struct task_struct *);
+
+static inline void sanitize_i387_state(struct task_struct *tsk)
+{
+ if (!use_xsaveopt())
+ return;
+ __sanitize_i387_state(tsk);
+}
+
+#ifdef CONFIG_X86_64
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+{
+ int err;
+
+ /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+ asm volatile("1: fxrstorq %[fx]\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err)
+ : [fx] "m" (*fx), "0" (0));
+#else
+ asm volatile("1: rex64/fxrstor (%[fx])\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err)
+ : [fx] "R" (fx), "m" (*fx), "0" (0));
+#endif
+ return err;
+}
+
+static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
+{
+ int err;
+
+ /*
+ * Clear the bytes not touched by the fxsave and reserved
+ * for the SW usage.
+ */
+ err = __clear_user(&fx->sw_reserved,
+ sizeof(struct _fpx_sw_bytes));
+ if (unlikely(err))
+ return -EFAULT;
+
+ /* See comment in fxsave() below. */
+#ifdef CONFIG_AS_FXSAVEQ
+ asm volatile("1: fxsaveq %[fx]\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err), [fx] "=m" (*fx)
+ : "0" (0));
+#else
+ asm volatile("1: rex64/fxsave (%[fx])\n\t"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl $-1,%[err]\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : [err] "=r" (err), "=m" (*fx)
+ : [fx] "R" (fx), "0" (0));
+#endif
+ if (unlikely(err) &&
+ __clear_user(fx, sizeof(struct i387_fxsave_struct)))
+ err = -EFAULT;
+ /* No need to clear here because the caller clears USED_MATH */
+ return err;
+}
+
+static inline void fpu_fxsave(struct fpu *fpu)
+{
+ /* Using "rex64; fxsave %0" is broken because, if the memory operand
+ uses any extended registers for addressing, a second REX prefix
+ will be generated (to the assembler, rex64 followed by semicolon
+ is a separate instruction), and hence the 64-bitness is lost. */
+
+#ifdef CONFIG_AS_FXSAVEQ
+ /* Using "fxsaveq %0" would be the ideal choice, but is only supported
+ starting with gas 2.16. */
+ __asm__ __volatile__("fxsaveq %0"
+ : "=m" (fpu->state->fxsave));
+#else
+ /* Using, as a workaround, the properly prefixed form below isn't
+ accepted by any binutils version so far released, complaining that
+ the same type of prefix is used twice if an extended register is
+ needed for addressing (fix submitted to mainline 2005-11-21).
+ asm volatile("rex64/fxsave %0"
+ : "=m" (fpu->state->fxsave));
+ This, however, we can work around by forcing the compiler to select
+ an addressing mode that doesn't require extended registers. */
+ asm volatile("rex64/fxsave (%[fx])"
+ : "=m" (fpu->state->fxsave)
+ : [fx] "R" (&fpu->state->fxsave));
+#endif
+}
+
+#else /* CONFIG_X86_32 */
+
+/* perform fxrstor iff the processor has extended states, otherwise frstor */
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
+{
+ /*
+ * The "nop" is needed to make the instructions the same
+ * length.
+ */
+ alternative_input(
+ "nop ; frstor %1",
+ "fxrstor %1",
+ X86_FEATURE_FXSR,
+ "m" (*fx));
+
+ return 0;
+}
+
+static inline void fpu_fxsave(struct fpu *fpu)
+{
+ asm volatile("fxsave %[fx]"
+ : [fx] "=m" (fpu->state->fxsave));
+}
+
+#endif /* CONFIG_X86_64 */
+
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact.
+ */
+static inline int fpu_save_init(struct fpu *fpu)
+{
+ if (use_xsave()) {
+ fpu_xsave(fpu);
+
+ /*
+ * xsave header may indicate the init state of the FP.
+ */
+ if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
+ return 1;
+ } else if (use_fxsr()) {
+ fpu_fxsave(fpu);
+ } else {
+ asm volatile("fnsave %[fx]; fwait"
+ : [fx] "=m" (fpu->state->fsave));
+ return 0;
+ }
+
+ /*
+ * If exceptions are pending, we need to clear them so
+ * that we don't randomly get exceptions later.
+ *
+ * FIXME! Is this perhaps only true for the old-style
+ * irq13 case? Maybe we could leave the x87 state
+ * intact otherwise?
+ */
+ if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
+ asm volatile("fnclex");
+ return 0;
+ }
+ return 1;
+}
+
+static inline int __save_init_fpu(struct task_struct *tsk)
+{
+ return fpu_save_init(&tsk->thread.fpu);
+}
+
+static inline int fpu_fxrstor_checking(struct fpu *fpu)
+{
+ return fxrstor_checking(&fpu->state->fxsave);
+}
+
+static inline int fpu_restore_checking(struct fpu *fpu)
+{
+ if (use_xsave())
+ return fpu_xrstor_checking(fpu);
+ else
+ return fpu_fxrstor_checking(fpu);
+}
+
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+ /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
+ is pending. Clear the x87 state here by setting it to fixed
+ values. "m" is a random variable that should be in L1 */
+ alternative_input(
+ ASM_NOP8 ASM_NOP2,
+ "emms\n\t" /* clear stack tags */
+ "fildl %P[addr]", /* set F?P to defined value */
+ X86_FEATURE_FXSAVE_LEAK,
+ [addr] "m" (tsk->thread.fpu.has_fpu));
+
+ return fpu_restore_checking(&tsk->thread.fpu);
+}
+
+/*
+ * Software FPU state helpers. Careful: these need to
+ * be preemption protection *and* they need to be
+ * properly paired with the CR0.TS changes!
+ */
+static inline int __thread_has_fpu(struct task_struct *tsk)
+{
+ return tsk->thread.fpu.has_fpu;
+}
+
+/* Must be paired with an 'stts' after! */
+static inline void __thread_clear_has_fpu(struct task_struct *tsk)
+{
+ tsk->thread.fpu.has_fpu = 0;
+ percpu_write(fpu_owner_task, NULL);
+}
+
+/* Must be paired with a 'clts' before! */
+static inline void __thread_set_has_fpu(struct task_struct *tsk)
+{
+ tsk->thread.fpu.has_fpu = 1;
+ percpu_write(fpu_owner_task, tsk);
+}
+
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void __thread_fpu_end(struct task_struct *tsk)
+{
+ __thread_clear_has_fpu(tsk);
+ stts();
+}
+
+static inline void __thread_fpu_begin(struct task_struct *tsk)
+{
+ clts();
+ __thread_set_has_fpu(tsk);
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ * - switch_fpu_prepare() saves the old state and
+ * sets the new state of the CR0.TS bit. This is
+ * done within the context of the old process.
+ *
+ * - switch_fpu_finish() restores the new state as
+ * necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+/*
+ * FIXME! We could do a totally lazy restore, but we need to
+ * add a per-cpu "this was the task that last touched the FPU
+ * on this CPU" variable, and the task needs to have a "I last
+ * touched the FPU on this CPU" and check them.
+ *
+ * We don't do that yet, so "fpu_lazy_restore()" always returns
+ * false, but some day..
+ */
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+ return new == percpu_read_stable(fpu_owner_task) &&
+ cpu == new->thread.fpu.last_cpu;
+}
+
+static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
+{
+ fpu_switch_t fpu;
+
+ fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+ if (__thread_has_fpu(old)) {
+ if (!__save_init_fpu(old))
+ cpu = ~0;
+ old->thread.fpu.last_cpu = cpu;
+ old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
+
+ /* Don't change CR0.TS if we just switch! */
+ if (fpu.preload) {
+ new->fpu_counter++;
+ __thread_set_has_fpu(new);
+ prefetch(new->thread.fpu.state);
+ } else
+ stts();
+ } else {
+ old->fpu_counter = 0;
+ old->thread.fpu.last_cpu = ~0;
+ if (fpu.preload) {
+ new->fpu_counter++;
+ if (fpu_lazy_restore(new, cpu))
+ fpu.preload = 0;
+ else
+ prefetch(new->thread.fpu.state);
+ __thread_fpu_begin(new);
+ }
+ }
+ return fpu;
+}
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
+{
+ if (fpu.preload) {
+ if (unlikely(restore_fpu_checking(new)))
+ __thread_fpu_end(new);
+ }
+}
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387_xstate(void __user *buf);
+extern int restore_i387_xstate(void __user *buf);
+
+static inline void __clear_fpu(struct task_struct *tsk)
+{
+ if (__thread_has_fpu(tsk)) {
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ __thread_fpu_end(tsk);
+ }
+}
+
+/*
+ * The actual user_fpu_begin/end() functions
+ * need to be preemption-safe.
+ *
+ * NOTE! user_fpu_end() must be used only after you
+ * have saved the FP state, and user_fpu_begin() must
+ * be used only immediately before restoring it.
+ * These functions do not do any save/restore on
+ * their own.
+ */
+static inline void user_fpu_end(void)
+{
+ preempt_disable();
+ __thread_fpu_end(current);
+ preempt_enable();
+}
+
+static inline void user_fpu_begin(void)
+{
+ preempt_disable();
+ if (!user_has_fpu())
+ __thread_fpu_begin(current);
+ preempt_enable();
+}
+
+/*
+ * These disable preemption on their own and are safe
+ */
+static inline void save_init_fpu(struct task_struct *tsk)
+{
+ WARN_ON_ONCE(!__thread_has_fpu(tsk));
+ preempt_disable();
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ preempt_enable();
+}
+
+static inline void clear_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ __clear_fpu(tsk);
+ preempt_enable();
+}
+
+/*
+ * i387 state interaction
+ */
+static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
+{
+ if (cpu_has_fxsr) {
+ return tsk->thread.fpu.state->fxsave.cwd;
+ } else {
+ return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
+ }
+}
+
+static inline unsigned short get_fpu_swd(struct task_struct *tsk)
+{
+ if (cpu_has_fxsr) {
+ return tsk->thread.fpu.state->fxsave.swd;
+ } else {
+ return (unsigned short)tsk->thread.fpu.state->fsave.swd;
+ }
+}
+
+static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
+{
+ if (cpu_has_xmm) {
+ return tsk->thread.fpu.state->fxsave.mxcsr;
+ } else {
+ return MXCSR_DEFAULT;
+ }
+}
+
+static bool fpu_allocated(struct fpu *fpu)
+{
+ return fpu->state != NULL;
+}
+
+static inline int fpu_alloc(struct fpu *fpu)
+{
+ if (fpu_allocated(fpu))
+ return 0;
+ fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
+ if (!fpu->state)
+ return -ENOMEM;
+ WARN_ON((unsigned long)fpu->state & 15);
+ return 0;
+}
+
+static inline void fpu_free(struct fpu *fpu)
+{
+ if (fpu->state) {
+ kmem_cache_free(task_xstate_cachep, fpu->state);
+ fpu->state = NULL;
+ }
+}
+
+static inline void fpu_copy(struct fpu *dst, struct fpu *src)
+{
+ memcpy(dst->state, src->state, xstate_size);
+}
+
+extern void fpu_finit(struct fpu *fpu);
+
+#endif
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index d09bb03..71ecbcb 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -9,7 +9,6 @@
#include <asm/asm.h>
#include <asm/errno.h>
#include <asm/processor.h>
-#include <asm/system.h>
#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
asm volatile("1:\t" insn "\n" \
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index da0b3ca..382f75d 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -7,7 +7,6 @@
typedef struct {
unsigned int __softirq_pending;
unsigned int __nmi_count; /* arch dependent */
- unsigned int irq0_irqs;
#ifdef CONFIG_X86_LOCAL_APIC
unsigned int apic_timer_irqs; /* arch dependent */
unsigned int irq_spurious_count;
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 3bd0402..302a323 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -61,7 +61,7 @@ void *kmap(struct page *page);
void kunmap(struct page *page);
void *kmap_atomic_prot(struct page *page, pgprot_t prot);
-void *__kmap_atomic(struct page *page);
+void *kmap_atomic(struct page *page);
void __kunmap_atomic(void *kvaddr);
void *kmap_atomic_pfn(unsigned long pfn);
void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 2479049..257d9cc 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -13,476 +13,18 @@
#ifndef __ASSEMBLY__
#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/regset.h>
#include <linux/hardirq.h>
-#include <linux/slab.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/uaccess.h>
-#include <asm/xsave.h>
-extern unsigned int sig_xstate_size;
-extern void fpu_init(void);
-extern void mxcsr_feature_mask_init(void);
+struct pt_regs;
+struct user_i387_struct;
+
extern int init_fpu(struct task_struct *child);
-extern void math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
+extern void math_state_restore(void);
-DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
-
-extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
- xstateregs_get;
-extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
- xstateregs_set;
-
-/*
- * xstateregs_active == fpregs_active. Please refer to the comment
- * at the definition of fpregs_active.
- */
-#define xstateregs_active fpregs_active
-
-extern struct _fpx_sw_bytes fx_sw_reserved;
-#ifdef CONFIG_IA32_EMULATION
-extern unsigned int sig_xstate_ia32_size;
-extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
-struct _fpstate_ia32;
-struct _xstate_ia32;
-extern int save_i387_xstate_ia32(void __user *buf);
-extern int restore_i387_xstate_ia32(void __user *buf);
-#endif
-
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
-#define X87_FSW_ES (1 << 7) /* Exception Summary */
-
-static __always_inline __pure bool use_xsaveopt(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVEOPT);
-}
-
-static __always_inline __pure bool use_xsave(void)
-{
- return static_cpu_has(X86_FEATURE_XSAVE);
-}
-
-static __always_inline __pure bool use_fxsr(void)
-{
- return static_cpu_has(X86_FEATURE_FXSR);
-}
-
-extern void __sanitize_i387_state(struct task_struct *);
-
-static inline void sanitize_i387_state(struct task_struct *tsk)
-{
- if (!use_xsaveopt())
- return;
- __sanitize_i387_state(tsk);
-}
-
-#ifdef CONFIG_X86_64
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
- int err;
-
- /* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
- asm volatile("1: fxrstorq %[fx]\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err)
- : [fx] "m" (*fx), "0" (0));
-#else
- asm volatile("1: rex64/fxrstor (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err)
- : [fx] "R" (fx), "m" (*fx), "0" (0));
-#endif
- return err;
-}
-
-static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
-{
- int err;
-
- /*
- * Clear the bytes not touched by the fxsave and reserved
- * for the SW usage.
- */
- err = __clear_user(&fx->sw_reserved,
- sizeof(struct _fpx_sw_bytes));
- if (unlikely(err))
- return -EFAULT;
-
- /* See comment in fxsave() below. */
-#ifdef CONFIG_AS_FXSAVEQ
- asm volatile("1: fxsaveq %[fx]\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err), [fx] "=m" (*fx)
- : "0" (0));
-#else
- asm volatile("1: rex64/fxsave (%[fx])\n\t"
- "2:\n"
- ".section .fixup,\"ax\"\n"
- "3: movl $-1,%[err]\n"
- " jmp 2b\n"
- ".previous\n"
- _ASM_EXTABLE(1b, 3b)
- : [err] "=r" (err), "=m" (*fx)
- : [fx] "R" (fx), "0" (0));
-#endif
- if (unlikely(err) &&
- __clear_user(fx, sizeof(struct i387_fxsave_struct)))
- err = -EFAULT;
- /* No need to clear here because the caller clears USED_MATH */
- return err;
-}
-
-static inline void fpu_fxsave(struct fpu *fpu)
-{
- /* Using "rex64; fxsave %0" is broken because, if the memory operand
- uses any extended registers for addressing, a second REX prefix
- will be generated (to the assembler, rex64 followed by semicolon
- is a separate instruction), and hence the 64-bitness is lost. */
-
-#ifdef CONFIG_AS_FXSAVEQ
- /* Using "fxsaveq %0" would be the ideal choice, but is only supported
- starting with gas 2.16. */
- __asm__ __volatile__("fxsaveq %0"
- : "=m" (fpu->state->fxsave));
-#else
- /* Using, as a workaround, the properly prefixed form below isn't
- accepted by any binutils version so far released, complaining that
- the same type of prefix is used twice if an extended register is
- needed for addressing (fix submitted to mainline 2005-11-21).
- asm volatile("rex64/fxsave %0"
- : "=m" (fpu->state->fxsave));
- This, however, we can work around by forcing the compiler to select
- an addressing mode that doesn't require extended registers. */
- asm volatile("rex64/fxsave (%[fx])"
- : "=m" (fpu->state->fxsave)
- : [fx] "R" (&fpu->state->fxsave));
-#endif
-}
-
-#else /* CONFIG_X86_32 */
-
-/* perform fxrstor iff the processor has extended states, otherwise frstor */
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
- /*
- * The "nop" is needed to make the instructions the same
- * length.
- */
- alternative_input(
- "nop ; frstor %1",
- "fxrstor %1",
- X86_FEATURE_FXSR,
- "m" (*fx));
-
- return 0;
-}
-
-static inline void fpu_fxsave(struct fpu *fpu)
-{
- asm volatile("fxsave %[fx]"
- : [fx] "=m" (fpu->state->fxsave));
-}
-
-#endif /* CONFIG_X86_64 */
-
-/*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact.
- */
-static inline int fpu_save_init(struct fpu *fpu)
-{
- if (use_xsave()) {
- fpu_xsave(fpu);
-
- /*
- * xsave header may indicate the init state of the FP.
- */
- if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
- return 1;
- } else if (use_fxsr()) {
- fpu_fxsave(fpu);
- } else {
- asm volatile("fnsave %[fx]; fwait"
- : [fx] "=m" (fpu->state->fsave));
- return 0;
- }
-
- /*
- * If exceptions are pending, we need to clear them so
- * that we don't randomly get exceptions later.
- *
- * FIXME! Is this perhaps only true for the old-style
- * irq13 case? Maybe we could leave the x87 state
- * intact otherwise?
- */
- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
- asm volatile("fnclex");
- return 0;
- }
- return 1;
-}
-
-static inline int __save_init_fpu(struct task_struct *tsk)
-{
- return fpu_save_init(&tsk->thread.fpu);
-}
-
-static inline int fpu_fxrstor_checking(struct fpu *fpu)
-{
- return fxrstor_checking(&fpu->state->fxsave);
-}
-
-static inline int fpu_restore_checking(struct fpu *fpu)
-{
- if (use_xsave())
- return fpu_xrstor_checking(fpu);
- else
- return fpu_fxrstor_checking(fpu);
-}
-
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
- /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
- is pending. Clear the x87 state here by setting it to fixed
- values. "m" is a random variable that should be in L1 */
- alternative_input(
- ASM_NOP8 ASM_NOP2,
- "emms\n\t" /* clear stack tags */
- "fildl %P[addr]", /* set F?P to defined value */
- X86_FEATURE_FXSAVE_LEAK,
- [addr] "m" (tsk->thread.fpu.has_fpu));
-
- return fpu_restore_checking(&tsk->thread.fpu);
-}
-
-/*
- * Software FPU state helpers. Careful: these need to
- * be preemption protection *and* they need to be
- * properly paired with the CR0.TS changes!
- */
-static inline int __thread_has_fpu(struct task_struct *tsk)
-{
- return tsk->thread.fpu.has_fpu;
-}
-
-/* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct task_struct *tsk)
-{
- tsk->thread.fpu.has_fpu = 0;
- percpu_write(fpu_owner_task, NULL);
-}
-
-/* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct task_struct *tsk)
-{
- tsk->thread.fpu.has_fpu = 1;
- percpu_write(fpu_owner_task, tsk);
-}
-
-/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void __thread_fpu_end(struct task_struct *tsk)
-{
- __thread_clear_has_fpu(tsk);
- stts();
-}
-
-static inline void __thread_fpu_begin(struct task_struct *tsk)
-{
- clts();
- __thread_set_has_fpu(tsk);
-}
-
-/*
- * FPU state switching for scheduling.
- *
- * This is a two-stage process:
- *
- * - switch_fpu_prepare() saves the old state and
- * sets the new state of the CR0.TS bit. This is
- * done within the context of the old process.
- *
- * - switch_fpu_finish() restores the new state as
- * necessary.
- */
-typedef struct { int preload; } fpu_switch_t;
-
-/*
- * FIXME! We could do a totally lazy restore, but we need to
- * add a per-cpu "this was the task that last touched the FPU
- * on this CPU" variable, and the task needs to have a "I last
- * touched the FPU on this CPU" and check them.
- *
- * We don't do that yet, so "fpu_lazy_restore()" always returns
- * false, but some day..
- */
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
- return new == percpu_read_stable(fpu_owner_task) &&
- cpu == new->thread.fpu.last_cpu;
-}
-
-static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
-{
- fpu_switch_t fpu;
-
- fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
- if (__thread_has_fpu(old)) {
- if (!__save_init_fpu(old))
- cpu = ~0;
- old->thread.fpu.last_cpu = cpu;
- old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
-
- /* Don't change CR0.TS if we just switch! */
- if (fpu.preload) {
- new->fpu_counter++;
- __thread_set_has_fpu(new);
- prefetch(new->thread.fpu.state);
- } else
- stts();
- } else {
- old->fpu_counter = 0;
- old->thread.fpu.last_cpu = ~0;
- if (fpu.preload) {
- new->fpu_counter++;
- if (fpu_lazy_restore(new, cpu))
- fpu.preload = 0;
- else
- prefetch(new->thread.fpu.state);
- __thread_fpu_begin(new);
- }
- }
- return fpu;
-}
-
-/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
- */
-static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
-{
- if (fpu.preload) {
- if (unlikely(restore_fpu_checking(new)))
- __thread_fpu_end(new);
- }
-}
-
-/*
- * Signal frame handlers...
- */
-extern int save_i387_xstate(void __user *buf);
-extern int restore_i387_xstate(void __user *buf);
-
-static inline void __clear_fpu(struct task_struct *tsk)
-{
- if (__thread_has_fpu(tsk)) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- __thread_fpu_end(tsk);
- }
-}
-
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * We can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- */
-static inline bool interrupted_kernel_fpu_idle(void)
-{
- return !__thread_has_fpu(current) &&
- (read_cr0() & X86_CR0_TS);
-}
-
-/*
- * Were we in user mode (or vm86 mode) when we were
- * interrupted?
- *
- * Doing kernel_fpu_begin/end() is ok if we are running
- * in an interrupt context from user mode - we'll just
- * save the FPU state as required.
- */
-static inline bool interrupted_user_mode(void)
-{
- struct pt_regs *regs = get_irq_regs();
- return regs && user_mode_vm(regs);
-}
-
-/*
- * Can we use the FPU in kernel mode with the
- * whole "kernel_fpu_begin/end()" sequence?
- *
- * It's always ok in process context (ie "not interrupt")
- * but it is sometimes ok even from an irq.
- */
-static inline bool irq_fpu_usable(void)
-{
- return !in_interrupt() ||
- interrupted_user_mode() ||
- interrupted_kernel_fpu_idle();
-}
-
-static inline void kernel_fpu_begin(void)
-{
- struct task_struct *me = current;
-
- WARN_ON_ONCE(!irq_fpu_usable());
- preempt_disable();
- if (__thread_has_fpu(me)) {
- __save_init_fpu(me);
- __thread_clear_has_fpu(me);
- /* We do 'stts()' in kernel_fpu_end() */
- } else {
- percpu_write(fpu_owner_task, NULL);
- clts();
- }
-}
-
-static inline void kernel_fpu_end(void)
-{
- stts();
- preempt_enable();
-}
+extern bool irq_fpu_usable(void);
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
/*
* Some instructions like VIA's padlock instructions generate a spurious
@@ -524,126 +66,13 @@ static inline void irq_ts_restore(int TS_state)
* we can just assume we have FPU access - typically
* to save the FP state - we'll just take a #NM
* fault and get the FPU access back.
- *
- * The actual user_fpu_begin/end() functions
- * need to be preemption-safe, though.
- *
- * NOTE! user_fpu_end() must be used only after you
- * have saved the FP state, and user_fpu_begin() must
- * be used only immediately before restoring it.
- * These functions do not do any save/restore on
- * their own.
*/
static inline int user_has_fpu(void)
{
- return __thread_has_fpu(current);
-}
-
-static inline void user_fpu_end(void)
-{
- preempt_disable();
- __thread_fpu_end(current);
- preempt_enable();
-}
-
-static inline void user_fpu_begin(void)
-{
- preempt_disable();
- if (!user_has_fpu())
- __thread_fpu_begin(current);
- preempt_enable();
-}
-
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
- WARN_ON_ONCE(!__thread_has_fpu(tsk));
- preempt_disable();
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- preempt_enable();
-}
-
-static inline void unlazy_fpu(struct task_struct *tsk)
-{
- preempt_disable();
- if (__thread_has_fpu(tsk)) {
- __save_init_fpu(tsk);
- __thread_fpu_end(tsk);
- } else
- tsk->fpu_counter = 0;
- preempt_enable();
-}
-
-static inline void clear_fpu(struct task_struct *tsk)
-{
- preempt_disable();
- __clear_fpu(tsk);
- preempt_enable();
-}
-
-/*
- * i387 state interaction
- */
-static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.cwd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
- }
-}
-
-static inline unsigned short get_fpu_swd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.swd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.swd;
- }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
-{
- if (cpu_has_xmm) {
- return tsk->thread.fpu.state->fxsave.mxcsr;
- } else {
- return MXCSR_DEFAULT;
- }
-}
-
-static bool fpu_allocated(struct fpu *fpu)
-{
- return fpu->state != NULL;
-}
-
-static inline int fpu_alloc(struct fpu *fpu)
-{
- if (fpu_allocated(fpu))
- return 0;
- fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
- if (!fpu->state)
- return -ENOMEM;
- WARN_ON((unsigned long)fpu->state & 15);
- return 0;
-}
-
-static inline void fpu_free(struct fpu *fpu)
-{
- if (fpu->state) {
- kmem_cache_free(task_xstate_cachep, fpu->state);
- fpu->state = NULL;
- }
-}
-
-static inline void fpu_copy(struct fpu *dst, struct fpu *src)
-{
- memcpy(dst->state, src->state, xstate_size);
+ return current->thread.fpu.has_fpu;
}
-extern void fpu_finit(struct fpu *fpu);
+extern void unlazy_fpu(struct task_struct *tsk);
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 7d0c185..ee52760 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -130,8 +130,8 @@ typedef struct compat_siginfo {
unsigned int _pid; /* which child */
unsigned int _uid; /* sender's uid */
int _status; /* exit code */
- s64 _utime;
- s64 _stime;
+ compat_s64 _utime;
+ compat_s64 _stime;
} _sigchld_x32;
/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index f49253d7..c5d1785 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -14,6 +14,7 @@ void exit_idle(void);
#else /* !CONFIG_X86_64 */
static inline void enter_idle(void) { }
static inline void exit_idle(void) { }
+static inline void __exit_idle(void) { }
#endif /* CONFIG_X86_64 */
void amd_e400_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 690d1cc..2c4943d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -21,6 +21,15 @@
#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
#define IO_APIC_REDIR_MASKED (1 << 16)
+struct io_apic_ops {
+ void (*init) (void);
+ unsigned int (*read) (unsigned int apic, unsigned int reg);
+ void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
+};
+
+void __init set_io_apic_ops(const struct io_apic_ops *);
+
/*
* The structure of the IO-APIC:
*/
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h
deleted file mode 100644
index 423bbbd..0000000
--- a/arch/x86/include/asm/irq_controller.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef __IRQ_CONTROLLER__
-#define __IRQ_CONTROLLER__
-
-struct irq_domain {
- int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize,
- u32 *out_hwirq, u32 *out_type);
- void *priv;
- struct device_node *controller;
- struct list_head l;
-};
-
-#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18c..3a16c14 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -9,12 +9,12 @@
#define JUMP_LABEL_NOP_SIZE 5
-#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
+#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t"
-static __always_inline bool arch_static_branch(struct jump_label_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key)
{
asm goto("1:"
- JUMP_LABEL_INITIAL_NOP
+ STATIC_KEY_INITIAL_NOP
".pushsection __jump_table, \"aw\" \n\t"
_ASM_ALIGN "\n\t"
_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index 77e95f5..332f98c 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -64,11 +64,15 @@ enum regnames {
GDB_PS, /* 17 */
GDB_CS, /* 18 */
GDB_SS, /* 19 */
+ GDB_DS, /* 20 */
+ GDB_ES, /* 21 */
+ GDB_FS, /* 22 */
+ GDB_GS, /* 23 */
};
#define GDB_ORIG_AX 57
-#define DBG_MAX_REG_NUM 20
-/* 17 64 bit regs and 3 32 bit regs */
-#define NUMREGBYTES ((17 * 8) + (3 * 4))
+#define DBG_MAX_REG_NUM 24
+/* 17 64 bit regs and 5 32 bit regs */
+#define NUMREGBYTES ((17 * 8) + (5 * 4))
#endif /* ! CONFIG_X86_32 */
static inline void arch_kgdb_breakpoint(void)
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 4d8dcbd..e7d1c19 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -321,4 +321,8 @@ struct kvm_xcrs {
__u64 padding[16];
};
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7b9cfc4..c222e1a 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -176,6 +176,7 @@ struct x86_emulate_ops {
void (*set_idt)(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt);
ulong (*get_cr)(struct x86_emulate_ctxt *ctxt, int cr);
int (*set_cr)(struct x86_emulate_ctxt *ctxt, int cr, ulong val);
+ void (*set_rflags)(struct x86_emulate_ctxt *ctxt, ulong val);
int (*cpl)(struct x86_emulate_ctxt *ctxt);
int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
@@ -388,7 +389,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_INTERCEPTED 2
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
- u16 tss_selector, int reason,
+ u16 tss_selector, int idt_index, int reason,
bool has_error_code, u32 error_code);
int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
#endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 52d6640..e216ba0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -29,7 +29,7 @@
#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 254
-#define KVM_SOFT_MAX_VCPUS 64
+#define KVM_SOFT_MAX_VCPUS 160
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
@@ -181,13 +181,6 @@ struct kvm_mmu_memory_cache {
void *objects[KVM_NR_MEM_OBJS];
};
-#define NR_PTE_CHAIN_ENTRIES 5
-
-struct kvm_pte_chain {
- u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
- struct hlist_node link;
-};
-
/*
* kvm_mmu_page_role, below, is defined as:
*
@@ -427,12 +420,16 @@ struct kvm_vcpu_arch {
u64 last_guest_tsc;
u64 last_kernel_ns;
- u64 last_tsc_nsec;
- u64 last_tsc_write;
- u32 virtual_tsc_khz;
+ u64 last_host_tsc;
+ u64 tsc_offset_adjustment;
+ u64 this_tsc_nsec;
+ u64 this_tsc_write;
+ u8 this_tsc_generation;
bool tsc_catchup;
- u32 tsc_catchup_mult;
- s8 tsc_catchup_shift;
+ bool tsc_always_catchup;
+ s8 virtual_tsc_shift;
+ u32 virtual_tsc_mult;
+ u32 virtual_tsc_khz;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -478,6 +475,21 @@ struct kvm_vcpu_arch {
u32 id;
bool send_user_only;
} apf;
+
+ /* OSVW MSRs (AMD only) */
+ struct {
+ u64 length;
+ u64 status;
+ } osvw;
+};
+
+struct kvm_lpage_info {
+ unsigned long rmap_pde;
+ int write_count;
+};
+
+struct kvm_arch_memory_slot {
+ struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
struct kvm_arch {
@@ -511,8 +523,12 @@ struct kvm_arch {
s64 kvmclock_offset;
raw_spinlock_t tsc_write_lock;
u64 last_tsc_nsec;
- u64 last_tsc_offset;
u64 last_tsc_write;
+ u32 last_tsc_khz;
+ u64 cur_tsc_nsec;
+ u64 cur_tsc_write;
+ u64 cur_tsc_offset;
+ u8 cur_tsc_generation;
struct kvm_xen_hvm_config xen_hvm_config;
@@ -644,7 +660,7 @@ struct kvm_x86_ops {
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
- void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment);
+ void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment, bool host);
void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -652,7 +668,7 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
- void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz);
+ void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
@@ -674,6 +690,17 @@ struct kvm_arch_async_pf {
extern struct kvm_x86_ops *kvm_x86_ops;
+static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
+ s64 adjustment)
+{
+ kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, false);
+}
+
+static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
+{
+ kvm_x86_ops->adjust_tsc_offset(vcpu, adjustment, true);
+}
+
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
@@ -741,8 +768,8 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
-int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
- bool has_error_code, u32 error_code);
+int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
+ int reason, bool has_error_code, u32 error_code);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index 9cdae5d..c8bed0d 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -3,7 +3,6 @@
#include <linux/percpu.h>
-#include <asm/system.h>
#include <linux/atomic.h>
#include <asm/asm.h>
diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h
index 0e8e85b..d354fb7 100644
--- a/arch/x86/include/asm/mc146818rtc.h
+++ b/arch/x86/include/asm/mc146818rtc.h
@@ -5,7 +5,6 @@
#define _ASM_X86_MC146818RTC_H
#include <asm/io.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <linux/mc146818rtc.h>
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 6aefb14..441520e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,7 +151,7 @@ static inline void enable_p5_mce(void) {}
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-extern struct device *mce_device[CONFIG_NR_CPUS];
+DECLARE_PER_CPU(struct device *, mce_device);
/*
* Maximum banks number.
diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
index 0a0a954..fc18bf3 100644
--- a/arch/x86/include/asm/mrst.h
+++ b/arch/x86/include/asm/mrst.h
@@ -26,8 +26,8 @@ extern struct sfi_rtc_table_entry sfi_mrtc_array[];
* identified via MSRs.
*/
enum mrst_cpu_type {
- MRST_CPU_CHIP_LINCROFT = 1,
- MRST_CPU_CHIP_PENWELL,
+ /* 1 was Moorestown */
+ MRST_CPU_CHIP_PENWELL = 2,
};
extern enum mrst_cpu_type __mrst_cpu_chip;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index a6962d9..ccb8059 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_LBR_SELECT 0x000001c8
+#define MSR_LBR_TOS 0x000001c9
+#define MSR_LBR_NHM_FROM 0x00000680
+#define MSR_LBR_NHM_TO 0x000006c0
+#define MSR_LBR_CORE_FROM 0x00000040
+#define MSR_LBR_CORE_TO 0x00000060
+
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index bce688d..e21fdd1 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -55,7 +55,6 @@ extern unsigned long init_memory_mapping(unsigned long start,
unsigned long end);
extern void initmem_init(void);
-extern void free_initmem(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a7d2db9..aa0f913 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -10,6 +10,7 @@
#include <asm/paravirt_types.h>
#ifndef __ASSEMBLY__
+#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>
@@ -230,9 +231,9 @@ static inline unsigned long long paravirt_sched_clock(void)
return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
-struct jump_label_key;
-extern struct jump_label_key paravirt_steal_enabled;
-extern struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key;
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
static inline u64 paravirt_steal_clock(int cpu)
{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index e8fb2c7..2291895 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -23,6 +23,7 @@
#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16)
#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL (1ULL << 19)
#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20)
#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21)
#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f6d0d2e..4fa7dcc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -14,13 +14,13 @@ struct mm_struct;
#include <asm/sigcontext.h>
#include <asm/current.h>
#include <asm/cpufeature.h>
-#include <asm/system.h>
#include <asm/page.h>
#include <asm/pgtable_types.h>
#include <asm/percpu.h>
#include <asm/msr.h>
#include <asm/desc_defs.h>
#include <asm/nops.h>
+#include <asm/special_insns.h>
#include <linux/personality.h>
#include <linux/cpumask.h>
@@ -29,6 +29,15 @@ struct mm_struct;
#include <linux/math64.h>
#include <linux/init.h>
#include <linux/err.h>
+#include <linux/irqflags.h>
+
+/*
+ * We handle most unaligned accesses in hardware. On the other hand
+ * unaligned DMA can be quite expensive on some Nehalem processors.
+ *
+ * Based on this we disable the IP header alignment in network drivers.
+ */
+#define NET_IP_ALIGN 0
#define HBP_NUM 4
/*
@@ -162,6 +171,7 @@ extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
+void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern unsigned short num_cache_leaves;
@@ -474,61 +484,6 @@ struct thread_struct {
unsigned io_bitmap_max;
};
-static inline unsigned long native_get_debugreg(int regno)
-{
- unsigned long val = 0; /* Damn you, gcc! */
-
- switch (regno) {
- case 0:
- asm("mov %%db0, %0" :"=r" (val));
- break;
- case 1:
- asm("mov %%db1, %0" :"=r" (val));
- break;
- case 2:
- asm("mov %%db2, %0" :"=r" (val));
- break;
- case 3:
- asm("mov %%db3, %0" :"=r" (val));
- break;
- case 6:
- asm("mov %%db6, %0" :"=r" (val));
- break;
- case 7:
- asm("mov %%db7, %0" :"=r" (val));
- break;
- default:
- BUG();
- }
- return val;
-}
-
-static inline void native_set_debugreg(int regno, unsigned long value)
-{
- switch (regno) {
- case 0:
- asm("mov %0, %%db0" ::"r" (value));
- break;
- case 1:
- asm("mov %0, %%db1" ::"r" (value));
- break;
- case 2:
- asm("mov %0, %%db2" ::"r" (value));
- break;
- case 3:
- asm("mov %0, %%db3" ::"r" (value));
- break;
- case 6:
- asm("mov %0, %%db6" ::"r" (value));
- break;
- case 7:
- asm("mov %0, %%db7" ::"r" (value));
- break;
- default:
- BUG();
- }
-}
-
/*
* Set IOPL bits in EFLAGS from given mask
*/
@@ -574,14 +529,6 @@ static inline void native_swapgs(void)
#define __cpuid native_cpuid
#define paravirt_enabled() 0
-/*
- * These special macros can be used to get or set a debugging register
- */
-#define get_debugreg(var, register) \
- (var) = native_get_debugreg(register)
-#define set_debugreg(value, register) \
- native_set_debugreg(register, value)
-
static inline void load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
@@ -1027,4 +974,14 @@ extern bool cpu_has_amd_erratum(const int *);
#define cpu_has_amd_erratum(x) (false)
#endif /* CONFIG_CPU_SUP_AMD */
+void cpu_idle_wait(void);
+
+extern unsigned long arch_align_stack(unsigned long sp);
+extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
+
+void default_idle(void);
+bool set_pm_idle_to_default(void);
+
+void stop_this_cpu(void *dummy);
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 644dd885..60bef66 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -21,7 +21,6 @@
#include <asm/irq.h>
#include <linux/atomic.h>
#include <asm/setup.h>
-#include <asm/irq_controller.h>
#ifdef CONFIG_OF
extern int of_ioapic;
@@ -43,15 +42,6 @@ extern char cmd_line[COMMAND_LINE_SIZE];
#define pci_address_to_pio pci_address_to_pio
unsigned long pci_address_to_pio(phys_addr_t addr);
-/**
- * irq_dispose_mapping - Unmap an interrupt
- * @virq: linux virq number of the interrupt to unmap
- *
- * FIXME: We really should implement proper virq handling like power,
- * but that's going to be major surgery.
- */
-static inline void irq_dispose_mapping(unsigned int virq) { }
-
#define HAVE_ARCH_DEVTREE_FIXUPS
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 5e64171..1654662 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -212,7 +212,61 @@
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10];
-#endif
-#endif
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg, value) \
+do { \
+ unsigned short __val = (value); \
+ \
+ asm volatile(" \n" \
+ "1: movl %k0,%%" #seg " \n" \
+ \
+ ".section .fixup,\"ax\" \n" \
+ "2: xorl %k0,%k0 \n" \
+ " jmp 1b \n" \
+ ".previous \n" \
+ \
+ _ASM_EXTABLE(1b, 2b) \
+ \
+ : "+r" (__val) : : "memory"); \
+} while (0)
+
+/*
+ * Save a segment register away
+ */
+#define savesegment(seg, value) \
+ asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
+
+/*
+ * x86_32 user gs accessors.
+ */
+#ifdef CONFIG_X86_32
+#ifdef CONFIG_X86_32_LAZY_GS
+#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
+#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
+#define task_user_gs(tsk) ((tsk)->thread.gs)
+#define lazy_save_gs(v) savesegment(gs, (v))
+#define lazy_load_gs(v) loadsegment(gs, (v))
+#else /* X86_32_LAZY_GS */
+#define get_user_gs(regs) (u16)((regs)->gs)
+#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
+#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
+#define lazy_save_gs(v) do { } while (0)
+#define lazy_load_gs(v) do { } while (0)
+#endif /* X86_32_LAZY_GS */
+#endif /* X86_32 */
+
+static inline unsigned long get_limit(unsigned long segment)
+{
+ unsigned long __limit;
+ asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
+ return __limit + 1;
+}
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
#endif /* _ASM_X86_SEGMENT_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
new file mode 100644
index 0000000..41fc93a
--- /dev/null
+++ b/arch/x86/include/asm/special_insns.h
@@ -0,0 +1,199 @@
+#ifndef _ASM_X86_SPECIAL_INSNS_H
+#define _ASM_X86_SPECIAL_INSNS_H
+
+
+#ifdef __KERNEL__
+
+static inline void native_clts(void)
+{
+ asm volatile("clts");
+}
+
+/*
+ * Volatile isn't enough to prevent the compiler from reordering the
+ * read/write functions for the control registers and messing everything up.
+ * A memory clobber would solve the problem, but would prevent reordering of
+ * all loads stores around it, which can hurt performance. Solution is to
+ * use a variable and mimic reads and writes to it to enforce serialization
+ */
+static unsigned long __force_order;
+
+static inline unsigned long native_read_cr0(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline void native_write_cr0(unsigned long val)
+{
+ asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr2(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline void native_write_cr2(unsigned long val)
+{
+ asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr3(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline void native_write_cr3(unsigned long val)
+{
+ asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
+}
+
+static inline unsigned long native_read_cr4(void)
+{
+ unsigned long val;
+ asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ return val;
+}
+
+static inline unsigned long native_read_cr4_safe(void)
+{
+ unsigned long val;
+ /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
+ * exists, so it will never fail. */
+#ifdef CONFIG_X86_32
+ asm volatile("1: mov %%cr4, %0\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b)
+ : "=r" (val), "=m" (__force_order) : "0" (0));
+#else
+ val = native_read_cr4();
+#endif
+ return val;
+}
+
+static inline void native_write_cr4(unsigned long val)
+{
+ asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+}
+
+#ifdef CONFIG_X86_64
+static inline unsigned long native_read_cr8(void)
+{
+ unsigned long cr8;
+ asm volatile("movq %%cr8,%0" : "=r" (cr8));
+ return cr8;
+}
+
+static inline void native_write_cr8(unsigned long val)
+{
+ asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
+}
+#endif
+
+static inline void native_wbinvd(void)
+{
+ asm volatile("wbinvd": : :"memory");
+}
+
+extern void native_load_gs_index(unsigned);
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else
+
+static inline unsigned long read_cr0(void)
+{
+ return native_read_cr0();
+}
+
+static inline void write_cr0(unsigned long x)
+{
+ native_write_cr0(x);
+}
+
+static inline unsigned long read_cr2(void)
+{
+ return native_read_cr2();
+}
+
+static inline void write_cr2(unsigned long x)
+{
+ native_write_cr2(x);
+}
+
+static inline unsigned long read_cr3(void)
+{
+ return native_read_cr3();
+}
+
+static inline void write_cr3(unsigned long x)
+{
+ native_write_cr3(x);
+}
+
+static inline unsigned long read_cr4(void)
+{
+ return native_read_cr4();
+}
+
+static inline unsigned long read_cr4_safe(void)
+{
+ return native_read_cr4_safe();
+}
+
+static inline void write_cr4(unsigned long x)
+{
+ native_write_cr4(x);
+}
+
+static inline void wbinvd(void)
+{
+ native_wbinvd();
+}
+
+#ifdef CONFIG_X86_64
+
+static inline unsigned long read_cr8(void)
+{
+ return native_read_cr8();
+}
+
+static inline void write_cr8(unsigned long x)
+{
+ native_write_cr8(x);
+}
+
+static inline void load_gs_index(unsigned selector)
+{
+ native_load_gs_index(selector);
+}
+
+#endif
+
+/* Clear the 'TS' bit */
+static inline void clts(void)
+{
+ native_clts();
+}
+
+#endif/* CONFIG_PARAVIRT */
+
+#define stts() write_cr0(read_cr0() | X86_CR0_TS)
+
+static inline void clflush(volatile void *__p)
+{
+ asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
+}
+
+#define nop() asm volatile ("nop")
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_X86_SPECIAL_INSNS_H */
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a82c2bf..76bfa2c 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return !!(tmp.tail ^ tmp.head);
+ return tmp.tail != tmp.head;
}
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
+ return (__ticket_t)(tmp.tail - tmp.head) > 1;
}
#ifndef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 8ebd5df..ad0ad07 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
#endif
#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
-#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
typedef struct arch_spinlock {
union {
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 1575177..b5d9533 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -38,7 +38,6 @@
#include <asm/tsc.h>
#include <asm/processor.h>
#include <asm/percpu.h>
-#include <asm/system.h>
#include <asm/desc.h>
#include <linux/random.h>
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
new file mode 100644
index 0000000..4ec45b3
--- /dev/null
+++ b/arch/x86/include/asm/switch_to.h
@@ -0,0 +1,129 @@
+#ifndef _ASM_X86_SWITCH_TO_H
+#define _ASM_X86_SWITCH_TO_H
+
+struct task_struct; /* one of the stranger aspects of C forward declarations */
+struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *next);
+struct tss_struct;
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss);
+
+#ifdef CONFIG_X86_32
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary \
+ "movl %P[task_canary](%[next]), %%ebx\n\t" \
+ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
+#define __switch_canary_oparam \
+ , [stack_canary] "=m" (stack_canary.canary)
+#define __switch_canary_iparam \
+ , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+#else /* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_oparam
+#define __switch_canary_iparam
+#endif /* CC_STACKPROTECTOR */
+
+/*
+ * Saving eflags is important. It switches not only IOPL between tasks,
+ * it also protects other tasks from NT leaking through sysenter etc.
+ */
+#define switch_to(prev, next, last) \
+do { \
+ /* \
+ * Context-switching clobbers all registers, so we clobber \
+ * them explicitly, via unused output variables. \
+ * (EAX and EBP is not listed because EBP is saved/restored \
+ * explicitly for wchan access and EAX is the return value of \
+ * __switch_to()) \
+ */ \
+ unsigned long ebx, ecx, edx, esi, edi; \
+ \
+ asm volatile("pushfl\n\t" /* save flags */ \
+ "pushl %%ebp\n\t" /* save EBP */ \
+ "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
+ "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
+ "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
+ "pushl %[next_ip]\n\t" /* restore EIP */ \
+ __switch_canary \
+ "jmp __switch_to\n" /* regparm call */ \
+ "1:\t" \
+ "popl %%ebp\n\t" /* restore EBP */ \
+ "popfl\n" /* restore flags */ \
+ \
+ /* output parameters */ \
+ : [prev_sp] "=m" (prev->thread.sp), \
+ [prev_ip] "=m" (prev->thread.ip), \
+ "=a" (last), \
+ \
+ /* clobbered output registers: */ \
+ "=b" (ebx), "=c" (ecx), "=d" (edx), \
+ "=S" (esi), "=D" (edi) \
+ \
+ __switch_canary_oparam \
+ \
+ /* input parameters: */ \
+ : [next_sp] "m" (next->thread.sp), \
+ [next_ip] "m" (next->thread.ip), \
+ \
+ /* regparm parameters for __switch_to(): */ \
+ [prev] "a" (prev), \
+ [next] "d" (next) \
+ \
+ __switch_canary_iparam \
+ \
+ : /* reloaded segment registers */ \
+ "memory"); \
+} while (0)
+
+#else /* CONFIG_X86_32 */
+
+/* frame pointer must be last for get_wchan */
+#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
+
+#define __EXTRA_CLOBBER \
+ , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
+ "r12", "r13", "r14", "r15"
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+#define __switch_canary \
+ "movq %P[task_canary](%%rsi),%%r8\n\t" \
+ "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
+#define __switch_canary_oparam \
+ , [gs_canary] "=m" (irq_stack_union.stack_canary)
+#define __switch_canary_iparam \
+ , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
+#else /* CC_STACKPROTECTOR */
+#define __switch_canary
+#define __switch_canary_oparam
+#define __switch_canary_iparam
+#endif /* CC_STACKPROTECTOR */
+
+/* Save restore flags to clear handle leaking NT */
+#define switch_to(prev, next, last) \
+ asm volatile(SAVE_CONTEXT \
+ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
+ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
+ "call __switch_to\n\t" \
+ "movq "__percpu_arg([current_task])",%%rsi\n\t" \
+ __switch_canary \
+ "movq %P[thread_info](%%rsi),%%r8\n\t" \
+ "movq %%rax,%%rdi\n\t" \
+ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "jnz ret_from_fork\n\t" \
+ RESTORE_CONTEXT \
+ : "=a" (last) \
+ __switch_canary_oparam \
+ : [next] "S" (next), [prev] "D" (prev), \
+ [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
+ [ti_flags] "i" (offsetof(struct thread_info, flags)), \
+ [_tif_fork] "i" (_TIF_FORK), \
+ [thread_info] "i" (offsetof(struct task_struct, stack)), \
+ [current_task] "m" (current_task) \
+ __switch_canary_iparam \
+ : "memory", "cc" __EXTRA_CLOBBER)
+
+#endif /* CONFIG_X86_32 */
+
+#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
deleted file mode 100644
index 2d2f01c..0000000
--- a/arch/x86/include/asm/system.h
+++ /dev/null
@@ -1,523 +0,0 @@
-#ifndef _ASM_X86_SYSTEM_H
-#define _ASM_X86_SYSTEM_H
-
-#include <asm/asm.h>
-#include <asm/segment.h>
-#include <asm/cpufeature.h>
-#include <asm/cmpxchg.h>
-#include <asm/nops.h>
-
-#include <linux/kernel.h>
-#include <linux/irqflags.h>
-
-/* entries in ARCH_DLINFO: */
-#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
-# define AT_VECTOR_SIZE_ARCH 2
-#else /* else it's non-compat x86-64 */
-# define AT_VECTOR_SIZE_ARCH 1
-#endif
-
-struct task_struct; /* one of the stranger aspects of C forward declarations */
-struct task_struct *__switch_to(struct task_struct *prev,
- struct task_struct *next);
-struct tss_struct;
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss);
-extern void show_regs_common(void);
-
-#ifdef CONFIG_X86_32
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary \
- "movl %P[task_canary](%[next]), %%ebx\n\t" \
- "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam \
- , [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam \
- , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
-
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
-#define switch_to(prev, next, last) \
-do { \
- /* \
- * Context-switching clobbers all registers, so we clobber \
- * them explicitly, via unused output variables. \
- * (EAX and EBP is not listed because EBP is saved/restored \
- * explicitly for wchan access and EAX is the return value of \
- * __switch_to()) \
- */ \
- unsigned long ebx, ecx, edx, esi, edi; \
- \
- asm volatile("pushfl\n\t" /* save flags */ \
- "pushl %%ebp\n\t" /* save EBP */ \
- "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \
- "movl %[next_sp],%%esp\n\t" /* restore ESP */ \
- "movl $1f,%[prev_ip]\n\t" /* save EIP */ \
- "pushl %[next_ip]\n\t" /* restore EIP */ \
- __switch_canary \
- "jmp __switch_to\n" /* regparm call */ \
- "1:\t" \
- "popl %%ebp\n\t" /* restore EBP */ \
- "popfl\n" /* restore flags */ \
- \
- /* output parameters */ \
- : [prev_sp] "=m" (prev->thread.sp), \
- [prev_ip] "=m" (prev->thread.ip), \
- "=a" (last), \
- \
- /* clobbered output registers: */ \
- "=b" (ebx), "=c" (ecx), "=d" (edx), \
- "=S" (esi), "=D" (edi) \
- \
- __switch_canary_oparam \
- \
- /* input parameters: */ \
- : [next_sp] "m" (next->thread.sp), \
- [next_ip] "m" (next->thread.ip), \
- \
- /* regparm parameters for __switch_to(): */ \
- [prev] "a" (prev), \
- [next] "d" (next) \
- \
- __switch_canary_iparam \
- \
- : /* reloaded segment registers */ \
- "memory"); \
-} while (0)
-
-/*
- * disable hlt during certain critical i/o operations
- */
-#define HAVE_DISABLE_HLT
-#else
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
-
-#define __EXTRA_CLOBBER \
- , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
- "r12", "r13", "r14", "r15"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary \
- "movq %P[task_canary](%%rsi),%%r8\n\t" \
- "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam \
- , [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam \
- , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
-
-/* Save restore flags to clear handle leaking NT */
-#define switch_to(prev, next, last) \
- asm volatile(SAVE_CONTEXT \
- "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
- "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
- "call __switch_to\n\t" \
- "movq "__percpu_arg([current_task])",%%rsi\n\t" \
- __switch_canary \
- "movq %P[thread_info](%%rsi),%%r8\n\t" \
- "movq %%rax,%%rdi\n\t" \
- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
- "jnz ret_from_fork\n\t" \
- RESTORE_CONTEXT \
- : "=a" (last) \
- __switch_canary_oparam \
- : [next] "S" (next), [prev] "D" (prev), \
- [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
- [ti_flags] "i" (offsetof(struct thread_info, flags)), \
- [_tif_fork] "i" (_TIF_FORK), \
- [thread_info] "i" (offsetof(struct task_struct, stack)), \
- [current_task] "m" (current_task) \
- __switch_canary_iparam \
- : "memory", "cc" __EXTRA_CLOBBER)
-#endif
-
-#ifdef __KERNEL__
-
-extern void native_load_gs_index(unsigned);
-
-/*
- * Load a segment. Fall back on loading the zero
- * segment if something goes wrong..
- */
-#define loadsegment(seg, value) \
-do { \
- unsigned short __val = (value); \
- \
- asm volatile(" \n" \
- "1: movl %k0,%%" #seg " \n" \
- \
- ".section .fixup,\"ax\" \n" \
- "2: xorl %k0,%k0 \n" \
- " jmp 1b \n" \
- ".previous \n" \
- \
- _ASM_EXTABLE(1b, 2b) \
- \
- : "+r" (__val) : : "memory"); \
-} while (0)
-
-/*
- * Save a segment register away
- */
-#define savesegment(seg, value) \
- asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
-
-/*
- * x86_32 user gs accessors.
- */
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_32_LAZY_GS
-#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
-#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
-#define task_user_gs(tsk) ((tsk)->thread.gs)
-#define lazy_save_gs(v) savesegment(gs, (v))
-#define lazy_load_gs(v) loadsegment(gs, (v))
-#else /* X86_32_LAZY_GS */
-#define get_user_gs(regs) (u16)((regs)->gs)
-#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
-#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
-#define lazy_save_gs(v) do { } while (0)
-#define lazy_load_gs(v) do { } while (0)
-#endif /* X86_32_LAZY_GS */
-#endif /* X86_32 */
-
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
- return __limit + 1;
-}
-
-static inline void native_clts(void)
-{
- asm volatile("clts");
-}
-
-/*
- * Volatile isn't enough to prevent the compiler from reordering the
- * read/write functions for the control registers and messing everything up.
- * A memory clobber would solve the problem, but would prevent reordering of
- * all loads stores around it, which can hurt performance. Solution is to
- * use a variable and mimic reads and writes to it to enforce serialization
- */
-static unsigned long __force_order;
-
-static inline unsigned long native_read_cr0(void)
-{
- unsigned long val;
- asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
- return val;
-}
-
-static inline void native_write_cr0(unsigned long val)
-{
- asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
-}
-
-static inline unsigned long native_read_cr2(void)
-{
- unsigned long val;
- asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
- return val;
-}
-
-static inline void native_write_cr2(unsigned long val)
-{
- asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
-}
-
-static inline unsigned long native_read_cr3(void)
-{
- unsigned long val;
- asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
- return val;
-}
-
-static inline void native_write_cr3(unsigned long val)
-{
- asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
-}
-
-static inline unsigned long native_read_cr4(void)
-{
- unsigned long val;
- asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
- return val;
-}
-
-static inline unsigned long native_read_cr4_safe(void)
-{
- unsigned long val;
- /* This could fault if %cr4 does not exist. In x86_64, a cr4 always
- * exists, so it will never fail. */
-#ifdef CONFIG_X86_32
- asm volatile("1: mov %%cr4, %0\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b)
- : "=r" (val), "=m" (__force_order) : "0" (0));
-#else
- val = native_read_cr4();
-#endif
- return val;
-}
-
-static inline void native_write_cr4(unsigned long val)
-{
- asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
-}
-
-#ifdef CONFIG_X86_64
-static inline unsigned long native_read_cr8(void)
-{
- unsigned long cr8;
- asm volatile("movq %%cr8,%0" : "=r" (cr8));
- return cr8;
-}
-
-static inline void native_write_cr8(unsigned long val)
-{
- asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
-}
-#endif
-
-static inline void native_wbinvd(void)
-{
- asm volatile("wbinvd": : :"memory");
-}
-
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else
-
-static inline unsigned long read_cr0(void)
-{
- return native_read_cr0();
-}
-
-static inline void write_cr0(unsigned long x)
-{
- native_write_cr0(x);
-}
-
-static inline unsigned long read_cr2(void)
-{
- return native_read_cr2();
-}
-
-static inline void write_cr2(unsigned long x)
-{
- native_write_cr2(x);
-}
-
-static inline unsigned long read_cr3(void)
-{
- return native_read_cr3();
-}
-
-static inline void write_cr3(unsigned long x)
-{
- native_write_cr3(x);
-}
-
-static inline unsigned long read_cr4(void)
-{
- return native_read_cr4();
-}
-
-static inline unsigned long read_cr4_safe(void)
-{
- return native_read_cr4_safe();
-}
-
-static inline void write_cr4(unsigned long x)
-{
- native_write_cr4(x);
-}
-
-static inline void wbinvd(void)
-{
- native_wbinvd();
-}
-
-#ifdef CONFIG_X86_64
-
-static inline unsigned long read_cr8(void)
-{
- return native_read_cr8();
-}
-
-static inline void write_cr8(unsigned long x)
-{
- native_write_cr8(x);
-}
-
-static inline void load_gs_index(unsigned selector)
-{
- native_load_gs_index(selector);
-}
-
-#endif
-
-/* Clear the 'TS' bit */
-static inline void clts(void)
-{
- native_clts();
-}
-
-#endif/* CONFIG_PARAVIRT */
-
-#define stts() write_cr0(read_cr0() | X86_CR0_TS)
-
-#endif /* __KERNEL__ */
-
-static inline void clflush(volatile void *__p)
-{
- asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
-}
-
-#define nop() asm volatile ("nop")
-
-void disable_hlt(void);
-void enable_hlt(void);
-
-void cpu_idle_wait(void);
-
-extern unsigned long arch_align_stack(unsigned long sp);
-extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
-
-void default_idle(void);
-bool set_pm_idle_to_default(void);
-
-void stop_this_cpu(void *dummy);
-
-/*
- * Force strict CPU ordering.
- * And yes, this is required on UP too when we're talking
- * to devices.
- */
-#ifdef CONFIG_X86_32
-/*
- * Some non-Intel clones support out of order store. wmb() ceases to be a
- * nop for these.
- */
-#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
-#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
-#else
-#define mb() asm volatile("mfence":::"memory")
-#define rmb() asm volatile("lfence":::"memory")
-#define wmb() asm volatile("sfence" ::: "memory")
-#endif
-
-/**
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- **/
-
-#define read_barrier_depends() do { } while (0)
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#ifdef CONFIG_X86_PPRO_FENCE
-# define smp_rmb() rmb()
-#else
-# define smp_rmb() barrier()
-#endif
-#ifdef CONFIG_X86_OOSTORE
-# define smp_wmb() wmb()
-#else
-# define smp_wmb() barrier()
-#endif
-#define smp_read_barrier_depends() read_barrier_depends()
-#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
-#define set_mb(var, value) do { var = value; barrier(); } while (0)
-#endif
-
-/*
- * Stop RDTSC speculation. This is needed when you need to use RDTSC
- * (or get_cycles or vread that possibly accesses the TSC) in a defined
- * code region.
- *
- * (Could use an alternative three way for this if there was one.)
- */
-static __always_inline void rdtsc_barrier(void)
-{
- alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
- alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
-}
-
-/*
- * We handle most unaligned accesses in hardware. On the other hand
- * unaligned DMA can be quite expensive on some Nehalem processors.
- *
- * Based on this we disable the IP header alignment in network drivers.
- */
-#define NET_IP_ALIGN 0
-#endif /* _ASM_X86_SYSTEM_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 431793e..34baa0e 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -57,14 +57,10 @@ DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);
static inline unsigned long long __cycles_2_ns(unsigned long long cyc)
{
- unsigned long long quot;
- unsigned long long rem;
int cpu = smp_processor_id();
unsigned long long ns = per_cpu(cyc2ns_offset, cpu);
- quot = (cyc >> CYC2NS_SCALE_FACTOR);
- rem = cyc & ((1ULL << CYC2NS_SCALE_FACTOR) - 1);
- ns += quot * per_cpu(cyc2ns, cpu) +
- ((rem * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR);
+ ns += mult_frac(cyc, per_cpu(cyc2ns, cpu),
+ (1UL << CYC2NS_SCALE_FACTOR));
return ns;
}
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 169be89..c0e108e 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -5,7 +5,7 @@
#include <linux/sched.h>
#include <asm/processor.h>
-#include <asm/system.h>
+#include <asm/special_insns.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 15d9915..c91e8b9 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu);
extern void check_tsc_sync_target(void);
extern int notsc_setup(char *);
-extern void save_sched_clock_state(void);
-extern void restore_sched_clock_state(void);
+extern void tsc_save_sched_clock_state(void);
+extern void tsc_restore_sched_clock_state(void);
#endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 815285b..8b38be2 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -5,13 +5,8 @@
#include <linux/clocksource.h>
struct vsyscall_gtod_data {
- seqlock_t lock;
+ seqcount_t seq;
- /* open coded 'struct timespec' */
- time_t wall_time_sec;
- u32 wall_time_nsec;
-
- struct timezone sys_tz;
struct { /* extract of a clocksource struct */
int vclock_mode;
cycle_t cycle_last;
@@ -19,8 +14,16 @@ struct vsyscall_gtod_data {
u32 mult;
u32 shift;
} clock;
- struct timespec wall_to_monotonic;
+
+ /* open coded 'struct timespec' */
+ time_t wall_time_sec;
+ u32 wall_time_nsec;
+ u32 monotonic_time_nsec;
+ time_t monotonic_time_sec;
+
+ struct timezone sys_tz;
struct timespec wall_time_coarse;
+ struct timespec monotonic_time_coarse;
};
extern struct vsyscall_gtod_data vsyscall_gtod_data;
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h
index e0f9aa1..5da71c2 100644
--- a/arch/x86/include/asm/virtext.h
+++ b/arch/x86/include/asm/virtext.h
@@ -16,7 +16,6 @@
#define _ASM_X86_VIRTEX_H
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/vmx.h>
#include <asm/svm.h>
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
new file mode 100644
index 0000000..6fe6767
--- /dev/null
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -0,0 +1,46 @@
+#ifndef _ASM_WORD_AT_A_TIME_H
+#define _ASM_WORD_AT_A_TIME_H
+
+/*
+ * This is largely generic for little-endian machines, but the
+ * optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+ return mask*0x0001020304050608ul >> 56;
+}
+
+#else /* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ long a = (0x0ff0001+mask) >> 23;
+ /* Fix the 1 for 00 case */
+ return a & mask;
+}
+
+#endif
+
+#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
+
+/* Return the high bit set in the first byte that is a zero */
+static inline unsigned long has_zero(unsigned long a)
+{
+ return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80);
+}
+
+#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/x86/include/asm/x2apic.h b/arch/x86/include/asm/x2apic.h
index 6bf5b8e..92e54ab 100644
--- a/arch/x86/include/asm/x2apic.h
+++ b/arch/x86/include/asm/x2apic.h
@@ -18,6 +18,11 @@ static const struct cpumask *x2apic_target_cpus(void)
return cpu_online_mask;
}
+static int x2apic_apic_id_valid(int apicid)
+{
+ return 1;
+}
+
static int x2apic_apic_id_registered(void)
{
return 1;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 517d476..baaca8d 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -145,9 +145,11 @@ struct x86_init_ops {
/**
* struct x86_cpuinit_ops - platform specific cpu hotplug setups
* @setup_percpu_clockev: set up the per cpu clock event device
+ * @early_percpu_clock_init: early init of the per cpu clock event device
*/
struct x86_cpuinit_ops {
void (*setup_percpu_clockev)(void);
+ void (*early_percpu_clock_init)(void);
void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
};
@@ -160,6 +162,8 @@ struct x86_cpuinit_ops {
* @is_untracked_pat_range exclude from PAT logic
* @nmi_init enable NMI on cpus
* @i8042_detect pre-detect if i8042 controller exists
+ * @save_sched_clock_state: save state for sched_clock() on suspend
+ * @restore_sched_clock_state: restore state for sched_clock() on resume
*/
struct x86_platform_ops {
unsigned long (*calibrate_tsc)(void);
@@ -171,6 +175,8 @@ struct x86_platform_ops {
void (*nmi_init)(void);
unsigned char (*get_nmi_reason)(void);
int (*i8042_detect)(void);
+ void (*save_sched_clock_state)(void);
+ void (*restore_sched_clock_state)(void);
};
struct pci_dev;
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index a1f2db5..cbf0c9d 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -56,6 +56,7 @@ DEFINE_GUEST_HANDLE(int);
DEFINE_GUEST_HANDLE(long);
DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
+DEFINE_GUEST_HANDLE(uint32_t);
#endif
#ifndef HYPERVISOR_VIRT_START
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8c8c365..d23d835 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ce664f3..a415b1f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -239,7 +239,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
* to not preallocating memory for all NR_CPUS
* when we use CPU hotplug.
*/
- if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
+ if (!apic->apic_id_valid(apic_id) && enabled)
printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
else
acpi_register_lapic(apic_id, enabled);
@@ -593,7 +593,7 @@ void __init acpi_set_irq_model_ioapic(void)
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
-static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static void __cpuinitdata acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
@@ -642,6 +642,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
kfree(buffer.pointer);
buffer.length = ACPI_ALLOCATE_BUFFER;
buffer.pointer = NULL;
+ lapic = NULL;
if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
goto out;
@@ -650,7 +651,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
goto free_tmp_map;
cpumask_copy(tmp_map, cpu_present_mask);
- acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
+ acpi_register_lapic(physid, ACPI_MADT_ENABLED);
/*
* If mp_register_lapic successfully generates a new logical cpu
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index f50e7fb..d2b7f27 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -14,6 +14,7 @@
#include <acpi/processor.h>
#include <asm/acpi.h>
#include <asm/mwait.h>
+#include <asm/special_insns.h>
/*
* Initialize bm_flags based on the CPU cache properties
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b1e7c7f..e6631120 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -477,7 +477,7 @@ error:
/* allocate and map a coherent mapping */
static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
- gfp_t flag)
+ gfp_t flag, struct dma_attrs *attrs)
{
dma_addr_t paddr;
unsigned long align_mask;
@@ -500,7 +500,8 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
}
__free_pages(page, get_order(size));
} else
- return dma_generic_alloc_coherent(dev, size, dma_addr, flag);
+ return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
+ attrs);
return NULL;
}
@@ -508,7 +509,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
/* free a coherent mapping */
static void
gart_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr)
+ dma_addr_t dma_addr, struct dma_attrs *attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
free_pages((unsigned long)vaddr, get_order(size));
@@ -700,8 +701,8 @@ static struct dma_map_ops gart_dma_ops = {
.unmap_sg = gart_unmap_sg,
.map_page = gart_map_page,
.unmap_page = gart_unmap_page,
- .alloc_coherent = gart_alloc_coherent,
- .free_coherent = gart_free_coherent,
+ .alloc = gart_alloc_coherent,
+ .free = gart_free_coherent,
.mapping_error = gart_mapping_error,
};
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2eec05b..11544d8 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -383,20 +383,25 @@ static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
{
- unsigned int rsvd; /* 0: uninitialized */
+ unsigned int rsvd, vector;
if (offset >= APIC_EILVT_NR_MAX)
return ~0;
- rsvd = atomic_read(&eilvt_offsets[offset]) & ~APIC_EILVT_MASKED;
+ rsvd = atomic_read(&eilvt_offsets[offset]);
do {
- if (rsvd &&
- !eilvt_entry_is_changeable(rsvd, new))
+ vector = rsvd & ~APIC_EILVT_MASKED; /* 0: unassigned */
+ if (vector && !eilvt_entry_is_changeable(vector, new))
/* may not change if vectors are different */
return rsvd;
rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
} while (rsvd != new);
+ rsvd &= ~APIC_EILVT_MASKED;
+ if (rsvd && rsvd != vector)
+ pr_info("LVT offset %d assigned for vector 0x%02x\n",
+ offset, rsvd);
+
return new;
}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 8c3cdde..359b689 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -180,6 +180,7 @@ static struct apic apic_flat = {
.name = "flat",
.probe = flat_probe,
.acpi_madt_oem_check = flat_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
@@ -337,6 +338,7 @@ static struct apic apic_physflat = {
.name = "physical flat",
.probe = physflat_probe,
.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = flat_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 775b82b..634ae6c 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -124,6 +124,7 @@ struct apic apic_noop = {
.probe = noop_probe,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = noop_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 09d3d8c..899803e 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -56,6 +56,12 @@ static unsigned int read_xapic_id(void)
return get_apic_id(apic_read(APIC_ID));
}
+static int numachip_apic_id_valid(int apicid)
+{
+ /* Trust what bootloader passes in MADT */
+ return 1;
+}
+
static int numachip_apic_id_registered(void)
{
return physid_isset(read_xapic_id(), phys_cpu_present_map);
@@ -238,6 +244,7 @@ static struct apic apic_numachip __refconst = {
.name = "NumaConnect system",
.probe = numachip_probe,
.acpi_madt_oem_check = numachip_acpi_madt_oem_check,
+ .apic_id_valid = numachip_apic_id_valid,
.apic_id_registered = numachip_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 521bead..0cdec70 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -198,6 +198,7 @@ static struct apic apic_bigsmp = {
.name = "bigsmp",
.probe = probe_bigsmp,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = bigsmp_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 5d513bc..e42d1d3b9 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -625,6 +625,7 @@ static struct apic __refdata apic_es7000_cluster = {
.name = "es7000",
.probe = probe_es7000,
.acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = es7000_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
@@ -690,6 +691,7 @@ static struct apic __refdata apic_es7000 = {
.name = "es7000",
.probe = probe_es7000,
.acpi_madt_oem_check = es7000_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = es7000_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fb07275..e88300d 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -64,9 +64,28 @@
#include <asm/apic.h>
#define __apicdebuginit(type) static type __init
+
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
+static void __init __ioapic_init_mappings(void);
+
+static unsigned int __io_apic_read (unsigned int apic, unsigned int reg);
+static void __io_apic_write (unsigned int apic, unsigned int reg, unsigned int val);
+static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+
+static struct io_apic_ops io_apic_ops = {
+ .init = __ioapic_init_mappings,
+ .read = __io_apic_read,
+ .write = __io_apic_write,
+ .modify = __io_apic_modify,
+};
+
+void __init set_io_apic_ops(const struct io_apic_ops *ops)
+{
+ io_apic_ops = *ops;
+}
+
/*
* Is the SiS APIC rmw bug present ?
* -1 = don't know, 0 = no, 1 = yes
@@ -294,6 +313,22 @@ static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
irq_free_desc(at);
}
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+ return io_apic_ops.read(apic, reg);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ io_apic_ops.write(apic, reg, value);
+}
+
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+ io_apic_ops.modify(apic, reg, value);
+}
+
+
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -314,16 +349,17 @@ static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
writel(vector, &io_apic->eoi);
}
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+static unsigned int __io_apic_read(unsigned int apic, unsigned int reg)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(reg, &io_apic->index);
return readl(&io_apic->data);
}
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+static void __io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
+
writel(reg, &io_apic->index);
writel(value, &io_apic->data);
}
@@ -334,7 +370,7 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
*
* Older SiS APIC requires we rewrite the index register
*/
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+static void __io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
@@ -377,6 +413,7 @@ static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+
return eu.entry;
}
@@ -384,9 +421,11 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
union entry_union eu;
unsigned long flags;
+
raw_spin_lock_irqsave(&ioapic_lock, flags);
eu.entry = __ioapic_read_entry(apic, pin);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
return eu.entry;
}
@@ -396,8 +435,7 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
* the interrupt, and we need to make sure the entry is fully populated
* before that happens.
*/
-static void
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
union entry_union eu = {{0, 0}};
@@ -409,6 +447,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
unsigned long flags;
+
raw_spin_lock_irqsave(&ioapic_lock, flags);
__ioapic_write_entry(apic, pin, e);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -435,8 +474,7 @@ static void ioapic_mask_entry(int apic, int pin)
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static int
-__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
struct irq_pin_list **last, *entry;
@@ -521,6 +559,7 @@ static void io_apic_sync(struct irq_pin_list *entry)
* a dummy read from the IO-APIC
*/
struct io_apic __iomem *io_apic;
+
io_apic = io_apic_base(entry->apic);
readl(&io_apic->data);
}
@@ -2512,21 +2551,73 @@ static void ack_apic_edge(struct irq_data *data)
atomic_t irq_mis_count;
-static void ack_apic_level(struct irq_data *data)
-{
- struct irq_cfg *cfg = data->chip_data;
- int i, do_unmask_irq = 0, irq = data->irq;
- unsigned long v;
-
- irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
+static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+{
/* If we are moving the irq we need to mask it */
if (unlikely(irqd_is_setaffinity_pending(data))) {
- do_unmask_irq = 1;
mask_ioapic(cfg);
+ return true;
}
+ return false;
+}
+
+static inline void ioapic_irqd_unmask(struct irq_data *data,
+ struct irq_cfg *cfg, bool masked)
+{
+ if (unlikely(masked)) {
+ /* Only migrate the irq if the ack has been received.
+ *
+ * On rare occasions the broadcast level triggered ack gets
+ * delayed going to ioapics, and if we reprogram the
+ * vector while Remote IRR is still set the irq will never
+ * fire again.
+ *
+ * To prevent this scenario we read the Remote IRR bit
+ * of the ioapic. This has two effects.
+ * - On any sane system the read of the ioapic will
+ * flush writes (and acks) going to the ioapic from
+ * this cpu.
+ * - We get to see if the ACK has actually been delivered.
+ *
+ * Based on failed experiments of reprogramming the
+ * ioapic entry from outside of irq context starting
+ * with masking the ioapic entry and then polling until
+ * Remote IRR was clear before reprogramming the
+ * ioapic I don't trust the Remote IRR bit to be
+ * completey accurate.
+ *
+ * However there appears to be no other way to plug
+ * this race, so if the Remote IRR bit is not
+ * accurate and is causing problems then it is a hardware bug
+ * and you can go talk to the chipset vendor about it.
+ */
+ if (!io_apic_level_ack_pending(cfg))
+ irq_move_masked_irq(data);
+ unmask_ioapic(cfg);
+ }
+}
+#else
+static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+{
+ return false;
+}
+static inline void ioapic_irqd_unmask(struct irq_data *data,
+ struct irq_cfg *cfg, bool masked)
+{
+}
#endif
+static void ack_apic_level(struct irq_data *data)
+{
+ struct irq_cfg *cfg = data->chip_data;
+ int i, irq = data->irq;
+ unsigned long v;
+ bool masked;
+
+ irq_complete_move(cfg);
+ masked = ioapic_irqd_mask(data, cfg);
+
/*
* It appears there is an erratum which affects at least version 0x11
* of I/O APIC (that's the 82093AA and cores integrated into various
@@ -2581,38 +2672,7 @@ static void ack_apic_level(struct irq_data *data)
eoi_ioapic_irq(irq, cfg);
}
- /* Now we can move and renable the irq */
- if (unlikely(do_unmask_irq)) {
- /* Only migrate the irq if the ack has been received.
- *
- * On rare occasions the broadcast level triggered ack gets
- * delayed going to ioapics, and if we reprogram the
- * vector while Remote IRR is still set the irq will never
- * fire again.
- *
- * To prevent this scenario we read the Remote IRR bit
- * of the ioapic. This has two effects.
- * - On any sane system the read of the ioapic will
- * flush writes (and acks) going to the ioapic from
- * this cpu.
- * - We get to see if the ACK has actually been delivered.
- *
- * Based on failed experiments of reprogramming the
- * ioapic entry from outside of irq context starting
- * with masking the ioapic entry and then polling until
- * Remote IRR was clear before reprogramming the
- * ioapic I don't trust the Remote IRR bit to be
- * completey accurate.
- *
- * However there appears to be no other way to plug
- * this race, so if the Remote IRR bit is not
- * accurate and is causing problems then it is a hardware bug
- * and you can go talk to the chipset vendor about it.
- */
- if (!io_apic_level_ack_pending(cfg))
- irq_move_masked_irq(data);
- unmask_ioapic(cfg);
- }
+ ioapic_irqd_unmask(data, cfg, masked);
}
#ifdef CONFIG_IRQ_REMAP
@@ -3873,6 +3933,11 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
void __init ioapic_and_gsi_init(void)
{
+ io_apic_ops.init();
+}
+
+static void __init __ioapic_init_mappings(void)
+{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
struct resource *ioapic_res;
int i;
@@ -3967,18 +4032,36 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi)
static __init int bad_ioapic(unsigned long address)
{
if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
- "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+ pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+ MAX_IO_APICS, nr_ioapics);
return 1;
}
if (!address) {
- printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
- " found in table, skipping!\n");
+ pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
return 1;
}
return 0;
}
+static __init int bad_ioapic_register(int idx)
+{
+ union IO_APIC_reg_00 reg_00;
+ union IO_APIC_reg_01 reg_01;
+ union IO_APIC_reg_02 reg_02;
+
+ reg_00.raw = io_apic_read(idx, 0);
+ reg_01.raw = io_apic_read(idx, 1);
+ reg_02.raw = io_apic_read(idx, 2);
+
+ if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
+ pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
+ mpc_ioapic_addr(idx));
+ return 1;
+ }
+
+ return 0;
+}
+
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
int idx = 0;
@@ -3995,6 +4078,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
ioapics[idx].mp_config.apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+
+ if (bad_ioapic_register(idx)) {
+ clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+ return;
+ }
+
ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
@@ -4015,10 +4104,10 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
if (gsi_cfg->gsi_end >= gsi_top)
gsi_top = gsi_cfg->gsi_end + 1;
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
- "GSI %d-%d\n", idx, mpc_ioapic_id(idx),
- mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
- gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+ pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
+ idx, mpc_ioapic_id(idx),
+ mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+ gsi_cfg->gsi_base, gsi_cfg->gsi_end);
nr_ioapics++;
}
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index c4a61ca..00d2422 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -478,6 +478,7 @@ static struct apic __refdata apic_numaq = {
.name = "NUMAQ",
.probe = probe_numaq,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = numaq_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 0787bb3..ff2c1b9 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -92,6 +92,7 @@ static struct apic apic_default = {
.name = "default",
.probe = probe_default,
.acpi_madt_oem_check = NULL,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = default_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 1911442..fea000b 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -496,6 +496,7 @@ static struct apic apic_summit = {
.name = "summit",
.probe = probe_summit,
.acpi_madt_oem_check = summit_acpi_madt_oem_check,
+ .apic_id_valid = default_apic_id_valid,
.apic_id_registered = summit_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 5007958..48f3103 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -213,6 +213,7 @@ static struct apic apic_x2apic_cluster = {
.name = "cluster x2apic",
.probe = x2apic_cluster_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_valid = x2apic_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,
.irq_delivery_mode = dest_LowestPrio,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f5373df..8a778db 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -119,6 +119,7 @@ static struct apic apic_x2apic_phys = {
.name = "physical x2apic",
.probe = x2apic_phys_probe,
.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+ .apic_id_valid = x2apic_apic_id_valid,
.apic_id_registered = x2apic_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 79b05b8..87bfa69 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -266,6 +266,11 @@ static void uv_send_IPI_all(int vector)
uv_send_IPI_mask(cpu_online_mask, vector);
}
+static int uv_apic_id_valid(int apicid)
+{
+ return 1;
+}
+
static int uv_apic_id_registered(void)
{
return 1;
@@ -351,6 +356,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.name = "UV large system",
.probe = uv_probe,
.acpi_madt_oem_check = uv_acpi_madt_oem_check,
+ .apic_id_valid = uv_apic_id_valid,
.apic_id_registered = uv_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f76623c..459e78c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -231,7 +231,6 @@
#include <linux/syscore_ops.h>
#include <linux/i8253.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/olpc.h>
@@ -1234,8 +1233,7 @@ static int suspend(int vetoable)
struct apm_user *as;
dpm_suspend_start(PMSG_SUSPEND);
-
- dpm_suspend_noirq(PMSG_SUSPEND);
+ dpm_suspend_end(PMSG_SUSPEND);
local_irq_disable();
syscore_suspend();
@@ -1259,9 +1257,9 @@ static int suspend(int vetoable)
syscore_resume();
local_irq_enable();
- dpm_resume_noirq(PMSG_RESUME);
-
+ dpm_resume_start(PMSG_RESUME);
dpm_resume_end(PMSG_RESUME);
+
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
for (as = user_list; as != NULL; as = as->next) {
@@ -1277,7 +1275,7 @@ static void standby(void)
{
int err;
- dpm_suspend_noirq(PMSG_SUSPEND);
+ dpm_suspend_end(PMSG_SUSPEND);
local_irq_disable();
syscore_suspend();
@@ -1291,7 +1289,7 @@ static void standby(void)
syscore_resume();
local_irq_enable();
- dpm_resume_noirq(PMSG_RESUME);
+ dpm_resume_start(PMSG_RESUME);
}
static apm_event_t get_event(void)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 25f24dc..6ab6aa2 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -16,6 +16,7 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o sched.o mshyperv.o
obj-y += rdrand.o
+obj-y += match.o
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c0f7d68..67e2583 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,6 +18,7 @@
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
+#include <asm/debugreg.h>
#include <asm/sections.h>
#include <linux/topology.h>
#include <linux/cpumask.h>
@@ -28,6 +29,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/mtrr.h>
#include <linux/numa.h>
#include <asm/asm.h>
@@ -933,7 +935,7 @@ static const struct msr_range msr_range_array[] __cpuinitconst = {
{ 0xc0011000, 0xc001103b},
};
-static void __cpuinit print_cpu_msr(void)
+static void __cpuinit __print_cpu_msr(void)
{
unsigned index_min, index_max;
unsigned index;
@@ -997,13 +999,13 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
else
printk(KERN_CONT "\n");
-#ifdef CONFIG_SMP
+ print_cpu_msr(c);
+}
+
+void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c)
+{
if (c->cpu_index < show_msr)
- print_cpu_msr();
-#else
- if (show_msr)
- print_cpu_msr();
-#endif
+ __print_cpu_msr();
}
static __init int setup_disablecpuid(char *arg)
@@ -1045,7 +1047,6 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) =
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
-EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
/*
* Special IST stacks which the CPU switches to when it calls
@@ -1115,7 +1116,6 @@ void debug_stack_reset(void)
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
-EXPORT_PER_CPU_SYMBOL(fpu_owner_task);
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
new file mode 100644
index 0000000..5502b28
--- /dev/null
+++ b/arch/x86/kernel/cpu/match.c
@@ -0,0 +1,91 @@
+#include <asm/cpu_device_id.h>
+#include <asm/processor.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/**
+ * x86_match_cpu - match current CPU again an array of x86_cpu_ids
+ * @match: Pointer to array of x86_cpu_ids. Last entry terminated with
+ * {}.
+ *
+ * Return the entry if the current CPU matches the entries in the
+ * passed x86_cpu_id match table. Otherwise NULL. The match table
+ * contains vendor (X86_VENDOR_*), family, model and feature bits or
+ * respective wildcard entries.
+ *
+ * A typical table entry would be to match a specific CPU
+ * { X86_VENDOR_INTEL, 6, 0x12 }
+ * or to match a specific CPU feature
+ * { X86_FEATURE_MATCH(X86_FEATURE_FOOBAR) }
+ *
+ * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
+ * %X86_MODEL_ANY, %X86_FEATURE_ANY or 0 (except for vendor)
+ *
+ * Arrays used to match for this should also be declared using
+ * MODULE_DEVICE_TABLE(x86_cpu, ...)
+ *
+ * This always matches against the boot cpu, assuming models and features are
+ * consistent over all CPUs.
+ */
+const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match)
+{
+ const struct x86_cpu_id *m;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ for (m = match; m->vendor | m->family | m->model | m->feature; m++) {
+ if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor)
+ continue;
+ if (m->family != X86_FAMILY_ANY && c->x86 != m->family)
+ continue;
+ if (m->model != X86_MODEL_ANY && c->x86_model != m->model)
+ continue;
+ if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature))
+ continue;
+ return m;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(x86_match_cpu);
+
+ssize_t arch_print_cpu_modalias(struct device *dev,
+ struct device_attribute *attr,
+ char *bufptr)
+{
+ int size = PAGE_SIZE;
+ int i, n;
+ char *buf = bufptr;
+
+ n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:"
+ "model:%04X:feature:",
+ boot_cpu_data.x86_vendor,
+ boot_cpu_data.x86,
+ boot_cpu_data.x86_model);
+ size -= n;
+ buf += n;
+ size -= 1;
+ for (i = 0; i < NCAPINTS*32; i++) {
+ if (boot_cpu_has(i)) {
+ n = snprintf(buf, size, ",%04X", i);
+ if (n >= size) {
+ WARN(1, "x86 features overflow page\n");
+ break;
+ }
+ size -= n;
+ buf += n;
+ }
+ }
+ *buf++ = '\n';
+ return buf - bufptr;
+}
+
+int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (buf) {
+ arch_print_cpu_modalias(NULL, NULL, buf);
+ add_uevent_var(env, "MODALIAS=%s", buf);
+ kfree(buf);
+ }
+ return 0;
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f..0c82091 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,7 +54,14 @@ static struct severity {
#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
#define MCACOD 0xffff
+/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
+#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
+#define MCACOD_SCRUBMSK 0xfff0
+#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
+#define MCACOD_DATA 0x0134 /* Data Load */
+#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
MCESEV(
NO, "Invalid",
@@ -102,11 +109,24 @@ static struct severity {
SER, BITCLR(MCI_STATUS_S)
),
- /* AR add known MCACODs here */
MCESEV(
PANIC, "Action required with lost events",
SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
),
+
+ /* known AR MCACODs: */
+#ifdef CONFIG_MEMORY_FAILURE
+ MCESEV(
+ KEEP, "HT thread notices Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ MCGMASK(MCG_STATUS_EIPV, 0)
+ ),
+ MCESEV(
+ AR, "Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+ USER
+ ),
+#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
@@ -115,11 +135,11 @@ static struct severity {
/* known AO MCACODs: */
MCESEV(
AO, "Action optional: memory scrubbing error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|0xfff0, MCI_UC_S|0x00c0)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD_SCRUBMSK, MCI_UC_S|MCACOD_SCRUB)
),
MCESEV(
AO, "Action optional: last level cache writeback error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|0x017a)
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_S|MCACOD_L3WB)
),
MCESEV(
SOME, "Action optional: unknown MCACOD",
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5a11ae2..d086a09 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -191,7 +191,7 @@ static void drain_mcelog_buffer(void)
{
unsigned int next, i, prev = 0;
- next = rcu_dereference_check_mce(mcelog.next);
+ next = ACCESS_ONCE(mcelog.next);
do {
struct mce *m;
@@ -540,6 +540,27 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&__get_cpu_var(mce_irq_work));
}
+/*
+ * Read ADDR and MISC registers.
+ */
+static void mce_read_aux(struct mce *m, int i)
+{
+ if (m->status & MCI_STATUS_MISCV)
+ m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+ if (m->status & MCI_STATUS_ADDRV) {
+ m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+
+ /*
+ * Mask the reported address by the reported granularity.
+ */
+ if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+ u8 shift = MCI_MISC_ADDR_LSB(m->misc);
+ m->addr >>= shift;
+ m->addr <<= shift;
+ }
+ }
+}
+
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
@@ -590,10 +611,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
@@ -917,6 +935,49 @@ static void mce_clear_state(unsigned long *toclear)
}
/*
+ * Need to save faulting physical address associated with a process
+ * in the machine check handler some place where we can grab it back
+ * later in mce_notify_process()
+ */
+#define MCE_INFO_MAX 16
+
+struct mce_info {
+ atomic_t inuse;
+ struct task_struct *t;
+ __u64 paddr;
+} mce_info[MCE_INFO_MAX];
+
+static void mce_save_info(__u64 addr)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
+ if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
+ mi->t = current;
+ mi->paddr = addr;
+ return;
+ }
+ }
+
+ mce_panic("Too many concurrent recoverable errors", NULL, NULL);
+}
+
+static struct mce_info *mce_find_info(void)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
+ if (atomic_read(&mi->inuse) && mi->t == current)
+ return mi;
+ return NULL;
+}
+
+static void mce_clear_info(struct mce_info *mi)
+{
+ atomic_set(&mi->inuse, 0);
+}
+
+/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
*
@@ -969,7 +1030,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
barrier();
/*
- * When no restart IP must always kill or panic.
+ * When no restart IP might need to kill or panic.
+ * Assume the worst for now, but if we find the
+ * severity is MCE_AR_SEVERITY we have other options.
*/
if (!(m.mcgstatus & MCG_STATUS_RIPV))
kill_it = 1;
@@ -1023,16 +1086,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
continue;
}
- /*
- * Kill on action required.
- */
- if (severity == MCE_AR_SEVERITY)
- kill_it = 1;
-
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
/*
* Action optional error. Queue address for later processing.
@@ -1052,6 +1106,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
}
+ /* mce_clear_state will clear *final, save locally for use later */
+ m = *final;
+
if (!no_way_out)
mce_clear_state(toclear);
@@ -1063,27 +1120,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
no_way_out = worst >= MCE_PANIC_SEVERITY;
/*
- * If we have decided that we just CAN'T continue, and the user
- * has not set tolerant to an insane level, give up and die.
- *
- * This is mainly used in the case when the system doesn't
- * support MCE broadcasting or it has been disabled.
- */
- if (no_way_out && tolerant < 3)
- mce_panic("Fatal machine check on current CPU", final, msg);
-
- /*
- * If the error seems to be unrecoverable, something should be
- * done. Try to kill as little as possible. If we can kill just
- * one task, do that. If the user has set the tolerance very
- * high, don't try to do anything at all.
+ * At insane "tolerant" levels we take no action. Otherwise
+ * we only die if we have no other choice. For less serious
+ * issues we try to recover, or limit damage to the current
+ * process.
*/
-
- if (kill_it && tolerant < 3)
- force_sig(SIGBUS, current);
-
- /* notify userspace ASAP */
- set_thread_flag(TIF_MCE_NOTIFY);
+ if (tolerant < 3) {
+ if (no_way_out)
+ mce_panic("Fatal machine check on current CPU", &m, msg);
+ if (worst == MCE_AR_SEVERITY) {
+ /* schedule action before return to userland */
+ mce_save_info(m.addr);
+ set_thread_flag(TIF_MCE_NOTIFY);
+ } else if (kill_it) {
+ force_sig(SIGBUS, current);
+ }
+ }
if (worst > 0)
mce_report_event(regs);
@@ -1094,34 +1146,57 @@ out:
}
EXPORT_SYMBOL_GPL(do_machine_check);
-/* dummy to break dependency. actual code is in mm/memory-failure.c */
-void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+#ifndef CONFIG_MEMORY_FAILURE
+int memory_failure(unsigned long pfn, int vector, int flags)
{
- printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+ /* mce_severity() should not hand us an ACTION_REQUIRED error */
+ BUG_ON(flags & MF_ACTION_REQUIRED);
+ printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
+ "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+
+ return 0;
}
+#endif
/*
- * Called after mce notification in process context. This code
- * is allowed to sleep. Call the high level VM handler to process
- * any corrupted pages.
- * Assume that the work queue code only calls this one at a time
- * per CPU.
- * Note we don't disable preemption, so this code might run on the wrong
- * CPU. In this case the event is picked up by the scheduled work queue.
- * This is merely a fast path to expedite processing in some common
- * cases.
+ * Called in process context that interrupted by MCE and marked with
+ * TIF_MCE_NOTIFY, just before returning to erroneous userland.
+ * This code is allowed to sleep.
+ * Attempt possible recovery such as calling the high level VM handler to
+ * process any corrupted pages, and kill/signal current process if required.
+ * Action required errors are handled here.
*/
void mce_notify_process(void)
{
unsigned long pfn;
- mce_notify_irq();
- while (mce_ring_get(&pfn))
- memory_failure(pfn, MCE_VECTOR);
+ struct mce_info *mi = mce_find_info();
+
+ if (!mi)
+ mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
+ pfn = mi->paddr >> PAGE_SHIFT;
+
+ clear_thread_flag(TIF_MCE_NOTIFY);
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx",
+ mi->paddr);
+ if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+ pr_err("Memory error not recovered");
+ force_sig(SIGBUS, current);
+ }
+ mce_clear_info(mi);
}
+/*
+ * Action optional processing happens here (picking up
+ * from the list of faulting pages that do_machine_check()
+ * placed into the "ring").
+ */
static void mce_process_work(struct work_struct *dummy)
{
- mce_notify_process();
+ unsigned long pfn;
+
+ while (mce_ring_get(&pfn))
+ memory_failure(pfn, MCE_VECTOR, 0);
}
#ifdef CONFIG_X86_MCE_INTEL
@@ -1211,8 +1286,6 @@ int mce_notify_irq(void)
/* Not more than two messages every minute */
static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
- clear_thread_flag(TIF_MCE_NOTIFY);
-
if (test_and_clear_bit(0, &mce_need_notify)) {
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
@@ -1541,6 +1614,12 @@ static int __mce_read_apei(char __user **ubuf, size_t usize)
/* Error or no more MCE record */
if (rc <= 0) {
mce_apei_read_done = 1;
+ /*
+ * When ERST is disabled, mce_chrdev_read() should return
+ * "no record" instead of "no device."
+ */
+ if (rc == -ENODEV)
+ return 0;
return rc;
}
rc = -EFAULT;
@@ -1859,7 +1938,7 @@ static struct bus_type mce_subsys = {
.dev_name = "machinecheck",
};
-struct device *mce_device[CONFIG_NR_CPUS];
+DEFINE_PER_CPU(struct device *, mce_device);
__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
@@ -2038,7 +2117,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
goto error2;
}
cpumask_set_cpu(cpu, mce_device_initialized);
- mce_device[cpu] = dev;
+ per_cpu(mce_device, cpu) = dev;
return 0;
error2:
@@ -2055,7 +2134,7 @@ error:
static __cpuinit void mce_device_remove(unsigned int cpu)
{
- struct device *dev = mce_device[cpu];
+ struct device *dev = per_cpu(mce_device, cpu);
int i;
if (!cpumask_test_cpu(cpu, mce_device_initialized))
@@ -2069,7 +2148,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
device_unregister(dev);
cpumask_clear_cpu(cpu, mce_device_initialized);
- mce_device[cpu] = NULL;
+ per_cpu(mce_device, cpu) = NULL;
}
/* Make sure there are no machine checks on offlined CPUs. */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index e4eeaaf..99b5717 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -523,7 +523,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
int i, err = 0;
struct threshold_bank *b = NULL;
- struct device *dev = mce_device[cpu];
+ struct device *dev = per_cpu(mce_device, cpu);
char name[32];
sprintf(name, "threshold_bank%i", bank);
@@ -587,7 +587,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
if (i == cpu)
continue;
- dev = mce_device[i];
+ dev = per_cpu(mce_device, i);
if (dev)
err = sysfs_create_link(&dev->kobj,b->kobj, name);
if (err)
@@ -667,7 +667,8 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
#ifdef CONFIG_SMP
/* sibling symlink */
if (shared_bank[bank] && b->blocks->cpu != cpu) {
- sysfs_remove_link(&mce_device[cpu]->kobj, name);
+ dev = per_cpu(mce_device, cpu);
+ sysfs_remove_link(&dev->kobj, name);
per_cpu(threshold_banks, cpu)[bank] = NULL;
return;
@@ -679,7 +680,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
if (i == cpu)
continue;
- dev = mce_device[i];
+ dev = per_cpu(mce_device, i);
if (dev)
sysfs_remove_link(&dev->kobj, name);
per_cpu(threshold_banks, i)[bank] = NULL;
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 5c0e653..2d5454c 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -9,7 +9,6 @@
#include <linux/smp.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 67bb17a..47a1870 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -25,7 +25,6 @@
#include <linux/cpu.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 54060f5..2d7998f 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -8,7 +8,6 @@
#include <linux/init.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 97b2635..75772ae 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -12,7 +12,6 @@
#include <asm/processor-flags.h>
#include <asm/cpufeature.h>
#include <asm/tlbflush.h>
-#include <asm/system.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
#include <asm/pat.h>
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1c52bdb..bb8e034 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -352,6 +352,36 @@ int x86_setup_perfctr(struct perf_event *event)
return 0;
}
+/*
+ * check that branch_sample_type is compatible with
+ * settings needed for precise_ip > 1 which implies
+ * using the LBR to capture ALL taken branches at the
+ * priv levels of the measurement
+ */
+static inline int precise_br_compat(struct perf_event *event)
+{
+ u64 m = event->attr.branch_sample_type;
+ u64 b = 0;
+
+ /* must capture all branches */
+ if (!(m & PERF_SAMPLE_BRANCH_ANY))
+ return 0;
+
+ m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_user)
+ b |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ b |= PERF_SAMPLE_BRANCH_KERNEL;
+
+ /*
+ * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
+ */
+
+ return m == b;
+}
+
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
@@ -368,6 +398,36 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
+ /*
+ * check that PEBS LBR correction does not conflict with
+ * whatever the user is asking with attr->branch_sample_type
+ */
+ if (event->attr.precise_ip > 1) {
+ u64 *br_type = &event->attr.branch_sample_type;
+
+ if (has_branch_stack(event)) {
+ if (!precise_br_compat(event))
+ return -EOPNOTSUPP;
+
+ /* branch_sample_type is compatible */
+
+ } else {
+ /*
+ * user did not specify branch_sample_type
+ *
+ * For PEBS fixups, we capture all
+ * the branches at the priv level of the
+ * event.
+ */
+ *br_type = PERF_SAMPLE_BRANCH_ANY;
+
+ if (!event->attr.exclude_user)
+ *br_type |= PERF_SAMPLE_BRANCH_USER;
+
+ if (!event->attr.exclude_kernel)
+ *br_type |= PERF_SAMPLE_BRANCH_KERNEL;
+ }
+ }
}
/*
@@ -425,6 +485,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ /* mark not used */
+ event->hw.extra_reg.idx = EXTRA_REG_NONE;
+ event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
return x86_pmu.hw_config(event);
}
@@ -578,14 +642,14 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
/* Prefer fixed purpose counters */
if (x86_pmu.num_counters_fixed) {
idx = X86_PMC_IDX_FIXED;
- for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_MAX) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
}
}
/* Grab the first unused counter starting with idx */
idx = sched->state.counter;
- for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+ for_each_set_bit_from(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
if (!__test_and_set_bit(idx, sched->state.used))
goto done;
}
@@ -1249,6 +1313,11 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
+static struct attribute_group x86_pmu_format_group = {
+ .name = "format",
+ .attrs = NULL,
+};
+
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -1323,6 +1392,7 @@ static int __init init_hw_perf_events(void)
}
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+ x86_pmu_format_group.attrs = x86_pmu.format_attrs;
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
@@ -1551,6 +1621,9 @@ static int x86_pmu_event_idx(struct perf_event *event)
{
int idx = event->hw.idx;
+ if (!x86_pmu.attr_rdpmc)
+ return 0;
+
if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
idx -= X86_PMC_IDX_FIXED;
idx |= 1 << 30;
@@ -1603,38 +1676,51 @@ static struct attribute_group x86_pmu_attr_group = {
static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
+ &x86_pmu_format_group,
NULL,
};
+static void x86_pmu_flush_branch_stack(void)
+{
+ if (x86_pmu.flush_branch_stack)
+ x86_pmu.flush_branch_stack();
+}
+
static struct pmu pmu = {
- .pmu_enable = x86_pmu_enable,
- .pmu_disable = x86_pmu_disable,
+ .pmu_enable = x86_pmu_enable,
+ .pmu_disable = x86_pmu_disable,
.attr_groups = x86_pmu_attr_groups,
.event_init = x86_pmu_event_init,
- .add = x86_pmu_add,
- .del = x86_pmu_del,
- .start = x86_pmu_start,
- .stop = x86_pmu_stop,
- .read = x86_pmu_read,
+ .add = x86_pmu_add,
+ .del = x86_pmu_del,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
+ .read = x86_pmu_read,
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
.event_idx = x86_pmu_event_idx,
+ .flush_branch_stack = x86_pmu_flush_branch_stack,
};
-void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
+ userpg->cap_usr_time = 0;
+ userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->pmc_width = x86_pmu.cntval_bits;
+
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return;
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
return;
+ userpg->cap_usr_time = 1;
userpg->time_mult = this_cpu_read(cyc2ns);
userpg->time_shift = CYC2NS_SCALE_FACTOR;
userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 82db83b..6638aaf 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
+ EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_MAX /* number of entries needed */
};
@@ -130,6 +131,8 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+ struct er_account *lbr_sel;
+ u64 br_sel;
/*
* Intel host/guest exclude bits
@@ -268,6 +271,29 @@ struct x86_pmu_quirk {
void (*func)(void);
};
+union x86_pmu_config {
+ struct {
+ u64 event:8,
+ umask:8,
+ usr:1,
+ os:1,
+ edge:1,
+ pc:1,
+ interrupt:1,
+ __reserved1:1,
+ en:1,
+ inv:1,
+ cmask:8,
+ event2:4,
+ __reserved2:4,
+ go:1,
+ ho:1;
+ } bits;
+ u64 value;
+};
+
+#define X86_CONFIG(args...) ((union x86_pmu_config){.bits = {args}}).value
+
/*
* struct x86_pmu - generic x86 pmu
*/
@@ -313,6 +339,7 @@ struct x86_pmu {
* sysfs attrs
*/
int attr_rdpmc;
+ struct attribute **format_attrs;
/*
* CPU Hotplug hooks
@@ -321,6 +348,7 @@ struct x86_pmu {
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
+ void (*flush_branch_stack)(void);
/*
* Intel Arch Perfmon v2+
@@ -342,6 +370,8 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
+ u64 lbr_sel_mask; /* LBR_SELECT valid bits */
+ const int *lbr_sel_map; /* lbr_select mappings */
/*
* Extra registers for events
@@ -455,6 +485,15 @@ extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+ return ip > PAGE_OFFSET;
+#else
+ return (long)ip < 0;
+#endif
+}
+
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
@@ -535,6 +574,10 @@ void intel_pmu_lbr_init_nhm(void);
void intel_pmu_lbr_init_atom(void);
+void intel_pmu_lbr_init_snb(void);
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event);
+
int p4_pmu_init(void);
int p6_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 67250a5..95e7fe1 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -401,6 +404,21 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
+PMU_FORMAT_ATTR(event, "config:0-7,32-35");
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *amd_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = x86_pmu_handle_irq,
@@ -423,6 +441,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.get_event_constraints = amd_get_event_constraints,
.put_event_constraints = amd_put_event_constraints,
+ .format_attrs = amd_format_attr,
+
.cpu_prepare = amd_pmu_cpu_prepare,
.cpu_starting = amd_pmu_cpu_starting,
.cpu_dead = amd_pmu_cpu_dead,
@@ -593,6 +613,7 @@ static __initconst const struct x86_pmu amd_pmu_f15h = {
.cpu_dead = amd_pmu_cpu_dead,
#endif
.cpu_starting = amd_pmu_cpu_starting,
+ .format_attrs = amd_format_attr,
};
__init int amd_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3bd37bd..26b3e2f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -385,14 +385,15 @@ static __initconst const u64 westmere_hw_cache_event_ids
#define NHM_LOCAL_DRAM (1 << 14)
#define NHM_NON_DRAM (1 << 15)
-#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
+#define NHM_LOCAL (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_REMOTE (NHM_REMOTE_DRAM)
#define NHM_DMND_READ (NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
-#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
+#define NHM_L3_MISS (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
static __initconst const u64 nehalem_hw_cache_extra_regs
@@ -416,16 +417,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
},
[ C(NODE) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_ALL_DRAM,
- [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE_DRAM,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_REMOTE,
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_ALL_DRAM,
- [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE_DRAM,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_REMOTE,
},
[ C(OP_PREFETCH) ] = {
- [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_ALL_DRAM,
- [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE_DRAM,
+ [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
+ [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_REMOTE,
},
},
};
@@ -727,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
+{
+ /* user explicitly requested branch sampling */
+ if (has_branch_stack(event))
+ return true;
+
+ /* implicit branch sampling to correct PEBS skid */
+ if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+ return true;
+
+ return false;
+}
+
static void intel_pmu_disable_all(void)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -881,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
+ /*
+ * must disable before any actual event
+ * because any event may be combined with LBR
+ */
+ if (intel_pmu_needs_lbr_smpl(event))
+ intel_pmu_lbr_disable(event);
+
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
@@ -935,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
intel_pmu_enable_bts(hwc->config);
return;
}
+ /*
+ * must enabled before any actual event
+ * because any event may be combined with LBR
+ */
+ if (intel_pmu_needs_lbr_smpl(event))
+ intel_pmu_lbr_enable(event);
if (event->attr.exclude_host)
cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1057,6 +1084,9 @@ again:
data.period = event->hw.last_period;
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
}
@@ -1123,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
*/
static struct event_constraint *
__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+ struct perf_event *event,
+ struct hw_perf_event_extra *reg)
{
struct event_constraint *c = &emptyconstraint;
- struct hw_perf_event_extra *reg = &event->hw.extra_reg;
struct er_account *era;
unsigned long flags;
int orig_idx = reg->idx;
/* already allocated shared msr */
if (reg->alloc)
- return &unconstrained;
+ return NULL; /* call x86_get_event_constraint() */
again:
era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1186,10 @@ again:
reg->alloc = 1;
/*
- * All events using extra_reg are unconstrained.
- * Avoids calling x86_get_event_constraints()
- *
- * Must revisit if extra_reg controlling events
- * ever have constraints. Worst case we go through
- * the regular event constraint table.
+ * need to call x86_get_event_constraint()
+ * to check if associated event has constraints
*/
- c = &unconstrained;
+ c = NULL;
} else if (intel_try_alt_er(event, orig_idx)) {
raw_spin_unlock_irqrestore(&era->lock, flags);
goto again;
@@ -1200,11 +1226,23 @@ static struct event_constraint *
intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- struct event_constraint *c = NULL;
-
- if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
- c = __intel_shared_reg_get_constraints(cpuc, event);
-
+ struct event_constraint *c = NULL, *d;
+ struct hw_perf_event_extra *xreg, *breg;
+
+ xreg = &event->hw.extra_reg;
+ if (xreg->idx != EXTRA_REG_NONE) {
+ c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+ if (c == &emptyconstraint)
+ return c;
+ }
+ breg = &event->hw.branch_reg;
+ if (breg->idx != EXTRA_REG_NONE) {
+ d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+ if (d == &emptyconstraint) {
+ __intel_shared_reg_put_constraints(cpuc, xreg);
+ c = d;
+ }
+ }
return c;
}
@@ -1252,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
reg = &event->hw.extra_reg;
if (reg->idx != EXTRA_REG_NONE)
__intel_shared_reg_put_constraints(cpuc, reg);
+
+ reg = &event->hw.branch_reg;
+ if (reg->idx != EXTRA_REG_NONE)
+ __intel_shared_reg_put_constraints(cpuc, reg);
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1287,12 +1329,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
*
* Thereby we gain a PEBS capable cycle counter.
*/
- u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
+
alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
event->hw.config = alt_config;
}
+ if (intel_pmu_needs_lbr_smpl(event)) {
+ ret = intel_pmu_setup_lbr_filter(event);
+ if (ret)
+ return ret;
+ }
+
if (event->attr.type != PERF_TYPE_RAW)
return 0;
@@ -1382,6 +1431,24 @@ static void core_pmu_enable_all(int added)
}
}
+PMU_FORMAT_ATTR(event, "config:0-7" );
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(pc, "config:19" );
+PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *intel_arch_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_pc.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
@@ -1406,6 +1473,7 @@ static __initconst const struct x86_pmu core_pmu = {
.put_event_constraints = intel_put_event_constraints,
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
+ .format_attrs = intel_arch_formats_attr,
};
struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1431,7 +1499,7 @@ static int intel_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- if (!x86_pmu.extra_regs)
+ if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
return NOTIFY_OK;
cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,22 +1521,28 @@ static void intel_pmu_cpu_starting(int cpu)
*/
intel_pmu_lbr_reset();
- if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+ cpuc->lbr_sel = NULL;
+
+ if (!cpuc->shared_regs)
return;
- for_each_cpu(i, topology_thread_cpumask(cpu)) {
- struct intel_shared_regs *pc;
+ if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+ for_each_cpu(i, topology_thread_cpumask(cpu)) {
+ struct intel_shared_regs *pc;
- pc = per_cpu(cpu_hw_events, i).shared_regs;
- if (pc && pc->core_id == core_id) {
- cpuc->kfree_on_online = cpuc->shared_regs;
- cpuc->shared_regs = pc;
- break;
+ pc = per_cpu(cpu_hw_events, i).shared_regs;
+ if (pc && pc->core_id == core_id) {
+ cpuc->kfree_on_online = cpuc->shared_regs;
+ cpuc->shared_regs = pc;
+ break;
+ }
}
+ cpuc->shared_regs->core_id = core_id;
+ cpuc->shared_regs->refcnt++;
}
- cpuc->shared_regs->core_id = core_id;
- cpuc->shared_regs->refcnt++;
+ if (x86_pmu.lbr_sel_map)
+ cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
}
static void intel_pmu_cpu_dying(int cpu)
@@ -1486,6 +1560,33 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}
+static void intel_pmu_flush_branch_stack(void)
+{
+ /*
+ * Intel LBR does not tag entries with the
+ * PID of the current task, then we need to
+ * flush it on ctxsw
+ * For now, we simply reset it
+ */
+ if (x86_pmu.lbr_nr)
+ intel_pmu_lbr_reset();
+}
+
+PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
+
+static struct attribute *intel_arch3_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_pc.attr,
+ &format_attr_any.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+
+ &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
+ NULL,
+};
+
static __initconst const struct x86_pmu intel_pmu = {
.name = "Intel",
.handle_irq = intel_pmu_handle_irq,
@@ -1509,10 +1610,13 @@ static __initconst const struct x86_pmu intel_pmu = {
.get_event_constraints = intel_get_event_constraints,
.put_event_constraints = intel_put_event_constraints,
+ .format_attrs = intel_arch3_formats_attr,
+
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
+ .flush_branch_stack = intel_pmu_flush_branch_stack,
};
static __init void intel_clovertown_quirk(void)
@@ -1689,9 +1793,11 @@ __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_nehalem_extra_regs;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
x86_add_quirk(intel_nehalem_quirk);
@@ -1726,9 +1832,11 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_HAS_RSP_1;
/* UOPS_ISSUED.STALLED_CYCLES */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
pr_cont("Westmere events, ");
break;
@@ -1739,7 +1847,7 @@ __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
- intel_pmu_lbr_init_nhm();
+ intel_pmu_lbr_init_snb();
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
@@ -1749,9 +1857,11 @@ __init int intel_pmu_init(void)
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+ X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
+ X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
pr_cont("SandyBridge events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d6bd49f..7f64df1 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -3,6 +3,7 @@
#include <linux/slab.h>
#include <asm/perf_event.h>
+#include <asm/insn.h>
#include "perf_event.h"
@@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event)
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
cpuc->pebs_enabled |= 1ULL << hwc->idx;
-
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
- intel_pmu_lbr_enable(event);
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
-
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
- intel_pmu_lbr_disable(event);
}
void intel_pmu_pebs_enable_all(void)
@@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void)
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
}
-#include <asm/insn.h>
-
-static inline bool kernel_ip(unsigned long ip)
-{
-#ifdef CONFIG_X86_32
- return ip > PAGE_OFFSET;
-#else
- return (long)ip < 0;
-#endif
-}
-
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* both formats and we don't use the other fields in this
* routine.
*/
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_core *pebs = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
@@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
else
regs.flags &= ~PERF_EFLAGS_EXACT;
+ if (has_branch_stack(event))
+ data.br_stack = &cpuc->lbr_stack;
+
if (perf_event_overflow(event, &data, &regs))
x86_pmu_stop(event, 0);
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 47a7e63..520b426 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -3,6 +3,7 @@
#include <asm/perf_event.h>
#include <asm/msr.h>
+#include <asm/insn.h>
#include "perf_event.h"
@@ -14,6 +15,100 @@ enum {
};
/*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */
+#define LBR_USER_BIT 1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT 2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT 5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */
+#define LBR_FAR_BIT 8 /* do not capture far branches */
+
+#define LBR_KERNEL (1 << LBR_KERNEL_BIT)
+#define LBR_USER (1 << LBR_USER_BIT)
+#define LBR_JCC (1 << LBR_JCC_BIT)
+#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN (1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT)
+#define LBR_FAR (1 << LBR_FAR_BIT)
+
+#define LBR_PLM (LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP -1 /* LBR filter not supported */
+#define LBR_IGN 0 /* ignored */
+
+#define LBR_ANY \
+ (LBR_JCC |\
+ LBR_REL_CALL |\
+ LBR_IND_CALL |\
+ LBR_RETURN |\
+ LBR_REL_JMP |\
+ LBR_IND_JMP |\
+ LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+
+#define for_each_branch_sample_type(x) \
+ for ((x) = PERF_SAMPLE_BRANCH_USER; \
+ (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
+
+/*
+ * x86control flow change classification
+ * x86control flow changes include branches, interrupts, traps, faults
+ */
+enum {
+ X86_BR_NONE = 0, /* unknown */
+
+ X86_BR_USER = 1 << 0, /* branch target is user */
+ X86_BR_KERNEL = 1 << 1, /* branch target is kernel */
+
+ X86_BR_CALL = 1 << 2, /* call */
+ X86_BR_RET = 1 << 3, /* return */
+ X86_BR_SYSCALL = 1 << 4, /* syscall */
+ X86_BR_SYSRET = 1 << 5, /* syscall return */
+ X86_BR_INT = 1 << 6, /* sw interrupt */
+ X86_BR_IRET = 1 << 7, /* return from interrupt */
+ X86_BR_JCC = 1 << 8, /* conditional */
+ X86_BR_JMP = 1 << 9, /* jump */
+ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
+ X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+};
+
+#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+
+#define X86_BR_ANY \
+ (X86_BR_CALL |\
+ X86_BR_RET |\
+ X86_BR_SYSCALL |\
+ X86_BR_SYSRET |\
+ X86_BR_INT |\
+ X86_BR_IRET |\
+ X86_BR_JCC |\
+ X86_BR_JMP |\
+ X86_BR_IRQ |\
+ X86_BR_IND_CALL)
+
+#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
+
+#define X86_BR_ANY_CALL \
+ (X86_BR_CALL |\
+ X86_BR_IND_CALL |\
+ X86_BR_SYSCALL |\
+ X86_BR_IRQ |\
+ X86_BR_INT)
+
+static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
+
+/*
* We only support LBR implementations that have FREEZE_LBRS_ON_PMI
* otherwise it becomes near impossible to get a reliable stack.
*/
@@ -21,6 +116,10 @@ enum {
static void __intel_pmu_lbr_enable(void)
{
u64 debugctl;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+ if (cpuc->lbr_sel)
+ wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
@@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event)
* Reset the LBR stack if we changed task context to
* avoid data leaks.
*/
-
if (event->ctx->task && cpuc->lbr_context != event->ctx) {
intel_pmu_lbr_reset();
cpuc->lbr_context = event->ctx;
}
+ cpuc->br_sel = event->hw.branch_reg.reg;
cpuc->lbr_users++;
}
@@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event)
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
- if (cpuc->enabled && !cpuc->lbr_users)
+ if (cpuc->enabled && !cpuc->lbr_users) {
__intel_pmu_lbr_disable();
+ /* avoid stale pointer */
+ cpuc->lbr_context = NULL;
+ }
}
void intel_pmu_lbr_enable_all(void)
@@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void)
__intel_pmu_lbr_disable();
}
+/*
+ * TOS = most recently recorded branch
+ */
static inline u64 intel_pmu_lbr_tos(void)
{
u64 tos;
@@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
- cpuc->lbr_entries[i].from = msr_lastbranch.from;
- cpuc->lbr_entries[i].to = msr_lastbranch.to;
- cpuc->lbr_entries[i].flags = 0;
+ cpuc->lbr_entries[i].from = msr_lastbranch.from;
+ cpuc->lbr_entries[i].to = msr_lastbranch.to;
+ cpuc->lbr_entries[i].mispred = 0;
+ cpuc->lbr_entries[i].predicted = 0;
+ cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
-#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
-
/*
* Due to lack of segmentation in Linux the effective address (offset)
* is the same as the linear address, allowing us to merge the LIP and EIP
@@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
- u64 from, to, flags = 0;
+ u64 from, to, mis = 0, pred = 0;
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
- flags = !!(from & LBR_FROM_FLAG_MISPRED);
+ mis = !!(from & LBR_FROM_FLAG_MISPRED);
+ pred = !mis;
from = (u64)((((s64)from) << 1) >> 1);
}
- cpuc->lbr_entries[i].from = from;
- cpuc->lbr_entries[i].to = to;
- cpuc->lbr_entries[i].flags = flags;
+ cpuc->lbr_entries[i].from = from;
+ cpuc->lbr_entries[i].to = to;
+ cpuc->lbr_entries[i].mispred = mis;
+ cpuc->lbr_entries[i].predicted = pred;
+ cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
@@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void)
intel_pmu_lbr_read_32(cpuc);
else
intel_pmu_lbr_read_64(cpuc);
+
+ intel_pmu_lbr_filter(cpuc);
+}
+
+/*
+ * SW filter is used:
+ * - in case there is no HW filter
+ * - in case the HW filter has errata or limitations
+ */
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+{
+ u64 br_type = event->attr.branch_sample_type;
+ int mask = 0;
+
+ if (br_type & PERF_SAMPLE_BRANCH_USER)
+ mask |= X86_BR_USER;
+
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
+ mask |= X86_BR_KERNEL;
+
+ /* we ignore BRANCH_HV here */
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY)
+ mask |= X86_BR_ANY;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
+ mask |= X86_BR_ANY_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ mask |= X86_BR_IND_CALL;
+ /*
+ * stash actual user request into reg, it may
+ * be used by fixup code for some CPU
+ */
+ event->hw.branch_reg.reg = mask;
+}
+
+/*
+ * setup the HW LBR filter
+ * Used only when available, may not be enough to disambiguate
+ * all branches, may need the help of the SW filter
+ */
+static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
+{
+ struct hw_perf_event_extra *reg;
+ u64 br_type = event->attr.branch_sample_type;
+ u64 mask = 0, m;
+ u64 v;
+
+ for_each_branch_sample_type(m) {
+ if (!(br_type & m))
+ continue;
+
+ v = x86_pmu.lbr_sel_map[m];
+ if (v == LBR_NOT_SUPP)
+ return -EOPNOTSUPP;
+
+ if (v != LBR_IGN)
+ mask |= v;
+ }
+ reg = &event->hw.branch_reg;
+ reg->idx = EXTRA_REG_LBR;
+
+ /* LBR_SELECT operates in suppress mode so invert mask */
+ reg->config = ~mask & x86_pmu.lbr_sel_mask;
+
+ return 0;
+}
+
+int intel_pmu_setup_lbr_filter(struct perf_event *event)
+{
+ int ret = 0;
+
+ /*
+ * no LBR on this PMU
+ */
+ if (!x86_pmu.lbr_nr)
+ return -EOPNOTSUPP;
+
+ /*
+ * setup SW LBR filter
+ */
+ intel_pmu_setup_sw_lbr_filter(event);
+
+ /*
+ * setup HW LBR filter, if any
+ */
+ if (x86_pmu.lbr_sel_map)
+ ret = intel_pmu_setup_hw_lbr_filter(event);
+
+ return ret;
}
+/*
+ * return the type of control flow change at address "from"
+ * intruction is not necessarily a branch (in case of interrupt).
+ *
+ * The branch type returned also includes the priv level of the
+ * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
+ *
+ * If a branch type is unknown OR the instruction cannot be
+ * decoded (e.g., text page not present), then X86_BR_NONE is
+ * returned.
+ */
+static int branch_type(unsigned long from, unsigned long to)
+{
+ struct insn insn;
+ void *addr;
+ int bytes, size = MAX_INSN_SIZE;
+ int ret = X86_BR_NONE;
+ int ext, to_plm, from_plm;
+ u8 buf[MAX_INSN_SIZE];
+ int is64 = 0;
+
+ to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
+ from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
+
+ /*
+ * maybe zero if lbr did not fill up after a reset by the time
+ * we get a PMU interrupt
+ */
+ if (from == 0 || to == 0)
+ return X86_BR_NONE;
+
+ if (from_plm == X86_BR_USER) {
+ /*
+ * can happen if measuring at the user level only
+ * and we interrupt in a kernel thread, e.g., idle.
+ */
+ if (!current->mm)
+ return X86_BR_NONE;
+
+ /* may fail if text not present */
+ bytes = copy_from_user_nmi(buf, (void __user *)from, size);
+ if (bytes != size)
+ return X86_BR_NONE;
+
+ addr = buf;
+ } else
+ addr = (void *)from;
+
+ /*
+ * decoder needs to know the ABI especially
+ * on 64-bit systems running 32-bit apps
+ */
+#ifdef CONFIG_X86_64
+ is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
+#endif
+ insn_init(&insn, addr, is64);
+ insn_get_opcode(&insn);
+
+ switch (insn.opcode.bytes[0]) {
+ case 0xf:
+ switch (insn.opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ ret = X86_BR_SYSCALL;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ ret = X86_BR_SYSRET;
+ break;
+ case 0x80 ... 0x8f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ break;
+ case 0x70 ... 0x7f: /* conditional */
+ ret = X86_BR_JCC;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ ret = X86_BR_RET;
+ break;
+ case 0xcf: /* iret */
+ ret = X86_BR_IRET;
+ break;
+ case 0xcc ... 0xce: /* int */
+ ret = X86_BR_INT;
+ break;
+ case 0xe8: /* call near rel */
+ case 0x9a: /* call far absolute */
+ ret = X86_BR_CALL;
+ break;
+ case 0xe0 ... 0xe3: /* loop jmp */
+ ret = X86_BR_JCC;
+ break;
+ case 0xe9 ... 0xeb: /* jmp */
+ ret = X86_BR_JMP;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ insn_get_modrm(&insn);
+ ext = (insn.modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ ret = X86_BR_IND_CALL;
+ break;
+ case 4:
+ case 5:
+ ret = X86_BR_JMP;
+ break;
+ }
+ break;
+ default:
+ ret = X86_BR_NONE;
+ }
+ /*
+ * interrupts, traps, faults (and thus ring transition) may
+ * occur on any instructions. Thus, to classify them correctly,
+ * we need to first look at the from and to priv levels. If they
+ * are different and to is in the kernel, then it indicates
+ * a ring transition. If the from instruction is not a ring
+ * transition instr (syscall, systenter, int), then it means
+ * it was a irq, trap or fault.
+ *
+ * we have no way of detecting kernel to kernel faults.
+ */
+ if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
+ && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
+ ret = X86_BR_IRQ;
+
+ /*
+ * branch priv level determined by target as
+ * is done by HW when LBR_SELECT is implemented
+ */
+ if (ret != X86_BR_NONE)
+ ret |= to_plm;
+
+ return ret;
+}
+
+/*
+ * implement actual branch filter based on user demand.
+ * Hardware may not exactly satisfy that request, thus
+ * we need to inspect opcodes. Mismatched branches are
+ * discarded. Therefore, the number of branches returned
+ * in PERF_SAMPLE_BRANCH_STACK sample may vary.
+ */
+static void
+intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
+{
+ u64 from, to;
+ int br_sel = cpuc->br_sel;
+ int i, j, type;
+ bool compress = false;
+
+ /* if sampling all branches, then nothing to filter */
+ if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+ return;
+
+ for (i = 0; i < cpuc->lbr_stack.nr; i++) {
+
+ from = cpuc->lbr_entries[i].from;
+ to = cpuc->lbr_entries[i].to;
+
+ type = branch_type(from, to);
+
+ /* if type does not correspond, then discard */
+ if (type == X86_BR_NONE || (br_sel & type) != type) {
+ cpuc->lbr_entries[i].from = 0;
+ compress = true;
+ }
+ }
+
+ if (!compress)
+ return;
+
+ /* remove all entries with from=0 */
+ for (i = 0; i < cpuc->lbr_stack.nr; ) {
+ if (!cpuc->lbr_entries[i].from) {
+ j = i;
+ while (++j < cpuc->lbr_stack.nr)
+ cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
+ cpuc->lbr_stack.nr--;
+ if (!cpuc->lbr_entries[i].from)
+ continue;
+ }
+ i++;
+ }
+}
+
+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
+ | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+ */
+ [PERF_SAMPLE_BRANCH_ANY_CALL] =
+ LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+ /*
+ * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+ */
+ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+ [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY,
+ [PERF_SAMPLE_BRANCH_USER] = LBR_USER,
+ [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL,
+ [PERF_SAMPLE_BRANCH_HV] = LBR_IGN,
+ [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL
+ | LBR_FAR,
+ [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL,
+};
+
+/* core */
void intel_pmu_lbr_init_core(void)
{
x86_pmu.lbr_nr = 4;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x40;
- x86_pmu.lbr_to = 0x60;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("4-deep LBR, ");
}
+/* nehalem/westmere */
void intel_pmu_lbr_init_nhm(void)
{
x86_pmu.lbr_nr = 16;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x680;
- x86_pmu.lbr_to = 0x6c0;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = nhm_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - workaround LBR_SEL errata (see above)
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
+}
+
+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+ x86_pmu.lbr_nr = 16;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+ x86_pmu.lbr_to = MSR_LBR_NHM_TO;
+
+ x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+ x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ /*
+ * SW branch filter usage:
+ * - support syscall, sysret capture.
+ * That requires LBR_FAR but that means far
+ * jmp need to be filtered out
+ */
+ pr_cont("16-deep LBR, ");
}
+/* atom */
void intel_pmu_lbr_init_atom(void)
{
+ /*
+ * only models starting at stepping 10 seems
+ * to have an operational LBR which can freeze
+ * on PMU interrupt
+ */
+ if (boot_cpu_data.x86_mask < 10) {
+ pr_cont("LBR disabled due to erratum");
+ return;
+ }
+
x86_pmu.lbr_nr = 8;
- x86_pmu.lbr_tos = 0x01c9;
- x86_pmu.lbr_from = 0x40;
- x86_pmu.lbr_to = 0x60;
+ x86_pmu.lbr_tos = MSR_LBR_TOS;
+ x86_pmu.lbr_from = MSR_LBR_CORE_FROM;
+ x86_pmu.lbr_to = MSR_LBR_CORE_TO;
+
+ /*
+ * SW branch filter usage:
+ * - compensate for lack of HW filter
+ */
+ pr_cont("8-deep LBR, ");
}
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ef484d9..a2dfacf 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1271,6 +1271,17 @@ done:
return num ? -EINVAL : 0;
}
+PMU_FORMAT_ATTR(cccr, "config:0-31" );
+PMU_FORMAT_ATTR(escr, "config:32-62");
+PMU_FORMAT_ATTR(ht, "config:63" );
+
+static struct attribute *intel_p4_formats_attr[] = {
+ &format_attr_cccr.attr,
+ &format_attr_escr.attr,
+ &format_attr_ht.attr,
+ NULL,
+};
+
static __initconst const struct x86_pmu p4_pmu = {
.name = "Netburst P4/Xeon",
.handle_irq = p4_pmu_handle_irq,
@@ -1305,6 +1316,8 @@ static __initconst const struct x86_pmu p4_pmu = {
* the former idea is taken from OProfile code
*/
.perfctr_second_write = 1,
+
+ .format_attrs = intel_p4_formats_attr,
};
__init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index c7181be..32bcfc7 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -87,6 +87,23 @@ static void p6_pmu_enable_event(struct perf_event *event)
(void)checking_wrmsrl(hwc->config_base, val);
}
+PMU_FORMAT_ATTR(event, "config:0-7" );
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(pc, "config:19" );
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *intel_p6_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_pc.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
static __initconst const struct x86_pmu p6_pmu = {
.name = "p6",
.handle_irq = x86_pmu_handle_irq,
@@ -115,6 +132,8 @@ static __initconst const struct x86_pmu p6_pmu = {
.cntval_mask = (1ULL << 32) - 1,
.get_event_constraints = x86_get_event_constraints,
.event_constraints = p6_event_constraints,
+
+ .format_attrs = intel_p6_formats_attr,
};
__init int p6_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index c7f64e6..addf9e8 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index a524353..39472dd 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -43,7 +43,6 @@
#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/system.h>
static struct class *cpuid_class;
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 642f75a..11891ca 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -62,16 +62,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!userbuf) {
memcpy(buf, (vaddr + offset), csize);
- kunmap_atomic(vaddr, KM_PTE0);
+ kunmap_atomic(vaddr);
} else {
if (!kdump_buf_page) {
printk(KERN_WARNING "Kdump: Kdump buffer page not"
" allocated\n");
- kunmap_atomic(vaddr, KM_PTE0);
+ kunmap_atomic(vaddr);
return -EFAULT;
}
copy_page(kdump_buf_page, vaddr);
- kunmap_atomic(vaddr, KM_PTE0);
+ kunmap_atomic(vaddr);
if (copy_to_user(buf, (kdump_buf_page + offset), csize))
return -EFAULT;
}
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 5282179..3ae2ced 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -4,6 +4,7 @@
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/irqdomain.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/of.h>
@@ -17,64 +18,14 @@
#include <linux/initrd.h>
#include <asm/hpet.h>
-#include <asm/irq_controller.h>
#include <asm/apic.h>
#include <asm/pci_x86.h>
__initdata u64 initial_dtb;
char __initdata cmd_line[COMMAND_LINE_SIZE];
-static LIST_HEAD(irq_domains);
-static DEFINE_RAW_SPINLOCK(big_irq_lock);
int __initdata of_ioapic;
-#ifdef CONFIG_X86_IO_APIC
-static void add_interrupt_host(struct irq_domain *ih)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&big_irq_lock, flags);
- list_add(&ih->l, &irq_domains);
- raw_spin_unlock_irqrestore(&big_irq_lock, flags);
-}
-#endif
-
-static struct irq_domain *get_ih_from_node(struct device_node *controller)
-{
- struct irq_domain *ih, *found = NULL;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&big_irq_lock, flags);
- list_for_each_entry(ih, &irq_domains, l) {
- if (ih->controller == controller) {
- found = ih;
- break;
- }
- }
- raw_spin_unlock_irqrestore(&big_irq_lock, flags);
- return found;
-}
-
-unsigned int irq_create_of_mapping(struct device_node *controller,
- const u32 *intspec, unsigned int intsize)
-{
- struct irq_domain *ih;
- u32 virq, type;
- int ret;
-
- ih = get_ih_from_node(controller);
- if (!ih)
- return 0;
- ret = ih->xlate(ih, intspec, intsize, &virq, &type);
- if (ret)
- return 0;
- if (type == IRQ_TYPE_NONE)
- return virq;
- irq_set_irq_type(virq, type);
- return virq;
-}
-EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-
unsigned long pci_address_to_pio(phys_addr_t address)
{
/*
@@ -354,36 +305,43 @@ static struct of_ioapic_type of_ioapic_type[] =
},
};
-static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize,
- u32 *out_hwirq, u32 *out_type)
+static int ioapic_xlate(struct irq_domain *domain,
+ struct device_node *controller,
+ const u32 *intspec, u32 intsize,
+ irq_hw_number_t *out_hwirq, u32 *out_type)
{
- struct mp_ioapic_gsi *gsi_cfg;
struct io_apic_irq_attr attr;
struct of_ioapic_type *it;
- u32 line, idx, type;
+ u32 line, idx;
+ int rc;
- if (intsize < 2)
+ if (WARN_ON(intsize < 2))
return -EINVAL;
- line = *intspec;
- idx = (u32) id->priv;
- gsi_cfg = mp_ioapic_gsi_routing(idx);
- *out_hwirq = line + gsi_cfg->gsi_base;
-
- intspec++;
- type = *intspec;
+ line = intspec[0];
- if (type >= ARRAY_SIZE(of_ioapic_type))
+ if (intspec[1] >= ARRAY_SIZE(of_ioapic_type))
return -EINVAL;
- it = of_ioapic_type + type;
- *out_type = it->out_type;
+ it = &of_ioapic_type[intspec[1]];
+ idx = (u32) domain->host_data;
set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity);
- return io_apic_setup_irq_pin_once(*out_hwirq, cpu_to_node(0), &attr);
+ rc = io_apic_setup_irq_pin_once(irq_find_mapping(domain, line),
+ cpu_to_node(0), &attr);
+ if (rc)
+ return rc;
+
+ *out_hwirq = line;
+ *out_type = it->out_type;
+ return 0;
}
+const struct irq_domain_ops ioapic_irq_domain_ops = {
+ .xlate = ioapic_xlate,
+};
+
static void __init ioapic_add_ofnode(struct device_node *np)
{
struct resource r;
@@ -399,13 +357,14 @@ static void __init ioapic_add_ofnode(struct device_node *np)
for (i = 0; i < nr_ioapics; i++) {
if (r.start == mpc_ioapic_addr(i)) {
struct irq_domain *id;
+ struct mp_ioapic_gsi *gsi_cfg;
+
+ gsi_cfg = mp_ioapic_gsi_routing(i);
- id = kzalloc(sizeof(*id), GFP_KERNEL);
+ id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
+ &ioapic_irq_domain_ops,
+ (void*)i);
BUG_ON(!id);
- id->controller = np;
- id->xlate = ioapic_xlate;
- id->priv = (void *)i;
- add_interrupt_host(id);
return;
}
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 28f9870..1b81839 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -37,13 +37,16 @@ print_ftrace_graph_addr(unsigned long addr, void *data,
const struct stacktrace_ops *ops,
struct thread_info *tinfo, int *graph)
{
- struct task_struct *task = tinfo->task;
+ struct task_struct *task;
unsigned long ret_addr;
- int index = task->curr_ret_stack;
+ int index;
if (addr != (unsigned long)return_to_handler)
return;
+ task = tinfo->task;
+ index = task->curr_ret_stack;
+
if (!task->ret_stack || index < *graph)
return;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index c99f9ed..88ec912 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
int i;
print_modules();
- __show_regs(regs, 0);
+ __show_regs(regs, !user_mode_vm(regs));
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
TASK_COMM_LEN, current->comm, task_pid_nr(current),
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 79d97e6..7b784f4 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -98,12 +98,6 @@
#endif
.endm
-#ifdef CONFIG_VM86
-#define resume_userspace_sig check_userspace
-#else
-#define resume_userspace_sig resume_userspace
-#endif
-
/*
* User gs save/restore
*
@@ -327,10 +321,19 @@ ret_from_exception:
preempt_stop(CLBR_ANY)
ret_from_intr:
GET_THREAD_INFO(%ebp)
-check_userspace:
+resume_userspace_sig:
+#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+#else
+ /*
+ * We can be coming here from a syscall done in the kernel space,
+ * e.g. a failed kernel_execve().
+ */
+ movl PT_CS(%esp), %eax
+ andl $SEGMENT_RPL_MASK, %eax
+#endif
cmpl $USER_RPL, %eax
jb resume_kernel # not returning to v8086 or userspace
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2925e14..cdc79b5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
movq %rsp, %rsi
leaq -RBP(%rsp),%rdi /* arg1 for handler */
- testl $3, CS(%rdi)
+ testl $3, CS-RBP(%rsi)
je 1f
SWAPGS
/*
@@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
* moving irq_enter into assembly, which would be too much work)
*/
1: incl PER_CPU_VAR(irq_count)
- jne 2f
- mov PER_CPU_VAR(irq_stack_ptr),%rsp
+ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi
-2: /* Store previous stack value */
+ /* Store previous stack value */
pushq %rsi
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
0x77 /* DW_OP_breg7 */, 0, \
@@ -857,7 +856,7 @@ ret_from_intr:
/* Restore saved previous stack */
popq %rsi
- CFI_DEF_CFA_REGISTER rsi
+ CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
leaq ARGOFFSET-RBP(%rsi), %rsp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
@@ -1574,6 +1573,7 @@ ENTRY(nmi)
/* Use %rdx as out temp variable throughout */
pushq_cfi %rdx
+ CFI_REL_OFFSET rdx, 0
/*
* If %cs was not the kernel segment, then the NMI triggered in user
@@ -1598,6 +1598,7 @@ ENTRY(nmi)
*/
lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+ CFI_REMEMBER_STATE
nested_nmi:
/*
@@ -1629,10 +1630,12 @@ nested_nmi:
nested_nmi_out:
popq_cfi %rdx
+ CFI_RESTORE rdx
/* No need to check faults here */
INTERRUPT_RETURN
+ CFI_RESTORE_STATE
first_nmi:
/*
* Because nested NMIs will use the pushed location that we
@@ -1664,10 +1667,15 @@ first_nmi:
* | pt_regs |
* +-------------------------+
*
- * The saved RIP is used to fix up the copied RIP that a nested
- * NMI may zero out. The original stack frame and the temp storage
+ * The saved stack frame is used to fix up the copied stack frame
+ * that a nested NMI may change to make the interrupted NMI iret jump
+ * to the repeat_nmi. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit.
*/
+ /* Do not pop rdx, nested NMIs will corrupt that part of the stack */
+ movq (%rsp), %rdx
+ CFI_RESTORE rdx
+
/* Set the NMI executing variable on the stack. */
pushq_cfi $1
@@ -1675,22 +1683,39 @@ first_nmi:
.rept 5
pushq_cfi 6*8(%rsp)
.endr
+ CFI_DEF_CFA_OFFSET SS+8-RIP
+
+ /* Everything up to here is safe from nested NMIs */
+
+ /*
+ * If there was a nested NMI, the first NMI's iret will return
+ * here. But NMIs are still enabled and we can take another
+ * nested NMI. The nested NMI checks the interrupted RIP to see
+ * if it is between repeat_nmi and end_repeat_nmi, and if so
+ * it will just return, as we are about to repeat an NMI anyway.
+ * This makes it safe to copy to the stack frame that a nested
+ * NMI will update.
+ */
+repeat_nmi:
+ /*
+ * Update the stack variable to say we are still in NMI (the update
+ * is benign for the non-repeat case, where 1 was pushed just above
+ * to this very stack slot).
+ */
+ movq $1, 5*8(%rsp)
/* Make another copy, this one may be modified by nested NMIs */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
-
- /* Do not pop rdx, nested NMIs will corrupt it */
- movq 11*8(%rsp), %rdx
+ CFI_DEF_CFA_OFFSET SS+8-RIP
+end_repeat_nmi:
/*
* Everything below this point can be preempted by a nested
- * NMI if the first NMI took an exception. Repeated NMIs
- * caused by an exception and nested NMI will start here, and
- * can still be preempted by another NMI.
+ * NMI if the first NMI took an exception and reset our iret stack
+ * so that we repeat another NMI.
*/
-restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1719,26 +1744,6 @@ nmi_restore:
CFI_ENDPROC
END(nmi)
- /*
- * If an NMI hit an iret because of an exception or breakpoint,
- * it can lose its NMI context, and a nested NMI may come in.
- * In that case, the nested NMI will change the preempted NMI's
- * stack to jump to here when it does the final iret.
- */
-repeat_nmi:
- INTR_FRAME
- /* Update the stack variable to say we are still in NMI */
- movq $1, 5*8(%rsp)
-
- /* copy the saved stack back to copy stack */
- .rept 5
- pushq_cfi 4*8(%rsp)
- .endr
-
- jmp restart_nmi
- CFI_ENDPROC
-end_repeat_nmi:
-
ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 739d859..7734bcb 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -16,6 +16,7 @@
#include <asm/uaccess.h>
#include <asm/ptrace.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/user.h>
#ifdef CONFIG_X86_64
@@ -32,6 +33,86 @@
# define user32_fxsr_struct user_fxsr_struct
#endif
+/*
+ * Were we in an interrupt that interrupted kernel mode?
+ *
+ * We can do a kernel_fpu_begin/end() pair *ONLY* if that
+ * pair does nothing at all: the thread must not have fpu (so
+ * that we don't try to save the FPU state), and TS must
+ * be set (so that the clts/stts pair does nothing that is
+ * visible in the interrupted kernel thread).
+ */
+static inline bool interrupted_kernel_fpu_idle(void)
+{
+ return !__thread_has_fpu(current) &&
+ (read_cr0() & X86_CR0_TS);
+}
+
+/*
+ * Were we in user mode (or vm86 mode) when we were
+ * interrupted?
+ *
+ * Doing kernel_fpu_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static inline bool interrupted_user_mode(void)
+{
+ struct pt_regs *regs = get_irq_regs();
+ return regs && user_mode_vm(regs);
+}
+
+/*
+ * Can we use the FPU in kernel mode with the
+ * whole "kernel_fpu_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+bool irq_fpu_usable(void)
+{
+ return !in_interrupt() ||
+ interrupted_user_mode() ||
+ interrupted_kernel_fpu_idle();
+}
+EXPORT_SYMBOL(irq_fpu_usable);
+
+void kernel_fpu_begin(void)
+{
+ struct task_struct *me = current;
+
+ WARN_ON_ONCE(!irq_fpu_usable());
+ preempt_disable();
+ if (__thread_has_fpu(me)) {
+ __save_init_fpu(me);
+ __thread_clear_has_fpu(me);
+ /* We do 'stts()' in kernel_fpu_end() */
+ } else {
+ percpu_write(fpu_owner_task, NULL);
+ clts();
+ }
+}
+EXPORT_SYMBOL(kernel_fpu_begin);
+
+void kernel_fpu_end(void)
+{
+ stts();
+ preempt_enable();
+}
+EXPORT_SYMBOL(kernel_fpu_end);
+
+void unlazy_fpu(struct task_struct *tsk)
+{
+ preempt_disable();
+ if (__thread_has_fpu(tsk)) {
+ __save_init_fpu(tsk);
+ __thread_fpu_end(tsk);
+ } else
+ tsk->fpu_counter = 0;
+ preempt_enable();
+}
+EXPORT_SYMBOL(unlazy_fpu);
+
#ifdef CONFIG_MATH_EMULATION
# define HAVE_HWFP (boot_cpu_data.hard_math)
#else
@@ -44,7 +125,7 @@ EXPORT_SYMBOL_GPL(xstate_size);
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata;
-void __cpuinit mxcsr_feature_mask_init(void)
+static void __cpuinit mxcsr_feature_mask_init(void)
{
unsigned long mask = 0;
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 6104852..36d1853 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -15,7 +15,6 @@
#include <linux/delay.h>
#include <linux/atomic.h>
-#include <asm/system.h>
#include <asm/timer.h>
#include <asm/hw_irq.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7943e0c..3dafc60 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -282,8 +282,13 @@ void fixup_irqs(void)
else if (!(warned++))
set_affinity = 0;
+ /*
+ * We unmask if the irq was not marked masked by the
+ * core code. That respects the lazy irq disable
+ * behaviour.
+ */
if (!irqd_can_move_in_process_context(data) &&
- !irqd_irq_disabled(data) && chip->irq_unmask)
+ !irqd_irq_masked(data) && chip->irq_unmask)
chip->irq_unmask(data);
raw_spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 40fc861..58b7f27 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -100,13 +100,8 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
irqctx->tinfo.task = curctx->tinfo.task;
irqctx->tinfo.previous_esp = current_stack_pointer;
- /*
- * Copy the softirq bits in preempt_count so that the
- * softirq checks work in the hardirq context.
- */
- irqctx->tinfo.preempt_count =
- (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
- (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+ /* Copy the preempt_count so that the [soft]irq checks work. */
+ irqctx->tinfo.preempt_count = curctx->tinfo.preempt_count;
if (unlikely(overflow))
call_on_stack(print_stack_overflow, isp);
@@ -196,7 +191,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
if (unlikely(!desc))
return false;
- if (!execute_on_irq_stack(overflow, desc, irq)) {
+ if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 7b77062..252981a 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -16,7 +16,6 @@
#include <linux/delay.h>
#include <linux/atomic.h>
-#include <asm/system.h>
#include <asm/timer.h>
#include <asm/hw_irq.h>
#include <asm/pgtable.h>
@@ -306,10 +305,10 @@ void __init native_init_IRQ(void)
* us. (some of these will be overridden and become
* 'special' SMP interrupts)
*/
- for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+ i = FIRST_EXTERNAL_VECTOR;
+ for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
- if (!test_bit(i, used_vectors))
- set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+ set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
}
if (!acpi_ioapic && !of_ioapic)
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 90fcf62..1d5d31e 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -68,16 +68,9 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
return count;
}
-static int setup_data_open(struct inode *inode, struct file *file)
-{
- file->private_data = inode->i_private;
-
- return 0;
-}
-
static const struct file_operations fops_setup_data = {
.read = setup_data_read,
- .open = setup_data_open,
+ .open = simple_open,
.llseek = default_llseek,
};
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index faba577..8bfb614 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,10 +43,11 @@
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/hw_breakpoint.h>
+#include <linux/uaccess.h>
+#include <linux/memory.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
-#include <asm/system.h>
#include <asm/apic.h>
#include <asm/nmi.h>
@@ -67,8 +68,6 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{ "ss", 4, offsetof(struct pt_regs, ss) },
{ "ds", 4, offsetof(struct pt_regs, ds) },
{ "es", 4, offsetof(struct pt_regs, es) },
- { "fs", 4, -1 },
- { "gs", 4, -1 },
#else
{ "ax", 8, offsetof(struct pt_regs, ax) },
{ "bx", 8, offsetof(struct pt_regs, bx) },
@@ -90,7 +89,11 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{ "flags", 4, offsetof(struct pt_regs, flags) },
{ "cs", 4, offsetof(struct pt_regs, cs) },
{ "ss", 4, offsetof(struct pt_regs, ss) },
+ { "ds", 4, -1 },
+ { "es", 4, -1 },
#endif
+ { "fs", 4, -1 },
+ { "gs", 4, -1 },
};
int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
@@ -740,6 +743,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ char opc[BREAK_INSTR_SIZE];
+
+ bpt->type = BP_BREAKPOINT;
+ err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+ err = probe_kernel_write((char *)bpt->bpt_addr,
+ arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+#ifdef CONFIG_DEBUG_RODATA
+ if (!err)
+ return err;
+ /*
+ * It is safe to call text_poke() because normal kernel execution
+ * is stopped on all cores, so long as the text_mutex is not locked.
+ */
+ if (mutex_is_locked(&text_mutex))
+ return -EBUSY;
+ text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
+ err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+ if (err)
+ return err;
+ if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
+ return -EINVAL;
+ bpt->type = BP_POKE_BREAKPOINT;
+#endif /* CONFIG_DEBUG_RODATA */
+ return err;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+#ifdef CONFIG_DEBUG_RODATA
+ int err;
+ char opc[BREAK_INSTR_SIZE];
+
+ if (bpt->type != BP_POKE_BREAKPOINT)
+ goto knl_write;
+ /*
+ * It is safe to call text_poke() because normal kernel execution
+ * is stopped on all cores, so long as the text_mutex is not locked.
+ */
+ if (mutex_is_locked(&text_mutex))
+ goto knl_write;
+ text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
+ err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
+ if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
+ goto knl_write;
+ return err;
+knl_write:
+#endif /* CONFIG_DEBUG_RODATA */
+ return probe_kernel_write((char *)bpt->bpt_addr,
+ (char *)bpt->saved_instr, BREAK_INSTR_SIZE);
+}
+
struct kgdb_arch arch_kgdb_ops = {
/* Breakpoint instruction: */
.gdb_bpt_instr = { 0xcc },
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h
new file mode 100644
index 0000000..3230b68
--- /dev/null
+++ b/arch/x86/kernel/kprobes-common.h
@@ -0,0 +1,102 @@
+#ifndef __X86_KERNEL_KPROBES_COMMON_H
+#define __X86_KERNEL_KPROBES_COMMON_H
+
+/* Kprobes and Optprobes common header */
+
+#ifdef CONFIG_X86_64
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax. */ \
+ " subq $24, %rsp\n" \
+ " pushq %rdi\n" \
+ " pushq %rsi\n" \
+ " pushq %rdx\n" \
+ " pushq %rcx\n" \
+ " pushq %rax\n" \
+ " pushq %r8\n" \
+ " pushq %r9\n" \
+ " pushq %r10\n" \
+ " pushq %r11\n" \
+ " pushq %rbx\n" \
+ " pushq %rbp\n" \
+ " pushq %r12\n" \
+ " pushq %r13\n" \
+ " pushq %r14\n" \
+ " pushq %r15\n"
+#define RESTORE_REGS_STRING \
+ " popq %r15\n" \
+ " popq %r14\n" \
+ " popq %r13\n" \
+ " popq %r12\n" \
+ " popq %rbp\n" \
+ " popq %rbx\n" \
+ " popq %r11\n" \
+ " popq %r10\n" \
+ " popq %r9\n" \
+ " popq %r8\n" \
+ " popq %rax\n" \
+ " popq %rcx\n" \
+ " popq %rdx\n" \
+ " popq %rsi\n" \
+ " popq %rdi\n" \
+ /* Skip orig_ax, ip, cs */ \
+ " addq $24, %rsp\n"
+#else
+#define SAVE_REGS_STRING \
+ /* Skip cs, ip, orig_ax and gs. */ \
+ " subl $16, %esp\n" \
+ " pushl %fs\n" \
+ " pushl %es\n" \
+ " pushl %ds\n" \
+ " pushl %eax\n" \
+ " pushl %ebp\n" \
+ " pushl %edi\n" \
+ " pushl %esi\n" \
+ " pushl %edx\n" \
+ " pushl %ecx\n" \
+ " pushl %ebx\n"
+#define RESTORE_REGS_STRING \
+ " popl %ebx\n" \
+ " popl %ecx\n" \
+ " popl %edx\n" \
+ " popl %esi\n" \
+ " popl %edi\n" \
+ " popl %ebp\n" \
+ " popl %eax\n" \
+ /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
+ " addl $24, %esp\n"
+#endif
+
+/* Ensure if the instruction can be boostable */
+extern int can_boost(kprobe_opcode_t *instruction);
+/* Recover instruction if given address is probed */
+extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
+ unsigned long addr);
+/*
+ * Copy an instruction and adjust the displacement if the instruction
+ * uses the %rip-relative addressing mode.
+ */
+extern int __copy_instruction(u8 *dest, u8 *src);
+
+/* Generate a relative-jump/call instruction */
+extern void synthesize_reljump(void *from, void *to);
+extern void synthesize_relcall(void *from, void *to);
+
+#ifdef CONFIG_OPTPROBES
+extern int arch_init_optprobes(void);
+extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
+extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
+#else /* !CONFIG_OPTPROBES */
+static inline int arch_init_optprobes(void)
+{
+ return 0;
+}
+static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+ return 0;
+}
+static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+ return addr;
+}
+#endif
+#endif
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c
new file mode 100644
index 0000000..c5e410e
--- /dev/null
+++ b/arch/x86/kernel/kprobes-opt.c
@@ -0,0 +1,512 @@
+/*
+ * Kernel Probes Jump Optimization (Optprobes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/kallsyms.h>
+#include <linux/ftrace.h>
+
+#include <asm/cacheflush.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/insn.h>
+#include <asm/debugreg.h>
+
+#include "kprobes-common.h"
+
+unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
+{
+ struct optimized_kprobe *op;
+ struct kprobe *kp;
+ long offs;
+ int i;
+
+ for (i = 0; i < RELATIVEJUMP_SIZE; i++) {
+ kp = get_kprobe((void *)addr - i);
+ /* This function only handles jump-optimized kprobe */
+ if (kp && kprobe_optimized(kp)) {
+ op = container_of(kp, struct optimized_kprobe, kp);
+ /* If op->list is not empty, op is under optimizing */
+ if (list_empty(&op->list))
+ goto found;
+ }
+ }
+
+ return addr;
+found:
+ /*
+ * If the kprobe can be optimized, original bytes which can be
+ * overwritten by jump destination address. In this case, original
+ * bytes must be recovered from op->optinsn.copied_insn buffer.
+ */
+ memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ if (addr == (unsigned long)kp->addr) {
+ buf[0] = kp->opcode;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ } else {
+ offs = addr - (unsigned long)kp->addr - 1;
+ memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs);
+ }
+
+ return (unsigned long)buf;
+}
+
+/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
+static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
+{
+#ifdef CONFIG_X86_64
+ *addr++ = 0x48;
+ *addr++ = 0xbf;
+#else
+ *addr++ = 0xb8;
+#endif
+ *(unsigned long *)addr = val;
+}
+
+static void __used __kprobes kprobes_optinsn_template_holder(void)
+{
+ asm volatile (
+ ".global optprobe_template_entry\n"
+ "optprobe_template_entry:\n"
+#ifdef CONFIG_X86_64
+ /* We don't bother saving the ss register */
+ " pushq %rsp\n"
+ " pushfq\n"
+ SAVE_REGS_STRING
+ " movq %rsp, %rsi\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val:\n"
+ ASM_NOP5
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call:\n"
+ ASM_NOP5
+ /* Move flags to rsp */
+ " movq 144(%rsp), %rdx\n"
+ " movq %rdx, 152(%rsp)\n"
+ RESTORE_REGS_STRING
+ /* Skip flags entry */
+ " addq $8, %rsp\n"
+ " popfq\n"
+#else /* CONFIG_X86_32 */
+ " pushf\n"
+ SAVE_REGS_STRING
+ " movl %esp, %edx\n"
+ ".global optprobe_template_val\n"
+ "optprobe_template_val:\n"
+ ASM_NOP5
+ ".global optprobe_template_call\n"
+ "optprobe_template_call:\n"
+ ASM_NOP5
+ RESTORE_REGS_STRING
+ " addl $4, %esp\n" /* skip cs */
+ " popf\n"
+#endif
+ ".global optprobe_template_end\n"
+ "optprobe_template_end:\n");
+}
+
+#define TMPL_MOVE_IDX \
+ ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+#define TMPL_CALL_IDX \
+ ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+#define TMPL_END_IDX \
+ ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+
+#define INT3_SIZE sizeof(kprobe_opcode_t)
+
+/* Optimized kprobe call back function: called from optinsn */
+static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ unsigned long flags;
+
+ /* This is possible if op is under delayed unoptimizing */
+ if (kprobe_disabled(&op->kp))
+ return;
+
+ local_irq_save(flags);
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
+ } else {
+ /* Save skipped registers */
+#ifdef CONFIG_X86_64
+ regs->cs = __KERNEL_CS;
+#else
+ regs->cs = __KERNEL_CS | get_kernel_rpl();
+ regs->gs = 0;
+#endif
+ regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
+ regs->orig_ax = ~0UL;
+
+ __this_cpu_write(current_kprobe, &op->kp);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ opt_pre_handler(&op->kp, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+ local_irq_restore(flags);
+}
+
+static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
+{
+ int len = 0, ret;
+
+ while (len < RELATIVEJUMP_SIZE) {
+ ret = __copy_instruction(dest + len, src + len);
+ if (!ret || !can_boost(dest + len))
+ return -EINVAL;
+ len += ret;
+ }
+ /* Check whether the address range is reserved */
+ if (ftrace_text_reserved(src, src + len - 1) ||
+ alternatives_text_reserved(src, src + len - 1) ||
+ jump_label_text_reserved(src, src + len - 1))
+ return -EBUSY;
+
+ return len;
+}
+
+/* Check whether insn is indirect jump */
+static int __kprobes insn_is_indirect_jump(struct insn *insn)
+{
+ return ((insn->opcode.bytes[0] == 0xff &&
+ (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
+ insn->opcode.bytes[0] == 0xea); /* Segment based jump */
+}
+
+/* Check whether insn jumps into specified address range */
+static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
+{
+ unsigned long target = 0;
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xe0: /* loopne */
+ case 0xe1: /* loope */
+ case 0xe2: /* loop */
+ case 0xe3: /* jcxz */
+ case 0xe9: /* near relative jump */
+ case 0xeb: /* short relative jump */
+ break;
+ case 0x0f:
+ if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
+ break;
+ return 0;
+ default:
+ if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
+ break;
+ return 0;
+ }
+ target = (unsigned long)insn->next_byte + insn->immediate.value;
+
+ return (start <= target && target <= start + len);
+}
+
+/* Decode whole function to ensure any instructions don't jump into target */
+static int __kprobes can_optimize(unsigned long paddr)
+{
+ unsigned long addr, size = 0, offset = 0;
+ struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
+
+ /* Lookup symbol including addr */
+ if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
+ return 0;
+
+ /*
+ * Do not optimize in the entry code due to the unstable
+ * stack handling.
+ */
+ if ((paddr >= (unsigned long)__entry_text_start) &&
+ (paddr < (unsigned long)__entry_text_end))
+ return 0;
+
+ /* Check there is enough space for a relative jump. */
+ if (size - offset < RELATIVEJUMP_SIZE)
+ return 0;
+
+ /* Decode instructions */
+ addr = paddr - offset;
+ while (addr < paddr - offset + size) { /* Decode until function end */
+ if (search_exception_tables(addr))
+ /*
+ * Since some fixup code will jumps into this function,
+ * we can't optimize kprobe in this function.
+ */
+ return 0;
+ kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr));
+ insn_get_length(&insn);
+ /* Another subsystem puts a breakpoint */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
+ /* Recover address */
+ insn.kaddr = (void *)addr;
+ insn.next_byte = (void *)(addr + insn.length);
+ /* Check any instructions don't jump into target */
+ if (insn_is_indirect_jump(&insn) ||
+ insn_jump_into_range(&insn, paddr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE))
+ return 0;
+ addr += insn.length;
+ }
+
+ return 1;
+}
+
+/* Check optimized_kprobe can actually be optimized. */
+int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+ int i;
+ struct kprobe *p;
+
+ for (i = 1; i < op->optinsn.size; i++) {
+ p = get_kprobe(op->kp.addr + i);
+ if (p && !kprobe_disabled(p))
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+/* Check the addr is within the optimized instructions. */
+int __kprobes
+arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+{
+ return ((unsigned long)op->kp.addr <= addr &&
+ (unsigned long)op->kp.addr + op->optinsn.size > addr);
+}
+
+/* Free optimized instruction slot */
+static __kprobes
+void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+ if (op->optinsn.insn) {
+ free_optinsn_slot(op->optinsn.insn, dirty);
+ op->optinsn.insn = NULL;
+ op->optinsn.size = 0;
+ }
+}
+
+void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ __arch_remove_optimized_kprobe(op, 1);
+}
+
+/*
+ * Copy replacing target instructions
+ * Target instructions MUST be relocatable (checked inside)
+ * This is called when new aggr(opt)probe is allocated or reused.
+ */
+int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
+{
+ u8 *buf;
+ int ret;
+ long rel;
+
+ if (!can_optimize((unsigned long)op->kp.addr))
+ return -EILSEQ;
+
+ op->optinsn.insn = get_optinsn_slot();
+ if (!op->optinsn.insn)
+ return -ENOMEM;
+
+ /*
+ * Verify if the address gap is in 2GB range, because this uses
+ * a relative jump.
+ */
+ rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+ if (abs(rel) > 0x7fffffff)
+ return -ERANGE;
+
+ buf = (u8 *)op->optinsn.insn;
+
+ /* Copy instructions into the out-of-line buffer */
+ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
+ if (ret < 0) {
+ __arch_remove_optimized_kprobe(op, 0);
+ return ret;
+ }
+ op->optinsn.size = ret;
+
+ /* Copy arch-dep-instance from template */
+ memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+
+ /* Set probe information */
+ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
+
+ /* Set probe function call */
+ synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+
+ /* Set returning jmp instruction at the tail of out-of-line buffer */
+ synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+ (u8 *)op->kp.addr + op->optinsn.size);
+
+ flush_icache_range((unsigned long) buf,
+ (unsigned long) buf + TMPL_END_IDX +
+ op->optinsn.size + RELATIVEJUMP_SIZE);
+ return 0;
+}
+
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+ u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ s32 rel = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
+ /* Backup instructions which will be replaced by jump address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must call with locking kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ WARN_ON(kprobe_disabled(&op->kp));
+ /* Setup param */
+ setup_optimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_del_init(&op->list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ /* Set int3 to first byte for kprobes */
+ insn_buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must call with locking kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /* Setup param */
+ setup_unoptimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_move(&op->list, done_list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
+/* Replace a relative jump with a breakpoint (int3). */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ u8 buf[RELATIVEJUMP_SIZE];
+
+ /* Set int3 to first byte for kprobes */
+ buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+ text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
+}
+
+int __kprobes
+setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
+{
+ struct optimized_kprobe *op;
+
+ if (p->flags & KPROBE_FLAG_OPTIMIZED) {
+ /* This kprobe is really able to run optimized path. */
+ op = container_of(p, struct optimized_kprobe, kp);
+ /* Detour through copied instructions */
+ regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
+ if (!reenter)
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
+}
+
+int __kprobes arch_init_optprobes(void)
+{
+ /* Allocate code buffer and parameter array */
+ jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_bufs)
+ return -ENOMEM;
+
+ jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_params) {
+ kfree(jump_poke_bufs);
+ jump_poke_bufs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 7da647d..e213fc8 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -30,16 +30,15 @@
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
* <prasanna@in.ibm.com> added function-return probes.
* 2005-May Rusty Lynch <rusty.lynch@intel.com>
- * Added function return probes functionality
+ * Added function return probes functionality
* 2006-Feb Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp> added
- * kprobe-booster and kretprobe-booster for i386.
+ * kprobe-booster and kretprobe-booster for i386.
* 2007-Dec Masami Hiramatsu <mhiramat@redhat.com> added kprobe-booster
- * and kretprobe-booster for x86-64
+ * and kretprobe-booster for x86-64
* 2007-Dec Masami Hiramatsu <mhiramat@redhat.com>, Arjan van de Ven
- * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
- * unified x86 kprobes code.
+ * <arjan@infradead.org> and Jim Keniston <jkenisto@us.ibm.com>
+ * unified x86 kprobes code.
*/
-
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/string.h>
@@ -59,6 +58,8 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
+#include "kprobes-common.h"
+
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -108,6 +109,7 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
doesn't switch kernel stack.*/
{NULL, NULL} /* Terminator */
};
+
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
@@ -123,11 +125,17 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
}
/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-static void __kprobes synthesize_reljump(void *from, void *to)
+void __kprobes synthesize_reljump(void *from, void *to)
{
__synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
}
+/* Insert a call instruction at address 'from', which calls address 'to'.*/
+void __kprobes synthesize_relcall(void *from, void *to)
+{
+ __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+}
+
/*
* Skip the prefixes of the instruction.
*/
@@ -151,7 +159,7 @@ static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
* Returns non-zero if opcode is boostable.
* RIP relative instructions are adjusted at copying time in 64 bits mode
*/
-static int __kprobes can_boost(kprobe_opcode_t *opcodes)
+int __kprobes can_boost(kprobe_opcode_t *opcodes)
{
kprobe_opcode_t opcode;
kprobe_opcode_t *orig_opcodes = opcodes;
@@ -207,13 +215,15 @@ retry:
}
}
-/* Recover the probed instruction at addr for further analysis. */
-static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+static unsigned long
+__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
struct kprobe *kp;
+
kp = get_kprobe((void *)addr);
+ /* There is no probe, return original address */
if (!kp)
- return -EINVAL;
+ return addr;
/*
* Basically, kp->ainsn.insn has an original instruction.
@@ -230,14 +240,29 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
*/
memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
buf[0] = kp->opcode;
- return 0;
+ return (unsigned long)buf;
+}
+
+/*
+ * Recover the probed instruction at addr for further analysis.
+ * Caller must lock kprobes by kprobe_mutex, or disable preemption
+ * for preventing to release referencing kprobes.
+ */
+unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
+{
+ unsigned long __addr;
+
+ __addr = __recover_optprobed_insn(buf, addr);
+ if (__addr != addr)
+ return __addr;
+
+ return __recover_probed_insn(buf, addr);
}
/* Check if paddr is at an instruction boundary */
static int __kprobes can_probe(unsigned long paddr)
{
- int ret;
- unsigned long addr, offset = 0;
+ unsigned long addr, __addr, offset = 0;
struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
@@ -247,26 +272,24 @@ static int __kprobes can_probe(unsigned long paddr)
/* Decode instructions */
addr = paddr - offset;
while (addr < paddr) {
- kernel_insn_init(&insn, (void *)addr);
- insn_get_opcode(&insn);
-
/*
* Check if the instruction has been modified by another
* kprobe, in which case we replace the breakpoint by the
* original instruction in our buffer.
+ * Also, jump optimization will change the breakpoint to
+ * relative-jump. Since the relative-jump itself is
+ * normally used, we just go through if there is no kprobe.
*/
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf, addr);
- if (ret)
- /*
- * Another debugging subsystem might insert
- * this breakpoint. In that case, we can't
- * recover it.
- */
- return 0;
- kernel_insn_init(&insn, buf);
- }
+ __addr = recover_probed_instruction(buf, addr);
+ kernel_insn_init(&insn, (void *)__addr);
insn_get_length(&insn);
+
+ /*
+ * Another debugging subsystem might insert this breakpoint.
+ * In that case, we can't recover it.
+ */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
addr += insn.length;
}
@@ -299,24 +322,16 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
* If not, return null.
* Only applicable to 64-bit x86.
*/
-static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
+int __kprobes __copy_instruction(u8 *dest, u8 *src)
{
struct insn insn;
- int ret;
kprobe_opcode_t buf[MAX_INSN_SIZE];
- kernel_insn_init(&insn, src);
- if (recover) {
- insn_get_opcode(&insn);
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf,
- (unsigned long)src);
- if (ret)
- return 0;
- kernel_insn_init(&insn, buf);
- }
- }
+ kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src));
insn_get_length(&insn);
+ /* Another subsystem puts a breakpoint, failed to recover */
+ if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
+ return 0;
memcpy(dest, insn.kaddr, insn.length);
#ifdef CONFIG_X86_64
@@ -337,8 +352,7 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
* extension of the original signed 32-bit displacement would
* have given.
*/
- newdisp = (u8 *) src + (s64) insn.displacement.value -
- (u8 *) dest;
+ newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest;
BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */
disp = (u8 *) dest + insn_offset_displacement(&insn);
*(s32 *) disp = (s32) newdisp;
@@ -349,18 +363,20 @@ static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover)
static void __kprobes arch_copy_kprobe(struct kprobe *p)
{
+ /* Copy an instruction with recovering if other optprobe modifies it.*/
+ __copy_instruction(p->ainsn.insn, p->addr);
+
/*
- * Copy an instruction without recovering int3, because it will be
- * put by another subsystem.
+ * __copy_instruction can modify the displacement of the instruction,
+ * but it doesn't affect boostable check.
*/
- __copy_instruction(p->ainsn.insn, p->addr, 0);
-
- if (can_boost(p->addr))
+ if (can_boost(p->ainsn.insn))
p->ainsn.boostable = 0;
else
p->ainsn.boostable = -1;
- p->opcode = *p->addr;
+ /* Also, displacement change doesn't affect the first byte */
+ p->opcode = p->ainsn.insn[0];
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -442,8 +458,8 @@ static void __kprobes restore_btf(void)
}
}
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
+void __kprobes
+arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
unsigned long *sara = stack_addr(regs);
@@ -453,16 +469,8 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
*sara = (unsigned long) &kretprobe_trampoline;
}
-#ifdef CONFIG_OPTPROBES
-static int __kprobes setup_detour_execution(struct kprobe *p,
- struct pt_regs *regs,
- int reenter);
-#else
-#define setup_detour_execution(p, regs, reenter) (0)
-#endif
-
-static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb, int reenter)
+static void __kprobes
+setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
{
if (setup_detour_execution(p, regs, reenter))
return;
@@ -504,8 +512,8 @@ static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
* within the handler. We save the original kprobes variables and just single
* step on the instruction of the new probe without calling any user handlers.
*/
-static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
+static int __kprobes
+reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
switch (kcb->kprobe_status) {
case KPROBE_HIT_SSDONE:
@@ -600,69 +608,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
return 0;
}
-#ifdef CONFIG_X86_64
-#define SAVE_REGS_STRING \
- /* Skip cs, ip, orig_ax. */ \
- " subq $24, %rsp\n" \
- " pushq %rdi\n" \
- " pushq %rsi\n" \
- " pushq %rdx\n" \
- " pushq %rcx\n" \
- " pushq %rax\n" \
- " pushq %r8\n" \
- " pushq %r9\n" \
- " pushq %r10\n" \
- " pushq %r11\n" \
- " pushq %rbx\n" \
- " pushq %rbp\n" \
- " pushq %r12\n" \
- " pushq %r13\n" \
- " pushq %r14\n" \
- " pushq %r15\n"
-#define RESTORE_REGS_STRING \
- " popq %r15\n" \
- " popq %r14\n" \
- " popq %r13\n" \
- " popq %r12\n" \
- " popq %rbp\n" \
- " popq %rbx\n" \
- " popq %r11\n" \
- " popq %r10\n" \
- " popq %r9\n" \
- " popq %r8\n" \
- " popq %rax\n" \
- " popq %rcx\n" \
- " popq %rdx\n" \
- " popq %rsi\n" \
- " popq %rdi\n" \
- /* Skip orig_ax, ip, cs */ \
- " addq $24, %rsp\n"
-#else
-#define SAVE_REGS_STRING \
- /* Skip cs, ip, orig_ax and gs. */ \
- " subl $16, %esp\n" \
- " pushl %fs\n" \
- " pushl %es\n" \
- " pushl %ds\n" \
- " pushl %eax\n" \
- " pushl %ebp\n" \
- " pushl %edi\n" \
- " pushl %esi\n" \
- " pushl %edx\n" \
- " pushl %ecx\n" \
- " pushl %ebx\n"
-#define RESTORE_REGS_STRING \
- " popl %ebx\n" \
- " popl %ecx\n" \
- " popl %edx\n" \
- " popl %esi\n" \
- " popl %edi\n" \
- " popl %ebp\n" \
- " popl %eax\n" \
- /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\
- " addl $24, %esp\n"
-#endif
-
/*
* When a retprobed function returns, this code saves registers and
* calls trampoline_handler() runs, which calls the kretprobe's handler.
@@ -816,8 +761,8 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
* jump instruction after the copied instruction, that jumps to the next
* instruction after the probepoint.
*/
-static void __kprobes resume_execution(struct kprobe *p,
- struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+static void __kprobes
+resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
unsigned long *tos = stack_addr(regs);
unsigned long copy_ip = (unsigned long)p->ainsn.insn;
@@ -996,8 +941,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
/*
* Wrapper routine for handling exceptions.
*/
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
+int __kprobes
+kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data)
{
struct die_args *args = data;
int ret = NOTIFY_DONE;
@@ -1107,466 +1052,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-
-#ifdef CONFIG_OPTPROBES
-
-/* Insert a call instruction at address 'from', which calls address 'to'.*/
-static void __kprobes synthesize_relcall(void *from, void *to)
-{
- __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
-}
-
-/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */
-static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr,
- unsigned long val)
-{
-#ifdef CONFIG_X86_64
- *addr++ = 0x48;
- *addr++ = 0xbf;
-#else
- *addr++ = 0xb8;
-#endif
- *(unsigned long *)addr = val;
-}
-
-static void __used __kprobes kprobes_optinsn_template_holder(void)
-{
- asm volatile (
- ".global optprobe_template_entry\n"
- "optprobe_template_entry: \n"
-#ifdef CONFIG_X86_64
- /* We don't bother saving the ss register */
- " pushq %rsp\n"
- " pushfq\n"
- SAVE_REGS_STRING
- " movq %rsp, %rsi\n"
- ".global optprobe_template_val\n"
- "optprobe_template_val: \n"
- ASM_NOP5
- ASM_NOP5
- ".global optprobe_template_call\n"
- "optprobe_template_call: \n"
- ASM_NOP5
- /* Move flags to rsp */
- " movq 144(%rsp), %rdx\n"
- " movq %rdx, 152(%rsp)\n"
- RESTORE_REGS_STRING
- /* Skip flags entry */
- " addq $8, %rsp\n"
- " popfq\n"
-#else /* CONFIG_X86_32 */
- " pushf\n"
- SAVE_REGS_STRING
- " movl %esp, %edx\n"
- ".global optprobe_template_val\n"
- "optprobe_template_val: \n"
- ASM_NOP5
- ".global optprobe_template_call\n"
- "optprobe_template_call: \n"
- ASM_NOP5
- RESTORE_REGS_STRING
- " addl $4, %esp\n" /* skip cs */
- " popf\n"
-#endif
- ".global optprobe_template_end\n"
- "optprobe_template_end: \n");
-}
-
-#define TMPL_MOVE_IDX \
- ((long)&optprobe_template_val - (long)&optprobe_template_entry)
-#define TMPL_CALL_IDX \
- ((long)&optprobe_template_call - (long)&optprobe_template_entry)
-#define TMPL_END_IDX \
- ((long)&optprobe_template_end - (long)&optprobe_template_entry)
-
-#define INT3_SIZE sizeof(kprobe_opcode_t)
-
-/* Optimized kprobe call back function: called from optinsn */
-static void __kprobes optimized_callback(struct optimized_kprobe *op,
- struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long flags;
-
- /* This is possible if op is under delayed unoptimizing */
- if (kprobe_disabled(&op->kp))
- return;
-
- local_irq_save(flags);
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(&op->kp);
- } else {
- /* Save skipped registers */
-#ifdef CONFIG_X86_64
- regs->cs = __KERNEL_CS;
-#else
- regs->cs = __KERNEL_CS | get_kernel_rpl();
- regs->gs = 0;
-#endif
- regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
- regs->orig_ax = ~0UL;
-
- __this_cpu_write(current_kprobe, &op->kp);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- opt_pre_handler(&op->kp, regs);
- __this_cpu_write(current_kprobe, NULL);
- }
- local_irq_restore(flags);
-}
-
-static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src)
-{
- int len = 0, ret;
-
- while (len < RELATIVEJUMP_SIZE) {
- ret = __copy_instruction(dest + len, src + len, 1);
- if (!ret || !can_boost(dest + len))
- return -EINVAL;
- len += ret;
- }
- /* Check whether the address range is reserved */
- if (ftrace_text_reserved(src, src + len - 1) ||
- alternatives_text_reserved(src, src + len - 1) ||
- jump_label_text_reserved(src, src + len - 1))
- return -EBUSY;
-
- return len;
-}
-
-/* Check whether insn is indirect jump */
-static int __kprobes insn_is_indirect_jump(struct insn *insn)
-{
- return ((insn->opcode.bytes[0] == 0xff &&
- (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
- insn->opcode.bytes[0] == 0xea); /* Segment based jump */
-}
-
-/* Check whether insn jumps into specified address range */
-static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
-{
- unsigned long target = 0;
-
- switch (insn->opcode.bytes[0]) {
- case 0xe0: /* loopne */
- case 0xe1: /* loope */
- case 0xe2: /* loop */
- case 0xe3: /* jcxz */
- case 0xe9: /* near relative jump */
- case 0xeb: /* short relative jump */
- break;
- case 0x0f:
- if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */
- break;
- return 0;
- default:
- if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */
- break;
- return 0;
- }
- target = (unsigned long)insn->next_byte + insn->immediate.value;
-
- return (start <= target && target <= start + len);
-}
-
-/* Decode whole function to ensure any instructions don't jump into target */
-static int __kprobes can_optimize(unsigned long paddr)
-{
- int ret;
- unsigned long addr, size = 0, offset = 0;
- struct insn insn;
- kprobe_opcode_t buf[MAX_INSN_SIZE];
-
- /* Lookup symbol including addr */
- if (!kallsyms_lookup_size_offset(paddr, &size, &offset))
- return 0;
-
- /*
- * Do not optimize in the entry code due to the unstable
- * stack handling.
- */
- if ((paddr >= (unsigned long )__entry_text_start) &&
- (paddr < (unsigned long )__entry_text_end))
- return 0;
-
- /* Check there is enough space for a relative jump. */
- if (size - offset < RELATIVEJUMP_SIZE)
- return 0;
-
- /* Decode instructions */
- addr = paddr - offset;
- while (addr < paddr - offset + size) { /* Decode until function end */
- if (search_exception_tables(addr))
- /*
- * Since some fixup code will jumps into this function,
- * we can't optimize kprobe in this function.
- */
- return 0;
- kernel_insn_init(&insn, (void *)addr);
- insn_get_opcode(&insn);
- if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) {
- ret = recover_probed_instruction(buf, addr);
- if (ret)
- return 0;
- kernel_insn_init(&insn, buf);
- }
- insn_get_length(&insn);
- /* Recover address */
- insn.kaddr = (void *)addr;
- insn.next_byte = (void *)(addr + insn.length);
- /* Check any instructions don't jump into target */
- if (insn_is_indirect_jump(&insn) ||
- insn_jump_into_range(&insn, paddr + INT3_SIZE,
- RELATIVE_ADDR_SIZE))
- return 0;
- addr += insn.length;
- }
-
- return 1;
-}
-
-/* Check optimized_kprobe can actually be optimized. */
-int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op)
-{
- int i;
- struct kprobe *p;
-
- for (i = 1; i < op->optinsn.size; i++) {
- p = get_kprobe(op->kp.addr + i);
- if (p && !kprobe_disabled(p))
- return -EEXIST;
- }
-
- return 0;
-}
-
-/* Check the addr is within the optimized instructions. */
-int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op,
- unsigned long addr)
-{
- return ((unsigned long)op->kp.addr <= addr &&
- (unsigned long)op->kp.addr + op->optinsn.size > addr);
-}
-
-/* Free optimized instruction slot */
-static __kprobes
-void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
-{
- if (op->optinsn.insn) {
- free_optinsn_slot(op->optinsn.insn, dirty);
- op->optinsn.insn = NULL;
- op->optinsn.size = 0;
- }
-}
-
-void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op)
-{
- __arch_remove_optimized_kprobe(op, 1);
-}
-
-/*
- * Copy replacing target instructions
- * Target instructions MUST be relocatable (checked inside)
- */
-int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
-{
- u8 *buf;
- int ret;
- long rel;
-
- if (!can_optimize((unsigned long)op->kp.addr))
- return -EILSEQ;
-
- op->optinsn.insn = get_optinsn_slot();
- if (!op->optinsn.insn)
- return -ENOMEM;
-
- /*
- * Verify if the address gap is in 2GB range, because this uses
- * a relative jump.
- */
- rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
- if (abs(rel) > 0x7fffffff)
- return -ERANGE;
-
- buf = (u8 *)op->optinsn.insn;
-
- /* Copy instructions into the out-of-line buffer */
- ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
- if (ret < 0) {
- __arch_remove_optimized_kprobe(op, 0);
- return ret;
- }
- op->optinsn.size = ret;
-
- /* Copy arch-dep-instance from template */
- memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
-
- /* Set probe information */
- synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
-
- /* Set probe function call */
- synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
-
- /* Set returning jmp instruction at the tail of out-of-line buffer */
- synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
- (u8 *)op->kp.addr + op->optinsn.size);
-
- flush_icache_range((unsigned long) buf,
- (unsigned long) buf + TMPL_END_IDX +
- op->optinsn.size + RELATIVEJUMP_SIZE);
- return 0;
-}
-
-#define MAX_OPTIMIZE_PROBES 256
-static struct text_poke_param *jump_poke_params;
-static struct jump_poke_buffer {
- u8 buf[RELATIVEJUMP_SIZE];
-} *jump_poke_bufs;
-
-static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- s32 rel = (s32)((long)op->optinsn.insn -
- ((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
- /* Backup instructions which will be replaced by jump address */
- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
- RELATIVE_ADDR_SIZE);
-
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Replace breakpoints (int3) with relative jumps.
- * Caller must call with locking kprobe_mutex and text_mutex.
- */
-void __kprobes arch_optimize_kprobes(struct list_head *oplist)
-{
- struct optimized_kprobe *op, *tmp;
- int c = 0;
-
- list_for_each_entry_safe(op, tmp, oplist, list) {
- WARN_ON(kprobe_disabled(&op->kp));
- /* Setup param */
- setup_optimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
- list_del_init(&op->list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
- }
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- /* Set int3 to first byte for kprobes */
- insn_buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
-/*
- * Recover original instructions and breakpoints from relative jumps.
- * Caller must call with locking kprobe_mutex.
- */
-extern void arch_unoptimize_kprobes(struct list_head *oplist,
- struct list_head *done_list)
-{
- struct optimized_kprobe *op, *tmp;
- int c = 0;
-
- list_for_each_entry_safe(op, tmp, oplist, list) {
- /* Setup param */
- setup_unoptimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
- list_move(&op->list, done_list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
- }
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-/* Replace a relative jump with a breakpoint (int3). */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
-{
- u8 buf[RELATIVEJUMP_SIZE];
-
- /* Set int3 to first byte for kprobes */
- buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
-}
-
-static int __kprobes setup_detour_execution(struct kprobe *p,
- struct pt_regs *regs,
- int reenter)
-{
- struct optimized_kprobe *op;
-
- if (p->flags & KPROBE_FLAG_OPTIMIZED) {
- /* This kprobe is really able to run optimized path. */
- op = container_of(p, struct optimized_kprobe, kp);
- /* Detour through copied instructions */
- regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX;
- if (!reenter)
- reset_current_kprobe();
- preempt_enable_no_resched();
- return 1;
- }
- return 0;
-}
-
-static int __kprobes init_poke_params(void)
-{
- /* Allocate code buffer and parameter array */
- jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_bufs)
- return -ENOMEM;
-
- jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_params) {
- kfree(jump_poke_bufs);
- jump_poke_bufs = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
-#else /* !CONFIG_OPTPROBES */
-static int __kprobes init_poke_params(void)
-{
- return 0;
-}
-#endif
-
int __init arch_init_kprobes(void)
{
- return init_poke_params();
+ return arch_init_optprobes();
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f0c6fd6..b8ba6e4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -38,6 +38,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
+#include <asm/idle.h>
static int kvmapf = 1;
@@ -253,7 +254,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
kvm_async_pf_task_wait((u32)read_cr2());
break;
case KVM_PV_REASON_PAGE_READY:
+ rcu_irq_enter();
+ exit_idle();
kvm_async_pf_task_wake((u32)read_cr2());
+ rcu_irq_exit();
break;
}
}
@@ -438,9 +442,9 @@ void __init kvm_guest_init(void)
static __init int activate_jump_labels(void)
{
if (has_steal_clock) {
- jump_label_inc(&paravirt_steal_enabled);
+ static_key_slow_inc(&paravirt_steal_enabled);
if (steal_acc)
- jump_label_inc(&paravirt_steal_rq_enabled);
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
}
return 0;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 44842d7..f8492da6 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -136,6 +136,15 @@ int kvm_register_clock(char *txt)
return ret;
}
+static void kvm_save_sched_clock_state(void)
+{
+}
+
+static void kvm_restore_sched_clock_state(void)
+{
+ kvm_register_clock("primary cpu clock, resume");
+}
+
#ifdef CONFIG_X86_LOCAL_APIC
static void __cpuinit kvm_setup_secondary_clock(void)
{
@@ -144,8 +153,6 @@ static void __cpuinit kvm_setup_secondary_clock(void)
* we shouldn't fail.
*/
WARN_ON(kvm_register_clock("secondary cpu clock"));
- /* ok, done with our trickery, call native */
- setup_secondary_APIC_clock();
}
#endif
@@ -194,9 +201,11 @@ void __init kvmclock_init(void)
x86_platform.get_wallclock = kvm_get_wallclock;
x86_platform.set_wallclock = kvm_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
- x86_cpuinit.setup_percpu_clockev =
+ x86_cpuinit.early_percpu_clock_init =
kvm_setup_secondary_clock;
#endif
+ x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
+ x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC
machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ea69726..ebc9873 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -15,7 +15,6 @@
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
-#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index a3fa43b..5b19e4d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -23,7 +23,6 @@
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
-#include <asm/system.h>
#include <asm/cacheflush.h>
#include <asm/debugreg.h>
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 177183c..7eb1e2b 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -43,7 +43,6 @@
#include <linux/mca.h>
#include <linux/kprobes.h>
#include <linux/slab.h>
-#include <asm/system.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/mman.h>
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index fda91c3..87a0f86 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -86,6 +86,7 @@
#include <asm/microcode.h>
#include <asm/processor.h>
+#include <asm/cpu_device_id.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -504,6 +505,20 @@ static struct notifier_block __refdata mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
+#ifdef MODULE
+/* Autoload on Intel and AMD systems */
+static const struct x86_cpu_id microcode_id[] = {
+#ifdef CONFIG_MICROCODE_INTEL
+ { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+#ifdef CONFIG_MICROCODE_AMD
+ { X86_VENDOR_AMD, X86_FAMILY_ANY, X86_MODEL_ANY, },
+#endif
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, microcode_id);
+#endif
+
static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 925179f..f21fd94 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -26,7 +26,6 @@
#include <linux/gfp.h>
#include <linux/jump_label.h>
-#include <asm/system.h>
#include <asm/page.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 9635676..eb11369 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -40,7 +40,6 @@
#include <asm/processor.h>
#include <asm/msr.h>
-#include <asm/system.h>
static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index 0d01a8e..2c39dcd 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -12,6 +12,7 @@
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/apic.h>
#include <asm/nmi.h>
@@ -20,35 +21,35 @@
#define FAILURE 1
#define TIMEOUT 2
-static int nmi_fail;
+static int __initdata nmi_fail;
/* check to see if NMI IPIs work on this machine */
-static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly;
+static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __initdata;
-static int testcase_total;
-static int testcase_successes;
-static int expected_testcase_failures;
-static int unexpected_testcase_failures;
-static int unexpected_testcase_unknowns;
+static int __initdata testcase_total;
+static int __initdata testcase_successes;
+static int __initdata expected_testcase_failures;
+static int __initdata unexpected_testcase_failures;
+static int __initdata unexpected_testcase_unknowns;
-static int nmi_unk_cb(unsigned int val, struct pt_regs *regs)
+static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs)
{
unexpected_testcase_unknowns++;
return NMI_HANDLED;
}
-static void init_nmi_testsuite(void)
+static void __init init_nmi_testsuite(void)
{
/* trap all the unknown NMIs we may generate */
register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk");
}
-static void cleanup_nmi_testsuite(void)
+static void __init cleanup_nmi_testsuite(void)
{
unregister_nmi_handler(NMI_UNKNOWN, "nmi_selftest_unk");
}
-static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
+static int __init test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
{
int cpu = raw_smp_processor_id();
@@ -58,7 +59,7 @@ static int test_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
return NMI_DONE;
}
-static void test_nmi_ipi(struct cpumask *mask)
+static void __init test_nmi_ipi(struct cpumask *mask)
{
unsigned long timeout;
@@ -86,7 +87,7 @@ static void test_nmi_ipi(struct cpumask *mask)
return;
}
-static void remote_ipi(void)
+static void __init remote_ipi(void)
{
cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
@@ -94,19 +95,19 @@ static void remote_ipi(void)
test_nmi_ipi(to_cpumask(nmi_ipi_mask));
}
-static void local_ipi(void)
+static void __init local_ipi(void)
{
cpumask_clear(to_cpumask(nmi_ipi_mask));
cpumask_set_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
test_nmi_ipi(to_cpumask(nmi_ipi_mask));
}
-static void reset_nmi(void)
+static void __init reset_nmi(void)
{
nmi_fail = 0;
}
-static void dotest(void (*testcase_fn)(void), int expected)
+static void __init dotest(void (*testcase_fn)(void), int expected)
{
testcase_fn();
/*
@@ -131,12 +132,12 @@ static void dotest(void (*testcase_fn)(void), int expected)
reset_nmi();
}
-static inline void print_testname(const char *testname)
+static inline void __init print_testname(const char *testname)
{
printk("%12s:", testname);
}
-void nmi_selftest(void)
+void __init nmi_selftest(void)
{
init_nmi_testsuite();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d90272e..ab13760 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -26,6 +26,7 @@
#include <asm/bug.h>
#include <asm/paravirt.h>
+#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
@@ -37,6 +38,7 @@
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
+#include <asm/special_insns.h>
/* nop stub */
void _paravirt_nop(void)
@@ -202,8 +204,8 @@ static void native_flush_tlb_single(unsigned long addr)
__native_flush_tlb_single(addr);
}
-struct jump_label_key paravirt_steal_enabled;
-struct jump_label_key paravirt_steal_rq_enabled;
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
static u64 native_steal_clock(int cpu)
{
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 726494b..d0b2fb9 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -42,7 +42,6 @@
#include <asm/calgary.h>
#include <asm/tce.h>
#include <asm/pci-direct.h>
-#include <asm/system.h>
#include <asm/dma.h>
#include <asm/rio.h>
#include <asm/bios_ebda.h>
@@ -431,7 +430,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
}
static void* calgary_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
+ dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs)
{
void *ret = NULL;
dma_addr_t mapping;
@@ -464,7 +463,8 @@ error:
}
static void calgary_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle)
+ void *vaddr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
unsigned int npages;
struct iommu_table *tbl = find_iommu_table(dev);
@@ -477,8 +477,8 @@ static void calgary_free_coherent(struct device *dev, size_t size,
}
static struct dma_map_ops calgary_dma_ops = {
- .alloc_coherent = calgary_alloc_coherent,
- .free_coherent = calgary_free_coherent,
+ .alloc = calgary_alloc_coherent,
+ .free = calgary_free_coherent,
.map_sg = calgary_map_sg,
.unmap_sg = calgary_unmap_sg,
.map_page = calgary_map_page,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1c4d769..3003250 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -96,7 +96,8 @@ void __init pci_iommu_alloc(void)
}
}
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag)
+ dma_addr_t *dma_addr, gfp_t flag,
+ struct dma_attrs *attrs)
{
unsigned long dma_mask;
struct page *page;
@@ -262,10 +263,11 @@ rootfs_initcall(pci_iommu_init);
static __devinit void via_no_dac(struct pci_dev *dev)
{
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
+ if (forbid_dac == 0) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
forbid_dac = 1;
}
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, 8, via_no_dac);
#endif
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 3af4af81..f960506 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -75,7 +75,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
}
static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr)
+ dma_addr_t dma_addr, struct dma_attrs *attrs)
{
free_pages((unsigned long)vaddr, get_order(size));
}
@@ -96,8 +96,8 @@ static void nommu_sync_sg_for_device(struct device *dev,
}
struct dma_map_ops nommu_dma_ops = {
- .alloc_coherent = dma_generic_alloc_coherent,
- .free_coherent = nommu_free_coherent,
+ .alloc = dma_generic_alloc_coherent,
+ .free = nommu_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
.sync_single_for_device = nommu_sync_single_for_device,
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 8f972cb..6c483ba 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -15,21 +15,30 @@
int swiotlb __read_mostly;
static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags)
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
{
void *vaddr;
- vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags);
+ vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
+ attrs);
if (vaddr)
return vaddr;
return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
}
+static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ struct dma_attrs *attrs)
+{
+ swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+}
+
static struct dma_map_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
- .alloc_coherent = x86_swiotlb_alloc_coherent,
- .free_coherent = swiotlb_free_coherent,
+ .alloc = x86_swiotlb_alloc_coherent,
+ .free = x86_swiotlb_free_coherent,
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 34e06e8..0bc72e2 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/export.h>
+#include <asm/probe_roms.h>
#include <asm/pci-direct.h>
#include <asm/e820.h>
#include <asm/mmzone.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 15763af..1d92a5a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -12,16 +12,37 @@
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
#include <linux/utsname.h>
+#include <linux/stackprotector.h>
+#include <linux/tick.h>
+#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
-#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
+#include <asm/nmi.h>
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU(unsigned char, is_idle);
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+#endif
struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);
@@ -341,44 +362,113 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
#endif
-#ifdef CONFIG_X86_32
-/*
- * This halt magic was a workaround for ancient floppy DMA
- * wreckage. It should be safe to remove.
- */
-static int hlt_counter;
-void disable_hlt(void)
+static inline int hlt_use_halt(void)
{
- hlt_counter++;
+ return 1;
}
-EXPORT_SYMBOL(disable_hlt);
-void enable_hlt(void)
+#ifndef CONFIG_SMP
+static inline void play_dead(void)
{
- hlt_counter--;
+ BUG();
}
-EXPORT_SYMBOL(enable_hlt);
+#endif
-static inline int hlt_use_halt(void)
+#ifdef CONFIG_X86_64
+void enter_idle(void)
{
- return (!hlt_counter && boot_cpu_data.hlt_works_ok);
+ percpu_write(is_idle, 1);
+ atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}
-#else
-static inline int hlt_use_halt(void)
+
+static void __exit_idle(void)
{
- return 1;
+ if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
+ return;
+ atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+}
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+ /* idle loop has pid 0 */
+ if (current->pid)
+ return;
+ __exit_idle();
}
#endif
/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle(void)
+{
+ /*
+ * If we're the non-boot CPU, nothing set the stack canary up
+ * for us. CPU0 already has it initialized but no harm in
+ * doing it again. This is a good place for updating it, as
+ * we wont ever return from this function (so the invalid
+ * canaries already on the stack wont ever trigger).
+ */
+ boot_init_stack_canary();
+ current_thread_info()->status |= TS_POLLING;
+
+ while (1) {
+ tick_nohz_idle_enter();
+
+ while (!need_resched()) {
+ rmb();
+
+ if (cpu_is_offline(smp_processor_id()))
+ play_dead();
+
+ /*
+ * Idle routines should keep interrupts disabled
+ * from here on, until they go to idle.
+ * Otherwise, idle callbacks can misfire.
+ */
+ local_touch_nmi();
+ local_irq_disable();
+
+ enter_idle();
+
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
+
+ /* enter_idle() needs rcu for notifiers */
+ rcu_idle_enter();
+
+ if (cpuidle_idle_call())
+ pm_idle();
+
+ rcu_idle_exit();
+ start_critical_timings();
+
+ /* In many cases the interrupt that ended idle
+ has already called exit_idle. But some idle
+ loops can be woken up without interrupt. */
+ __exit_idle();
+ }
+
+ tick_nohz_idle_exit();
+ preempt_enable_no_resched();
+ schedule();
+ preempt_disable();
+ }
+}
+
+/*
* We use this if we don't have any better
* idle routine..
*/
void default_idle(void)
{
if (hlt_use_halt()) {
- trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle(1, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -391,8 +481,8 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else {
local_irq_enable();
/* loop is done by the caller */
@@ -450,8 +540,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
static void mwait_idle(void)
{
if (!need_resched()) {
- trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle(1, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -461,8 +551,8 @@ static void mwait_idle(void)
__sti_mwait(0, 0);
else
local_irq_enable();
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else
local_irq_enable();
}
@@ -474,13 +564,13 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- trace_power_start(POWER_CSTATE, 0, smp_processor_id());
- trace_cpu_idle(0, smp_processor_id());
+ trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id());
+ trace_cpu_idle_rcuidle(0, smp_processor_id());
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end(smp_processor_id());
- trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c08d1ff..ae68473 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -9,7 +9,6 @@
* This file handles the architecture-dependent parts of process handling..
*/
-#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
@@ -31,20 +30,18 @@
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
#include <linux/personality.h>
-#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
-#include <linux/cpuidle.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/desc.h>
#ifdef CONFIG_MATH_EMULATION
#include <asm/math_emu.h>
@@ -57,7 +54,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
-#include <asm/nmi.h>
+#include <asm/switch_to.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -69,62 +66,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return ((unsigned long *)tsk->thread.sp)[3];
}
-#ifndef CONFIG_SMP
-static inline void play_dead(void)
-{
- BUG();
-}
-#endif
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-{
- int cpu = smp_processor_id();
-
- /*
- * If we're the non-boot CPU, nothing set the stack canary up
- * for us. CPU0 already has it initialized but no harm in
- * doing it again. This is a good place for updating it, as
- * we wont ever return from this function (so the invalid
- * canaries already on the stack wont ever trigger).
- */
- boot_init_stack_canary();
-
- current_thread_info()->status |= TS_POLLING;
-
- /* endless idle loop with no priority at all */
- while (1) {
- tick_nohz_idle_enter();
- rcu_idle_enter();
- while (!need_resched()) {
-
- check_pgt_cache();
- rmb();
-
- if (cpu_is_offline(cpu))
- play_dead();
-
- local_touch_nmi();
- local_irq_disable();
- /* Don't trace irqs off for idle */
- stop_critical_timings();
- if (cpuidle_idle_call())
- pm_idle();
- start_critical_timings();
- }
- rcu_idle_exit();
- tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- }
-}
-
void __show_regs(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 550e77b..733ca39 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -14,7 +14,6 @@
* This file handles the architecture-dependent parts of process handling..
*/
-#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
@@ -32,17 +31,15 @@
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
-#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
-#include <linux/cpuidle.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
#include <asm/desc.h>
@@ -51,116 +48,11 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/debugreg.h>
-#include <asm/nmi.h>
+#include <asm/switch_to.h>
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(unsigned long, old_rsp);
-static DEFINE_PER_CPU(unsigned char, is_idle);
-
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
-
-void enter_idle(void)
-{
- percpu_write(is_idle, 1);
- atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
-}
-
-static void __exit_idle(void)
-{
- if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
- return;
- atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
-}
-
-/* Called from interrupts to signify idle end */
-void exit_idle(void)
-{
- /* idle loop has pid 0 */
- if (current->pid)
- return;
- __exit_idle();
-}
-
-#ifndef CONFIG_SMP
-static inline void play_dead(void)
-{
- BUG();
-}
-#endif
-
-/*
- * The idle thread. There's no useful work to be
- * done, so just try to conserve power and have a
- * low exit latency (ie sit in a loop waiting for
- * somebody to say that they'd like to reschedule)
- */
-void cpu_idle(void)
-{
- current_thread_info()->status |= TS_POLLING;
-
- /*
- * If we're the non-boot CPU, nothing set the stack canary up
- * for us. CPU0 already has it initialized but no harm in
- * doing it again. This is a good place for updating it, as
- * we wont ever return from this function (so the invalid
- * canaries already on the stack wont ever trigger).
- */
- boot_init_stack_canary();
-
- /* endless idle loop with no priority at all */
- while (1) {
- tick_nohz_idle_enter();
- while (!need_resched()) {
-
- rmb();
-
- if (cpu_is_offline(smp_processor_id()))
- play_dead();
- /*
- * Idle routines should keep interrupts disabled
- * from here on, until they go to idle.
- * Otherwise, idle callbacks can misfire.
- */
- local_touch_nmi();
- local_irq_disable();
- enter_idle();
- /* Don't trace irqs off for idle */
- stop_critical_timings();
-
- /* enter_idle() needs rcu for notifiers */
- rcu_idle_enter();
-
- if (cpuidle_idle_call())
- pm_idle();
-
- rcu_idle_exit();
- start_critical_timings();
-
- /* In many cases the interrupt that ended idle
- has already called exit_idle. But some idle
- loops can be woken up without interrupt. */
- __exit_idle();
- }
-
- tick_nohz_idle_exit();
- preempt_enable_no_resched();
- schedule();
- preempt_disable();
- }
-}
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
@@ -342,6 +234,7 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
+ current->thread.usersp = new_sp;
regs->ip = new_ip;
regs->sp = new_sp;
percpu_write(old_rsp, new_sp);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 6fb330a..685845c 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -24,9 +24,9 @@
#include <asm/uaccess.h>
#include <asm/pgtable.h>
-#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
#include <asm/ldt.h>
#include <asm/desc.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d7d5099..1a29015 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -90,7 +90,6 @@
#include <asm/processor.h>
#include <asm/bugs.h>
-#include <asm/system.h>
#include <asm/vsyscall.h>
#include <asm/cpu.h>
#include <asm/desc.h>
@@ -509,15 +508,6 @@ static void __init memblock_x86_reserve_range_setup_data(void)
#ifdef CONFIG_KEXEC
-static inline unsigned long long get_total_mem(void)
-{
- unsigned long long total;
-
- total = max_pfn - min_low_pfn;
-
- return total << PAGE_SHIFT;
-}
-
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
@@ -536,7 +526,7 @@ static void __init reserve_crashkernel(void)
unsigned long long crash_size, crash_base;
int ret;
- total_mem = get_total_mem();
+ total_mem = memblock_phys_mem_size();
ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
@@ -749,10 +739,16 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
- EFI_LOADER_SIGNATURE, 4)) {
+ "EL32", 4)) {
+ efi_enabled = 1;
+ efi_64bit = false;
+ } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
+ "EL64", 4)) {
efi_enabled = 1;
- efi_memblock_x86_reserve_range();
+ efi_64bit = true;
}
+ if (efi_enabled && efi_memblock_x86_reserve_range())
+ efi_enabled = 0;
#endif
x86_init.oem.arch_setup();
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b3cd691..041af2f 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -23,6 +23,7 @@
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/vdso.h>
#include <asm/mce.h>
#include <asm/sighandling.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 66d250c..6e1e406 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -50,6 +50,7 @@
#include <linux/tboot.h>
#include <linux/stackprotector.h>
#include <linux/gfp.h>
+#include <linux/cpuidle.h>
#include <asm/acpi.h>
#include <asm/desc.h>
@@ -219,14 +220,9 @@ static void __cpuinit smp_callin(void)
* Update loops_per_jiffy in cpu_data. Previous call to
* smp_store_cpu_info() stored a value that is close but not as
* accurate as the value just calculated.
- *
- * Need to enable IRQs because it can take longer and then
- * the NMI watchdog might kill us.
*/
- local_irq_enable();
calibrate_delay();
cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
- local_irq_disable();
pr_debug("Stack at about %p\n", &cpuid);
/*
@@ -255,6 +251,7 @@ notrace static void __cpuinit start_secondary(void *unused)
* most necessary things.
*/
cpu_init();
+ x86_cpuinit.early_percpu_clock_init();
preempt_disable();
smp_callin();
@@ -291,19 +288,6 @@ notrace static void __cpuinit start_secondary(void *unused)
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
x86_platform.nmi_init();
- /*
- * Wait until the cpu which brought this one up marked it
- * online before enabling interrupts. If we don't do that then
- * we can end up waking up the softirq thread before this cpu
- * reached the active state, which makes the scheduler unhappy
- * and schedule the softirq thread on the wrong cpu. This is
- * only observable with forced threaded interrupts, but in
- * theory it could also happen w/o them. It's just way harder
- * to achieve.
- */
- while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
- cpu_relax();
-
/* enable local interrupts */
local_irq_enable();
@@ -740,8 +724,6 @@ do_rest:
* the targeted processor.
*/
- printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip);
-
atomic_set(&init_deasserted, 0);
if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
@@ -791,9 +773,10 @@ do_rest:
schedule();
}
- if (cpumask_test_cpu(cpu, cpu_callin_mask))
+ if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
+ print_cpu_msr(&cpu_data(cpu));
pr_debug("CPU%d: has booted.\n", cpu);
- else {
+ } else {
boot_error = 1;
if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
== 0xA5A5A5A5)
@@ -847,7 +830,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
!physid_isset(apicid, phys_cpu_present_map) ||
- (!x2apic_mode && apicid >= 255)) {
+ !apic->apic_id_valid(apicid)) {
printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
return -EINVAL;
}
@@ -1422,7 +1405,8 @@ void native_play_dead(void)
tboot_shutdown(TB_SHUTDOWN_WFS);
mwait_play_dead(); /* Only returns on failure */
- hlt_play_dead();
+ if (cpuidle_play_dead())
+ hlt_play_dead();
}
#else /* ... !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index f921df8..b4d3c39 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -195,7 +195,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- unsigned long addr = addr0;
+ unsigned long addr = addr0, start_addr;
/* requested length too big for entire address space */
if (len > TASK_SIZE)
@@ -223,25 +223,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
mm->free_area_cache = mm->mmap_base;
}
+try_again:
/* either no address requested or can't fit in requested address hole */
- addr = mm->free_area_cache;
-
- /* make sure it can fit in the remaining address space */
- if (addr > len) {
- unsigned long tmp_addr = align_addr(addr - len, filp,
- ALIGN_TOPDOWN);
-
- vma = find_vma(mm, tmp_addr);
- if (!vma || tmp_addr + len <= vma->vm_start)
- /* remember the address as a hint for next time */
- return mm->free_area_cache = tmp_addr;
- }
-
- if (mm->mmap_base < len)
- goto bottomup;
+ start_addr = addr = mm->free_area_cache;
- addr = mm->mmap_base-len;
+ if (addr < len)
+ goto fail;
+ addr -= len;
do {
addr = align_addr(addr, filp, ALIGN_TOPDOWN);
@@ -263,6 +252,17 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
addr = vma->vm_start-len;
} while (len < vma->vm_start);
+fail:
+ /*
+ * if hint left us with no space for the requested
+ * mapping then try again:
+ */
+ if (start_addr != mm->mmap_base) {
+ mm->free_area_cache = mm->mmap_base;
+ mm->cached_hole_size = 0;
+ goto try_again;
+ }
+
bottomup:
/*
* A failed mmap() very likely causes application failure,
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e2410e2..6410744 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -272,7 +272,7 @@ static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
offsetof(struct acpi_table_facs, firmware_waking_vector);
}
-void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
+static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
{
static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
/* S0,1,2: */ -1, -1, -1,
@@ -281,7 +281,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
/* S5: */ TB_SHUTDOWN_S5 };
if (!tboot_enabled())
- return;
+ return 0;
tboot_copy_fadt(&acpi_gbl_FADT);
tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
@@ -292,10 +292,11 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
if (sleep_state >= ACPI_S_STATE_COUNT ||
acpi_shutdown_map[sleep_state] == -1) {
pr_warning("unsupported sleep state 0x%x\n", sleep_state);
- return;
+ return -1;
}
tboot_shutdown(acpi_shutdown_map[sleep_state]);
+ return 0;
}
static atomic_t ap_wfs_count;
@@ -345,6 +346,8 @@ static __init int tboot_late_init(void)
atomic_set(&ap_wfs_count, 0);
register_hotcpu_notifier(&tboot_cpu_notifier);
+
+ acpi_os_set_prepare_sleep(&tboot_sleep);
return 0;
}
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index 9e540fe..ab40954 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -34,6 +34,7 @@
#include <asm/tce.h>
#include <asm/calgary.h>
#include <asm/proto.h>
+#include <asm/cacheflush.h>
/* flush a tce at 'tceaddr' to main memory */
static inline void flush_tce(void* tceaddr)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dd5fbf4..c6eba2b 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -57,9 +57,6 @@ EXPORT_SYMBOL(profile_pc);
*/
static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
- /* Keep nmi watchdog up to date */
- inc_irq_stat(irq0_irqs);
-
global_clock_event->event_handler(global_clock_event);
/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 6bb7b85..9d9d2f9 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -6,7 +6,6 @@
#include <asm/uaccess.h>
#include <asm/desc.h>
-#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -163,7 +162,7 @@ int regset_tls_get(struct task_struct *target, const struct user_regset *regset,
{
const struct desc_struct *tls;
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
(pos % sizeof(struct user_desc)) != 0 ||
(count % sizeof(struct user_desc)) != 0)
return -EINVAL;
@@ -198,7 +197,7 @@ int regset_tls_set(struct task_struct *target, const struct user_regset *regset,
struct user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
const struct user_desc *info;
- if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
+ if (pos >= GDT_ENTRY_TLS_ENTRIES * sizeof(struct user_desc) ||
(pos % sizeof(struct user_desc)) != 0 ||
(count % sizeof(struct user_desc)) != 0)
return -EINVAL;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index c6d17ad..ff9281f1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -50,10 +50,10 @@
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
-#include <asm/system.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#include <asm/mce.h>
#include <asm/mach_traps.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..fc0a147 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -620,7 +620,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
if (cpu_khz) {
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
- *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR);
+ *offset = ns_now - mult_frac(tsc_now, *scale,
+ (1UL << CYC2NS_SCALE_FACTOR));
}
sched_clock_idle_wakeup_event(0);
@@ -629,7 +630,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
static unsigned long long cyc2ns_suspend;
-void save_sched_clock_state(void)
+void tsc_save_sched_clock_state(void)
{
if (!sched_clock_stable)
return;
@@ -645,7 +646,7 @@ void save_sched_clock_state(void)
* that sched_clock() continues from the point where it was left off during
* suspend.
*/
-void restore_sched_clock_state(void)
+void tsc_restore_sched_clock_state(void)
{
unsigned long long offset;
unsigned long flags;
@@ -932,6 +933,16 @@ static int __init init_tsc_clocksource(void)
clocksource_tsc.rating = 0;
clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
}
+
+ /*
+ * Trust the results of the earlier calibration on systems
+ * exporting a reliable TSC.
+ */
+ if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
+ clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ return 0;
+ }
+
schedule_delayed_work(&tsc_irqwork, 0);
return 0;
}
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9eba29b..fc25e60 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -42,7 +42,7 @@ static __cpuinitdata int nr_warps;
/*
* TSC-warp measurement loop running on both CPUs:
*/
-static __cpuinit void check_tsc_warp(void)
+static __cpuinit void check_tsc_warp(unsigned int timeout)
{
cycles_t start, now, prev, end;
int i;
@@ -51,9 +51,9 @@ static __cpuinit void check_tsc_warp(void)
start = get_cycles();
rdtsc_barrier();
/*
- * The measurement runs for 20 msecs:
+ * The measurement runs for 'timeout' msecs:
*/
- end = start + tsc_khz * 20ULL;
+ end = start + (cycles_t) tsc_khz * timeout;
now = start;
for (i = 0; ; i++) {
@@ -99,6 +99,25 @@ static __cpuinit void check_tsc_warp(void)
}
/*
+ * If the target CPU coming online doesn't have any of its core-siblings
+ * online, a timeout of 20msec will be used for the TSC-warp measurement
+ * loop. Otherwise a smaller timeout of 2msec will be used, as we have some
+ * information about this socket already (and this information grows as we
+ * have more and more logical-siblings in that socket).
+ *
+ * Ideally we should be able to skip the TSC sync check on the other
+ * core-siblings, if the first logical CPU in a socket passed the sync test.
+ * But as the TSC is per-logical CPU and can potentially be modified wrongly
+ * by the bios, TSC sync test for smaller duration should be able
+ * to catch such errors. Also this will catch the condition where all the
+ * cores in the socket doesn't get reset at the same time.
+ */
+static inline unsigned int loop_timeout(int cpu)
+{
+ return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
+}
+
+/*
* Source CPU calls into this - it waits for the freshly booted
* target CPU to arrive and then starts the measurement:
*/
@@ -135,7 +154,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
*/
atomic_inc(&start_count);
- check_tsc_warp();
+ check_tsc_warp(loop_timeout(cpu));
while (atomic_read(&stop_count) != cpus-1)
cpu_relax();
@@ -183,7 +202,7 @@ void __cpuinit check_tsc_sync_target(void)
while (atomic_read(&start_count) != cpus)
cpu_relax();
- check_tsc_warp();
+ check_tsc_warp(loop_timeout(smp_processor_id()));
/*
* Ok, we are done:
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index a1315ab..255f58a 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -172,6 +172,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
spinlock_t *ptl;
int i;
+ down_write(&mm->mmap_sem);
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
@@ -190,6 +191,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
}
pte_unmap_unlock(pte, ptl);
out:
+ up_write(&mm->mmap_sem);
flush_tlb();
}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 327509b..f386dc4 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -52,10 +52,7 @@
#include "vsyscall_trace.h"
DEFINE_VVAR(int, vgetcpu_mode);
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
-{
- .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
-};
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
@@ -80,20 +77,15 @@ early_param("vsyscall", vsyscall_setup);
void update_vsyscall_tz(void)
{
- unsigned long flags;
-
- write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
- /* sys_tz has changed */
vsyscall_gtod_data.sys_tz = sys_tz;
- write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
{
- unsigned long flags;
+ struct timespec monotonic;
- write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ write_seqcount_begin(&vsyscall_gtod_data.seq);
/* copy vsyscall data */
vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
@@ -101,12 +93,19 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
vsyscall_gtod_data.clock.mask = clock->mask;
vsyscall_gtod_data.clock.mult = mult;
vsyscall_gtod_data.clock.shift = clock->shift;
+
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
- vsyscall_gtod_data.wall_to_monotonic = *wtm;
+
+ monotonic = timespec_add(*wall_time, *wtm);
+ vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
+ vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
+
vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
+ vsyscall_gtod_data.monotonic_time_coarse =
+ timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
- write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+ write_seqcount_end(&vsyscall_gtod_data.seq);
}
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 947a06c..e9f265f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,6 +91,7 @@ struct x86_init_ops x86_init __initdata = {
};
struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
+ .early_percpu_clock_init = x86_init_noop,
.setup_percpu_clockev = setup_secondary_APIC_clock,
.fixup_cpu_id = x86_default_fixup_cpu_id,
};
@@ -107,7 +108,9 @@ struct x86_platform_ops x86_platform = {
.is_untracked_pat_range = is_ISA_range,
.nmi_init = default_nmi_init,
.get_nmi_reason = default_get_nmi_reason,
- .i8042_detect = default_i8042_detect
+ .i8042_detect = default_i8042_detect,
+ .save_sched_clock_state = tsc_save_sched_clock_state,
+ .restore_sched_clock_state = tsc_restore_sched_clock_state,
};
EXPORT_SYMBOL_GPL(x86_platform);
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 7110911..e62728e 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -6,6 +6,7 @@
#include <linux/bootmem.h>
#include <linux/compat.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#ifdef CONFIG_IA32_EMULATION
#include <asm/sigcontext32.h>
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 89b02bf..9fed5be 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -236,7 +236,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_supported_word6_x86_features =
F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
- F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) |
+ F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);
/* cpuid 0xC0000001.edx */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 5b97e17..26d1fb4 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -43,4 +43,12 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
}
+static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ return best && (best->ecx & bit(X86_FEATURE_OSVW));
+}
+
#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 0982507..8375622 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -57,6 +57,7 @@
#define OpDS 23ull /* DS */
#define OpFS 24ull /* FS */
#define OpGS 25ull /* GS */
+#define OpMem8 26ull /* 8-bit zero extended memory operand */
#define OpBits 5 /* Width of operand field */
#define OpMask ((1ull << OpBits) - 1)
@@ -101,6 +102,7 @@
#define SrcAcc (OpAcc << SrcShift)
#define SrcImmU16 (OpImmU16 << SrcShift)
#define SrcDX (OpDX << SrcShift)
+#define SrcMem8 (OpMem8 << SrcShift)
#define SrcMask (OpMask << SrcShift)
#define BitOp (1<<11)
#define MemAbs (1<<12) /* Memory operand is absolute displacement */
@@ -858,8 +860,7 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
}
static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
- struct operand *op,
- int inhibit_bytereg)
+ struct operand *op)
{
unsigned reg = ctxt->modrm_reg;
int highbyte_regs = ctxt->rex_prefix == 0;
@@ -876,7 +877,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
}
op->type = OP_REG;
- if ((ctxt->d & ByteOp) && !inhibit_bytereg) {
+ if (ctxt->d & ByteOp) {
op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
op->bytes = 1;
} else {
@@ -1151,6 +1152,22 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
return 1;
}
+static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
+ u16 index, struct desc_struct *desc)
+{
+ struct desc_ptr dt;
+ ulong addr;
+
+ ctxt->ops->get_idt(ctxt, &dt);
+
+ if (dt.size < index * 8 + 7)
+ return emulate_gp(ctxt, index << 3 | 0x2);
+
+ addr = dt.address + index * 8;
+ return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
+ &ctxt->exception);
+}
+
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
u16 selector, struct desc_ptr *dt)
{
@@ -1227,6 +1244,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
seg_desc.type = 3;
seg_desc.p = 1;
seg_desc.s = 1;
+ if (ctxt->mode == X86EMUL_MODE_VM86)
+ seg_desc.dpl = 3;
goto load;
}
@@ -1891,6 +1910,17 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
ss->p = 1;
}
+static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = ecx = 0;
+ return ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx)
+ && ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx
+ && ecx == X86EMUL_CPUID_VENDOR_GenuineIntel_ecx
+ && edx == X86EMUL_CPUID_VENDOR_GenuineIntel_edx;
+}
+
static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
struct x86_emulate_ops *ops = ctxt->ops;
@@ -2007,6 +2037,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
if (ctxt->mode == X86EMUL_MODE_REAL)
return emulate_gp(ctxt, 0);
+ /*
+ * Not recognized on AMD in compat mode (but is recognized in legacy
+ * mode).
+ */
+ if ((ctxt->mode == X86EMUL_MODE_PROT32) && (efer & EFER_LMA)
+ && !vendor_intel(ctxt))
+ return emulate_ud(ctxt);
+
/* XXX sysenter/sysexit have not been tested in 64bit mode.
* Therefore, we inject an #UD.
*/
@@ -2306,6 +2344,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
return emulate_gp(ctxt, 0);
ctxt->_eip = tss->eip;
ctxt->eflags = tss->eflags | 2;
+
+ /* General purpose registers */
ctxt->regs[VCPU_REGS_RAX] = tss->eax;
ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
ctxt->regs[VCPU_REGS_RDX] = tss->edx;
@@ -2328,6 +2368,24 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
set_segment_selector(ctxt, tss->gs, VCPU_SREG_GS);
/*
+ * If we're switching between Protected Mode and VM86, we need to make
+ * sure to update the mode before loading the segment descriptors so
+ * that the selectors are interpreted correctly.
+ *
+ * Need to get rflags to the vcpu struct immediately because it
+ * influences the CPL which is checked at least when loading the segment
+ * descriptors and when pushing an error code to the new kernel stack.
+ *
+ * TODO Introduce a separate ctxt->ops->set_cpl callback
+ */
+ if (ctxt->eflags & X86_EFLAGS_VM)
+ ctxt->mode = X86EMUL_MODE_VM86;
+ else
+ ctxt->mode = X86EMUL_MODE_PROT32;
+
+ ctxt->ops->set_rflags(ctxt, ctxt->eflags);
+
+ /*
* Now load segment descriptors. If fault happenes at this stage
* it is handled in a context of new task
*/
@@ -2401,7 +2459,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
}
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
- u16 tss_selector, int reason,
+ u16 tss_selector, int idt_index, int reason,
bool has_error_code, u32 error_code)
{
struct x86_emulate_ops *ops = ctxt->ops;
@@ -2423,12 +2481,35 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
/* FIXME: check that next_tss_desc is tss */
- if (reason != TASK_SWITCH_IRET) {
- if ((tss_selector & 3) > next_tss_desc.dpl ||
- ops->cpl(ctxt) > next_tss_desc.dpl)
- return emulate_gp(ctxt, 0);
+ /*
+ * Check privileges. The three cases are task switch caused by...
+ *
+ * 1. jmp/call/int to task gate: Check against DPL of the task gate
+ * 2. Exception/IRQ/iret: No check is performed
+ * 3. jmp/call to TSS: Check agains DPL of the TSS
+ */
+ if (reason == TASK_SWITCH_GATE) {
+ if (idt_index != -1) {
+ /* Software interrupts */
+ struct desc_struct task_gate_desc;
+ int dpl;
+
+ ret = read_interrupt_descriptor(ctxt, idt_index,
+ &task_gate_desc);
+ if (ret != X86EMUL_CONTINUE)
+ return ret;
+
+ dpl = task_gate_desc.dpl;
+ if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
+ return emulate_gp(ctxt, (idt_index << 3) | 0x2);
+ }
+ } else if (reason != TASK_SWITCH_IRET) {
+ int dpl = next_tss_desc.dpl;
+ if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
+ return emulate_gp(ctxt, tss_selector);
}
+
desc_limit = desc_limit_scaled(&next_tss_desc);
if (!next_tss_desc.p ||
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2481,7 +2562,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
}
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
- u16 tss_selector, int reason,
+ u16 tss_selector, int idt_index, int reason,
bool has_error_code, u32 error_code)
{
int rc;
@@ -2489,7 +2570,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
ctxt->_eip = ctxt->eip;
ctxt->dst.type = OP_NONE;
- rc = emulator_do_task_switch(ctxt, tss_selector, reason,
+ rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
has_error_code, error_code);
if (rc == X86EMUL_CONTINUE)
@@ -3514,13 +3595,13 @@ static struct opcode twobyte_table[256] = {
I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr),
I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg),
I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg),
- D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
+ D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xB8 - 0xBF */
N, N,
G(BitOp, group8),
I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr),
- D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
+ D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xC0 - 0xCF */
D2bv(DstMem | SrcReg | ModRM | Lock),
N, D(DstMem | SrcReg | ModRM | Mov),
@@ -3602,9 +3683,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
switch (d) {
case OpReg:
- decode_register_operand(ctxt, op,
- op == &ctxt->dst &&
- ctxt->twobyte && (ctxt->b == 0xb6 || ctxt->b == 0xb7));
+ decode_register_operand(ctxt, op);
break;
case OpImmUByte:
rc = decode_imm(ctxt, op, 1, false);
@@ -3656,6 +3735,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
case OpImm:
rc = decode_imm(ctxt, op, imm_size(ctxt), true);
break;
+ case OpMem8:
+ ctxt->memop.bytes = 1;
+ goto mem_common;
case OpMem16:
ctxt->memop.bytes = 2;
goto mem_common;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index b6a7353..81cf4fa 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -307,6 +307,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
if (val & 0x10) {
s->init4 = val & 1;
s->last_irr = 0;
+ s->irr &= s->elcr;
s->imr = 0;
s->priority_add = 0;
s->special_mask = 0;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cfdc6e0..8584322 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -433,7 +433,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
break;
case APIC_DM_INIT:
- if (level) {
+ if (!trig_mode || level) {
result = 1;
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@@ -731,7 +731,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
u64 ns = 0;
struct kvm_vcpu *vcpu = apic->vcpu;
- unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu);
+ unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
unsigned long flags;
if (unlikely(!tscdeadline || !this_tsc_khz))
@@ -1283,9 +1283,9 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
return;
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+ vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
- kunmap_atomic(vapic, KM_USER0);
+ kunmap_atomic(vapic);
apic_set_tpr(vcpu->arch.apic, data & 0xff);
}
@@ -1310,9 +1310,9 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
max_isr = 0;
data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
- vapic = kmap_atomic(vcpu->arch.apic->vapic_page, KM_USER0);
+ vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
*(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
- kunmap_atomic(vapic, KM_USER0);
+ kunmap_atomic(vapic);
}
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 224b02c..4cb1642 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -688,9 +688,8 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn,
{
unsigned long idx;
- idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) -
- (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
- return &slot->lpage_info[level - 2][idx];
+ idx = gfn_to_index(gfn, slot->base_gfn, level);
+ return &slot->arch.lpage_info[level - 2][idx];
}
static void account_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -946,7 +945,7 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
}
}
-static unsigned long *__gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level,
+static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
struct kvm_lpage_info *linfo;
@@ -966,7 +965,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level)
struct kvm_memory_slot *slot;
slot = gfn_to_memslot(kvm, gfn);
- return __gfn_to_rmap(kvm, gfn, level, slot);
+ return __gfn_to_rmap(gfn, level, slot);
}
static bool rmap_can_add(struct kvm_vcpu *vcpu)
@@ -988,7 +987,7 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
return pte_list_add(vcpu, spte, rmapp);
}
-static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
+static u64 *rmap_next(unsigned long *rmapp, u64 *spte)
{
return pte_list_next(rmapp, spte);
}
@@ -1018,8 +1017,8 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
u64 *spte;
int i, write_protected = 0;
- rmapp = __gfn_to_rmap(kvm, gfn, PT_PAGE_TABLE_LEVEL, slot);
- spte = rmap_next(kvm, rmapp, NULL);
+ rmapp = __gfn_to_rmap(gfn, PT_PAGE_TABLE_LEVEL, slot);
+ spte = rmap_next(rmapp, NULL);
while (spte) {
BUG_ON(!(*spte & PT_PRESENT_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
@@ -1027,14 +1026,14 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
mmu_spte_update(spte, *spte & ~PT_WRITABLE_MASK);
write_protected = 1;
}
- spte = rmap_next(kvm, rmapp, spte);
+ spte = rmap_next(rmapp, spte);
}
/* check for huge page mappings */
for (i = PT_DIRECTORY_LEVEL;
i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
- rmapp = __gfn_to_rmap(kvm, gfn, i, slot);
- spte = rmap_next(kvm, rmapp, NULL);
+ rmapp = __gfn_to_rmap(gfn, i, slot);
+ spte = rmap_next(rmapp, NULL);
while (spte) {
BUG_ON(!(*spte & PT_PRESENT_MASK));
BUG_ON(!is_large_pte(*spte));
@@ -1045,7 +1044,7 @@ int kvm_mmu_rmap_write_protect(struct kvm *kvm, u64 gfn,
spte = NULL;
write_protected = 1;
}
- spte = rmap_next(kvm, rmapp, spte);
+ spte = rmap_next(rmapp, spte);
}
}
@@ -1066,7 +1065,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
u64 *spte;
int need_tlb_flush = 0;
- while ((spte = rmap_next(kvm, rmapp, NULL))) {
+ while ((spte = rmap_next(rmapp, NULL))) {
BUG_ON(!(*spte & PT_PRESENT_MASK));
rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte);
drop_spte(kvm, spte);
@@ -1085,14 +1084,14 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
WARN_ON(pte_huge(*ptep));
new_pfn = pte_pfn(*ptep);
- spte = rmap_next(kvm, rmapp, NULL);
+ spte = rmap_next(rmapp, NULL);
while (spte) {
BUG_ON(!is_shadow_present_pte(*spte));
rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
need_flush = 1;
if (pte_write(*ptep)) {
drop_spte(kvm, spte);
- spte = rmap_next(kvm, rmapp, NULL);
+ spte = rmap_next(rmapp, NULL);
} else {
new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
new_spte |= (u64)new_pfn << PAGE_SHIFT;
@@ -1102,7 +1101,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
new_spte &= ~shadow_accessed_mask;
mmu_spte_clear_track_bits(spte);
mmu_spte_set(spte, new_spte);
- spte = rmap_next(kvm, rmapp, spte);
+ spte = rmap_next(rmapp, spte);
}
}
if (need_flush)
@@ -1176,7 +1175,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (!shadow_accessed_mask)
return kvm_unmap_rmapp(kvm, rmapp, data);
- spte = rmap_next(kvm, rmapp, NULL);
+ spte = rmap_next(rmapp, NULL);
while (spte) {
int _young;
u64 _spte = *spte;
@@ -1186,7 +1185,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
young = 1;
clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte);
}
- spte = rmap_next(kvm, rmapp, spte);
+ spte = rmap_next(rmapp, spte);
}
return young;
}
@@ -1205,7 +1204,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (!shadow_accessed_mask)
goto out;
- spte = rmap_next(kvm, rmapp, NULL);
+ spte = rmap_next(rmapp, NULL);
while (spte) {
u64 _spte = *spte;
BUG_ON(!(_spte & PT_PRESENT_MASK));
@@ -1214,7 +1213,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
young = 1;
break;
}
- spte = rmap_next(kvm, rmapp, spte);
+ spte = rmap_next(rmapp, spte);
}
out:
return young;
@@ -1391,11 +1390,6 @@ struct kvm_mmu_pages {
unsigned int nr;
};
-#define for_each_unsync_children(bitmap, idx) \
- for (idx = find_first_bit(bitmap, 512); \
- idx < 512; \
- idx = find_next_bit(bitmap, 512, idx+1))
-
static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp,
int idx)
{
@@ -1417,7 +1411,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
{
int i, ret, nr_unsync_leaf = 0;
- for_each_unsync_children(sp->unsync_child_bitmap, i) {
+ for_each_set_bit(i, sp->unsync_child_bitmap, 512) {
struct kvm_mmu_page *child;
u64 ent = sp->spt[i];
@@ -1803,6 +1797,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep)
{
if (is_large_pte(*sptep)) {
drop_spte(vcpu->kvm, sptep);
+ --vcpu->kvm->stat.lpages;
kvm_flush_remote_tlbs(vcpu->kvm);
}
}
@@ -3190,15 +3185,14 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
#undef PTTYPE
static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
- struct kvm_mmu *context,
- int level)
+ struct kvm_mmu *context)
{
int maxphyaddr = cpuid_maxphyaddr(vcpu);
u64 exb_bit_rsvd = 0;
if (!context->nx)
exb_bit_rsvd = rsvd_bits(63, 63);
- switch (level) {
+ switch (context->root_level) {
case PT32_ROOT_LEVEL:
/* no rsvd bits for 2 level 4K page table entries */
context->rsvd_bits_mask[0][1] = 0;
@@ -3256,8 +3250,9 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
int level)
{
context->nx = is_nx(vcpu);
+ context->root_level = level;
- reset_rsvds_bits_mask(vcpu, context, level);
+ reset_rsvds_bits_mask(vcpu, context);
ASSERT(is_pae(vcpu));
context->new_cr3 = paging_new_cr3;
@@ -3267,7 +3262,6 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
context->invlpg = paging64_invlpg;
context->update_pte = paging64_update_pte;
context->free = paging_free;
- context->root_level = level;
context->shadow_root_level = level;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
@@ -3284,8 +3278,9 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
struct kvm_mmu *context)
{
context->nx = false;
+ context->root_level = PT32_ROOT_LEVEL;
- reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
+ reset_rsvds_bits_mask(vcpu, context);
context->new_cr3 = paging_new_cr3;
context->page_fault = paging32_page_fault;
@@ -3294,7 +3289,6 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
context->sync_page = paging32_sync_page;
context->invlpg = paging32_invlpg;
context->update_pte = paging32_update_pte;
- context->root_level = PT32_ROOT_LEVEL;
context->shadow_root_level = PT32E_ROOT_LEVEL;
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
@@ -3325,7 +3319,6 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->get_cr3 = get_cr3;
context->get_pdptr = kvm_pdptr_read;
context->inject_page_fault = kvm_inject_page_fault;
- context->nx = is_nx(vcpu);
if (!is_paging(vcpu)) {
context->nx = false;
@@ -3333,19 +3326,19 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
context->root_level = 0;
} else if (is_long_mode(vcpu)) {
context->nx = is_nx(vcpu);
- reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL);
- context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT64_ROOT_LEVEL;
+ reset_rsvds_bits_mask(vcpu, context);
+ context->gva_to_gpa = paging64_gva_to_gpa;
} else if (is_pae(vcpu)) {
context->nx = is_nx(vcpu);
- reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL);
- context->gva_to_gpa = paging64_gva_to_gpa;
context->root_level = PT32E_ROOT_LEVEL;
+ reset_rsvds_bits_mask(vcpu, context);
+ context->gva_to_gpa = paging64_gva_to_gpa;
} else {
context->nx = false;
- reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL);
- context->gva_to_gpa = paging32_gva_to_gpa;
context->root_level = PT32_ROOT_LEVEL;
+ reset_rsvds_bits_mask(vcpu, context);
+ context->gva_to_gpa = paging32_gva_to_gpa;
}
return 0;
@@ -3408,18 +3401,18 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
} else if (is_long_mode(vcpu)) {
g_context->nx = is_nx(vcpu);
- reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL);
g_context->root_level = PT64_ROOT_LEVEL;
+ reset_rsvds_bits_mask(vcpu, g_context);
g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
} else if (is_pae(vcpu)) {
g_context->nx = is_nx(vcpu);
- reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL);
g_context->root_level = PT32E_ROOT_LEVEL;
+ reset_rsvds_bits_mask(vcpu, g_context);
g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
} else {
g_context->nx = false;
- reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL);
g_context->root_level = PT32_ROOT_LEVEL;
+ reset_rsvds_bits_mask(vcpu, g_context);
g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
}
@@ -3555,7 +3548,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
* If we're seeing too many writes to a page, it may no longer be a page table,
* or we may be forking, in which case it is better to unmap the page.
*/
-static bool detect_write_flooding(struct kvm_mmu_page *sp, u64 *spte)
+static bool detect_write_flooding(struct kvm_mmu_page *sp)
{
/*
* Skip write-flooding detected for the sp whose level is 1, because
@@ -3664,10 +3657,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) {
- spte = get_written_sptes(sp, gpa, &npte);
-
if (detect_write_misaligned(sp, gpa, bytes) ||
- detect_write_flooding(sp, spte)) {
+ detect_write_flooding(sp)) {
zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
&invalid_list);
++vcpu->kvm->stat.mmu_flooded;
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index fe15dcc..715da5a 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -200,13 +200,13 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
slot = gfn_to_memslot(kvm, sp->gfn);
rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
- spte = rmap_next(kvm, rmapp, NULL);
+ spte = rmap_next(rmapp, NULL);
while (spte) {
if (is_writable_pte(*spte))
audit_printk(kvm, "shadow page has writable "
"mappings: gfn %llx role %x\n",
sp->gfn, sp->role.word);
- spte = rmap_next(kvm, rmapp, spte);
+ spte = rmap_next(rmapp, spte);
}
}
@@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
}
static bool mmu_audit;
-static struct jump_label_key mmu_audit_key;
+static struct static_key mmu_audit_key;
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
@@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
- if (static_branch((&mmu_audit_key)))
+ if (static_key_false((&mmu_audit_key)))
__kvm_mmu_audit(vcpu, point);
}
@@ -259,7 +259,7 @@ static void mmu_audit_enable(void)
if (mmu_audit)
return;
- jump_label_inc(&mmu_audit_key);
+ static_key_slow_inc(&mmu_audit_key);
mmu_audit = true;
}
@@ -268,7 +268,7 @@ static void mmu_audit_disable(void)
if (!mmu_audit)
return;
- jump_label_dec(&mmu_audit_key);
+ static_key_slow_dec(&mmu_audit_key);
mmu_audit = false;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1561028..df5a703 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -92,9 +92,9 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
if (unlikely(npages != 1))
return -EFAULT;
- table = kmap_atomic(page, KM_USER0);
+ table = kmap_atomic(page);
ret = CMPXCHG(&table[index], orig_pte, new_pte);
- kunmap_atomic(table, KM_USER0);
+ kunmap_atomic(table);
kvm_release_page_dirty(page);
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 7aad544..173df38 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -33,10 +33,11 @@ static struct kvm_arch_event_perf_mapping {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
+ [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
};
/* mapping between fixed pmc index and arch_events array */
-int fixed_pmc_events[] = {1, 0, 2};
+int fixed_pmc_events[] = {1, 0, 7};
static bool pmc_is_gp(struct kvm_pmc *pmc)
{
@@ -210,6 +211,9 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
unsigned config, type = PERF_TYPE_RAW;
u8 event_select, unit_mask;
+ if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
+ printk_once("kvm pmu: pin control bit is ignored\n");
+
pmc->eventsel = eventsel;
stop_counter(pmc);
@@ -220,7 +224,7 @@ static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
- if (!(event_select & (ARCH_PERFMON_EVENTSEL_EDGE |
+ if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
ARCH_PERFMON_EVENTSEL_INV |
ARCH_PERFMON_EVENTSEL_CMASK))) {
config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
@@ -365,7 +369,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data)
case MSR_CORE_PERF_FIXED_CTR_CTRL:
if (pmu->fixed_ctr_ctrl == data)
return 0;
- if (!(data & 0xfffffffffffff444)) {
+ if (!(data & 0xfffffffffffff444ull)) {
reprogram_fixed_counters(pmu, data);
return 0;
}
@@ -413,7 +417,7 @@ int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
struct kvm_pmc *counters;
u64 ctr;
- pmc &= (3u << 30) - 1;
+ pmc &= ~(3u << 30);
if (!fixed && pmc >= pmu->nr_arch_gp_counters)
return 1;
if (fixed && pmc >= pmu->nr_arch_fixed_counters)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e385214..e334389 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -111,6 +111,12 @@ struct nested_state {
#define MSRPM_OFFSETS 16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
+/*
+ * Set osvw_len to higher value when updated Revision Guides
+ * are published and we know what the new status bits are
+ */
+static uint64_t osvw_len = 4, osvw_status;
+
struct vcpu_svm {
struct kvm_vcpu vcpu;
struct vmcb *vmcb;
@@ -177,11 +183,13 @@ static bool npt_enabled = true;
#else
static bool npt_enabled;
#endif
-static int npt = 1;
+/* allow nested paging (virtualized MMU) for all guests */
+static int npt = true;
module_param(npt, int, S_IRUGO);
-static int nested = 1;
+/* allow nested virtualization in KVM/SVM */
+static int nested = true;
module_param(nested, int, S_IRUGO);
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
@@ -557,6 +565,27 @@ static void svm_init_erratum_383(void)
erratum_383_found = true;
}
+static void svm_init_osvw(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Guests should see errata 400 and 415 as fixed (assuming that
+ * HLT and IO instructions are intercepted).
+ */
+ vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3;
+ vcpu->arch.osvw.status = osvw_status & ~(6ULL);
+
+ /*
+ * By increasing VCPU's osvw.length to 3 we are telling the guest that
+ * all osvw.status bits inside that length, including bit 0 (which is
+ * reserved for erratum 298), are valid. However, if host processor's
+ * osvw_len is 0 then osvw_status[0] carries no information. We need to
+ * be conservative here and therefore we tell the guest that erratum 298
+ * is present (because we really don't know).
+ */
+ if (osvw_len == 0 && boot_cpu_data.x86 == 0x10)
+ vcpu->arch.osvw.status |= 1;
+}
+
static int has_svm(void)
{
const char *msg;
@@ -623,6 +652,36 @@ static int svm_hardware_enable(void *garbage)
__get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
}
+
+ /*
+ * Get OSVW bits.
+ *
+ * Note that it is possible to have a system with mixed processor
+ * revisions and therefore different OSVW bits. If bits are not the same
+ * on different processors then choose the worst case (i.e. if erratum
+ * is present on one processor and not on another then assume that the
+ * erratum is present everywhere).
+ */
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) {
+ uint64_t len, status = 0;
+ int err;
+
+ len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err);
+ if (!err)
+ status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS,
+ &err);
+
+ if (err)
+ osvw_status = osvw_len = 0;
+ else {
+ if (len < osvw_len)
+ osvw_len = len;
+ osvw_status |= status;
+ osvw_status &= (1ULL << osvw_len) - 1;
+ }
+ } else
+ osvw_status = osvw_len = 0;
+
svm_init_erratum_383();
amd_pmu_enable_virt();
@@ -910,20 +969,25 @@ static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
return _tsc;
}
-static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 ratio;
u64 khz;
- /* TSC scaling supported? */
- if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
+ /* Guest TSC same frequency as host TSC? */
+ if (!scale) {
+ svm->tsc_ratio = TSC_RATIO_DEFAULT;
return;
+ }
- /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
- if (user_tsc_khz == 0) {
- vcpu->arch.virtual_tsc_khz = 0;
- svm->tsc_ratio = TSC_RATIO_DEFAULT;
+ /* TSC scaling supported? */
+ if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
+ if (user_tsc_khz > tsc_khz) {
+ vcpu->arch.tsc_catchup = 1;
+ vcpu->arch.tsc_always_catchup = 1;
+ } else
+ WARN(1, "user requested TSC rate below hardware speed\n");
return;
}
@@ -938,7 +1002,6 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
user_tsc_khz);
return;
}
- vcpu->arch.virtual_tsc_khz = user_tsc_khz;
svm->tsc_ratio = ratio;
}
@@ -958,10 +1021,14 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ WARN_ON(adjustment < 0);
+ if (host)
+ adjustment = svm_scale_tsc(vcpu, adjustment);
+
svm->vmcb->control.tsc_offset += adjustment;
if (is_guest_mode(vcpu))
svm->nested.hsave->control.tsc_offset += adjustment;
@@ -1191,6 +1258,8 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
+ svm_init_osvw(&svm->vcpu);
+
return &svm->vcpu;
free_page4:
@@ -1268,6 +1337,21 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}
+static void svm_update_cpl(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ int cpl;
+
+ if (!is_protmode(vcpu))
+ cpl = 0;
+ else if (svm->vmcb->save.rflags & X86_EFLAGS_VM)
+ cpl = 3;
+ else
+ cpl = svm->vmcb->save.cs.selector & 0x3;
+
+ svm->vmcb->save.cpl = cpl;
+}
+
static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->vmcb->save.rflags;
@@ -1275,7 +1359,11 @@ static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
+ unsigned long old_rflags = to_svm(vcpu)->vmcb->save.rflags;
+
to_svm(vcpu)->vmcb->save.rflags = rflags;
+ if ((old_rflags ^ rflags) & X86_EFLAGS_VM)
+ svm_update_cpl(vcpu);
}
static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
@@ -1543,9 +1631,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
}
if (seg == VCPU_SREG_CS)
- svm->vmcb->save.cpl
- = (svm->vmcb->save.cs.attrib
- >> SVM_SELECTOR_DPL_SHIFT) & 3;
+ svm_update_cpl(vcpu);
mark_dirty(svm->vmcb, VMCB_SEG);
}
@@ -2735,7 +2821,10 @@ static int task_switch_interception(struct vcpu_svm *svm)
(int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
skip_emulated_instruction(&svm->vcpu);
- if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
+ if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
+ int_vec = -1;
+
+ if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
has_error_code, error_code) == EMULATE_FAIL) {
svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3b4c8d8..ad85adf 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -70,9 +70,6 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
static bool __read_mostly vmm_exclusive = 1;
module_param(vmm_exclusive, bool, S_IRUGO);
-static bool __read_mostly yield_on_hlt = 1;
-module_param(yield_on_hlt, bool, S_IRUGO);
-
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
@@ -1457,7 +1454,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#ifdef CONFIG_X86_64
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
- if (__thread_has_fpu(current))
+ if (user_has_fpu())
clts();
load_gdt(&__get_cpu_var(host_gdt));
}
@@ -1655,17 +1652,6 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
vmx_set_interrupt_shadow(vcpu, 0);
}
-static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
-{
- /* Ensure that we clear the HLT state in the VMCS. We don't need to
- * explicitly skip the instruction because if the HLT state is set, then
- * the instruction is already executing and RIP has already been
- * advanced. */
- if (!yield_on_hlt &&
- vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
- vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
-}
-
/*
* KVM wants to inject page-faults which it got to the guest. This function
* checks whether in a nested guest, we need to inject them to L1 or L2.
@@ -1678,7 +1664,7 @@ static int nested_pf_handled(struct kvm_vcpu *vcpu)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
- if (!(vmcs12->exception_bitmap & PF_VECTOR))
+ if (!(vmcs12->exception_bitmap & (1u << PF_VECTOR)))
return 0;
nested_vmx_vmexit(vcpu);
@@ -1718,7 +1704,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
intr_info |= INTR_TYPE_HARD_EXCEPTION;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
- vmx_clear_hlt(vcpu);
}
static bool vmx_rdtscp_supported(void)
@@ -1817,13 +1802,19 @@ u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
}
/*
- * Empty call-back. Needs to be implemented when VMX enables the SET_TSC_KHZ
- * ioctl. In this case the call-back should update internal vmx state to make
- * the changes effective.
+ * Engage any workarounds for mis-matched TSC rates. Currently limited to
+ * software catchup for faster rates on slower CPUs.
*/
-static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
+static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
{
- /* Nothing to do here */
+ if (!scale)
+ return;
+
+ if (user_tsc_khz > tsc_khz) {
+ vcpu->arch.tsc_catchup = 1;
+ vcpu->arch.tsc_always_catchup = 1;
+ } else
+ WARN(1, "user requested TSC rate below hardware speed\n");
}
/*
@@ -1850,7 +1841,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
}
}
-static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
+static void vmx_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host)
{
u64 offset = vmcs_read64(TSC_OFFSET);
vmcs_write64(TSC_OFFSET, offset + adjustment);
@@ -2219,6 +2210,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
msr = find_msr_entry(vmx, msr_index);
if (msr) {
msr->data = data;
+ if (msr - vmx->guest_msrs < vmx->save_nmsrs)
+ kvm_set_shared_msr(msr->index, msr->data,
+ msr->mask);
break;
}
ret = kvm_set_msr_common(vcpu, msr_index, data);
@@ -2399,7 +2393,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_pin_based_exec_control) < 0)
return -EIO;
- min =
+ min = CPU_BASED_HLT_EXITING |
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING |
CPU_BASED_CR8_STORE_EXITING |
@@ -2414,9 +2408,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;
- if (yield_on_hlt)
- min |= CPU_BASED_HLT_EXITING;
-
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3915,7 +3906,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
+ vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
+ srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
vmx_set_cr4(&vmx->vcpu, 0);
vmx_set_efer(&vmx->vcpu, 0);
vmx_fpu_activate(&vmx->vcpu);
@@ -4003,7 +3996,6 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
} else
intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
- vmx_clear_hlt(vcpu);
}
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -4035,7 +4027,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
- vmx_clear_hlt(vcpu);
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -4672,9 +4663,10 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
bool has_error_code = false;
u32 error_code = 0;
u16 tss_selector;
- int reason, type, idt_v;
+ int reason, type, idt_v, idt_index;
idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK);
+ idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK);
type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK);
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -4712,8 +4704,9 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
type != INTR_TYPE_NMI_INTR))
skip_emulated_instruction(vcpu);
- if (kvm_task_switch(vcpu, tss_selector, reason,
- has_error_code, error_code) == EMULATE_FAIL) {
+ if (kvm_task_switch(vcpu, tss_selector,
+ type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
+ has_error_code, error_code) == EMULATE_FAIL) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9cbfc06..4044ce0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -57,6 +57,7 @@
#include <asm/mtrr.h>
#include <asm/mce.h>
#include <asm/i387.h>
+#include <asm/fpu-internal.h> /* Ugh! */
#include <asm/xcr.h>
#include <asm/pvclock.h>
#include <asm/div64.h>
@@ -96,6 +97,10 @@ EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
u32 kvm_max_guest_tsc_khz;
EXPORT_SYMBOL_GPL(kvm_max_guest_tsc_khz);
+/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
+static u32 tsc_tolerance_ppm = 250;
+module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -968,50 +973,51 @@ static inline u64 get_kernel_ns(void)
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;
-static inline int kvm_tsc_changes_freq(void)
+static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
{
- int cpu = get_cpu();
- int ret = !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
- cpufreq_quick_get(cpu) != 0;
- put_cpu();
- return ret;
+ return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
}
-u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu)
+static u32 adjust_tsc_khz(u32 khz, s32 ppm)
{
- if (vcpu->arch.virtual_tsc_khz)
- return vcpu->arch.virtual_tsc_khz;
- else
- return __this_cpu_read(cpu_tsc_khz);
+ u64 v = (u64)khz * (1000000 + ppm);
+ do_div(v, 1000000);
+ return v;
}
-static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
+static void kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
{
- u64 ret;
-
- WARN_ON(preemptible());
- if (kvm_tsc_changes_freq())
- printk_once(KERN_WARNING
- "kvm: unreliable cycle conversion on adjustable rate TSC\n");
- ret = nsec * vcpu_tsc_khz(vcpu);
- do_div(ret, USEC_PER_SEC);
- return ret;
-}
+ u32 thresh_lo, thresh_hi;
+ int use_scaling = 0;
-static void kvm_init_tsc_catchup(struct kvm_vcpu *vcpu, u32 this_tsc_khz)
-{
/* Compute a scale to convert nanoseconds in TSC cycles */
kvm_get_time_scale(this_tsc_khz, NSEC_PER_SEC / 1000,
- &vcpu->arch.tsc_catchup_shift,
- &vcpu->arch.tsc_catchup_mult);
+ &vcpu->arch.virtual_tsc_shift,
+ &vcpu->arch.virtual_tsc_mult);
+ vcpu->arch.virtual_tsc_khz = this_tsc_khz;
+
+ /*
+ * Compute the variation in TSC rate which is acceptable
+ * within the range of tolerance and decide if the
+ * rate being applied is within that bounds of the hardware
+ * rate. If so, no scaling or compensation need be done.
+ */
+ thresh_lo = adjust_tsc_khz(tsc_khz, -tsc_tolerance_ppm);
+ thresh_hi = adjust_tsc_khz(tsc_khz, tsc_tolerance_ppm);
+ if (this_tsc_khz < thresh_lo || this_tsc_khz > thresh_hi) {
+ pr_debug("kvm: requested TSC rate %u falls outside tolerance [%u,%u]\n", this_tsc_khz, thresh_lo, thresh_hi);
+ use_scaling = 1;
+ }
+ kvm_x86_ops->set_tsc_khz(vcpu, this_tsc_khz, use_scaling);
}
static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
{
- u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.last_tsc_nsec,
- vcpu->arch.tsc_catchup_mult,
- vcpu->arch.tsc_catchup_shift);
- tsc += vcpu->arch.last_tsc_write;
+ u64 tsc = pvclock_scale_delta(kernel_ns-vcpu->arch.this_tsc_nsec,
+ vcpu->arch.virtual_tsc_mult,
+ vcpu->arch.virtual_tsc_shift);
+ tsc += vcpu->arch.this_tsc_write;
return tsc;
}
@@ -1020,48 +1026,88 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- s64 sdiff;
+ s64 usdiff;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
ns = get_kernel_ns();
elapsed = ns - kvm->arch.last_tsc_nsec;
- sdiff = data - kvm->arch.last_tsc_write;
- if (sdiff < 0)
- sdiff = -sdiff;
+
+ /* n.b - signed multiplication and division required */
+ usdiff = data - kvm->arch.last_tsc_write;
+#ifdef CONFIG_X86_64
+ usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
+#else
+ /* do_div() only does unsigned */
+ asm("idivl %2; xor %%edx, %%edx"
+ : "=A"(usdiff)
+ : "A"(usdiff * 1000), "rm"(vcpu->arch.virtual_tsc_khz));
+#endif
+ do_div(elapsed, 1000);
+ usdiff -= elapsed;
+ if (usdiff < 0)
+ usdiff = -usdiff;
/*
- * Special case: close write to TSC within 5 seconds of
- * another CPU is interpreted as an attempt to synchronize
- * The 5 seconds is to accommodate host load / swapping as
- * well as any reset of TSC during the boot process.
- *
- * In that case, for a reliable TSC, we can match TSC offsets,
- * or make a best guest using elapsed value.
- */
- if (sdiff < nsec_to_cycles(vcpu, 5ULL * NSEC_PER_SEC) &&
- elapsed < 5ULL * NSEC_PER_SEC) {
+ * Special case: TSC write with a small delta (1 second) of virtual
+ * cycle time against real time is interpreted as an attempt to
+ * synchronize the CPU.
+ *
+ * For a reliable TSC, we can match TSC offsets, and for an unstable
+ * TSC, we add elapsed time in this computation. We could let the
+ * compensation code attempt to catch up if we fall behind, but
+ * it's better to try to match offsets from the beginning.
+ */
+ if (usdiff < USEC_PER_SEC &&
+ vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!check_tsc_unstable()) {
- offset = kvm->arch.last_tsc_offset;
+ offset = kvm->arch.cur_tsc_offset;
pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
u64 delta = nsec_to_cycles(vcpu, elapsed);
- offset += delta;
+ data += delta;
+ offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
- ns = kvm->arch.last_tsc_nsec;
+ } else {
+ /*
+ * We split periods of matched TSC writes into generations.
+ * For each generation, we track the original measured
+ * nanosecond time, offset, and write, so if TSCs are in
+ * sync, we can match exact offset, and if not, we can match
+ * exact software computaion in compute_guest_tsc()
+ *
+ * These values are tracked in kvm->arch.cur_xxx variables.
+ */
+ kvm->arch.cur_tsc_generation++;
+ kvm->arch.cur_tsc_nsec = ns;
+ kvm->arch.cur_tsc_write = data;
+ kvm->arch.cur_tsc_offset = offset;
+ pr_debug("kvm: new tsc generation %u, clock %llu\n",
+ kvm->arch.cur_tsc_generation, data);
}
+
+ /*
+ * We also track th most recent recorded KHZ, write and time to
+ * allow the matching interval to be extended at each write.
+ */
kvm->arch.last_tsc_nsec = ns;
kvm->arch.last_tsc_write = data;
- kvm->arch.last_tsc_offset = offset;
- kvm_x86_ops->write_tsc_offset(vcpu, offset);
- raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+ kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
/* Reset of TSC must disable overshoot protection below */
vcpu->arch.hv_clock.tsc_timestamp = 0;
- vcpu->arch.last_tsc_write = data;
- vcpu->arch.last_tsc_nsec = ns;
+ vcpu->arch.last_guest_tsc = data;
+
+ /* Keep track of which generation this VCPU has synchronized to */
+ vcpu->arch.this_tsc_generation = kvm->arch.cur_tsc_generation;
+ vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
+ vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
+
+ kvm_x86_ops->write_tsc_offset(vcpu, offset);
+ raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
}
+
EXPORT_SYMBOL_GPL(kvm_write_tsc);
static int kvm_guest_time_update(struct kvm_vcpu *v)
@@ -1077,7 +1123,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_save(flags);
tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
kernel_ns = get_kernel_ns();
- this_tsc_khz = vcpu_tsc_khz(v);
+ this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
if (unlikely(this_tsc_khz == 0)) {
local_irq_restore(flags);
kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -1097,7 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
if (vcpu->tsc_catchup) {
u64 tsc = compute_guest_tsc(v, kernel_ns);
if (tsc > tsc_timestamp) {
- kvm_x86_ops->adjust_tsc_offset(v, tsc - tsc_timestamp);
+ adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
tsc_timestamp = tsc;
}
}
@@ -1129,7 +1175,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* observed by the guest and ensure the new system time is greater.
*/
max_kernel_ns = 0;
- if (vcpu->hv_clock.tsc_timestamp && vcpu->last_guest_tsc) {
+ if (vcpu->hv_clock.tsc_timestamp) {
max_kernel_ns = vcpu->last_guest_tsc -
vcpu->hv_clock.tsc_timestamp;
max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
@@ -1162,12 +1208,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
*/
vcpu->hv_clock.version += 2;
- shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
+ shared_kaddr = kmap_atomic(vcpu->time_page);
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
sizeof(vcpu->hv_clock));
- kunmap_atomic(shared_kaddr, KM_USER0);
+ kunmap_atomic(shared_kaddr);
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
return 0;
@@ -1503,6 +1549,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_K7_HWCR:
data &= ~(u64)0x40; /* ignore flush filter disable */
data &= ~(u64)0x100; /* ignore ignne emulation enable */
+ data &= ~(u64)0x8; /* ignore TLB cache disable */
if (data != 0) {
pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
@@ -1675,6 +1722,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
*/
pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.length = data;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ vcpu->arch.osvw.status = data;
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
@@ -1959,6 +2016,16 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
*/
data = 0xbe702111;
break;
+ case MSR_AMD64_OSVW_ID_LENGTH:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.length;
+ break;
+ case MSR_AMD64_OSVW_STATUS:
+ if (!guest_cpuid_has_osvw(vcpu))
+ return 1;
+ data = vcpu->arch.osvw.status;
+ break;
default:
if (kvm_pmu_msr(vcpu, msr))
return kvm_pmu_get_msr(vcpu, msr, pdata);
@@ -2079,6 +2146,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
case KVM_CAP_GET_TSC_KHZ:
+ case KVM_CAP_PCI_2_3:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -2213,19 +2281,23 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
kvm_x86_ops->vcpu_load(vcpu, cpu);
- if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
- /* Make sure TSC doesn't go backwards */
- s64 tsc_delta;
- u64 tsc;
- tsc = kvm_x86_ops->read_l1_tsc(vcpu);
- tsc_delta = !vcpu->arch.last_guest_tsc ? 0 :
- tsc - vcpu->arch.last_guest_tsc;
+ /* Apply any externally detected TSC adjustments (due to suspend) */
+ if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
+ adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
+ vcpu->arch.tsc_offset_adjustment = 0;
+ set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ }
+ if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+ s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
+ native_read_tsc() - vcpu->arch.last_host_tsc;
if (tsc_delta < 0)
mark_tsc_unstable("KVM discovered backwards TSC");
if (check_tsc_unstable()) {
- kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta);
+ u64 offset = kvm_x86_ops->compute_tsc_offset(vcpu,
+ vcpu->arch.last_guest_tsc);
+ kvm_x86_ops->write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
}
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2242,7 +2314,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
- vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+ vcpu->arch.last_host_tsc = native_read_tsc();
}
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@ -2784,26 +2856,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
u32 user_tsc_khz;
r = -EINVAL;
- if (!kvm_has_tsc_control)
- break;
-
user_tsc_khz = (u32)arg;
if (user_tsc_khz >= kvm_max_guest_tsc_khz)
goto out;
- kvm_x86_ops->set_tsc_khz(vcpu, user_tsc_khz);
+ if (user_tsc_khz == 0)
+ user_tsc_khz = tsc_khz;
+
+ kvm_set_tsc_khz(vcpu, user_tsc_khz);
r = 0;
goto out;
}
case KVM_GET_TSC_KHZ: {
- r = -EIO;
- if (check_tsc_unstable())
- goto out;
-
- r = vcpu_tsc_khz(vcpu);
-
+ r = vcpu->arch.virtual_tsc_khz;
goto out;
}
default:
@@ -2814,6 +2881,11 @@ out:
return r;
}
+int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+{
+ return VM_FAULT_SIGBUS;
+}
+
static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
{
int ret;
@@ -2997,6 +3069,8 @@ static void write_protect_slot(struct kvm *kvm,
unsigned long *dirty_bitmap,
unsigned long nr_dirty_pages)
{
+ spin_lock(&kvm->mmu_lock);
+
/* Not many dirty pages compared to # of shadow pages. */
if (nr_dirty_pages < kvm->arch.n_used_mmu_pages) {
unsigned long gfn_offset;
@@ -3004,16 +3078,13 @@ static void write_protect_slot(struct kvm *kvm,
for_each_set_bit(gfn_offset, dirty_bitmap, memslot->npages) {
unsigned long gfn = memslot->base_gfn + gfn_offset;
- spin_lock(&kvm->mmu_lock);
kvm_mmu_rmap_write_protect(kvm, gfn, memslot);
- spin_unlock(&kvm->mmu_lock);
}
kvm_flush_remote_tlbs(kvm);
- } else {
- spin_lock(&kvm->mmu_lock);
+ } else
kvm_mmu_slot_remove_write_access(kvm, memslot->id);
- spin_unlock(&kvm->mmu_lock);
- }
+
+ spin_unlock(&kvm->mmu_lock);
}
/*
@@ -3132,6 +3203,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EEXIST;
if (kvm->arch.vpic)
goto create_irqchip_unlock;
+ r = -EINVAL;
+ if (atomic_read(&kvm->online_vcpus))
+ goto create_irqchip_unlock;
r = -ENOMEM;
vpic = kvm_create_pic(kvm);
if (vpic) {
@@ -3848,7 +3922,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
goto emul_write;
}
- kaddr = kmap_atomic(page, KM_USER0);
+ kaddr = kmap_atomic(page);
kaddr += offset_in_page(gpa);
switch (bytes) {
case 1:
@@ -3866,7 +3940,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
default:
BUG();
}
- kunmap_atomic(kaddr, KM_USER0);
+ kunmap_atomic(kaddr);
kvm_release_page_dirty(page);
if (!exchanged)
@@ -4062,6 +4136,11 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
return res;
}
+static void emulator_set_rflags(struct x86_emulate_ctxt *ctxt, ulong val)
+{
+ kvm_set_rflags(emul_to_vcpu(ctxt), val);
+}
+
static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
{
return kvm_x86_ops->get_cpl(emul_to_vcpu(ctxt));
@@ -4243,6 +4322,7 @@ static struct x86_emulate_ops emulate_ops = {
.set_idt = emulator_set_idt,
.get_cr = emulator_get_cr,
.set_cr = emulator_set_cr,
+ .set_rflags = emulator_set_rflags,
.cpl = emulator_get_cpl,
.get_dr = emulator_get_dr,
.set_dr = emulator_set_dr,
@@ -5287,6 +5367,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
profile_hit(KVM_PROFILING, (void *)rip);
}
+ if (unlikely(vcpu->arch.tsc_always_catchup))
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
kvm_lapic_sync_from_vapic(vcpu);
@@ -5586,15 +5668,15 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return 0;
}
-int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason,
- bool has_error_code, u32 error_code)
+int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
+ int reason, bool has_error_code, u32 error_code)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
int ret;
init_emulate_ctxt(vcpu);
- ret = emulator_task_switch(ctxt, tss_selector, reason,
+ ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
has_error_code, error_code);
if (ret)
@@ -5927,13 +6009,88 @@ int kvm_arch_hardware_enable(void *garbage)
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i;
+ int ret;
+ u64 local_tsc;
+ u64 max_tsc = 0;
+ bool stable, backwards_tsc = false;
kvm_shared_msr_cpu_online();
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- if (vcpu->cpu == smp_processor_id())
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- return kvm_x86_ops->hardware_enable(garbage);
+ ret = kvm_x86_ops->hardware_enable(garbage);
+ if (ret != 0)
+ return ret;
+
+ local_tsc = native_read_tsc();
+ stable = !check_tsc_unstable();
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!stable && vcpu->cpu == smp_processor_id())
+ set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+ if (stable && vcpu->arch.last_host_tsc > local_tsc) {
+ backwards_tsc = true;
+ if (vcpu->arch.last_host_tsc > max_tsc)
+ max_tsc = vcpu->arch.last_host_tsc;
+ }
+ }
+ }
+
+ /*
+ * Sometimes, even reliable TSCs go backwards. This happens on
+ * platforms that reset TSC during suspend or hibernate actions, but
+ * maintain synchronization. We must compensate. Fortunately, we can
+ * detect that condition here, which happens early in CPU bringup,
+ * before any KVM threads can be running. Unfortunately, we can't
+ * bring the TSCs fully up to date with real time, as we aren't yet far
+ * enough into CPU bringup that we know how much real time has actually
+ * elapsed; our helper function, get_kernel_ns() will be using boot
+ * variables that haven't been updated yet.
+ *
+ * So we simply find the maximum observed TSC above, then record the
+ * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
+ * the adjustment will be applied. Note that we accumulate
+ * adjustments, in case multiple suspend cycles happen before some VCPU
+ * gets a chance to run again. In the event that no KVM threads get a
+ * chance to run, we will miss the entire elapsed period, as we'll have
+ * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
+ * loose cycle time. This isn't too big a deal, since the loss will be
+ * uniform across all VCPUs (not to mention the scenario is extremely
+ * unlikely). It is possible that a second hibernate recovery happens
+ * much faster than a first, causing the observed TSC here to be
+ * smaller; this would require additional padding adjustment, which is
+ * why we set last_host_tsc to the local tsc observed here.
+ *
+ * N.B. - this code below runs only on platforms with reliable TSC,
+ * as that is the only way backwards_tsc is set above. Also note
+ * that this runs for ALL vcpus, which is not a bug; all VCPUs should
+ * have the same delta_cyc adjustment applied if backwards_tsc
+ * is detected. Note further, this adjustment is only done once,
+ * as we reset last_host_tsc on all VCPUs to stop this from being
+ * called multiple times (one for each physical CPU bringup).
+ *
+ * Platforms with unnreliable TSCs don't have to deal with this, they
+ * will be compensated by the logic in vcpu_load, which sets the TSC to
+ * catchup mode. This will catchup all VCPUs to real time, but cannot
+ * guarantee that they stay in perfect synchronization.
+ */
+ if (backwards_tsc) {
+ u64 delta_cyc = max_tsc - local_tsc;
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.tsc_offset_adjustment += delta_cyc;
+ vcpu->arch.last_host_tsc = local_tsc;
+ }
+
+ /*
+ * We have to disable TSC offset matching.. if you were
+ * booting a VM while issuing an S4 host suspend....
+ * you may have some problem. Solving this issue is
+ * left as an exercise to the reader.
+ */
+ kvm->arch.last_tsc_nsec = 0;
+ kvm->arch.last_tsc_write = 0;
+ }
+
+ }
+ return 0;
}
void kvm_arch_hardware_disable(void *garbage)
@@ -5957,6 +6114,11 @@ void kvm_arch_check_processor_compat(void *rtn)
kvm_x86_ops->check_processor_compatibility(rtn);
}
+bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
+{
+ return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+}
+
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
struct page *page;
@@ -5979,7 +6141,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
}
vcpu->arch.pio_data = page_address(page);
- kvm_init_tsc_catchup(vcpu, max_tsc_khz);
+ kvm_set_tsc_khz(vcpu, max_tsc_khz);
r = kvm_mmu_create(vcpu);
if (r < 0)
@@ -6031,8 +6193,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.pio_data);
}
-int kvm_arch_init_vm(struct kvm *kvm)
+int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ if (type)
+ return -EINVAL;
+
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -6092,6 +6257,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
put_page(kvm->arch.ept_identity_pagetable);
}
+void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+ struct kvm_memory_slot *dont)
+{
+ int i;
+
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
+ vfree(free->arch.lpage_info[i]);
+ free->arch.lpage_info[i] = NULL;
+ }
+ }
+}
+
+int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+{
+ int i;
+
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ unsigned long ugfn;
+ int lpages;
+ int level = i + 2;
+
+ lpages = gfn_to_index(slot->base_gfn + npages - 1,
+ slot->base_gfn, level) + 1;
+
+ slot->arch.lpage_info[i] =
+ vzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
+ if (!slot->arch.lpage_info[i])
+ goto out_free;
+
+ if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
+ slot->arch.lpage_info[i][0].write_count = 1;
+ if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
+ slot->arch.lpage_info[i][lpages - 1].write_count = 1;
+ ugfn = slot->userspace_addr >> PAGE_SHIFT;
+ /*
+ * If the gfn and userspace address are not aligned wrt each
+ * other, or if explicitly asked to, disable large page
+ * support for this slot
+ */
+ if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) ||
+ !kvm_largepages_enabled()) {
+ unsigned long j;
+
+ for (j = 0; j < lpages; ++j)
+ slot->arch.lpage_info[i][j].write_count = 1;
+ }
+ }
+
+ return 0;
+
+out_free:
+ for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+ vfree(slot->arch.lpage_info[i]);
+ slot->arch.lpage_info[i] = NULL;
+ }
+ return -ENOMEM;
+}
+
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c
index 042f682..a0b4a35 100644
--- a/arch/x86/lib/atomic64_32.c
+++ b/arch/x86/lib/atomic64_32.c
@@ -1,59 +1,4 @@
-#include <linux/compiler.h>
-#include <linux/module.h>
-#include <linux/types.h>
+#define ATOMIC64_EXPORT EXPORT_SYMBOL
-#include <asm/processor.h>
-#include <asm/cmpxchg.h>
+#include <linux/export.h>
#include <linux/atomic.h>
-
-long long atomic64_read_cx8(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_read_cx8);
-long long atomic64_set_cx8(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_set_cx8);
-long long atomic64_xchg_cx8(long long, unsigned high);
-EXPORT_SYMBOL(atomic64_xchg_cx8);
-long long atomic64_add_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_add_return_cx8);
-long long atomic64_sub_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_sub_return_cx8);
-long long atomic64_inc_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_return_cx8);
-long long atomic64_dec_return_cx8(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_return_cx8);
-long long atomic64_dec_if_positive_cx8(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_if_positive_cx8);
-int atomic64_inc_not_zero_cx8(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_not_zero_cx8);
-int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u);
-EXPORT_SYMBOL(atomic64_add_unless_cx8);
-
-#ifndef CONFIG_X86_CMPXCHG64
-long long atomic64_read_386(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_read_386);
-long long atomic64_set_386(long long, const atomic64_t *v);
-EXPORT_SYMBOL(atomic64_set_386);
-long long atomic64_xchg_386(long long, unsigned high);
-EXPORT_SYMBOL(atomic64_xchg_386);
-long long atomic64_add_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_add_return_386);
-long long atomic64_sub_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_sub_return_386);
-long long atomic64_inc_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_return_386);
-long long atomic64_dec_return_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_return_386);
-long long atomic64_add_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_add_386);
-long long atomic64_sub_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_sub_386);
-long long atomic64_inc_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_386);
-long long atomic64_dec_386(long long a, atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_386);
-long long atomic64_dec_if_positive_386(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_dec_if_positive_386);
-int atomic64_inc_not_zero_386(atomic64_t *v);
-EXPORT_SYMBOL(atomic64_inc_not_zero_386);
-int atomic64_add_unless_386(atomic64_t *v, long long a, long long u);
-EXPORT_SYMBOL(atomic64_add_unless_386);
-#endif
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e8e7e0d..00933d5 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -137,13 +137,13 @@ BEGIN(dec_return)
RET_ENDP
#undef v
-#define v %ecx
+#define v %esi
BEGIN(add_unless)
- addl %eax, %esi
+ addl %eax, %ecx
adcl %edx, %edi
addl (v), %eax
adcl 4(v), %edx
- cmpl %eax, %esi
+ cmpl %eax, %ecx
je 3f
1:
movl %eax, (v)
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 391a083..f5cc9eb 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8)
ENTRY(atomic64_xchg_cx8)
CFI_STARTPROC
- movl %ebx, %eax
- movl %ecx, %edx
1:
LOCK_PREFIX
cmpxchg8b (%esi)
@@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8)
movl %edx, %edi
movl %ecx, %ebp
- read64 %ebp
+ read64 %ecx
1:
movl %eax, %ebx
movl %edx, %ecx
@@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8)
SAVE ebx
/* these just push these two parameters on the stack */
SAVE edi
- SAVE esi
+ SAVE ecx
- movl %ecx, %ebp
- movl %eax, %esi
+ movl %eax, %ebp
movl %edx, %edi
- read64 %ebp
+ read64 %esi
1:
cmpl %eax, 0(%esp)
je 4f
2:
movl %eax, %ebx
movl %edx, %ecx
- addl %esi, %ebx
+ addl %ebp, %ebx
adcl %edi, %ecx
LOCK_PREFIX
- cmpxchg8b (%ebp)
+ cmpxchg8b (%esi)
jne 1b
movl $1, %eax
@@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8)
read64 %esi
1:
- testl %eax, %eax
- je 4f
-2:
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jz 3f
movl %eax, %ebx
- movl %edx, %ecx
+ xorl %ecx, %ecx
addl $1, %ebx
- adcl $0, %ecx
+ adcl %edx, %ecx
LOCK_PREFIX
cmpxchg8b (%esi)
jne 1b
@@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8)
3:
RESTORE ebx
ret
-4:
- testl %edx, %edx
- jne 2b
- jmp 3b
CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 01c805b..6b34d04 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
ENTRY(copy_page)
CFI_STARTPROC
- subq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET 3*8
+ subq $2*8,%rsp
+ CFI_ADJUST_CFA_OFFSET 2*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
- movq %r13,2*8(%rsp)
- CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx
.p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
- movq 2*8(%rsp),%r13
- CFI_RESTORE r13
- addq $3*8,%rsp
- CFI_ADJUST_CFA_OFFSET -3*8
+ addq $2*8,%rsp
+ CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
CFI_ENDPROC
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index fc45ba8..e395693 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -48,9 +48,9 @@ static void delay_loop(unsigned long loops)
}
/* TSC based delay: */
-static void delay_tsc(unsigned long loops)
+static void delay_tsc(unsigned long __loops)
{
- unsigned long bclock, now;
+ u32 bclock, now, loops = __loops;
int cpu;
preempt_disable();
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index efbf2a0..1c273be 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax
-
- movl %edx, %ecx
- shrl $3, %ecx
+ movq %rdx, %rcx
+ shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
@@ -48,8 +47,7 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
movq %rdi, %rax
-
- movl %edx, %ecx
+ movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
CFI_STARTPROC
movq %rdi, %rax
- /*
- * Use 32bit CMP here to avoid long NOP padding.
- */
- cmp $0x20, %edx
+ cmpq $0x20, %rdx
jb .Lhandle_tail
/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
*/
cmp %dil, %sil
jl .Lcopy_backward
- subl $0x20, %edx
+ subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx
@@ -91,7 +86,7 @@ ENTRY(memcpy)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
- addq $0x20, %rdx
+ addl $0x20, %edx
jmp .Lhandle_tail
.Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
/*
* Calculate copy position to head.
*/
- addq $0x20, %rdx
+ addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
- cmpq $16, %rdx
+ cmpl $16, %edx
jb .Lless_16bytes
/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_16bytes:
- cmpq $8, %rdx
+ cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_8bytes:
- cmpq $4, %rdx
+ cmpl $4, %edx
jb .Lless_3bytes
/*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_3bytes:
- cmpl $0, %edx
- je .Lend
+ subl $1, %edx
+ jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
-.Lloop_1:
- movb (%rsi), %r8b
- movb %r8b, (%rdi)
- incq %rdi
- incq %rsi
- decl %edx
- jnz .Lloop_1
+ movzbl (%rsi), %ecx
+ jz .Lstore_1byte
+ movzbq 1(%rsi), %r8
+ movzbq (%rsi, %rdx), %r9
+ movb %r8b, 1(%rdi)
+ movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+ movb %cl, (%rdi)
.Lend:
retq
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 79bd454..2dcb380 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,16 +19,15 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
movq %rdi,%r9
- movl %edx,%r8d
- andl $7,%r8d
- movl %edx,%ecx
- shrl $3,%ecx
+ movq %rdx,%rcx
+ andl $7,%edx
+ shrq $3,%rcx
/* expand byte value */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
- mulq %rsi /* with rax, clobbers rdx */
+ imulq %rsi,%rax
rep stosq
- movl %r8d,%ecx
+ movl %edx,%ecx
rep stosb
movq %r9,%rax
ret
@@ -50,7 +49,7 @@
.Lmemset_c_e:
movq %rdi,%r9
movb %sil,%al
- movl %edx,%ecx
+ movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
@@ -61,12 +60,11 @@ ENTRY(memset)
ENTRY(__memset)
CFI_STARTPROC
movq %rdi,%r10
- movq %rdx,%r11
/* expand byte value */
movzbl %sil,%ecx
movabs $0x0101010101010101,%rax
- mul %rcx /* with rax, clobbers rdx */
+ imulq %rcx,%rax
/* align dst */
movl %edi,%r9d
@@ -75,13 +73,13 @@ ENTRY(__memset)
CFI_REMEMBER_STATE
.Lafter_bad_alignment:
- movl %r11d,%ecx
- shrl $6,%ecx
+ movq %rdx,%rcx
+ shrq $6,%rcx
jz .Lhandle_tail
.p2align 4
.Lloop_64:
- decl %ecx
+ decq %rcx
movq %rax,(%rdi)
movq %rax,8(%rdi)
movq %rax,16(%rdi)
@@ -97,7 +95,7 @@ ENTRY(__memset)
to predict jump tables. */
.p2align 4
.Lhandle_tail:
- movl %r11d,%ecx
+ movl %edx,%ecx
andl $63&(~7),%ecx
jz .Lhandle_7
shrl $3,%ecx
@@ -109,12 +107,11 @@ ENTRY(__memset)
jnz .Lloop_8
.Lhandle_7:
- movl %r11d,%ecx
- andl $7,%ecx
+ andl $7,%edx
jz .Lende
.p2align 4
.Lloop_1:
- decl %ecx
+ decl %edx
movb %al,(%rdi)
leaq 1(%rdi),%rdi
jnz .Lloop_1
@@ -125,13 +122,13 @@ ENTRY(__memset)
CFI_RESTORE_STATE
.Lbad_alignment:
- cmpq $7,%r11
+ cmpq $7,%rdx
jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
- subq %r8,%r11
+ subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index e218d5d..d9b094c 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -760,9 +760,9 @@ survive:
break;
}
- maddr = kmap_atomic(pg, KM_USER0);
+ maddr = kmap_atomic(pg);
memcpy(maddr + offset, from, len);
- kunmap_atomic(maddr, KM_USER0);
+ kunmap_atomic(maddr);
set_page_dirty_lock(pg);
put_page(pg);
up_read(&current->mm->mmap_sem);
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index f4f29b1..6f31ee5 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -51,11 +51,11 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
}
EXPORT_SYMBOL(kmap_atomic_prot);
-void *__kmap_atomic(struct page *page)
+void *kmap_atomic(struct page *page)
{
return kmap_atomic_prot(page, kmap_prot);
}
-EXPORT_SYMBOL(__kmap_atomic);
+EXPORT_SYMBOL(kmap_atomic);
/*
* This is the same as kmap_atomic() but can map memory that doesn't
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index f581a18..f6679a7 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -308,10 +308,11 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
{
struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma, *prev_vma;
- unsigned long base = mm->mmap_base, addr = addr0;
+ struct vm_area_struct *vma;
+ unsigned long base = mm->mmap_base;
+ unsigned long addr = addr0;
unsigned long largest_hole = mm->cached_hole_size;
- int first_time = 1;
+ unsigned long start_addr;
/* don't allow allocations above current base */
if (mm->free_area_cache > base)
@@ -322,6 +323,8 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
mm->free_area_cache = base;
}
try_again:
+ start_addr = mm->free_area_cache;
+
/* make sure it can fit in the remaining address space */
if (mm->free_area_cache < len)
goto fail;
@@ -333,24 +336,18 @@ try_again:
* Lookup failure means no vma is above this address,
* i.e. return with success:
*/
- if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
+ vma = find_vma(mm, addr);
+ if (!vma)
return addr;
- /*
- * new region fits between prev_vma->vm_end and
- * vma->vm_start, use it:
- */
- if (addr + len <= vma->vm_start &&
- (!prev_vma || (addr >= prev_vma->vm_end))) {
+ if (addr + len <= vma->vm_start) {
/* remember the address as a hint for next time */
mm->cached_hole_size = largest_hole;
return (mm->free_area_cache = addr);
- } else {
+ } else if (mm->free_area_cache == vma->vm_end) {
/* pull free_area_cache down to the first hole */
- if (mm->free_area_cache == vma->vm_end) {
- mm->free_area_cache = vma->vm_start;
- mm->cached_hole_size = largest_hole;
- }
+ mm->free_area_cache = vma->vm_start;
+ mm->cached_hole_size = largest_hole;
}
/* remember the largest hole we saw so far */
@@ -366,10 +363,9 @@ fail:
* if hint left us with no space for the requested
* mapping then try again:
*/
- if (first_time) {
+ if (start_addr != base) {
mm->free_area_cache = base;
largest_hole = 0;
- first_time = 0;
goto try_again;
}
/*
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6cabf65..4f0cec7 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -12,7 +12,6 @@
#include <asm/page_types.h>
#include <asm/sections.h>
#include <asm/setup.h>
-#include <asm/system.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/proto.h>
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 8663f6c..575d86f 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -35,7 +35,6 @@
#include <asm/asm.h>
#include <asm/bios_ebda.h>
#include <asm/processor.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/dma.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 436a030..fc18be0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -35,7 +35,6 @@
#include <asm/processor.h>
#include <asm/bios_ebda.h>
-#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c
index 036efbe..aef7140 100644
--- a/arch/x86/mm/kmemcheck/selftest.c
+++ b/arch/x86/mm/kmemcheck/selftest.c
@@ -1,3 +1,4 @@
+#include <linux/bug.h>
#include <linux/kernel.h>
#include "opcode.h"
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 46db568..53489ff 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -28,7 +28,7 @@ static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
return -ENOENT;
}
-static u64 mem_hole_size(u64 start, u64 end)
+static u64 __init mem_hole_size(u64 start, u64 end)
{
unsigned long start_pfn = PFN_UP(start);
unsigned long end_pfn = PFN_DOWN(end);
@@ -60,7 +60,7 @@ static int __init emu_setup_memblk(struct numa_meminfo *ei,
eb->nid = nid;
if (emu_nid_to_phys[nid] == NUMA_NO_NODE)
- emu_nid_to_phys[nid] = pb->nid;
+ emu_nid_to_phys[nid] = nid;
pb->start += size;
if (pb->start >= pb->end) {
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index cac7184..a69bcb8 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -10,7 +10,6 @@
#include <linux/spinlock.h>
#include <linux/module.h>
-#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c
index 1c1c4f4..efb5b4b 100644
--- a/arch/x86/mm/srat.c
+++ b/arch/x86/mm/srat.c
@@ -70,7 +70,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
return;
pxm = pa->proximity_domain;
apic_id = pa->apic_id;
- if (!cpu_has_x2apic && (apic_id >= 0xff)) {
+ if (!apic->apic_id_valid(apic_id)) {
printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
pxm, apic_id);
return;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 6687022..877b9a1 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -18,17 +18,17 @@
* r9d : hlen = skb->len - skb->data_len
*/
#define SKBDATA %r8
-
-sk_load_word_ind:
- .globl sk_load_word_ind
-
- add %ebx,%esi /* offset += X */
-# test %esi,%esi /* if (offset < 0) goto bpf_error; */
- js bpf_error
+#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
sk_load_word:
.globl sk_load_word
+ test %esi,%esi
+ js bpf_slow_path_word_neg
+
+sk_load_word_positive_offset:
+ .globl sk_load_word_positive_offset
+
mov %r9d,%eax # hlen
sub %esi,%eax # hlen - offset
cmp $3,%eax
@@ -37,16 +37,15 @@ sk_load_word:
bswap %eax /* ntohl() */
ret
-
-sk_load_half_ind:
- .globl sk_load_half_ind
-
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_half:
.globl sk_load_half
+ test %esi,%esi
+ js bpf_slow_path_half_neg
+
+sk_load_half_positive_offset:
+ .globl sk_load_half_positive_offset
+
mov %r9d,%eax
sub %esi,%eax # hlen - offset
cmp $1,%eax
@@ -55,14 +54,15 @@ sk_load_half:
rol $8,%ax # ntohs()
ret
-sk_load_byte_ind:
- .globl sk_load_byte_ind
- add %ebx,%esi /* offset += X */
- js bpf_error
-
sk_load_byte:
.globl sk_load_byte
+ test %esi,%esi
+ js bpf_slow_path_byte_neg
+
+sk_load_byte_positive_offset:
+ .globl sk_load_byte_positive_offset
+
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */
jle bpf_slow_path_byte
movzbl (SKBDATA,%rsi),%eax
@@ -73,25 +73,21 @@ sk_load_byte:
*
* Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
* Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value, already known positive
+ * Inputs : %esi is the offset value
*/
-ENTRY(sk_load_byte_msh)
- CFI_STARTPROC
+sk_load_byte_msh:
+ .globl sk_load_byte_msh
+ test %esi,%esi
+ js bpf_slow_path_byte_msh_neg
+
+sk_load_byte_msh_positive_offset:
+ .globl sk_load_byte_msh_positive_offset
cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
jle bpf_slow_path_byte_msh
movzbl (SKBDATA,%rsi),%ebx
and $15,%bl
shl $2,%bl
ret
- CFI_ENDPROC
-ENDPROC(sk_load_byte_msh)
-
-bpf_error:
-# force a return 0 from jit handler
- xor %eax,%eax
- mov -8(%rbp),%rbx
- leaveq
- ret
/* rsi contains offset and can be scratched */
#define bpf_slow_path_common(LEN) \
@@ -138,3 +134,67 @@ bpf_slow_path_byte_msh:
shl $2,%al
xchg %eax,%ebx
ret
+
+#define sk_negative_common(SIZE) \
+ push %rdi; /* save skb */ \
+ push %r9; \
+ push SKBDATA; \
+/* rsi already has offset */ \
+ mov $SIZE,%ecx; /* size */ \
+ call bpf_internal_load_pointer_neg_helper; \
+ test %rax,%rax; \
+ pop SKBDATA; \
+ pop %r9; \
+ pop %rdi; \
+ jz bpf_error
+
+
+bpf_slow_path_word_neg:
+ cmp SKF_MAX_NEG_OFF, %esi /* test range */
+ jl bpf_error /* offset lower -> error */
+sk_load_word_negative_offset:
+ .globl sk_load_word_negative_offset
+ sk_negative_common(4)
+ mov (%rax), %eax
+ bswap %eax
+ ret
+
+bpf_slow_path_half_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_half_negative_offset:
+ .globl sk_load_half_negative_offset
+ sk_negative_common(2)
+ mov (%rax),%ax
+ rol $8,%ax
+ movzwl %ax,%eax
+ ret
+
+bpf_slow_path_byte_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_negative_offset:
+ .globl sk_load_byte_negative_offset
+ sk_negative_common(1)
+ movzbl (%rax), %eax
+ ret
+
+bpf_slow_path_byte_msh_neg:
+ cmp SKF_MAX_NEG_OFF, %esi
+ jl bpf_error
+sk_load_byte_msh_negative_offset:
+ .globl sk_load_byte_msh_negative_offset
+ xchg %eax,%ebx /* dont lose A , X is about to be scratched */
+ sk_negative_common(1)
+ movzbl (%rax),%eax
+ and $15,%al
+ shl $2,%al
+ xchg %eax,%ebx
+ ret
+
+bpf_error:
+# force a return 0 from jit handler
+ xor %eax,%eax
+ mov -8(%rbp),%rbx
+ leaveq
+ ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 7c1b765..0597f95 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly;
* assembly code in arch/x86/net/bpf_jit.S
*/
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
-extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[];
+extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
+extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
+extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end)
set_fs(old_fs);
}
+#define CHOOSE_LOAD_FUNC(K, func) \
+ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
void bpf_jit_compile(struct sk_filter *fp)
{
@@ -289,7 +294,7 @@ void bpf_jit_compile(struct sk_filter *fp)
EMIT2(0x24, K & 0xFF); /* and imm8,%al */
} else if (K >= 0xFFFF0000) {
EMIT2(0x66, 0x25); /* and imm16,%ax */
- EMIT2(K, 2);
+ EMIT(K, 2);
} else {
EMIT1_off32(0x25, K); /* and imm32,%eax */
}
@@ -473,48 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp)
#endif
break;
case BPF_S_LD_W_ABS:
- func = sk_load_word;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_word);
common_load: seen |= SEEN_DATAREF;
- if ((int)K < 0)
- goto out;
t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call */
break;
case BPF_S_LD_H_ABS:
- func = sk_load_half;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_half);
goto common_load;
case BPF_S_LD_B_ABS:
- func = sk_load_byte;
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
goto common_load;
case BPF_S_LDX_B_MSH:
- if ((int)K < 0) {
- if (pc_ret0 > 0) {
- /* addrs[pc_ret0 - 1] is the start address */
- EMIT_JMP(addrs[pc_ret0 - 1] - addrs[i]);
- break;
- }
- CLEAR_A();
- EMIT_JMP(cleanup_addr - addrs[i]);
- break;
- }
+ func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
seen |= SEEN_DATAREF | SEEN_XREG;
- t_offset = sk_load_byte_msh - (image + addrs[i]);
+ t_offset = func - (image + addrs[i]);
EMIT1_off32(0xbe, K); /* mov imm32,%esi */
EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
break;
case BPF_S_LD_W_IND:
- func = sk_load_word_ind;
+ func = sk_load_word;
common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG;
t_offset = func - (image + addrs[i]);
- EMIT1_off32(0xbe, K); /* mov imm32,%esi */
+ if (K) {
+ if (is_imm8(K)) {
+ EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
+ } else {
+ EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
+ EMIT(K, 4);
+ }
+ } else {
+ EMIT2(0x89,0xde); /* mov %ebx,%esi */
+ }
EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */
break;
case BPF_S_LD_H_IND:
- func = sk_load_half_ind;
+ func = sk_load_half;
goto common_load_ind;
case BPF_S_LD_B_IND:
- func = sk_load_byte_ind;
+ func = sk_load_byte;
goto common_load_ind;
case BPF_S_JMP_JA:
t_offset = addrs[i + K] - addrs[i];
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index a312e76..ed2835e 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -60,6 +60,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
},
},
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=42619 */
+ {
+ .callback = set_use_crs,
+ .ident = "MSI MS-7253",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
+ DMI_MATCH(DMI_BOARD_NAME, "MS-7253"),
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+ },
+ },
/* Now for the blacklist.. */
@@ -282,9 +292,6 @@ static void add_resources(struct pci_root_info *info)
int i;
struct resource *res, *root, *conflict;
- if (!pci_use_crs)
- return;
-
coalesce_windows(info, IORESOURCE_MEM);
coalesce_windows(info, IORESOURCE_IO);
@@ -336,8 +343,13 @@ get_current_resources(struct acpi_device *device, int busnum,
acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
&info);
- add_resources(&info);
- return;
+ if (pci_use_crs) {
+ add_resources(&info);
+
+ return;
+ }
+
+ kfree(info.name);
name_alloc_fail:
kfree(info.res);
@@ -404,7 +416,12 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
kfree(sd);
} else {
get_current_resources(device, busnum, domain, &resources);
- if (list_empty(&resources))
+
+ /*
+ * _CRS with no apertures is normal, so only fall back to
+ * defaults or native bridge info if we're ignoring _CRS.
+ */
+ if (!pci_use_crs)
x86_pci_root_bus_resources(busnum, &resources);
bus = pci_create_root_bus(NULL, busnum, &pci_root_ops, sd,
&resources);
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6dd8955..d0e6e40 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -164,11 +164,11 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_
*/
static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
{
- if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI &&
- (dev->device & 0xff00) == 0x2400)
+ if ((dev->device & 0xff00) == 0x2400)
dev->transparent = 1;
}
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_fixup_transparent_bridge);
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID,
+ PCI_CLASS_BRIDGE_PCI, 8, pci_fixup_transparent_bridge);
/*
* Fixup for C1 Halt Disconnect problem on nForce2 systems.
@@ -322,9 +322,6 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
struct pci_bus *bus;
u16 config;
- if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
- return;
-
/* Is VGA routed to us? */
bus = pdev->bus;
while (bus) {
@@ -353,7 +350,8 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev)
dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n");
}
}
-DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video);
+DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 91821a1..831971e 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -39,6 +39,87 @@
#include <asm/io_apic.h>
+/*
+ * This list of dynamic mappings is for temporarily maintaining
+ * original BIOS BAR addresses for possible reinstatement.
+ */
+struct pcibios_fwaddrmap {
+ struct list_head list;
+ struct pci_dev *dev;
+ resource_size_t fw_addr[DEVICE_COUNT_RESOURCE];
+};
+
+static LIST_HEAD(pcibios_fwaddrmappings);
+static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock);
+
+/* Must be called with 'pcibios_fwaddrmap_lock' lock held. */
+static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev)
+{
+ struct pcibios_fwaddrmap *map;
+
+ WARN_ON(!spin_is_locked(&pcibios_fwaddrmap_lock));
+
+ list_for_each_entry(map, &pcibios_fwaddrmappings, list)
+ if (map->dev == dev)
+ return map;
+
+ return NULL;
+}
+
+static void
+pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr)
+{
+ unsigned long flags;
+ struct pcibios_fwaddrmap *map;
+
+ spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
+ map = pcibios_fwaddrmap_lookup(dev);
+ if (!map) {
+ spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return;
+
+ map->dev = pci_dev_get(dev);
+ map->fw_addr[idx] = fw_addr;
+ INIT_LIST_HEAD(&map->list);
+
+ spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
+ list_add_tail(&map->list, &pcibios_fwaddrmappings);
+ } else
+ map->fw_addr[idx] = fw_addr;
+ spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
+}
+
+resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx)
+{
+ unsigned long flags;
+ struct pcibios_fwaddrmap *map;
+ resource_size_t fw_addr = 0;
+
+ spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
+ map = pcibios_fwaddrmap_lookup(dev);
+ if (map)
+ fw_addr = map->fw_addr[idx];
+ spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
+
+ return fw_addr;
+}
+
+static void pcibios_fw_addr_list_del(void)
+{
+ unsigned long flags;
+ struct pcibios_fwaddrmap *entry, *next;
+
+ spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags);
+ list_for_each_entry_safe(entry, next, &pcibios_fwaddrmappings, list) {
+ list_del(&entry->list);
+ pci_dev_put(entry->dev);
+ kfree(entry);
+ }
+ spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags);
+}
+
static int
skip_isa_ioresource_align(struct pci_dev *dev) {
@@ -182,7 +263,8 @@ static void __init pcibios_allocate_resources(int pass)
idx, r, disabled, pass);
if (pci_claim_resource(dev, idx) < 0) {
/* We'll assign a new address later */
- dev->fw_addr[idx] = r->start;
+ pcibios_save_fw_addr(dev,
+ idx, r->start);
r->end -= r->start;
r->start = 0;
}
@@ -228,6 +310,7 @@ static int __init pcibios_assign_resources(void)
}
pci_assign_unassigned_resources();
+ pcibios_fw_addr_list_del();
return 0;
}
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index cb29191..140942f 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -43,6 +43,8 @@
#define PCI_FIXED_BAR_4_SIZE 0x14
#define PCI_FIXED_BAR_5_SIZE 0x1c
+static int pci_soc_mode = 0;
+
/**
* fixed_bar_cap - return the offset of the fixed BAR cap if found
* @bus: PCI bus
@@ -148,7 +150,9 @@ static bool type1_access_ok(unsigned int bus, unsigned int devfn, int reg)
*/
if (reg >= 0x100 || reg == PCI_STATUS || reg == PCI_HEADER_TYPE)
return 0;
- if (bus == 0 && (devfn == PCI_DEVFN(2, 0) || devfn == PCI_DEVFN(0, 0)))
+ if (bus == 0 && (devfn == PCI_DEVFN(2, 0)
+ || devfn == PCI_DEVFN(0, 0)
+ || devfn == PCI_DEVFN(3, 0)))
return 1;
return 0; /* langwell on others */
}
@@ -231,14 +235,43 @@ struct pci_ops pci_mrst_ops = {
*/
int __init pci_mrst_init(void)
{
- printk(KERN_INFO "Moorestown platform detected, using MRST PCI ops\n");
+ printk(KERN_INFO "Intel MID platform detected, using MID PCI ops\n");
pci_mmcfg_late_init();
pcibios_enable_irq = mrst_pci_irq_enable;
pci_root_ops = pci_mrst_ops;
+ pci_soc_mode = 1;
/* Continue with standard init */
return 1;
}
+/* Langwell devices are not true pci devices, they are not subject to 10 ms
+ * d3 to d0 delay required by pci spec.
+ */
+static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
+{
+ /* PCI fixups are effectively decided compile time. If we have a dual
+ SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */
+ if (!pci_soc_mode)
+ return;
+ /* true pci devices in lincroft should allow type 1 access, the rest
+ * are langwell fake pci devices.
+ */
+ if (type1_access_ok(dev->bus->number, dev->devfn, PCI_DEVICE_ID))
+ return;
+ dev->d3_delay = 0;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
+
+static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev)
+{
+ pci_set_power_state(dev, PCI_D3cold);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0801, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0809, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x080C, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0812, mrst_power_off_unused_dev);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
+
/*
* Langwell devices reside at fixed offsets, don't try to move them.
*/
@@ -248,6 +281,9 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
u32 size;
int i;
+ if (!pci_soc_mode)
+ return;
+
/* Must have extended configuration space */
if (dev->cfg_size < PCIE_CAP_OFFSET + 4)
return;
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index d99346e..7415aa9 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
out:
return ret;
}
+
+static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+ int ret = 0;
+
+ if (pci_seg_supported) {
+ struct physdev_pci_device restore_ext;
+
+ restore_ext.seg = pci_domain_nr(dev->bus);
+ restore_ext.bus = dev->bus->number;
+ restore_ext.devfn = dev->devfn;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
+ &restore_ext);
+ if (ret == -ENOSYS)
+ pci_seg_supported = false;
+ WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
+ }
+ if (!pci_seg_supported) {
+ struct physdev_restore_msi restore;
+
+ restore.bus = dev->bus->number;
+ restore.devfn = dev->devfn;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
+ WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
+ }
+}
#endif
static void xen_teardown_msi_irqs(struct pci_dev *dev)
@@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void)
#ifdef CONFIG_PCI_MSI
x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
#endif
xen_setup_acpi_sci();
__acpi_register_gsi = acpi_register_gsi_xen;
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts
index e70be38..ce874f8 100644
--- a/arch/x86/platform/ce4100/falconfalls.dts
+++ b/arch/x86/platform/ce4100/falconfalls.dts
@@ -208,16 +208,19 @@
interrupts = <14 1>;
};
- gpio@b,1 {
+ pcigpio: gpio@b,1 {
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
compatible = "pci8086,2e67.2",
"pci8086,2e67",
"pciclassff0000",
"pciclassff00";
- #gpio-cells = <2>;
reg = <0x15900 0x0 0x0 0x0 0x0>;
interrupts = <15 1>;
+ interrupt-controller;
gpio-controller;
+ intel,muxctl = <0>;
};
i2c-controller@b,2 {
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 4cf9bd0..92660eda 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -26,6 +26,8 @@
* Skip non-WB memory and ignore empty memory ranges.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/efi.h>
@@ -47,7 +49,6 @@
#include <asm/x86_init.h>
#define EFI_DEBUG 1
-#define PFX "EFI: "
int efi_enabled;
EXPORT_SYMBOL(efi_enabled);
@@ -67,6 +68,9 @@ EXPORT_SYMBOL(efi);
struct efi_memory_map memmap;
+bool efi_64bit;
+static bool efi_native;
+
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
@@ -254,7 +258,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
status = efi.get_time(&eft, &cap);
if (status != EFI_SUCCESS) {
- printk(KERN_ERR "Oops: efitime: can't read time!\n");
+ pr_err("Oops: efitime: can't read time!\n");
return -1;
}
@@ -268,7 +272,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
status = efi.set_time(&eft);
if (status != EFI_SUCCESS) {
- printk(KERN_ERR "Oops: efitime: can't write time!\n");
+ pr_err("Oops: efitime: can't write time!\n");
return -1;
}
return 0;
@@ -282,7 +286,7 @@ unsigned long efi_get_time(void)
status = efi.get_time(&eft, &cap);
if (status != EFI_SUCCESS)
- printk(KERN_ERR "Oops: efitime: can't read time!\n");
+ pr_err("Oops: efitime: can't read time!\n");
return mktime(eft.year, eft.month, eft.day, eft.hour,
eft.minute, eft.second);
@@ -338,11 +342,16 @@ static void __init do_add_efi_memmap(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
-void __init efi_memblock_x86_reserve_range(void)
+int __init efi_memblock_x86_reserve_range(void)
{
unsigned long pmap;
#ifdef CONFIG_X86_32
+ /* Can't handle data above 4GB at this time */
+ if (boot_params.efi_info.efi_memmap_hi) {
+ pr_err("Memory map is above 4GB, disabling EFI.\n");
+ return -EINVAL;
+ }
pmap = boot_params.efi_info.efi_memmap;
#else
pmap = (boot_params.efi_info.efi_memmap |
@@ -354,6 +363,8 @@ void __init efi_memblock_x86_reserve_range(void)
memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
memblock_reserve(pmap, memmap.nr_map * memmap.desc_size);
+
+ return 0;
}
#if EFI_DEBUG
@@ -367,7 +378,7 @@ static void __init print_efi_memmap(void)
p < memmap.map_end;
p += memmap.desc_size, i++) {
md = p;
- printk(KERN_INFO PFX "mem%02u: type=%u, attr=0x%llx, "
+ pr_info("mem%02u: type=%u, attr=0x%llx, "
"range=[0x%016llx-0x%016llx) (%lluMB)\n",
i, md->type, md->attribute, md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
@@ -400,7 +411,7 @@ void __init efi_reserve_boot_services(void)
memblock_is_region_reserved(start, size)) {
/* Could not reserve, skip it */
md->num_pages = 0;
- memblock_dbg(PFX "Could not reserve boot range "
+ memblock_dbg("Could not reserve boot range "
"[0x%010llx-0x%010llx]\n",
start, start+size-1);
} else
@@ -429,103 +440,172 @@ static void __init efi_free_boot_services(void)
}
}
-void __init efi_init(void)
+static int __init efi_systab_init(void *phys)
{
- efi_config_table_t *config_tables;
- efi_runtime_services_t *runtime;
- efi_char16_t *c16;
- char vendor[100] = "unknown";
- int i = 0;
- void *tmp;
+ if (efi_64bit) {
+ efi_system_table_64_t *systab64;
+ u64 tmp = 0;
+
+ systab64 = early_ioremap((unsigned long)phys,
+ sizeof(*systab64));
+ if (systab64 == NULL) {
+ pr_err("Couldn't map the system table!\n");
+ return -ENOMEM;
+ }
+ efi_systab.hdr = systab64->hdr;
+ efi_systab.fw_vendor = systab64->fw_vendor;
+ tmp |= systab64->fw_vendor;
+ efi_systab.fw_revision = systab64->fw_revision;
+ efi_systab.con_in_handle = systab64->con_in_handle;
+ tmp |= systab64->con_in_handle;
+ efi_systab.con_in = systab64->con_in;
+ tmp |= systab64->con_in;
+ efi_systab.con_out_handle = systab64->con_out_handle;
+ tmp |= systab64->con_out_handle;
+ efi_systab.con_out = systab64->con_out;
+ tmp |= systab64->con_out;
+ efi_systab.stderr_handle = systab64->stderr_handle;
+ tmp |= systab64->stderr_handle;
+ efi_systab.stderr = systab64->stderr;
+ tmp |= systab64->stderr;
+ efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
+ tmp |= systab64->runtime;
+ efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
+ tmp |= systab64->boottime;
+ efi_systab.nr_tables = systab64->nr_tables;
+ efi_systab.tables = systab64->tables;
+ tmp |= systab64->tables;
+
+ early_iounmap(systab64, sizeof(*systab64));
#ifdef CONFIG_X86_32
- efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
-#else
- efi_phys.systab = (efi_system_table_t *)
- (boot_params.efi_info.efi_systab |
- ((__u64)boot_params.efi_info.efi_systab_hi<<32));
+ if (tmp >> 32) {
+ pr_err("EFI data located above 4GB, disabling EFI.\n");
+ return -EINVAL;
+ }
#endif
+ } else {
+ efi_system_table_32_t *systab32;
+
+ systab32 = early_ioremap((unsigned long)phys,
+ sizeof(*systab32));
+ if (systab32 == NULL) {
+ pr_err("Couldn't map the system table!\n");
+ return -ENOMEM;
+ }
+
+ efi_systab.hdr = systab32->hdr;
+ efi_systab.fw_vendor = systab32->fw_vendor;
+ efi_systab.fw_revision = systab32->fw_revision;
+ efi_systab.con_in_handle = systab32->con_in_handle;
+ efi_systab.con_in = systab32->con_in;
+ efi_systab.con_out_handle = systab32->con_out_handle;
+ efi_systab.con_out = systab32->con_out;
+ efi_systab.stderr_handle = systab32->stderr_handle;
+ efi_systab.stderr = systab32->stderr;
+ efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
+ efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
+ efi_systab.nr_tables = systab32->nr_tables;
+ efi_systab.tables = systab32->tables;
+
+ early_iounmap(systab32, sizeof(*systab32));
+ }
- efi.systab = early_ioremap((unsigned long)efi_phys.systab,
- sizeof(efi_system_table_t));
- if (efi.systab == NULL)
- printk(KERN_ERR "Couldn't map the EFI system table!\n");
- memcpy(&efi_systab, efi.systab, sizeof(efi_system_table_t));
- early_iounmap(efi.systab, sizeof(efi_system_table_t));
efi.systab = &efi_systab;
/*
* Verify the EFI Table
*/
- if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
- printk(KERN_ERR "EFI system table signature incorrect!\n");
+ if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
+ pr_err("System table signature incorrect!\n");
+ return -EINVAL;
+ }
if ((efi.systab->hdr.revision >> 16) == 0)
- printk(KERN_ERR "Warning: EFI system table version "
+ pr_err("Warning: System table version "
"%d.%02d, expected 1.00 or greater!\n",
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff);
- /*
- * Show what we know for posterity
- */
- c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
- if (c16) {
- for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
- vendor[i] = *c16++;
- vendor[i] = '\0';
- } else
- printk(KERN_ERR PFX "Could not map the firmware vendor!\n");
- early_iounmap(tmp, 2);
+ return 0;
+}
- printk(KERN_INFO "EFI v%u.%.02u by %s\n",
- efi.systab->hdr.revision >> 16,
- efi.systab->hdr.revision & 0xffff, vendor);
+static int __init efi_config_init(u64 tables, int nr_tables)
+{
+ void *config_tables, *tablep;
+ int i, sz;
+
+ if (efi_64bit)
+ sz = sizeof(efi_config_table_64_t);
+ else
+ sz = sizeof(efi_config_table_32_t);
/*
* Let's see what config tables the firmware passed to us.
*/
- config_tables = early_ioremap(
- efi.systab->tables,
- efi.systab->nr_tables * sizeof(efi_config_table_t));
- if (config_tables == NULL)
- printk(KERN_ERR "Could not map EFI Configuration Table!\n");
+ config_tables = early_ioremap(tables, nr_tables * sz);
+ if (config_tables == NULL) {
+ pr_err("Could not map Configuration table!\n");
+ return -ENOMEM;
+ }
- printk(KERN_INFO);
+ tablep = config_tables;
+ pr_info("");
for (i = 0; i < efi.systab->nr_tables; i++) {
- if (!efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID)) {
- efi.mps = config_tables[i].table;
- printk(" MPS=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- ACPI_20_TABLE_GUID)) {
- efi.acpi20 = config_tables[i].table;
- printk(" ACPI 2.0=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- ACPI_TABLE_GUID)) {
- efi.acpi = config_tables[i].table;
- printk(" ACPI=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- SMBIOS_TABLE_GUID)) {
- efi.smbios = config_tables[i].table;
- printk(" SMBIOS=0x%lx ", config_tables[i].table);
+ efi_guid_t guid;
+ unsigned long table;
+
+ if (efi_64bit) {
+ u64 table64;
+ guid = ((efi_config_table_64_t *)tablep)->guid;
+ table64 = ((efi_config_table_64_t *)tablep)->table;
+ table = table64;
+#ifdef CONFIG_X86_32
+ if (table64 >> 32) {
+ pr_cont("\n");
+ pr_err("Table located above 4GB, disabling EFI.\n");
+ early_iounmap(config_tables,
+ efi.systab->nr_tables * sz);
+ return -EINVAL;
+ }
+#endif
+ } else {
+ guid = ((efi_config_table_32_t *)tablep)->guid;
+ table = ((efi_config_table_32_t *)tablep)->table;
+ }
+ if (!efi_guidcmp(guid, MPS_TABLE_GUID)) {
+ efi.mps = table;
+ pr_cont(" MPS=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, ACPI_20_TABLE_GUID)) {
+ efi.acpi20 = table;
+ pr_cont(" ACPI 2.0=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, ACPI_TABLE_GUID)) {
+ efi.acpi = table;
+ pr_cont(" ACPI=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) {
+ efi.smbios = table;
+ pr_cont(" SMBIOS=0x%lx ", table);
#ifdef CONFIG_X86_UV
- } else if (!efi_guidcmp(config_tables[i].guid,
- UV_SYSTEM_TABLE_GUID)) {
- efi.uv_systab = config_tables[i].table;
- printk(" UVsystab=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(guid, UV_SYSTEM_TABLE_GUID)) {
+ efi.uv_systab = table;
+ pr_cont(" UVsystab=0x%lx ", table);
#endif
- } else if (!efi_guidcmp(config_tables[i].guid,
- HCDP_TABLE_GUID)) {
- efi.hcdp = config_tables[i].table;
- printk(" HCDP=0x%lx ", config_tables[i].table);
- } else if (!efi_guidcmp(config_tables[i].guid,
- UGA_IO_PROTOCOL_GUID)) {
- efi.uga = config_tables[i].table;
- printk(" UGA=0x%lx ", config_tables[i].table);
+ } else if (!efi_guidcmp(guid, HCDP_TABLE_GUID)) {
+ efi.hcdp = table;
+ pr_cont(" HCDP=0x%lx ", table);
+ } else if (!efi_guidcmp(guid, UGA_IO_PROTOCOL_GUID)) {
+ efi.uga = table;
+ pr_cont(" UGA=0x%lx ", table);
}
+ tablep += sz;
}
- printk("\n");
- early_iounmap(config_tables,
- efi.systab->nr_tables * sizeof(efi_config_table_t));
+ pr_cont("\n");
+ early_iounmap(config_tables, efi.systab->nr_tables * sz);
+ return 0;
+}
+
+static int __init efi_runtime_init(void)
+{
+ efi_runtime_services_t *runtime;
/*
* Check out the runtime services table. We need to map
@@ -535,43 +615,116 @@ void __init efi_init(void)
*/
runtime = early_ioremap((unsigned long)efi.systab->runtime,
sizeof(efi_runtime_services_t));
- if (runtime != NULL) {
- /*
- * We will only need *early* access to the following
- * two EFI runtime services before set_virtual_address_map
- * is invoked.
- */
- efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
- efi_phys.set_virtual_address_map =
- (efi_set_virtual_address_map_t *)
- runtime->set_virtual_address_map;
- /*
- * Make efi_get_time can be called before entering
- * virtual mode.
- */
- efi.get_time = phys_efi_get_time;
- } else
- printk(KERN_ERR "Could not map the EFI runtime service "
- "table!\n");
+ if (!runtime) {
+ pr_err("Could not map the runtime service table!\n");
+ return -ENOMEM;
+ }
+ /*
+ * We will only need *early* access to the following
+ * two EFI runtime services before set_virtual_address_map
+ * is invoked.
+ */
+ efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
+ efi_phys.set_virtual_address_map =
+ (efi_set_virtual_address_map_t *)
+ runtime->set_virtual_address_map;
+ /*
+ * Make efi_get_time can be called before entering
+ * virtual mode.
+ */
+ efi.get_time = phys_efi_get_time;
early_iounmap(runtime, sizeof(efi_runtime_services_t));
+ return 0;
+}
+
+static int __init efi_memmap_init(void)
+{
/* Map the EFI memory map */
memmap.map = early_ioremap((unsigned long)memmap.phys_map,
memmap.nr_map * memmap.desc_size);
- if (memmap.map == NULL)
- printk(KERN_ERR "Could not map the EFI memory map!\n");
+ if (memmap.map == NULL) {
+ pr_err("Could not map the memory map!\n");
+ return -ENOMEM;
+ }
memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size);
- if (memmap.desc_size != sizeof(efi_memory_desc_t))
- printk(KERN_WARNING
- "Kernel-defined memdesc doesn't match the one from EFI!\n");
-
if (add_efi_memmap)
do_add_efi_memmap();
+ return 0;
+}
+
+void __init efi_init(void)
+{
+ efi_char16_t *c16;
+ char vendor[100] = "unknown";
+ int i = 0;
+ void *tmp;
+
+#ifdef CONFIG_X86_32
+ if (boot_params.efi_info.efi_systab_hi ||
+ boot_params.efi_info.efi_memmap_hi) {
+ pr_info("Table located above 4GB, disabling EFI.\n");
+ efi_enabled = 0;
+ return;
+ }
+ efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
+ efi_native = !efi_64bit;
+#else
+ efi_phys.systab = (efi_system_table_t *)
+ (boot_params.efi_info.efi_systab |
+ ((__u64)boot_params.efi_info.efi_systab_hi<<32));
+ efi_native = efi_64bit;
+#endif
+
+ if (efi_systab_init(efi_phys.systab)) {
+ efi_enabled = 0;
+ return;
+ }
+
+ /*
+ * Show what we know for posterity
+ */
+ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
+ if (c16) {
+ for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
+ vendor[i] = *c16++;
+ vendor[i] = '\0';
+ } else
+ pr_err("Could not map the firmware vendor!\n");
+ early_iounmap(tmp, 2);
+
+ pr_info("EFI v%u.%.02u by %s\n",
+ efi.systab->hdr.revision >> 16,
+ efi.systab->hdr.revision & 0xffff, vendor);
+
+ if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
+ efi_enabled = 0;
+ return;
+ }
+
+ /*
+ * Note: We currently don't support runtime services on an EFI
+ * that doesn't match the kernel 32/64-bit mode.
+ */
+
+ if (!efi_native)
+ pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
+ else if (efi_runtime_init()) {
+ efi_enabled = 0;
+ return;
+ }
+
+ if (efi_memmap_init()) {
+ efi_enabled = 0;
+ return;
+ }
#ifdef CONFIG_X86_32
- x86_platform.get_wallclock = efi_get_time;
- x86_platform.set_wallclock = efi_set_rtc_mmss;
+ if (efi_native) {
+ x86_platform.get_wallclock = efi_get_time;
+ x86_platform.set_wallclock = efi_set_rtc_mmss;
+ }
#endif
#if EFI_DEBUG
@@ -629,6 +782,14 @@ void __init efi_enter_virtual_mode(void)
efi.systab = NULL;
+ /*
+ * We don't do virtual mode, since we don't do runtime services, on
+ * non-native EFI
+ */
+
+ if (!efi_native)
+ goto out;
+
/* Merge contiguous regions of the same type and attribute */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
u64 prev_size;
@@ -677,7 +838,7 @@ void __init efi_enter_virtual_mode(void)
md->virt_addr = (u64) (unsigned long) va;
if (!va) {
- printk(KERN_ERR PFX "ioremap of 0x%llX failed!\n",
+ pr_err("ioremap of 0x%llX failed!\n",
(unsigned long long)md->phys_addr);
continue;
}
@@ -711,8 +872,8 @@ void __init efi_enter_virtual_mode(void)
(efi_memory_desc_t *)__pa(new_memmap));
if (status != EFI_SUCCESS) {
- printk(KERN_ALERT "Unable to switch EFI into virtual mode "
- "(status=%lx)!\n", status);
+ pr_alert("Unable to switch EFI into virtual mode "
+ "(status=%lx)!\n", status);
panic("EFI call to SetVirtualAddressMap() failed!");
}
@@ -744,6 +905,8 @@ void __init efi_enter_virtual_mode(void)
efi.query_capsule_caps = virt_efi_query_capsule_caps;
if (__supported_pte_mask & _PAGE_NX)
runtime_code_page_mkexec();
+
+out:
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
kfree(new_memmap);
diff --git a/arch/x86/platform/geode/Makefile b/arch/x86/platform/geode/Makefile
index 07c9cd0..5b51194 100644
--- a/arch/x86/platform/geode/Makefile
+++ b/arch/x86/platform/geode/Makefile
@@ -1 +1,3 @@
obj-$(CONFIG_ALIX) += alix.o
+obj-$(CONFIG_NET5501) += net5501.o
+obj-$(CONFIG_GEOS) += geos.o
diff --git a/arch/x86/platform/geode/alix.c b/arch/x86/platform/geode/alix.c
index dc5f1d3..90e23e7 100644
--- a/arch/x86/platform/geode/alix.c
+++ b/arch/x86/platform/geode/alix.c
@@ -6,6 +6,7 @@
*
* Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
* Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
+ * and Philip Prindeville <philipp@redfish-solutions.com>
*
* TODO: There are large similarities with leds-net5501.c
* by Alessandro Zummo <a.zummo@towertech.it>
@@ -24,14 +25,47 @@
#include <linux/leds.h>
#include <linux/platform_device.h>
#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+#include <linux/dmi.h>
#include <asm/geode.h>
+#define BIOS_SIGNATURE_TINYBIOS 0xf0000
+#define BIOS_SIGNATURE_COREBOOT 0x500
+#define BIOS_REGION_SIZE 0x10000
+
static bool force = 0;
module_param(force, bool, 0444);
/* FIXME: Award bios is not automatically detected as Alix platform */
MODULE_PARM_DESC(force, "Force detection as ALIX.2/ALIX.3 platform");
+static struct gpio_keys_button alix_gpio_buttons[] = {
+ {
+ .code = KEY_RESTART,
+ .gpio = 24,
+ .active_low = 1,
+ .desc = "Reset button",
+ .type = EV_KEY,
+ .wakeup = 0,
+ .debounce_interval = 100,
+ .can_disable = 0,
+ }
+};
+static struct gpio_keys_platform_data alix_buttons_data = {
+ .buttons = alix_gpio_buttons,
+ .nbuttons = ARRAY_SIZE(alix_gpio_buttons),
+ .poll_interval = 20,
+};
+
+static struct platform_device alix_buttons_dev = {
+ .name = "gpio-keys-polled",
+ .id = 1,
+ .dev = {
+ .platform_data = &alix_buttons_data,
+ }
+};
+
static struct gpio_led alix_leds[] = {
{
.name = "alix:1",
@@ -64,17 +98,22 @@ static struct platform_device alix_leds_dev = {
.dev.platform_data = &alix_leds_data,
};
+static struct __initdata platform_device *alix_devs[] = {
+ &alix_buttons_dev,
+ &alix_leds_dev,
+};
+
static void __init register_alix(void)
{
/* Setup LED control through leds-gpio driver */
- platform_device_register(&alix_leds_dev);
+ platform_add_devices(alix_devs, ARRAY_SIZE(alix_devs));
}
-static int __init alix_present(unsigned long bios_phys,
+static bool __init alix_present(unsigned long bios_phys,
const char *alix_sig,
size_t alix_sig_len)
{
- const size_t bios_len = 0x00010000;
+ const size_t bios_len = BIOS_REGION_SIZE;
const char *bios_virt;
const char *scan_end;
const char *p;
@@ -84,7 +123,7 @@ static int __init alix_present(unsigned long bios_phys,
printk(KERN_NOTICE "%s: forced to skip BIOS test, "
"assume system is ALIX.2/ALIX.3\n",
KBUILD_MODNAME);
- return 1;
+ return true;
}
bios_virt = phys_to_virt(bios_phys);
@@ -109,15 +148,33 @@ static int __init alix_present(unsigned long bios_phys,
*a = '\0';
tail = p + alix_sig_len;
- if ((tail[0] == '2' || tail[0] == '3')) {
+ if ((tail[0] == '2' || tail[0] == '3' || tail[0] == '6')) {
printk(KERN_INFO
"%s: system is recognized as \"%s\"\n",
KBUILD_MODNAME, name);
- return 1;
+ return true;
}
}
- return 0;
+ return false;
+}
+
+static bool __init alix_present_dmi(void)
+{
+ const char *vendor, *product;
+
+ vendor = dmi_get_system_info(DMI_SYS_VENDOR);
+ if (!vendor || strcmp(vendor, "PC Engines"))
+ return false;
+
+ product = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!product || (strcmp(product, "ALIX.2D") && strcmp(product, "ALIX.6")))
+ return false;
+
+ printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n",
+ KBUILD_MODNAME, vendor, product);
+
+ return true;
}
static int __init alix_init(void)
@@ -128,8 +185,9 @@ static int __init alix_init(void)
if (!is_geode())
return 0;
- if (alix_present(0xf0000, tinybios_sig, sizeof(tinybios_sig) - 1) ||
- alix_present(0x500, coreboot_sig, sizeof(coreboot_sig) - 1))
+ if (alix_present(BIOS_SIGNATURE_TINYBIOS, tinybios_sig, sizeof(tinybios_sig) - 1) ||
+ alix_present(BIOS_SIGNATURE_COREBOOT, coreboot_sig, sizeof(coreboot_sig) - 1) ||
+ alix_present_dmi())
register_alix();
return 0;
diff --git a/arch/x86/platform/geode/geos.c b/arch/x86/platform/geode/geos.c
new file mode 100644
index 0000000..c2e6d53
--- /dev/null
+++ b/arch/x86/platform/geode/geos.c
@@ -0,0 +1,128 @@
+/*
+ * System Specific setup for Traverse Technologies GEOS.
+ * At the moment this means setup of GPIO control of LEDs.
+ *
+ * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
+ * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
+ * and Philip Prindeville <philipp@redfish-solutions.com>
+ *
+ * TODO: There are large similarities with leds-net5501.c
+ * by Alessandro Zummo <a.zummo@towertech.it>
+ * In the future leds-net5501.c should be migrated over to platform
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+#include <linux/dmi.h>
+
+#include <asm/geode.h>
+
+static struct gpio_keys_button geos_gpio_buttons[] = {
+ {
+ .code = KEY_RESTART,
+ .gpio = 3,
+ .active_low = 1,
+ .desc = "Reset button",
+ .type = EV_KEY,
+ .wakeup = 0,
+ .debounce_interval = 100,
+ .can_disable = 0,
+ }
+};
+static struct gpio_keys_platform_data geos_buttons_data = {
+ .buttons = geos_gpio_buttons,
+ .nbuttons = ARRAY_SIZE(geos_gpio_buttons),
+ .poll_interval = 20,
+};
+
+static struct platform_device geos_buttons_dev = {
+ .name = "gpio-keys-polled",
+ .id = 1,
+ .dev = {
+ .platform_data = &geos_buttons_data,
+ }
+};
+
+static struct gpio_led geos_leds[] = {
+ {
+ .name = "geos:1",
+ .gpio = 6,
+ .default_trigger = "default-on",
+ .active_low = 1,
+ },
+ {
+ .name = "geos:2",
+ .gpio = 25,
+ .default_trigger = "default-off",
+ .active_low = 1,
+ },
+ {
+ .name = "geos:3",
+ .gpio = 27,
+ .default_trigger = "default-off",
+ .active_low = 1,
+ },
+};
+
+static struct gpio_led_platform_data geos_leds_data = {
+ .num_leds = ARRAY_SIZE(geos_leds),
+ .leds = geos_leds,
+};
+
+static struct platform_device geos_leds_dev = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev.platform_data = &geos_leds_data,
+};
+
+static struct __initdata platform_device *geos_devs[] = {
+ &geos_buttons_dev,
+ &geos_leds_dev,
+};
+
+static void __init register_geos(void)
+{
+ /* Setup LED control through leds-gpio driver */
+ platform_add_devices(geos_devs, ARRAY_SIZE(geos_devs));
+}
+
+static int __init geos_init(void)
+{
+ const char *vendor, *product;
+
+ if (!is_geode())
+ return 0;
+
+ vendor = dmi_get_system_info(DMI_SYS_VENDOR);
+ if (!vendor || strcmp(vendor, "Traverse Technologies"))
+ return 0;
+
+ product = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!product || strcmp(product, "Geos"))
+ return 0;
+
+ printk(KERN_INFO "%s: system is recognized as \"%s %s\"\n",
+ KBUILD_MODNAME, vendor, product);
+
+ register_geos();
+
+ return 0;
+}
+
+module_init(geos_init);
+
+MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
+MODULE_DESCRIPTION("Traverse Technologies Geos System Setup");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c
new file mode 100644
index 0000000..66d377e
--- /dev/null
+++ b/arch/x86/platform/geode/net5501.c
@@ -0,0 +1,154 @@
+/*
+ * System Specific setup for Soekris net5501
+ * At the moment this means setup of GPIO control of LEDs and buttons
+ * on net5501 boards.
+ *
+ *
+ * Copyright (C) 2008-2009 Tower Technologies
+ * Written by Alessandro Zummo <a.zummo@towertech.it>
+ *
+ * Copyright (C) 2008 Constantin Baranov <const@mimas.ru>
+ * Copyright (C) 2011 Ed Wildgoose <kernel@wildgooses.com>
+ * and Philip Prindeville <philipp@redfish-solutions.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/leds.h>
+#include <linux/platform_device.h>
+#include <linux/gpio.h>
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+
+#include <asm/geode.h>
+
+#define BIOS_REGION_BASE 0xffff0000
+#define BIOS_REGION_SIZE 0x00010000
+
+static struct gpio_keys_button net5501_gpio_buttons[] = {
+ {
+ .code = KEY_RESTART,
+ .gpio = 24,
+ .active_low = 1,
+ .desc = "Reset button",
+ .type = EV_KEY,
+ .wakeup = 0,
+ .debounce_interval = 100,
+ .can_disable = 0,
+ }
+};
+static struct gpio_keys_platform_data net5501_buttons_data = {
+ .buttons = net5501_gpio_buttons,
+ .nbuttons = ARRAY_SIZE(net5501_gpio_buttons),
+ .poll_interval = 20,
+};
+
+static struct platform_device net5501_buttons_dev = {
+ .name = "gpio-keys-polled",
+ .id = 1,
+ .dev = {
+ .platform_data = &net5501_buttons_data,
+ }
+};
+
+static struct gpio_led net5501_leds[] = {
+ {
+ .name = "net5501:1",
+ .gpio = 6,
+ .default_trigger = "default-on",
+ .active_low = 1,
+ },
+};
+
+static struct gpio_led_platform_data net5501_leds_data = {
+ .num_leds = ARRAY_SIZE(net5501_leds),
+ .leds = net5501_leds,
+};
+
+static struct platform_device net5501_leds_dev = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev.platform_data = &net5501_leds_data,
+};
+
+static struct __initdata platform_device *net5501_devs[] = {
+ &net5501_buttons_dev,
+ &net5501_leds_dev,
+};
+
+static void __init register_net5501(void)
+{
+ /* Setup LED control through leds-gpio driver */
+ platform_add_devices(net5501_devs, ARRAY_SIZE(net5501_devs));
+}
+
+struct net5501_board {
+ u16 offset;
+ u16 len;
+ char *sig;
+};
+
+static struct net5501_board __initdata boards[] = {
+ { 0xb7b, 7, "net5501" }, /* net5501 v1.33/1.33c */
+ { 0xb1f, 7, "net5501" }, /* net5501 v1.32i */
+};
+
+static bool __init net5501_present(void)
+{
+ int i;
+ unsigned char *rombase, *bios;
+ bool found = false;
+
+ rombase = ioremap(BIOS_REGION_BASE, BIOS_REGION_SIZE - 1);
+ if (!rombase) {
+ printk(KERN_ERR "%s: failed to get rombase\n", KBUILD_MODNAME);
+ return found;
+ }
+
+ bios = rombase + 0x20; /* null terminated */
+
+ if (memcmp(bios, "comBIOS", 7))
+ goto unmap;
+
+ for (i = 0; i < ARRAY_SIZE(boards); i++) {
+ unsigned char *model = rombase + boards[i].offset;
+
+ if (!memcmp(model, boards[i].sig, boards[i].len)) {
+ printk(KERN_INFO "%s: system is recognized as \"%s\"\n",
+ KBUILD_MODNAME, model);
+
+ found = true;
+ break;
+ }
+ }
+
+unmap:
+ iounmap(rombase);
+ return found;
+}
+
+static int __init net5501_init(void)
+{
+ if (!is_geode())
+ return 0;
+
+ if (!net5501_present())
+ return 0;
+
+ register_net5501();
+
+ return 0;
+}
+
+module_init(net5501_init);
+
+MODULE_AUTHOR("Philip Prindeville <philipp@redfish-solutions.com>");
+MODULE_DESCRIPTION("Soekris net5501 System Setup");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index 7baed51..af1da7e 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1,4 +1,3 @@
obj-$(CONFIG_X86_INTEL_MID) += mrst.o
obj-$(CONFIG_X86_INTEL_MID) += vrtc.o
obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_mrst.o
-obj-$(CONFIG_X86_MRST) += pmu.o
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 475e2cd..e0a3723 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -28,6 +28,8 @@
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/mfd/intel_msic.h>
+#include <linux/gpio.h>
+#include <linux/i2c/tc35876x.h>
#include <asm/setup.h>
#include <asm/mpspec_def.h>
@@ -78,16 +80,11 @@ int sfi_mrtc_num;
static void mrst_power_off(void)
{
- if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
- intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 1);
}
static void mrst_reboot(void)
{
- if (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT)
- intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
- else
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+ intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
}
/* parse all the mtimer info to a static mtimer array */
@@ -200,34 +197,28 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
static unsigned long __init mrst_calibrate_tsc(void)
{
- unsigned long flags, fast_calibrate;
- if (__mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) {
- u32 lo, hi, ratio, fsb;
-
- rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
- pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
- ratio = (hi >> 8) & 0x1f;
- pr_debug("ratio is %d\n", ratio);
- if (!ratio) {
- pr_err("read a zero ratio, should be incorrect!\n");
- pr_err("force tsc ratio to 16 ...\n");
- ratio = 16;
- }
- rdmsr(MSR_FSB_FREQ, lo, hi);
- if ((lo & 0x7) == 0x7)
- fsb = PENWELL_FSB_FREQ_83SKU;
- else
- fsb = PENWELL_FSB_FREQ_100SKU;
- fast_calibrate = ratio * fsb;
- pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
- lapic_timer_frequency = fsb * 1000 / HZ;
- /* mark tsc clocksource as reliable */
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
- } else {
- local_irq_save(flags);
- fast_calibrate = apbt_quick_calibrate();
- local_irq_restore(flags);
+ unsigned long fast_calibrate;
+ u32 lo, hi, ratio, fsb;
+
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
+ ratio = (hi >> 8) & 0x1f;
+ pr_debug("ratio is %d\n", ratio);
+ if (!ratio) {
+ pr_err("read a zero ratio, should be incorrect!\n");
+ pr_err("force tsc ratio to 16 ...\n");
+ ratio = 16;
}
+ rdmsr(MSR_FSB_FREQ, lo, hi);
+ if ((lo & 0x7) == 0x7)
+ fsb = PENWELL_FSB_FREQ_83SKU;
+ else
+ fsb = PENWELL_FSB_FREQ_100SKU;
+ fast_calibrate = ratio * fsb;
+ pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
+ lapic_timer_frequency = fsb * 1000 / HZ;
+ /* mark tsc clocksource as reliable */
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
if (fast_calibrate)
return fast_calibrate;
@@ -261,16 +252,11 @@ static void __cpuinit mrst_arch_setup(void)
{
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27)
__mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
- else if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x26)
- __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
else {
- pr_err("Unknown Moorestown CPU (%d:%d), default to Lincroft\n",
+ pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
boot_cpu_data.x86, boot_cpu_data.x86_model);
- __mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
+ __mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
}
- pr_debug("Moorestown CPU %s identified\n",
- (__mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
- "Lincroft" : "Penwell");
}
/* MID systems don't have i8042 controller */
@@ -686,6 +672,24 @@ static void *msic_ocd_platform_data(void *info)
return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
}
+static void *msic_thermal_platform_data(void *info)
+{
+ return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
+}
+
+/* tc35876x DSI-LVDS bridge chip and panel platform data */
+static void *tc35876x_platform_data(void *data)
+{
+ static struct tc35876x_platform_data pdata;
+
+ /* gpio pins set to -1 will not be used by the driver */
+ pdata.gpio_bridge_reset = get_gpio_by_name("LCMB_RXEN");
+ pdata.gpio_panel_bl_en = get_gpio_by_name("6S6P_BL_EN");
+ pdata.gpio_panel_vadd = get_gpio_by_name("EN_VREG_LCD_V3P3");
+
+ return &pdata;
+}
+
static const struct devs_id __initconst device_ids[] = {
{"bma023", SFI_DEV_TYPE_I2C, 1, &no_platform_data},
{"pmic_gpio", SFI_DEV_TYPE_SPI, 1, &pmic_gpio_platform_data},
@@ -698,6 +702,7 @@ static const struct devs_id __initconst device_ids[] = {
{"i2c_accel", SFI_DEV_TYPE_I2C, 0, &lis331dl_platform_data},
{"pmic_audio", SFI_DEV_TYPE_IPC, 1, &no_platform_data},
{"mpu3050", SFI_DEV_TYPE_I2C, 1, &mpu3050_platform_data},
+ {"i2c_disp_brig", SFI_DEV_TYPE_I2C, 0, &tc35876x_platform_data},
/* MSIC subdevices */
{"msic_battery", SFI_DEV_TYPE_IPC, 1, &msic_battery_platform_data},
@@ -705,6 +710,7 @@ static const struct devs_id __initconst device_ids[] = {
{"msic_audio", SFI_DEV_TYPE_IPC, 1, &msic_audio_platform_data},
{"msic_power_btn", SFI_DEV_TYPE_IPC, 1, &msic_power_btn_platform_data},
{"msic_ocd", SFI_DEV_TYPE_IPC, 1, &msic_ocd_platform_data},
+ {"msic_thermal", SFI_DEV_TYPE_IPC, 1, &msic_thermal_platform_data},
{},
};
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
deleted file mode 100644
index c0ac06d..0000000
--- a/arch/x86/platform/mrst/pmu.c
+++ /dev/null
@@ -1,817 +0,0 @@
-/*
- * mrst/pmu.c - driver for MRST Power Management Unit
- *
- * Copyright (c) 2011, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/cpuidle.h>
-#include <linux/debugfs.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/seq_file.h>
-#include <linux/sfi.h>
-#include <asm/intel_scu_ipc.h>
-#include "pmu.h"
-
-#define IPCMSG_FW_REVISION 0xF4
-
-struct mrst_device {
- u16 pci_dev_num; /* DEBUG only */
- u16 lss;
- u16 latest_request;
- unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
-};
-
-/*
- * comlete list of MRST PCI devices
- */
-static struct mrst_device mrst_devs[] = {
-/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
-/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
-/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
-/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
-/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
-/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
-/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
-/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
-/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
-/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
-/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
-/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
-/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
-/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
-/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
-/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
-/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
-/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
-/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
-/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
-/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
-/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
-
-/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
-
-/* 23 */ { 0x4102, 0 }, /* Lincroft */
-/* 24 */ { 0x4110, 0 }, /* Lincroft */
-};
-
-/* n.b. We ignore PCI-id 0x815 in LSS9 b/c Linux has no driver for it */
-static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
-static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
- 0x0804, 0x0805, 0x080f, 0};
-
-/* handle concurrent SMP invokations of pmu_pci_set_power_state() */
-static spinlock_t mrst_pmu_power_state_lock;
-
-static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
-static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
-
-static int graphics_is_off;
-static int lss_s0i3_enabled;
-static bool mrst_pmu_s0i3_enable;
-
-/* debug counters */
-static u32 pmu_wait_ready_calls;
-static u32 pmu_wait_ready_udelays;
-static u32 pmu_wait_ready_udelays_max;
-static u32 pmu_wait_done_calls;
-static u32 pmu_wait_done_udelays;
-static u32 pmu_wait_done_udelays_max;
-static u32 pmu_set_power_state_entry;
-static u32 pmu_set_power_state_send_cmd;
-
-static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
-{
- int index = 0;
-
- if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
- index = pci_dev_num - 0x800;
- else if (pci_dev_num == 0x084F)
- index = 22;
- else if (pci_dev_num == 0x4102)
- index = 23;
- else if (pci_dev_num == 0x4110)
- index = 24;
-
- if (pci_dev_num != mrst_devs[index].pci_dev_num) {
- WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
- return 0;
- }
-
- return &mrst_devs[index];
-}
-
-/**
- * mrst_pmu_validate_cstates
- * @dev: cpuidle_device
- *
- * Certain states are not appropriate for governor to pick in some cases.
- * This function will be called as cpuidle_device's prepare callback and
- * thus tells governor to ignore such states when selecting the next state
- * to enter.
- */
-
-#define IDLE_STATE4_IS_C6 4
-#define IDLE_STATE5_IS_S0I3 5
-
-int mrst_pmu_invalid_cstates(void)
-{
- int cpu = smp_processor_id();
-
- /*
- * Demote to C4 if the PMU is busy.
- * Since LSS changes leave the busy bit clear...
- * busy means either the PMU is waiting for an ACK-C6 that
- * isn't coming due to an MWAIT that returned immediately;
- * or we returned from S0i3 successfully, and the PMU
- * is not done sending us interrupts.
- */
- if (pmu_read_busy_status())
- return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
-
- /*
- * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
- * or if device LSS is insufficient, or the GPU is active,
- * or if it has been explicitly disabled.
- */
- if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
- !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
- return 1 << IDLE_STATE5_IS_S0I3;
- else
- return 0;
-}
-
-/*
- * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
- * DEBUG only.
- */
-static void pmu_update_wake_counters(void)
-{
- int lss;
- u32 wake_status;
-
- wake_status = pmu_read_wks();
-
- for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
- if (wake_status & (1 << lss))
- wake_counters[lss]++;
- }
-}
-
-int mrst_pmu_s0i3_entry(void)
-{
- int status;
-
- /* Clear any possible error conditions */
- pmu_write_ics(0x300);
-
- /* set wake control to current D-states */
- pmu_write_wssc(S0I3_SSS_TARGET);
-
- status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
- pmu_update_wake_counters();
- return status;
-}
-
-/* poll for maximum of 5ms for busy bit to clear */
-static int pmu_wait_ready(void)
-{
- int udelays;
-
- pmu_wait_ready_calls++;
-
- for (udelays = 0; udelays < 500; ++udelays) {
- if (udelays > pmu_wait_ready_udelays_max)
- pmu_wait_ready_udelays_max = udelays;
-
- if (pmu_read_busy_status() == 0)
- return 0;
-
- udelay(10);
- pmu_wait_ready_udelays++;
- }
-
- /*
- * if this fires, observe
- * /sys/kernel/debug/mrst_pmu_wait_ready_calls
- * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
- */
- WARN_ONCE(1, "SCU not ready for 5ms");
- return -EBUSY;
-}
-/* poll for maximum of 50ms us for busy bit to clear */
-static int pmu_wait_done(void)
-{
- int udelays;
-
- pmu_wait_done_calls++;
-
- for (udelays = 0; udelays < 500; ++udelays) {
- if (udelays > pmu_wait_done_udelays_max)
- pmu_wait_done_udelays_max = udelays;
-
- if (pmu_read_busy_status() == 0)
- return 0;
-
- udelay(100);
- pmu_wait_done_udelays++;
- }
-
- /*
- * if this fires, observe
- * /sys/kernel/debug/mrst_pmu_wait_done_calls
- * /sys/kernel/debug/mrst_pmu_wait_done_udelays
- */
- WARN_ONCE(1, "SCU not done for 50ms");
- return -EBUSY;
-}
-
-u32 mrst_pmu_msi_is_disabled(void)
-{
- return pmu_msi_is_disabled();
-}
-
-void mrst_pmu_enable_msi(void)
-{
- pmu_msi_enable();
-}
-
-/**
- * pmu_irq - pmu driver interrupt handler
- * Context: interrupt context
- */
-static irqreturn_t pmu_irq(int irq, void *dummy)
-{
- union pmu_pm_ics pmu_ics;
-
- pmu_ics.value = pmu_read_ics();
-
- if (!pmu_ics.bits.pending)
- return IRQ_NONE;
-
- switch (pmu_ics.bits.cause) {
- case INT_SPURIOUS:
- case INT_CMD_DONE:
- case INT_CMD_ERR:
- case INT_WAKE_RX:
- case INT_SS_ERROR:
- case INT_S0IX_MISS:
- case INT_NO_ACKC6:
- pmu_irq_stats[pmu_ics.bits.cause]++;
- break;
- default:
- pmu_irq_stats[INT_INVALID]++;
- }
-
- pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
-
- return IRQ_HANDLED;
-}
-
-/*
- * Translate PCI power management to MRST LSS D-states
- */
-static int pci_2_mrst_state(int lss, pci_power_t pci_state)
-{
- switch (pci_state) {
- case PCI_D0:
- if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
- return D0i1;
- else
- return D0;
- case PCI_D1:
- return D0i1;
- case PCI_D2:
- return D0i2;
- case PCI_D3hot:
- case PCI_D3cold:
- return D0i3;
- default:
- WARN(1, "pci_state %d\n", pci_state);
- return 0;
- }
-}
-
-static int pmu_issue_command(u32 pm_ssc)
-{
- union pmu_pm_set_cfg_cmd_t command;
-
- if (pmu_read_busy_status()) {
- pr_debug("pmu is busy, Operation not permitted\n");
- return -1;
- }
-
- /*
- * enable interrupts in PMU so that interrupts are
- * propagated when ioc bit for a particular set
- * command is set
- */
-
- pmu_irq_enable();
-
- /* Configure the sub systems for pmu2 */
-
- pmu_write_ssc(pm_ssc);
-
- /*
- * Send the set config command for pmu its configured
- * for mode CM_IMMEDIATE & hence with No Trigger
- */
-
- command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
- command.pmu2_params.d_param.cfg_delay = 0;
- command.pmu2_params.d_param.rsvd = 0;
-
- /* construct the command to send SET_CFG to particular PMU */
- command.pmu2_params.d_param.cmd = SET_CFG_CMD;
- command.pmu2_params.d_param.ioc = 0;
- command.pmu2_params.d_param.mode_id = 0;
- command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
-
- /* write the value of PM_CMD into particular PMU */
- pr_debug("pmu command being written %x\n",
- command.pmu_pm_set_cfg_cmd_value);
-
- pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
-
- return 0;
-}
-
-static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
-{
- u16 existing_request;
- int i;
-
- for (i = 0; ids[i]; ++i) {
- struct mrst_device *mrst_dev;
-
- mrst_dev = pci_id_2_mrst_dev(ids[i]);
- if (unlikely(!mrst_dev))
- continue;
-
- existing_request = mrst_dev->latest_request;
- if (existing_request < pci_state)
- pci_state = existing_request;
- }
- return pci_state;
-}
-
-/**
- * pmu_pci_set_power_state - Callback function is used by all the PCI devices
- * for a platform specific device power on/shutdown.
- */
-
-int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
-{
- u32 old_sss, new_sss;
- int status = 0;
- struct mrst_device *mrst_dev;
-
- pmu_set_power_state_entry++;
-
- BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
- BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
-
- mrst_dev = pci_id_2_mrst_dev(pdev->device);
- if (unlikely(!mrst_dev))
- return -ENODEV;
-
- mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
-
- /* PMU driver calls self as part of PCI initialization, ignore */
- if (pdev->device == PCI_DEV_ID_MRST_PMU)
- return 0;
-
- BUG_ON(!pmu_reg); /* SW bug if called before initialized */
-
- spin_lock(&mrst_pmu_power_state_lock);
-
- if (pdev->d3_delay) {
- dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
- pdev->d3_delay);
- pdev->d3_delay = 0;
- }
- /*
- * If Lincroft graphics, simply remember state
- */
- if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
- && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
- if (pci_state == PCI_D0)
- graphics_is_off = 0;
- else
- graphics_is_off = 1;
- goto ret;
- }
-
- if (!mrst_dev->lss)
- goto ret; /* device with no LSS */
-
- if (mrst_dev->latest_request == pci_state)
- goto ret; /* no change */
-
- mrst_dev->latest_request = pci_state; /* record latest request */
-
- /*
- * LSS9 and LSS10 contain multiple PCI devices.
- * Use the lowest numbered (highest power) state in the LSS
- */
- if (mrst_dev->lss == 9)
- pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
- else if (mrst_dev->lss == 10)
- pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
-
- status = pmu_wait_ready();
- if (status)
- goto ret;
-
- old_sss = pmu_read_sss();
- new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
- new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
- mrst_dev->lss);
-
- if (new_sss == old_sss)
- goto ret; /* nothing to do */
-
- pmu_set_power_state_send_cmd++;
-
- status = pmu_issue_command(new_sss);
-
- if (unlikely(status != 0)) {
- dev_err(&pdev->dev, "Failed to Issue a PM command\n");
- goto ret;
- }
-
- if (pmu_wait_done())
- goto ret;
-
- lss_s0i3_enabled =
- ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
-ret:
- spin_unlock(&mrst_pmu_power_state_lock);
- return status;
-}
-
-#ifdef CONFIG_DEBUG_FS
-static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
-
-static inline const char *d0ix_name(int state)
-{
- return d0ix_names[(int) state];
-}
-
-static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
-{
- struct pci_dev *pdev = NULL;
- u32 cur_pmsss;
- int lss;
-
- seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
-
- cur_pmsss = pmu_read_sss();
-
- seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
-
- seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
- seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
-
- if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
- seq_printf(s, "cpu0 is only cpu online\n");
- else
- seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
-
- seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
-
-
- for_each_pci_dev(pdev) {
- int pos;
- u16 pmcsr;
- struct mrst_device *mrst_dev;
- int i;
-
- mrst_dev = pci_id_2_mrst_dev(pdev->device);
-
- seq_printf(s, "%s %04x/%04X %-16.16s ",
- dev_name(&pdev->dev),
- pdev->vendor, pdev->device,
- dev_driver_string(&pdev->dev));
-
- if (unlikely (!mrst_dev)) {
- seq_printf(s, " UNKNOWN\n");
- continue;
- }
-
- if (mrst_dev->lss)
- seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
- d0ix_name(((cur_pmsss >>
- (mrst_dev->lss * 2)) & 0x3)));
- else
- seq_printf(s, " ");
-
- /* PCI PM config space setting */
- pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
- if (pos != 0) {
- pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
- seq_printf(s, "PCI-%-4s",
- pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
- } else {
- seq_printf(s, " ");
- }
-
- seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
- for (i = 0; i <= PCI_D3cold; ++i)
- seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
-
- if (mrst_dev->lss) {
- unsigned int lssmask;
-
- lssmask = SSMSK(D0i3, mrst_dev->lss);
-
- if ((lssmask & S0I3_SSS_TARGET) &&
- ((lssmask & cur_pmsss) !=
- (lssmask & S0I3_SSS_TARGET)))
- seq_printf(s , "[BLOCKS s0i3]");
- }
-
- seq_printf(s, "\n");
- }
- seq_printf(s, "Wake Counters:\n");
- for (lss = 0; lss < MRST_NUM_LSS; ++lss)
- seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
-
- seq_printf(s, "Interrupt Counters:\n");
- seq_printf(s,
- "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
- "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
- "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
- "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
- pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
- pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
- pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
- pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
-
- seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
- pmu_wait_ready_calls);
- seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
- pmu_wait_ready_udelays);
- seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
- pmu_wait_ready_udelays_max);
- seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
- pmu_wait_done_calls);
- seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
- pmu_wait_done_udelays);
- seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
- pmu_wait_done_udelays_max);
- seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
- pmu_set_power_state_entry);
- seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
- pmu_set_power_state_send_cmd);
- seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
-
- return 0;
-}
-
-static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
-{
- return single_open(file, debug_mrst_pmu_show, NULL);
-}
-
-static const struct file_operations devices_state_operations = {
- .open = debug_mrst_pmu_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-#endif /* DEBUG_FS */
-
-/*
- * Validate SCU PCI shim PCI vendor capability byte
- * against LSS hard-coded in mrst_devs[] above.
- * DEBUG only.
- */
-static void pmu_scu_firmware_debug(void)
-{
- struct pci_dev *pdev = NULL;
-
- for_each_pci_dev(pdev) {
- struct mrst_device *mrst_dev;
- u8 pci_config_lss;
- int pos;
-
- mrst_dev = pci_id_2_mrst_dev(pdev->device);
- if (unlikely(!mrst_dev)) {
- printk(KERN_ERR FW_BUG "pmu: Unknown "
- "PCI device 0x%04X\n", pdev->device);
- continue;
- }
-
- if (mrst_dev->lss == 0)
- continue; /* no LSS in our table */
-
- pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
- if (!pos != 0) {
- printk(KERN_ERR FW_BUG "pmu: 0x%04X "
- "missing PCI Vendor Capability\n",
- pdev->device);
- continue;
- }
- pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
- if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
- printk(KERN_ERR FW_BUG "pmu: 0x%04X "
- "invalid PCI Vendor Capability 0x%x "
- " expected LSS 0x%X\n",
- pdev->device, pci_config_lss, mrst_dev->lss);
- continue;
- }
- pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
-
- if (mrst_dev->lss == pci_config_lss)
- continue;
-
- printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
- pdev->device, pci_config_lss, mrst_dev->lss);
- }
-}
-
-/**
- * pmu_probe
- */
-static int __devinit pmu_probe(struct pci_dev *pdev,
- const struct pci_device_id *pci_id)
-{
- int ret;
- struct mrst_pmu_reg *pmu;
-
- /* Init the device */
- ret = pci_enable_device(pdev);
- if (ret) {
- dev_err(&pdev->dev, "Unable to Enable PCI device\n");
- return ret;
- }
-
- ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
- if (ret < 0) {
- dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
- goto out_err1;
- }
-
- /* Map the memory of PMU reg base */
- pmu = pci_iomap(pdev, 0, 0);
- if (!pmu) {
- dev_err(&pdev->dev, "Unable to map the PMU address space\n");
- ret = -ENOMEM;
- goto out_err2;
- }
-
-#ifdef CONFIG_DEBUG_FS
- /* /sys/kernel/debug/mrst_pmu */
- (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
- NULL, NULL, &devices_state_operations);
-#endif
- pmu_reg = pmu; /* success */
-
- if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) {
- dev_err(&pdev->dev, "Registering isr has failed\n");
- ret = -1;
- goto out_err3;
- }
-
- pmu_scu_firmware_debug();
-
- pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
-
- pmu_wait_ready();
-
- pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
- pmu_write_cmd(0x201);
-
- spin_lock_init(&mrst_pmu_power_state_lock);
-
- /* Enable the hardware interrupt */
- pmu_irq_enable();
- return 0;
-
-out_err3:
- free_irq(pdev->irq, NULL);
- pci_iounmap(pdev, pmu_reg);
- pmu_reg = NULL;
-out_err2:
- pci_release_region(pdev, 0);
-out_err1:
- pci_disable_device(pdev);
- return ret;
-}
-
-static void __devexit pmu_remove(struct pci_dev *pdev)
-{
- dev_err(&pdev->dev, "Mid PM pmu_remove called\n");
-
- /* Freeing up the irq */
- free_irq(pdev->irq, NULL);
-
- pci_iounmap(pdev, pmu_reg);
- pmu_reg = NULL;
-
- /* disable the current PCI device */
- pci_release_region(pdev, 0);
- pci_disable_device(pdev);
-}
-
-static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
- { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
- { }
-};
-
-MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
-
-static struct pci_driver driver = {
- .name = MRST_PMU_DRV_NAME,
- .id_table = pmu_pci_ids,
- .probe = pmu_probe,
- .remove = __devexit_p(pmu_remove),
-};
-
-/**
- * pmu_pci_register - register the PMU driver as PCI device
- */
-static int __init pmu_pci_register(void)
-{
- return pci_register_driver(&driver);
-}
-
-/* Register and probe via fs_initcall() to preceed device_initcall() */
-fs_initcall(pmu_pci_register);
-
-static void __exit mid_pci_cleanup(void)
-{
- pci_unregister_driver(&driver);
-}
-
-static int ia_major;
-static int ia_minor;
-
-static int pmu_sfi_parse_oem(struct sfi_table_header *table)
-{
- struct sfi_table_simple *sb;
-
- sb = (struct sfi_table_simple *)table;
- ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
- ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
- printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
- ia_major, ia_minor);
-
- return 0;
-}
-
-static int __init scu_fw_check(void)
-{
- int ret;
- u32 fw_version;
-
- if (!pmu_reg)
- return 0; /* this driver didn't probe-out */
-
- sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
-
- if (ia_major < 0x6005 || ia_minor < 0x1525) {
- WARN(1, "mrst_pmu: IA FW version too old\n");
- return -1;
- }
-
- ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
- &fw_version, 1);
-
- if (ret) {
- WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
- } else {
- int scu_major = (fw_version >> 8) & 0xFF;
- int scu_minor = (fw_version >> 0) & 0xFF;
-
- printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
-
- if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
- printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
- mrst_pmu_s0i3_enable = true;
- } else {
- WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
- scu_major, scu_minor);
- }
- }
- return 0;
-}
-late_initcall(scu_fw_check);
-module_exit(mid_pci_cleanup);
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
deleted file mode 100644
index bfbfe64..0000000
--- a/arch/x86/platform/mrst/pmu.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c
- *
- * Copyright (c) 2011, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _MRST_PMU_H_
-#define _MRST_PMU_H_
-
-#define PCI_DEV_ID_MRST_PMU 0x0810
-#define MRST_PMU_DRV_NAME "mrst_pmu"
-#define PCI_SUB_CLASS_MASK 0xFF00
-
-#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
-#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
-
-#define SUB_SYS_ALL_D0I1 0x01155555
-#define S0I3_WAKE_SOURCES 0x00001FFF
-
-#define PM_S0I3_COMMAND \
- ((0 << 31) | /* Reserved */ \
- (0 << 30) | /* Core must be idle */ \
- (0xc2 << 22) | /* ACK C6 trigger */ \
- (3 << 19) | /* Trigger on DMI message */ \
- (3 << 16) | /* Enter S0i3 */ \
- (0 << 13) | /* Numeric mode ID (sw) */ \
- (3 << 9) | /* Trigger mode */ \
- (0 << 8) | /* Do not interrupt */ \
- (1 << 0)) /* Set configuration */
-
-#define LSS_DMI 0
-#define LSS_SD_HC0 1
-#define LSS_SD_HC1 2
-#define LSS_NAND 3
-#define LSS_IMAGING 4
-#define LSS_SECURITY 5
-#define LSS_DISPLAY 6
-#define LSS_USB_HC 7
-#define LSS_USB_OTG 8
-#define LSS_AUDIO 9
-#define LSS_AUDIO_LPE 9
-#define LSS_AUDIO_SSP 9
-#define LSS_I2C0 10
-#define LSS_I2C1 10
-#define LSS_I2C2 10
-#define LSS_KBD 10
-#define LSS_SPI0 10
-#define LSS_SPI1 10
-#define LSS_SPI2 10
-#define LSS_GPIO 10
-#define LSS_SRAM 11 /* used by SCU, do not touch */
-#define LSS_SD_HC2 12
-/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
-#define MRST_NUM_LSS 13
-
-#define MIN(a, b) (((a) < (b)) ? (a) : (b))
-
-#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
-#define D0 0
-#define D0i1 1
-#define D0i2 2
-#define D0i3 3
-
-#define S0I3_SSS_TARGET ( \
- SSMSK(D0i1, LSS_DMI) | \
- SSMSK(D0i3, LSS_SD_HC0) | \
- SSMSK(D0i3, LSS_SD_HC1) | \
- SSMSK(D0i3, LSS_NAND) | \
- SSMSK(D0i3, LSS_SD_HC2) | \
- SSMSK(D0i3, LSS_IMAGING) | \
- SSMSK(D0i3, LSS_SECURITY) | \
- SSMSK(D0i3, LSS_DISPLAY) | \
- SSMSK(D0i3, LSS_USB_HC) | \
- SSMSK(D0i3, LSS_USB_OTG) | \
- SSMSK(D0i3, LSS_AUDIO) | \
- SSMSK(D0i1, LSS_I2C0))
-
-/*
- * D0i1 on Langwell is Autonomous Clock Gating (ACG).
- * Enable ACG on every LSS except camera and audio
- */
-#define D0I1_ACG_SSS_TARGET \
- (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
-
-enum cm_mode {
- CM_NOP, /* ignore the config mode value */
- CM_IMMEDIATE,
- CM_DELAY,
- CM_TRIGGER,
- CM_INVALID
-};
-
-enum sys_state {
- SYS_STATE_S0I0,
- SYS_STATE_S0I1,
- SYS_STATE_S0I2,
- SYS_STATE_S0I3,
- SYS_STATE_S3,
- SYS_STATE_S5
-};
-
-#define SET_CFG_CMD 1
-
-enum int_status {
- INT_SPURIOUS = 0,
- INT_CMD_DONE = 1,
- INT_CMD_ERR = 2,
- INT_WAKE_RX = 3,
- INT_SS_ERROR = 4,
- INT_S0IX_MISS = 5,
- INT_NO_ACKC6 = 6,
- INT_INVALID = 7,
-};
-
-/* PMU register interface */
-static struct mrst_pmu_reg {
- u32 pm_sts; /* 0x00 */
- u32 pm_cmd; /* 0x04 */
- u32 pm_ics; /* 0x08 */
- u32 _resv1; /* 0x0C */
- u32 pm_wkc[2]; /* 0x10 */
- u32 pm_wks[2]; /* 0x18 */
- u32 pm_ssc[4]; /* 0x20 */
- u32 pm_sss[4]; /* 0x30 */
- u32 pm_wssc[4]; /* 0x40 */
- u32 pm_c3c4; /* 0x50 */
- u32 pm_c5c6; /* 0x54 */
- u32 pm_msi_disable; /* 0x58 */
-} *pmu_reg;
-
-static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
-static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
-static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
-static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
-
-static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
-static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
-static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
-static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
-static inline void pmu_write_wssc(u32 arg)
- { writel(arg, &pmu_reg->pm_wssc[0]); }
-
-static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
-static inline u32 pmu_msi_is_disabled(void)
- { return readl(&pmu_reg->pm_msi_disable); }
-
-union pmu_pm_ics {
- struct {
- u32 cause:8;
- u32 enable:1;
- u32 pending:1;
- u32 reserved:22;
- } bits;
- u32 value;
-};
-
-static inline void pmu_irq_enable(void)
-{
- union pmu_pm_ics pmu_ics;
-
- pmu_ics.value = pmu_read_ics();
- pmu_ics.bits.enable = 1;
- pmu_write_ics(pmu_ics.value);
-}
-
-union pmu_pm_status {
- struct {
- u32 pmu_rev:8;
- u32 pmu_busy:1;
- u32 mode_id:4;
- u32 Reserved:19;
- } pmu_status_parts;
- u32 pmu_status_value;
-};
-
-static inline int pmu_read_busy_status(void)
-{
- union pmu_pm_status result;
-
- result.pmu_status_value = pmu_read_sts();
-
- return result.pmu_status_parts.pmu_busy;
-}
-
-/* pmu set config parameters */
-struct cfg_delay_param_t {
- u32 cmd:8;
- u32 ioc:1;
- u32 cfg_mode:4;
- u32 mode_id:3;
- u32 sys_state:3;
- u32 cfg_delay:8;
- u32 rsvd:5;
-};
-
-struct cfg_trig_param_t {
- u32 cmd:8;
- u32 ioc:1;
- u32 cfg_mode:4;
- u32 mode_id:3;
- u32 sys_state:3;
- u32 cfg_trig_type:3;
- u32 cfg_trig_val:8;
- u32 cmbi:1;
- u32 rsvd1:1;
-};
-
-union pmu_pm_set_cfg_cmd_t {
- union {
- struct cfg_delay_param_t d_param;
- struct cfg_trig_param_t t_param;
- } pmu2_params;
- u32 pmu_pm_set_cfg_cmd_value;
-};
-
-#ifdef FUTURE_PATCH
-extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
-#else
-static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
-#endif
-#endif
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 2b235b7..23e5b9d 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -23,7 +23,66 @@
#define XO15_SCI_CLASS DRV_NAME
#define XO15_SCI_DEVICE_NAME "OLPC XO-1.5 SCI"
-static unsigned long xo15_sci_gpe;
+static unsigned long xo15_sci_gpe;
+static bool lid_wake_on_close;
+
+/*
+ * The normal ACPI LID wakeup behavior is wake-on-open, but not
+ * wake-on-close. This is implemented as standard by the XO-1.5 DSDT.
+ *
+ * We provide here a sysfs attribute that will additionally enable
+ * wake-on-close behavior. This is useful (e.g.) when we oportunistically
+ * suspend with the display running; if the lid is then closed, we want to
+ * wake up to turn the display off.
+ *
+ * This is controlled through a custom method in the XO-1.5 DSDT.
+ */
+static int set_lid_wake_behavior(bool wake_on_close)
+{
+ struct acpi_object_list arg_list;
+ union acpi_object arg;
+ acpi_status status;
+
+ arg_list.count = 1;
+ arg_list.pointer = &arg;
+ arg.type = ACPI_TYPE_INTEGER;
+ arg.integer.value = wake_on_close;
+
+ status = acpi_evaluate_object(NULL, "\\_SB.PCI0.LID.LIDW", &arg_list, NULL);
+ if (ACPI_FAILURE(status)) {
+ pr_warning(PFX "failed to set lid behavior\n");
+ return 1;
+ }
+
+ lid_wake_on_close = wake_on_close;
+
+ return 0;
+}
+
+static ssize_t
+lid_wake_on_close_show(struct kobject *s, struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", lid_wake_on_close);
+}
+
+static ssize_t lid_wake_on_close_store(struct kobject *s,
+ struct kobj_attribute *attr,
+ const char *buf, size_t n)
+{
+ unsigned int val;
+
+ if (sscanf(buf, "%u", &val) != 1)
+ return -EINVAL;
+
+ set_lid_wake_behavior(!!val);
+
+ return n;
+}
+
+static struct kobj_attribute lid_wake_on_close_attr =
+ __ATTR(lid_wake_on_close, 0644,
+ lid_wake_on_close_show,
+ lid_wake_on_close_store);
static void battery_status_changed(void)
{
@@ -91,6 +150,7 @@ static int xo15_sci_add(struct acpi_device *device)
{
unsigned long long tmp;
acpi_status status;
+ int r;
if (!device)
return -EINVAL;
@@ -112,6 +172,10 @@ static int xo15_sci_add(struct acpi_device *device)
dev_info(&device->dev, "Initialized, GPE = 0x%lx\n", xo15_sci_gpe);
+ r = sysfs_create_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
+ if (r)
+ goto err_sysfs;
+
/* Flush queue, and enable all SCI events */
process_sci_queue();
olpc_ec_mask_write(EC_SCI_SRC_ALL);
@@ -123,6 +187,11 @@ static int xo15_sci_add(struct acpi_device *device)
device_init_wakeup(&device->dev, true);
return 0;
+
+err_sysfs:
+ acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
+ cancel_work_sync(&sci_work);
+ return r;
}
static int xo15_sci_remove(struct acpi_device *device, int type)
@@ -130,6 +199,7 @@ static int xo15_sci_remove(struct acpi_device *device, int type)
acpi_disable_gpe(NULL, xo15_sci_gpe);
acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler);
cancel_work_sync(&sci_work);
+ sysfs_remove_file(&device->dev.kobj, &lid_wake_on_close_attr.attr);
return 0;
}
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 7cce722..a4bee53 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -20,6 +20,8 @@
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/syscore_ops.h>
+#include <linux/debugfs.h>
+#include <linux/mutex.h>
#include <asm/geode.h>
#include <asm/setup.h>
@@ -31,6 +33,15 @@ EXPORT_SYMBOL_GPL(olpc_platform_info);
static DEFINE_SPINLOCK(ec_lock);
+/* debugfs interface to EC commands */
+#define EC_MAX_CMD_ARGS (5 + 1) /* cmd byte + 5 args */
+#define EC_MAX_CMD_REPLY (8)
+
+static struct dentry *ec_debugfs_dir;
+static DEFINE_MUTEX(ec_debugfs_cmd_lock);
+static unsigned char ec_debugfs_resp[EC_MAX_CMD_REPLY];
+static unsigned int ec_debugfs_resp_bytes;
+
/* EC event mask to be applied during suspend (defining wakeup sources). */
static u16 ec_wakeup_mask;
@@ -269,6 +280,91 @@ int olpc_ec_sci_query(u16 *sci_value)
}
EXPORT_SYMBOL_GPL(olpc_ec_sci_query);
+static ssize_t ec_debugfs_cmd_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ int i, m;
+ unsigned char ec_cmd[EC_MAX_CMD_ARGS];
+ unsigned int ec_cmd_int[EC_MAX_CMD_ARGS];
+ char cmdbuf[64];
+ int ec_cmd_bytes;
+
+ mutex_lock(&ec_debugfs_cmd_lock);
+
+ size = simple_write_to_buffer(cmdbuf, sizeof(cmdbuf), ppos, buf, size);
+
+ m = sscanf(cmdbuf, "%x:%u %x %x %x %x %x", &ec_cmd_int[0],
+ &ec_debugfs_resp_bytes,
+ &ec_cmd_int[1], &ec_cmd_int[2], &ec_cmd_int[3],
+ &ec_cmd_int[4], &ec_cmd_int[5]);
+ if (m < 2 || ec_debugfs_resp_bytes > EC_MAX_CMD_REPLY) {
+ /* reset to prevent overflow on read */
+ ec_debugfs_resp_bytes = 0;
+
+ printk(KERN_DEBUG "olpc-ec: bad ec cmd: "
+ "cmd:response-count [arg1 [arg2 ...]]\n");
+ size = -EINVAL;
+ goto out;
+ }
+
+ /* convert scanf'd ints to char */
+ ec_cmd_bytes = m - 2;
+ for (i = 0; i <= ec_cmd_bytes; i++)
+ ec_cmd[i] = ec_cmd_int[i];
+
+ printk(KERN_DEBUG "olpc-ec: debugfs cmd 0x%02x with %d args "
+ "%02x %02x %02x %02x %02x, want %d returns\n",
+ ec_cmd[0], ec_cmd_bytes, ec_cmd[1], ec_cmd[2], ec_cmd[3],
+ ec_cmd[4], ec_cmd[5], ec_debugfs_resp_bytes);
+
+ olpc_ec_cmd(ec_cmd[0], (ec_cmd_bytes == 0) ? NULL : &ec_cmd[1],
+ ec_cmd_bytes, ec_debugfs_resp, ec_debugfs_resp_bytes);
+
+ printk(KERN_DEBUG "olpc-ec: response "
+ "%02x %02x %02x %02x %02x %02x %02x %02x (%d bytes expected)\n",
+ ec_debugfs_resp[0], ec_debugfs_resp[1], ec_debugfs_resp[2],
+ ec_debugfs_resp[3], ec_debugfs_resp[4], ec_debugfs_resp[5],
+ ec_debugfs_resp[6], ec_debugfs_resp[7], ec_debugfs_resp_bytes);
+
+out:
+ mutex_unlock(&ec_debugfs_cmd_lock);
+ return size;
+}
+
+static ssize_t ec_debugfs_cmd_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ unsigned int i, r;
+ char *rp;
+ char respbuf[64];
+
+ mutex_lock(&ec_debugfs_cmd_lock);
+ rp = respbuf;
+ rp += sprintf(rp, "%02x", ec_debugfs_resp[0]);
+ for (i = 1; i < ec_debugfs_resp_bytes; i++)
+ rp += sprintf(rp, ", %02x", ec_debugfs_resp[i]);
+ mutex_unlock(&ec_debugfs_cmd_lock);
+ rp += sprintf(rp, "\n");
+
+ r = rp - respbuf;
+ return simple_read_from_buffer(buf, size, ppos, respbuf, r);
+}
+
+static const struct file_operations ec_debugfs_genops = {
+ .write = ec_debugfs_cmd_write,
+ .read = ec_debugfs_cmd_read,
+};
+
+static void setup_debugfs(void)
+{
+ ec_debugfs_dir = debugfs_create_dir("olpc-ec", 0);
+ if (ec_debugfs_dir == ERR_PTR(-ENODEV))
+ return;
+
+ debugfs_create_file("cmd", 0600, ec_debugfs_dir, NULL,
+ &ec_debugfs_genops);
+}
+
static int olpc_ec_suspend(void)
{
return olpc_ec_mask_write(ec_wakeup_mask);
@@ -372,6 +468,7 @@ static int __init olpc_init(void)
}
register_syscore_ops(&olpc_syscore_ops);
+ setup_debugfs();
return 0;
}
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7e004ac..7a9ad30 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -17,8 +17,6 @@
/* Verify that the configuration block really is there */
#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))
-#define NAME "scx200"
-
MODULE_AUTHOR("Christer Weinigel <wingel@nano-system.com>");
MODULE_DESCRIPTION("NatSemi SCx200 Driver");
MODULE_LICENSE("GPL");
@@ -29,10 +27,10 @@ unsigned long scx200_gpio_shadow[2];
unsigned scx200_cb_base = 0;
static struct pci_device_id scx200_tbl[] = {
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
- { PCI_DEVICE(PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_BRIDGE) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_XBUS) },
+ { PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SC1100_XBUS) },
{ },
};
MODULE_DEVICE_TABLE(pci,scx200_tbl);
@@ -63,10 +61,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
if (pdev->device == PCI_DEVICE_ID_NS_SCx200_BRIDGE ||
pdev->device == PCI_DEVICE_ID_NS_SC1100_BRIDGE) {
base = pci_resource_start(pdev, 0);
- printk(KERN_INFO NAME ": GPIO base 0x%x\n", base);
+ pr_info("GPIO base 0x%x\n", base);
- if (!request_region(base, SCx200_GPIO_SIZE, "NatSemi SCx200 GPIO")) {
- printk(KERN_ERR NAME ": can't allocate I/O for GPIOs\n");
+ if (!request_region(base, SCx200_GPIO_SIZE,
+ "NatSemi SCx200 GPIO")) {
+ pr_err("can't allocate I/O for GPIOs\n");
return -EBUSY;
}
@@ -82,11 +81,11 @@ static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_
if (scx200_cb_probe(base)) {
scx200_cb_base = base;
} else {
- printk(KERN_WARNING NAME ": Configuration Block not found\n");
+ pr_warn("Configuration Block not found\n");
return -ENODEV;
}
}
- printk(KERN_INFO NAME ": Configuration Block base 0x%x\n", scx200_cb_base);
+ pr_info("Configuration Block base 0x%x\n", scx200_cb_base);
}
return 0;
@@ -111,8 +110,7 @@ u32 scx200_gpio_configure(unsigned index, u32 mask, u32 bits)
static int __init scx200_init(void)
{
- printk(KERN_INFO NAME ": NatSemi SCx200 Driver\n");
-
+ pr_info("NatSemi SCx200 Driver\n");
return pci_register_driver(&scx200_pci_driver);
}
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c
index 9f29a01..5032e0d 100644
--- a/arch/x86/platform/uv/uv_time.c
+++ b/arch/x86/platform/uv/uv_time.c
@@ -37,7 +37,7 @@ static void uv_rtc_timer_setup(enum clock_event_mode,
static struct clocksource clocksource_uv = {
.name = RTC_NAME,
- .rating = 400,
+ .rating = 299,
.read = uv_read_rtc,
.mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
@@ -379,10 +379,6 @@ static __init int uv_rtc_setup_clock(void)
if (!is_uv_system())
return -ENODEV;
- /* If single blade, prefer tsc */
- if (uv_num_possible_blades() == 1)
- clocksource_uv.rating = 250;
-
rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second);
if (rc)
printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index f10c0af..218cdb1 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -20,6 +20,7 @@
#include <asm/xcr.h>
#include <asm/suspend.h>
#include <asm/debugreg.h>
+#include <asm/fpu-internal.h> /* pcntxt_mask */
#ifdef CONFIG_X86_32
static struct saved_context saved_context;
@@ -114,7 +115,7 @@ static void __save_processor_state(struct saved_context *ctxt)
void save_processor_state(void)
{
__save_processor_state(&saved_context);
- save_sched_clock_state();
+ x86_platform.save_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(save_processor_state);
@@ -224,6 +225,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
fix_processor_context();
do_fpu_end();
+ x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
}
@@ -231,7 +233,6 @@ static void __restore_processor_state(struct saved_context *ctxt)
void restore_processor_state(void)
{
__restore_processor_state(&saved_context);
- restore_sched_clock_state();
}
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 3769079..74202c1 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -10,7 +10,6 @@
#include <linux/suspend.h>
#include <linux/bootmem.h>
-#include <asm/system.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmzone.h>
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 031cef8..29f9f05 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -288,7 +288,7 @@
279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
-282 i386 mq_getsetaddr sys_mq_getsetattr compat_sys_mq_getsetattr
+282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
284 i386 waitid sys_waitid compat_sys_waitid
# 285 sys_setaltroot
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index b2b54d2..9926e11 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -15,8 +15,8 @@ config UML_X86
select GENERIC_FIND_FIRST_BIT
config 64BIT
- bool
- default SUBARCH = "x86_64"
+ bool "64-bit kernel" if SUBARCH = "x86"
+ default SUBARCH != "i386"
config X86_32
def_bool !64BIT
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 2c32df6..04f82e0 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -17,6 +17,16 @@
#define ARCH_IS_STACKGROW(address) \
(address + 65536 + 32 * sizeof(unsigned long) >= UPT_SP(&current->thread.regs.regs))
+#include <asm/user.h>
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+ __asm__ __volatile__("rep;nop": : :"memory");
+}
+
+#define cpu_relax() rep_nop()
+
#include <asm/processor-generic.h>
#endif
diff --git a/arch/x86/um/asm/processor_32.h b/arch/x86/um/asm/processor_32.h
index 018f732..6c6689e 100644
--- a/arch/x86/um/asm/processor_32.h
+++ b/arch/x86/um/asm/processor_32.h
@@ -45,16 +45,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
memcpy(&to->tls_array, &from->tls_array, sizeof(from->tls_array));
}
-#include <asm/user.h>
-
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
- __asm__ __volatile__("rep;nop": : :"memory");
-}
-
-#define cpu_relax() rep_nop()
-
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter"). Stolen
diff --git a/arch/x86/um/asm/processor_64.h b/arch/x86/um/asm/processor_64.h
index 61de92d..4b02a84 100644
--- a/arch/x86/um/asm/processor_64.h
+++ b/arch/x86/um/asm/processor_64.h
@@ -14,14 +14,6 @@ struct arch_thread {
struct faultinfo faultinfo;
};
-/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
-static inline void rep_nop(void)
-{
- __asm__ __volatile__("rep;nop": : :"memory");
-}
-
-#define cpu_relax() rep_nop()
-
#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 7 ] = 0 }, \
.debugregs_seq = 0, \
.fs = 0, \
@@ -37,8 +29,6 @@ static inline void arch_copy_thread(struct arch_thread *from,
to->fs = from->fs;
}
-#include <asm/user.h>
-
#define current_text_addr() \
({ void *pc; __asm__("movq $1f,%0\n1:":"=g" (pc)); pc; })
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c
index a1fba5f..17d88cf 100644
--- a/arch/x86/um/bugs_32.c
+++ b/arch/x86/um/bugs_32.c
@@ -13,8 +13,6 @@
static int host_has_cmov = 1;
static jmp_buf cmov_test_return;
-#define TASK_PID(task) *((int *) &(((char *) (task))[HOST_TASK_PID]))
-
static void cmov_sigill_test_handler(int sig)
{
host_has_cmov = 0;
@@ -51,7 +49,7 @@ void arch_examine_signal(int sig, struct uml_pt_regs *regs)
* This is testing for a cmov (0x0f 0x4x) instruction causing a
* SIGILL in init.
*/
- if ((sig != SIGILL) || (TASK_PID(get_current()) != 1))
+ if ((sig != SIGILL) || (get_current_pid() != 1))
return;
if (copy_from_user_proc(tmp, (void *) UPT_IP(regs), 2)) {
diff --git a/arch/x86/um/mem_32.c b/arch/x86/um/mem_32.c
index 639900a..f40281e 100644
--- a/arch/x86/um/mem_32.c
+++ b/arch/x86/um/mem_32.c
@@ -23,14 +23,6 @@ static int __init gate_vma_init(void)
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
gate_vma.vm_page_prot = __P101;
- /*
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to see
- * what PC values meant.
- */
- gate_vma.vm_flags |= VM_ALWAYSDUMP;
-
return 0;
}
__initcall(gate_vma_init);
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index 91f4ec9..af91901 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -64,8 +64,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdsop);
up_write(&mm->mmap_sem);
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6bc0e72..885eff4 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -70,100 +70,98 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
return ret;
}
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
+{
+ long ret;
+
+ asm("syscall" : "=a" (ret) :
+ "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+ return ret;
+}
+
+
notrace static inline long vgetns(void)
{
long v;
cycles_t cycles;
if (gtod->clock.vclock_mode == VCLOCK_TSC)
cycles = vread_tsc();
- else
+ else if (gtod->clock.vclock_mode == VCLOCK_HPET)
cycles = vread_hpet();
+ else
+ return 0;
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
-notrace static noinline int do_realtime(struct timespec *ts)
+/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
+notrace static int __always_inline do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
+ int mode;
+
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
+ mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
ts->tv_nsec = gtod->wall_time_nsec;
ns = vgetns();
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+
timespec_add_ns(ts, ns);
- return 0;
+ return mode;
}
-notrace static noinline int do_monotonic(struct timespec *ts)
+notrace static int do_monotonic(struct timespec *ts)
{
- unsigned long seq, ns, secs;
+ unsigned long seq, ns;
+ int mode;
+
do {
- seq = read_seqbegin(&gtod->lock);
- secs = gtod->wall_time_sec;
- ns = gtod->wall_time_nsec + vgetns();
- secs += gtod->wall_to_monotonic.tv_sec;
- ns += gtod->wall_to_monotonic.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
-
- /* wall_time_nsec, vgetns(), and wall_to_monotonic.tv_nsec
- * are all guaranteed to be nonnegative.
- */
- while (ns >= NSEC_PER_SEC) {
- ns -= NSEC_PER_SEC;
- ++secs;
- }
- ts->tv_sec = secs;
- ts->tv_nsec = ns;
+ seq = read_seqcount_begin(&gtod->seq);
+ mode = gtod->clock.vclock_mode;
+ ts->tv_sec = gtod->monotonic_time_sec;
+ ts->tv_nsec = gtod->monotonic_time_nsec;
+ ns = vgetns();
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+ timespec_add_ns(ts, ns);
- return 0;
+ return mode;
}
-notrace static noinline int do_realtime_coarse(struct timespec *ts)
+notrace static int do_realtime_coarse(struct timespec *ts)
{
unsigned long seq;
do {
- seq = read_seqbegin(&gtod->lock);
+ seq = read_seqcount_begin(&gtod->seq);
ts->tv_sec = gtod->wall_time_coarse.tv_sec;
ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
return 0;
}
-notrace static noinline int do_monotonic_coarse(struct timespec *ts)
+notrace static int do_monotonic_coarse(struct timespec *ts)
{
- unsigned long seq, ns, secs;
+ unsigned long seq;
do {
- seq = read_seqbegin(&gtod->lock);
- secs = gtod->wall_time_coarse.tv_sec;
- ns = gtod->wall_time_coarse.tv_nsec;
- secs += gtod->wall_to_monotonic.tv_sec;
- ns += gtod->wall_to_monotonic.tv_nsec;
- } while (unlikely(read_seqretry(&gtod->lock, seq)));
-
- /* wall_time_nsec and wall_to_monotonic.tv_nsec are
- * guaranteed to be between 0 and NSEC_PER_SEC.
- */
- if (ns >= NSEC_PER_SEC) {
- ns -= NSEC_PER_SEC;
- ++secs;
- }
- ts->tv_sec = secs;
- ts->tv_nsec = ns;
+ seq = read_seqcount_begin(&gtod->seq);
+ ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
+ ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
return 0;
}
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
+ int ret = VCLOCK_NONE;
+
switch (clock) {
case CLOCK_REALTIME:
- if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
- return do_realtime(ts);
+ ret = do_realtime(ts);
break;
case CLOCK_MONOTONIC:
- if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
- return do_monotonic(ts);
+ ret = do_monotonic(ts);
break;
case CLOCK_REALTIME_COARSE:
return do_realtime_coarse(ts);
@@ -171,32 +169,33 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
return do_monotonic_coarse(ts);
}
- return vdso_fallback_gettime(clock, ts);
+ if (ret == VCLOCK_NONE)
+ return vdso_fallback_gettime(clock, ts);
+ return 0;
}
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
- long ret;
- if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) {
- if (likely(tv != NULL)) {
- BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
- offsetof(struct timespec, tv_nsec) ||
- sizeof(*tv) != sizeof(struct timespec));
- do_realtime((struct timespec *)tv);
- tv->tv_usec /= 1000;
- }
- if (unlikely(tz != NULL)) {
- /* Avoid memcpy. Some old compilers fail to inline it */
- tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
- tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
- }
- return 0;
+ long ret = VCLOCK_NONE;
+
+ if (likely(tv != NULL)) {
+ BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
+ offsetof(struct timespec, tv_nsec) ||
+ sizeof(*tv) != sizeof(struct timespec));
+ ret = do_realtime((struct timespec *)tv);
+ tv->tv_usec /= 1000;
}
- asm("syscall" : "=a" (ret) :
- "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
- return ret;
+ if (unlikely(tz != NULL)) {
+ /* Avoid memcpy. Some old compilers fail to inline it */
+ tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
+ tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
+ }
+
+ if (ret == VCLOCK_NONE)
+ return vdso_fallback_gtod(tv, tz);
+ return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 10f9f59..66e6d93 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -250,13 +250,7 @@ static int __init gate_vma_init(void)
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
gate_vma.vm_page_prot = __P101;
- /*
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully interpretable later
- * without matching up the same kernel and hardware config to see
- * what PC values meant.
- */
- gate_vma.vm_flags |= VM_ALWAYSDUMP;
+
return 0;
}
@@ -348,17 +342,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (compat_uses_vma || !compat) {
/*
* MAYWRITE to allow gdb to COW and set breakpoints
- *
- * Make sure the vDSO gets into every core dump.
- * Dumping its contents makes post-mortem fully
- * interpretable later without matching up the same
- * kernel and hardware config to see what PC values
- * meant.
*/
ret = install_special_mapping(mm, addr, PAGE_SIZE,
VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso32_pages);
if (ret)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index d7dce1d..00aaf04 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -172,8 +172,7 @@ static int setup_additional_pages(struct linux_binprm *bprm,
ret = install_special_mapping(mm, addr, size,
VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
- VM_ALWAYSDUMP,
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
pages);
if (ret) {
current->mm->context.vdso = NULL;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 4172af8..4f51beb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -62,6 +62,15 @@
#include <asm/reboot.h>
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
+#include <asm/mwait.h>
+
+#ifdef CONFIG_ACPI
+#include <linux/acpi.h>
+#include <asm/acpi.h>
+#include <acpi/pdc_intel.h>
+#include <acpi/processor.h>
+#include <xen/interface/platform.h>
+#endif
#include "xen-ops.h"
#include "mmu.h"
@@ -200,13 +209,17 @@ static void __init xen_banner(void)
static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
+static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
+static __read_mostly unsigned int cpuid_leaf5_ecx_val;
+static __read_mostly unsigned int cpuid_leaf5_edx_val;
+
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
unsigned maskebx = ~0;
unsigned maskecx = ~0;
unsigned maskedx = ~0;
-
+ unsigned setecx = 0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
@@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
switch (*ax) {
case 1:
maskecx = cpuid_leaf1_ecx_mask;
+ setecx = cpuid_leaf1_ecx_set_mask;
maskedx = cpuid_leaf1_edx_mask;
break;
+ case CPUID_MWAIT_LEAF:
+ /* Synthesize the values.. */
+ *ax = 0;
+ *bx = 0;
+ *cx = cpuid_leaf5_ecx_val;
+ *dx = cpuid_leaf5_edx_val;
+ return;
+
case 0xb:
/* Suppress extended topology stuff */
maskebx = 0;
@@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*bx &= maskebx;
*cx &= maskecx;
+ *cx |= setecx;
*dx &= maskedx;
+
}
+static bool __init xen_check_mwait(void)
+{
+#ifdef CONFIG_ACPI
+ struct xen_platform_op op = {
+ .cmd = XENPF_set_processor_pminfo,
+ .u.set_pminfo.id = -1,
+ .u.set_pminfo.type = XEN_PM_PDC,
+ };
+ uint32_t buf[3];
+ unsigned int ax, bx, cx, dx;
+ unsigned int mwait_mask;
+
+ /* We need to determine whether it is OK to expose the MWAIT
+ * capability to the kernel to harvest deeper than C3 states from ACPI
+ * _CST using the processor_harvest_xen.c module. For this to work, we
+ * need to gather the MWAIT_LEAF values (which the cstate.c code
+ * checks against). The hypervisor won't expose the MWAIT flag because
+ * it would break backwards compatibility; so we will find out directly
+ * from the hardware and hypercall.
+ */
+ if (!xen_initial_domain())
+ return false;
+
+ ax = 1;
+ cx = 0;
+
+ native_cpuid(&ax, &bx, &cx, &dx);
+
+ mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
+ (1 << (X86_FEATURE_MWAIT % 32));
+
+ if ((cx & mwait_mask) != mwait_mask)
+ return false;
+
+ /* We need to emulate the MWAIT_LEAF and for that we need both
+ * ecx and edx. The hypercall provides only partial information.
+ */
+
+ ax = CPUID_MWAIT_LEAF;
+ bx = 0;
+ cx = 0;
+ dx = 0;
+
+ native_cpuid(&ax, &bx, &cx, &dx);
+
+ /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
+ * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
+ */
+ buf[0] = ACPI_PDC_REVISION_ID;
+ buf[1] = 1;
+ buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
+
+ set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
+
+ if ((HYPERVISOR_dom0_op(&op) == 0) &&
+ (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
+ cpuid_leaf5_ecx_val = cx;
+ cpuid_leaf5_edx_val = dx;
+ }
+ return true;
+#else
+ return false;
+#endif
+}
static void __init xen_init_cpuid_mask(void)
{
unsigned int ax, bx, cx, dx;
@@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void)
/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
if ((cx & xsave_mask) != xsave_mask)
cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
+
+ if (xen_check_mwait())
+ cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
}
static void xen_set_debugreg(int reg, unsigned long val)
@@ -777,11 +868,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
static unsigned long xen_read_cr0(void)
{
- unsigned long cr0 = percpu_read(xen_cr0_value);
+ unsigned long cr0 = this_cpu_read(xen_cr0_value);
if (unlikely(cr0 == 0)) {
cr0 = native_read_cr0();
- percpu_write(xen_cr0_value, cr0);
+ this_cpu_write(xen_cr0_value, cr0);
}
return cr0;
@@ -791,7 +882,7 @@ static void xen_write_cr0(unsigned long cr0)
{
struct multicall_space mcs;
- percpu_write(xen_cr0_value, cr0);
+ this_cpu_write(xen_cr0_value, cr0);
/* Only pay attention to cr0.TS; everything else is
ignored. */
@@ -876,7 +967,7 @@ void xen_setup_shared_info(void)
xen_setup_mfn_list_list();
}
-/* This is called once we have the cpu_possible_map */
+/* This is called once we have the cpu_possible_mask */
void xen_setup_vcpu_info_placement(void)
{
int cpu;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 8bbb465..1573376 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -26,7 +26,7 @@ static unsigned long xen_save_fl(void)
struct vcpu_info *vcpu;
unsigned long flags;
- vcpu = percpu_read(xen_vcpu);
+ vcpu = this_cpu_read(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
@@ -50,7 +50,7 @@ static void xen_restore_fl(unsigned long flags)
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
- vcpu = percpu_read(xen_vcpu);
+ vcpu = this_cpu_read(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
preempt_enable_no_resched();
@@ -72,7 +72,7 @@ static void xen_irq_disable(void)
make sure we're don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
- percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
+ this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
@@ -86,7 +86,7 @@ static void xen_irq_enable(void)
the caller is confused and is trying to re-enable interrupts
on an indeterminate processor. */
- vcpu = percpu_read(xen_vcpu);
+ vcpu = this_cpu_read(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 95c1cf6..b8e2794 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1071,14 +1071,14 @@ static void drop_other_mm_ref(void *info)
struct mm_struct *mm = info;
struct mm_struct *active_mm;
- active_mm = percpu_read(cpu_tlbstate.active_mm);
+ active_mm = this_cpu_read(cpu_tlbstate.active_mm);
- if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
+ if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
leave_mm(smp_processor_id());
/* If this cpu still has a stale cr3 reference, then make sure
it has been flushed. */
- if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
+ if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
load_cr3(swapper_pg_dir);
}
@@ -1185,17 +1185,17 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
static void xen_write_cr2(unsigned long cr2)
{
- percpu_read(xen_vcpu)->arch.cr2 = cr2;
+ this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
static unsigned long xen_read_cr2(void)
{
- return percpu_read(xen_vcpu)->arch.cr2;
+ return this_cpu_read(xen_vcpu)->arch.cr2;
}
unsigned long xen_read_cr2_direct(void)
{
- return percpu_read(xen_vcpu_info.arch.cr2);
+ return this_cpu_read(xen_vcpu_info.arch.cr2);
}
static void xen_flush_tlb(void)
@@ -1278,12 +1278,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
static unsigned long xen_read_cr3(void)
{
- return percpu_read(xen_cr3);
+ return this_cpu_read(xen_cr3);
}
static void set_current_cr3(void *v)
{
- percpu_write(xen_current_cr3, (unsigned long)v);
+ this_cpu_write(xen_current_cr3, (unsigned long)v);
}
static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -1306,7 +1306,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
xen_extend_mmuext_op(&op);
if (kernel) {
- percpu_write(xen_cr3, cr3);
+ this_cpu_write(xen_cr3, cr3);
/* Update xen_current_cr3 once the batch has actually
been submitted. */
@@ -1322,7 +1322,7 @@ static void xen_write_cr3(unsigned long cr3)
/* Update while interrupts are disabled, so its atomic with
respect to ipis */
- percpu_write(xen_cr3, cr3);
+ this_cpu_write(xen_cr3, cr3);
__xen_write_cr3(true, cr3);
@@ -1859,6 +1859,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
#endif /* CONFIG_X86_64 */
static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
+static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss;
static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
@@ -1899,7 +1900,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
* We just don't map the IO APIC - all access is via
* hypercalls. Keep the address in the pte for reference.
*/
- pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
+ pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL);
break;
#endif
@@ -2064,6 +2065,7 @@ void __init xen_init_mmu_ops(void)
pv_mmu_ops = xen_mmu_ops;
memset(dummy_mapping, 0xff, PAGE_SIZE);
+ memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE);
}
/* Protected by xen_reservation_lock. */
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index dee79b7..9c2e74f 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode)
xen_mc_flush();
/* restore flags saved in xen_mc_batch */
- local_irq_restore(percpu_read(xen_mc_irq_flags));
+ local_irq_restore(this_cpu_read(xen_mc_irq_flags));
}
/* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index b480d42..967633a 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -12,8 +12,8 @@ int xen_swiotlb __read_mostly;
static struct dma_map_ops xen_swiotlb_dma_ops = {
.mapping_error = xen_swiotlb_dma_mapping_error,
- .alloc_coherent = xen_swiotlb_alloc_coherent,
- .free_coherent = xen_swiotlb_free_coherent,
+ .alloc = xen_swiotlb_alloc_coherent,
+ .free = xen_swiotlb_free_coherent,
.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
.sync_single_for_device = xen_swiotlb_sync_single_for_device,
.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e03c636..1ba8dff 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -10,6 +10,7 @@
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
+#include <linux/cpufreq.h>
#include <asm/elf.h>
#include <asm/vdso.h>
@@ -420,7 +421,7 @@ void __init xen_arch_setup(void)
boot_cpu_data.hlt_works_ok = 1;
#endif
disable_cpuidle();
- boot_option_idle_override = IDLE_HALT;
+ disable_cpufreq();
WARN_ON(set_pm_idle_to_default());
fiddle_vdso();
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 501d4e0..5fac691 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -59,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
static void __cpuinit cpu_bringup(void)
{
- int cpu = smp_processor_id();
+ int cpu;
cpu_init();
touch_softlockup_watchdog();
@@ -75,8 +75,14 @@ static void __cpuinit cpu_bringup(void)
xen_setup_cpu_clockevents();
+ notify_cpu_starting(cpu);
+
+ ipi_call_lock();
set_cpu_online(cpu, true);
- percpu_write(cpu_state, CPU_ONLINE);
+ ipi_call_unlock();
+
+ this_cpu_write(cpu_state, CPU_ONLINE);
+
wmb();
/* We can take interrupts now: we're officially "up". */
OpenPOWER on IntegriCloud