Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 67
-rw-r--r--  arch/x86/Kconfig.cpu | 13
-rw-r--r--  arch/x86/Makefile | 23
-rw-r--r--  arch/x86/boot/compressed/Makefile | 4
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 7
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 168
-rw-r--r--  arch/x86/boot/compressed/kaslr.c | 14
-rw-r--r--  arch/x86/boot/compressed/kaslr_64.c (renamed from arch/x86/boot/compressed/pagetable.c) | 14
-rw-r--r--  arch/x86/boot/compressed/mem_encrypt.S | 17
-rw-r--r--  arch/x86/boot/compressed/misc.c | 26
-rw-r--r--  arch/x86/boot/compressed/misc.h | 7
-rw-r--r--  arch/x86/boot/compressed/pgtable.h | 20
-rw-r--r--  arch/x86/boot/compressed/pgtable_64.c | 148
-rw-r--r--  arch/x86/crypto/aesni-intel_asm.S | 1404
-rw-r--r--  arch/x86/crypto/aesni-intel_glue.c | 230
-rw-r--r--  arch/x86/crypto/blowfish_glue.c | 230
-rw-r--r--  arch/x86/crypto/camellia_aesni_avx2_glue.c | 491
-rw-r--r--  arch/x86/crypto/camellia_aesni_avx_glue.c | 495
-rw-r--r--  arch/x86/crypto/camellia_glue.c | 356
-rw-r--r--  arch/x86/crypto/cast5_avx_glue.c | 352
-rw-r--r--  arch/x86/crypto/cast6_avx_glue.c | 489
-rw-r--r--  arch/x86/crypto/des3_ede_glue.c | 238
-rw-r--r--  arch/x86/crypto/glue_helper.c | 391
-rw-r--r--  arch/x86/crypto/serpent_avx2_glue.c | 478
-rw-r--r--  arch/x86/crypto/serpent_avx_glue.c | 518
-rw-r--r--  arch/x86/crypto/serpent_sse2_glue.c | 519
-rw-r--r--  arch/x86/crypto/sha1-mb/sha1_mb.c | 28
-rw-r--r--  arch/x86/crypto/sha1-mb/sha1_mb_ctx.h | 8
-rw-r--r--  arch/x86/crypto/sha256-mb/sha256_mb.c | 27
-rw-r--r--  arch/x86/crypto/sha256-mb/sha256_mb_ctx.h | 8
-rw-r--r--  arch/x86/crypto/sha512-mb/sha512_mb.c | 30
-rw-r--r--  arch/x86/crypto/sha512-mb/sha512_mb_ctx.h | 8
-rw-r--r--  arch/x86/crypto/twofish_avx_glue.c | 493
-rw-r--r--  arch/x86/crypto/twofish_glue_3way.c | 339
-rw-r--r--  arch/x86/entry/calling.h | 34
-rw-r--r--  arch/x86/entry/entry_32.S | 6
-rw-r--r--  arch/x86/entry/entry_64.S | 165
-rw-r--r--  arch/x86/entry/entry_64_compat.S | 83
-rw-r--r--  arch/x86/entry/syscalls/syscall_32.tbl | 40
-rw-r--r--  arch/x86/entry/vdso/vdso32/vclock_gettime.c | 2
-rw-r--r--  arch/x86/entry/vsyscall/vsyscall_64.c | 18
-rw-r--r--  arch/x86/events/core.c | 28
-rw-r--r--  arch/x86/events/intel/core.c | 35
-rw-r--r--  arch/x86/events/intel/cstate.c | 44
-rw-r--r--  arch/x86/events/intel/ds.c | 132
-rw-r--r--  arch/x86/events/intel/pt.c | 13
-rw-r--r--  arch/x86/events/intel/rapl.c | 2
-rw-r--r--  arch/x86/events/intel/uncore.c | 2
-rw-r--r--  arch/x86/events/intel/uncore_snbep.c | 34
-rw-r--r--  arch/x86/events/msr.c | 3
-rw-r--r--  arch/x86/events/perf_event.h | 11
-rw-r--r--  arch/x86/hyperv/hv_init.c | 45
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 1
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 118
-rw-r--r--  arch/x86/include/asm/acpi.h | 11
-rw-r--r--  arch/x86/include/asm/alternative.h | 4
-rw-r--r--  arch/x86/include/asm/apic.h | 18
-rw-r--r--  arch/x86/include/asm/apm.h | 6
-rw-r--r--  arch/x86/include/asm/asm-prototypes.h | 3
-rw-r--r--  arch/x86/include/asm/atomic.h | 106
-rw-r--r--  arch/x86/include/asm/atomic64_32.h | 108
-rw-r--r--  arch/x86/include/asm/atomic64_64.h | 108
-rw-r--r--  arch/x86/include/asm/barrier.h | 30
-rw-r--r--  arch/x86/include/asm/bitops.h | 29
-rw-r--r--  arch/x86/include/asm/cmpxchg.h | 12
-rw-r--r--  arch/x86/include/asm/cmpxchg_32.h | 8
-rw-r--r--  arch/x86/include/asm/cmpxchg_64.h | 4
-rw-r--r--  arch/x86/include/asm/cpufeature.h | 8
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 3
-rw-r--r--  arch/x86/include/asm/crypto/camellia.h | 16
-rw-r--r--  arch/x86/include/asm/crypto/glue_helper.h | 75
-rw-r--r--  arch/x86/include/asm/crypto/serpent-avx.h | 17
-rw-r--r--  arch/x86/include/asm/crypto/twofish.h | 19
-rw-r--r--  arch/x86/include/asm/device.h | 3
-rw-r--r--  arch/x86/include/asm/dma-direct.h | 25
-rw-r--r--  arch/x86/include/asm/dma-mapping.h | 33
-rw-r--r--  arch/x86/include/asm/efi.h | 43
-rw-r--r--  arch/x86/include/asm/hardirq.h | 1
-rw-r--r--  arch/x86/include/asm/hw_irq.h | 1
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h (renamed from arch/x86/include/uapi/asm/hyperv.h) | 320
-rw-r--r--  arch/x86/include/asm/intel_pconfig.h | 65
-rw-r--r--  arch/x86/include/asm/io.h | 15
-rw-r--r--  arch/x86/include/asm/io_apic.h | 10
-rw-r--r--  arch/x86/include/asm/iommu.h | 3
-rw-r--r--  arch/x86/include/asm/irq_vectors.h | 3
-rw-r--r--  arch/x86/include/asm/jailhouse_para.h | 2
-rw-r--r--  arch/x86/include/asm/kaslr.h | 4
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 60
-rw-r--r--  arch/x86/include/asm/kvm_para.h | 6
-rw-r--r--  arch/x86/include/asm/mce.h | 53
-rw-r--r--  arch/x86/include/asm/mem_encrypt.h | 3
-rw-r--r--  arch/x86/include/asm/microcode.h | 24
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 6
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 107
-rw-r--r--  arch/x86/include/asm/msr-index.h | 14
-rw-r--r--  arch/x86/include/asm/msr.h | 17
-rw-r--r--  arch/x86/include/asm/nospec-branch.h | 180
-rw-r--r--  arch/x86/include/asm/page_64.h | 6
-rw-r--r--  arch/x86/include/asm/page_64_types.h | 20
-rw-r--r--  arch/x86/include/asm/paravirt.h | 38
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 5
-rw-r--r--  arch/x86/include/asm/pci_x86.h | 2
-rw-r--r--  arch/x86/include/asm/percpu.h | 2
-rw-r--r--  arch/x86/include/asm/pgalloc.h | 5
-rw-r--r--  arch/x86/include/asm/pgtable-3level_types.h | 1
-rw-r--r--  arch/x86/include/asm/pgtable.h | 19
-rw-r--r--  arch/x86/include/asm/pgtable_32.h | 3
-rw-r--r--  arch/x86/include/asm/pgtable_32_types.h | 2
-rw-r--r--  arch/x86/include/asm/pgtable_64.h | 24
-rw-r--r--  arch/x86/include/asm/pgtable_64_types.h | 70
-rw-r--r--  arch/x86/include/asm/pgtable_types.h | 12
-rw-r--r--  arch/x86/include/asm/platform_sst_audio.h | 1
-rw-r--r--  arch/x86/include/asm/processor.h | 11
-rw-r--r--  arch/x86/include/asm/refcount.h | 6
-rw-r--r--  arch/x86/include/asm/required-features.h | 8
-rw-r--r--  arch/x86/include/asm/rmwcc.h | 16
-rw-r--r--  arch/x86/include/asm/sections.h | 1
-rw-r--r--  arch/x86/include/asm/smp.h | 10
-rw-r--r--  arch/x86/include/asm/sparsemem.h | 9
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 2
-rw-r--r--  arch/x86/include/asm/svm.h | 3
-rw-r--r--  arch/x86/include/asm/swiotlb.h | 8
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 55
-rw-r--r--  arch/x86/include/asm/syscalls.h | 3
-rw-r--r--  arch/x86/include/asm/tsc.h | 1
-rw-r--r--  arch/x86/include/asm/uv/uv_mmrs.h | 2
-rw-r--r--  arch/x86/include/asm/vmx.h | 1
-rw-r--r--  arch/x86/include/asm/x86_init.h | 21
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 19
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h | 10
-rw-r--r--  arch/x86/include/uapi/asm/mce.h | 51
-rw-r--r--  arch/x86/kernel/Makefile | 4
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 22
-rw-r--r--  arch/x86/kernel/amd_gart_64.c | 41
-rw-r--r--  arch/x86/kernel/apic/apic.c | 111
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 18
-rw-r--r--  arch/x86/kernel/apic/vector.c | 25
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 2
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 12
-rw-r--r--  arch/x86/kernel/cpu/common.c | 33
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 97
-rw-r--r--  arch/x86/kernel/cpu/intel_pconfig.c | 82
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h | 59
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 64
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 78
-rw-r--r--  arch/x86/kernel/cpu/microcode/amd.c | 46
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c | 181
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 62
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 62
-rw-r--r--  arch/x86/kernel/cpuid.c | 34
-rw-r--r--  arch/x86/kernel/crash.c | 3
-rw-r--r--  arch/x86/kernel/devicetree.c | 68
-rw-r--r--  arch/x86/kernel/dumpstack.c | 49
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 42
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 42
-rw-r--r--  arch/x86/kernel/e820.c | 18
-rw-r--r--  arch/x86/kernel/head64.c | 81
-rw-r--r--  arch/x86/kernel/head_64.S | 24
-rw-r--r--  arch/x86/kernel/idt.c | 5
-rw-r--r--  arch/x86/kernel/ioport.c | 7
-rw-r--r--  arch/x86/kernel/irq.c | 7
-rw-r--r--  arch/x86/kernel/irqinit.c | 9
-rw-r--r--  arch/x86/kernel/jailhouse.c | 8
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 10
-rw-r--r--  arch/x86/kernel/kvm.c | 38
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 8
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 10
-rw-r--r--  arch/x86/kernel/module.c | 1
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 5
-rw-r--r--  arch/x86/kernel/pci-dma.c | 71
-rw-r--r--  arch/x86/kernel/pci-nommu.c | 19
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c | 48
-rw-r--r--  arch/x86/kernel/process_64.c | 14
-rw-r--r--  arch/x86/kernel/reboot.c | 3
-rw-r--r--  arch/x86/kernel/rtc.c | 6
-rw-r--r--  arch/x86/kernel/setup.c | 22
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 17
-rw-r--r--  arch/x86/kernel/signal.c | 5
-rw-r--r--  arch/x86/kernel/signal_compat.c | 69
-rw-r--r--  arch/x86/kernel/smpboot.c | 1
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 2
-rw-r--r--  arch/x86/kernel/traps.c | 15
-rw-r--r--  arch/x86/kernel/tsc.c | 39
-rw-r--r--  arch/x86/kernel/unwind_orc.c | 3
-rw-r--r--  arch/x86/kernel/vm86_32.c | 3
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 9
-rw-r--r--  arch/x86/kernel/x86_init.c | 22
-rw-r--r--  arch/x86/kvm/cpuid.c | 10
-rw-r--r--  arch/x86/kvm/emulate.c | 27
-rw-r--r--  arch/x86/kvm/hyperv.c | 192
-rw-r--r--  arch/x86/kvm/hyperv.h | 4
-rw-r--r--  arch/x86/kvm/irq.c | 26
-rw-r--r--  arch/x86/kvm/kvm_cache_regs.h | 7
-rw-r--r--  arch/x86/kvm/lapic.c | 21
-rw-r--r--  arch/x86/kvm/lapic.h | 2
-rw-r--r--  arch/x86/kvm/mmu.c | 4
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 11
-rw-r--r--  arch/x86/kvm/pmu.c | 37
-rw-r--r--  arch/x86/kvm/pmu.h | 6
-rw-r--r--  arch/x86/kvm/pmu_amd.c | 142
-rw-r--r--  arch/x86/kvm/svm.c | 401
-rw-r--r--  arch/x86/kvm/vmx.c | 1129
-rw-r--r--  arch/x86/kvm/vmx_evmcs.h | 324
-rw-r--r--  arch/x86/kvm/x86.c | 481
-rw-r--r--  arch/x86/kvm/x86.h | 86
-rw-r--r--  arch/x86/lib/Makefile | 1
-rw-r--r--  arch/x86/lib/clear_page_64.S | 2
-rw-r--r--  arch/x86/lib/msr-smp.c | 43
-rw-r--r--  arch/x86/lib/retpoline.S | 56
-rw-r--r--  arch/x86/mm/Makefile | 15
-rw-r--r--  arch/x86/mm/cpu_entry_area.c | 6
-rw-r--r--  arch/x86/mm/debug_pagetables.c | 32
-rw-r--r--  arch/x86/mm/dump_pagetables.c | 125
-rw-r--r--  arch/x86/mm/fault.c | 71
-rw-r--r--  arch/x86/mm/ident_map.c | 2
-rw-r--r--  arch/x86/mm/init_32.c | 16
-rw-r--r--  arch/x86/mm/init_64.c | 126
-rw-r--r--  arch/x86/mm/kasan_init_64.c | 20
-rw-r--r--  arch/x86/mm/kaslr.c | 29
-rw-r--r--  arch/x86/mm/mem_encrypt.c | 668
-rw-r--r--  arch/x86/mm/mem_encrypt_boot.S | 2
-rw-r--r--  arch/x86/mm/mem_encrypt_identity.c | 564
-rw-r--r--  arch/x86/mm/mmap.c | 18
-rw-r--r--  arch/x86/mm/numa_32.c | 11
-rw-r--r--  arch/x86/mm/pgtable.c | 48
-rw-r--r--  arch/x86/mm/pti.c | 2
-rw-r--r--  arch/x86/mm/tlb.c | 4
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 246
-rw-r--r--  arch/x86/oprofile/nmi_int.c | 2
-rw-r--r--  arch/x86/pci/acpi.c | 8
-rw-r--r--  arch/x86/pci/direct.c | 5
-rw-r--r--  arch/x86/pci/legacy.c | 4
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 13
-rw-r--r--  arch/x86/pci/sta2x11-fixup.c | 52
-rw-r--r--  arch/x86/platform/atom/punit_atom_debug.c | 17
-rw-r--r--  arch/x86/platform/efi/efi_64.c | 68
-rw-r--r--  arch/x86/platform/efi/efi_thunk_64.S | 2
-rw-r--r--  arch/x86/platform/efi/quirks.c | 10
-rw-r--r--  arch/x86/platform/intel-mid/intel-mid.c | 8
-rw-r--r--  arch/x86/platform/intel-quark/imr.c | 24
-rw-r--r--  arch/x86/platform/uv/tlb_uv.c | 2
-rw-r--r--  arch/x86/power/hibernate_64.c | 6
-rw-r--r--  arch/x86/purgatory/Makefile | 2
-rw-r--r--  arch/x86/realmode/rm/trampoline_64.S | 2
-rw-r--r--  arch/x86/tools/relocs.c | 3
-rw-r--r--  arch/x86/um/asm/barrier.h | 4
-rw-r--r--  arch/x86/xen/Kconfig | 5
-rw-r--r--  arch/x86/xen/apic.c | 2
-rw-r--r--  arch/x86/xen/enlighten_pv.c | 6
-rw-r--r--  arch/x86/xen/enlighten_pvh.c | 14
-rw-r--r--  arch/x86/xen/mmu_pv.c | 59
-rw-r--r--  arch/x86/xen/suspend.c | 16
256 files changed, 9092 insertions, 8608 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c1236b1..d234cca 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,7 +54,6 @@ config X86
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV if X86_64
- select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_REFCOUNT
@@ -83,6 +82,7 @@ config X86
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
select DCACHE_WORD_ACCESS
+ select DMA_DIRECT_OPS
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS
@@ -393,17 +393,6 @@ config X86_FEATURE_NAMES
If in doubt, say Y.
-config X86_FAST_FEATURE_TESTS
- bool "Fast CPU feature tests" if EMBEDDED
- default y
- ---help---
- Some fast-paths in the kernel depend on the capabilities of the CPU.
- Say Y here for the kernel to patch in the appropriate code at runtime
- based on the capabilities of the CPU. The infrastructure for patching
- code at runtime takes up some additional space; space-constrained
- embedded systems may wish to say N here to produce smaller, slightly
- slower code.
-
config X86_X2APIC
bool "Support x2apic"
depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST)
@@ -430,6 +419,7 @@ config GOLDFISH
config RETPOLINE
bool "Avoid speculative indirect branches in kernel"
default y
+ select STACK_VALIDATION if HAVE_STACK_VALIDATION
help
Compile kernel with the retpoline compiler options to guard against
kernel-to-user data leaks by avoiding speculative indirect
@@ -690,6 +680,7 @@ config X86_SUPPORTS_MEMORY_FAILURE
config STA2X11
bool "STA2X11 Companion Chip Support"
depends on X86_32_NON_STANDARD && PCI
+ select ARCH_HAS_PHYS_TO_DMA
select X86_DEV_DMA_OPS
select X86_DMA_REMAP
select SWIOTLB
@@ -1305,7 +1296,7 @@ config MICROCODE
the Linux kernel.
The preferred method to load microcode from a detached initrd is described
- in Documentation/x86/early-microcode.txt. For that you need to enable
+ in Documentation/x86/microcode.txt. For that you need to enable
CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
initrd for microcode blobs.
@@ -1471,6 +1462,8 @@ config X86_PAE
config X86_5LEVEL
bool "Enable 5-level page tables support"
+ select DYNAMIC_MEMORY_LAYOUT
+ select SPARSEMEM_VMEMMAP
depends on X86_64
---help---
5-level paging enables access to larger address space:
@@ -1479,8 +1472,8 @@ config X86_5LEVEL
It will be supported by future Intel CPUs.
- Note: a kernel with this option enabled can only be booted
- on machines that support the feature.
+ A kernel with the option enabled can be booted on machines that
+ support 4- or 5-level paging.
See Documentation/x86/x86_64/5level-paging.txt for more
information.
@@ -1605,10 +1598,6 @@ config ARCH_HAVE_MEMORY_PRESENT
def_bool y
depends on X86_32 && DISCONTIGMEM
-config NEED_NODE_MEMMAP_SIZE
- def_bool y
- depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
-
config ARCH_FLATMEM_ENABLE
def_bool y
depends on X86_32 && !NUMA
@@ -2184,10 +2173,17 @@ config PHYSICAL_ALIGN
Don't change this unless you know what you are doing.
+config DYNAMIC_MEMORY_LAYOUT
+ bool
+ ---help---
+ This option makes base addresses of vmalloc and vmemmap as well as
+ __PAGE_OFFSET movable during boot.
+
config RANDOMIZE_MEMORY
bool "Randomize the kernel memory sections"
depends on X86_64
depends on RANDOMIZE_BASE
+ select DYNAMIC_MEMORY_LAYOUT
default RANDOMIZE_BASE
---help---
Randomizes the base virtual address of kernel memory sections
@@ -2306,7 +2302,7 @@ choice
it can be used to assist security vulnerability exploitation.
This setting can be changed at boot time via the kernel command
- line parameter vsyscall=[native|emulate|none].
+ line parameter vsyscall=[emulate|none].
On a system with recent enough glibc (2.14 or newer) and no
static binaries, you can say None without a performance penalty
@@ -2314,15 +2310,6 @@ choice
If unsure, select "Emulate".
- config LEGACY_VSYSCALL_NATIVE
- bool "Native"
- help
- Actual executable code is located in the fixed vsyscall
- address mapping, implementing time() efficiently. Since
- this makes the mapping executable, it can be used during
- security vulnerability exploitation (traditionally as
- ROP gadgets). This configuration is not recommended.
-
config LEGACY_VSYSCALL_EMULATE
bool "Emulate"
help
@@ -2640,8 +2627,10 @@ config PCI_DIRECT
depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG))
config PCI_MMCONFIG
- def_bool y
- depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
+ bool "Support mmconfig PCI config space access" if X86_64
+ default y
+ depends on PCI && (ACPI || SFI || JAILHOUSE_GUEST)
+ depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG)
config PCI_OLPC
def_bool y
@@ -2656,9 +2645,9 @@ config PCI_DOMAINS
def_bool y
depends on PCI
-config PCI_MMCONFIG
- bool "Support mmconfig PCI config space access"
- depends on X86_64 && PCI && ACPI
+config MMCONF_FAM10H
+ def_bool y
+ depends on X86_64 && PCI_MMCONFIG && ACPI
config PCI_CNB20LE_QUIRK
bool "Read CNB20LE Host Bridge Windows" if EXPERT
@@ -2676,11 +2665,13 @@ config PCI_CNB20LE_QUIRK
source "drivers/pci/Kconfig"
config ISA_BUS
- bool "ISA-style bus support on modern systems" if EXPERT
- select ISA_BUS_API
+ bool "ISA bus support on modern systems" if EXPERT
help
- Enables ISA-style drivers on modern systems. This is necessary to
- support PC/104 devices on X86_64 platforms.
+ Expose ISA bus device drivers and options available for selection and
+ configuration. Enable this option if your target machine has an ISA
+ bus. ISA is an older system, displaced by PCI and newer bus
+ architectures -- if your target machine is modern, it probably does
+ not have an ISA bus.
If unsure, say N.
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8b8d229..638411f 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-config X86_PPRO_FENCE
- bool "PentiumPro memory ordering errata workaround"
- depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
- ---help---
- Old PentiumPro multiprocessor systems had errata that could cause
- memory operations to violate the x86 ordering standard in rare cases.
- Enabling this option will attempt to work around some (but not all)
- occurrences of this problem, at the cost of much heavier spinlock and
- memory barrier operations.
-
- If unsure, say n here. Even distro kernels should think twice before
- enabling this: there are few systems, and an unlikely bug.
-
config X86_F00F_BUG
def_bool y
depends on M586MMX || M586TSC || M586 || M486
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad5516..60135cb 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -31,8 +31,7 @@ endif
CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h
M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
-REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \
- -DDISABLE_BRANCH_PROFILING \
+REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \
-Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
-fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-mno-mmx -mno-sse
@@ -181,6 +180,10 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
endif
endif
+ifndef CC_HAVE_ASM_GOTO
+ $(error Compiler lacks asm-goto support.)
+endif
+
#
# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way
@@ -223,6 +226,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
LDFLAGS := -m elf_$(UTS_MACHINE)
+#
+# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to
+# the linker to force 2MB page size regardless of the default page size used
+# by the linker.
+#
+ifdef CONFIG_X86_64
+LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
+endif
+
# Speed up the build
KBUILD_CFLAGS += -pipe
# Workaround for a gcc prelease that unfortunately was shipped in a suse release
@@ -232,10 +244,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
- ifneq ($(RETPOLINE_CFLAGS),)
- KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
- endif
+ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+endif
endif
archscripts: scripts_basic
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index f25e153..fa42f89 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -26,7 +26,7 @@ KCOV_INSTRUMENT := n
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
-KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ -O2
+KBUILD_CFLAGS := -m$(BITS) -O2
KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
@@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
- vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
+ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 353e20c..47d3eff 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -421,9 +421,10 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
}
}
+static const efi_char16_t apple[] = L"Apple";
+
static void setup_quirks(struct boot_params *boot_params)
{
- efi_char16_t const apple[] = { 'A', 'p', 'p', 'l', 'e', 0 };
efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
efi_table_attr(efi_system_table, fw_vendor, sys_table);
@@ -439,7 +440,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u32 *handles = (u32 *)uga_handle;;
+ u32 *handles = (u32 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
@@ -484,7 +485,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height)
struct efi_uga_draw_protocol *uga = NULL, *first_uga;
efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
unsigned long nr_ugas;
- u64 *handles = (u64 *)uga_handle;;
+ u64 *handles = (u64 *)uga_handle;
efi_status_t status = EFI_INVALID_PARAMETER;
int i;
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index fc313e2..fca012b 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
#include <asm/bootparam.h>
+#include "pgtable.h"
/*
* Locally defined symbols should be marked hidden:
@@ -304,55 +305,77 @@ ENTRY(startup_64)
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp
-#ifdef CONFIG_X86_5LEVEL
/*
- * Check if we need to enable 5-level paging.
- * RSI holds real mode data and need to be preserved across
- * a function call.
+ * At this point we are in long mode with 4-level paging enabled,
+ * but we might want to enable 5-level paging or vice versa.
+ *
+ * The problem is that we cannot do it directly. Setting or clearing
+ * CR4.LA57 in long mode would trigger #GP. So we need to switch off
+ * long mode and paging first.
+ *
+ * We also need a trampoline in lower memory to switch over from
+ * 4- to 5-level paging for cases when the bootloader puts the kernel
+ * above 4G, but didn't enable 5-level paging for us.
+ *
+ * The same trampoline can be used to switch from 5- to 4-level paging
+ * mode, for instance when starting a 4-level paging kernel via kexec()
+ * while the original kernel ran in 5-level paging mode.
+ *
+ * For the trampoline, we need the top page table to reside in lower
+ * memory as we don't have a way to load 64-bit values into CR3 in
+ * 32-bit mode.
+ *
+ * We go through the trampoline even if we don't have to: even if we're
+ * already in the desired paging mode. This way the trampoline code gets
+ * tested on every boot.
*/
- pushq %rsi
- call l5_paging_required
- popq %rsi
- /* If l5_paging_required() returned zero, we're done here. */
- cmpq $0, %rax
- je lvl5
+ /* Make sure we have GDT with 32-bit code segment */
+ leaq gdt(%rip), %rax
+ movq %rax, gdt64+2(%rip)
+ lgdt gdt64(%rip)
/*
- * At this point we are in long mode with 4-level paging enabled,
- * but we want to enable 5-level paging.
+ * paging_prepare() sets up the trampoline and checks if we need to
+ * enable 5-level paging.
*
- * The problem is that we cannot do it directly. Setting LA57 in
- * long mode would trigger #GP. So we need to switch off long mode
- * first.
+ * The address of the trampoline is returned in RAX.
+ * Non-zero RDX on return means we need to enable 5-level paging.
*
- * NOTE: This is not going to work if bootloader put us above 4G
- * limit.
- *
- * The first step is go into compatibility mode.
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
*/
+ pushq %rsi
+ call paging_prepare
+ popq %rsi
- /* Clear additional page table */
- leaq lvl5_pgtable(%rbx), %rdi
- xorq %rax, %rax
- movq $(PAGE_SIZE/8), %rcx
- rep stosq
+ /* Save the trampoline address in RCX */
+ movq %rax, %rcx
/*
- * Setup current CR3 as the first and only entry in a new top level
- * page table.
+ * Load the address of trampoline_return() into RDI.
+ * It will be used by the trampoline to return to the main code.
*/
- movq %cr3, %rdi
- leaq 0x7 (%rdi), %rax
- movq %rax, lvl5_pgtable(%rbx)
+ leaq trampoline_return(%rip), %rdi
/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
pushq $__KERNEL32_CS
- leaq compatible_mode(%rip), %rax
+ leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
pushq %rax
lretq
-lvl5:
-#endif
+trampoline_return:
+ /* Restore the stack, the 32-bit trampoline uses its own stack */
+ leaq boot_stack_end(%rbx), %rsp
+
+ /*
+ * cleanup_trampoline() would restore trampoline memory.
+ *
+ * RSI holds real mode data and needs to be preserved across
+ * this function call.
+ */
+ pushq %rsi
+ call cleanup_trampoline
+ popq %rsi
/* Zero EFLAGS */
pushq $0
@@ -490,46 +513,82 @@ relocated:
jmp *%rax
.code32
-#ifdef CONFIG_X86_5LEVEL
-compatible_mode:
- /* Setup data and stack segments */
+/*
+ * This is the 32-bit trampoline that will be copied over to low memory.
+ *
+ * RDI contains the return address (might be above 4G).
+ * ECX contains the base address of the trampoline memory.
+ * Non-zero RDX on return means we need to enable 5-level paging.
+ */
+ENTRY(trampoline_32bit_src)
+ /* Set up data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %ss
+ /* Set up new stack */
+ leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
+
/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
- /* Point CR3 to 5-level paging */
- leal lvl5_pgtable(%ebx), %eax
- movl %eax, %cr3
+ /* Check what paging mode we want to be in after the trampoline */
+ cmpl $0, %edx
+ jz 1f
- /* Enable PAE and LA57 mode */
+ /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
+ movl %cr4, %eax
+ testl $X86_CR4_LA57, %eax
+ jnz 3f
+ jmp 2f
+1:
+ /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
movl %cr4, %eax
- orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
+ testl $X86_CR4_LA57, %eax
+ jz 3f
+2:
+ /* Point CR3 to the trampoline's new top level page table */
+ leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
+ movl %eax, %cr3
+3:
+ /* Enable PAE and LA57 (if required) paging modes */
+ movl $X86_CR4_PAE, %eax
+ cmpl $0, %edx
+ jz 1f
+ orl $X86_CR4_LA57, %eax
+1:
movl %eax, %cr4
- /* Calculate address we are running at */
- call 1f
-1: popl %edi
- subl $1b, %edi
+ /* Calculate address of paging_enabled() once we are executing in the trampoline */
+ leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
- /* Prepare stack for far return to Long Mode */
+ /* Prepare the stack for far return to Long Mode */
pushl $__KERNEL_CS
- leal lvl5(%edi), %eax
- push %eax
+ pushl %eax
- /* Enable paging back */
+ /* Enable paging again */
movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0
lret
-#endif
+ .code64
+paging_enabled:
+ /* Return from the trampoline */
+ jmp *%rdi
+
+ /*
+ * The trampoline code has a size limit.
+ * Make sure we fail to compile if the trampoline code grows
+ * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
+ */
+ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
+
+ .code32
no_longmode:
- /* This isn't an x86-64 CPU so hang */
+ /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
hlt
jmp 1b
@@ -537,6 +596,11 @@ no_longmode:
#include "../../kernel/verify_cpu.S"
.data
+gdt64:
+ .word gdt_end - gdt
+ .long 0
+ .word 0
+ .quad 0
gdt:
.word gdt_end - gdt
.long gdt
@@ -585,7 +649,3 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
-#ifdef CONFIG_X86_5LEVEL
-lvl5_pgtable:
- .fill PAGE_SIZE, 1, 0
-#endif
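
The head_64.S comment above explains that the boot code now always bounces through the low-memory trampoline, but only installs a new top-level page table when the desired paging depth differs from the current one. A minimal C sketch of that decision, with an invented helper name and CR4.LA57 defined locally (bit 12 of CR4), purely for illustration:

/*
 * Illustrative sketch, not part of the patch: the trampoline installs its
 * own top-level page table only when the wanted paging depth differs from
 * what CR4.LA57 currently reports; otherwise CR3 is left untouched.
 */
#define X86_CR4_LA57 (1UL << 12)	/* CR4 bit 12: 57-bit (5-level) paging */

static inline int paging_switch_needed(int l5_required, unsigned long cr4)
{
	return l5_required != !!(cr4 & X86_CR4_LA57);
}
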
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 8199a61..66e42a0 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -46,6 +46,12 @@
#define STATIC
#include <linux/decompress/mm.h>
+#ifdef CONFIG_X86_5LEVEL
+unsigned int pgtable_l5_enabled __ro_after_init;
+unsigned int pgdir_shift __ro_after_init = 39;
+unsigned int ptrs_per_p4d __ro_after_init = 1;
+#endif
+
extern unsigned long get_cmd_line_ptr(void);
/* Simplified build-specific string for starting entropy. */
@@ -723,6 +729,14 @@ void choose_random_location(unsigned long input,
return;
}
+#ifdef CONFIG_X86_5LEVEL
+ if (__read_cr4() & X86_CR4_LA57) {
+ pgtable_l5_enabled = 1;
+ pgdir_shift = 48;
+ ptrs_per_p4d = 512;
+ }
+#endif
+
boot_params->hdr.loadflags |= KASLR_FLAG;
/* Prepare to add new identity pagetables on demand. */
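
The values patched in above follow directly from the page-table geometry. A small stand-alone C sketch (not part of the patch) of the arithmetic: every level indexes 512 = 2^9 entries, so the virtual-address width is pgdir_shift plus 9 bits.

#include <stdio.h>

int main(void)
{
	unsigned int pgdir_shift_4l = 39;	/* 4-level: PGD indexes VA bits 47..39 */
	unsigned int pgdir_shift_5l = 48;	/* 5-level: PGD indexes VA bits 56..48 */

	/* 512 entries per table => 9 more bits above pgdir_shift */
	printf("4-level paging: %u-bit virtual addresses\n", pgdir_shift_4l + 9);
	printf("5-level paging: %u-bit virtual addresses\n", pgdir_shift_5l + 9);
	return 0;
}
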
diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/kaslr_64.c
index b5e5e02..522d114 100644
--- a/arch/x86/boot/compressed/pagetable.c
+++ b/arch/x86/boot/compressed/kaslr_64.c
@@ -16,13 +16,6 @@
#define __pa(x) ((unsigned long)(x))
#define __va(x) ((void *)((unsigned long)(x)))
-/*
- * The pgtable.h and mm/ident_map.c includes make use of the SME related
- * information which is not used in the compressed image support. Un-define
- * the SME support to avoid any compile and link errors.
- */
-#undef CONFIG_AMD_MEM_ENCRYPT
-
/* No PAGE_TABLE_ISOLATION support needed either: */
#undef CONFIG_PAGE_TABLE_ISOLATION
@@ -85,13 +78,14 @@ static struct x86_mapping_info mapping_info;
/* Locates and clears a region for a new top level page table. */
void initialize_identity_maps(void)
{
- unsigned long sev_me_mask = get_sev_encryption_mask();
+ /* If running as an SEV guest, the encryption mask is required. */
+ set_sev_encryption_mask();
/* Init mapping_info with run-time function/buffer pointers. */
mapping_info.alloc_pgt_page = alloc_pgt_page;
mapping_info.context = &pgt_data;
- mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sev_me_mask;
- mapping_info.kernpg_flag = _KERNPG_TABLE | sev_me_mask;
+ mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask;
+ mapping_info.kernpg_flag = _KERNPG_TABLE;
/*
* It should be impossible for this not to already be true,
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index 54f5f66..eaa843a 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -88,9 +88,7 @@ ENTRY(get_sev_encryption_bit)
ENDPROC(get_sev_encryption_bit)
.code64
-ENTRY(get_sev_encryption_mask)
- xor %rax, %rax
-
+ENTRY(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT
push %rbp
push %rdx
@@ -101,9 +99,7 @@ ENTRY(get_sev_encryption_mask)
testl %eax, %eax
jz .Lno_sev_mask
- xor %rdx, %rdx
- bts %rax, %rdx /* Create the encryption mask */
- mov %rdx, %rax /* ... and return it */
+ bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
.Lno_sev_mask:
movq %rbp, %rsp /* Restore original stack pointer */
@@ -112,9 +108,16 @@ ENTRY(get_sev_encryption_mask)
pop %rbp
#endif
+ xor %rax, %rax
ret
-ENDPROC(get_sev_encryption_mask)
+ENDPROC(set_sev_encryption_mask)
.data
enc_bit:
.int 0xffffffff
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ .balign 8
+GLOBAL(sme_me_mask)
+ .quad 0
+#endif
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 98761a1..8dd1d5c 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -14,6 +14,7 @@
#include "misc.h"
#include "error.h"
+#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
@@ -169,16 +170,6 @@ void __puthex(unsigned long value)
}
}
-static bool l5_supported(void)
-{
- /* Check if leaf 7 is supported. */
- if (native_cpuid_eax(0) < 7)
- return 0;
-
- /* Check if la57 is supported. */
- return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
-}
-
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
@@ -309,6 +300,10 @@ static void parse_elf(void *output)
switch (phdr->p_type) {
case PT_LOAD:
+#ifdef CONFIG_X86_64
+ if ((phdr->p_align % 0x200000) != 0)
+ error("Alignment of LOAD segment isn't multiple of 2MB");
+#endif
#ifdef CONFIG_RELOCATABLE
dest = output;
dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
@@ -372,12 +367,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init();
debug_putstr("early console in extract_kernel\n");
- if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
- error("This linux kernel as configured requires 5-level paging\n"
- "This CPU does not support the required 'cr4.la57' feature\n"
- "Unable to boot - please use a kernel appropriate for your CPU\n");
- }
-
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
@@ -388,6 +377,11 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);
+#ifdef CONFIG_X86_64
+ /* Report address of 32-bit trampoline */
+ debug_putaddr(trampoline_32bit);
+#endif
+
/*
* The memory hole needed for the kernel is the larger of either
* the entire decompressed kernel plus relocation table, or the
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 9d323dc..9e11be4 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -12,6 +12,11 @@
#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN
+#ifdef CONFIG_X86_5LEVEL
+/* cpu_feature_enabled() cannot be used that early */
+#define pgtable_l5_enabled __pgtable_l5_enabled
+#endif
+
#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/elf.h>
@@ -109,6 +114,6 @@ static inline void console_init(void)
{ }
#endif
-unsigned long get_sev_encryption_mask(void);
+void set_sev_encryption_mask(void);
#endif
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
new file mode 100644
index 0000000..91f7563
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -0,0 +1,20 @@
+#ifndef BOOT_COMPRESSED_PAGETABLE_H
+#define BOOT_COMPRESSED_PAGETABLE_H
+
+#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
+
+#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
+
+#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE 0x60
+
+#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
+
+#ifndef __ASSEMBLER__
+
+extern unsigned long *trampoline_32bit;
+
+extern void trampoline_32bit_src(void *return_ptr);
+
+#endif /* __ASSEMBLER__ */
+#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index b4469a3..32af1cb 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,4 +1,6 @@
#include <asm/processor.h>
+#include "pgtable.h"
+#include "../string.h"
/*
* __force_order is used by special_insns.h asm code to force instruction
@@ -9,20 +11,144 @@
*/
unsigned long __force_order;
-int l5_paging_required(void)
+#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
+#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
+
+struct paging_config {
+ unsigned long trampoline_start;
+ unsigned long l5_required;
+};
+
+/* Buffer to preserve trampoline memory */
+static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
+
+/*
+ * The page table is going to be used instead of page table in the trampoline
+ * memory.
+ *
+ * It must not be in BSS as BSS is cleared after cleanup_trampoline().
+ */
+static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data);
+
+/*
+ * Trampoline address will be printed by extract_kernel() for debugging
+ * purposes.
+ *
+ * Avoid putting the pointer into .bss as it will be cleared between
+ * paging_prepare() and extract_kernel().
+ */
+unsigned long *trampoline_32bit __section(.data);
+
+struct paging_config paging_prepare(void)
{
- /* Check if leaf 7 is supported. */
+ struct paging_config paging_config = {};
+ unsigned long bios_start, ebda_start;
+
+ /*
+ * Check if LA57 is desired and supported.
+ *
+ * There are two parts to the check:
+ * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
+ * - if the machine supports 5-level paging:
+ * + CPUID leaf 7 is supported
+ * + the leaf has the feature bit set
+ *
+ * That's a substitute for boot_cpu_has() in early boot code.
+ */
+ if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
+ native_cpuid_eax(0) >= 7 &&
+ (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
+ paging_config.l5_required = 1;
+ }
+
+ /*
+ * Find a suitable spot for the trampoline.
+ * This code is based on reserve_bios_regions().
+ */
+
+ ebda_start = *(unsigned short *)0x40e << 4;
+ bios_start = *(unsigned short *)0x413 << 10;
- if (native_cpuid_eax(0) < 7)
- return 0;
+ if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
+ bios_start = BIOS_START_MAX;
+
+ if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
+ bios_start = ebda_start;
+
+ /* Place the trampoline just below the end of low memory, aligned to 4k */
+ paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
+ paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
+
+ trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+
+ /* Preserve trampoline memory */
+ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
+
+ /* Clear trampoline memory first */
+ memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
+
+ /* Copy trampoline code in place */
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+ &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
+
+ /*
+ * The code below prepares a page table in the trampoline memory.
+ *
+ * The new page table will be used by the trampoline code to switch
+ * from 4- to 5-level paging or vice versa.
+ *
+ * If no switch is required, the page table is unused: the trampoline
+ * code won't touch CR3.
+ */
+
+ /*
+ * We are not going to use the page table in trampoline memory if we
+ * are already in the desired paging mode.
+ */
+ if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+ goto out;
+
+ if (paging_config.l5_required) {
+ /*
+ * For 4- to 5-level paging transition, set up current CR3 as
+ * the first and the only entry in a new top-level page table.
+ */
+ trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+ } else {
+ unsigned long src;
+
+ /*
+ * For 5- to 4-level paging transition, copy page table pointed
+ * by first entry in the current top-level page table as our
+ * new top-level page table.
+ *
+ * We cannot just point to the page table from trampoline as it
+ * may be above 4G.
+ */
+ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
+ memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
+ (void *)src, PAGE_SIZE);
+ }
+
+out:
+ return paging_config;
+}
+
+void cleanup_trampoline(void)
+{
+ void *trampoline_pgtable;
- /* Check if la57 is supported. */
- if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
- return 0;
+ trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
- /* Check if 5-level paging has already been enabled. */
- if (native_read_cr4() & X86_CR4_LA57)
- return 0;
+ /*
+ * Move the top level page table out of trampoline memory,
+ * if it's there.
+ */
+ if ((void *)__native_read_cr3() == trampoline_pgtable) {
+ memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE);
+ native_write_cr3((unsigned long)top_pgtable);
+ }
- return 1;
+ /* Restore trampoline memory */
+ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
}
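
paging_prepare() above derives the trampoline location from the BIOS data area. A worked example as a stand-alone C program, using typical (invented) BDA values of 639 KiB base memory and an EBDA segment of 0x9fc0, together with the BIOS_START_MIN/MAX bounds from the patch:

#include <stdio.h>

int main(void)
{
	unsigned long ebda_start = 0x9fc0UL << 4;	/* word at 0x40e, in segment form */
	unsigned long bios_start = 639UL << 10;		/* word at 0x413, base memory in KiB */
	unsigned long trampoline_size = 2 * 4096UL;	/* TRAMPOLINE_32BIT_SIZE */
	unsigned long start;

	if (bios_start < 0x20000UL || bios_start > 0x9f000UL)	/* BIOS_START_MIN/MAX */
		bios_start = 0x9f000UL;
	if (ebda_start > 0x20000UL && ebda_start < bios_start)
		bios_start = ebda_start;

	/* Place the trampoline just below that boundary, 4 KiB aligned */
	start = (bios_start - trampoline_size) & ~0xfffUL;
	printf("trampoline placed at %#lx\n", start);	/* 0x9d000 for these inputs */
	return 0;
}
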
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 12e8484..e762ef4 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -94,23 +94,30 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
#define STACK_OFFSET 8*3
-#define HashKey 16*0 // store HashKey <<1 mod poly here
-#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here
-#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here
-#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here
-#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64
+
+#define AadHash 16*0
+#define AadLen 16*1
+#define InLen (16*1)+8
+#define PBlockEncKey 16*2
+#define OrigIV 16*3
+#define CurCount 16*4
+#define PBlockLen 16*5
+#define HashKey 16*6 // store HashKey <<1 mod poly here
+#define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here
+#define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here
+#define HashKey_4 16*9 // store HashKey^4 <<1 mod poly here
+#define HashKey_k 16*10 // store XOR of High 64 bits and Low 64
// bits of HashKey <<1 mod poly here
//(for Karatsuba purposes)
-#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64
+#define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64
// bits of HashKey^2 <<1 mod poly here
// (for Karatsuba purposes)
-#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64
+#define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64
// bits of HashKey^3 <<1 mod poly here
// (for Karatsuba purposes)
-#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64
+#define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64
// bits of HashKey^4 <<1 mod poly here
// (for Karatsuba purposes)
-#define VARIABLE_OFFSET 16*8
#define arg1 rdi
#define arg2 rsi
@@ -118,10 +125,11 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
#define arg4 rcx
#define arg5 r8
#define arg6 r9
-#define arg7 STACK_OFFSET+8(%r14)
-#define arg8 STACK_OFFSET+16(%r14)
-#define arg9 STACK_OFFSET+24(%r14)
-#define arg10 STACK_OFFSET+32(%r14)
+#define arg7 STACK_OFFSET+8(%rsp)
+#define arg8 STACK_OFFSET+16(%rsp)
+#define arg9 STACK_OFFSET+24(%rsp)
+#define arg10 STACK_OFFSET+32(%rsp)
+#define arg11 STACK_OFFSET+40(%rsp)
#define keysize 2*15*16(%arg1)
#endif
@@ -171,6 +179,332 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
#define TKEYP T1
#endif
+.macro FUNC_SAVE
+ push %r12
+ push %r13
+ push %r14
+#
+# states of %xmm registers %xmm6:%xmm15 not saved
+# all %xmm registers are clobbered
+#
+.endm
+
+
+.macro FUNC_RESTORE
+ pop %r14
+ pop %r13
+ pop %r12
+.endm
+
+# Precompute hashkeys.
+# Input: Hash subkey.
+# Output: HashKeys stored in gcm_context_data. Only needs to be called
+# once per key.
+# clobbers r12, and tmp xmm registers.
+.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
+ mov \SUBKEY, %r12
+ movdqu (%r12), \TMP3
+ movdqa SHUF_MASK(%rip), \TMP2
+ PSHUFB_XMM \TMP2, \TMP3
+
+ # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
+
+ movdqa \TMP3, \TMP2
+ psllq $1, \TMP3
+ psrlq $63, \TMP2
+ movdqa \TMP2, \TMP1
+ pslldq $8, \TMP2
+ psrldq $8, \TMP1
+ por \TMP2, \TMP3
+
+ # reduce HashKey<<1
+
+ pshufd $0x24, \TMP1, \TMP2
+ pcmpeqd TWOONE(%rip), \TMP2
+ pand POLY(%rip), \TMP2
+ pxor \TMP2, \TMP3
+ movdqa \TMP3, HashKey(%arg2)
+
+ movdqa \TMP3, \TMP5
+ pshufd $78, \TMP3, \TMP1
+ pxor \TMP3, \TMP1
+ movdqa \TMP1, HashKey_k(%arg2)
+
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^2<<1 (mod poly)
+ movdqa \TMP5, HashKey_2(%arg2)
+# HashKey_2 = HashKey^2<<1 (mod poly)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_2_k(%arg2)
+
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+ movdqa \TMP5, HashKey_3(%arg2)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_3_k(%arg2)
+
+ GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
+# TMP5 = HashKey^3<<1 (mod poly)
+ movdqa \TMP5, HashKey_4(%arg2)
+ pshufd $78, \TMP5, \TMP1
+ pxor \TMP5, \TMP1
+ movdqa \TMP1, HashKey_4_k(%arg2)
+.endm
+
+# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
+# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
+.macro GCM_INIT Iv SUBKEY AAD AADLEN
+ mov \AADLEN, %r11
+ mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
+ xor %r11, %r11
+ mov %r11, InLen(%arg2) # ctx_data.in_length = 0
+ mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
+ mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
+ mov \Iv, %rax
+ movdqu (%rax), %xmm0
+ movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
+
+ movdqa SHUF_MASK(%rip), %xmm2
+ PSHUFB_XMM %xmm2, %xmm0
+ movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
+
+ PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ movdqa HashKey(%arg2), %xmm13
+
+ CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
+ %xmm4, %xmm5, %xmm6
+.endm
+
+# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
+# struct has been initialized by GCM_INIT.
+# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
+# Clobbers rax, r10-r13, and xmm0-xmm15
+.macro GCM_ENC_DEC operation
+ movdqu AadHash(%arg2), %xmm8
+ movdqu HashKey(%arg2), %xmm13
+ add %arg5, InLen(%arg2)
+
+ xor %r11, %r11 # initialise the data pointer offset as zero
+ PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
+
+ sub %r11, %arg5 # sub partial block data used
+ mov %arg5, %r13 # save the number of bytes
+
+ and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
+ mov %r13, %r12
+ # Encrypt/Decrypt first few blocks
+
+ and $(3<<4), %r12
+ jz _initial_num_blocks_is_0_\@
+ cmp $(2<<4), %r12
+ jb _initial_num_blocks_is_1_\@
+ je _initial_num_blocks_is_2_\@
+_initial_num_blocks_is_3_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
+ sub $48, %r13
+ jmp _initial_blocks_\@
+_initial_num_blocks_is_2_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
+ sub $32, %r13
+ jmp _initial_blocks_\@
+_initial_num_blocks_is_1_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
+ sub $16, %r13
+ jmp _initial_blocks_\@
+_initial_num_blocks_is_0_\@:
+ INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
+_initial_blocks_\@:
+
+ # Main loop - Encrypt/Decrypt remaining blocks
+
+ cmp $0, %r13
+ je _zero_cipher_left_\@
+ sub $64, %r13
+ je _four_cipher_left_\@
+_crypt_by_4_\@:
+ GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \
+ %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
+ %xmm7, %xmm8, enc
+ add $64, %r11
+ sub $64, %r13
+ jne _crypt_by_4_\@
+_four_cipher_left_\@:
+ GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+_zero_cipher_left_\@:
+ movdqu %xmm8, AadHash(%arg2)
+ movdqu %xmm0, CurCount(%arg2)
+
+ mov %arg5, %r13
+ and $15, %r13 # %r13 = arg5 (mod 16)
+ je _multiple_of_16_bytes_\@
+
+ mov %r13, PBlockLen(%arg2)
+
+ # Handle the last <16 Byte block separately
+ paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
+ movdqu %xmm0, CurCount(%arg2)
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm0
+
+ ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
+ movdqu %xmm0, PBlockEncKey(%arg2)
+
+ cmp $16, %arg5
+ jge _large_enough_update_\@
+
+ lea (%arg4,%r11,1), %r10
+ mov %r13, %r12
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+ jmp _data_read_\@
+
+_large_enough_update_\@:
+ sub $16, %r11
+ add %r13, %r11
+
+ # receive the last <16 Byte block
+ movdqu (%arg4, %r11, 1), %xmm1
+
+ sub %r13, %r11
+ add $16, %r11
+
+ lea SHIFT_MASK+16(%rip), %r12
+ # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
+ # (r13 is the number of bytes in plaintext mod 16)
+ sub %r13, %r12
+ # get the appropriate shuffle mask
+ movdqu (%r12), %xmm2
+ # shift right 16-r13 bytes
+ PSHUFB_XMM %xmm2, %xmm1
+
+_data_read_\@:
+ lea ALL_F+16(%rip), %r12
+ sub %r13, %r12
+
+.ifc \operation, dec
+ movdqa %xmm1, %xmm2
+.endif
+ pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn)
+ movdqu (%r12), %xmm1
+ # get the appropriate mask to mask out top 16-r13 bytes of xmm0
+ pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
+.ifc \operation, dec
+ pand %xmm1, %xmm2
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10 ,%xmm2
+
+ pxor %xmm2, %xmm8
+.else
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10,%xmm0
+
+ pxor %xmm0, %xmm8
+.endif
+
+ movdqu %xmm8, AadHash(%arg2)
+.ifc \operation, enc
+ # GHASH computation for the last <16 byte block
+ movdqa SHUF_MASK(%rip), %xmm10
+ # shuffle xmm0 back to output as ciphertext
+ PSHUFB_XMM %xmm10, %xmm0
+.endif
+
+ # Output %r13 bytes
+ MOVQ_R64_XMM %xmm0, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left_\@
+ mov %rax, (%arg3 , %r11, 1)
+ add $8, %r11
+ psrldq $8, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ sub $8, %r13
+_less_than_8_bytes_left_\@:
+ mov %al, (%arg3, %r11, 1)
+ add $1, %r11
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left_\@
+_multiple_of_16_bytes_\@:
+.endm
+
+# GCM_COMPLETE Finishes update of tag of last partial block
+# Output: Authorization Tag (AUTH_TAG)
+# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
+.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
+ movdqu AadHash(%arg2), %xmm8
+ movdqu HashKey(%arg2), %xmm13
+
+ mov PBlockLen(%arg2), %r12
+
+ cmp $0, %r12
+ je _partial_done\@
+
+ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+
+_partial_done\@:
+ mov AadLen(%arg2), %r12 # %r12 = aadLen (number of bytes)
+ shl $3, %r12 # convert into number of bits
+ movd %r12d, %xmm15 # len(A) in %xmm15
+ mov InLen(%arg2), %r12
+ shl $3, %r12 # len(C) in bits (*128)
+ MOVQ_R64_XMM %r12, %xmm1
+
+ pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
+ pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
+ pxor %xmm15, %xmm8
+ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+ # final GHASH computation
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm8
+
+ movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0
+ ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
+ pxor %xmm8, %xmm0
+_return_T_\@:
+ mov \AUTHTAG, %r10 # %r10 = authTag
+ mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len
+ cmp $16, %r11
+ je _T_16_\@
+ cmp $8, %r11
+ jl _T_4_\@
+_T_8_\@:
+ MOVQ_R64_XMM %xmm0, %rax
+ mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
+ psrldq $8, %xmm0
+ cmp $0, %r11
+ je _return_T_done_\@
+_T_4_\@:
+ movd %xmm0, %eax
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ psrldq $4, %xmm0
+ cmp $0, %r11
+ je _return_T_done_\@
+_T_123_\@:
+ movd %xmm0, %eax
+ cmp $2, %r11
+ jl _T_1_\@
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done_\@
+ add $2, %r10
+ sar $16, %eax
+_T_1_\@:
+ mov %al, (%r10)
+ jmp _return_T_done_\@
+_T_16_\@:
+ movdqu %xmm0, (%r10)
+_return_T_done_\@:
+.endm
#ifdef __x86_64__
/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
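
Taken together, GCM_INIT, GCM_ENC_DEC and GCM_COMPLETE split one-shot GCM into an init/update/finalize flow so partial blocks can be carried across calls (the point of the AadHash/PBlockLen bookkeeping above). A declaration-only C sketch of the calling pattern the glue code is expected to follow; the function names here are invented for illustration only:

#include <stddef.h>
#include <stdint.h>

struct gcm_context_data;	/* layout sketched after the offset definitions above */

/* One init, then any number of update calls (e.g. per scatterlist entry),
 * then a single finalize that emits the authentication tag. */
void gcm_init(struct gcm_context_data *ctx, const void *aes_key_schedule,
	      const uint8_t *iv, const uint8_t *aad, size_t aad_len);
void gcm_enc_update(struct gcm_context_data *ctx, uint8_t *out,
		    const uint8_t *in, size_t len);
void gcm_dec_update(struct gcm_context_data *ctx, uint8_t *out,
		    const uint8_t *in, size_t len);
void gcm_finalize(struct gcm_context_data *ctx, uint8_t *auth_tag,
		  size_t auth_tag_len);
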
@@ -264,232 +598,188 @@ _read_next_byte_lt8_\@:
_done_read_partial_block_\@:
.endm
-/*
-* if a = number of total plaintext bytes
-* b = floor(a/16)
-* num_initial_blocks = b mod 4
-* encrypt the initial num_initial_blocks blocks and apply ghash on
-* the ciphertext
-* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
-* are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
-*/
-
-
-.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
- MOVADQ SHUF_MASK(%rip), %xmm14
- mov arg7, %r10 # %r10 = AAD
- mov arg8, %r11 # %r11 = aadLen
- pxor %xmm\i, %xmm\i
- pxor \XMM2, \XMM2
+# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted.
+# clobbers r10-11, xmm14
+.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
+ TMP6 TMP7
+ MOVADQ SHUF_MASK(%rip), %xmm14
+ mov \AAD, %r10 # %r10 = AAD
+ mov \AADLEN, %r11 # %r11 = aadLen
+ pxor \TMP7, \TMP7
+ pxor \TMP6, \TMP6
cmp $16, %r11
- jl _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
- movdqu (%r10), %xmm\i
- PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
- pxor %xmm\i, \XMM2
- GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ jl _get_AAD_rest\@
+_get_AAD_blocks\@:
+ movdqu (%r10), \TMP7
+ PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data
+ pxor \TMP7, \TMP6
+ GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
add $16, %r10
sub $16, %r11
cmp $16, %r11
- jge _get_AAD_blocks\num_initial_blocks\operation
+ jge _get_AAD_blocks\@
- movdqu \XMM2, %xmm\i
+ movdqu \TMP6, \TMP7
/* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
+_get_AAD_rest\@:
cmp $0, %r11
- je _get_AAD_done\num_initial_blocks\operation
-
- READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
- PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
- pxor \XMM2, %xmm\i
- GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+ je _get_AAD_done\@
-_get_AAD_done\num_initial_blocks\operation:
- xor %r11, %r11 # initialise the data pointer offset as zero
- # start AES for num_initial_blocks blocks
-
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), \XMM0 # XMM0 = Y0
- PSHUFB_XMM %xmm14, \XMM0
+ READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
+ PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data
+ pxor \TMP6, \TMP7
+ GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
+ movdqu \TMP7, \TMP6
-.if (\i == 5) || (\i == 6) || (\i == 7)
- MOVADQ ONE(%RIP),\TMP1
- MOVADQ (%arg1),\TMP2
-.irpc index, \i_seq
- paddd \TMP1, \XMM0 # INCR Y0
- movdqa \XMM0, %xmm\index
- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
- pxor \TMP2, %xmm\index
-.endr
- lea 0x10(%arg1),%r10
- mov keysize,%eax
- shr $2,%eax # 128->4, 192->6, 256->8
- add $5,%eax # 128->9, 192->11, 256->13
-
-aes_loop_initial_dec\num_initial_blocks:
- MOVADQ (%r10),\TMP1
-.irpc index, \i_seq
- AESENC \TMP1, %xmm\index
-.endr
- add $16,%r10
- sub $1,%eax
- jnz aes_loop_initial_dec\num_initial_blocks
-
- MOVADQ (%r10), \TMP1
-.irpc index, \i_seq
- AESENCLAST \TMP1, %xmm\index # Last Round
-.endr
-.irpc index, \i_seq
- movdqu (%arg3 , %r11, 1), \TMP1
- pxor \TMP1, %xmm\index
- movdqu %xmm\index, (%arg2 , %r11, 1)
- # write back plaintext/ciphertext for num_initial_blocks
- add $16, %r11
-
- movdqa \TMP1, %xmm\index
- PSHUFB_XMM %xmm14, %xmm\index
- # prepare plaintext/ciphertext for GHASH computation
-.endr
-.endif
-
- # apply GHASH on num_initial_blocks blocks
+_get_AAD_done\@:
+ movdqu \TMP6, AadHash(%arg2)
+.endm
-.if \i == 5
- pxor %xmm5, %xmm6
- GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm6, %xmm7
- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 6
- pxor %xmm6, %xmm7
- GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-.elseif \i == 7
- pxor %xmm7, %xmm8
- GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
+# between update calls.
+# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
+# Outputs encrypted bytes, and updates hash and partial info in gcm_context_data
+# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
+.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+ AAD_HASH operation
+ mov PBlockLen(%arg2), %r13
+ cmp $0, %r13
+ je _partial_block_done_\@ # Leave Macro if no partial blocks
+ # Read in input data without over reading
+ cmp $16, \PLAIN_CYPH_LEN
+ jl _fewer_than_16_bytes_\@
+ movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm
+ jmp _data_read_\@
+
+_fewer_than_16_bytes_\@:
+ lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
+ mov \PLAIN_CYPH_LEN, %r12
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
+
+ mov PBlockLen(%arg2), %r13
+
+_data_read_\@: # Finished reading in data
+
+ movdqu PBlockEncKey(%arg2), %xmm9
+ movdqu HashKey(%arg2), %xmm13
+
+ lea SHIFT_MASK(%rip), %r12
+
+ # adjust the shuffle mask pointer to be able to shift r13 bytes
+ # (r13 is the length of the partial block)
+ add %r13, %r12
+ movdqu (%r12), %xmm2 # get the appropriate shuffle mask
+ PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes
+
+.ifc \operation, dec
+ movdqa %xmm1, %xmm3
+ pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn)
+
+ mov \PLAIN_CYPH_LEN, %r10
+ add %r13, %r10
+	# Set r10 to be the amount of data left in PLAIN_CYPH_IN after filling
+ sub $16, %r10
+	# Determine if the partial block is not being filled and
+ # shift mask accordingly
+ jge _no_extra_mask_1_\@
+ sub %r10, %r12
+_no_extra_mask_1_\@:
+
+ movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out bottom r13 bytes of xmm9
+ pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9
+
+ pand %xmm1, %xmm3
+ movdqa SHUF_MASK(%rip), %xmm10
+ PSHUFB_XMM %xmm10, %xmm3
+ PSHUFB_XMM %xmm2, %xmm3
+ pxor %xmm3, \AAD_HASH
+
+ cmp $0, %r10
+ jl _partial_incomplete_1_\@
+
+ # GHASH computation for the last <16 Byte block
+ GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ xor %rax,%rax
+
+ mov %rax, PBlockLen(%arg2)
+ jmp _dec_done_\@
+_partial_incomplete_1_\@:
+ add \PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_dec_done_\@:
+ movdqu \AAD_HASH, AadHash(%arg2)
+.else
+ pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn)
+
+ mov \PLAIN_CYPH_LEN, %r10
+ add %r13, %r10
+	# Set r10 to be the amount of data left in PLAIN_CYPH_IN after filling
+ sub $16, %r10
+	# Determine if the partial block is not being filled and
+ # shift mask accordingly
+ jge _no_extra_mask_2_\@
+ sub %r10, %r12
+_no_extra_mask_2_\@:
+
+ movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out bottom r13 bytes of xmm9
+ pand %xmm1, %xmm9
+
+ movdqa SHUF_MASK(%rip), %xmm1
+ PSHUFB_XMM %xmm1, %xmm9
+ PSHUFB_XMM %xmm2, %xmm9
+ pxor %xmm9, \AAD_HASH
+
+ cmp $0, %r10
+ jl _partial_incomplete_2_\@
+
+ # GHASH computation for the last <16 Byte block
+ GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ xor %rax,%rax
+
+ mov %rax, PBlockLen(%arg2)
+ jmp _encode_done_\@
+_partial_incomplete_2_\@:
+ add \PLAIN_CYPH_LEN, PBlockLen(%arg2)
+_encode_done_\@:
+ movdqu \AAD_HASH, AadHash(%arg2)
+
+ movdqa SHUF_MASK(%rip), %xmm10
+ # shuffle xmm9 back to output as ciphertext
+ PSHUFB_XMM %xmm10, %xmm9
+ PSHUFB_XMM %xmm2, %xmm9
.endif
- cmp $64, %r13
- jl _initial_blocks_done\num_initial_blocks\operation
- # no need for precomputed values
-/*
-*
-* Precomputations for HashKey parallel with encryption of first 4 blocks.
-* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
-*/
- MOVADQ ONE(%rip), \TMP1
- paddd \TMP1, \XMM0 # INCR Y0
- MOVADQ \XMM0, \XMM1
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
-
- paddd \TMP1, \XMM0 # INCR Y0
- MOVADQ \XMM0, \XMM2
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
-
- paddd \TMP1, \XMM0 # INCR Y0
- MOVADQ \XMM0, \XMM3
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
-
- paddd \TMP1, \XMM0 # INCR Y0
- MOVADQ \XMM0, \XMM4
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-
- MOVADQ 0(%arg1),\TMP1
- pxor \TMP1, \XMM1
- pxor \TMP1, \XMM2
- pxor \TMP1, \XMM3
- pxor \TMP1, \XMM4
- movdqa \TMP3, \TMP5
- pshufd $78, \TMP3, \TMP1
- pxor \TMP3, \TMP1
- movdqa \TMP1, HashKey_k(%rsp)
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
- movdqa \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_2_k(%rsp)
-.irpc index, 1234 # do 4 rounds
- movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
-.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_3(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_3_k(%rsp)
-.irpc index, 56789 # do next 5 rounds
- movaps 0x10*\index(%arg1), \TMP1
- AESENC \TMP1, \XMM1
- AESENC \TMP1, \XMM2
- AESENC \TMP1, \XMM3
- AESENC \TMP1, \XMM4
-.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_4(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_4_k(%rsp)
- lea 0xa0(%arg1),%r10
- mov keysize,%eax
- shr $2,%eax # 128->4, 192->6, 256->8
- sub $4,%eax # 128->0, 192->2, 256->4
- jz aes_loop_pre_dec_done\num_initial_blocks
-
-aes_loop_pre_dec\num_initial_blocks:
- MOVADQ (%r10),\TMP2
-.irpc index, 1234
- AESENC \TMP2, %xmm\index
-.endr
- add $16,%r10
- sub $1,%eax
- jnz aes_loop_pre_dec\num_initial_blocks
-
-aes_loop_pre_dec_done\num_initial_blocks:
- MOVADQ (%r10), \TMP2
- AESENCLAST \TMP2, \XMM1
- AESENCLAST \TMP2, \XMM2
- AESENCLAST \TMP2, \XMM3
- AESENCLAST \TMP2, \XMM4
- movdqu 16*0(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM1
- movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM1
- movdqu 16*1(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM2
- movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM2
- movdqu 16*2(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM3
- movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM3
- movdqu 16*3(%arg3 , %r11 , 1), \TMP1
- pxor \TMP1, \XMM4
- movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
- movdqa \TMP1, \XMM4
- add $64, %r11
- PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
- pxor \XMMDst, \XMM1
-# combine GHASHed value with the corresponding ciphertext
- PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-
-_initial_blocks_done\num_initial_blocks\operation:
-
-.endm
+ # output encrypted Bytes
+ cmp $0, %r10
+ jl _partial_fill_\@
+ mov %r13, %r12
+ mov $16, %r13
+ # Set r13 to be the number of bytes to write out
+ sub %r12, %r13
+ jmp _count_set_\@
+_partial_fill_\@:
+ mov \PLAIN_CYPH_LEN, %r13
+_count_set_\@:
+ movdqa %xmm9, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left_\@
+ mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+ add $8, \DATA_OFFSET
+ psrldq $8, %xmm0
+ MOVQ_R64_XMM %xmm0, %rax
+ sub $8, %r13
+_less_than_8_bytes_left_\@:
+ movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+ add $1, \DATA_OFFSET
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left_\@
+_partial_block_done_\@:
+.endm # PARTIAL_BLOCK
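As an aside for reviewers, the bookkeeping that PARTIAL_BLOCK carries across update calls can be pictured in C. The sketch below is illustrative only: the helper and field names are hypothetical stand-ins for PBlockEncKey/PBlockLen, and the actual keystream generation and GHASH update remain in the assembly above.

	/* Illustrative sketch of PARTIAL_BLOCK's length accounting; not kernel code. */
	#include <stddef.h>
	#include <stdint.h>

	struct partial_state {
		uint8_t  enc_key[16];	/* saved E(K, Yn) for the open block (PBlockEncKey) */
		uint64_t partial_len;	/* bytes of that block already consumed (PBlockLen) */
	};

	/* Returns how many input bytes were used to top up the open partial block. */
	static size_t consume_partial(struct partial_state *st, uint8_t *out,
				      const uint8_t *in, size_t in_len)
	{
		size_t need, take, i;

		if (st->partial_len == 0)
			return 0;			/* no open block, nothing to do */

		need = 16 - st->partial_len;		/* room left in the open block */
		take = in_len < need ? in_len : need;

		for (i = 0; i < take; i++)		/* XOR against the saved keystream tail */
			out[i] = in[i] ^ st->enc_key[st->partial_len + i];

		if (take == need)
			st->partial_len = 0;		/* block completed: GHASH it and reset */
		else
			st->partial_len += take;	/* still short: carry the count forward */

		return take;
	}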
/*
* if a = number of total plaintext bytes
@@ -499,49 +789,19 @@ _initial_blocks_done\num_initial_blocks\operation:
* the ciphertext
* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
* are clobbered
-* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified
+* arg1, %arg2, %arg3 are used as pointers only, not modified
*/
-.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
-XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
- MOVADQ SHUF_MASK(%rip), %xmm14
- mov arg7, %r10 # %r10 = AAD
- mov arg8, %r11 # %r11 = aadLen
- pxor %xmm\i, %xmm\i
- pxor \XMM2, \XMM2
+.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
+ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+ MOVADQ SHUF_MASK(%rip), %xmm14
- cmp $16, %r11
- jl _get_AAD_rest\num_initial_blocks\operation
-_get_AAD_blocks\num_initial_blocks\operation:
- movdqu (%r10), %xmm\i
- PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
- pxor %xmm\i, \XMM2
- GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
- add $16, %r10
- sub $16, %r11
- cmp $16, %r11
- jge _get_AAD_blocks\num_initial_blocks\operation
+ movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0
- movdqu \XMM2, %xmm\i
-
- /* read the last <16B of AAD */
-_get_AAD_rest\num_initial_blocks\operation:
- cmp $0, %r11
- je _get_AAD_done\num_initial_blocks\operation
-
- READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i
- PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
- pxor \XMM2, %xmm\i
- GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
-
-_get_AAD_done\num_initial_blocks\operation:
- xor %r11, %r11 # initialise the data pointer offset as zero
# start AES for num_initial_blocks blocks
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), \XMM0 # XMM0 = Y0
- PSHUFB_XMM %xmm14, \XMM0
+ movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0
.if (\i == 5) || (\i == 6) || (\i == 7)
@@ -549,7 +809,11 @@ _get_AAD_done\num_initial_blocks\operation:
MOVADQ 0(%arg1),\TMP2
.irpc index, \i_seq
paddd \TMP1, \XMM0 # INCR Y0
+.ifc \operation, dec
+ movdqa \XMM0, %xmm\index
+.else
MOVADQ \XMM0, %xmm\index
+.endif
PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
pxor \TMP2, %xmm\index
.endr
@@ -558,25 +822,29 @@ _get_AAD_done\num_initial_blocks\operation:
shr $2,%eax # 128->4, 192->6, 256->8
add $5,%eax # 128->9, 192->11, 256->13
-aes_loop_initial_enc\num_initial_blocks:
+aes_loop_initial_\@:
MOVADQ (%r10),\TMP1
.irpc index, \i_seq
AESENC \TMP1, %xmm\index
.endr
add $16,%r10
sub $1,%eax
- jnz aes_loop_initial_enc\num_initial_blocks
+ jnz aes_loop_initial_\@
MOVADQ (%r10), \TMP1
.irpc index, \i_seq
AESENCLAST \TMP1, %xmm\index # Last Round
.endr
.irpc index, \i_seq
- movdqu (%arg3 , %r11, 1), \TMP1
+ movdqu (%arg4 , %r11, 1), \TMP1
pxor \TMP1, %xmm\index
- movdqu %xmm\index, (%arg2 , %r11, 1)
+ movdqu %xmm\index, (%arg3 , %r11, 1)
# write back plaintext/ciphertext for num_initial_blocks
add $16, %r11
+
+.ifc \operation, dec
+ movdqa \TMP1, %xmm\index
+.endif
PSHUFB_XMM %xmm14, %xmm\index
# prepare plaintext/ciphertext for GHASH computation
@@ -602,7 +870,7 @@ aes_loop_initial_enc\num_initial_blocks:
GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
.endif
cmp $64, %r13
- jl _initial_blocks_done\num_initial_blocks\operation
+ jl _initial_blocks_done\@
# no need for precomputed values
/*
*
@@ -631,17 +899,6 @@ aes_loop_initial_enc\num_initial_blocks:
pxor \TMP1, \XMM2
pxor \TMP1, \XMM3
pxor \TMP1, \XMM4
- movdqa \TMP3, \TMP5
- pshufd $78, \TMP3, \TMP1
- pxor \TMP3, \TMP1
- movdqa \TMP1, HashKey_k(%rsp)
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^2<<1 (mod poly)
- movdqa \TMP5, HashKey_2(%rsp)
-# HashKey_2 = HashKey^2<<1 (mod poly)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_2_k(%rsp)
.irpc index, 1234 # do 4 rounds
movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1
@@ -649,12 +906,6 @@ aes_loop_initial_enc\num_initial_blocks:
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_3(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_3_k(%rsp)
.irpc index, 56789 # do next 5 rounds
movaps 0x10*\index(%arg1), \TMP1
AESENC \TMP1, \XMM1
@@ -662,45 +913,56 @@ aes_loop_initial_enc\num_initial_blocks:
AESENC \TMP1, \XMM3
AESENC \TMP1, \XMM4
.endr
- GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
-# TMP5 = HashKey^3<<1 (mod poly)
- movdqa \TMP5, HashKey_4(%rsp)
- pshufd $78, \TMP5, \TMP1
- pxor \TMP5, \TMP1
- movdqa \TMP1, HashKey_4_k(%rsp)
lea 0xa0(%arg1),%r10
mov keysize,%eax
shr $2,%eax # 128->4, 192->6, 256->8
sub $4,%eax # 128->0, 192->2, 256->4
- jz aes_loop_pre_enc_done\num_initial_blocks
+ jz aes_loop_pre_done\@
-aes_loop_pre_enc\num_initial_blocks:
+aes_loop_pre_\@:
MOVADQ (%r10),\TMP2
.irpc index, 1234
AESENC \TMP2, %xmm\index
.endr
add $16,%r10
sub $1,%eax
- jnz aes_loop_pre_enc\num_initial_blocks
+ jnz aes_loop_pre_\@
-aes_loop_pre_enc_done\num_initial_blocks:
+aes_loop_pre_done\@:
MOVADQ (%r10), \TMP2
AESENCLAST \TMP2, \XMM1
AESENCLAST \TMP2, \XMM2
AESENCLAST \TMP2, \XMM3
AESENCLAST \TMP2, \XMM4
- movdqu 16*0(%arg3 , %r11 , 1), \TMP1
+ movdqu 16*0(%arg4 , %r11 , 1), \TMP1
pxor \TMP1, \XMM1
- movdqu 16*1(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+ movdqu \XMM1, 16*0(%arg3 , %r11 , 1)
+ movdqa \TMP1, \XMM1
+.endif
+ movdqu 16*1(%arg4 , %r11 , 1), \TMP1
pxor \TMP1, \XMM2
- movdqu 16*2(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+ movdqu \XMM2, 16*1(%arg3 , %r11 , 1)
+ movdqa \TMP1, \XMM2
+.endif
+ movdqu 16*2(%arg4 , %r11 , 1), \TMP1
pxor \TMP1, \XMM3
- movdqu 16*3(%arg3 , %r11 , 1), \TMP1
+.ifc \operation, dec
+ movdqu \XMM3, 16*2(%arg3 , %r11 , 1)
+ movdqa \TMP1, \XMM3
+.endif
+ movdqu 16*3(%arg4 , %r11 , 1), \TMP1
pxor \TMP1, \XMM4
- movdqu \XMM1, 16*0(%arg2 , %r11 , 1)
- movdqu \XMM2, 16*1(%arg2 , %r11 , 1)
- movdqu \XMM3, 16*2(%arg2 , %r11 , 1)
- movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
+.ifc \operation, dec
+ movdqu \XMM4, 16*3(%arg3 , %r11 , 1)
+ movdqa \TMP1, \XMM4
+.else
+ movdqu \XMM1, 16*0(%arg3 , %r11 , 1)
+ movdqu \XMM2, 16*1(%arg3 , %r11 , 1)
+ movdqu \XMM3, 16*2(%arg3 , %r11 , 1)
+ movdqu \XMM4, 16*3(%arg3 , %r11 , 1)
+.endif
add $64, %r11
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
@@ -710,14 +972,14 @@ aes_loop_pre_enc_done\num_initial_blocks:
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
-_initial_blocks_done\num_initial_blocks\operation:
+_initial_blocks_done\@:
.endm
/*
* encrypt 4 blocks at a time
* ghash the 4 previously encrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -735,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%rsp), \TMP5
+ movdqa HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
@@ -754,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%rsp), \TMP5
+ movdqa HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
@@ -769,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
- movdqa HashKey_3(%rsp), \TMP5
+ movdqa HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
@@ -782,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%rsp), \TMP5
+ movdqa HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
@@ -796,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
- movdqa HashKey_2(%rsp ), \TMP5
+ movdqa HashKey_2(%arg2), \TMP5
# Multiply TMP5 * HashKey using karatsuba
@@ -812,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%rsp), \TMP5
+ movdqa HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
@@ -830,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
- movdqa HashKey(%rsp), \TMP5
+ movdqa HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
@@ -842,37 +1104,37 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
mov keysize,%eax
shr $2,%eax # 128->4, 192->6, 256->8
sub $4,%eax # 128->0, 192->2, 256->4
- jz aes_loop_par_enc_done
+ jz aes_loop_par_enc_done\@
-aes_loop_par_enc:
+aes_loop_par_enc\@:
MOVADQ (%r10),\TMP3
.irpc index, 1234
AESENC \TMP3, %xmm\index
.endr
add $16,%r10
sub $1,%eax
- jnz aes_loop_par_enc
+ jnz aes_loop_par_enc\@
-aes_loop_par_enc_done:
+aes_loop_par_enc_done\@:
MOVADQ (%r10), \TMP3
AESENCLAST \TMP3, \XMM1 # Round 10
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%rsp), \TMP5
+ movdqa HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movdqu (%arg3,%r11,1), \TMP3
+ movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
- movdqu 16(%arg3,%r11,1), \TMP3
+ movdqu 16(%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
- movdqu 32(%arg3,%r11,1), \TMP3
+ movdqu 32(%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
- movdqu 48(%arg3,%r11,1), \TMP3
+ movdqu 48(%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
- movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer
- movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer
- movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer
- movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer
+ movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer
+ movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer
+ movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer
+ movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap
@@ -925,7 +1187,7 @@ aes_loop_par_enc_done:
/*
* decrypt 4 blocks at a time
* ghash the 4 previously decrypted ciphertext blocks
-* arg1, %arg2, %arg3 are used as pointers only, not modified
+* arg1, %arg3, %arg4 are used as pointers only, not modified
* %r11 is the data offset value
*/
.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \
@@ -943,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pshufd $78, \XMM5, \TMP6
pxor \XMM5, \TMP6
paddd ONE(%rip), \XMM0 # INCR CNT
- movdqa HashKey_4(%rsp), \TMP5
+ movdqa HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1
movdqa \XMM0, \XMM1
paddd ONE(%rip), \XMM0 # INCR CNT
@@ -962,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
pxor (%arg1), \XMM2
pxor (%arg1), \XMM3
pxor (%arg1), \XMM4
- movdqa HashKey_4_k(%rsp), \TMP5
+ movdqa HashKey_4_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
movaps 0x10(%arg1), \TMP1
AESENC \TMP1, \XMM1 # Round 1
@@ -977,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM6, \TMP1
pshufd $78, \XMM6, \TMP2
pxor \XMM6, \TMP2
- movdqa HashKey_3(%rsp), \TMP5
+ movdqa HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
movaps 0x30(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 3
@@ -990,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_3_k(%rsp), \TMP5
+ movdqa HashKey_3_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x50(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 5
@@ -1004,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM7, \TMP1
pshufd $78, \XMM7, \TMP2
pxor \XMM7, \TMP2
- movdqa HashKey_2(%rsp ), \TMP5
+ movdqa HashKey_2(%arg2), \TMP5
# Multiply TMP5 * HashKey using karatsuba
@@ -1020,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM2
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
- movdqa HashKey_2_k(%rsp), \TMP5
+ movdqa HashKey_2_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movaps 0x80(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 8
@@ -1038,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
movdqa \XMM8, \TMP1
pshufd $78, \XMM8, \TMP2
pxor \XMM8, \TMP2
- movdqa HashKey(%rsp), \TMP5
+ movdqa HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
movaps 0x90(%arg1), \TMP3
AESENC \TMP3, \XMM1 # Round 9
@@ -1050,40 +1312,40 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
mov keysize,%eax
shr $2,%eax # 128->4, 192->6, 256->8
sub $4,%eax # 128->0, 192->2, 256->4
- jz aes_loop_par_dec_done
+ jz aes_loop_par_dec_done\@
-aes_loop_par_dec:
+aes_loop_par_dec\@:
MOVADQ (%r10),\TMP3
.irpc index, 1234
AESENC \TMP3, %xmm\index
.endr
add $16,%r10
sub $1,%eax
- jnz aes_loop_par_dec
+ jnz aes_loop_par_dec\@
-aes_loop_par_dec_done:
+aes_loop_par_dec_done\@:
MOVADQ (%r10), \TMP3
AESENCLAST \TMP3, \XMM1 # last round
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
- movdqa HashKey_k(%rsp), \TMP5
+ movdqa HashKey_k(%arg2), \TMP5
PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
- movdqu (%arg3,%r11,1), \TMP3
+ movdqu (%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
- movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer
+ movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM1
- movdqu 16(%arg3,%r11,1), \TMP3
+ movdqu 16(%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
- movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer
+ movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM2
- movdqu 32(%arg3,%r11,1), \TMP3
+ movdqu 32(%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
- movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer
+ movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM3
- movdqu 48(%arg3,%r11,1), \TMP3
+ movdqu 48(%arg4,%r11,1), \TMP3
pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
- movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer
+ movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer
movdqa \TMP3, \XMM4
PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap
PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap
@@ -1143,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM1, \TMP6
pshufd $78, \XMM1, \TMP2
pxor \XMM1, \TMP2
- movdqa HashKey_4(%rsp), \TMP5
+ movdqa HashKey_4(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0
- movdqa HashKey_4_k(%rsp), \TMP4
+ movdqa HashKey_4_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
movdqa \XMM1, \XMMDst
movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1
@@ -1156,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM2, \TMP1
pshufd $78, \XMM2, \TMP2
pxor \XMM2, \TMP2
- movdqa HashKey_3(%rsp), \TMP5
+ movdqa HashKey_3(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0
- movdqa HashKey_3_k(%rsp), \TMP4
+ movdqa HashKey_3_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM2, \XMMDst
@@ -1171,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM3, \TMP1
pshufd $78, \XMM3, \TMP2
pxor \XMM3, \TMP2
- movdqa HashKey_2(%rsp), \TMP5
+ movdqa HashKey_2(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0
- movdqa HashKey_2_k(%rsp), \TMP4
+ movdqa HashKey_2_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM3, \XMMDst
@@ -1184,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
movdqa \XMM4, \TMP1
pshufd $78, \XMM4, \TMP2
pxor \XMM4, \TMP2
- movdqa HashKey(%rsp), \TMP5
+ movdqa HashKey(%arg2), \TMP5
PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1
PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0
- movdqa HashKey_k(%rsp), \TMP4
+ movdqa HashKey_k(%arg2), \TMP4
PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
pxor \TMP1, \TMP6
pxor \XMM4, \XMMDst
@@ -1256,6 +1518,8 @@ _esb_loop_\@:
.endm
/*****************************************************************************
* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* struct gcm_context_data *data
+* // Context data
* u8 *out, // Plaintext output. Encrypt in-place is allowed.
* const u8 *in, // Ciphertext input
* u64 plaintext_len, // Length of data in bytes for decryption.
@@ -1333,195 +1597,20 @@ _esb_loop_\@:
*
*****************************************************************************/
ENTRY(aesni_gcm_dec)
- push %r12
- push %r13
- push %r14
- mov %rsp, %r14
-/*
-* states of %xmm registers %xmm6:%xmm15 not saved
-* all %xmm registers are clobbered
-*/
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp # align rsp to 64 bytes
- mov %arg6, %r12
- movdqu (%r12), %xmm13 # %xmm13 = HashKey
- movdqa SHUF_MASK(%rip), %xmm2
- PSHUFB_XMM %xmm2, %xmm13
-
-
-# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH)
-
- movdqa %xmm13, %xmm2
- psllq $1, %xmm13
- psrlq $63, %xmm2
- movdqa %xmm2, %xmm1
- pslldq $8, %xmm2
- psrldq $8, %xmm1
- por %xmm2, %xmm13
-
- # Reduction
-
- pshufd $0x24, %xmm1, %xmm2
- pcmpeqd TWOONE(%rip), %xmm2
- pand POLY(%rip), %xmm2
- pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly)
-
-
- # Decrypt first few blocks
-
- movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly)
- mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext
- and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
- mov %r13, %r12
- and $(3<<4), %r12
- jz _initial_num_blocks_is_0_decrypt
- cmp $(2<<4), %r12
- jb _initial_num_blocks_is_1_decrypt
- je _initial_num_blocks_is_2_decrypt
-_initial_num_blocks_is_3_decrypt:
- INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
- sub $48, %r13
- jmp _initial_blocks_decrypted
-_initial_num_blocks_is_2_decrypt:
- INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
- sub $32, %r13
- jmp _initial_blocks_decrypted
-_initial_num_blocks_is_1_decrypt:
- INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
- sub $16, %r13
- jmp _initial_blocks_decrypted
-_initial_num_blocks_is_0_decrypt:
- INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
-_initial_blocks_decrypted:
- cmp $0, %r13
- je _zero_cipher_left_decrypt
- sub $64, %r13
- je _four_cipher_left_decrypt
-_decrypt_by_4:
- GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
- add $64, %r11
- sub $64, %r13
- jne _decrypt_by_4
-_four_cipher_left_decrypt:
- GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_decrypt:
- mov %arg4, %r13
- and $15, %r13 # %r13 = arg4 (mod 16)
- je _multiple_of_16_bytes_decrypt
-
- # Handle the last <16 byte block separately
-
- paddd ONE(%rip), %xmm0 # increment CNT to get Yn
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn)
-
- lea (%arg3,%r11,1), %r10
- mov %r13, %r12
- READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
- lea ALL_F+16(%rip), %r12
- sub %r13, %r12
- movdqa %xmm1, %xmm2
- pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn)
- movdqu (%r12), %xmm1
- # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
- pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0
- pand %xmm1, %xmm2
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10 ,%xmm2
-
- pxor %xmm2, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-
- # output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left_decrypt
- mov %rax, (%arg2 , %r11, 1)
- add $8, %r11
- psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
- sub $8, %r13
-_less_than_8_bytes_left_decrypt:
- mov %al, (%arg2, %r11, 1)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left_decrypt
-_multiple_of_16_bytes_decrypt:
- mov arg8, %r12 # %r13 = aadLen (number of bytes)
- shl $3, %r12 # convert into number of bits
- movd %r12d, %xmm15 # len(A) in %xmm15
- shl $3, %arg4 # len(C) in bits (*128)
- MOVQ_R64_XMM %arg4, %xmm1
- pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
- pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
- pxor %xmm15, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # final GHASH computation
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm8
+ FUNC_SAVE
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), %xmm0 # %xmm0 = Y0
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
- pxor %xmm8, %xmm0
-_return_T_decrypt:
- mov arg9, %r10 # %r10 = authTag
- mov arg10, %r11 # %r11 = auth_tag_len
- cmp $16, %r11
- je _T_16_decrypt
- cmp $8, %r11
- jl _T_4_decrypt
-_T_8_decrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
- add $8, %r10
- sub $8, %r11
- psrldq $8, %xmm0
- cmp $0, %r11
- je _return_T_done_decrypt
-_T_4_decrypt:
- movd %xmm0, %eax
- mov %eax, (%r10)
- add $4, %r10
- sub $4, %r11
- psrldq $4, %xmm0
- cmp $0, %r11
- je _return_T_done_decrypt
-_T_123_decrypt:
- movd %xmm0, %eax
- cmp $2, %r11
- jl _T_1_decrypt
- mov %ax, (%r10)
- cmp $2, %r11
- je _return_T_done_decrypt
- add $2, %r10
- sar $16, %eax
-_T_1_decrypt:
- mov %al, (%r10)
- jmp _return_T_done_decrypt
-_T_16_decrypt:
- movdqu %xmm0, (%r10)
-_return_T_done_decrypt:
- mov %r14, %rsp
- pop %r14
- pop %r13
- pop %r12
+ GCM_INIT %arg6, arg7, arg8, arg9
+ GCM_ENC_DEC dec
+ GCM_COMPLETE arg10, arg11
+ FUNC_RESTORE
ret
ENDPROC(aesni_gcm_dec)
/*****************************************************************************
* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* struct gcm_context_data *data
+* // Context data
* u8 *out, // Ciphertext output. Encrypt in-place is allowed.
* const u8 *in, // Plaintext input
* u64 plaintext_len, // Length of data in bytes for encryption.
@@ -1596,195 +1685,78 @@ ENDPROC(aesni_gcm_dec)
* poly = x^128 + x^127 + x^126 + x^121 + 1
***************************************************************************/
ENTRY(aesni_gcm_enc)
- push %r12
- push %r13
- push %r14
- mov %rsp, %r14
-#
-# states of %xmm registers %xmm6:%xmm15 not saved
-# all %xmm registers are clobbered
-#
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp
- mov %arg6, %r12
- movdqu (%r12), %xmm13
- movdqa SHUF_MASK(%rip), %xmm2
- PSHUFB_XMM %xmm2, %xmm13
-
-
-# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
-
- movdqa %xmm13, %xmm2
- psllq $1, %xmm13
- psrlq $63, %xmm2
- movdqa %xmm2, %xmm1
- pslldq $8, %xmm2
- psrldq $8, %xmm1
- por %xmm2, %xmm13
-
- # reduce HashKey<<1
-
- pshufd $0x24, %xmm1, %xmm2
- pcmpeqd TWOONE(%rip), %xmm2
- pand POLY(%rip), %xmm2
- pxor %xmm2, %xmm13
- movdqa %xmm13, HashKey(%rsp)
- mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly)
- and $-16, %r13
- mov %r13, %r12
+ FUNC_SAVE
- # Encrypt first few blocks
+ GCM_INIT %arg6, arg7, arg8, arg9
+ GCM_ENC_DEC enc
- and $(3<<4), %r12
- jz _initial_num_blocks_is_0_encrypt
- cmp $(2<<4), %r12
- jb _initial_num_blocks_is_1_encrypt
- je _initial_num_blocks_is_2_encrypt
-_initial_num_blocks_is_3_encrypt:
- INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
- sub $48, %r13
- jmp _initial_blocks_encrypted
-_initial_num_blocks_is_2_encrypt:
- INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
- sub $32, %r13
- jmp _initial_blocks_encrypted
-_initial_num_blocks_is_1_encrypt:
- INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
- sub $16, %r13
- jmp _initial_blocks_encrypted
-_initial_num_blocks_is_0_encrypt:
- INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
-_initial_blocks_encrypted:
-
- # Main loop - Encrypt remaining blocks
-
- cmp $0, %r13
- je _zero_cipher_left_encrypt
- sub $64, %r13
- je _four_cipher_left_encrypt
-_encrypt_by_4_encrypt:
- GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
- add $64, %r11
- sub $64, %r13
- jne _encrypt_by_4_encrypt
-_four_cipher_left_encrypt:
- GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_encrypt:
- mov %arg4, %r13
- and $15, %r13 # %r13 = arg4 (mod 16)
- je _multiple_of_16_bytes_encrypt
-
- # Handle the last <16 Byte block separately
- paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
-
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
-
- lea (%arg3,%r11,1), %r10
- mov %r13, %r12
- READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
- lea ALL_F+16(%rip), %r12
- sub %r13, %r12
- pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn)
- movdqu (%r12), %xmm1
- # get the appropriate mask to mask out top 16-r13 bytes of xmm0
- pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10,%xmm0
+ GCM_COMPLETE arg10, arg11
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_enc)
- pxor %xmm0, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # GHASH computation for the last <16 byte block
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
+/*****************************************************************************
+* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* struct gcm_context_data *data,
+* // context data
+* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
+* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
+* // concatenated with 0x00000001. 16-byte aligned pointer.
+* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
+* const u8 *aad, // Additional Authentication Data (AAD)
+* u64 aad_len) // Length of AAD in bytes.
+*/
+ENTRY(aesni_gcm_init)
+ FUNC_SAVE
+ GCM_INIT %arg3, %arg4,%arg5, %arg6
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_init)
- # shuffle xmm0 back to output as ciphertext
+/*****************************************************************************
+* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* struct gcm_context_data *data,
+* // context data
+* u8 *out, // Ciphertext output. Encrypt in-place is allowed.
+* const u8 *in, // Plaintext input
+* u64 plaintext_len) // Length of data in bytes for encryption.
+*/
+ENTRY(aesni_gcm_enc_update)
+ FUNC_SAVE
+ GCM_ENC_DEC enc
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_enc_update)
- # Output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left_encrypt
- mov %rax, (%arg2 , %r11, 1)
- add $8, %r11
- psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
- sub $8, %r13
-_less_than_8_bytes_left_encrypt:
- mov %al, (%arg2, %r11, 1)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left_encrypt
-_multiple_of_16_bytes_encrypt:
- mov arg8, %r12 # %r12 = addLen (number of bytes)
- shl $3, %r12
- movd %r12d, %xmm15 # len(A) in %xmm15
- shl $3, %arg4 # len(C) in bits (*128)
- MOVQ_R64_XMM %arg4, %xmm1
- pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
- pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
- pxor %xmm15, %xmm8
- GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
- # final GHASH computation
- movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap
+/*****************************************************************************
+* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* struct gcm_context_data *data,
+* // context data
+* u8 *out, // Plaintext output. Decrypt in-place is allowed.
+* const u8 *in, // Ciphertext input
+* u64 ciphertext_len) // Length of data in bytes for decryption.
+*/
+ENTRY(aesni_gcm_dec_update)
+ FUNC_SAVE
+ GCM_ENC_DEC dec
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_dec_update)
- mov %arg5, %rax # %rax = *Y0
- movdqu (%rax), %xmm0 # %xmm0 = Y0
- ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0)
- pxor %xmm8, %xmm0
-_return_T_encrypt:
- mov arg9, %r10 # %r10 = authTag
- mov arg10, %r11 # %r11 = auth_tag_len
- cmp $16, %r11
- je _T_16_encrypt
- cmp $8, %r11
- jl _T_4_encrypt
-_T_8_encrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
- add $8, %r10
- sub $8, %r11
- psrldq $8, %xmm0
- cmp $0, %r11
- je _return_T_done_encrypt
-_T_4_encrypt:
- movd %xmm0, %eax
- mov %eax, (%r10)
- add $4, %r10
- sub $4, %r11
- psrldq $4, %xmm0
- cmp $0, %r11
- je _return_T_done_encrypt
-_T_123_encrypt:
- movd %xmm0, %eax
- cmp $2, %r11
- jl _T_1_encrypt
- mov %ax, (%r10)
- cmp $2, %r11
- je _return_T_done_encrypt
- add $2, %r10
- sar $16, %eax
-_T_1_encrypt:
- mov %al, (%r10)
- jmp _return_T_done_encrypt
-_T_16_encrypt:
- movdqu %xmm0, (%r10)
-_return_T_done_encrypt:
- mov %r14, %rsp
- pop %r14
- pop %r13
- pop %r12
+/*****************************************************************************
+* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
+* struct gcm_context_data *data,
+* // context data
+* u8 *auth_tag, // Authenticated Tag output.
+* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
+* // 12 or 8.
+*/
+ENTRY(aesni_gcm_finalize)
+ FUNC_SAVE
+ GCM_COMPLETE %arg3 %arg4
+ FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_enc)
+ENDPROC(aesni_gcm_finalize)
#endif
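For orientation before the glue changes below, here is a minimal caller-side sketch of the new init/update/finalize entry points. It mirrors the gcmaes_crypt_by_sg() helper added further down, with the scatterwalk chunking elided; any split of the data into contiguous dst/src/len pieces between init and finalize is valid, repeating the update call per chunk.

	/* Sketch only: chunking is up to the caller. */
	static void gcm_sg_encrypt_sketch(void *aes_ctx, u8 *iv, u8 *hash_subkey,
					  const u8 *assoc, unsigned long assoclen,
					  u8 *dst, const u8 *src, unsigned long len,
					  u8 *auth_tag, unsigned long auth_tag_len)
	{
		struct gcm_context_data data AESNI_ALIGN_ATTR;

		kernel_fpu_begin();
		aesni_gcm_init(aes_ctx, &data, iv, hash_subkey, assoc, assoclen);
		aesni_gcm_enc_update(aes_ctx, &data, dst, src, len);	/* repeat per chunk */
		aesni_gcm_finalize(aes_ctx, &data, auth_tag, auth_tag_len);
		kernel_fpu_end();
	}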
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 34cf1c1..acbe7e8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -72,6 +72,21 @@ struct aesni_xts_ctx {
u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
};
+#define GCM_BLOCK_LEN 16
+
+struct gcm_context_data {
+ /* init, update and finalize context data */
+ u8 aad_hash[GCM_BLOCK_LEN];
+ u64 aad_length;
+ u64 in_length;
+ u8 partial_block_enc_key[GCM_BLOCK_LEN];
+ u8 orig_IV[GCM_BLOCK_LEN];
+ u8 current_counter[GCM_BLOCK_LEN];
+ u64 partial_block_len;
+ u64 unused;
+ u8 hash_keys[GCM_BLOCK_LEN * 8];
+};
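The assembly reaches into this structure through fixed byte offsets (AadHash, PBlockEncKey, CurCount, PBlockLen, HashKey and friends). Assuming those offset macros simply track the declaration order with no implicit padding, a hypothetical compile-time sanity check could look like this:

	/* Hypothetical layout check; the numeric offsets assume field order with no padding. */
	#include <linux/bug.h>
	#include <linux/stddef.h>

	static inline void gcm_context_data_layout_check(void)
	{
		BUILD_BUG_ON(offsetof(struct gcm_context_data, aad_hash) != 0);
		BUILD_BUG_ON(offsetof(struct gcm_context_data, aad_length) != 16);
		BUILD_BUG_ON(offsetof(struct gcm_context_data, in_length) != 24);
		BUILD_BUG_ON(offsetof(struct gcm_context_data, partial_block_enc_key) != 32);
		BUILD_BUG_ON(offsetof(struct gcm_context_data, orig_IV) != 48);
		BUILD_BUG_ON(offsetof(struct gcm_context_data, current_counter) != 64);
		BUILD_BUG_ON(offsetof(struct gcm_context_data, partial_block_len) != 80);
		BUILD_BUG_ON(offsetof(struct gcm_context_data, hash_keys) != 96);
	}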
+
asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
unsigned int key_len);
asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -105,6 +120,7 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
/* asmlinkage void aesni_gcm_enc()
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data. May be uninitialized.
* u8 *out, Ciphertext output. Encrypt in-place is allowed.
* const u8 *in, Plaintext input
* unsigned long plaintext_len, Length of data in bytes for encryption.
@@ -117,13 +133,15 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
* unsigned long auth_tag_len), Authenticated Tag Length in bytes.
* Valid values are 16 (most likely), 12 or 8.
*/
-asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_enc(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
/* asmlinkage void aesni_gcm_dec()
* void *ctx, AES Key schedule. Starts on a 16 byte boundary.
+ * struct gcm_context_data. May be uninitialized.
* u8 *out, Plaintext output. Decrypt in-place is allowed.
* const u8 *in, Ciphertext input
* unsigned long ciphertext_len, Length of data in bytes for decryption.
@@ -137,11 +155,28 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
* unsigned long auth_tag_len) Authenticated Tag Length in bytes.
* Valid values are 16 (most likely), 12 or 8.
*/
-asmlinkage void aesni_gcm_dec(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_dec(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
+/* Scatter / Gather routines, with args similar to above */
+asmlinkage void aesni_gcm_init(void *ctx,
+ struct gcm_context_data *gdata,
+ u8 *iv,
+ u8 *hash_subkey, const u8 *aad,
+ unsigned long aad_len);
+asmlinkage void aesni_gcm_enc_update(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in,
+ unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize(void *ctx,
+ struct gcm_context_data *gdata,
+ u8 *auth_tag, unsigned long auth_tag_len);
#ifdef CONFIG_AS_AVX
asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
@@ -167,15 +202,17 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-static void aesni_gcm_enc_avx(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx(void *ctx,
+ struct gcm_context_data *data, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
- aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
- aad_len, auth_tag, auth_tag_len);
+ aesni_gcm_enc(ctx, data, out, in,
+ plaintext_len, iv, hash_subkey, aad,
+ aad_len, auth_tag, auth_tag_len);
} else {
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -183,15 +220,17 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out,
}
}
-static void aesni_gcm_dec_avx(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx(void *ctx,
+ struct gcm_context_data *data, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
- aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
- aad_len, auth_tag, auth_tag_len);
+ aesni_gcm_dec(ctx, data, out, in,
+ ciphertext_len, iv, hash_subkey, aad,
+ aad_len, auth_tag, auth_tag_len);
} else {
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -218,15 +257,17 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
+static void aesni_gcm_enc_avx2(void *ctx,
+ struct gcm_context_data *data, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
- aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
- aad_len, auth_tag, auth_tag_len);
+ aesni_gcm_enc(ctx, data, out, in,
+ plaintext_len, iv, hash_subkey, aad,
+ aad_len, auth_tag, auth_tag_len);
} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
@@ -238,15 +279,17 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
}
}
-static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
+static void aesni_gcm_dec_avx2(void *ctx,
+ struct gcm_context_data *data, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
- aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
- aad, aad_len, auth_tag, auth_tag_len);
+ aesni_gcm_dec(ctx, data, out, in,
+ ciphertext_len, iv, hash_subkey,
+ aad, aad_len, auth_tag, auth_tag_len);
} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
@@ -259,15 +302,19 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
}
#endif
-static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out,
- const u8 *in, unsigned long plaintext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_enc_tfm)(void *ctx,
+ struct gcm_context_data *data, u8 *out,
+ const u8 *in, unsigned long plaintext_len,
+ u8 *iv, u8 *hash_subkey, const u8 *aad,
+ unsigned long aad_len, u8 *auth_tag,
+ unsigned long auth_tag_len);
-static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out,
- const u8 *in, unsigned long ciphertext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
+static void (*aesni_gcm_dec_tfm)(void *ctx,
+ struct gcm_context_data *data, u8 *out,
+ const u8 *in, unsigned long ciphertext_len,
+ u8 *iv, u8 *hash_subkey, const u8 *aad,
+ unsigned long aad_len, u8 *auth_tag,
+ unsigned long auth_tag_len);
static inline struct
aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
@@ -744,6 +791,127 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
return 0;
}
+static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
+ unsigned int assoclen, u8 *hash_subkey,
+ u8 *iv, void *aes_ctx)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+ struct gcm_context_data data AESNI_ALIGN_ATTR;
+ struct scatter_walk dst_sg_walk = {};
+ unsigned long left = req->cryptlen;
+ unsigned long len, srclen, dstlen;
+ struct scatter_walk assoc_sg_walk;
+ struct scatter_walk src_sg_walk;
+ struct scatterlist src_start[2];
+ struct scatterlist dst_start[2];
+ struct scatterlist *src_sg;
+ struct scatterlist *dst_sg;
+ u8 *src, *dst, *assoc;
+ u8 *assocmem = NULL;
+ u8 authTag[16];
+
+ if (!enc)
+ left -= auth_tag_len;
+
+ /* Linearize assoc, if not already linear */
+ if (req->src->length >= assoclen && req->src->length &&
+ (!PageHighMem(sg_page(req->src)) ||
+ req->src->offset + req->src->length < PAGE_SIZE)) {
+ scatterwalk_start(&assoc_sg_walk, req->src);
+ assoc = scatterwalk_map(&assoc_sg_walk);
+ } else {
+ /* assoc can be any length, so must be on heap */
+ assocmem = kmalloc(assoclen, GFP_ATOMIC);
+ if (unlikely(!assocmem))
+ return -ENOMEM;
+ assoc = assocmem;
+
+ scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
+ }
+
+ src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
+ scatterwalk_start(&src_sg_walk, src_sg);
+ if (req->src != req->dst) {
+ dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen);
+ scatterwalk_start(&dst_sg_walk, dst_sg);
+ }
+
+ kernel_fpu_begin();
+ aesni_gcm_init(aes_ctx, &data, iv,
+ hash_subkey, assoc, assoclen);
+ if (req->src != req->dst) {
+ while (left) {
+ src = scatterwalk_map(&src_sg_walk);
+ dst = scatterwalk_map(&dst_sg_walk);
+ srclen = scatterwalk_clamp(&src_sg_walk, left);
+ dstlen = scatterwalk_clamp(&dst_sg_walk, left);
+ len = min(srclen, dstlen);
+ if (len) {
+ if (enc)
+ aesni_gcm_enc_update(aes_ctx, &data,
+ dst, src, len);
+ else
+ aesni_gcm_dec_update(aes_ctx, &data,
+ dst, src, len);
+ }
+ left -= len;
+
+ scatterwalk_unmap(src);
+ scatterwalk_unmap(dst);
+ scatterwalk_advance(&src_sg_walk, len);
+ scatterwalk_advance(&dst_sg_walk, len);
+ scatterwalk_done(&src_sg_walk, 0, left);
+ scatterwalk_done(&dst_sg_walk, 1, left);
+ }
+ } else {
+ while (left) {
+ dst = src = scatterwalk_map(&src_sg_walk);
+ len = scatterwalk_clamp(&src_sg_walk, left);
+ if (len) {
+ if (enc)
+ aesni_gcm_enc_update(aes_ctx, &data,
+ src, src, len);
+ else
+ aesni_gcm_dec_update(aes_ctx, &data,
+ src, src, len);
+ }
+ left -= len;
+ scatterwalk_unmap(src);
+ scatterwalk_advance(&src_sg_walk, len);
+ scatterwalk_done(&src_sg_walk, 1, left);
+ }
+ }
+ aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len);
+ kernel_fpu_end();
+
+ if (!assocmem)
+ scatterwalk_unmap(assoc);
+ else
+ kfree(assocmem);
+
+ if (!enc) {
+ u8 authTagMsg[16];
+
+ /* Copy out original authTag */
+ scatterwalk_map_and_copy(authTagMsg, req->src,
+ req->assoclen + req->cryptlen -
+ auth_tag_len,
+ auth_tag_len, 0);
+
+ /* Compare generated tag with passed in tag. */
+ return crypto_memneq(authTagMsg, authTag, auth_tag_len) ?
+ -EBADMSG : 0;
+ }
+
+ /* Copy in the authTag */
+ scatterwalk_map_and_copy(authTag, req->dst,
+ req->assoclen + req->cryptlen,
+ auth_tag_len, 1);
+
+ return 0;
+}
+
static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
@@ -753,7 +921,14 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
struct scatter_walk src_sg_walk;
struct scatter_walk dst_sg_walk = {};
+ struct gcm_context_data data AESNI_ALIGN_ATTR;
+ if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+ aesni_gcm_enc_tfm == aesni_gcm_enc ||
+ req->cryptlen < AVX_GEN2_OPTSIZE) {
+ return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
+ aes_ctx);
+ }
if (sg_is_last(req->src) &&
(!PageHighMem(sg_page(req->src)) ||
req->src->offset + req->src->length <= PAGE_SIZE) &&
@@ -782,7 +957,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
}
kernel_fpu_begin();
- aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
+ aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv,
hash_subkey, assoc, assoclen,
dst + req->cryptlen, auth_tag_len);
kernel_fpu_end();
@@ -817,8 +992,15 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
u8 authTag[16];
struct scatter_walk src_sg_walk;
struct scatter_walk dst_sg_walk = {};
+ struct gcm_context_data data AESNI_ALIGN_ATTR;
int retval = 0;
+ if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
+ aesni_gcm_enc_tfm == aesni_gcm_enc ||
+ req->cryptlen < AVX_GEN2_OPTSIZE) {
+ return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
+ aes_ctx);
+ }
tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
if (sg_is_last(req->src) &&
@@ -849,7 +1031,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
kernel_fpu_begin();
- aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
+ aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv,
hash_subkey, assoc, assoclen,
authTag, auth_tag_len);
kernel_fpu_end();
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index f9eca34..3e0c07c 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -25,13 +25,13 @@
*
*/
-#include <asm/processor.h>
+#include <crypto/algapi.h>
#include <crypto/blowfish.h>
+#include <crypto/internal/skcipher.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <crypto/algapi.h>
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
@@ -77,20 +77,28 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return blowfish_setkey(&tfm->base, key, keylen);
+}
+
+static int ecb_crypt(struct skcipher_request *req,
void (*fn)(struct bf_ctx *, u8 *, const u8 *),
void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *))
{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes;
int err;
- err = blkcipher_walk_virt(desc, walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
+ while ((nbytes = walk.nbytes)) {
+ u8 *wsrc = walk.src.virt.addr;
+ u8 *wdst = walk.dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 4) {
@@ -116,34 +124,25 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
} while (nbytes >= bsize);
done:
- err = blkcipher_walk_done(desc, walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way);
+ return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way);
}
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way);
+ return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way);
}
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct bf_ctx *ctx,
+ struct skcipher_walk *walk)
{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
@@ -164,27 +163,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
return nbytes;
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ nbytes = __cbc_encrypt(ctx, &walk);
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct bf_ctx *ctx,
+ struct skcipher_walk *walk)
{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
@@ -245,24 +244,25 @@ done:
return nbytes;
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ nbytes = __cbc_decrypt(ctx, &walk);
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
-static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u8 keystream[BF_BLOCK_SIZE];
@@ -276,10 +276,8 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk)
crypto_inc(ctrblk, BF_BLOCK_SIZE);
}
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk)
{
- struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
@@ -332,29 +330,30 @@ done:
return nbytes;
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ nbytes = __ctr_crypt(ctx, &walk);
+ err = skcipher_walk_done(&walk, nbytes);
}
- if (walk.nbytes) {
- ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
+ if (nbytes) {
+ ctr_crypt_final(ctx, &walk);
+ err = skcipher_walk_done(&walk, 0);
}
return err;
}
-static struct crypto_alg bf_algs[4] = { {
+static struct crypto_alg bf_cipher_alg = {
.cra_name = "blowfish",
.cra_driver_name = "blowfish-asm",
.cra_priority = 200,
@@ -372,66 +371,50 @@ static struct crypto_alg bf_algs[4] = { {
.cia_decrypt = blowfish_decrypt,
}
}
-}, {
- .cra_name = "ecb(blowfish)",
- .cra_driver_name = "ecb-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
+};
+
+static struct skcipher_alg bf_skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(blowfish)",
+ .base.cra_driver_name = "ecb-blowfish-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = BF_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct bf_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = BF_MIN_KEY_SIZE,
+ .max_keysize = BF_MAX_KEY_SIZE,
+ .setkey = blowfish_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(blowfish)",
+ .base.cra_driver_name = "cbc-blowfish-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = BF_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct bf_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = BF_MIN_KEY_SIZE,
+ .max_keysize = BF_MAX_KEY_SIZE,
+ .ivsize = BF_BLOCK_SIZE,
+ .setkey = blowfish_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(blowfish)",
+ .base.cra_driver_name = "ctr-blowfish-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct bf_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = BF_MIN_KEY_SIZE,
+ .max_keysize = BF_MAX_KEY_SIZE,
+ .ivsize = BF_BLOCK_SIZE,
+ .chunksize = BF_BLOCK_SIZE,
+ .setkey = blowfish_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
},
-}, {
- .cra_name = "cbc(blowfish)",
- .cra_driver_name = "cbc-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = BF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(blowfish)",
- .cra_driver_name = "ctr-blowfish-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct bf_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-} };
+};
static bool is_blacklisted_cpu(void)
{
@@ -456,6 +439,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
+ int err;
+
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"blowfish-x86_64: performance on this CPU "
@@ -464,12 +449,23 @@ static int __init init(void)
return -ENODEV;
}
- return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
+ err = crypto_register_alg(&bf_cipher_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_skciphers(bf_skcipher_algs,
+ ARRAY_SIZE(bf_skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&bf_cipher_alg);
+
+ return err;
}
static void __exit fini(void)
{
- crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
+ crypto_unregister_alg(&bf_cipher_alg);
+ crypto_unregister_skciphers(bf_skcipher_algs,
+ ARRAY_SIZE(bf_skcipher_algs));
}
module_init(init);
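
The blowfish hunks above are the template for the rest of this series: each mode handler now takes a struct skcipher_request, which carries the source/destination scatterlists, length and IV that were previously passed alongside a blkcipher_desc. For orientation, a minimal sketch of an in-kernel caller of the converted cipher follows; the function name and the buf/key parameters are hypothetical, only the skcipher_* calls are the real API, and a synchronous implementation is requested so no completion callback is needed.

	/* Sketch only; needs <crypto/skcipher.h> and <linux/scatterlist.h>. */
	static int example_bf_cbc_encrypt(u8 *buf, unsigned int buflen,
					  const u8 *key, unsigned int keylen)
	{
		struct crypto_skcipher *tfm;
		struct skcipher_request *req;
		struct scatterlist sg;
		u8 iv[BF_BLOCK_SIZE] = {};	/* fixed IV for illustration only */
		int err;

		/* Mask out CRYPTO_ALG_ASYNC so encrypt() completes synchronously. */
		tfm = crypto_alloc_skcipher("cbc(blowfish)", 0, CRYPTO_ALG_ASYNC);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_skcipher_setkey(tfm, key, keylen);
		if (err)
			goto out_free_tfm;

		req = skcipher_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_free_tfm;
		}

		/* buflen must be a multiple of BF_BLOCK_SIZE for CBC. */
		sg_init_one(&sg, buf, buflen);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, buflen, iv);

		err = crypto_skcipher_encrypt(req);

		skcipher_request_free(req);
	out_free_tfm:
		crypto_free_skcipher(tfm);
		return err;
	}
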
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 60907c1..d4992e4 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -10,18 +10,15 @@
*
*/
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
@@ -150,413 +147,120 @@ static const struct common_glue_ctx camellia_dec_xts = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
{
- return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
+ return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+ &tfm->base.crt_flags);
}
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+ return glue_ecb_req_128bit(&camellia_enc, req);
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
- dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
- nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
-{
- return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
- CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
- nbytes);
-}
-
-static inline void camellia_fpu_end(bool fpu_enabled)
-{
- glue_fpu_end(fpu_enabled);
-}
-
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
-{
- return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
- &tfm->crt_flags);
-}
-
-struct crypt_priv {
- struct camellia_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
- camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
- }
-
- if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
- camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- }
-
- while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
- camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+ return glue_ecb_req_128bit(&camellia_dec, req);
}
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) {
- camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS;
- }
-
- if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
- camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- }
-
- while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
- camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+ req);
}
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->camellia_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- camellia_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
+ return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
}
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->camellia_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- camellia_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
+ return glue_ctr_req_128bit(&camellia_ctr, req);
}
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(camellia_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
+ return glue_xts_req_128bit(&camellia_enc_xts, req,
+ XTS_TWEAK_CAST(camellia_enc_blk),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(camellia_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
+ return glue_xts_req_128bit(&camellia_dec_xts, req,
+ XTS_TWEAK_CAST(camellia_enc_blk),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static struct crypto_alg cmll_algs[10] = { {
- .cra_name = "__ecb-camellia-aesni-avx2",
- .cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-camellia-aesni-avx2",
- .cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-camellia-aesni-avx2",
- .cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-camellia-aesni-avx2",
- .cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_camellia_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = lrw_camellia_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-camellia-aesni-avx2",
- .cra_driver_name = "__driver-xts-camellia-aesni-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = xts_camellia_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(camellia)",
- .cra_driver_name = "ecb-camellia-aesni-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(camellia)",
- .cra_driver_name = "cbc-camellia-aesni-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(camellia)",
- .cra_driver_name = "ctr-camellia-aesni-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(camellia)",
- .cra_driver_name = "lrw-camellia-aesni-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(camellia)",
- .cra_driver_name = "xts-camellia-aesni-avx2",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
+static struct skcipher_alg camellia_algs[] = {
+ {
+ .base.cra_name = "__ecb(camellia)",
+ .base.cra_driver_name = "__ecb-camellia-aesni-avx2",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(camellia)",
+ .base.cra_driver_name = "__cbc-camellia-aesni-avx2",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(camellia)",
+ .base.cra_driver_name = "__ctr-camellia-aesni-avx2",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .chunksize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }, {
+ .base.cra_name = "__xts(camellia)",
+ .base.cra_driver_name = "__xts-camellia-aesni-avx2",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = xts_camellia_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
static int __init camellia_aesni_init(void)
{
@@ -576,12 +280,15 @@ static int __init camellia_aesni_init(void)
return -ENODEV;
}
- return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+ return simd_register_skciphers_compat(camellia_algs,
+ ARRAY_SIZE(camellia_algs),
+ camellia_simd_algs);
}
static void __exit camellia_aesni_fini(void)
{
- crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+ simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+ camellia_simd_algs);
}
module_init(camellia_aesni_init);
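
In contrast to the plain-asm blowfish glue, the AVX2 glue no longer registers user-visible ablkcipher entries backed by ablk_helper/cryptd. The skciphers above are marked CRYPTO_ALG_INTERNAL under "__"-prefixed names, and simd_register_skciphers_compat() creates the user-facing wrappers under the plain names; those wrappers use the FPU directly when it is usable and defer to cryptd otherwise, so a request may complete asynchronously. A hedged fragment showing how a caller would wait on such a wrapper (setkey and scatterlist setup as in the earlier blowfish sketch; only the wait pattern is new here):

	DECLARE_CRYPTO_WAIT(wait);
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	int err;

	tfm = crypto_alloc_skcipher("xts(camellia)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_skcipher(tfm);
		return -ENOMEM;
	}

	/* ... crypto_skcipher_setkey(), sg_init_one() and
	 * skcipher_request_set_crypt() as in the blowfish sketch ... */

	/* Let crypto_req_done() complete the wait if cryptd runs the request. */
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
					   CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
	crypto_free_skcipher(tfm);
	return err;
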
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index d96429d..d09f652 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -10,18 +10,15 @@
*
*/
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
-#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
-#include <asm/fpu/api.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
+#include <crypto/xts.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
@@ -154,401 +151,142 @@ static const struct common_glue_ctx camellia_dec_xts = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
{
- return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+ return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen,
+ &tfm->base.crt_flags);
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
- dst, src, nbytes);
+ return glue_ecb_req_128bit(&camellia_enc, req);
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
- nbytes);
+ return glue_ecb_req_128bit(&camellia_dec, req);
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+ req);
}
-static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
- CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
- nbytes);
+ return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
}
-static inline void camellia_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
{
- glue_fpu_end(fpu_enabled);
+ return glue_ctr_req_128bit(&camellia_ctr, req);
}
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
- unsigned int key_len)
+int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
{
- return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
- &tfm->crt_flags);
+ struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ u32 *flags = &tfm->base.crt_flags;
+ int err;
+
+ err = xts_verify_key(tfm, key, keylen);
+ if (err)
+ return err;
+
+ /* first half of xts-key is for crypt */
+ err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+ if (err)
+ return err;
+
+ /* second half of xts-key is for tweak */
+ return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+ flags);
}
+EXPORT_SYMBOL_GPL(xts_camellia_setkey);
-struct crypt_priv {
- struct camellia_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
- camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- }
-
- while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
- camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
- }
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+ return glue_xts_req_128bit(&camellia_enc_xts, req,
+ XTS_TWEAK_CAST(camellia_enc_blk),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
- camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
- }
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
- camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
- nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+ return glue_xts_req_128bit(&camellia_dec_xts, req,
+ XTS_TWEAK_CAST(camellia_enc_blk),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->camellia_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- camellia_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->camellia_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- camellia_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(camellia_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(camellia_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg cmll_algs[10] = { {
- .cra_name = "__ecb-camellia-aesni",
- .cra_driver_name = "__driver-ecb-camellia-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-camellia-aesni",
- .cra_driver_name = "__driver-cbc-camellia-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-camellia-aesni",
- .cra_driver_name = "__driver-ctr-camellia-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-camellia-aesni",
- .cra_driver_name = "__driver-lrw-camellia-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_camellia_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = lrw_camellia_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-camellia-aesni",
- .cra_driver_name = "__driver-xts-camellia-aesni",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = xts_camellia_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(camellia)",
- .cra_driver_name = "ecb-camellia-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(camellia)",
- .cra_driver_name = "cbc-camellia-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(camellia)",
- .cra_driver_name = "ctr-camellia-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(camellia)",
- .cra_driver_name = "lrw-camellia-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(camellia)",
- .cra_driver_name = "xts-camellia-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
+static struct skcipher_alg camellia_algs[] = {
+ {
+ .base.cra_name = "__ecb(camellia)",
+ .base.cra_driver_name = "__ecb-camellia-aesni",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(camellia)",
+ .base.cra_driver_name = "__cbc-camellia-aesni",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(camellia)",
+ .base.cra_driver_name = "__ctr-camellia-aesni",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .chunksize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }, {
+ .base.cra_name = "__xts(camellia)",
+ .base.cra_driver_name = "__xts-camellia-aesni",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = xts_camellia_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
},
-} };
+};
+
+static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
static int __init camellia_aesni_init(void)
{
@@ -567,12 +305,15 @@ static int __init camellia_aesni_init(void)
return -ENODEV;
}
- return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+ return simd_register_skciphers_compat(camellia_algs,
+ ARRAY_SIZE(camellia_algs),
+ camellia_simd_algs);
}
static void __exit camellia_aesni_fini(void)
{
- crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+ simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
+ camellia_simd_algs);
}
module_init(camellia_aesni_init);
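
xts_camellia_setkey() now lives here (and is exported, since the AVX2 glue's alg table also points at it): after xts_verify_key() rejects invalid keys, the supplied key is split exactly in half between the data and tweak schedules. With camellia accepting 16, 24 or 32 byte keys, that matches the 2 * CAMELLIA_MIN_KEY_SIZE and 2 * CAMELLIA_MAX_KEY_SIZE limits advertised above:

	/*
	 * XTS key layout consumed by xts_camellia_setkey() (sizes in bytes):
	 *
	 *   key[0 .. keylen/2 - 1]       -> ctx->crypt_ctx (data key)
	 *   key[keylen/2 .. keylen - 1]  -> ctx->tweak_ctx (tweak key)
	 *
	 * e.g. keylen = 64 yields two independent 32-byte (256-bit) camellia
	 * schedules, consistent with min_keysize = 2 * 16 = 32 and
	 * max_keysize = 2 * 32 = 64 in the skcipher_alg entries.
	 */
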
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index af4840a..dcd5e0f7 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -23,15 +23,12 @@
*
*/
-#include <asm/processor.h>
#include <asm/unaligned.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
@@ -1272,13 +1269,19 @@ int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
}
EXPORT_SYMBOL_GPL(__camellia_setkey);
-static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int key_len)
{
- return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
+ return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len,
&tfm->crt_flags);
}
+static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ return camellia_setkey(&tfm->base, key, key_len);
+}
+
void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
{
u128 iv = *src;
@@ -1373,188 +1376,33 @@ static const struct common_glue_ctx camellia_dec_cbc = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
- dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
- nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct camellia_ctx *ctx = priv;
- int i;
-
- while (nbytes >= 2 * bsize) {
- camellia_enc_blk_2way(ctx, srcdst, srcdst);
- srcdst += bsize * 2;
- nbytes -= bsize * 2;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
- struct camellia_ctx *ctx = priv;
- int i;
-
- while (nbytes >= 2 * bsize) {
- camellia_dec_blk_2way(ctx, srcdst, srcdst);
- srcdst += bsize * 2;
- nbytes -= bsize * 2;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- camellia_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __camellia_setkey(&ctx->camellia_ctx, key,
- keylen - CAMELLIA_BLOCK_SIZE,
- &tfm->crt_flags);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table,
- key + keylen - CAMELLIA_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->camellia_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[2 * 4];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->camellia_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
+ return glue_ecb_req_128bit(&camellia_enc, req);
}
-void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
+ return glue_ecb_req_128bit(&camellia_dec, req);
}
-EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
-int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
- int err;
-
- err = xts_check_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
- flags);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk),
+ req);
}
-EXPORT_SYMBOL_GPL(xts_camellia_setkey);
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- le128 buf[2 * 4];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
+ return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
}
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- le128 buf[2 * 4];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
+ return glue_ctr_req_128bit(&camellia_ctr, req);
}
-static struct crypto_alg camellia_algs[6] = { {
+static struct crypto_alg camellia_cipher_alg = {
.cra_name = "camellia",
.cra_driver_name = "camellia-asm",
.cra_priority = 200,
@@ -1572,109 +1420,50 @@ static struct crypto_alg camellia_algs[6] = { {
.cia_decrypt = camellia_decrypt
}
}
-}, {
- .cra_name = "ecb(camellia)",
- .cra_driver_name = "ecb-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(camellia)",
- .cra_driver_name = "cbc-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(camellia)",
- .cra_driver_name = "ctr-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct camellia_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "lrw(camellia)",
- .cra_driver_name = "lrw-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_camellia_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE +
- CAMELLIA_BLOCK_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = lrw_camellia_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "xts(camellia)",
- .cra_driver_name = "xts-camellia-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = xts_camellia_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-} };
+};
+
+static struct skcipher_alg camellia_skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(camellia)",
+ .base.cra_driver_name = "ecb-camellia-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(camellia)",
+ .base.cra_driver_name = "cbc-camellia-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(camellia)",
+ .base.cra_driver_name = "ctr-camellia-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct camellia_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .chunksize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
static bool is_blacklisted_cpu(void)
{
@@ -1700,6 +1489,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init init(void)
{
+ int err;
+
if (!force && is_blacklisted_cpu()) {
printk(KERN_INFO
"camellia-x86_64: performance on this CPU "
@@ -1708,12 +1499,23 @@ static int __init init(void)
return -ENODEV;
}
- return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+ err = crypto_register_alg(&camellia_cipher_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_skciphers(camellia_skcipher_algs,
+ ARRAY_SIZE(camellia_skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&camellia_cipher_alg);
+
+ return err;
}
static void __exit fini(void)
{
- crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs));
+ crypto_unregister_alg(&camellia_cipher_alg);
+ crypto_unregister_skciphers(camellia_skcipher_algs,
+ ARRAY_SIZE(camellia_skcipher_algs));
}
module_init(init);
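
As with blowfish, the single-block "camellia" cipher stays a classic crypto_alg while the chained modes become skciphers, and the new camellia_setkey_skcipher() simply funnels back into the existing crypto_tfm-based setkey. A minimal sketch of that wrapper pattern, with a hypothetical my_cipher_setkey() standing in for the cipher-specific helper:

	static int my_setkey_skcipher(struct crypto_skcipher *tfm,
				      const u8 *key, unsigned int keylen)
	{
		/*
		 * struct crypto_skcipher embeds a struct crypto_tfm as ->base,
		 * so the pre-existing helper can be reused unchanged; error
		 * bits such as CRYPTO_TFM_RES_BAD_KEY_LEN that it sets end up
		 * in tfm->base.crt_flags for the skcipher core to report.
		 */
		return my_cipher_setkey(&tfm->base, key, keylen);
	}
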
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index dbea602..4103474 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -21,18 +21,14 @@
*
*/
-#include <linux/module.h>
-#include <linux/hardirq.h>
-#include <linux/types.h>
-#include <linux/crypto.h>
-#include <linux/err.h>
-#include <crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
-#include <crypto/cryptd.h>
-#include <crypto/ctr.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/glue_helper.h>
+#include <crypto/internal/simd.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/types.h>
#define CAST5_PARALLEL_BLOCKS 16
@@ -45,10 +41,17 @@ asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
__be64 *iv);
-static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ return cast5_setkey(&tfm->base, key, keylen);
+}
+
+static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
+ unsigned int nbytes)
{
return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
- NULL, fpu_enabled, nbytes);
+ walk, fpu_enabled, nbytes);
}
static inline void cast5_fpu_end(bool fpu_enabled)
@@ -56,29 +59,28 @@ static inline void cast5_fpu_end(bool fpu_enabled)
return glue_fpu_end(fpu_enabled);
}
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- bool enc)
+static int ecb_crypt(struct skcipher_request *req, bool enc)
{
bool fpu_enabled = false;
- struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes;
void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
int err;
- fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
-
- err = blkcipher_walk_virt(desc, walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
+ while ((nbytes = walk.nbytes)) {
+ u8 *wsrc = walk.src.virt.addr;
+ u8 *wdst = walk.dst.virt.addr;
- fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+ fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+ fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
do {
fn(ctx, wdst, wsrc);
@@ -103,76 +105,58 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
} while (nbytes >= bsize);
done:
- err = blkcipher_walk_done(desc, walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
return err;
}
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, true);
+ return ecb_crypt(req, true);
}
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, false);
+ return ecb_crypt(req, false);
}
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 *iv = (u64 *)walk->iv;
-
- do {
- *dst = *src ^ *iv;
- __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u64 *)walk->iv = *iv;
- return nbytes;
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ u64 *src = (u64 *)walk.src.virt.addr;
+ u64 *dst = (u64 *)walk.dst.virt.addr;
+ u64 *iv = (u64 *)walk.iv;
+
+ do {
+ *dst = *src ^ *iv;
+ __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+ iv = dst;
+ src++;
+ dst++;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+ *(u64 *)walk.iv = *iv;
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
+ struct skcipher_walk *walk)
{
- struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
@@ -224,31 +208,29 @@ done:
return nbytes;
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
bool fpu_enabled = false;
- struct blkcipher_walk walk;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ nbytes = __cbc_decrypt(ctx, &walk);
+ err = skcipher_walk_done(&walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
return err;
}
-static void ctr_crypt_final(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
- struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 *ctrblk = walk->iv;
u8 keystream[CAST5_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
@@ -261,10 +243,9 @@ static void ctr_crypt_final(struct blkcipher_desc *desc,
crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct skcipher_walk *walk,
+ struct cast5_ctx *ctx)
{
- struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
@@ -307,162 +288,80 @@ done:
return nbytes;
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
bool fpu_enabled = false;
- struct blkcipher_walk walk;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
+ nbytes = __ctr_crypt(&walk, ctx);
+ err = skcipher_walk_done(&walk, nbytes);
}
cast5_fpu_end(fpu_enabled);
if (walk.nbytes) {
- ctr_crypt_final(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
+ ctr_crypt_final(&walk, ctx);
+ err = skcipher_walk_done(&walk, 0);
}
return err;
}
+static struct skcipher_alg cast5_algs[] = {
+ {
+ .base.cra_name = "__ecb(cast5)",
+ .base.cra_driver_name = "__ecb-cast5-avx",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAST5_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct cast5_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAST5_MIN_KEY_SIZE,
+ .max_keysize = CAST5_MAX_KEY_SIZE,
+ .setkey = cast5_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(cast5)",
+ .base.cra_driver_name = "__cbc-cast5-avx",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAST5_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct cast5_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAST5_MIN_KEY_SIZE,
+ .max_keysize = CAST5_MAX_KEY_SIZE,
+ .ivsize = CAST5_BLOCK_SIZE,
+ .setkey = cast5_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(cast5)",
+ .base.cra_driver_name = "__ctr-cast5-avx",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct cast5_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAST5_MIN_KEY_SIZE,
+ .max_keysize = CAST5_MAX_KEY_SIZE,
+ .ivsize = CAST5_BLOCK_SIZE,
+ .chunksize = CAST5_BLOCK_SIZE,
+ .setkey = cast5_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
-static struct crypto_alg cast5_algs[6] = { {
- .cra_name = "__ecb-cast5-avx",
- .cra_driver_name = "__driver-ecb-cast5-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAST5_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct cast5_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST5_MIN_KEY_SIZE,
- .max_keysize = CAST5_MAX_KEY_SIZE,
- .setkey = cast5_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-cast5-avx",
- .cra_driver_name = "__driver-cbc-cast5-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAST5_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct cast5_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST5_MIN_KEY_SIZE,
- .max_keysize = CAST5_MAX_KEY_SIZE,
- .setkey = cast5_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-cast5-avx",
- .cra_driver_name = "__driver-ctr-cast5-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct cast5_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST5_MIN_KEY_SIZE,
- .max_keysize = CAST5_MAX_KEY_SIZE,
- .ivsize = CAST5_BLOCK_SIZE,
- .setkey = cast5_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "ecb(cast5)",
- .cra_driver_name = "ecb-cast5-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAST5_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST5_MIN_KEY_SIZE,
- .max_keysize = CAST5_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(cast5)",
- .cra_driver_name = "cbc-cast5-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAST5_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST5_MIN_KEY_SIZE,
- .max_keysize = CAST5_MAX_KEY_SIZE,
- .ivsize = CAST5_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(cast5)",
- .cra_driver_name = "ctr-cast5-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST5_MIN_KEY_SIZE,
- .max_keysize = CAST5_MAX_KEY_SIZE,
- .ivsize = CAST5_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-} };
+static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
static int __init cast5_init(void)
{
@@ -474,12 +373,15 @@ static int __init cast5_init(void)
return -ENODEV;
}
- return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+ return simd_register_skciphers_compat(cast5_algs,
+ ARRAY_SIZE(cast5_algs),
+ cast5_simd_algs);
}
static void __exit cast5_exit(void)
{
- crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+ simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
+ cast5_simd_algs);
}
module_init(cast5_init);
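Reassembled from the '+' lines of the cast5 hunks above (whitespace approximated), the converted CBC encrypt path reads as follows; it is shown only for readability and adds nothing beyond the patch:

static int cbc_encrypt(struct skcipher_request *req)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		/* CAST5 blocks are 64 bits, so the CBC chain works on u64 */
		u64 *src = (u64 *)walk.src.virt.addr;
		u64 *dst = (u64 *)walk.dst.virt.addr;
		u64 *iv = (u64 *)walk.iv;

		do {
			*dst = *src ^ *iv;	/* C_i = E(P_i ^ C_{i-1}) */
			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
			iv = dst;
			src++;
			dst++;
			nbytes -= bsize;
		} while (nbytes >= bsize);

		*(u64 *)walk.iv = *iv;	/* carry the IV into the next chunk */
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}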
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 50e6847..9fb66b5 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -24,19 +24,13 @@
*/
#include <linux/module.h>
-#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
-#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast6.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
+#include <crypto/internal/simd.h>
#include <crypto/xts.h>
-#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>
#define CAST6_PARALLEL_BLOCKS 8
@@ -56,6 +50,12 @@ asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst,
asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
+static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return cast6_setkey(&tfm->base, key, keylen);
+}
+
static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
@@ -157,164 +157,30 @@ static const struct common_glue_ctx cast6_dec_xts = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
- dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
- nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+ return glue_ecb_req_128bit(&cast6_enc, req);
}
-static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
- NULL, fpu_enabled, nbytes);
+ return glue_ecb_req_128bit(&cast6_dec, req);
}
-static inline void cast6_fpu_end(bool fpu_enabled)
+static int cbc_encrypt(struct skcipher_request *req)
{
- glue_fpu_end(fpu_enabled);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt),
+ req);
}
-struct crypt_priv {
- struct cast6_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- const unsigned int bsize = CAST6_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
- cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __cast6_encrypt(ctx->ctx, srcdst, srcdst);
+ return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req);
}
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- const unsigned int bsize = CAST6_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
- cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __cast6_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct cast6_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct cast6_ctx cast6_ctx;
-};
-
-static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
- &tfm->crt_flags);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[CAST6_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->cast6_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- cast6_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[CAST6_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->cast6_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- cast6_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
- struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
+ return glue_ctr_req_128bit(&cast6_ctr, req);
}
struct cast6_xts_ctx {
@@ -322,14 +188,14 @@ struct cast6_xts_ctx {
struct cast6_ctx crypt_ctx;
};
-static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
{
- struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
+ struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ u32 *flags = &tfm->base.crt_flags;
int err;
- err = xts_check_key(tfm, key, keylen);
+ err = xts_verify_key(tfm, key, keylen);
if (err)
return err;
@@ -343,245 +209,87 @@ static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
flags);
}
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
{
- struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(__cast6_encrypt),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
+ return glue_xts_req_128bit(&cast6_enc_xts, req,
+ XTS_TWEAK_CAST(__cast6_encrypt),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
{
- struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(__cast6_encrypt),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
+ return glue_xts_req_128bit(&cast6_dec_xts, req,
+ XTS_TWEAK_CAST(__cast6_encrypt),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static struct crypto_alg cast6_algs[10] = { {
- .cra_name = "__ecb-cast6-avx",
- .cra_driver_name = "__driver-ecb-cast6-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct cast6_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE,
- .setkey = cast6_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-cast6-avx",
- .cra_driver_name = "__driver-cbc-cast6-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct cast6_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE,
- .setkey = cast6_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-cast6-avx",
- .cra_driver_name = "__driver-ctr-cast6-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct cast6_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = cast6_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-cast6-avx",
- .cra_driver_name = "__driver-lrw-cast6-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct cast6_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE +
- CAST6_BLOCK_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE +
- CAST6_BLOCK_SIZE,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = lrw_cast6_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-cast6-avx",
- .cra_driver_name = "__driver-xts-cast6-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct cast6_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE * 2,
- .max_keysize = CAST6_MAX_KEY_SIZE * 2,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = xts_cast6_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(cast6)",
- .cra_driver_name = "ecb-cast6-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(cast6)",
- .cra_driver_name = "cbc-cast6-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(cast6)",
- .cra_driver_name = "ctr-cast6-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(cast6)",
- .cra_driver_name = "lrw-cast6-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE +
- CAST6_BLOCK_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE +
- CAST6_BLOCK_SIZE,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(cast6)",
- .cra_driver_name = "xts-cast6-avx",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = CAST6_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = CAST6_MIN_KEY_SIZE * 2,
- .max_keysize = CAST6_MAX_KEY_SIZE * 2,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
+static struct skcipher_alg cast6_algs[] = {
+ {
+ .base.cra_name = "__ecb(cast6)",
+ .base.cra_driver_name = "__ecb-cast6-avx",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAST6_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct cast6_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAST6_MIN_KEY_SIZE,
+ .max_keysize = CAST6_MAX_KEY_SIZE,
+ .setkey = cast6_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(cast6)",
+ .base.cra_driver_name = "__cbc-cast6-avx",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAST6_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct cast6_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAST6_MIN_KEY_SIZE,
+ .max_keysize = CAST6_MAX_KEY_SIZE,
+ .ivsize = CAST6_BLOCK_SIZE,
+ .setkey = cast6_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(cast6)",
+ .base.cra_driver_name = "__ctr-cast6-avx",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct cast6_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAST6_MIN_KEY_SIZE,
+ .max_keysize = CAST6_MAX_KEY_SIZE,
+ .ivsize = CAST6_BLOCK_SIZE,
+ .chunksize = CAST6_BLOCK_SIZE,
+ .setkey = cast6_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }, {
+ .base.cra_name = "__xts(cast6)",
+ .base.cra_driver_name = "__xts-cast6-avx",
+ .base.cra_priority = 200,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = CAST6_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct cast6_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = 2 * CAST6_MIN_KEY_SIZE,
+ .max_keysize = 2 * CAST6_MAX_KEY_SIZE,
+ .ivsize = CAST6_BLOCK_SIZE,
+ .setkey = xts_cast6_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
},
-} };
+};
+
+static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)];
static int __init cast6_init(void)
{
@@ -593,12 +301,15 @@ static int __init cast6_init(void)
return -ENODEV;
}
- return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+ return simd_register_skciphers_compat(cast6_algs,
+ ARRAY_SIZE(cast6_algs),
+ cast6_simd_algs);
}
static void __exit cast6_exit(void)
{
- crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+ simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs),
+ cast6_simd_algs);
}
module_init(cast6_init);
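Reassembled from the '+' lines above, the new cast6 skcipher entry points are thin wrappers: setkey forwards to the existing cast6_setkey() through tfm->base, and the XTS path defers to glue_xts_req_128bit():

static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
				 const u8 *key, unsigned int keylen)
{
	return cast6_setkey(&tfm->base, key, keylen);
}

static int xts_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);

	return glue_xts_req_128bit(&cast6_enc_xts, req,
				   XTS_TWEAK_CAST(__cast6_encrypt),
				   &ctx->tweak_ctx, &ctx->crypt_ctx);
}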
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 30c0a37..5c610d4 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -20,13 +20,13 @@
*
*/
-#include <asm/processor.h>
+#include <crypto/algapi.h>
#include <crypto/des.h>
+#include <crypto/internal/skcipher.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <crypto/algapi.h>
struct des3_ede_x86_ctx {
u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
@@ -83,18 +83,18 @@ static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src);
}
-static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
- const u32 *expkey)
+static int ecb_crypt(struct skcipher_request *req, const u32 *expkey)
{
- unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+ const unsigned int bsize = DES3_EDE_BLOCK_SIZE;
+ struct skcipher_walk walk;
unsigned int nbytes;
int err;
- err = blkcipher_walk_virt(desc, walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
+ while ((nbytes = walk.nbytes)) {
+ u8 *wsrc = walk.src.virt.addr;
+ u8 *wdst = walk.dst.virt.addr;
/* Process four block batch */
if (nbytes >= bsize * 3) {
@@ -121,36 +121,31 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
} while (nbytes >= bsize);
done:
- err = blkcipher_walk_done(desc, walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, ctx->enc_expkey);
+ return ecb_crypt(req, ctx->enc_expkey);
}
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_crypt(desc, &walk, ctx->dec_expkey);
+ return ecb_crypt(req, ctx->dec_expkey);
}
-static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
+ struct skcipher_walk *walk)
{
- struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
@@ -171,27 +166,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
return nbytes;
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_encrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ nbytes = __cbc_encrypt(ctx, &walk);
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
-static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx,
+ struct skcipher_walk *walk)
{
- struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
@@ -250,25 +245,26 @@ done:
return nbytes;
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- nbytes = __cbc_decrypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ nbytes = __cbc_decrypt(ctx, &walk);
+ err = skcipher_walk_done(&walk, nbytes);
}
return err;
}
static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
- struct blkcipher_walk *walk)
+ struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u8 keystream[DES3_EDE_BLOCK_SIZE];
@@ -282,10 +278,9 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
}
-static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx,
+ struct skcipher_walk *walk)
{
- struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = DES3_EDE_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
__be64 *src = (__be64 *)walk->src.virt.addr;
@@ -333,23 +328,24 @@ done:
return nbytes;
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
- nbytes = __ctr_crypt(desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ nbytes = __ctr_crypt(ctx, &walk);
+ err = skcipher_walk_done(&walk, nbytes);
}
- if (walk.nbytes) {
- ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
+ if (nbytes) {
+ ctr_crypt_final(ctx, &walk);
+ err = skcipher_walk_done(&walk, 0);
}
return err;
@@ -381,7 +377,14 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
-static struct crypto_alg des3_ede_algs[4] = { {
+static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key,
+ unsigned int keylen)
+{
+ return des3_ede_x86_setkey(&tfm->base, key, keylen);
+}
+
+static struct crypto_alg des3_ede_cipher = {
.cra_name = "des3_ede",
.cra_driver_name = "des3_ede-asm",
.cra_priority = 200,
@@ -399,66 +402,50 @@ static struct crypto_alg des3_ede_algs[4] = { {
.cia_decrypt = des3_ede_x86_decrypt,
}
}
-}, {
- .cra_name = "ecb(des3_ede)",
- .cra_driver_name = "ecb-des3_ede-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .setkey = des3_ede_x86_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(des3_ede)",
- .cra_driver_name = "cbc-des3_ede-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .setkey = des3_ede_x86_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(des3_ede)",
- .cra_driver_name = "ctr-des3_ede-asm",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .setkey = des3_ede_x86_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-} };
+};
+
+static struct skcipher_alg des3_ede_skciphers[] = {
+ {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "ecb-des3_ede-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .setkey = des3_ede_x86_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cbc-des3_ede-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .setkey = des3_ede_x86_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(des3_ede)",
+ .base.cra_driver_name = "ctr-des3_ede-asm",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .chunksize = DES3_EDE_BLOCK_SIZE,
+ .setkey = des3_ede_x86_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }
+};
static bool is_blacklisted_cpu(void)
{
@@ -483,17 +470,30 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
static int __init des3_ede_x86_init(void)
{
+ int err;
+
if (!force && is_blacklisted_cpu()) {
pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
return -ENODEV;
}
- return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+ err = crypto_register_alg(&des3_ede_cipher);
+ if (err)
+ return err;
+
+ err = crypto_register_skciphers(des3_ede_skciphers,
+ ARRAY_SIZE(des3_ede_skciphers));
+ if (err)
+ crypto_unregister_alg(&des3_ede_cipher);
+
+ return err;
}
static void __exit des3_ede_x86_fini(void)
{
- crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs));
+ crypto_unregister_alg(&des3_ede_cipher);
+ crypto_unregister_skciphers(des3_ede_skciphers,
+ ARRAY_SIZE(des3_ede_skciphers));
}
module_init(des3_ede_x86_init);
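Reassembled from the '+' lines above: registration is now split, keeping the bare "des3_ede" cipher as a crypto_alg while the ecb/cbc/ctr modes are registered as skciphers, and unwinding the cipher registration if the skcipher step fails:

static int __init des3_ede_x86_init(void)
{
	int err;

	if (!force && is_blacklisted_cpu()) {
		pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n");
		return -ENODEV;
	}

	err = crypto_register_alg(&des3_ede_cipher);
	if (err)
		return err;

	err = crypto_register_skciphers(des3_ede_skciphers,
					ARRAY_SIZE(des3_ede_skciphers));
	if (err)
		crypto_unregister_alg(&des3_ede_cipher);

	return err;
}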
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index d61e579..a78ef99 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -29,313 +29,212 @@
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/skcipher.h>
-#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
-static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req)
{
- void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
- unsigned int nbytes, i, func_bytes;
+ struct skcipher_walk walk;
bool fpu_enabled = false;
+ unsigned int nbytes;
int err;
- err = blkcipher_walk_virt(desc, walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk->nbytes)) {
- u8 *wsrc = walk->src.virt.addr;
- u8 *wdst = walk->dst.virt.addr;
+ while ((nbytes = walk.nbytes)) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ unsigned int func_bytes;
+ unsigned int i;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled, nbytes);
-
+ &walk, fpu_enabled, nbytes);
for (i = 0; i < gctx->num_funcs; i++) {
func_bytes = bsize * gctx->funcs[i].num_blocks;
- /* Process multi-block batch */
- if (nbytes >= func_bytes) {
- do {
- gctx->funcs[i].fn_u.ecb(ctx, wdst,
- wsrc);
+ if (nbytes < func_bytes)
+ continue;
- wsrc += func_bytes;
- wdst += func_bytes;
- nbytes -= func_bytes;
- } while (nbytes >= func_bytes);
+ /* Process multi-block batch */
+ do {
+ gctx->funcs[i].fn_u.ecb(ctx, dst, src);
+ src += func_bytes;
+ dst += func_bytes;
+ nbytes -= func_bytes;
+ } while (nbytes >= func_bytes);
- if (nbytes < bsize)
- goto done;
- }
+ if (nbytes < bsize)
+ break;
}
-
-done:
- err = blkcipher_walk_done(desc, walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
glue_fpu_end(fpu_enabled);
return err;
}
+EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
-int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+ struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return __glue_ecb_crypt_128bit(gctx, desc, &walk);
-}
-EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit);
-
-static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
- struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 *iv = (u128 *)walk->iv;
-
- do {
- u128_xor(dst, src, iv);
- fn(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u128 *)walk->iv = *iv;
- return nbytes;
-}
-
-int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct blkcipher_walk walk;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
while ((nbytes = walk.nbytes)) {
- nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ const u128 *src = (u128 *)walk.src.virt.addr;
+ u128 *dst = (u128 *)walk.dst.virt.addr;
+ u128 *iv = (u128 *)walk.iv;
+
+ do {
+ u128_xor(dst, src, iv);
+ fn(ctx, (u8 *)dst, (u8 *)dst);
+ iv = dst;
+ src++;
+ dst++;
+ nbytes -= bsize;
+ } while (nbytes >= bsize);
+
+ *(u128 *)walk.iv = *iv;
+ err = skcipher_walk_done(&walk, nbytes);
}
-
return err;
}
-EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit);
-static unsigned int
-__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req)
{
- void *ctx = crypto_blkcipher_ctx(desc->tfm);
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
const unsigned int bsize = 128 / 8;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 last_iv;
- unsigned int num_blocks, func_bytes;
- unsigned int i;
+ struct skcipher_walk walk;
+ bool fpu_enabled = false;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
+ while ((nbytes = walk.nbytes)) {
+ const u128 *src = walk.src.virt.addr;
+ u128 *dst = walk.dst.virt.addr;
+ unsigned int func_bytes, num_blocks;
+ unsigned int i;
+ u128 last_iv;
- last_iv = *src;
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ &walk, fpu_enabled, nbytes);
+ /* Start of the last block. */
+ src += nbytes / bsize - 1;
+ dst += nbytes / bsize - 1;
- for (i = 0; i < gctx->num_funcs; i++) {
- num_blocks = gctx->funcs[i].num_blocks;
- func_bytes = bsize * num_blocks;
+ last_iv = *src;
- /* Process multi-block batch */
- if (nbytes >= func_bytes) {
+ for (i = 0; i < gctx->num_funcs; i++) {
+ num_blocks = gctx->funcs[i].num_blocks;
+ func_bytes = bsize * num_blocks;
+
+ if (nbytes < func_bytes)
+ continue;
+
+ /* Process multi-block batch */
do {
- nbytes -= func_bytes - bsize;
src -= num_blocks - 1;
dst -= num_blocks - 1;
gctx->funcs[i].fn_u.cbc(ctx, dst, src);
- nbytes -= bsize;
+ nbytes -= func_bytes;
if (nbytes < bsize)
goto done;
- u128_xor(dst, dst, src - 1);
- src -= 1;
- dst -= 1;
+ u128_xor(dst, dst, --src);
+ dst--;
} while (nbytes >= func_bytes);
}
- }
-
done:
- u128_xor(dst, dst, (u128 *)walk->iv);
- *(u128 *)walk->iv = last_iv;
-
- return nbytes;
-}
-
-int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- const unsigned int bsize = 128 / 8;
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled, nbytes);
- nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ u128_xor(dst, dst, (u128 *)walk.iv);
+ *(u128 *)walk.iv = last_iv;
+ err = skcipher_walk_done(&walk, nbytes);
}
glue_fpu_end(fpu_enabled);
return err;
}
-EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
+EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
-static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
- struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
+int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req)
{
- void *ctx = crypto_blkcipher_ctx(desc->tfm);
- u8 *src = (u8 *)walk->src.virt.addr;
- u8 *dst = (u8 *)walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
- le128 ctrblk;
- u128 tmp;
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ const unsigned int bsize = 128 / 8;
+ struct skcipher_walk walk;
+ bool fpu_enabled = false;
+ unsigned int nbytes;
+ int err;
- be128_to_le128(&ctrblk, (be128 *)walk->iv);
+ err = skcipher_walk_virt(&walk, req, false);
- memcpy(&tmp, src, nbytes);
- fn_ctr(ctx, &tmp, &tmp, &ctrblk);
- memcpy(dst, &tmp, nbytes);
+ while ((nbytes = walk.nbytes) >= bsize) {
+ const u128 *src = walk.src.virt.addr;
+ u128 *dst = walk.dst.virt.addr;
+ unsigned int func_bytes, num_blocks;
+ unsigned int i;
+ le128 ctrblk;
- le128_to_be128((be128 *)walk->iv, &ctrblk);
-}
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ &walk, fpu_enabled, nbytes);
-static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- const unsigned int bsize = 128 / 8;
- void *ctx = crypto_blkcipher_ctx(desc->tfm);
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- le128 ctrblk;
- unsigned int num_blocks, func_bytes;
- unsigned int i;
+ be128_to_le128(&ctrblk, (be128 *)walk.iv);
- be128_to_le128(&ctrblk, (be128 *)walk->iv);
+ for (i = 0; i < gctx->num_funcs; i++) {
+ num_blocks = gctx->funcs[i].num_blocks;
+ func_bytes = bsize * num_blocks;
- /* Process multi-block batch */
- for (i = 0; i < gctx->num_funcs; i++) {
- num_blocks = gctx->funcs[i].num_blocks;
- func_bytes = bsize * num_blocks;
+ if (nbytes < func_bytes)
+ continue;
- if (nbytes >= func_bytes) {
+ /* Process multi-block batch */
do {
gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk);
-
src += num_blocks;
dst += num_blocks;
nbytes -= func_bytes;
} while (nbytes >= func_bytes);
if (nbytes < bsize)
- goto done;
+ break;
}
- }
-
-done:
- le128_to_be128((be128 *)walk->iv, &ctrblk);
- return nbytes;
-}
-
-int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- const unsigned int bsize = 128 / 8;
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, bsize);
- while ((nbytes = walk.nbytes) >= bsize) {
- fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled, nbytes);
- nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ le128_to_be128((be128 *)walk.iv, &ctrblk);
+ err = skcipher_walk_done(&walk, nbytes);
}
glue_fpu_end(fpu_enabled);
- if (walk.nbytes) {
- glue_ctr_crypt_final_128bit(
- gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit);
-
-static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
- void *ctx,
- struct blkcipher_desc *desc,
- struct blkcipher_walk *walk)
-{
- const unsigned int bsize = 128 / 8;
- unsigned int nbytes = walk->nbytes;
- u128 *src = (u128 *)walk->src.virt.addr;
- u128 *dst = (u128 *)walk->dst.virt.addr;
- unsigned int num_blocks, func_bytes;
- unsigned int i;
-
- /* Process multi-block batch */
- for (i = 0; i < gctx->num_funcs; i++) {
- num_blocks = gctx->funcs[i].num_blocks;
- func_bytes = bsize * num_blocks;
-
- if (nbytes >= func_bytes) {
- do {
- gctx->funcs[i].fn_u.xts(ctx, dst, src,
- (le128 *)walk->iv);
+ if (nbytes) {
+ le128 ctrblk;
+ u128 tmp;
- src += num_blocks;
- dst += num_blocks;
- nbytes -= func_bytes;
- } while (nbytes >= func_bytes);
+ be128_to_le128(&ctrblk, (be128 *)walk.iv);
+ memcpy(&tmp, walk.src.virt.addr, nbytes);
+ gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp,
+ &ctrblk);
+ memcpy(walk.dst.virt.addr, &tmp, nbytes);
+ le128_to_be128((be128 *)walk.iv, &ctrblk);
- if (nbytes < bsize)
- goto done;
- }
+ err = skcipher_walk_done(&walk, 0);
}
-done:
- return nbytes;
+ return err;
}
+EXPORT_SYMBOL_GPL(glue_ctr_req_128bit);
static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
void *ctx,
@@ -372,46 +271,6 @@ done:
return nbytes;
}
-/* for implementations implementing faster XTS IV generator */
-int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes,
- void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src),
- void *tweak_ctx, void *crypt_ctx)
-{
- const unsigned int bsize = 128 / 8;
- bool fpu_enabled = false;
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
-
- err = blkcipher_walk_virt(desc, &walk);
- nbytes = walk.nbytes;
- if (!nbytes)
- return err;
-
- /* set minimum length to bsize, for tweak_fn */
- fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled,
- nbytes < bsize ? bsize : nbytes);
-
- /* calculate first value of T */
- tweak_fn(tweak_ctx, walk.iv, walk.iv);
-
- while (nbytes) {
- nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
-
- err = blkcipher_walk_done(desc, &walk, nbytes);
- nbytes = walk.nbytes;
- }
-
- glue_fpu_end(fpu_enabled);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
-
int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
struct skcipher_request *req,
common_glue_func_t tweak_fn, void *tweak_ctx,
@@ -429,9 +288,9 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
return err;
/* set minimum length to bsize, for tweak_fn */
- fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled,
- nbytes < bsize ? bsize : nbytes);
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ &walk, fpu_enabled,
+ nbytes < bsize ? bsize : nbytes);
/* calculate first value of T */
tweak_fn(tweak_ctx, walk.iv, walk.iv);
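Reassembled from the '+' lines of the glue_helper.c hunk above (whitespace approximated), the new ECB helper shows the per-request skcipher walk and the glue_fpu_begin() call that now takes the walk instead of a blkcipher_desc:

int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
			struct skcipher_request *req)
{
	void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
	const unsigned int bsize = 128 / 8;
	struct skcipher_walk walk;
	bool fpu_enabled = false;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;
		unsigned int func_bytes;
		unsigned int i;

		fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
					     &walk, fpu_enabled, nbytes);
		for (i = 0; i < gctx->num_funcs; i++) {
			func_bytes = bsize * gctx->funcs[i].num_blocks;

			if (nbytes < func_bytes)
				continue;

			/* Process multi-block batch */
			do {
				gctx->funcs[i].fn_u.ecb(ctx, dst, src);
				src += func_bytes;
				dst += func_bytes;
				nbytes -= func_bytes;
			} while (nbytes >= func_bytes);

			if (nbytes < bsize)
				break;
		}
		err = skcipher_walk_done(&walk, nbytes);
	}

	glue_fpu_end(fpu_enabled);
	return err;
}
EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);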
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 870f6d8..03347b1 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -14,15 +14,12 @@
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
-#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
+#include <crypto/xts.h>
#include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
#define SERPENT_AVX2_PARALLEL_BLOCKS 16
@@ -40,6 +37,12 @@ asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst,
asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
@@ -136,403 +139,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+ return glue_ecb_req_128bit(&serpent_enc, req);
}
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+ return glue_ecb_req_128bit(&serpent_dec, req);
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
- dst, src, nbytes);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+ req);
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
- nbytes);
+ return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+ return glue_ctr_req_128bit(&serpent_ctr, req);
}
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
{
- /* since reusing AVX functions, starts using FPU at 8 parallel blocks */
- return glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
-}
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
- glue_fpu_end(fpu_enabled);
+ return glue_xts_req_128bit(&serpent_enc_xts, req,
+ XTS_TWEAK_CAST(__serpent_encrypt),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-struct crypt_priv {
- struct serpent_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
- serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
- }
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
- serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_encrypt(ctx->ctx, srcdst, srcdst);
+ return glue_xts_req_128bit(&serpent_dec_xts, req,
+ XTS_TWEAK_CAST(__serpent_encrypt),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) {
- serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS;
- }
-
- while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
- serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
- srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
- nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(__serpent_encrypt),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(__serpent_encrypt),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg srp_algs[10] = { {
- .cra_name = "__ecb-serpent-avx2",
- .cra_driver_name = "__driver-ecb-serpent-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[0].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-serpent-avx2",
- .cra_driver_name = "__driver-cbc-serpent-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[1].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-serpent-avx2",
- .cra_driver_name = "__driver-ctr-serpent-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[2].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-serpent-avx2",
- .cra_driver_name = "__driver-lrw-serpent-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[3].cra_list),
- .cra_exit = lrw_serpent_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = lrw_serpent_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-serpent-avx2",
- .cra_driver_name = "__driver-xts-serpent-avx2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[4].cra_list),
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(serpent)",
- .cra_driver_name = "ecb-serpent-avx2",
- .cra_priority = 600,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[5].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(serpent)",
- .cra_driver_name = "cbc-serpent-avx2",
- .cra_priority = 600,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[6].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(serpent)",
- .cra_driver_name = "ctr-serpent-avx2",
- .cra_priority = 600,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[7].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(serpent)",
- .cra_driver_name = "lrw-serpent-avx2",
- .cra_priority = 600,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[8].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(serpent)",
- .cra_driver_name = "xts-serpent-avx2",
- .cra_priority = 600,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(srp_algs[9].cra_list),
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
+static struct skcipher_alg serpent_algs[] = {
+ {
+ .base.cra_name = "__ecb(serpent)",
+ .base.cra_driver_name = "__ecb-serpent-avx2",
+ .base.cra_priority = 600,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(serpent)",
+ .base.cra_driver_name = "__cbc-serpent-avx2",
+ .base.cra_priority = 600,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(serpent)",
+ .base.cra_driver_name = "__ctr-serpent-avx2",
+ .base.cra_priority = 600,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .chunksize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }, {
+ .base.cra_name = "__xts(serpent)",
+ .base.cra_driver_name = "__xts-serpent-avx2",
+ .base.cra_priority = 600,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = 2 * SERPENT_MIN_KEY_SIZE,
+ .max_keysize = 2 * SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = xts_serpent_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
static int __init init(void)
{
@@ -548,12 +261,15 @@ static int __init init(void)
return -ENODEV;
}
- return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs));
+ return simd_register_skciphers_compat(serpent_algs,
+ ARRAY_SIZE(serpent_algs),
+ serpent_simd_algs);
}
static void __exit fini(void)
{
- crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs));
+ simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+ serpent_simd_algs);
}
module_init(init);
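
The hunks above reduce the AVX2 glue to a table of CRYPTO_ALG_INTERNAL skciphers plus the crypto/simd helper, which creates the async, FPU-safe wrappers that the user-visible names like "ecb(serpent)" resolve to. A minimal sketch of that registration pattern (not part of this patch; the example_* identifiers are placeholders and the single table entry stands in for the full ecb/cbc/ctr/xts list):

#include <linux/module.h>
#include <linux/errno.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/serpent.h>

static int example_setkey(struct crypto_skcipher *tfm, const u8 *key,
			  unsigned int keylen)
{
	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
}

static int example_crypt(struct skcipher_request *req)
{
	return -EOPNOTSUPP;	/* stand-in for the real glue_helper call */
}

static struct skcipher_alg example_algs[] = { {
	.base.cra_name		= "__ecb(serpent)",
	.base.cra_driver_name	= "__ecb-serpent-example",
	.base.cra_priority	= 100,
	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
	.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct serpent_ctx),
	.base.cra_module	= THIS_MODULE,
	.min_keysize		= SERPENT_MIN_KEY_SIZE,
	.max_keysize		= SERPENT_MAX_KEY_SIZE,
	.setkey			= example_setkey,
	.encrypt		= example_crypt,
	.decrypt		= example_crypt,
} };

static struct simd_skcipher_alg *example_simd_algs[ARRAY_SIZE(example_algs)];

static int __init example_init(void)
{
	/* A real driver first checks the required CPU features (-ENODEV). */
	return simd_register_skciphers_compat(example_algs,
					      ARRAY_SIZE(example_algs),
					      example_simd_algs);
}

static void __exit example_exit(void)
{
	simd_unregister_skciphers(example_algs, ARRAY_SIZE(example_algs),
				  example_simd_algs);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
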
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 6f778d3..458567e 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -24,21 +24,15 @@
*/
#include <linux/module.h>
-#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
-#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/serpent-avx.h>
#include <asm/crypto/glue_helper.h>
+#include <asm/crypto/serpent-avx.h>
/* 8-way parallel cipher functions */
asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
@@ -91,6 +85,31 @@ void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
}
EXPORT_SYMBOL_GPL(serpent_xts_dec);
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
+int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int err;
+
+ err = xts_verify_key(tfm, key, keylen);
+ if (err)
+ return err;
+
+ /* first half of xts-key is for crypt */
+ err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
+ if (err)
+ return err;
+
+ /* second half of xts-key is for tweak */
+ return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
+}
+EXPORT_SYMBOL_GPL(xts_serpent_setkey);
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 2,
@@ -170,423 +189,113 @@ static const struct common_glue_ctx serpent_dec_xts = {
} }
};
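
Only the closing braces of the common_glue_ctx tables survive as context in this hunk. Each table pairs a SIMD batch width with a function pointer, and the glue_helper request routines pick the widest entry that still fits the remaining data before falling back to the one-block C implementation. A sketch of the shape, mirroring serpent_enc with functions declared in this file (field names as in asm/crypto/glue_helper.h; example_ prefix is a placeholder):

static const struct common_glue_ctx example_serpent_enc = {
	.num_funcs = 2,
	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,

	.funcs = { {
		.num_blocks = SERPENT_PARALLEL_BLOCKS,	/* 8-way AVX path */
		.fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
	}, {
		.num_blocks = 1,			/* plain C fallback */
		.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
	} }
};
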
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
+ return glue_ecb_req_128bit(&serpent_enc, req);
}
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
+ return glue_ecb_req_128bit(&serpent_dec, req);
}
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
- dst, src, nbytes);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+ req);
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
- nbytes);
+ return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+ return glue_ctr_req_128bit(&serpent_ctr, req);
}
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
{
- return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
- NULL, fpu_enabled, nbytes);
-}
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-static inline void serpent_fpu_end(bool fpu_enabled)
-{
- glue_fpu_end(fpu_enabled);
+ return glue_xts_req_128bit(&serpent_enc_xts, req,
+ XTS_TWEAK_CAST(__serpent_encrypt),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-struct crypt_priv {
- struct serpent_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
- SERPENT_BLOCK_SIZE);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table, key + keylen -
- SERPENT_BLOCK_SIZE);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_setkey);
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
+ return glue_xts_req_128bit(&serpent_dec_xts, req,
+ XTS_TWEAK_CAST(__serpent_encrypt),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-void lrw_serpent_exit_tfm(struct crypto_tfm *tfm)
-{
- struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm);
-
-int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = xts_check_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-EXPORT_SYMBOL_GPL(xts_serpent_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(__serpent_encrypt),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(__serpent_encrypt),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg serpent_algs[10] = { {
- .cra_name = "__ecb-serpent-avx",
- .cra_driver_name = "__driver-ecb-serpent-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-serpent-avx",
- .cra_driver_name = "__driver-cbc-serpent-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-serpent-avx",
- .cra_driver_name = "__driver-ctr-serpent-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-serpent-avx",
- .cra_driver_name = "__driver-lrw-serpent-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_serpent_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = lrw_serpent_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-serpent-avx",
- .cra_driver_name = "__driver-xts-serpent-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(serpent)",
- .cra_driver_name = "ecb-serpent-avx",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(serpent)",
- .cra_driver_name = "cbc-serpent-avx",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(serpent)",
- .cra_driver_name = "ctr-serpent-avx",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(serpent)",
- .cra_driver_name = "lrw-serpent-avx",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(serpent)",
- .cra_driver_name = "xts-serpent-avx",
- .cra_priority = 500,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
+static struct skcipher_alg serpent_algs[] = {
+ {
+ .base.cra_name = "__ecb(serpent)",
+ .base.cra_driver_name = "__ecb-serpent-avx",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(serpent)",
+ .base.cra_driver_name = "__cbc-serpent-avx",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(serpent)",
+ .base.cra_driver_name = "__ctr-serpent-avx",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .chunksize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }, {
+ .base.cra_name = "__xts(serpent)",
+ .base.cra_driver_name = "__xts-serpent-avx",
+ .base.cra_priority = 500,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = 2 * SERPENT_MIN_KEY_SIZE,
+ .max_keysize = 2 * SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = xts_serpent_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
},
-} };
+};
+
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
static int __init serpent_init(void)
{
@@ -598,12 +307,15 @@ static int __init serpent_init(void)
return -ENODEV;
}
- return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+ return simd_register_skciphers_compat(serpent_algs,
+ ARRAY_SIZE(serpent_algs),
+ serpent_simd_algs);
}
static void __exit serpent_exit(void)
{
- crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+ simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+ serpent_simd_algs);
}
module_init(serpent_init);
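
Callers are unaffected by the conversion: they keep allocating the plain algorithm name and transparently get the simd wrapper. A hypothetical one-shot, in-place, synchronous usage sketch (example_* is a placeholder; key must be a valid double-length XTS key and iv the 16-byte tweak):

#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int example_xts_serpent_encrypt(u8 *buf, unsigned int len,
				       const u8 *key, unsigned int keylen,
				       u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	/* Resolves to the highest-priority provider, e.g. the AVX2 wrapper. */
	tfm = crypto_alloc_skcipher("xts(serpent)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
					   CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	/* Blocks until the (possibly asynchronous) request completes. */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}
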
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index ac0e831..3dafe13 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -30,21 +30,22 @@
*/
#include <linux/module.h>
-#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
-#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
-#include <crypto/serpent.h>
-#include <crypto/cryptd.h>
#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
+#include <crypto/internal/simd.h>
+#include <crypto/serpent.h>
#include <asm/crypto/serpent-sse2.h>
#include <asm/crypto/glue_helper.h>
+static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
+}
+
static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
{
u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
@@ -139,464 +140,79 @@ static const struct common_glue_ctx serpent_dec_cbc = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc,
- dst, src, nbytes);
+ return glue_ecb_req_128bit(&serpent_enc, req);
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src,
- nbytes);
+ return glue_ecb_req_128bit(&serpent_dec, req);
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt),
+ req);
}
-static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS,
- NULL, fpu_enabled, nbytes);
+ return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
}
-static inline void serpent_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
{
- glue_fpu_end(fpu_enabled);
+ return glue_ctr_req_128bit(&serpent_ctr, req);
}
-struct crypt_priv {
- struct serpent_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_encrypt(ctx->ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = SERPENT_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- __serpent_decrypt(ctx->ctx, srcdst, srcdst);
-}
-
-struct serpent_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct serpent_ctx serpent_ctx;
-};
-
-static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
- SERPENT_BLOCK_SIZE);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table, key + keylen -
- SERPENT_BLOCK_SIZE);
-}
-
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->serpent_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
-{
- struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
-}
-
-struct serpent_xts_ctx {
- struct serpent_ctx tweak_ctx;
- struct serpent_ctx crypt_ctx;
+static struct skcipher_alg serpent_algs[] = {
+ {
+ .base.cra_name = "__ecb(serpent)",
+ .base.cra_driver_name = "__ecb-serpent-sse2",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(serpent)",
+ .base.cra_driver_name = "__cbc-serpent-sse2",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = SERPENT_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(serpent)",
+ .base.cra_driver_name = "__ctr-serpent-sse2",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct serpent_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = SERPENT_MIN_KEY_SIZE,
+ .max_keysize = SERPENT_MAX_KEY_SIZE,
+ .ivsize = SERPENT_BLOCK_SIZE,
+ .chunksize = SERPENT_BLOCK_SIZE,
+ .setkey = serpent_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
};
-static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = xts_check_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- le128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->crypt_ctx,
- .fpu_enabled = false,
- };
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = xts_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- le128 buf[SERPENT_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->crypt_ctx,
- .fpu_enabled = false,
- };
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = xts_crypt(desc, dst, src, nbytes, &req);
- serpent_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static struct crypto_alg serpent_algs[10] = { {
- .cra_name = "__ecb-serpent-sse2",
- .cra_driver_name = "__driver-ecb-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-serpent-sse2",
- .cra_driver_name = "__driver-cbc-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = serpent_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-serpent-sse2",
- .cra_driver_name = "__driver-ctr-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct serpent_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-serpent-sse2",
- .cra_driver_name = "__driver-lrw-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = lrw_serpent_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-serpent-sse2",
- .cra_driver_name = "__driver-xts-serpent-sse2",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(serpent)",
- .cra_driver_name = "ecb-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(serpent)",
- .cra_driver_name = "cbc-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(serpent)",
- .cra_driver_name = "ctr-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(serpent)",
- .cra_driver_name = "lrw-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE +
- SERPENT_BLOCK_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(serpent)",
- .cra_driver_name = "xts-serpent-sse2",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = SERPENT_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = SERPENT_MIN_KEY_SIZE * 2,
- .max_keysize = SERPENT_MAX_KEY_SIZE * 2,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-} };
+static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
static int __init serpent_sse2_init(void)
{
@@ -605,12 +221,15 @@ static int __init serpent_sse2_init(void)
return -ENODEV;
}
- return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+ return simd_register_skciphers_compat(serpent_algs,
+ ARRAY_SIZE(serpent_algs),
+ serpent_simd_algs);
}
static void __exit serpent_sse2_exit(void)
{
- crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs));
+ simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
+ serpent_simd_algs);
}
module_init(serpent_sse2_init);
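
All three SSE2 handlers above are now one-line calls into the glue_helper request API. Conceptually those helpers walk the request and process whole blocks per step, batching the kernel_fpu section and the N-way SIMD dispatch; stripped of that, an ECB encrypt handler has roughly this shape (a simplified one-block sketch, not the glue_helper code; example_ prefix is a placeholder):

#include <crypto/internal/skcipher.h>
#include <crypto/serpent.h>

static int example_ecb_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct serpent_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) != 0) {
		const u8 *src = walk.src.virt.addr;
		u8 *dst = walk.dst.virt.addr;

		/* One block at a time; the real code dispatches 8-way SIMD. */
		while (nbytes >= SERPENT_BLOCK_SIZE) {
			__serpent_encrypt(ctx, dst, src);
			src += SERPENT_BLOCK_SIZE;
			dst += SERPENT_BLOCK_SIZE;
			nbytes -= SERPENT_BLOCK_SIZE;
		}
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
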
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c
index acf9fdf..e17655f 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha1-mb/sha1_mb.c
@@ -106,13 +106,6 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_flush)
static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job)
(struct sha1_mb_mgr *state);
-static inline void sha1_init_digest(uint32_t *digest)
-{
- static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0,
- SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 };
- memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2],
uint64_t total_len)
{
@@ -244,11 +237,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
uint32_t len,
int flags)
{
- if (flags & (~HASH_ENTIRE)) {
- /*
- * User should not pass anything other than FIRST, UPDATE, or
- * LAST
- */
+ if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+ /* User should not pass anything other than UPDATE or LAST */
ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
return ctx;
}
@@ -259,24 +249,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr,
return ctx;
}
- if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
/* Cannot update a finished job. */
ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
return ctx;
}
-
- if (flags & HASH_FIRST) {
- /* Init digest */
- sha1_init_digest(ctx->job.result_digest);
-
- /* Reset byte counter */
- ctx->total_length = 0;
-
- /* Clear extra blocks */
- ctx->partial_block_buffer_length = 0;
- }
-
/*
* If we made it here, there were no errors during this call to
* submit
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
index 13590cc..9454bd1 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h
@@ -57,11 +57,9 @@
#include "sha1_mb_mgr.h"
#define HASH_UPDATE 0x00
-#define HASH_FIRST 0x01
-#define HASH_LAST 0x02
-#define HASH_ENTIRE 0x03
-#define HASH_DONE 0x04
-#define HASH_FINAL 0x08
+#define HASH_LAST 0x01
+#define HASH_DONE 0x02
+#define HASH_FINAL 0x04
#define HASH_CTX_STS_IDLE 0x00
#define HASH_CTX_STS_PROCESSING 0x01
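
The sha256-mb and sha512-mb hunks below repeat the same change. HASH_FIRST could go because the mb hash init path already seeds the job digest, so submit only ever sees UPDATE or LAST; with UPDATE encoded as 0, the validity test reduces to "no bit other than LAST set". Illustration only (EX_* names are placeholders, not from this patch):

#include <linux/types.h>

#define EX_HASH_UPDATE	0x00
#define EX_HASH_LAST	0x01
#define EX_HASH_DONE	0x02
#define EX_HASH_FINAL	0x04

static inline bool ex_submit_flags_valid(int flags)
{
	/* Equivalent to (flags & ~EX_HASH_LAST) == 0, since UPDATE is 0. */
	return (flags & ~(EX_HASH_UPDATE | EX_HASH_LAST)) == 0;
}
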
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
index 7926a22..4c46ac1 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ b/arch/x86/crypto/sha256-mb/sha256_mb.c
@@ -106,14 +106,6 @@ static asmlinkage struct job_sha256* (*sha256_job_mgr_flush)
static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job)
(struct sha256_mb_mgr *state);
-inline void sha256_init_digest(uint32_t *digest)
-{
- static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = {
- SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
- SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7};
- memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2],
uint64_t total_len)
{
@@ -245,10 +237,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
uint32_t len,
int flags)
{
- if (flags & (~HASH_ENTIRE)) {
- /* User should not pass anything other than FIRST, UPDATE
- * or LAST
- */
+ if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+ /* User should not pass anything other than UPDATE or LAST */
ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
return ctx;
}
@@ -259,23 +249,12 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr,
return ctx;
}
- if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
/* Cannot update a finished job. */
ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
return ctx;
}
- if (flags & HASH_FIRST) {
- /* Init digest */
- sha256_init_digest(ctx->job.result_digest);
-
- /* Reset byte counter */
- ctx->total_length = 0;
-
- /* Clear extra blocks */
- ctx->partial_block_buffer_length = 0;
- }
-
/* If we made it here, there was no error during this call to submit */
ctx->error = HASH_CTX_ERROR_NONE;
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
index aabb303..7c43254 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h
@@ -57,11 +57,9 @@
#include "sha256_mb_mgr.h"
#define HASH_UPDATE 0x00
-#define HASH_FIRST 0x01
-#define HASH_LAST 0x02
-#define HASH_ENTIRE 0x03
-#define HASH_DONE 0x04
-#define HASH_FINAL 0x08
+#define HASH_LAST 0x01
+#define HASH_DONE 0x02
+#define HASH_FINAL 0x04
#define HASH_CTX_STS_IDLE 0x00
#define HASH_CTX_STS_PROCESSING 0x01
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
index 458409b..39e2bbd 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -107,15 +107,6 @@ static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
(struct sha512_mb_mgr *state);
-inline void sha512_init_digest(uint64_t *digest)
-{
- static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = {
- SHA512_H0, SHA512_H1, SHA512_H2,
- SHA512_H3, SHA512_H4, SHA512_H5,
- SHA512_H6, SHA512_H7 };
- memcpy(digest, initial_digest, sizeof(initial_digest));
-}
-
inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
uint64_t total_len)
{
@@ -263,11 +254,8 @@ static struct sha512_hash_ctx
mgr = cstate->mgr;
spin_lock_irqsave(&cstate->work_lock, irqflags);
- if (flags & (~HASH_ENTIRE)) {
- /*
- * User should not pass anything other than FIRST, UPDATE, or
- * LAST
- */
+ if (flags & ~(HASH_UPDATE | HASH_LAST)) {
+ /* User should not pass anything other than UPDATE or LAST */
ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
goto unlock;
}
@@ -278,24 +266,12 @@ static struct sha512_hash_ctx
goto unlock;
}
- if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
/* Cannot update a finished job. */
ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
goto unlock;
}
-
- if (flags & HASH_FIRST) {
- /* Init digest */
- sha512_init_digest(ctx->job.result_digest);
-
- /* Reset byte counter */
- ctx->total_length = 0;
-
- /* Clear extra blocks */
- ctx->partial_block_buffer_length = 0;
- }
-
/*
* If we made it here, there were no errors during this call to
* submit
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
index e4653f5..e5c465b 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
@@ -57,11 +57,9 @@
#include "sha512_mb_mgr.h"
#define HASH_UPDATE 0x00
-#define HASH_FIRST 0x01
-#define HASH_LAST 0x02
-#define HASH_ENTIRE 0x03
-#define HASH_DONE 0x04
-#define HASH_FINAL 0x08
+#define HASH_LAST 0x01
+#define HASH_DONE 0x02
+#define HASH_FINAL 0x04
#define HASH_CTX_STS_IDLE 0x00
#define HASH_CTX_STS_PROCESSING 0x01
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index b7a3904..66d9892 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -24,24 +24,15 @@
*/
#include <linux/module.h>
-#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
-#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
+#include <crypto/internal/simd.h>
#include <crypto/twofish.h>
-#include <crypto/cryptd.h>
-#include <crypto/b128ops.h>
-#include <crypto/ctr.h>
-#include <crypto/lrw.h>
#include <crypto/xts.h>
-#include <asm/fpu/api.h>
-#include <asm/crypto/twofish.h>
#include <asm/crypto/glue_helper.h>
-#include <crypto/scatterwalk.h>
-#include <linux/workqueue.h>
-#include <linux/spinlock.h>
+#include <asm/crypto/twofish.h>
#define TWOFISH_PARALLEL_BLOCKS 8
@@ -61,6 +52,12 @@ asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return twofish_setkey(&tfm->base, key, keylen);
+}
+
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
@@ -79,6 +76,31 @@ static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
GLUE_FUNC_CAST(twofish_dec_blk));
}
+struct twofish_xts_ctx {
+ struct twofish_ctx tweak_ctx;
+ struct twofish_ctx crypt_ctx;
+};
+
+static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ u32 *flags = &tfm->base.crt_flags;
+ int err;
+
+ err = xts_verify_key(tfm, key, keylen);
+ if (err)
+ return err;
+
+ /* first half of xts-key is for crypt */
+ err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+ if (err)
+ return err;
+
+ /* second half of xts-key is for tweak */
+ return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+ flags);
+}
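
The split into tweak_ctx and crypt_ctx set up above is what the XTS request path consumes: the tweak key encrypts the IV once, and each data block is masked with the running tweak before and after encryption with the data key, multiplying the tweak by x in GF(2^128) between blocks. A self-contained single-block sketch of that arithmetic (illustration only, not the glue_helper implementation; example_* names are placeholders):

/* Multiply the 16-byte little-endian tweak by x (XTS polynomial, 0x87). */
static void example_xts_mul_x(u8 t[16])
{
	u8 carry = 0;
	int i;

	for (i = 0; i < 16; i++) {
		u8 msb = t[i] >> 7;

		t[i] = (t[i] << 1) | carry;
		carry = msb;
	}
	if (carry)
		t[0] ^= 0x87;
}

static void example_xts_enc_block(struct twofish_xts_ctx *ctx,
				  u8 *dst, const u8 *src, u8 t[16])
{
	u8 x[16];
	int i;

	for (i = 0; i < 16; i++)			/* PP = P xor T */
		x[i] = src[i] ^ t[i];
	twofish_enc_blk(&ctx->crypt_ctx, x, x);		/* CC = E_K1(PP) */
	for (i = 0; i < 16; i++)			/* C = CC xor T */
		dst[i] = x[i] ^ t[i];
	example_xts_mul_x(t);				/* tweak for next block */
}

/*
 * The initial tweak is the IV encrypted with the tweak half of the key:
 *	twofish_enc_blk(&ctx->tweak_ctx, t, iv);
 */
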
static const struct common_glue_ctx twofish_enc = {
.num_funcs = 3,
@@ -170,389 +192,113 @@ static const struct common_glue_ctx twofish_dec_xts = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
- dst, src, nbytes);
+ return glue_ecb_req_128bit(&twofish_enc, req);
}
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
- nbytes);
+ return glue_ecb_req_128bit(&twofish_dec, req);
}
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+ req);
}
-static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL,
- fpu_enabled, nbytes);
+ return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
}
-static inline void twofish_fpu_end(bool fpu_enabled)
+static int ctr_crypt(struct skcipher_request *req)
{
- glue_fpu_end(fpu_enabled);
+ return glue_ctr_req_128bit(&twofish_ctr, req);
}
-struct crypt_priv {
- struct twofish_ctx *ctx;
- bool fpu_enabled;
-};
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_encrypt(struct skcipher_request *req)
{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
- twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
- twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- nbytes %= bsize * 3;
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_enc_blk(ctx->ctx, srcdst, srcdst);
+ return glue_xts_req_128bit(&twofish_enc_xts, req,
+ XTS_TWEAK_CAST(twofish_enc_blk),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int xts_decrypt(struct skcipher_request *req)
{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct crypt_priv *ctx = priv;
- int i;
-
- ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
-
- if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
- twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
- return;
- }
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
- twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
-
- nbytes %= bsize * 3;
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_dec_blk(ctx->ctx, srcdst, srcdst);
+ return glue_xts_req_128bit(&twofish_dec_xts, req,
+ XTS_TWEAK_CAST(twofish_enc_blk),
+ &ctx->tweak_ctx, &ctx->crypt_ctx);
}
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[TWOFISH_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->twofish_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- twofish_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[TWOFISH_PARALLEL_BLOCKS];
- struct crypt_priv crypt_ctx = {
- .ctx = &ctx->twofish_ctx,
- .fpu_enabled = false,
- };
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
- int ret;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- ret = lrw_crypt(desc, dst, src, nbytes, &req);
- twofish_fpu_end(crypt_ctx.fpu_enabled);
-
- return ret;
-}
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(twofish_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-
- return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
- XTS_TWEAK_CAST(twofish_enc_blk),
- &ctx->tweak_ctx, &ctx->crypt_ctx);
-}
-
-static struct crypto_alg twofish_algs[10] = { {
- .cra_name = "__ecb-twofish-avx",
- .cra_driver_name = "__driver-ecb-twofish-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "__cbc-twofish-avx",
- .cra_driver_name = "__driver-cbc-twofish-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "__ctr-twofish-avx",
- .cra_driver_name = "__driver-ctr-twofish-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "__lrw-twofish-avx",
- .cra_driver_name = "__driver-lrw-twofish-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_twofish_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE +
- TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE +
- TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = lrw_twofish_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "__xts-twofish-avx",
- .cra_driver_name = "__driver-xts-twofish-avx",
- .cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE * 2,
- .max_keysize = TF_MAX_KEY_SIZE * 2,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = xts_twofish_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
- },
-}, {
- .cra_name = "ecb(twofish)",
- .cra_driver_name = "ecb-twofish-avx",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(twofish)",
- .cra_driver_name = "cbc-twofish-avx",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = __ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(twofish)",
- .cra_driver_name = "ctr-twofish-avx",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_encrypt,
- .geniv = "chainiv",
- },
- },
-}, {
- .cra_name = "lrw(twofish)",
- .cra_driver_name = "lrw-twofish-avx",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE +
- TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE +
- TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
- },
-}, {
- .cra_name = "xts(twofish)",
- .cra_driver_name = "xts-twofish-avx",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct async_helper_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_ablkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = ablk_init,
- .cra_exit = ablk_exit,
- .cra_u = {
- .ablkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE * 2,
- .max_keysize = TF_MAX_KEY_SIZE * 2,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = ablk_set_key,
- .encrypt = ablk_encrypt,
- .decrypt = ablk_decrypt,
- },
+static struct skcipher_alg twofish_algs[] = {
+ {
+ .base.cra_name = "__ecb(twofish)",
+ .base.cra_driver_name = "__ecb-twofish-avx",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = TF_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct twofish_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .setkey = twofish_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "__cbc(twofish)",
+ .base.cra_driver_name = "__cbc-twofish-avx",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = TF_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct twofish_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "__ctr(twofish)",
+ .base.cra_driver_name = "__ctr-twofish-avx",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct twofish_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .chunksize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ }, {
+ .base.cra_name = "__xts(twofish)",
+ .base.cra_driver_name = "__xts-twofish-avx",
+ .base.cra_priority = 400,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = TF_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct twofish_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = 2 * TF_MIN_KEY_SIZE,
+ .max_keysize = 2 * TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = xts_twofish_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
},
-} };
+};
+
+static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)];
static int __init twofish_init(void)
{
@@ -563,12 +309,15 @@ static int __init twofish_init(void)
return -ENODEV;
}
- return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+ return simd_register_skciphers_compat(twofish_algs,
+ ARRAY_SIZE(twofish_algs),
+ twofish_simd_algs);
}
static void __exit twofish_exit(void)
{
- crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs));
+ simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs),
+ twofish_simd_algs);
}
module_init(twofish_init);
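/*
 * Illustrative sketch, not part of the patch: after the conversion above,
 * "ecb(twofish)" and the other modes are reached through the generic
 * skcipher API instead of the removed (a)blkcipher one. A minimal caller,
 * with hypothetical local names and error handling trimmed, might look
 * roughly like this:
 */
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int twofish_skcipher_demo(const u8 *key, unsigned int keylen,
				 u8 *buf, unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("ecb(twofish)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* in-place encryption of one linear buffer; ECB takes no IV */
	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
				      CRYPTO_TFM_REQ_MAY_BACKLOG,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, NULL);

	/* works for both the sync 3-way and the async SIMD implementations */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}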
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 243e90a..5714855 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -20,22 +20,26 @@
*
*/
-#include <asm/processor.h>
+#include <asm/crypto/glue_helper.h>
+#include <asm/crypto/twofish.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/twofish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/twofish.h>
-#include <crypto/b128ops.h>
-#include <asm/crypto/twofish.h>
-#include <asm/crypto/glue_helper.h>
-#include <crypto/lrw.h>
-#include <crypto/xts.h>
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
+static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ return twofish_setkey(&tfm->base, key, keylen);
+}
+
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
@@ -151,284 +155,74 @@ static const struct common_glue_ctx twofish_dec_cbc = {
} }
};
-static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
-}
-
-static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
-}
-
-static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
- dst, src, nbytes);
-}
-
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
- nbytes);
-}
-
-static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
-}
-
-static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct twofish_ctx *ctx = priv;
- int i;
-
- if (nbytes == 3 * bsize) {
- twofish_enc_blk_3way(ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_enc_blk(ctx, srcdst, srcdst);
-}
-
-static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
-{
- const unsigned int bsize = TF_BLOCK_SIZE;
- struct twofish_ctx *ctx = priv;
- int i;
-
- if (nbytes == 3 * bsize) {
- twofish_dec_blk_3way(ctx, srcdst, srcdst);
- return;
- }
-
- for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
- twofish_dec_blk(ctx, srcdst, srcdst);
-}
-
-int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
- int err;
-
- err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE,
- &tfm->crt_flags);
- if (err)
- return err;
-
- return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE);
+ return glue_ecb_req_128bit(&twofish_enc, req);
}
-EXPORT_SYMBOL_GPL(lrw_twofish_setkey);
-static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->twofish_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
+ return glue_ecb_req_128bit(&twofish_dec, req);
}
-static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- be128 buf[3];
- struct lrw_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .table_ctx = &ctx->lrw_table,
- .crypt_ctx = &ctx->twofish_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return lrw_crypt(desc, dst, src, nbytes, &req);
+ return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk),
+ req);
}
-void lrw_twofish_exit_tfm(struct crypto_tfm *tfm)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
-
- lrw_free_table(&ctx->lrw_table);
-}
-EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm);
-
-int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm);
- u32 *flags = &tfm->crt_flags;
- int err;
-
- err = xts_check_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
- flags);
-}
-EXPORT_SYMBOL_GPL(xts_twofish_setkey);
-
-static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
-{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- le128 buf[3];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = encrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
+ return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
}
-static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- le128 buf[3];
- struct xts_crypt_req req = {
- .tbuf = buf,
- .tbuflen = sizeof(buf),
-
- .tweak_ctx = &ctx->tweak_ctx,
- .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
- .crypt_ctx = &ctx->crypt_ctx,
- .crypt_fn = decrypt_callback,
- };
-
- return xts_crypt(desc, dst, src, nbytes, &req);
+ return glue_ctr_req_128bit(&twofish_ctr, req);
}
-static struct crypto_alg tf_algs[5] = { {
- .cra_name = "ecb(twofish)",
- .cra_driver_name = "ecb-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(twofish)",
- .cra_driver_name = "cbc-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(twofish)",
- .cra_driver_name = "ctr-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct twofish_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-}, {
- .cra_name = "lrw(twofish)",
- .cra_driver_name = "lrw-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_exit = lrw_twofish_exit_tfm,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = lrw_twofish_setkey,
- .encrypt = lrw_encrypt,
- .decrypt = lrw_decrypt,
- },
- },
-}, {
- .cra_name = "xts(twofish)",
- .cra_driver_name = "xts-twofish-3way",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = TF_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct twofish_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = TF_MIN_KEY_SIZE * 2,
- .max_keysize = TF_MAX_KEY_SIZE * 2,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = xts_twofish_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
+static struct skcipher_alg tf_skciphers[] = {
+ {
+ .base.cra_name = "ecb(twofish)",
+ .base.cra_driver_name = "ecb-twofish-3way",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = TF_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct twofish_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .setkey = twofish_setkey_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(twofish)",
+ .base.cra_driver_name = "cbc-twofish-3way",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = TF_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct twofish_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(twofish)",
+ .base.cra_driver_name = "ctr-twofish-3way",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct twofish_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = TF_MIN_KEY_SIZE,
+ .max_keysize = TF_MAX_KEY_SIZE,
+ .ivsize = TF_BLOCK_SIZE,
+ .chunksize = TF_BLOCK_SIZE,
+ .setkey = twofish_setkey_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
},
-} };
+};
static bool is_blacklisted_cpu(void)
{
@@ -478,12 +272,13 @@ static int __init init(void)
return -ENODEV;
}
- return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
+ return crypto_register_skciphers(tf_skciphers,
+ ARRAY_SIZE(tf_skciphers));
}
static void __exit fini(void)
{
- crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
+ crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
}
module_init(init);
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index dce7092..be63330 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
/*
* Push registers and sanitize registers of values that a
* speculation attack might otherwise want to exploit. The
@@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with
* could be put to use in a speculative execution gadget.
* Interleave XOR with PUSH for better uop scheduling:
*/
+ .if \save_ret
+ pushq %rsi /* pt_regs->si */
+ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
+ movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */
+ .else
pushq %rdi /* pt_regs->di */
pushq %rsi /* pt_regs->si */
+ .endif
pushq \rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
- xorq %r11, %r11 /* nospec r11*/
+ xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
- xorq %r12, %r12 /* nospec r12*/
+ xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
- xorq %r13, %r13 /* nospec r13*/
+ xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
- xorq %r14, %r14 /* nospec r14*/
+ xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
- xorq %r15, %r15 /* nospec r15*/
+ xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS
+ .if \save_ret
+ pushq %rsi /* return address on top of stack */
+ .endif
.endm
.macro POP_REGS pop_rdi=1 skip_r11rcx=0
@@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with
*/
.macro ENCODE_FRAME_POINTER ptregs_offset=0
#ifdef CONFIG_FRAME_POINTER
- .if \ptregs_offset
- leaq \ptregs_offset(%rsp), %rbp
- .else
- mov %rsp, %rbp
- .endif
- orq $0x1, %rbp
+ leaq 1+\ptregs_offset(%rsp), %rbp
#endif
.endm
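/*
 * Illustrative sketch, not part of the patch: the single leaq above can fold
 * the old "orq $0x1, %rbp" because %rsp is 8-byte aligned at this point and
 * every pt_regs offset is a multiple of 8, so the low bit of the sum is
 * already clear and OR-ing in 1 equals adding 1. A user-space check of that
 * assumption (function name is hypothetical):
 */
#include <assert.h>
#include <stdint.h>

static void check_encoded_frame_pointer(uintptr_t rsp, uintptr_t ptregs_offset)
{
	/* the entry code keeps %rsp 8-byte aligned; offsets are multiples of 8 */
	assert((rsp & 0x7) == 0 && (ptregs_offset & 0x7) == 0);

	/* old sequence: (rsp + offset) | 1 -- new sequence: lea 1+offset(rsp) */
	assert(((rsp + ptregs_offset) | 0x1) == rsp + ptregs_offset + 1);
}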
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c02..bef8e2b 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %ebx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
@@ -903,6 +902,9 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR,
hyperv_reenlightenment_intr)
+BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
+ hv_stimer0_vector_handler)
+
#endif /* CONFIG_HYPERV */
ENTRY(page_fault)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8971bd6..b0a4649 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -55,7 +55,7 @@ END(native_usergs_sysret64)
.macro TRACE_IRQS_FLAGS flags:req
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9, \flags /* interrupts off? */
+ btl $9, \flags /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
@@ -260,8 +260,13 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
* Change top bits to match most significant bit (47th or 56th bit
* depending on paging mode) in the address.
*/
+#ifdef CONFIG_X86_5LEVEL
+ ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
+ "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
+#else
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
+#endif
/* If this changed %rcx, it was not canonical */
cmpq %rcx, %r11
@@ -364,8 +369,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %rbx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
@@ -449,9 +453,19 @@ END(irq_entries_start)
*
* The invariant is that, if irq_count != -1, then the IRQ stack is in use.
*/
-.macro ENTER_IRQ_STACK regs=1 old_rsp
+.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0
DEBUG_ENTRY_ASSERT_IRQS_OFF
+
+ .if \save_ret
+ /*
+ * If save_ret is set, the original stack contains one additional
+ * entry -- the return address. Therefore, move the address one
+ * entry below %rsp to \old_rsp.
+ */
+ leaq 8(%rsp), \old_rsp
+ .else
movq %rsp, \old_rsp
+ .endif
.if \regs
UNWIND_HINT_REGS base=\old_rsp
@@ -497,6 +511,15 @@ END(irq_entries_start)
.if \regs
UNWIND_HINT_REGS indirect=1
.endif
+
+ .if \save_ret
+ /*
+ * Push the return address to the stack. This return address can
+ * be found at the "real" original RSP, which was offset by 8 at
+ * the beginning of this macro.
+ */
+ pushq -8(\old_rsp)
+ .endif
.endm
/*
@@ -520,27 +543,65 @@ END(irq_entries_start)
.endm
/*
- * Interrupt entry/exit.
- *
- * Interrupt entry points save only callee clobbered registers in fast path.
+ * Interrupt entry helper function.
*
- * Entry runs with interrupts off.
+ * Entry runs with interrupts off. Stack layout at entry:
+ * +----------------------------------------------------+
+ * | regs->ss |
+ * | regs->rsp |
+ * | regs->eflags |
+ * | regs->cs |
+ * | regs->ip |
+ * +----------------------------------------------------+
+ * | regs->orig_ax = ~(interrupt number) |
+ * +----------------------------------------------------+
+ * | return address |
+ * +----------------------------------------------------+
*/
-
-/* 0(%rsp): ~(interrupt number) */
- .macro interrupt func
+ENTRY(interrupt_entry)
+ UNWIND_HINT_FUNC
+ ASM_CLAC
cld
- testb $3, CS-ORIG_RAX(%rsp)
+ testb $3, CS-ORIG_RAX+8(%rsp)
jz 1f
SWAPGS
- call switch_to_thread_stack
+
+ /*
+ * Switch to the thread stack. The IRET frame and orig_ax are
+ * on the stack, as well as the return address. RDI..R12 are
+ * not (yet) on the stack and space has not (yet) been
+ * allocated for them.
+ */
+ pushq %rdi
+
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+
+ /*
+ * We have RDI, return address, and orig_ax on the stack on
+ * top of the IRET frame. That means offset=24
+ */
+ UNWIND_HINT_IRET_REGS base=%rdi offset=24
+
+ pushq 7*8(%rdi) /* regs->ss */
+ pushq 6*8(%rdi) /* regs->rsp */
+ pushq 5*8(%rdi) /* regs->eflags */
+ pushq 4*8(%rdi) /* regs->cs */
+ pushq 3*8(%rdi) /* regs->ip */
+ pushq 2*8(%rdi) /* regs->orig_ax */
+ pushq 8(%rdi) /* return address */
+ UNWIND_HINT_FUNC
+
+ movq (%rdi), %rdi
1:
- PUSH_AND_CLEAR_REGS
- ENCODE_FRAME_POINTER
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
- testb $3, CS(%rsp)
+ testb $3, CS+8(%rsp)
jz 1f
/*
@@ -548,7 +609,7 @@ END(irq_entries_start)
*
* We need to tell lockdep that IRQs are off. We can't do this until
* we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks). Since TRACE_IRQS_OFF idempotent,
+ * (which can take locks). Since TRACE_IRQS_OFF is idempotent,
* the simplest way to handle it is to just call it twice if
* we enter from user mode. There's no reason to optimize this since
* TRACE_IRQS_OFF is a no-op if lockdep is off.
@@ -558,12 +619,15 @@ END(irq_entries_start)
CALL_enter_from_user_mode
1:
- ENTER_IRQ_STACK old_rsp=%rdi
+ ENTER_IRQ_STACK old_rsp=%rdi save_ret=1
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
- call \func /* rdi points to pt_regs */
- .endm
+ ret
+END(interrupt_entry)
+
+
+/* Interrupt entry/exit. */
/*
* The interrupt stubs push (~vector+0x80) onto the stack and
@@ -571,9 +635,10 @@ END(irq_entries_start)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
- ASM_CLAC
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
- interrupt do_IRQ
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call do_IRQ /* rdi points to pt_regs */
/* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -766,10 +831,11 @@ END(common_interrupt)
.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
UNWIND_HINT_IRET_REGS
- ASM_CLAC
pushq $~(\num)
.Lcommon_\sym:
- interrupt \do_sym
+ call interrupt_entry
+ UNWIND_HINT_REGS indirect=1
+ call \do_sym /* rdi points to pt_regs */
jmp ret_from_intr
END(\sym)
.endm
@@ -832,34 +898,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
-/*
- * Switch to the thread stack. This is called with the IRET frame and
- * orig_ax on the stack. (That is, RDI..R12 are not on the stack and
- * space has not been allocated for them.)
- */
-ENTRY(switch_to_thread_stack)
- UNWIND_HINT_FUNC
-
- pushq %rdi
- /* Need to switch before accessing the thread stack. */
- SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
- movq %rsp, %rdi
- movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
-
- pushq 7*8(%rdi) /* regs->ss */
- pushq 6*8(%rdi) /* regs->rsp */
- pushq 5*8(%rdi) /* regs->eflags */
- pushq 4*8(%rdi) /* regs->cs */
- pushq 3*8(%rdi) /* regs->ip */
- pushq 2*8(%rdi) /* regs->orig_ax */
- pushq 8(%rdi) /* return address */
- UNWIND_HINT_FUNC
-
- movq (%rdi), %rdi
- ret
-END(switch_to_thread_stack)
-
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -875,12 +913,8 @@ ENTRY(\sym)
pushq $-1 /* ORIG_RAX: no syscall to restart */
.endif
- /* Save all registers in pt_regs */
- PUSH_AND_CLEAR_REGS
- ENCODE_FRAME_POINTER
-
.if \paranoid < 2
- testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
+ testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */
jnz .Lfrom_usermode_switch_stack_\@
.endif
@@ -1106,10 +1140,13 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
hyperv_reenlightenment_vector hyperv_reenlightenment_intr
+
+apicinterrupt3 HYPERV_STIMER0_VECTOR \
+ hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3 do_int3 has_error_code=0
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
@@ -1130,13 +1167,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch gs if needed.
* Use slow, but surefire "are we in kernel?" check.
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(paranoid_entry)
UNWIND_HINT_FUNC
cld
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
rdmsr
@@ -1181,12 +1220,14 @@ ENTRY(paranoid_exit)
END(paranoid_exit)
/*
- * Switch gs if needed.
+ * Save all registers in pt_regs, and switch GS if needed.
* Return: EBX=0: came from user mode; EBX=1: otherwise
*/
ENTRY(error_entry)
- UNWIND_HINT_REGS offset=8
+ UNWIND_HINT_FUNC
cld
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1577,8 +1618,6 @@ end_repeat_nmi:
* frame to point back to repeat_nmi.
*/
pushq $-1 /* ORIG_RAX: no syscall to restart */
- PUSH_AND_CLEAR_REGS
- ENCODE_FRAME_POINTER
/*
* Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index fd65e01..08425c4 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat)
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
pushq %rbp /* pt_regs->cx (stashed in bp) */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp (will be overwritten) */
xorl %ebp, %ebp /* nospec rbp */
pushq $0 /* pt_regs->r12 = 0 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq $0 /* pt_regs->r13 = 0 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq $0 /* pt_regs->r14 = 0 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq $0 /* pt_regs->r15 = 0 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -298,9 +298,9 @@ sysret32_from_system_call:
*/
SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r10
+ xorl %r8d, %r8d
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
swapgs
sysretl
END(entry_SYSCALL_compat)
@@ -347,36 +347,47 @@ ENTRY(entry_INT80_compat)
*/
movl %eax, %eax
+ /* switch to thread stack expects orig_ax and rdi to be pushed */
pushq %rax /* pt_regs->orig_ax */
+ pushq %rdi /* pt_regs->di */
- /* switch to thread stack expects orig_ax to be pushed */
- call switch_to_thread_stack
+ /* Need to switch before accessing the thread stack. */
+ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+ movq %rsp, %rdi
+ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
- pushq %rdi /* pt_regs->di */
+ pushq 6*8(%rdi) /* regs->ss */
+ pushq 5*8(%rdi) /* regs->rsp */
+ pushq 4*8(%rdi) /* regs->eflags */
+ pushq 3*8(%rdi) /* regs->cs */
+ pushq 2*8(%rdi) /* regs->ip */
+ pushq 1*8(%rdi) /* regs->orig_ax */
+
+ pushq (%rdi) /* pt_regs->di */
pushq %rsi /* pt_regs->si */
pushq %rdx /* pt_regs->dx */
pushq %rcx /* pt_regs->cx */
pushq $-ENOSYS /* pt_regs->ax */
pushq $0 /* pt_regs->r8 = 0 */
- xorq %r8, %r8 /* nospec r8 */
+ xorl %r8d, %r8d /* nospec r8 */
pushq $0 /* pt_regs->r9 = 0 */
- xorq %r9, %r9 /* nospec r9 */
+ xorl %r9d, %r9d /* nospec r9 */
pushq $0 /* pt_regs->r10 = 0 */
- xorq %r10, %r10 /* nospec r10 */
+ xorl %r10d, %r10d /* nospec r10 */
pushq $0 /* pt_regs->r11 = 0 */
- xorq %r11, %r11 /* nospec r11 */
+ xorl %r11d, %r11d /* nospec r11 */
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx */
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp */
pushq %r12 /* pt_regs->r12 */
- xorq %r12, %r12 /* nospec r12 */
+ xorl %r12d, %r12d /* nospec r12 */
pushq %r13 /* pt_regs->r13 */
- xorq %r13, %r13 /* nospec r13 */
+ xorl %r13d, %r13d /* nospec r13 */
pushq %r14 /* pt_regs->r14 */
- xorq %r14, %r14 /* nospec r14 */
+ xorl %r14d, %r14d /* nospec r14 */
pushq %r15 /* pt_regs->r15 */
- xorq %r15, %r15 /* nospec r15 */
+ xorl %r15d, %r15d /* nospec r15 */
cld
/*
@@ -393,15 +404,3 @@ ENTRY(entry_INT80_compat)
TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
END(entry_INT80_compat)
-
-ENTRY(stub32_clone)
- /*
- * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
- * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
- *
- * The native 64-bit kernel's sys_clone() implements the latter,
- * so we need to swap arguments here before calling it:
- */
- xchg %r8, %rcx
- jmp sys_clone
-ENDPROC(stub32_clone)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 448ac21..c58f75b 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -8,12 +8,12 @@
#
0 i386 restart_syscall sys_restart_syscall
1 i386 exit sys_exit
-2 i386 fork sys_fork sys_fork
+2 i386 fork sys_fork
3 i386 read sys_read
4 i386 write sys_write
5 i386 open sys_open compat_sys_open
6 i386 close sys_close
-7 i386 waitpid sys_waitpid sys32_waitpid
+7 i386 waitpid sys_waitpid
8 i386 creat sys_creat
9 i386 link sys_link
10 i386 unlink sys_unlink
@@ -78,7 +78,7 @@
69 i386 ssetmask sys_ssetmask
70 i386 setreuid sys_setreuid16
71 i386 setregid sys_setregid16
-72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
+72 i386 sigsuspend sys_sigsuspend
73 i386 sigpending sys_sigpending compat_sys_sigpending
74 i386 sethostname sys_sethostname
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
@@ -96,7 +96,7 @@
87 i386 swapon sys_swapon
88 i386 reboot sys_reboot
89 i386 readdir sys_old_readdir compat_sys_old_readdir
-90 i386 mmap sys_old_mmap sys32_mmap
+90 i386 mmap sys_old_mmap compat_sys_x86_mmap
91 i386 munmap sys_munmap
92 i386 truncate sys_truncate compat_sys_truncate
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
@@ -126,7 +126,7 @@
117 i386 ipc sys_ipc compat_sys_ipc
118 i386 fsync sys_fsync
119 i386 sigreturn sys_sigreturn sys32_sigreturn
-120 i386 clone sys_clone stub32_clone
+120 i386 clone sys_clone compat_sys_x86_clone
121 i386 setdomainname sys_setdomainname
122 i386 uname sys_newuname
123 i386 modify_ldt sys_modify_ldt
@@ -137,7 +137,7 @@
128 i386 init_module sys_init_module
129 i386 delete_module sys_delete_module
130 i386 get_kernel_syms
-131 i386 quotactl sys_quotactl sys32_quotactl
+131 i386 quotactl sys_quotactl compat_sys_quotactl32
132 i386 getpgid sys_getpgid
133 i386 fchdir sys_fchdir
134 i386 bdflush sys_bdflush
@@ -186,8 +186,8 @@
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
179 i386 rt_sigsuspend sys_rt_sigsuspend
-180 i386 pread64 sys_pread64 sys32_pread
-181 i386 pwrite64 sys_pwrite64 sys32_pwrite
+180 i386 pread64 sys_pread64 compat_sys_x86_pread
+181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite
182 i386 chown sys_chown16
183 i386 getcwd sys_getcwd
184 i386 capget sys_capget
@@ -196,14 +196,14 @@
187 i386 sendfile sys_sendfile compat_sys_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork sys_vfork sys_vfork
+190 i386 vfork sys_vfork
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
192 i386 mmap2 sys_mmap_pgoff
-193 i386 truncate64 sys_truncate64 sys32_truncate64
-194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
-195 i386 stat64 sys_stat64 sys32_stat64
-196 i386 lstat64 sys_lstat64 sys32_lstat64
-197 i386 fstat64 sys_fstat64 sys32_fstat64
+193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64
+194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64
+195 i386 stat64 sys_stat64 compat_sys_x86_stat64
+196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64
+197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64
198 i386 lchown32 sys_lchown
199 i386 getuid32 sys_getuid
200 i386 getgid32 sys_getgid
@@ -231,7 +231,7 @@
# 222 is unused
# 223 is unused
224 i386 gettid sys_gettid
-225 i386 readahead sys_readahead sys32_readahead
+225 i386 readahead sys_readahead compat_sys_x86_readahead
226 i386 setxattr sys_setxattr
227 i386 lsetxattr sys_lsetxattr
228 i386 fsetxattr sys_fsetxattr
@@ -256,7 +256,7 @@
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
248 i386 io_submit sys_io_submit compat_sys_io_submit
249 i386 io_cancel sys_io_cancel
-250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
+250 i386 fadvise64 sys_fadvise64 compat_sys_x86_fadvise64
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
252 i386 exit_group sys_exit_group
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
@@ -278,7 +278,7 @@
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
270 i386 tgkill sys_tgkill
271 i386 utimes sys_utimes compat_sys_utimes
-272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
+272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64
273 i386 vserver
274 i386 mbind sys_mbind
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
@@ -306,7 +306,7 @@
297 i386 mknodat sys_mknodat
298 i386 fchownat sys_fchownat
299 i386 futimesat sys_futimesat compat_sys_futimesat
-300 i386 fstatat64 sys_fstatat64 sys32_fstatat
+300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat
301 i386 unlinkat sys_unlinkat
302 i386 renameat sys_renameat
303 i386 linkat sys_linkat
@@ -320,7 +320,7 @@
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
313 i386 splice sys_splice
-314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
+314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range
315 i386 tee sys_tee
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
317 i386 move_pages sys_move_pages compat_sys_move_pages
@@ -330,7 +330,7 @@
321 i386 signalfd sys_signalfd compat_sys_signalfd
322 i386 timerfd_create sys_timerfd_create
323 i386 eventfd sys_eventfd
-324 i386 fallocate sys_fallocate sys32_fallocate
+324 i386 fallocate sys_fallocate compat_sys_x86_fallocate
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbf..9242b28 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
#undef CONFIG_OPTIMIZE_INLINING
#endif
-#undef CONFIG_X86_PPRO_FENCE
-
#ifdef CONFIG_X86_64
/*
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 577fa8a..317be36 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -42,10 +42,8 @@
#define CREATE_TRACE_POINTS
#include "vsyscall_trace.h"
-static enum { EMULATE, NATIVE, NONE } vsyscall_mode =
-#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE)
- NATIVE;
-#elif defined(CONFIG_LEGACY_VSYSCALL_NONE)
+static enum { EMULATE, NONE } vsyscall_mode =
+#ifdef CONFIG_LEGACY_VSYSCALL_NONE
NONE;
#else
EMULATE;
@@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str)
if (str) {
if (!strcmp("emulate", str))
vsyscall_mode = EMULATE;
- else if (!strcmp("native", str))
- vsyscall_mode = NATIVE;
else if (!strcmp("none", str))
vsyscall_mode = NONE;
else
@@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
WARN_ON_ONCE(address != regs->ip);
- /* This should be unreachable in NATIVE mode. */
- if (WARN_ON(vsyscall_mode == NATIVE))
- return false;
-
if (vsyscall_mode == NONE) {
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall attempted with vsyscall=none");
@@ -355,7 +347,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
p4d = p4d_offset(pgd, VSYSCALL_ADDR);
#if CONFIG_PGTABLE_LEVELS >= 5
- p4d->p4d |= _PAGE_USER;
+ set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));
#endif
pud = pud_offset(p4d, VSYSCALL_ADDR);
set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
@@ -370,9 +362,7 @@ void __init map_vsyscall(void)
if (vsyscall_mode != NONE) {
__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
- vsyscall_mode == NATIVE
- ? PAGE_KERNEL_VSYSCALL
- : PAGE_KERNEL_VVAR);
+ PAGE_KERNEL_VVAR);
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 140d332..a6006e7 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -48,7 +48,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
-struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE;
+DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
u64 __read_mostly hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
@@ -990,7 +990,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
if (!dogrp)
return n;
- list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ for_each_sibling_event(event, leader) {
if (!is_x86_event(event) ||
event->state <= PERF_EVENT_STATE_OFF)
continue;
@@ -1156,16 +1156,13 @@ int x86_perf_event_set_period(struct perf_event *event)
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
- if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
- local64_read(&hwc->prev_count) != (u64)-left) {
- /*
- * The hw event starts counting from this event offset,
- * mark it to be able to extra future deltas:
- */
- local64_set(&hwc->prev_count, (u64)-left);
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extract future deltas:
+ */
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
- }
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
 * Due to erratum on certain CPUs we need
@@ -1884,6 +1881,8 @@ early_initcall(init_hw_perf_events);
static inline void x86_pmu_read(struct perf_event *event)
{
+ if (x86_pmu.read)
+ return x86_pmu.read(event);
x86_perf_event_update(event);
}
@@ -2118,7 +2117,8 @@ static int x86_pmu_event_init(struct perf_event *event)
event->destroy(event);
}
- if (READ_ONCE(x86_pmu.attr_rdpmc))
+ if (READ_ONCE(x86_pmu.attr_rdpmc) &&
+ !(event->hw.flags & PERF_X86_EVENT_LARGE_PEBS))
event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED;
return err;
@@ -2206,9 +2206,9 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
* but only root can trigger it, so it's okay.
*/
if (val == 2)
- static_key_slow_inc(&rdpmc_always_available);
+ static_branch_inc(&rdpmc_always_available_key);
else
- static_key_slow_dec(&rdpmc_always_available);
+ static_branch_dec(&rdpmc_always_available_key);
on_each_cpu(refresh_pce, NULL, 1);
}
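/*
 * Illustrative sketch, not part of the patch: with the key defined via
 * DEFINE_STATIC_KEY_FALSE(), readers test it with static_branch_unlikely(),
 * which is patched as a jump rather than loading a variable. The helper
 * below is hypothetical; the real consumer sits in the CR4.PCE update path.
 */
#include <linux/jump_label.h>

DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);

static bool rdpmc_allowed_demo(bool task_allows_rdpmc)
{
	return static_branch_unlikely(&rdpmc_always_available_key) ||
	       task_allows_rdpmc;
}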
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 56457cb..607bf56 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2060,6 +2060,14 @@ static void intel_pmu_del_event(struct perf_event *event)
intel_pmu_pebs_del(event);
}
+static void intel_pmu_read_event(struct perf_event *event)
+{
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_auto_reload_read(event);
+ else
+ x86_perf_event_update(event);
+}
+
static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
{
int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -2201,16 +2209,23 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
int bit, loops;
u64 status;
int handled;
+ int pmu_enabled;
cpuc = this_cpu_ptr(&cpu_hw_events);
/*
+ * Save the PMU state.
+ * It needs to be restored when leaving the handler.
+ */
+ pmu_enabled = cpuc->enabled;
+ /*
* No known reason to not always do late ACK,
* but just in case do it opt-in.
*/
if (!x86_pmu.late_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_bts_disable_local();
+ cpuc->enabled = 0;
__intel_pmu_disable_all();
handled = intel_pmu_drain_bts_buffer();
handled += intel_bts_interrupt();
@@ -2320,7 +2335,8 @@ again:
done:
/* Only restore PMU state when it's active. See x86_pmu_disable(). */
- if (cpuc->enabled)
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
__intel_pmu_enable_all(0, true);
intel_bts_enable_local();
@@ -2952,9 +2968,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event)
return intel_pebs_aliases_precdist(event);
}
-static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
+static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
{
- unsigned long flags = x86_pmu.free_running_flags;
+ unsigned long flags = x86_pmu.large_pebs_flags;
if (event->attr.use_clockid)
flags &= ~PERF_SAMPLE_TIME;
@@ -2976,8 +2992,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (!event->attr.freq) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
- ~intel_pmu_free_running_flags(event)))
- event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+ ~intel_pmu_large_pebs_flags(event)))
+ event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
}
if (x86_pmu.pebs_aliases)
x86_pmu.pebs_aliases(event);
@@ -3188,13 +3204,13 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
* Therefore the effective (average) period matches the requested period,
* despite coarser hardware granularity.
*/
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+static u64 bdw_limit_period(struct perf_event *event, u64 left)
{
if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
X86_CONFIG(.event=0xc0, .umask=0x01)) {
if (left < 128)
left = 128;
- left &= ~0x3fu;
+ left &= ~0x3fULL;
}
return left;
}
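/*
 * Illustrative sketch, not part of the patch: the constant must be widened
 * along with the prototype. With a u64 operand, "~0x3fu" is computed in
 * 32 bits and then zero-extended, which silently clears the upper half of
 * the value. A user-space check of that arithmetic:
 */
#include <assert.h>
#include <stdint.h>

static void check_limit_period_mask(void)
{
	uint64_t left = 0x100000080ULL;		/* arbitrary value above 32 bits */

	assert((left & ~0x3fULL) == 0x100000080ULL);	/* intended: clear low 6 bits only */
	assert((left & ~0x3fu) == 0x80ULL);		/* 32-bit mask also drops the high word */
}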
@@ -3460,7 +3476,7 @@ static __initconst const struct x86_pmu core_pmu = {
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
+ .large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32-bit width,
@@ -3495,6 +3511,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.disable = intel_pmu_disable_event,
.add = intel_pmu_add_event,
.del = intel_pmu_del_event,
+ .read = intel_pmu_read_event,
.hw_config = intel_pmu_hw_config,
.schedule_events = x86_schedule_events,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
@@ -3502,7 +3519,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
- .free_running_flags = PEBS_FREERUNNING_FLAGS,
+ .large_pebs_flags = LARGE_PEBS_FLAGS,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 72db066..9aca448 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -40,50 +40,51 @@
* Model specific counters:
* MSR_CORE_C1_RES: CORE C1 Residency Counter
* perf code: 0x00
- * Available model: SLM,AMT,GLM
+ * Available model: SLM,AMT,GLM,CNL
* Scope: Core (each processor core has a MSR)
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
+ * CNL
* Scope: Core
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
* perf code: 0x02
- * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- * SKL,KNL,GLM
+ * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
+ * SKL,KNL,GLM,CNL
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
- * Available model: SNB,IVB,HSW,BDW,SKL
+ * Available model: SNB,IVB,HSW,BDW,SKL,CNL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
- * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM
+ * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
- * GLM
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
+ * GLM,CNL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- * SKL,KNL,GLM
+ * SKL,KNL,GLM,CNL
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
- * Available model: HSW ULT only
+ * Available model: HSW ULT,CNL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
- * Available model: HSW ULT only
+ * Available model: HSW ULT,CNL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
- * Available model: HSW ULT, GLM
+ * Available model: HSW ULT,GLM,CNL
* Scope: Package (physical package)
*
*/
@@ -486,6 +487,21 @@ static const struct cstate_model hswult_cstates __initconst = {
BIT(PERF_CSTATE_PKG_C10_RES),
};
+static const struct cstate_model cnl_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
+ BIT(PERF_CSTATE_CORE_C3_RES) |
+ BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES) |
+ BIT(PERF_CSTATE_PKG_C8_RES) |
+ BIT(PERF_CSTATE_PKG_C9_RES) |
+ BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
static const struct cstate_model slm_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
BIT(PERF_CSTATE_CORE_C6_RES),
@@ -557,6 +573,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates),
+
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 18c25ab..da67801 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event)
bool needed_cb = pebs_needs_sched_cb(cpuc);
cpuc->n_pebs++;
- if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+ if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs++;
pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
@@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event)
bool needed_cb = pebs_needs_sched_cb(cpuc);
cpuc->n_pebs--;
- if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+ if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs--;
pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
@@ -1153,6 +1153,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
if (pebs == NULL)
return;
+ regs->flags &= ~PERF_EFLAGS_EXACT;
sample_type = event->attr.sample_type;
dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
@@ -1197,7 +1198,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
*/
*regs = *iregs;
regs->flags = pebs->flags;
- set_linear_ip(regs, pebs->ip);
if (sample_type & PERF_SAMPLE_REGS_INTR) {
regs->ax = pebs->ax;
@@ -1233,13 +1233,22 @@ static void setup_pebs_sample_data(struct perf_event *event,
#endif
}
- if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
- regs->ip = pebs->real_ip;
- regs->flags |= PERF_EFLAGS_EXACT;
- } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
- regs->flags |= PERF_EFLAGS_EXACT;
- else
- regs->flags &= ~PERF_EFLAGS_EXACT;
+ if (event->attr.precise_ip > 1) {
+ /* Haswell and later have the eventing IP, so use it: */
+ if (x86_pmu.intel_cap.pebs_format >= 2) {
+ set_linear_ip(regs, pebs->real_ip);
+ regs->flags |= PERF_EFLAGS_EXACT;
+ } else {
+ /* Otherwise use PEBS off-by-1 IP: */
+ set_linear_ip(regs, pebs->ip);
+
+ /* ... and try to fix it up using the LBR entries: */
+ if (intel_pmu_pebs_fixup_ip(regs))
+ regs->flags |= PERF_EFLAGS_EXACT;
+ }
+ } else
+ set_linear_ip(regs, pebs->ip);
+
if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) &&
x86_pmu.intel_cap.pebs_format >= 1)
@@ -1306,17 +1315,93 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
return NULL;
}
+void intel_pmu_auto_reload_read(struct perf_event *event)
+{
+ WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
+
+ perf_pmu_disable(event->pmu);
+ intel_pmu_drain_pebs_buffer();
+ perf_pmu_enable(event->pmu);
+}
+
+/*
+ * Special variant of intel_pmu_save_and_restart() for auto-reload.
+ */
+static int
+intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - x86_pmu.cntval_bits;
+ u64 period = hwc->sample_period;
+ u64 prev_raw_count, new_raw_count;
+ s64 new, old;
+
+ WARN_ON(!period);
+
+ /*
+ * drain_pebs() only happens when the PMU is disabled.
+ */
+ WARN_ON(this_cpu_read(cpu_hw_events.enabled));
+
+ prev_raw_count = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ local64_set(&hwc->prev_count, new_raw_count);
+
+ /*
+ * Since the counter increments a negative counter value and
+ * overflows on the sign switch, giving the interval:
+ *
+ * [-period, 0]
+ *
+ * the difference between two consecutive reads is:
+ *
+ * A) value2 - value1;
+ * when no overflows have happened in between,
+ *
+ * B) (0 - value1) + (value2 - (-period));
+ * when one overflow happened in between,
+ *
+ * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
+ * when @n overflows happened in between.
+ *
+ * Here A) is the obvious difference, B) is the extension to the
+ * discrete interval, where the first term is to the top of the
+ * interval and the second term is from the bottom of the next
+ * interval and C) the extension to multiple intervals, where the
+ * middle term is the whole intervals covered.
+ *
+ * An equivalent of C, by reduction, is:
+ *
+ * value2 - value1 + n * period
+ */
+ new = ((s64)(new_raw_count << shift) >> shift);
+ old = ((s64)(prev_raw_count << shift) >> shift);
+ local64_add(new - old + count * period, &event->count);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
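/*
 * Illustrative sketch, not part of the patch: the case analysis in the
 * comment above reduces to the single expression value2 - value1 + n * period
 * that the code adds to event->count. A user-space check of that reduction
 * (function names are hypothetical):
 */
#include <assert.h>

static long long delta_by_intervals(long long v1, long long v2,
				    long long period, int n)
{
	if (n == 0)
		return v2 - v1;				/* case A: no overflow */
	/* cases B/C: top of first interval + full intervals + bottom of last */
	return (0 - v1) + (long long)(n - 1) * period + (v2 - (-period));
}

static void check_reload_delta(long long v1, long long v2, long long period, int n)
{
	/* both reads lie in [-period, 0]; n is the number of overflows between them */
	assert(delta_by_intervals(v1, v2, period, n) ==
	       v2 - v1 + (long long)n * period);
}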
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
void *base, void *top,
int bit, int count)
{
+ struct hw_perf_event *hwc = &event->hw;
struct perf_sample_data data;
struct pt_regs regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
- if (!intel_pmu_save_and_restart(event) &&
- !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ /*
+ * Now, auto-reload is only enabled in fixed period mode.
+ * The reload value is always hwc->sample_period.
+ * This may need to change if auto-reload is enabled in
+ * freq mode later.
+ */
+ intel_pmu_save_and_restart_reload(event, count);
+ } else if (!intel_pmu_save_and_restart(event))
return;
while (count > 1) {
@@ -1368,8 +1453,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
return;
n = top - at;
- if (n <= 0)
+ if (n <= 0) {
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_save_and_restart_reload(event, 0);
return;
+ }
__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}
@@ -1392,8 +1480,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
ds->pebs_index = ds->pebs_buffer_base;
- if (unlikely(base >= top))
+ if (unlikely(base >= top)) {
+ /*
+ * drain_pebs() can be called twice in a short period for an
+ * auto-reload event via pmu::read(), with no overflows having
+ * happened in between. intel_pmu_save_and_restart_reload()
+ * still needs to be called to update event->count in this case.
+ */
+ for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
+ x86_pmu.max_pebs_events) {
+ event = cpuc->events[bit];
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_save_and_restart_reload(event, 0);
+ }
return;
+ }
for (at = base; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
@@ -1530,7 +1632,7 @@ void __init intel_ds_init(void)
x86_pmu.pebs_record_size =
sizeof(struct pebs_record_skl);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- x86_pmu.free_running_flags |= PERF_SAMPLE_TIME;
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
break;
default:
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 81fd41d..3b99394 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1186,8 +1186,12 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
int range = 0;
list_for_each_entry(filter, filters, entry) {
- /* PT doesn't support single address triggers */
- if (!filter->range || !filter->size)
+ /*
+ * PT doesn't support single-address triggers or
+ * 'start' filters.
+ */
+ if (!filter->size ||
+ filter->action == PERF_ADDR_FILTER_ACTION_START)
return -EOPNOTSUPP;
if (!filter->inode) {
@@ -1227,7 +1231,10 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
filters->filter[range].msr_a = msr_a;
filters->filter[range].msr_b = msr_b;
- filters->filter[range].config = filter->filter ? 1 : 2;
+ if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER)
+ filters->filter[range].config = 1;
+ else
+ filters->filter[range].config = 2;
range++;
}
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index a2efb49..32f3e94 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -774,6 +774,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init),
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init),
+
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init),
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 7874c98..a7956fc 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -354,7 +354,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
if (!dogrp)
return n;
- list_for_each_entry(event, &leader->sibling_list, group_entry) {
+ for_each_sibling_event(event, leader) {
if (!is_box_event(box, event) ||
event->state <= PERF_EVENT_STATE_OFF)
continue;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 6d8044a..c98b943 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = {
SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4),
SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8),
SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8),
+ SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3),
EVENT_EXTRA_END
};
@@ -3562,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = {
NULL,
};
+/*
+ * To determine the number of CHAs, read bits 27:0 of the CAPID6 register,
+ * which is located at Device 30, Function 3, Offset 0x9C (PCI ID 0x2083).
+ */
+#define SKX_CAPID6 0x9c
+#define SKX_CHA_BIT_MASK GENMASK(27, 0)
+
static int skx_count_chabox(void)
{
- struct pci_dev *chabox_dev = NULL;
- int bus, count = 0;
+ struct pci_dev *dev = NULL;
+ u32 val = 0;
- while (1) {
- chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev);
- if (!chabox_dev)
- break;
- if (count == 0)
- bus = chabox_dev->bus->number;
- if (bus != chabox_dev->bus->number)
- break;
- count++;
- }
+ dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev);
+ if (!dev)
+ goto out;
- pci_dev_put(chabox_dev);
- return count;
+ pci_read_config_dword(dev, SKX_CAPID6, &val);
+ val &= SKX_CHA_BIT_MASK;
+out:
+ pci_dev_put(dev);
+ return hweight32(val);
}
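skx_count_chabox() above derives the CHA count by masking CAPID6 to bits 27:0 and counting the set bits with hweight32(). A minimal stand-alone sketch of that mapping (not part of the patch; the CAPID6 value is hypothetical, and popcount32() merely stands in for the kernel's hweight32()):

#include <stdio.h>
#include <stdint.h>

/* portable population count standing in for hweight32() */
static unsigned int popcount32(uint32_t x)
{
	unsigned int n = 0;

	while (x) {
		x &= x - 1;	/* clear the lowest set bit */
		n++;
	}
	return n;
}

int main(void)
{
	uint32_t capid6 = 0x0fffffff;		/* hypothetical: bits 27:0 all set */
	uint32_t mask   = (1u << 28) - 1;	/* GENMASK(27, 0) */

	printf("CHAs: %u\n", popcount32(capid6 & mask));	/* prints: CHAs: 28 */
	return 0;
}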
void skx_uncore_cpu_init(void)
@@ -3606,7 +3610,7 @@ static struct intel_uncore_type skx_uncore_imc = {
};
static struct attribute *skx_upi_uncore_formats_attr[] = {
- &format_attr_event_ext.attr,
+ &format_attr_event.attr,
&format_attr_umask_ext.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index 18e2628..e7edf19 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -188,10 +188,11 @@ static inline u64 msr_read_counter(struct perf_event *event)
if (event->hw.event_base)
rdmsrl(event->hw.event_base, now);
else
- rdtscll(now);
+ now = rdtsc_ordered();
return now;
}
+
static void msr_event_update(struct perf_event *event)
{
u64 prev, now;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78f91ec..9f37114 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -69,7 +69,7 @@ struct event_constraint {
#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */
+#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */
struct amd_nb {
@@ -88,7 +88,7 @@ struct amd_nb {
* REGS_USER can be handled for events limited to ring 3.
*
*/
-#define PEBS_FREERUNNING_FLAGS \
+#define LARGE_PEBS_FLAGS \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
@@ -520,6 +520,7 @@ struct x86_pmu {
void (*disable)(struct perf_event *);
void (*add)(struct perf_event *);
void (*del)(struct perf_event *);
+ void (*read)(struct perf_event *event);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
@@ -557,7 +558,7 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
bool late_ack;
- unsigned (*limit_period)(struct perf_event *event, unsigned l);
+ u64 (*limit_period)(struct perf_event *event, u64 l);
/*
* sysfs attrs
@@ -608,7 +609,7 @@ struct x86_pmu {
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
int max_pebs_events;
- unsigned long free_running_flags;
+ unsigned long large_pebs_flags;
/*
* Intel LBR
@@ -923,6 +924,8 @@ void intel_pmu_pebs_disable_all(void);
void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+void intel_pmu_auto_reload_read(struct perf_event *event);
+
void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 2edc49e..cfecc22 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -21,7 +21,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/hypervisor.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <linux/version.h>
#include <linux/vmalloc.h>
@@ -88,11 +88,15 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
u32 *hv_vp_index;
EXPORT_SYMBOL_GPL(hv_vp_index);
+struct hv_vp_assist_page **hv_vp_assist_page;
+EXPORT_SYMBOL_GPL(hv_vp_assist_page);
+
u32 hv_max_vp_index;
static int hv_cpu_init(unsigned int cpu)
{
u64 msr_vp_index;
+ struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
hv_get_vp_index(msr_vp_index);
@@ -101,6 +105,22 @@ static int hv_cpu_init(unsigned int cpu)
if (msr_vp_index > hv_max_vp_index)
hv_max_vp_index = msr_vp_index;
+ if (!hv_vp_assist_page)
+ return 0;
+
+ if (!*hvp)
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+
+ if (*hvp) {
+ u64 val;
+
+ val = vmalloc_to_pfn(*hvp);
+ val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) |
+ HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val);
+ }
+
return 0;
}
@@ -198,6 +218,9 @@ static int hv_cpu_die(unsigned int cpu)
struct hv_reenlightenment_control re_ctrl;
unsigned int new_cpu;
+ if (hv_vp_assist_page && hv_vp_assist_page[cpu])
+ wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
+
if (hv_reenlightenment_cb == NULL)
return 0;
@@ -224,6 +247,7 @@ void hyperv_init(void)
{
u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
+ int cpuhp;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
@@ -241,9 +265,17 @@ void hyperv_init(void)
if (!hv_vp_index)
return;
- if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
- hv_cpu_init, hv_cpu_die) < 0)
+ hv_vp_assist_page = kcalloc(num_possible_cpus(),
+ sizeof(*hv_vp_assist_page), GFP_KERNEL);
+ if (!hv_vp_assist_page) {
+ ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
goto free_vp_index;
+ }
+
+ cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ hv_cpu_init, hv_cpu_die);
+ if (cpuhp < 0)
+ goto free_vp_assist_page;
/*
* Setup the hypercall page and enable hypercalls.
@@ -256,7 +288,7 @@ void hyperv_init(void)
hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX);
if (hv_hypercall_pg == NULL) {
wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
- goto free_vp_index;
+ goto remove_cpuhp_state;
}
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -304,6 +336,11 @@ register_msr_cs:
return;
+remove_cpuhp_state:
+ cpuhp_remove_state(cpuhp);
+free_vp_assist_page:
+ kfree(hv_vp_assist_page);
+ hv_vp_assist_page = NULL;
free_vp_index:
kfree(hv_vp_index);
hv_vp_index = NULL;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 41c6718..86b1341 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -33,7 +33,6 @@
#include <asm/vdso.h>
#include <asm/sigframe.h>
#include <asm/sighandling.h>
-#include <asm/sys_ia32.h>
#include <asm/smap.h>
/*
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 96cd33b..11ef7b7 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -41,27 +41,28 @@
#include <linux/highuid.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
+#include <linux/sched/task.h>
#include <asm/mman.h>
#include <asm/types.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <asm/vgtod.h>
-#include <asm/sys_ia32.h>
+#include <asm/ia32.h>
#define AA(__x) ((unsigned long)(__x))
-asmlinkage long sys32_truncate64(const char __user *filename,
- unsigned long offset_low,
- unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename,
+ unsigned long, offset_low, unsigned long, offset_high)
{
- return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
+ return ksys_truncate(filename,
+ ((loff_t) offset_high << 32) | offset_low);
}
-asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
- unsigned long offset_high)
+COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd,
+ unsigned long, offset_low, unsigned long, offset_high)
{
- return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
+ return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
/*
@@ -96,8 +97,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
return 0;
}
-asmlinkage long sys32_stat64(const char __user *filename,
- struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
@@ -107,8 +108,8 @@ asmlinkage long sys32_stat64(const char __user *filename,
return ret;
}
-asmlinkage long sys32_lstat64(const char __user *filename,
- struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
@@ -117,7 +118,8 @@ asmlinkage long sys32_lstat64(const char __user *filename,
return ret;
}
-asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd,
+ struct stat64 __user *, statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
@@ -126,8 +128,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
return ret;
}
-asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
- struct stat64 __user *statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd,
+ const char __user *, filename,
+ struct stat64 __user *, statbuf, int, flag)
{
struct kstat stat;
int error;
@@ -153,7 +156,7 @@ struct mmap_arg_struct32 {
unsigned int offset;
};
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
+COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg)
{
struct mmap_arg_struct32 a;
@@ -163,29 +166,23 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
if (a.offset & ~PAGE_MASK)
return -EINVAL;
- return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+ return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
a.offset>>PAGE_SHIFT);
}
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
- int options)
-{
- return compat_sys_wait4(pid, stat_addr, options, NULL);
-}
-
/* warning: next two assume little endian */
-asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
- u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
{
- return sys_pread64(fd, ubuf, count,
- ((loff_t)AA(poshi) << 32) | AA(poslo));
+ return ksys_pread64(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
}
-asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
- u32 count, u32 poslo, u32 poshi)
+COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf,
+ u32, count, u32, poslo, u32, poshi)
{
- return sys_pwrite64(fd, ubuf, count,
- ((loff_t)AA(poshi) << 32) | AA(poslo));
+ return ksys_pwrite64(fd, ubuf, count,
+ ((loff_t)AA(poshi) << 32) | AA(poslo));
}
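The compat wrappers above reassemble a 64-bit file offset from the two 32-bit halves that the 32-bit ABI passes in separate registers. A minimal stand-alone sketch of that reassembly (not part of the patch; the poslo/poshi values are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t poslo = 0x00001000;	/* low 32 bits, as passed by the 32-bit ABI */
	uint32_t poshi = 0x00000002;	/* high 32 bits */
	int64_t pos = ((int64_t)poshi << 32) | poslo;

	printf("offset = 0x%llx\n", (unsigned long long)pos);	/* 0x200001000 */
	return 0;
}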
@@ -193,40 +190,53 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
*/
-long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
- __u32 len_low, __u32 len_high, int advice)
+COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low,
+ __u32, offset_high, __u32, len_low, __u32, len_high,
+ int, advice)
+{
+ return ksys_fadvise64_64(fd,
+ (((u64)offset_high)<<32) | offset_low,
+ (((u64)len_high)<<32) | len_low,
+ advice);
+}
+
+COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo,
+ unsigned int, off_hi, size_t, count)
{
- return sys_fadvise64_64(fd,
- (((u64)offset_high)<<32) | offset_low,
- (((u64)len_high)<<32) | len_low,
- advice);
+ return ksys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
}
-asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
- size_t count)
+COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low,
+ unsigned int, off_hi, unsigned int, n_low,
+ unsigned int, n_hi, int, flags)
{
- return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
+ return ksys_sync_file_range(fd,
+ ((u64)off_hi << 32) | off_low,
+ ((u64)n_hi << 32) | n_low, flags);
}
-asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
- unsigned n_low, unsigned n_hi, int flags)
+COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo,
+ unsigned int, offset_hi, size_t, len, int, advice)
{
- return sys_sync_file_range(fd,
- ((u64)off_hi << 32) | off_low,
- ((u64)n_hi << 32) | n_low, flags);
+ return ksys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
+ len, advice);
}
-asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
- size_t len, int advice)
+COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode,
+ unsigned int, offset_lo, unsigned int, offset_hi,
+ unsigned int, len_lo, unsigned int, len_hi)
{
- return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
- len, advice);
+ return ksys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
+ ((u64)len_hi << 32) | len_lo);
}
-asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
- unsigned offset_hi, unsigned len_lo,
- unsigned len_hi)
+/*
+ * The 32-bit clone() ABI is CONFIG_CLONE_BACKWARDS: the TLS argument
+ * is passed before the child TID pointer.
+ */
+COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags,
+ unsigned long, newsp, int __user *, parent_tidptr,
+ unsigned long, tls_val, int __user *, child_tidptr)
{
- return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
- ((u64)len_hi << 32) | len_lo);
+ return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr,
+ tls_val);
}
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 1188172..a303d7b 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -31,6 +31,7 @@
#include <asm/mmu.h>
#include <asm/mpspec.h>
#include <asm/realmode.h>
+#include <asm/x86_init.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@@ -133,6 +134,14 @@ static inline bool acpi_has_cpu_in_madt(void)
return !!acpi_lapic;
}
+#define ACPI_HAVE_ARCH_GET_ROOT_POINTER
+static inline u64 acpi_arch_get_root_pointer(void)
+{
+ return x86_init.acpi.get_root_pointer();
+}
+
+void acpi_generic_reduced_hw_init(void);
+
#else /* !CONFIG_ACPI */
#define acpi_lapic 0
@@ -142,6 +151,8 @@ static inline void acpi_noirq_set(void) { }
static inline void acpi_disable_pci(void) { }
static inline void disable_acpi(void) { }
+static inline void acpi_generic_reduced_hw_init(void) { }
+
#endif /* !CONFIG_ACPI */
#define ARCH_HAS_POWER_INIT 1
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index cf5961c..4cd6a3b 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -218,13 +218,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \
output, input...) \
-{ \
asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
"call %P[new2]", feature2) \
: output, ASM_CALL_CONSTRAINT \
: [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2), ## input); \
-}
+ [new2] "i" (newfunc2), ## input)
/*
* use this macro(s) if you need more than one output parameter
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 9872277..40a3d36 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -138,7 +138,6 @@ extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
extern void apic_intr_mode_init(void);
-extern void setup_local_APIC(void);
extern void init_apic_mappings(void);
void register_lapic_address(unsigned long address);
extern void setup_boot_APIC_clock(void);
@@ -183,6 +182,7 @@ static inline void disable_local_APIC(void) { }
# define setup_boot_APIC_clock x86_init_noop
# define setup_secondary_APIC_clock x86_init_noop
static inline void lapic_update_tsc_freq(void) { }
+static inline void init_bsp_APIC(void) { }
static inline void apic_intr_mode_init(void) { }
static inline void lapic_assign_system_vectors(void) { }
static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { }
@@ -304,12 +304,6 @@ struct apic {
u32 irq_delivery_mode;
u32 irq_dest_mode;
- /* Functions and data related to vector allocation */
- void (*vector_allocation_domain)(int cpu, struct cpumask *retmask,
- const struct cpumask *mask);
- int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
- struct irq_data *irqdata,
- unsigned int *apicid);
u32 (*calc_dest_apicid)(unsigned int cpu);
/* ICR related functions */
@@ -499,17 +493,7 @@ extern void default_setup_apic_routing(void);
extern u32 apic_default_calc_apicid(unsigned int cpu);
extern u32 apic_flat_calc_apicid(unsigned int cpu);
-extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
- struct irq_data *irqdata,
- unsigned int *apicid);
-extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
- struct irq_data *irqdata,
- unsigned int *apicid);
extern bool default_check_apicid_used(physid_mask_t *map, int apicid);
-extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask);
-extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask,
- const struct cpumask *mask);
extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap);
extern int default_cpu_present_to_apicid(int mps_cpu);
extern int default_check_phys_apicid_present(int phys_apicid);
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015d..c356098 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
+#include <asm/nospec-branch.h>
+
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d11161..1908214 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 72759f1..0db6bec 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -17,36 +17,40 @@
#define ATOMIC_INIT(i) { (i) }
/**
- * atomic_read - read atomic variable
+ * arch_atomic_read - read atomic variable
* @v: pointer of type atomic_t
*
* Atomically reads the value of @v.
*/
-static __always_inline int atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
{
+ /*
+ * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here,
+ * as it's a non-inlined function that increases binary size and stack usage.
+ */
return READ_ONCE((v)->counter);
}
/**
- * atomic_set - set atomic variable
+ * arch_atomic_set - set atomic variable
* @v: pointer of type atomic_t
* @i: required value
*
* Atomically sets the value of @v to @i.
*/
-static __always_inline void atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
WRITE_ONCE(v->counter, i);
}
/**
- * atomic_add - add integer to atomic variable
+ * arch_atomic_add - add integer to atomic variable
* @i: integer value to add
* @v: pointer of type atomic_t
*
* Atomically adds @i to @v.
*/
-static __always_inline void atomic_add(int i, atomic_t *v)
+static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "addl %1,%0"
: "+m" (v->counter)
@@ -54,13 +58,13 @@ static __always_inline void atomic_add(int i, atomic_t *v)
}
/**
- * atomic_sub - subtract integer from atomic variable
+ * arch_atomic_sub - subtract integer from atomic variable
* @i: integer value to subtract
* @v: pointer of type atomic_t
*
* Atomically subtracts @i from @v.
*/
-static __always_inline void atomic_sub(int i, atomic_t *v)
+static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "subl %1,%0"
: "+m" (v->counter)
@@ -68,7 +72,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v)
}
/**
- * atomic_sub_and_test - subtract value from variable and test result
+ * arch_atomic_sub_and_test - subtract value from variable and test result
* @i: integer value to subtract
* @v: pointer of type atomic_t
*
@@ -76,63 +80,63 @@ static __always_inline void atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
+static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
}
/**
- * atomic_inc - increment atomic variable
+ * arch_atomic_inc - increment atomic variable
* @v: pointer of type atomic_t
*
* Atomically increments @v by 1.
*/
-static __always_inline void atomic_inc(atomic_t *v)
+static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter));
}
/**
- * atomic_dec - decrement atomic variable
+ * arch_atomic_dec - decrement atomic variable
* @v: pointer of type atomic_t
*
* Atomically decrements @v by 1.
*/
-static __always_inline void atomic_dec(atomic_t *v)
+static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter));
}
/**
- * atomic_dec_and_test - decrement and test
+ * arch_atomic_dec_and_test - decrement and test
* @v: pointer of type atomic_t
*
* Atomically decrements @v by 1 and
* returns true if the result is 0, or false for all other
* cases.
*/
-static __always_inline bool atomic_dec_and_test(atomic_t *v)
+static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
}
/**
- * atomic_inc_and_test - increment and test
+ * arch_atomic_inc_and_test - increment and test
* @v: pointer of type atomic_t
*
* Atomically increments @v by 1
* and returns true if the result is zero, or false for all
* other cases.
*/
-static __always_inline bool atomic_inc_and_test(atomic_t *v)
+static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
}
/**
- * atomic_add_negative - add and test if negative
+ * arch_atomic_add_negative - add and test if negative
* @i: integer value to add
* @v: pointer of type atomic_t
*
@@ -140,65 +144,65 @@ static __always_inline bool atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static __always_inline bool atomic_add_negative(int i, atomic_t *v)
+static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
}
/**
- * atomic_add_return - add integer and return
+ * arch_atomic_add_return - add integer and return
* @i: integer value to add
* @v: pointer of type atomic_t
*
* Atomically adds @i to @v and returns @i + @v
*/
-static __always_inline int atomic_add_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
{
return i + xadd(&v->counter, i);
}
/**
- * atomic_sub_return - subtract integer and return
+ * arch_atomic_sub_return - subtract integer and return
* @v: pointer of type atomic_t
* @i: integer value to subtract
*
* Atomically subtracts @i from @v and returns @v - @i
*/
-static __always_inline int atomic_sub_return(int i, atomic_t *v)
+static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
{
- return atomic_add_return(-i, v);
+ return arch_atomic_add_return(-i, v);
}
-#define atomic_inc_return(v) (atomic_add_return(1, v))
-#define atomic_dec_return(v) (atomic_sub_return(1, v))
+#define arch_atomic_inc_return(v) (arch_atomic_add_return(1, v))
+#define arch_atomic_dec_return(v) (arch_atomic_sub_return(1, v))
-static __always_inline int atomic_fetch_add(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
return xadd(&v->counter, i);
}
-static __always_inline int atomic_fetch_sub(int i, atomic_t *v)
+static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
{
return xadd(&v->counter, -i);
}
-static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
- return cmpxchg(&v->counter, old, new);
+ return arch_cmpxchg(&v->counter, old, new);
}
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg
+static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new)
{
return try_cmpxchg(&v->counter, old, new);
}
-static inline int atomic_xchg(atomic_t *v, int new)
+static inline int arch_atomic_xchg(atomic_t *v, int new)
{
return xchg(&v->counter, new);
}
-static inline void atomic_and(int i, atomic_t *v)
+static inline void arch_atomic_and(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "andl %1,%0"
: "+m" (v->counter)
@@ -206,16 +210,16 @@ static inline void atomic_and(int i, atomic_t *v)
: "memory");
}
-static inline int atomic_fetch_and(int i, atomic_t *v)
+static inline int arch_atomic_fetch_and(int i, atomic_t *v)
{
- int val = atomic_read(v);
+ int val = arch_atomic_read(v);
- do { } while (!atomic_try_cmpxchg(v, &val, val & i));
+ do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i));
return val;
}
-static inline void atomic_or(int i, atomic_t *v)
+static inline void arch_atomic_or(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "orl %1,%0"
: "+m" (v->counter)
@@ -223,16 +227,16 @@ static inline void atomic_or(int i, atomic_t *v)
: "memory");
}
-static inline int atomic_fetch_or(int i, atomic_t *v)
+static inline int arch_atomic_fetch_or(int i, atomic_t *v)
{
- int val = atomic_read(v);
+ int val = arch_atomic_read(v);
- do { } while (!atomic_try_cmpxchg(v, &val, val | i));
+ do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i));
return val;
}
-static inline void atomic_xor(int i, atomic_t *v)
+static inline void arch_atomic_xor(int i, atomic_t *v)
{
asm volatile(LOCK_PREFIX "xorl %1,%0"
: "+m" (v->counter)
@@ -240,17 +244,17 @@ static inline void atomic_xor(int i, atomic_t *v)
: "memory");
}
-static inline int atomic_fetch_xor(int i, atomic_t *v)
+static inline int arch_atomic_fetch_xor(int i, atomic_t *v)
{
- int val = atomic_read(v);
+ int val = arch_atomic_read(v);
- do { } while (!atomic_try_cmpxchg(v, &val, val ^ i));
+ do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i));
return val;
}
/**
- * __atomic_add_unless - add unless the number is already a given value
+ * __arch_atomic_add_unless - add unless the number is already a given value
* @v: pointer of type atomic_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
@@ -258,14 +262,14 @@ static inline int atomic_fetch_xor(int i, atomic_t *v)
* Atomically adds @a to @v, so long as @v was not already @u.
* Returns the old value of @v.
*/
-static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
+static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u)
{
- int c = atomic_read(v);
+ int c = arch_atomic_read(v);
do {
if (unlikely(c == u))
break;
- } while (!atomic_try_cmpxchg(v, &c, c + a));
+ } while (!arch_atomic_try_cmpxchg(v, &c, c + a));
return c;
}
@@ -276,4 +280,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u)
# include <asm/atomic64_64.h>
#endif
+#include <asm-generic/atomic-instrumented.h>
+
#endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 97c46b8..92212bf 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -62,7 +62,7 @@ ATOMIC64_DECL(add_unless);
#undef ATOMIC64_EXPORT
/**
- * atomic64_cmpxchg - cmpxchg atomic64 variable
+ * arch_atomic64_cmpxchg - cmpxchg atomic64 variable
* @v: pointer to type atomic64_t
* @o: expected value
* @n: new value
@@ -71,20 +71,21 @@ ATOMIC64_DECL(add_unless);
* the old value.
*/
-static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o,
+ long long n)
{
- return cmpxchg64(&v->counter, o, n);
+ return arch_cmpxchg64(&v->counter, o, n);
}
/**
- * atomic64_xchg - xchg atomic64 variable
+ * arch_atomic64_xchg - xchg atomic64 variable
* @v: pointer to type atomic64_t
* @n: value to assign
*
* Atomically xchgs the value of @v to @n and returns
* the old value.
*/
-static inline long long atomic64_xchg(atomic64_t *v, long long n)
+static inline long long arch_atomic64_xchg(atomic64_t *v, long long n)
{
long long o;
unsigned high = (unsigned)(n >> 32);
@@ -96,13 +97,13 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n)
}
/**
- * atomic64_set - set atomic64 variable
+ * arch_atomic64_set - set atomic64 variable
* @v: pointer to type atomic64_t
* @i: value to assign
*
* Atomically sets the value of @v to @i.
*/
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void arch_atomic64_set(atomic64_t *v, long long i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
@@ -112,26 +113,26 @@ static inline void atomic64_set(atomic64_t *v, long long i)
}
/**
- * atomic64_read - read atomic64 variable
+ * arch_atomic64_read - read atomic64 variable
* @v: pointer to type atomic64_t
*
* Atomically reads the value of @v and returns it.
*/
-static inline long long atomic64_read(const atomic64_t *v)
+static inline long long arch_atomic64_read(const atomic64_t *v)
{
long long r;
alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
return r;
- }
+}
/**
- * atomic64_add_return - add and return
+ * arch_atomic64_add_return - add and return
* @i: integer value to add
* @v: pointer to type atomic64_t
*
* Atomically adds @i to @v and returns @i + *@v
*/
-static inline long long atomic64_add_return(long long i, atomic64_t *v)
+static inline long long arch_atomic64_add_return(long long i, atomic64_t *v)
{
alternative_atomic64(add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -142,7 +143,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v)
/*
* Other variants with different arithmetic operators:
*/
-static inline long long atomic64_sub_return(long long i, atomic64_t *v)
+static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v)
{
alternative_atomic64(sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -150,7 +151,7 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v)
return i;
}
-static inline long long atomic64_inc_return(atomic64_t *v)
+static inline long long arch_atomic64_inc_return(atomic64_t *v)
{
long long a;
alternative_atomic64(inc_return, "=&A" (a),
@@ -158,7 +159,7 @@ static inline long long atomic64_inc_return(atomic64_t *v)
return a;
}
-static inline long long atomic64_dec_return(atomic64_t *v)
+static inline long long arch_atomic64_dec_return(atomic64_t *v)
{
long long a;
alternative_atomic64(dec_return, "=&A" (a),
@@ -167,13 +168,13 @@ static inline long long atomic64_dec_return(atomic64_t *v)
}
/**
- * atomic64_add - add integer to atomic64 variable
+ * arch_atomic64_add - add integer to atomic64 variable
* @i: integer value to add
* @v: pointer to type atomic64_t
*
* Atomically adds @i to @v.
*/
-static inline long long atomic64_add(long long i, atomic64_t *v)
+static inline long long arch_atomic64_add(long long i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -182,13 +183,13 @@ static inline long long atomic64_add(long long i, atomic64_t *v)
}
/**
- * atomic64_sub - subtract the atomic64 variable
+ * arch_atomic64_sub - subtract the atomic64 variable
* @i: integer value to subtract
* @v: pointer to type atomic64_t
*
* Atomically subtracts @i from @v.
*/
-static inline long long atomic64_sub(long long i, atomic64_t *v)
+static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
ASM_OUTPUT2("+A" (i), "+c" (v)),
@@ -197,7 +198,7 @@ static inline long long atomic64_sub(long long i, atomic64_t *v)
}
/**
- * atomic64_sub_and_test - subtract value from variable and test result
+ * arch_atomic64_sub_and_test - subtract value from variable and test result
* @i: integer value to subtract
* @v: pointer to type atomic64_t
*
@@ -205,46 +206,46 @@ static inline long long atomic64_sub(long long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline int atomic64_sub_and_test(long long i, atomic64_t *v)
+static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v)
{
- return atomic64_sub_return(i, v) == 0;
+ return arch_atomic64_sub_return(i, v) == 0;
}
/**
- * atomic64_inc - increment atomic64 variable
+ * arch_atomic64_inc - increment atomic64 variable
* @v: pointer to type atomic64_t
*
* Atomically increments @v by 1.
*/
-static inline void atomic64_inc(atomic64_t *v)
+static inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
/**
- * atomic64_dec - decrement atomic64 variable
+ * arch_atomic64_dec - decrement atomic64 variable
* @v: pointer to type atomic64_t
*
* Atomically decrements @v by 1.
*/
-static inline void atomic64_dec(atomic64_t *v)
+static inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
/**
- * atomic64_dec_and_test - decrement and test
+ * arch_atomic64_dec_and_test - decrement and test
* @v: pointer to type atomic64_t
*
* Atomically decrements @v by 1 and
* returns true if the result is 0, or false for all other
* cases.
*/
-static inline int atomic64_dec_and_test(atomic64_t *v)
+static inline int arch_atomic64_dec_and_test(atomic64_t *v)
{
- return atomic64_dec_return(v) == 0;
+ return arch_atomic64_dec_return(v) == 0;
}
/**
@@ -255,13 +256,13 @@ static inline int atomic64_dec_and_test(atomic64_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-static inline int atomic64_inc_and_test(atomic64_t *v)
+static inline int arch_atomic64_inc_and_test(atomic64_t *v)
{
- return atomic64_inc_return(v) == 0;
+ return arch_atomic64_inc_return(v) == 0;
}
/**
- * atomic64_add_negative - add and test if negative
+ * arch_atomic64_add_negative - add and test if negative
* @i: integer value to add
* @v: pointer to type atomic64_t
*
@@ -269,13 +270,13 @@ static inline int atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline int atomic64_add_negative(long long i, atomic64_t *v)
+static inline int arch_atomic64_add_negative(long long i, atomic64_t *v)
{
- return atomic64_add_return(i, v) < 0;
+ return arch_atomic64_add_return(i, v) < 0;
}
/**
- * atomic64_add_unless - add unless the number is a given value
+ * arch_atomic64_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
@@ -283,7 +284,8 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns non-zero if the add was done, zero otherwise.
*/
-static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
+ long long u)
{
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
@@ -294,7 +296,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
}
-static inline int atomic64_inc_not_zero(atomic64_t *v)
+static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
alternative_atomic64(inc_not_zero, "=&a" (r),
@@ -302,7 +304,7 @@ static inline int atomic64_inc_not_zero(atomic64_t *v)
return r;
}
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
alternative_atomic64(dec_if_positive, "=&A" (r),
@@ -313,70 +315,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
#undef alternative_atomic64
#undef __alternative_atomic64
-static inline void atomic64_and(long long i, atomic64_t *v)
+static inline void arch_atomic64_and(long long i, atomic64_t *v)
{
long long old, c = 0;
- while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+ while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
}
-static inline long long atomic64_fetch_and(long long i, atomic64_t *v)
+static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v)
{
long long old, c = 0;
- while ((old = atomic64_cmpxchg(v, c, c & i)) != c)
+ while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
c = old;
return old;
}
-static inline void atomic64_or(long long i, atomic64_t *v)
+static inline void arch_atomic64_or(long long i, atomic64_t *v)
{
long long old, c = 0;
- while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+ while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
}
-static inline long long atomic64_fetch_or(long long i, atomic64_t *v)
+static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v)
{
long long old, c = 0;
- while ((old = atomic64_cmpxchg(v, c, c | i)) != c)
+ while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
c = old;
return old;
}
-static inline void atomic64_xor(long long i, atomic64_t *v)
+static inline void arch_atomic64_xor(long long i, atomic64_t *v)
{
long long old, c = 0;
- while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+ while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
}
-static inline long long atomic64_fetch_xor(long long i, atomic64_t *v)
+static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v)
{
long long old, c = 0;
- while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c)
+ while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
c = old;
return old;
}
-static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v)
{
long long old, c = 0;
- while ((old = atomic64_cmpxchg(v, c, c + i)) != c)
+ while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
c = old;
return old;
}
-#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v))
+#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v))
#endif /* _ASM_X86_ATOMIC64_32_H */
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 738495c..6106b59 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -11,37 +11,37 @@
#define ATOMIC64_INIT(i) { (i) }
/**
- * atomic64_read - read atomic64 variable
+ * arch_atomic64_read - read atomic64 variable
* @v: pointer of type atomic64_t
*
* Atomically reads the value of @v.
* Doesn't imply a read memory barrier.
*/
-static inline long atomic64_read(const atomic64_t *v)
+static inline long arch_atomic64_read(const atomic64_t *v)
{
return READ_ONCE((v)->counter);
}
/**
- * atomic64_set - set atomic64 variable
+ * arch_atomic64_set - set atomic64 variable
* @v: pointer to type atomic64_t
* @i: required value
*
* Atomically sets the value of @v to @i.
*/
-static inline void atomic64_set(atomic64_t *v, long i)
+static inline void arch_atomic64_set(atomic64_t *v, long i)
{
WRITE_ONCE(v->counter, i);
}
/**
- * atomic64_add - add integer to atomic64 variable
+ * arch_atomic64_add - add integer to atomic64 variable
* @i: integer value to add
* @v: pointer to type atomic64_t
*
* Atomically adds @i to @v.
*/
-static __always_inline void atomic64_add(long i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(long i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "addq %1,%0"
: "=m" (v->counter)
@@ -49,13 +49,13 @@ static __always_inline void atomic64_add(long i, atomic64_t *v)
}
/**
- * atomic64_sub - subtract the atomic64 variable
+ * arch_atomic64_sub - subtract the atomic64 variable
* @i: integer value to subtract
* @v: pointer to type atomic64_t
*
* Atomically subtracts @i from @v.
*/
-static inline void atomic64_sub(long i, atomic64_t *v)
+static inline void arch_atomic64_sub(long i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "subq %1,%0"
: "=m" (v->counter)
@@ -63,7 +63,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
}
/**
- * atomic64_sub_and_test - subtract value from variable and test result
+ * arch_atomic64_sub_and_test - subtract value from variable and test result
* @i: integer value to subtract
* @v: pointer to type atomic64_t
*
@@ -71,18 +71,18 @@ static inline void atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-static inline bool atomic64_sub_and_test(long i, atomic64_t *v)
+static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
}
/**
- * atomic64_inc - increment atomic64 variable
+ * arch_atomic64_inc - increment atomic64 variable
* @v: pointer to type atomic64_t
*
* Atomically increments @v by 1.
*/
-static __always_inline void atomic64_inc(atomic64_t *v)
+static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
@@ -90,12 +90,12 @@ static __always_inline void atomic64_inc(atomic64_t *v)
}
/**
- * atomic64_dec - decrement atomic64 variable
+ * arch_atomic64_dec - decrement atomic64 variable
* @v: pointer to type atomic64_t
*
* Atomically decrements @v by 1.
*/
-static __always_inline void atomic64_dec(atomic64_t *v)
+static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
@@ -103,33 +103,33 @@ static __always_inline void atomic64_dec(atomic64_t *v)
}
/**
- * atomic64_dec_and_test - decrement and test
+ * arch_atomic64_dec_and_test - decrement and test
* @v: pointer to type atomic64_t
*
* Atomically decrements @v by 1 and
* returns true if the result is 0, or false for all other
* cases.
*/
-static inline bool atomic64_dec_and_test(atomic64_t *v)
+static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
}
/**
- * atomic64_inc_and_test - increment and test
+ * arch_atomic64_inc_and_test - increment and test
* @v: pointer to type atomic64_t
*
* Atomically increments @v by 1
* and returns true if the result is zero, or false for all
* other cases.
*/
-static inline bool atomic64_inc_and_test(atomic64_t *v)
+static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
}
/**
- * atomic64_add_negative - add and test if negative
+ * arch_atomic64_add_negative - add and test if negative
* @i: integer value to add
* @v: pointer to type atomic64_t
*
@@ -137,59 +137,59 @@ static inline bool atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-static inline bool atomic64_add_negative(long i, atomic64_t *v)
+static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
}
/**
- * atomic64_add_return - add and return
+ * arch_atomic64_add_return - add and return
* @i: integer value to add
* @v: pointer to type atomic64_t
*
* Atomically adds @i to @v and returns @i + @v
*/
-static __always_inline long atomic64_add_return(long i, atomic64_t *v)
+static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v)
{
return i + xadd(&v->counter, i);
}
-static inline long atomic64_sub_return(long i, atomic64_t *v)
+static inline long arch_atomic64_sub_return(long i, atomic64_t *v)
{
- return atomic64_add_return(-i, v);
+ return arch_atomic64_add_return(-i, v);
}
-static inline long atomic64_fetch_add(long i, atomic64_t *v)
+static inline long arch_atomic64_fetch_add(long i, atomic64_t *v)
{
return xadd(&v->counter, i);
}
-static inline long atomic64_fetch_sub(long i, atomic64_t *v)
+static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v)
{
return xadd(&v->counter, -i);
}
-#define atomic64_inc_return(v) (atomic64_add_return(1, (v)))
-#define atomic64_dec_return(v) (atomic64_sub_return(1, (v)))
+#define arch_atomic64_inc_return(v) (arch_atomic64_add_return(1, (v)))
+#define arch_atomic64_dec_return(v) (arch_atomic64_sub_return(1, (v)))
-static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
+static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new)
{
- return cmpxchg(&v->counter, old, new);
+ return arch_cmpxchg(&v->counter, old, new);
}
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new)
{
return try_cmpxchg(&v->counter, old, new);
}
-static inline long atomic64_xchg(atomic64_t *v, long new)
+static inline long arch_atomic64_xchg(atomic64_t *v, long new)
{
return xchg(&v->counter, new);
}
/**
- * atomic64_add_unless - add unless the number is a given value
+ * arch_atomic64_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
@@ -197,37 +197,37 @@ static inline long atomic64_xchg(atomic64_t *v, long new)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static inline bool atomic64_add_unless(atomic64_t *v, long a, long u)
+static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u)
{
- s64 c = atomic64_read(v);
+ s64 c = arch_atomic64_read(v);
do {
if (unlikely(c == u))
return false;
- } while (!atomic64_try_cmpxchg(v, &c, c + a));
+ } while (!arch_atomic64_try_cmpxchg(v, &c, c + a));
return true;
}
-#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0)
/*
- * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
* @v: pointer of type atomic_t
*
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
-static inline long atomic64_dec_if_positive(atomic64_t *v)
+static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
- s64 dec, c = atomic64_read(v);
+ s64 dec, c = arch_atomic64_read(v);
do {
dec = c - 1;
if (unlikely(dec < 0))
break;
- } while (!atomic64_try_cmpxchg(v, &c, dec));
+ } while (!arch_atomic64_try_cmpxchg(v, &c, dec));
return dec;
}
-static inline void atomic64_and(long i, atomic64_t *v)
+static inline void arch_atomic64_and(long i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "andq %1,%0"
: "+m" (v->counter)
@@ -235,16 +235,16 @@ static inline void atomic64_and(long i, atomic64_t *v)
: "memory");
}
-static inline long atomic64_fetch_and(long i, atomic64_t *v)
+static inline long arch_atomic64_fetch_and(long i, atomic64_t *v)
{
- s64 val = atomic64_read(v);
+ s64 val = arch_atomic64_read(v);
do {
- } while (!atomic64_try_cmpxchg(v, &val, val & i));
+ } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
return val;
}
-static inline void atomic64_or(long i, atomic64_t *v)
+static inline void arch_atomic64_or(long i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "orq %1,%0"
: "+m" (v->counter)
@@ -252,16 +252,16 @@ static inline void atomic64_or(long i, atomic64_t *v)
: "memory");
}
-static inline long atomic64_fetch_or(long i, atomic64_t *v)
+static inline long arch_atomic64_fetch_or(long i, atomic64_t *v)
{
- s64 val = atomic64_read(v);
+ s64 val = arch_atomic64_read(v);
do {
- } while (!atomic64_try_cmpxchg(v, &val, val | i));
+ } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
return val;
}
-static inline void atomic64_xor(long i, atomic64_t *v)
+static inline void arch_atomic64_xor(long i, atomic64_t *v)
{
asm volatile(LOCK_PREFIX "xorq %1,%0"
: "+m" (v->counter)
@@ -269,12 +269,12 @@ static inline void atomic64_xor(long i, atomic64_t *v)
: "memory");
}
-static inline long atomic64_fetch_xor(long i, atomic64_t *v)
+static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v)
{
- s64 val = atomic64_read(v);
+ s64 val = arch_atomic64_read(v);
do {
- } while (!atomic64_try_cmpxchg(v, &val, val ^ i));
+ } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
return val;
}
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e1259f0..042b5e8 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
"lfence", X86_FEATURE_LFENCE_RDTSC)
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb() rmb()
-#else
#define dma_rmb() barrier()
-#endif
#define dma_wmb() barrier()
#ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
#define __smp_wmb() barrier()
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v) \
-do { \
- compiletime_assert_atomic_type(*p); \
- __smp_mb(); \
- WRITE_ONCE(*p, v); \
-} while (0)
-
-#define __smp_load_acquire(p) \
-({ \
- typeof(*p) ___p1 = READ_ONCE(*p); \
- compiletime_assert_atomic_type(*p); \
- __smp_mb(); \
- ___p1; \
-})
-
-#else /* regular x86 TSO memory ordering */
-
#define __smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
@@ -107,8 +79,6 @@ do { \
___p1; \
})
-#endif
-
/* Atomic operations are already serializing on x86 */
#define __smp_mb__before_atomic() barrier()
#define __smp_mb__after_atomic() barrier()
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 3fa0398..9f645ba 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr)
: "iq" ((u8)CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX "bts %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
: BITOP_ADDR(addr) : "Ir" (nr) : "memory");
}
}
@@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
+ asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
}
/**
@@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btr %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad
static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
}
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
@@ -182,7 +182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *
*/
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
+ asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
}
/**
@@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
: CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX "btc %1,%0"
+ asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: BITOP_ADDR(addr)
: "Ir" (nr));
}
@@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr)
*/
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
{
bool oldbit;
- asm("bts %2,%1"
+ asm(__ASM_SIZE(bts) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
*/
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr),
+ *addr, "Ir", nr, "%0", c);
}
/**
@@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
{
bool oldbit;
- asm volatile("btr %2,%1"
+ asm volatile(__ASM_SIZE(btr) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr));
@@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
{
bool oldbit;
- asm volatile("btc %2,%1"
+ asm volatile(__ASM_SIZE(btc) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit), ADDR
: "Ir" (nr) : "memory");
@@ -316,7 +318,8 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
*/
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
- GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c);
+ GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc),
+ *addr, "Ir", nr, "%0", c);
}
static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
@@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
{
bool oldbit;
- asm volatile("bt %2,%1"
+ asm volatile(__ASM_SIZE(bt) " %2,%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
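
The point of switching from a bare "bts"/"btr"/"btc" to __ASM_SIZE() is that, with only a memory operand and a register bit number, the assembler cannot infer the operand size and may warn or pick a different width than intended. Abridged sketch of the helper from arch/x86/include/asm/asm.h (the real header also has assembly-file variants); it simply appends an explicit size suffix:

#define __ASM_FORM(x)   " " #x " "              /* C-side stringification */

#ifndef __x86_64__
# define __ASM_SEL(a, b)        __ASM_FORM(a)   /* 32-bit: the "l" form */
#else
# define __ASM_SEL(a, b)        __ASM_FORM(b)   /* 64-bit: the "q" form */
#endif

#define __ASM_SIZE(inst, ...)   __ASM_SEL(inst##l##__VA_ARGS__, \
                                          inst##q##__VA_ARGS__)

So __ASM_SIZE(bts) emits an explicit btsq on 64-bit builds and btsl on 32-bit ones, matching the width of the unsigned long bitmap word.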
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 56bd436..e3efd8a 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -145,13 +145,13 @@ extern void __add_wrong_size(void)
# include <asm/cmpxchg_64.h>
#endif
-#define cmpxchg(ptr, old, new) \
+#define arch_cmpxchg(ptr, old, new) \
__cmpxchg(ptr, old, new, sizeof(*(ptr)))
-#define sync_cmpxchg(ptr, old, new) \
+#define arch_sync_cmpxchg(ptr, old, new) \
__sync_cmpxchg(ptr, old, new, sizeof(*(ptr)))
-#define cmpxchg_local(ptr, old, new) \
+#define arch_cmpxchg_local(ptr, old, new) \
__cmpxchg_local(ptr, old, new, sizeof(*(ptr)))
@@ -221,7 +221,7 @@ extern void __add_wrong_size(void)
#define __try_cmpxchg(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
-#define try_cmpxchg(ptr, pold, new) \
+#define try_cmpxchg(ptr, pold, new) \
__try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr)))
/*
@@ -250,10 +250,10 @@ extern void __add_wrong_size(void)
__ret; \
})
-#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \
__cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
-#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
__cmpxchg_double(, p1, p2, o1, o2, n1, n2)
#endif /* ASM_X86_CMPXCHG_H */
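
As with the atomics, the arch_cmpxchg*() names exist so the generic instrumented layer can define the unprefixed macros on top of them. Simplified sketch (compare include/asm-generic/atomic-instrumented.h; the real macro names its temporaries slightly differently):

#define cmpxchg(ptr, old, new)                                          \
({                                                                      \
        __typeof__(ptr) ___ptr = (ptr);                                 \
        kasan_check_write(___ptr, sizeof(*___ptr));                     \
        arch_cmpxchg(___ptr, (old), (new));                             \
})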
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 1732704..1a2eafca 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -36,10 +36,10 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
}
#ifdef CONFIG_X86_CMPXCHG64
-#define cmpxchg64(ptr, o, n) \
+#define arch_cmpxchg64(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
(unsigned long long)(n)))
#endif
@@ -76,7 +76,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
* to simulate the cmpxchg8b on the 80386 and 80486 CPU.
*/
-#define cmpxchg64(ptr, o, n) \
+#define arch_cmpxchg64(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (o); \
@@ -93,7 +93,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
__ret; })
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
__typeof__(*(ptr)) __old = (o); \
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 03cad19..bfca3b3 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -7,13 +7,13 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
*ptr = val;
}
-#define cmpxchg64(ptr, o, n) \
+#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
cmpxchg((ptr), (o), (n)); \
})
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
cmpxchg_local((ptr), (o), (n)); \
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 736771c..b27da96 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -140,7 +140,6 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
/*
* Static testing of CPU features. Used the same as boot_cpu_has().
* These will statically patch the target code for additional
@@ -196,13 +195,6 @@ t_no:
boot_cpu_has(bit) : \
_static_cpu_has(bit) \
)
-#else
-/*
- * Fall back to dynamic for gcc versions which don't support asm goto. Should be
- * a minority now anyway.
- */
-#define static_cpu_has(bit) boot_cpu_has(bit)
-#endif
#define cpu_has_bug(c, bit) cpu_has(c, (bit))
#define set_cpu_bug(c, bit) set_cpu_cap(c, (bit))
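
Dropping the CC_HAVE_ASM_GOTO fallback means static_cpu_has() is now always the asm-goto based, boot-time patched variant (asm goto support is assumed for x86 builds from this point on). Typical usage, illustrative only:

static void flush_cacheline(void *addr)
{
        if (static_cpu_has(X86_FEATURE_CLFLUSHOPT))
                clflushopt(addr);       /* patched in when the CPU has it */
        else
                clflush(addr);          /* patched-in fallback path */
}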
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3..d554c11 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -315,6 +316,7 @@
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
@@ -327,6 +329,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
index 10f8d59..a5d86fc 100644
--- a/arch/x86/include/asm/crypto/camellia.h
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -2,8 +2,9 @@
#ifndef ASM_X86_CAMELLIA_H
#define ASM_X86_CAMELLIA_H
-#include <linux/kernel.h>
+#include <crypto/b128ops.h>
#include <linux/crypto.h>
+#include <linux/kernel.h>
#define CAMELLIA_MIN_KEY_SIZE 16
#define CAMELLIA_MAX_KEY_SIZE 32
@@ -11,16 +12,13 @@
#define CAMELLIA_TABLE_BYTE_LEN 272
#define CAMELLIA_PARALLEL_BLOCKS 2
+struct crypto_skcipher;
+
struct camellia_ctx {
u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
u32 key_length;
};
-struct camellia_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct camellia_ctx camellia_ctx;
-};
-
struct camellia_xts_ctx {
struct camellia_ctx tweak_ctx;
struct camellia_ctx crypt_ctx;
@@ -30,11 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx,
const unsigned char *key,
unsigned int key_len, u32 *flags);
-extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen);
-extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen);
/* regular block cipher functions */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 553a03d..d181863 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -45,7 +45,7 @@ struct common_glue_ctx {
};
static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
- struct blkcipher_desc *desc,
+ struct skcipher_walk *walk,
bool fpu_enabled, unsigned int nbytes)
{
if (likely(fpu_blocks_limit < 0))
@@ -61,33 +61,6 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit,
if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
return false;
- if (desc) {
- /* prevent sleeping if FPU is in use */
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- }
-
- kernel_fpu_begin();
- return true;
-}
-
-static inline bool glue_skwalk_fpu_begin(unsigned int bsize,
- int fpu_blocks_limit,
- struct skcipher_walk *walk,
- bool fpu_enabled, unsigned int nbytes)
-{
- if (likely(fpu_blocks_limit < 0))
- return false;
-
- if (fpu_enabled)
- return true;
-
- /*
- * Vector-registers are only used when chunk to be processed is large
- * enough, so do not enable FPU until it is necessary.
- */
- if (nbytes < bsize * (unsigned int)fpu_blocks_limit)
- return false;
-
/* prevent sleeping if FPU is in use */
skcipher_walk_atomise(walk);
@@ -126,41 +99,17 @@ static inline void le128_inc(le128 *i)
i->b = cpu_to_le64(b);
}
-extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src,
- unsigned int nbytes);
-
-extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src,
- unsigned int nbytes);
-
-extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes,
- common_glue_func_t tweak_fn, void *tweak_ctx,
- void *crypt_ctx);
-
-extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
- struct blkcipher_desc *desc,
- struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes,
- common_glue_func_t tweak_fn, void *tweak_ctx,
- void *crypt_ctx);
+extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req);
+
+extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
+ struct skcipher_request *req);
+
+extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req);
+
+extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
+ struct skcipher_request *req);
extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
struct skcipher_request *req,
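
The replacement helpers take a struct skcipher_request directly instead of the old blkcipher descriptor plus scatterlist triples, so the per-cipher glue shrinks to thin wrappers. A hedged sketch of what a caller now looks like; serpent_enc and serpent_dec_cbc stand in for the common_glue_ctx tables a real glue module (for example serpent_avx_glue.c) defines:

static int ecb_encrypt(struct skcipher_request *req)
{
        return glue_ecb_req_128bit(&serpent_enc, req);
}

static int cbc_decrypt(struct skcipher_request *req)
{
        return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
}

These wrappers are then wired up as the .encrypt/.decrypt callbacks of a struct skcipher_alg.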
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index c958b7b..db7c9cc 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -2,15 +2,13 @@
#ifndef ASM_X86_SERPENT_AVX_H
#define ASM_X86_SERPENT_AVX_H
-#include <linux/crypto.h>
+#include <crypto/b128ops.h>
#include <crypto/serpent.h>
+#include <linux/types.h>
-#define SERPENT_PARALLEL_BLOCKS 8
+struct crypto_skcipher;
-struct serpent_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct serpent_ctx serpent_ctx;
-};
+#define SERPENT_PARALLEL_BLOCKS 8
struct serpent_xts_ctx {
struct serpent_ctx tweak_ctx;
@@ -38,12 +36,7 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
-extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen);
-
-extern void lrw_serpent_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
+extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen);
#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index 65bb80a..f618bf2 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -4,19 +4,8 @@
#include <linux/crypto.h>
#include <crypto/twofish.h>
-#include <crypto/lrw.h>
#include <crypto/b128ops.h>
-struct twofish_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct twofish_ctx twofish_ctx;
-};
-
-struct twofish_xts_ctx {
- struct twofish_ctx tweak_ctx;
- struct twofish_ctx crypt_ctx;
-};
-
/* regular block cipher functions from twofish_x86_64 module */
asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
@@ -36,12 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
le128 *iv);
-extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen);
-
-extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
-
-extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen);
-
#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63..a8f6c80 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
#if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
#endif
+#ifdef CONFIG_STA2X11
+ bool is_sta2x11;
+#endif
};
#if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h
index 1295bc6..1a19251 100644
--- a/arch/x86/include/asm/dma-direct.h
+++ b/arch/x86/include/asm/dma-direct.h
@@ -2,29 +2,8 @@
#ifndef ASM_X86_DMA_DIRECT_H
#define ASM_X86_DMA_DIRECT_H 1
-#include <linux/mem_encrypt.h>
-
-#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */
bool dma_capable(struct device *dev, dma_addr_t addr, size_t size);
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
-#else
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
- if (!dev->dma_mask)
- return 0;
-
- return addr + size - 1 <= *dev->dma_mask;
-}
-
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
- return __sme_set(paddr);
-}
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
- return __sme_clr(daddr);
-}
-#endif /* CONFIG_X86_DMA_REMAP */
#endif /* ASM_X86_DMA_DIRECT_H */
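
The double-underscore names match what the generic dma-direct code now expects: it applies the SME encryption mask itself and only asks the architecture (here, the STA2x11 override) for the raw translation. Abridged from include/linux/dma-direct.h of this era:

static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
{
        return __sme_set(__phys_to_dma(dev, paddr));    /* set the C-bit */
}

static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
{
        return __sme_clr(__dma_to_phys(dev, daddr));    /* clear the C-bit */
}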
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6277c83..89ce4bf 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,37 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask);
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs);
-
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs);
-
-static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
- gfp_t gfp)
-{
- unsigned long dma_mask = 0;
-
- dma_mask = dev->coherent_dma_mask;
- if (!dma_mask)
- dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32);
-
- return dma_mask;
-}
-
-static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
-{
- unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp);
-
- if (dma_mask <= DMA_BIT_MASK(24))
- gfp |= GFP_DMA;
-#ifdef CONFIG_X86_64
- if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
- gfp |= GFP_DMA32;
-#endif
- return gfp;
-}
-
#endif
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb..cec5fae 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,8 @@
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
+#include <asm/nospec-branch.h>
+#include <asm/mmu_context.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +38,18 @@
extern asmlinkage unsigned long efi_call_phys(void *, ...);
-#define arch_efi_call_virt_setup() kernel_fpu_begin()
-#define arch_efi_call_virt_teardown() kernel_fpu_end()
+#define arch_efi_call_virt_setup() \
+({ \
+ kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
+})
+
+#define arch_efi_call_virt_teardown() \
+({ \
+ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+})
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -58,14 +70,13 @@ extern asmlinkage u64 efi_call(void *fp, ...);
#define efi_call_phys(f, args...) efi_call((f), args)
/*
- * Scratch space used for switching the pagetable in the EFI stub
+ * struct efi_scratch - Scratch space used while switching to/from efi_mm
+ * @phys_stack: stack used during EFI Mixed Mode
+ * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm
*/
struct efi_scratch {
- u64 r15;
- u64 prev_cr3;
- pgd_t *efi_pgt;
- bool use_pgd;
- u64 phys_stack;
+ u64 phys_stack;
+ struct mm_struct *prev_mm;
} __packed;
#define arch_efi_call_virt_setup() \
@@ -73,12 +84,10 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
\
- if (efi_scratch.use_pgd) { \
- efi_scratch.prev_cr3 = __read_cr3(); \
- write_cr3((unsigned long)efi_scratch.efi_pgt); \
- __flush_tlb_all(); \
- } \
+ if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ efi_switch_mm(&efi_mm); \
})
#define arch_efi_call_virt(p, f, args...) \
@@ -86,11 +95,10 @@ struct efi_scratch {
#define arch_efi_call_virt_teardown() \
({ \
- if (efi_scratch.use_pgd) { \
- write_cr3(efi_scratch.prev_cr3); \
- __flush_tlb_all(); \
- } \
+ if (!efi_enabled(EFI_OLD_MEMMAP)) \
+ efi_switch_mm(efi_scratch.prev_mm); \
\
+ firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
})
@@ -131,6 +139,7 @@ extern void __init efi_dump_pagetable(void);
extern void __init efi_apply_memmap_quirks(void);
extern int __init efi_reuse_config(u64 tables, int nr_tables);
extern void efi_delete_dummy_variable(void);
+extern void efi_switch_mm(struct mm_struct *mm);
struct efi_setup_data {
u64 fw_vendor;
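
efi_scratch now only records the borrowed mm instead of raw CR3 values, because runtime services run under efi_mm and are entered and left through efi_switch_mm(). A minimal sketch of what that helper does; the real efi_64.c implementation has a few more details:

void efi_switch_mm(struct mm_struct *mm)
{
        efi_scratch.prev_mm = current->active_mm;
        current->active_mm  = mm;
        switch_mm(efi_scratch.prev_mm, mm, NULL);
}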
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7c341a7..5ea2afd 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -40,6 +40,7 @@ typedef struct {
#endif
#if IS_ENABLED(CONFIG_HYPERV)
unsigned int irq_hv_reenlightenment_count;
+ unsigned int hyperv_stimer0_count;
#endif
} ____cacheline_aligned irq_cpustat_t;
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 2851077..32e666e 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void);
extern asmlinkage void kvm_posted_intr_nested_ipi(void);
extern asmlinkage void error_interrupt(void);
extern asmlinkage void irq_work_interrupt(void);
+extern asmlinkage void uv_bau_message_intr1(void);
extern asmlinkage void spurious_interrupt(void);
extern asmlinkage void thermal_interrupt(void);
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/asm/hyperv-tlfs.h
index 197c2e6..416cb0e 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -1,6 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_X86_HYPERV_H
-#define _ASM_X86_HYPERV_H
+
+/*
+ * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
+ * Specification (TLFS):
+ * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
+ */
+
+#ifndef _ASM_X86_HYPERV_TLFS_H
+#define _ASM_X86_HYPERV_TLFS_H
#include <linux/types.h>
@@ -14,6 +21,7 @@
#define HYPERV_CPUID_FEATURES 0x40000003
#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000
#define HYPERV_CPUID_MIN 0x40000005
@@ -77,6 +85,9 @@
/* Crash MSR available */
#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10)
+/* stimer Direct Mode is available */
+#define HV_X64_STIMER_DIRECT_MODE_AVAILABLE (1 << 19)
+
/*
* Feature identification: EBX indicates which flags were specified at
* partition creation. The format is the same as the partition creation
@@ -156,6 +167,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11)
+/* Recommend using enlightened VMCS */
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14)
+
/*
* Crash notification flag.
*/
@@ -189,7 +203,7 @@
#define HV_X64_MSR_EOI 0x40000070
#define HV_X64_MSR_ICR 0x40000071
#define HV_X64_MSR_TPR 0x40000072
-#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
/* Define synthetic interrupt controller model specific registers. */
#define HV_X64_MSR_SCONTROL 0x40000080
@@ -237,28 +251,77 @@
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
+/*
+ * Declare the MSR used to setup pages used to communicate with the hypervisor.
+ */
+union hv_x64_msr_hypercall_contents {
+ u64 as_uint64;
+ struct {
+ u64 enable:1;
+ u64 reserved:11;
+ u64 guest_physical_address:52;
+ };
+};
+
+/*
+ * TSC page layout.
+ */
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+ u64 reserved2[509];
+};
+
+/*
+ * The guest OS needs to register the guest ID with the hypervisor.
+ * The guest ID is a 64 bit entity and the structure of this ID is
+ * specified in the Hyper-V specification:
+ *
+ * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
+ *
+ * While the current guideline does not specify how Linux guest ID(s)
+ * need to be generated, our plan is to publish the guidelines for
+ * Linux and other guest operating systems that currently are hosted
+ * on Hyper-V. The implementation here conforms to these as yet
+ * unpublished guidelines.
+ *
+ *
+ * Bit(s)
+ * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
+ * 62:56 - Os Type; Linux is 0x100
+ * 55:48 - Distro specific identification
+ * 47:16 - Linux kernel version number
+ * 15:0 - Distro specific identification
+ *
+ *
+ */
+
+#define HV_LINUX_VENDOR_ID 0x8100
+
/* TSC emulation after migration */
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
struct hv_reenlightenment_control {
- u64 vector:8;
- u64 reserved1:8;
- u64 enabled:1;
- u64 reserved2:15;
- u64 target_vp:32;
+ __u64 vector:8;
+ __u64 reserved1:8;
+ __u64 enabled:1;
+ __u64 reserved2:15;
+ __u64 target_vp:32;
};
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
struct hv_tsc_emulation_control {
- u64 enabled:1;
- u64 reserved:63;
+ __u64 enabled:1;
+ __u64 reserved:63;
};
struct hv_tsc_emulation_status {
- u64 inprogress:1;
- u64 reserved:63;
+ __u64 inprogress:1;
+ __u64 reserved:63;
};
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
@@ -275,10 +338,13 @@ struct hv_tsc_emulation_status {
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
+#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
@@ -298,12 +364,22 @@ enum HV_GENERIC_SET_FORMAT {
HV_GENERIC_SET_ALL,
};
+#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
+#define HV_HYPERCALL_REP_START_OFFSET 48
+#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
+
/* hypercall status code */
#define HV_STATUS_SUCCESS 0
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
#define HV_STATUS_INVALID_CONNECTION_ID 18
#define HV_STATUS_INSUFFICIENT_BUFFERS 19
@@ -318,6 +394,8 @@ typedef struct _HV_REFERENCE_TSC_PAGE {
#define HV_SYNIC_SINT_COUNT (16)
/* Define the expected SynIC version. */
#define HV_SYNIC_VERSION_1 (0x1)
+/* Valid SynIC vectors are 16-255. */
+#define HV_SYNIC_FIRST_VALID_VECTOR (16)
#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
@@ -412,6 +490,216 @@ struct hv_timer_message_payload {
__u64 delivery_time; /* When the message was delivered */
};
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+ __u32 apic_assist;
+ __u32 reserved;
+ __u64 vtl_control[2];
+ __u64 nested_enlightenments_control[2];
+ __u32 enlighten_vmentry;
+ __u64 current_nested_vmcs;
+};
+
+struct hv_enlightened_vmcs {
+ u32 revision_id;
+ u32 abort;
+
+ u16 host_es_selector;
+ u16 host_cs_selector;
+ u16 host_ss_selector;
+ u16 host_ds_selector;
+ u16 host_fs_selector;
+ u16 host_gs_selector;
+ u16 host_tr_selector;
+
+ u64 host_ia32_pat;
+ u64 host_ia32_efer;
+
+ u64 host_cr0;
+ u64 host_cr3;
+ u64 host_cr4;
+
+ u64 host_ia32_sysenter_esp;
+ u64 host_ia32_sysenter_eip;
+ u64 host_rip;
+ u32 host_ia32_sysenter_cs;
+
+ u32 pin_based_vm_exec_control;
+ u32 vm_exit_controls;
+ u32 secondary_vm_exec_control;
+
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+
+ u16 guest_es_selector;
+ u16 guest_cs_selector;
+ u16 guest_ss_selector;
+ u16 guest_ds_selector;
+ u16 guest_fs_selector;
+ u16 guest_gs_selector;
+ u16 guest_ldtr_selector;
+ u16 guest_tr_selector;
+
+ u32 guest_es_limit;
+ u32 guest_cs_limit;
+ u32 guest_ss_limit;
+ u32 guest_ds_limit;
+ u32 guest_fs_limit;
+ u32 guest_gs_limit;
+ u32 guest_ldtr_limit;
+ u32 guest_tr_limit;
+ u32 guest_gdtr_limit;
+ u32 guest_idtr_limit;
+
+ u32 guest_es_ar_bytes;
+ u32 guest_cs_ar_bytes;
+ u32 guest_ss_ar_bytes;
+ u32 guest_ds_ar_bytes;
+ u32 guest_fs_ar_bytes;
+ u32 guest_gs_ar_bytes;
+ u32 guest_ldtr_ar_bytes;
+ u32 guest_tr_ar_bytes;
+
+ u64 guest_es_base;
+ u64 guest_cs_base;
+ u64 guest_ss_base;
+ u64 guest_ds_base;
+ u64 guest_fs_base;
+ u64 guest_gs_base;
+ u64 guest_ldtr_base;
+ u64 guest_tr_base;
+ u64 guest_gdtr_base;
+ u64 guest_idtr_base;
+
+ u64 padding64_1[3];
+
+ u64 vm_exit_msr_store_addr;
+ u64 vm_exit_msr_load_addr;
+ u64 vm_entry_msr_load_addr;
+
+ u64 cr3_target_value0;
+ u64 cr3_target_value1;
+ u64 cr3_target_value2;
+ u64 cr3_target_value3;
+
+ u32 page_fault_error_code_mask;
+ u32 page_fault_error_code_match;
+
+ u32 cr3_target_count;
+ u32 vm_exit_msr_store_count;
+ u32 vm_exit_msr_load_count;
+ u32 vm_entry_msr_load_count;
+
+ u64 tsc_offset;
+ u64 virtual_apic_page_addr;
+ u64 vmcs_link_pointer;
+
+ u64 guest_ia32_debugctl;
+ u64 guest_ia32_pat;
+ u64 guest_ia32_efer;
+
+ u64 guest_pdptr0;
+ u64 guest_pdptr1;
+ u64 guest_pdptr2;
+ u64 guest_pdptr3;
+
+ u64 guest_pending_dbg_exceptions;
+ u64 guest_sysenter_esp;
+ u64 guest_sysenter_eip;
+
+ u32 guest_activity_state;
+ u32 guest_sysenter_cs;
+
+ u64 cr0_guest_host_mask;
+ u64 cr4_guest_host_mask;
+ u64 cr0_read_shadow;
+ u64 cr4_read_shadow;
+ u64 guest_cr0;
+ u64 guest_cr3;
+ u64 guest_cr4;
+ u64 guest_dr7;
+
+ u64 host_fs_base;
+ u64 host_gs_base;
+ u64 host_tr_base;
+ u64 host_gdtr_base;
+ u64 host_idtr_base;
+ u64 host_rsp;
+
+ u64 ept_pointer;
+
+ u16 virtual_processor_id;
+ u16 padding16[3];
+
+ u64 padding64_2[5];
+ u64 guest_physical_address;
+
+ u32 vm_instruction_error;
+ u32 vm_exit_reason;
+ u32 vm_exit_intr_info;
+ u32 vm_exit_intr_error_code;
+ u32 idt_vectoring_info_field;
+ u32 idt_vectoring_error_code;
+ u32 vm_exit_instruction_len;
+ u32 vmx_instruction_info;
+
+ u64 exit_qualification;
+ u64 exit_io_instruction_ecx;
+ u64 exit_io_instruction_esi;
+ u64 exit_io_instruction_edi;
+ u64 exit_io_instruction_eip;
+
+ u64 guest_linear_address;
+ u64 guest_rsp;
+ u64 guest_rflags;
+
+ u32 guest_interruptibility_info;
+ u32 cpu_based_vm_exec_control;
+ u32 exception_bitmap;
+ u32 vm_entry_controls;
+ u32 vm_entry_intr_info_field;
+ u32 vm_entry_exception_error_code;
+ u32 vm_entry_instruction_len;
+ u32 tpr_threshold;
+
+ u64 guest_rip;
+
+ u32 hv_clean_fields;
+ u32 hv_padding_32;
+ u32 hv_synthetic_controls;
+ u32 hv_enlightenments_control;
+ u32 hv_vp_id;
+
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 padding64_4[4];
+ u64 guest_bndcfgs;
+ u64 padding64_5[7];
+ u64 xss_exit_bitmap;
+ u64 padding64_6[7];
+};
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
+
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
+
#define HV_STIMER_ENABLE (1ULL << 0)
#define HV_STIMER_PERIODIC (1ULL << 1)
#define HV_STIMER_LAZY (1ULL << 2)
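
The HV_HYPERCALL_* field definitions moved here because they describe the hypercall ABI from the TLFS rather than Linux policy. Illustrative only: how a caller composes a rep-hypercall control word and checks the low 16 status bits (the in-tree hv_do_rep_hypercall() in asm/mshyperv.h does this, plus retrying until all reps complete):

static u64 example_rep_hypercall(u16 code, u16 rep_count, void *in, void *out)
{
        u64 control = code;
        u64 status;

        control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET;

        status = hv_do_hypercall(control, in, out);
        if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS)
                return status & HV_HYPERCALL_RESULT_MASK;

        return HV_STATUS_SUCCESS;
}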
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
new file mode 100644
index 0000000..3cb002b
--- /dev/null
+++ b/arch/x86/include/asm/intel_pconfig.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_X86_INTEL_PCONFIG_H
+#define _ASM_X86_INTEL_PCONFIG_H
+
+#include <asm/asm.h>
+#include <asm/processor.h>
+
+enum pconfig_target {
+ INVALID_TARGET = 0,
+ MKTME_TARGET = 1,
+ PCONFIG_TARGET_NR
+};
+
+int pconfig_target_supported(enum pconfig_target target);
+
+enum pconfig_leaf {
+ MKTME_KEY_PROGRAM = 0,
+ PCONFIG_LEAF_INVALID,
+};
+
+#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
+
+/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
+
+/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
+#define MKTME_KEYID_SET_KEY_DIRECT 0
+#define MKTME_KEYID_SET_KEY_RANDOM 1
+#define MKTME_KEYID_CLEAR_KEY 2
+#define MKTME_KEYID_NO_ENCRYPT 3
+
+/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
+#define MKTME_AES_XTS_128 (1 << 8)
+
+/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
+#define MKTME_PROG_SUCCESS 0
+#define MKTME_INVALID_PROG_CMD 1
+#define MKTME_ENTROPY_ERROR 2
+#define MKTME_INVALID_KEYID 3
+#define MKTME_INVALID_ENC_ALG 4
+#define MKTME_DEVICE_BUSY 5
+
+/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */
+struct mktme_key_program {
+ u16 keyid;
+ u32 keyid_ctrl;
+ u8 __rsvd[58];
+ u8 key_field_1[64];
+ u8 key_field_2[64];
+} __packed __aligned(256);
+
+static inline int mktme_key_program(struct mktme_key_program *key_program)
+{
+ unsigned long rax = MKTME_KEY_PROGRAM;
+
+ if (!pconfig_target_supported(MKTME_TARGET))
+ return -ENXIO;
+
+ asm volatile(PCONFIG
+ : "=a" (rax), "=b" (key_program)
+ : "0" (rax), "1" (key_program)
+ : "memory", "cc");
+
+ return rax;
+}
+
+#endif /* _ASM_X86_INTEL_PCONFIG_H */
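
A hypothetical caller of the new helper, to show how the pieces fit together; KeyID allocation, error reporting and concurrency are all omitted. The 256-byte alignment the hardware demands comes from the __aligned(256) on the structure type.

static int example_program_random_key(u16 keyid)
{
        static struct mktme_key_program prog;   /* type carries __aligned(256) */
        int ret;

        memset(&prog, 0, sizeof(prog));
        prog.keyid      = keyid;
        prog.keyid_ctrl = MKTME_KEYID_SET_KEY_RANDOM | MKTME_AES_XTS_128;

        ret = mktme_key_program(&prog);
        if (ret < 0)
                return ret;     /* -ENXIO: PCONFIG/MKTME not supported */
        return ret == MKTME_PROG_SUCCESS ? 0 : -EIO;
}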
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 95e9486..f6e5b93 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void);
*/
#define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
-/*
- * Cache management
- *
- * This needed for two cases
- * 1. Out of order aware processors
- * 2. Accidentally out of order processors (PPro errata #51)
- */
-
-static inline void flush_write_buffers(void)
-{
-#if defined(CONFIG_X86_PPRO_FENCE)
- asm volatile("lock; addl $0,0(%%esp)": : :"memory");
-#endif
-}
-
#endif /* __KERNEL__ */
extern void native_io_delay(void);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index a8834dd..fd20a23 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -183,16 +183,17 @@ extern void disable_ioapic_support(void);
extern void __init io_apic_init_mappings(void);
extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
-extern void native_disable_io_apic(void);
+extern void native_restore_boot_irq_mode(void);
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
- return x86_io_apic_ops.read(apic, reg);
+ return x86_apic_ops.io_apic_read(apic, reg);
}
extern void setup_IO_APIC(void);
extern void enable_IO_APIC(void);
-extern void disable_IO_APIC(void);
+extern void clear_IO_APIC(void);
+extern void restore_boot_irq_mode(void);
extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
extern void print_IO_APICs(void);
#else /* !CONFIG_X86_IO_APIC */
@@ -228,10 +229,11 @@ static inline void mp_save_irq(struct mpc_intsrc *m) { }
static inline void disable_ioapic_support(void) { }
static inline void io_apic_init_mappings(void) { }
#define native_io_apic_read NULL
-#define native_disable_io_apic NULL
+#define native_restore_boot_irq_mode NULL
static inline void setup_IO_APIC(void) { }
static inline void enable_IO_APIC(void) { }
+static inline void restore_boot_irq_mode(void) { }
#endif
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 1e5d5d9..baedab8 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,13 +2,10 @@
#ifndef _ASM_X86_IOMMU_H
#define _ASM_X86_IOMMU_H
-extern const struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_pass_through;
-int x86_dma_supported(struct device *dev, u64 mask);
-
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index e71c112..404c5fd 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -106,9 +106,10 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
+#define HYPERV_STIMER0_VECTOR 0xed
#endif
-#define LOCAL_TIMER_VECTOR 0xed
+#define LOCAL_TIMER_VECTOR 0xec
#define NR_VECTORS 256
diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h
index 875b543..b885a96 100644
--- a/arch/x86/include/asm/jailhouse_para.h
+++ b/arch/x86/include/asm/jailhouse_para.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL2.0 */
/*
- * Jailhouse paravirt_ops implementation
+ * Jailhouse paravirt detection
*
* Copyright (c) Siemens AG, 2015-2017
*
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 460991e..db7ba2f 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -5,10 +5,6 @@
unsigned long kaslr_get_random_long(const char *purpose);
#ifdef CONFIG_RANDOMIZE_MEMORY
-extern unsigned long page_offset_base;
-extern unsigned long vmalloc_base;
-extern unsigned long vmemmap_base;
-
void kernel_randomize_memory(void);
#else
static inline void kernel_randomize_memory(void) { }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dd6f57a..949c977 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -34,6 +34,7 @@
#include <asm/msr-index.h>
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
+#include <asm/hyperv-tlfs.h>
#define KVM_MAX_VCPUS 288
#define KVM_SOFT_MAX_VCPUS 240
@@ -73,6 +74,7 @@
#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20)
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
+#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -498,6 +500,7 @@ struct kvm_vcpu_arch {
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
bool apicv_active;
+ bool load_eoi_exitmap_pending;
DECLARE_BITMAP(ioapic_handled_vectors, 256);
unsigned long apic_attention;
int32_t apic_arb_prio;
@@ -507,6 +510,7 @@ struct kvm_vcpu_arch {
u64 smi_count;
bool tpr_access_reporting;
u64 ia32_xss;
+ u64 microcode_version;
/*
* Paging state of the vcpu
@@ -570,7 +574,7 @@ struct kvm_vcpu_arch {
} exception;
struct kvm_queued_interrupt {
- bool pending;
+ bool injected;
bool soft;
u8 nr;
} interrupt;
@@ -753,6 +757,12 @@ struct kvm_hv {
u64 hv_crash_ctl;
HV_REFERENCE_TSC_PAGE tsc_ref;
+
+ struct idr conn_to_evt;
+
+ u64 hv_reenlightenment_control;
+ u64 hv_tsc_emulation_control;
+ u64 hv_tsc_emulation_status;
};
enum kvm_irqchip_mode {
@@ -761,15 +771,6 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
-struct kvm_sev_info {
- bool active; /* SEV enabled guest */
- unsigned int asid; /* ASID used for this guest */
- unsigned int handle; /* SEV firmware handle */
- int fd; /* SEV device fd */
- unsigned long pages_locked; /* Number of pages locked */
- struct list_head regions_list; /* List of registered regions */
-};
-
struct kvm_arch {
unsigned int n_used_mmu_pages;
unsigned int n_requested_mmu_pages;
@@ -799,13 +800,13 @@ struct kvm_arch {
struct mutex apic_map_lock;
struct kvm_apic_map *apic_map;
- unsigned int tss_addr;
bool apic_access_page_done;
gpa_t wall_clock;
- bool ept_identity_pagetable_done;
- gpa_t ept_identity_map_addr;
+ bool mwait_in_guest;
+ bool hlt_in_guest;
+ bool pause_in_guest;
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@@ -848,17 +849,8 @@ struct kvm_arch {
bool disabled_lapic_found;
- /* Struct members for AVIC */
- u32 avic_vm_id;
- u32 ldr_mode;
- struct page *avic_logical_id_table_page;
- struct page *avic_physical_id_table_page;
- struct hlist_node hnode;
-
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
-
- struct kvm_sev_info sev_info;
};
struct kvm_vm_stat {
@@ -935,6 +927,8 @@ struct kvm_x86_ops {
bool (*cpu_has_high_real_mode_segbase)(void);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
+ struct kvm *(*vm_alloc)(void);
+ void (*vm_free)(struct kvm *);
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
@@ -1006,6 +1000,7 @@ struct kvm_x86_ops {
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
+ int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
int (*get_tdp_level)(struct kvm_vcpu *vcpu);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
int (*get_lpage_level)(void);
@@ -1095,6 +1090,8 @@ struct kvm_x86_ops {
int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+
+ int (*get_msr_feature)(struct kvm_msr_entry *entry);
};
struct kvm_arch_async_pf {
@@ -1106,6 +1103,17 @@ struct kvm_arch_async_pf {
extern struct kvm_x86_ops *kvm_x86_ops;
+#define __KVM_HAVE_ARCH_VM_ALLOC
+static inline struct kvm *kvm_arch_alloc_vm(void)
+{
+ return kvm_x86_ops->vm_alloc();
+}
+
+static inline void kvm_arch_free_vm(struct kvm *kvm)
+{
+ return kvm_x86_ops->vm_free(kvm);
+}
+
int kvm_mmu_module_init(void);
void kvm_mmu_module_exit(void);
@@ -1184,6 +1192,8 @@ enum emulation_result {
#define EMULTYPE_SKIP (1 << 2)
#define EMULTYPE_RETRY (1 << 3)
#define EMULTYPE_NO_REEXECUTE (1 << 4)
+#define EMULTYPE_NO_UD_ON_FAIL (1 << 5)
+#define EMULTYPE_VMWARE (1 << 6)
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
@@ -1201,8 +1211,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
-int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
-int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
@@ -1464,7 +1473,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
#define put_smstate(type, buf, offset, val) \
*(type *)((buf) + (offset) - 0x7e00) = val
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-
#endif /* _ASM_X86_KVM_HOST_H */
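
The new vm_alloc/vm_free hooks let each vendor module embed struct kvm inside its own per-VM container instead of having the arch-neutral code allocate a bare struct kvm. A hedged sketch of the VMX side; struct kvm_vmx and to_kvm_vmx() stand in for the vendor-specific wrapper:

static struct kvm *vmx_vm_alloc(void)
{
        struct kvm_vmx *kvm_vmx = kzalloc(sizeof(*kvm_vmx), GFP_KERNEL);

        if (!kvm_vmx)
                return NULL;
        return &kvm_vmx->kvm;           /* struct kvm is the first member */
}

static void vmx_vm_free(struct kvm *kvm)
{
        kfree(to_kvm_vmx(kvm));
}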
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 7b407dd..3aea265 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -88,6 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
#ifdef CONFIG_KVM_GUEST
bool kvm_para_available(void);
unsigned int kvm_arch_para_features(void);
+unsigned int kvm_arch_para_hints(void);
void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
@@ -115,6 +116,11 @@ static inline unsigned int kvm_arch_para_features(void)
return 0;
}
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
static inline u32 kvm_read_and_reset_pf_reason(void)
{
return 0;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 96ea4b5..8c7b3e5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -138,58 +138,6 @@ struct mce_log_buffer {
struct mce entry[MCE_LOG_LEN];
};
-struct mca_config {
- bool dont_log_ce;
- bool cmci_disabled;
- bool lmce_disabled;
- bool ignore_ce;
- bool disabled;
- bool ser;
- bool recovery;
- bool bios_cmci_threshold;
- u8 banks;
- s8 bootlog;
- int tolerant;
- int monarch_timeout;
- int panic_timeout;
- u32 rip_msr;
-};
-
-struct mce_vendor_flags {
- /*
- * Indicates that overflow conditions are not fatal, when set.
- */
- __u64 overflow_recov : 1,
-
- /*
- * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
- * Recovery. It indicates support for data poisoning in HW and deferred
- * error interrupts.
- */
- succor : 1,
-
- /*
- * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
- * the register space for each MCA bank and also increases number of
- * banks. Also, to accommodate the new banks and registers, the MCA
- * register space is moved to a new MSR range.
- */
- smca : 1,
-
- __reserved_0 : 61;
-};
-
-struct mca_msr_regs {
- u32 (*ctl) (int bank);
- u32 (*status) (int bank);
- u32 (*addr) (int bank);
- u32 (*misc) (int bank);
-};
-
-extern struct mce_vendor_flags mce_flags;
-
-extern struct mca_msr_regs msr_ops;
-
enum mce_notifier_prios {
MCE_PRIO_FIRST = INT_MAX,
MCE_PRIO_SRAO = INT_MAX - 1,
@@ -346,6 +294,7 @@ enum smca_bank_types {
SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */
+ SMCA_RESERVED, /* Reserved */
SMCA_EX, /* Execution Unit */
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e..c064383 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -22,6 +22,7 @@
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern u64 sme_me_mask;
+extern bool sev_enabled;
void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
unsigned long decrypted_kernel_vaddr,
@@ -48,8 +49,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
-
bool sme_active(void);
bool sev_active(void);
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 55520cec..2b7cc53 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -6,20 +6,6 @@
#include <linux/earlycpio.h>
#include <linux/initrd.h>
-#define native_rdmsr(msr, val1, val2) \
-do { \
- u64 __val = __rdmsr((msr)); \
- (void)((val1) = (u32)__val); \
- (void)((val2) = (u32)(__val >> 32)); \
-} while (0)
-
-#define native_wrmsr(msr, low, high) \
- __wrmsr(msr, low, high)
-
-#define native_wrmsrl(msr, val) \
- __wrmsr((msr), (u32)((u64)(val)), \
- (u32)((u64)(val) >> 32))
-
struct ucode_patch {
struct list_head plist;
void *data; /* Intel uses only this one */
@@ -37,7 +23,13 @@ struct cpu_signature {
struct device;
-enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+enum ucode_state {
+ UCODE_OK = 0,
+ UCODE_NEW,
+ UCODE_UPDATED,
+ UCODE_NFOUND,
+ UCODE_ERROR,
+};
struct microcode_ops {
enum ucode_state (*request_microcode_user) (int cpu,
@@ -54,7 +46,7 @@ struct microcode_ops {
* are being called.
* See also the "Synchronization" section in microcode_core.c.
*/
- int (*apply_microcode) (int cpu);
+ enum ucode_state (*apply_microcode) (int cpu);
int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
};
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index c931b88..57e3785 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -24,11 +24,12 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
#endif /* !CONFIG_PARAVIRT */
#ifdef CONFIG_PERF_EVENTS
-extern struct static_key rdpmc_always_available;
+
+DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
static inline void load_mm_cr4(struct mm_struct *mm)
{
- if (static_key_false(&rdpmc_always_available) ||
+ if (static_branch_unlikely(&rdpmc_always_available_key) ||
atomic_read(&mm->context.perf_rdpmc_allowed))
cr4_set_bits(X86_CR4_PCE);
else
@@ -74,6 +75,7 @@ static inline void *ldt_slot_va(int slot)
return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
#else
BUG();
+ return (void *)fix_to_virt(FIX_HOLE);
#endif
}
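
Converting rdpmc_always_available to a DECLARE_STATIC_KEY_FALSE key turns load_mm_cr4() into a patched jump rather than a test of a global. Illustrative only: the writer side (the perf code that defines the key with DEFINE_STATIC_KEY_FALSE) flips it through the static_branch API:

static void set_rdpmc_always_available(bool on)
{
        if (on)
                static_branch_enable(&rdpmc_always_available_key);
        else
                static_branch_disable(&rdpmc_always_available_key);
}

The in-tree sysfs handler uses static_branch_inc()/static_branch_dec() instead; the snippet above only shows how the reader and writer sides pair up.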
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 25283f7..b90e796 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,90 +6,23 @@
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <asm/io.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
-/*
- * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
- * is set by CPUID(HVCPUID_VERSION_FEATURES).
- */
-enum hv_cpuid_function {
- HVCPUID_VERSION_FEATURES = 0x00000001,
- HVCPUID_VENDOR_MAXFUNCTION = 0x40000000,
- HVCPUID_INTERFACE = 0x40000001,
-
- /*
- * The remaining functions depend on the value of
- * HVCPUID_INTERFACE
- */
- HVCPUID_VERSION = 0x40000002,
- HVCPUID_FEATURES = 0x40000003,
- HVCPUID_ENLIGHTENMENT_INFO = 0x40000004,
- HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005,
-};
-
struct ms_hyperv_info {
u32 features;
u32 misc_features;
u32 hints;
+ u32 nested_features;
u32 max_vp_index;
u32 max_lp_index;
};
extern struct ms_hyperv_info ms_hyperv;
-/*
- * Declare the MSR used to setup pages used to communicate with the hypervisor.
- */
-union hv_x64_msr_hypercall_contents {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 guest_physical_address:52;
- };
-};
-
-/*
- * TSC page layout.
- */
-
-struct ms_hyperv_tsc_page {
- volatile u32 tsc_sequence;
- u32 reserved1;
- volatile u64 tsc_scale;
- volatile s64 tsc_offset;
- u64 reserved2[509];
-};
-
-/*
- * The guest OS needs to register the guest ID with the hypervisor.
- * The guest ID is a 64 bit entity and the structure of this ID is
- * specified in the Hyper-V specification:
- *
- * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
- *
- * While the current guideline does not specify how Linux guest ID(s)
- * need to be generated, our plan is to publish the guidelines for
- * Linux and other guest operating systems that currently are hosted
- * on Hyper-V. The implementation here conforms to this yet
- * unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
/*
- * Generate the guest ID based on the guideline described above.
+ * Generate the guest ID.
*/
static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version,
@@ -173,6 +106,19 @@ void hv_remove_kexec_handler(void);
void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs));
void hv_remove_crash_handler(void);
+/*
+ * Routines for stimer0 Direct Mode handling.
+ * On x86/x64, there are no percpu actions to take.
+ */
+void hv_stimer0_vector_handler(struct pt_regs *regs);
+void hv_stimer0_callback_vector(void);
+int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void));
+void hv_remove_stimer0_irq(int irq);
+
+static inline void hv_enable_stimer0_percpu_irq(int irq) {}
+static inline void hv_disable_stimer0_percpu_irq(int irq) {}
+
+
#if IS_ENABLED(CONFIG_HYPERV)
extern struct clocksource *hyperv_cs;
extern void *hv_hypercall_pg;
@@ -215,14 +161,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-
/* Fast hypercall with 8 bytes of input and no output */
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
{
@@ -294,6 +232,15 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
*/
extern u32 *hv_vp_index;
extern u32 hv_max_vp_index;
+extern struct hv_vp_assist_page **hv_vp_assist_page;
+
+static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
+{
+ if (!hv_vp_assist_page)
+ return NULL;
+
+ return hv_vp_assist_page[cpu];
+}
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
@@ -330,6 +277,10 @@ static inline void hyperv_setup_mmu_ops(void) {}
static inline void set_hv_tscchange_cb(void (*cb)(void)) {}
static inline void clear_hv_tscchange_cb(void) {}
static inline void hyperv_stop_tsc_emulation(void) {};
+static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
+{
+ return NULL;
+}
#endif /* CONFIG_HYPERV */
#ifdef CONFIG_HYPERV_TSCPAGE
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c9084de..53d5b1b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -353,7 +353,21 @@
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
+#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL
+#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2)
+#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4)
+#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6)
+#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8)
+#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10)
+
#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR
+#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2)
+#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4)
+#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6)
+#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8)
+#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10)
+
#define MSR_F15H_NB_PERF_CTL 0xc0010240
#define MSR_F15H_NB_PERF_CTR 0xc0010241
#define MSR_F15H_PTSC 0xc0010280
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 30df295..04addd6 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -108,6 +108,20 @@ static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
+#define native_rdmsr(msr, val1, val2) \
+do { \
+ u64 __val = __rdmsr((msr)); \
+ (void)((val1) = (u32)__val); \
+ (void)((val2) = (u32)(__val >> 32)); \
+} while (0)
+
+#define native_wrmsr(msr, low, high) \
+ __wrmsr(msr, low, high)
+
+#define native_wrmsrl(msr, val) \
+ __wrmsr((msr), (u32)((u64)(val)), \
+ (u32)((u64)(val) >> 32))
+
static inline unsigned long long native_read_msr(unsigned int msr)
{
unsigned long long val;
@@ -218,9 +232,6 @@ static __always_inline unsigned long long rdtsc_ordered(void)
return rdtsc();
}
-/* Deprecated, keep it for a cycle for easier merging: */
-#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0)
-
static inline unsigned long long native_read_pmc(int counter)
{
DECLARE_ARGS(val, low, high);
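
native_rdmsr()/native_wrmsr() moved here from microcode.h because they are general MSR helpers, not microcode-loader internals. Illustrative use, splitting and rejoining the EDX:EAX halves:

static u64 example_read_mcg_status(void)
{
        u32 low, high;

        native_rdmsr(MSR_IA32_MCG_STATUS, low, high);
        return ((u64)high << 32) | low;
}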
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 76b0585..f928ad9 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
#ifdef __ASSEMBLY__
/*
@@ -24,6 +68,18 @@
.endm
/*
+ * This should be used immediately before an indirect jump/call. It tells
+ * objtool the subsequent indirect jump/call is vouched safe for retpoline
+ * builds.
+ */
+.macro ANNOTATE_RETPOLINE_SAFE
+ .Lannotate_\@:
+ .pushsection .discard.retpoline_safe
+ _ASM_PTR .Lannotate_\@
+ .popsection
+.endm
+
+/*
* These are the bare retpoline primitives for indirect jmp and call.
* Do not use these directly; they only exist to make the ALTERNATIVE
* invocation below less ugly.
@@ -59,9 +115,9 @@
.macro JMP_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(jmp *\reg), \
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \
__stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
#else
jmp *\reg
#endif
@@ -70,18 +126,25 @@
.macro CALL_NOSPEC reg:req
#ifdef CONFIG_RETPOLINE
ANNOTATE_NOSPEC_ALTERNATIVE
- ALTERNATIVE_2 __stringify(call *\reg), \
+ ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \
__stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
- __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD
#else
call *\reg
#endif
.endm
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "", "call __clear_rsb", \ftr
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
#endif
.endm
@@ -93,6 +156,12 @@
".long 999b - .\n\t" \
".popsection\n\t"
+#define ANNOTATE_RETPOLINE_SAFE \
+ "999:\n\t" \
+ ".pushsection .discard.retpoline_safe\n\t" \
+ _ASM_PTR " 999b\n\t" \
+ ".popsection\n\t"
+
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
/*
@@ -102,6 +171,7 @@
# define CALL_NOSPEC \
ANNOTATE_NOSPEC_ALTERNATIVE \
ALTERNATIVE( \
+ ANNOTATE_RETPOLINE_SAFE \
"call *%[thunk_target]\n", \
"call __x86_indirect_thunk_%V[thunk_target]\n", \
X86_FEATURE_RETPOLINE)
@@ -113,7 +183,10 @@
* otherwise we'll run out of registers. We don't care about CET
* here, anyway.
*/
-# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \
+# define CALL_NOSPEC \
+ ALTERNATIVE( \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%[thunk_target]\n", \
" jmp 904f;\n" \
" .align 16\n" \
"901: call 903f;\n" \
@@ -156,25 +229,90 @@ extern char __indirect_thunk_end[];
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- alternative_input("",
- "call __fill_rsb",
- X86_FEATURE_RETPOLINE,
- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ unsigned long loops;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
+#define alternative_msr_write(_msr, _val, _feature) \
+ asm volatile(ALTERNATIVE("", \
+ "movl %[msr], %%ecx\n\t" \
+ "movl %[val], %%eax\n\t" \
+ "movl $0, %%edx\n\t" \
+ "wrmsr", \
+ _feature) \
+ : : [msr] "i" (_msr), [val] "i" (_val) \
+ : "eax", "ecx", "edx", "memory")
+
static inline void indirect_branch_prediction_barrier(void)
{
- asm volatile(ALTERNATIVE("",
- "movl %[msr], %%ecx\n\t"
- "movl %[val], %%eax\n\t"
- "movl $0, %%edx\n\t"
- "wrmsr",
- X86_FEATURE_USE_IBPB)
- : : [msr] "i" (MSR_IA32_PRED_CMD),
- [val] "i" (PRED_CMD_IBPB)
- : "eax", "ecx", "edx", "memory");
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+ X86_FEATURE_USE_IBPB);
}
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ *
+ * (Implemented as CPP macros due to header hell.)
+ */
+#define firmware_restrict_branch_speculation_start() \
+do { \
+ preempt_disable(); \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ X86_FEATURE_USE_IBRS_FW); \
+} while (0)
+
+#define firmware_restrict_branch_speculation_end() \
+do { \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ X86_FEATURE_USE_IBRS_FW); \
+ preempt_enable(); \
+} while (0)
+
#endif /* __ASSEMBLY__ */
+
+/*
+ * Below is used in the eBPF JIT compiler and emits the byte sequence
+ * for the following assembly:
+ *
+ * With retpolines configured:
+ *
+ * callq do_rop
+ * spec_trap:
+ * pause
+ * lfence
+ * jmp spec_trap
+ * do_rop:
+ * mov %rax,(%rsp)
+ * retq
+ *
+ * Without retpolines configured:
+ *
+ * jmp *%rax
+ */
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_RAX_BPF_JIT_SIZE 17
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT1_off32(0xE8, 7); /* callq do_rop */ \
+ /* spec_trap: */ \
+ EMIT2(0xF3, 0x90); /* pause */ \
+ EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
+ EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
+ /* do_rop: */ \
+ EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
+ EMIT1(0xC3); /* retq */
+#else
+# define RETPOLINE_RAX_BPF_JIT_SIZE 2
+# define RETPOLINE_RAX_BPF_JIT() \
+ EMIT2(0xFF, 0xE0); /* jmp *%rax */
+#endif
+
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
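
Editorial illustration (not part of the patch): firmware_restrict_branch_speculation_start()/_end() above are meant to bracket calls into firmware, which retpolines cannot protect. A hedged, kernel-context sketch of such a call site; efi_call_virt() stands in for whatever wrapper the real caller uses, and the surrounding function name is illustrative only:

static efi_status_t sample_get_time(efi_time_t *tm)
{
	efi_status_t status;

	firmware_restrict_branch_speculation_start();	/* write SPEC_CTRL_IBRS, preemption off */
	status = efi_call_virt(get_time, tm, NULL);	/* indirect call into firmware */
	firmware_restrict_branch_speculation_end();	/* clear IBRS, preemption back on */

	return status;
}
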
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index d652a38..939b1cf 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -11,6 +11,10 @@
extern unsigned long max_pfn;
extern unsigned long phys_base;
+extern unsigned long page_offset_base;
+extern unsigned long vmalloc_base;
+extern unsigned long vmemmap_base;
+
static inline unsigned long __phys_addr_nodebug(unsigned long x)
{
unsigned long y = x - __START_KERNEL_map;
@@ -47,7 +51,7 @@ static inline void clear_page(void *page)
clear_page_erms, X86_FEATURE_ERMS,
"=D" (page),
"0" (page)
- : "memory", "rax", "rcx");
+ : "cc", "memory", "rax", "rcx");
}
void copy_page(void *to, void *from);
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index e140731..2c5a966 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -37,26 +37,24 @@
* hypervisor to fit. Choosing 16 slots here is arbitrary, but it's
* what Xen requires.
*/
-#ifdef CONFIG_X86_5LEVEL
-#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL)
-#else
-#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL)
-#endif
+#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL)
+#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL)
-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
#define __PAGE_OFFSET page_offset_base
#else
-#define __PAGE_OFFSET __PAGE_OFFSET_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
-#ifdef CONFIG_X86_5LEVEL
+
#define __PHYSICAL_MASK_SHIFT 52
-#define __VIRTUAL_MASK_SHIFT 56
+
+#ifdef CONFIG_X86_5LEVEL
+#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
#else
-#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 47
#endif
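
Editorial illustration (not part of the patch): making __VIRTUAL_MASK_SHIFT a runtime expression means the user virtual-address ceiling now follows the boot-time paging mode. The mask itself is just (1 << shift) - 1; a standalone sketch of the two resulting values:

#include <stdio.h>

static unsigned long long virtual_mask(int shift)
{
	return (1ULL << shift) - 1;
}

int main(void)
{
	printf("4-level mask (shift 47): %#llx\n", virtual_mask(47));
	printf("5-level mask (shift 56): %#llx\n", virtual_mask(56));
	return 0;
}
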
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 554841f..9be2bf1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -7,6 +7,7 @@
#ifdef CONFIG_PARAVIRT
#include <asm/pgtable_types.h>
#include <asm/asm.h>
+#include <asm/nospec-branch.h>
#include <asm/paravirt_types.h>
@@ -567,17 +568,22 @@ static inline p4dval_t p4d_val(p4d_t p4d)
return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d);
}
-static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- pgdval_t val = native_pgd_val(pgd);
-
- PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val);
+ PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd));
}
-static inline void pgd_clear(pgd_t *pgdp)
-{
- set_pgd(pgdp, __pgd(0));
-}
+#define set_pgd(pgdp, pgdval) do { \
+ if (pgtable_l5_enabled) \
+ __set_pgd(pgdp, pgdval); \
+ else \
+ set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \
+} while (0)
+
+#define pgd_clear(pgdp) do { \
+ if (pgtable_l5_enabled) \
+ set_pgd(pgdp, __pgd(0)); \
+} while (0)
#endif /* CONFIG_PGTABLE_LEVELS == 5 */
@@ -879,23 +885,27 @@ extern void default_banner(void);
#define INTERRUPT_RETURN \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+ ANNOTATE_RETPOLINE_SAFE; \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);)
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#define ENABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#ifdef CONFIG_X86_32
#define GET_CR0_INTO_EAX \
push %ecx; push %edx; \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \
pop %edx; pop %ecx
#else /* !CONFIG_X86_32 */
@@ -917,21 +927,25 @@ extern void default_banner(void);
*/
#define SWAPGS \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \
- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \
)
#define GET_CR2_INTO_RAX \
- call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2);
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
+ ANNOTATE_RETPOLINE_SAFE; \
+ jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);)
#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
+ ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f624f1f..180bc0b 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -43,6 +43,7 @@
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
#include <asm/pgtable_types.h>
+#include <asm/nospec-branch.h>
struct page;
struct thread_struct;
@@ -392,7 +393,9 @@ int paravirt_disable_iospace(void);
* offset into the paravirt_patch_template structure, and can therefore be
* freely converted back into a structure offset.
*/
-#define PARAVIRT_CALL "call *%c[paravirt_opptr];"
+#define PARAVIRT_CALL \
+ ANNOTATE_RETPOLINE_SAFE \
+ "call *%c[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index eb66fa9..959d618 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -151,6 +151,8 @@ extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
phys_addr_t addr);
extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
+extern struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
+ int end, u64 addr);
extern struct list_head pci_mmcfg_list;
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ba3c523..a06b073 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
{
bool oldbit;
- asm volatile("bt "__percpu_arg(2)",%1"
+ asm volatile("btl "__percpu_arg(2)",%1"
CC_SET(c)
: CC_OUT(c) (oldbit)
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index aff42e1..263c142 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#if CONFIG_PGTABLE_LEVELS > 4
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
+ if (!pgtable_l5_enabled)
+ return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
}
@@ -191,7 +193,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
- ___p4d_free_tlb(tlb, p4d);
+ if (pgtable_l5_enabled)
+ ___p4d_free_tlb(tlb, p4d);
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 876b4c7..6a59a6d 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -44,5 +44,6 @@ typedef union {
*/
#define PTRS_PER_PTE 512
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 63c2552..89d5c88 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags;
#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
-#define pgd_clear(pgd) native_pgd_clear(pgd)
+#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
#endif
#ifndef set_p4d
@@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
{
pmdval_t v = native_pmd_val(pmd);
- return __pmd(v | set);
+ return native_make_pmd(v | set);
}
static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
{
pmdval_t v = native_pmd_val(pmd);
- return __pmd(v & ~clear);
+ return native_make_pmd(v & ~clear);
}
static inline pmd_t pmd_mkold(pmd_t pmd)
@@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
pudval_t v = native_pud_val(pud);
- return __pud(v | set);
+ return native_make_pud(v | set);
}
static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
{
pudval_t v = native_pud_val(pud);
- return __pud(v & ~clear);
+ return native_make_pud(v & ~clear);
}
static inline pud_t pud_mkold(pud_t pud)
@@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address)
#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
+ if (!pgtable_l5_enabled)
+ return 1;
return pgd_flags(pgd) & _PAGE_PRESENT;
}
@@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
/* to find an entry in a page-table-directory. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
+ if (!pgtable_l5_enabled)
+ return (p4d_t *)pgd;
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
@@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd)
{
unsigned long ignore_flags = _PAGE_USER;
+ if (!pgtable_l5_enabled)
+ return 0;
+
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
ignore_flags |= _PAGE_NX;
@@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd)
static inline int pgd_none(pgd_t pgd)
{
+ if (!pgtable_l5_enabled)
+ return 0;
/*
* There is no need to do a workaround for the KNL stray
* A/D bit erratum here. PGDs only point to page tables
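
Editorial illustration (not part of the patch): the pgtable_l5_enabled checks added above make the p4d level fold away at run time. With 4-level paging, p4d_offset() hands back the pgd entry itself and pgd_none()/pgd_bad() report nothing interesting, so one kernel image can walk both layouts. A hedged, kernel-context sketch of a walk that relies on this folding (the helper is illustrative, not from the patch):

static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d = p4d_offset(pgd, addr);	/* == (p4d_t *)pgd when !pgtable_l5_enabled */
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd) || p4d_none(*p4d))	/* pgd_none() is always 0 when !l5 */
		return NULL;

	pud = pud_offset(p4d, addr);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd))
		return NULL;

	return pte_offset_kernel(pmd, addr);
}
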
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index e554667..88a056b 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -32,6 +32,9 @@ extern pmd_t initial_pg_pmd[];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
void paging_init(void);
+void sync_initial_page_table(void);
+
+static inline int pgd_large(pgd_t pgd) { return 0; }
/*
* Define this if things work differently on an i386 and an i486:
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 0777e18..e3225e8 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -15,6 +15,8 @@
# include <asm/pgtable-2level_types.h>
#endif
+#define pgtable_l5_enabled 0
+
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 81462e9..877bc27 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
extern void paging_init(void);
+static inline void sync_initial_page_table(void) { }
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %p(%016lx)\n", \
@@ -217,29 +218,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
-#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
- p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
-#else
- *p4dp = p4d;
-#endif
+ pgd_t pgd;
+
+ if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ *p4dp = p4d;
+ return;
+ }
+
+ pgd = native_make_pgd(native_p4d_val(p4d));
+ pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd);
+ *p4dp = native_make_p4d(native_pgd_val(pgd));
}
static inline void native_p4d_clear(p4d_t *p4d)
{
-#ifdef CONFIG_X86_5LEVEL
native_set_p4d(p4d, native_make_p4d(0));
-#else
- native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)});
-#endif
}
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
*pgdp = pti_set_user_pgd(pgdp, pgd);
-#else
- *pgdp = pgd;
-#endif
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6b8f73d..d5c21a3 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -20,6 +20,18 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
+#ifdef CONFIG_X86_5LEVEL
+extern unsigned int __pgtable_l5_enabled;
+#ifndef pgtable_l5_enabled
+#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
+#endif
+#else
+#define pgtable_l5_enabled 0
+#endif
+
+extern unsigned int pgdir_shift;
+extern unsigned int ptrs_per_p4d;
+
#endif /* !__ASSEMBLY__ */
#define SHARED_KERNEL_PMD 0
@@ -29,24 +41,28 @@ typedef struct { pteval_t pte; } pte_t;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 48
+#define PGDIR_SHIFT pgdir_shift
#define PTRS_PER_PGD 512
/*
* 4th level page in 5-level paging case
*/
-#define P4D_SHIFT 39
-#define PTRS_PER_P4D 512
-#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
-#define P4D_MASK (~(P4D_SIZE - 1))
+#define P4D_SHIFT 39
+#define MAX_PTRS_PER_P4D 512
+#define PTRS_PER_P4D ptrs_per_p4d
+#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK (~(P4D_SIZE - 1))
+
+#define MAX_POSSIBLE_PHYSMEM_BITS 52
#else /* CONFIG_X86_5LEVEL */
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT 39
-#define PTRS_PER_PGD 512
+#define PGDIR_SHIFT 39
+#define PTRS_PER_PGD 512
+#define MAX_PTRS_PER_P4D 1
#endif /* CONFIG_X86_5LEVEL */
@@ -82,31 +98,33 @@ typedef struct { pteval_t pte; } pte_t;
* range must not overlap with anything except the KASAN shadow area, which
* is correct as KASAN disables KASLR.
*/
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
+#define MAXMEM (1UL << MAX_PHYSMEM_BITS)
-#ifdef CONFIG_X86_5LEVEL
-# define VMALLOC_SIZE_TB _AC(12800, UL)
-# define __VMALLOC_BASE _AC(0xffa0000000000000, UL)
-# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL)
-# define LDT_PGD_ENTRY _AC(-112, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
-#else
-# define VMALLOC_SIZE_TB _AC(32, UL)
-# define __VMALLOC_BASE _AC(0xffffc90000000000, UL)
-# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL)
-# define LDT_PGD_ENTRY _AC(-3, UL)
-# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
-#endif
+#define LDT_PGD_ENTRY_L4 -3UL
+#define LDT_PGD_ENTRY_L5 -112UL
+#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
+
+#define __VMALLOC_BASE_L4 0xffffc90000000000
+#define __VMALLOC_BASE_L5 0xffa0000000000000
+
+#define VMALLOC_SIZE_TB_L4 32UL
+#define VMALLOC_SIZE_TB_L5 12800UL
+
+#define __VMEMMAP_BASE_L4 0xffffea0000000000
+#define __VMEMMAP_BASE_L5 0xffd4000000000000
-#ifdef CONFIG_RANDOMIZE_MEMORY
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
+# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
#else
-# define VMALLOC_START __VMALLOC_BASE
-# define VMEMMAP_START __VMEMMAP_BASE
-#endif /* CONFIG_RANDOMIZE_MEMORY */
+# define VMALLOC_START __VMALLOC_BASE_L4
+# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4
+# define VMEMMAP_START __VMEMMAP_BASE_L4
+#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
-#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
+#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
/* The module sections ends with the start of the fixmap */
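
Editorial illustration (not part of the patch): with the L4/L5 constants split out above, VMALLOC_END is computed as the start address plus the size in terabytes shifted into bytes, minus one. A standalone sketch that prints both layouts, mirroring the defines above:

#include <stdio.h>

#define VMALLOC_START_L4	0xffffc90000000000ULL
#define VMALLOC_SIZE_TB_L4	32ULL
#define VMALLOC_START_L5	0xffa0000000000000ULL
#define VMALLOC_SIZE_TB_L5	12800ULL

int main(void)
{
	printf("4-level vmalloc: %#llx - %#llx\n", VMALLOC_START_L4,
	       VMALLOC_START_L4 + (VMALLOC_SIZE_TB_L4 << 40) - 1);
	printf("5-level vmalloc: %#llx - %#llx\n", VMALLOC_START_L5,
	       VMALLOC_START_L5 + (VMALLOC_SIZE_TB_L5 << 40) - 1);
	return 0;
}
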
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 3696398..acfe755 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,7 +174,6 @@ enum page_cache_mode {
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE)
-#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -206,7 +205,6 @@ enum page_cache_mode {
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC)
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC)
-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC)
#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC)
#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
@@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud)
#else
#include <asm-generic/pgtable-nopud.h>
+static inline pud_t native_make_pud(pudval_t val)
+{
+ return (pud_t) { .p4d.pgd = native_make_pgd(val) };
+}
+
static inline pudval_t native_pud_val(pud_t pud)
{
return native_pgd_val(pud.p4d.pgd);
@@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
#else
#include <asm-generic/pgtable-nopmd.h>
+static inline pmd_t native_make_pmd(pmdval_t val)
+{
+ return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) };
+}
+
static inline pmdval_t native_pmd_val(pmd_t pmd)
{
return native_pgd_val(pmd.pud.p4d.pgd);
diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h
index 5973a2f..059823b 100644
--- a/arch/x86/include/asm/platform_sst_audio.h
+++ b/arch/x86/include/asm/platform_sst_audio.h
@@ -135,6 +135,7 @@ struct sst_platform_info {
const struct sst_res_info *res_info;
const struct sst_lib_dnld_info *lib_info;
const char *platform;
+ bool streams_lost_on_suspend;
};
int add_sst_platform_device(void);
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 1bd9ed8..4fa4206 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -407,9 +407,19 @@ union irq_stack_union {
DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
DECLARE_INIT_PER_CPU(irq_stack_union);
+static inline unsigned long cpu_kernelmode_gs_base(int cpu)
+{
+ return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+}
+
DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
extern asmlinkage void ignore_sysret(void);
+
+#if IS_ENABLED(CONFIG_KVM)
+/* Save actual FS/GS selectors and bases to current->thread */
+void save_fsgs_for_kvm(void);
+#endif
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
/*
@@ -977,4 +987,5 @@ bool xen_set_default_idle(void);
void stop_this_cpu(void *dummy);
void df_debug(struct pt_regs *regs, long error_code);
+void microcode_check(void);
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
index 4e44250..4cf11d8 100644
--- a/arch/x86/include/asm/refcount.h
+++ b/arch/x86/include/asm/refcount.h
@@ -17,7 +17,7 @@
#define _REFCOUNT_EXCEPTION \
".pushsection .text..refcount\n" \
"111:\tlea %[counter], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD0 "\n" \
+ "112:\t" ASM_UD2 "\n" \
ASM_UNREACHABLE \
".popsection\n" \
"113:\n" \
@@ -67,13 +67,13 @@ static __always_inline __must_check
bool refcount_sub_and_test(unsigned int i, refcount_t *r)
{
GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "er", i, "%0", e);
+ r->refs.counter, "er", i, "%0", e, "cx");
}
static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, "%0", e);
+ r->refs.counter, "%0", e, "cx");
}
static __always_inline __must_check
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index fb3a6de..6847d85 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -53,12 +53,6 @@
# define NEED_MOVBE 0
#endif
-#ifdef CONFIG_X86_5LEVEL
-# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#else
-# define NEED_LA57 0
-#endif
-
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -104,7 +98,7 @@
#define REQUIRED_MASK13 0
#define REQUIRED_MASK14 0
#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 (NEED_LA57)
+#define REQUIRED_MASK16 0
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index f91c365..4914a3e 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -2,8 +2,7 @@
#ifndef _ASM_X86_RMWcc
#define _ASM_X86_RMWcc
-#define __CLOBBERS_MEM "memory"
-#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx"
+#define __CLOBBERS_MEM(clb...) "memory", ## clb
#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO)
@@ -40,18 +39,19 @@ do { \
#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */
#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
- __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM)
+ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM())
-#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \
+#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\
__GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM_CC_CX)
+ __CLOBBERS_MEM(clobbers))
#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \
- __CLOBBERS_MEM, vcon (val))
+ __CLOBBERS_MEM(), vcon (val))
-#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \
+#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \
+ clobbers...) \
__GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \
- __CLOBBERS_MEM_CC_CX, vcon (val))
+ __CLOBBERS_MEM(clobbers), vcon (val))
#endif /* _ASM_X86_RMWcc */
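
Editorial illustration (not part of the patch): the variadic __CLOBBERS_MEM() above lets each GEN_*_RMWcc user append exactly the clobbers its exception path needs (refcount.h now passes "cx") instead of every user paying for "cc", "cx". A standalone sketch of the same pattern with local names; CLOBBERS_MEM and DEC_AND_TEST are illustrative, not the kernel macros, and a GCC with flag-output asm support is assumed:

#include <stdbool.h>
#include <stdio.h>

#define CLOBBERS_MEM(clb...)	"memory", ## clb

#define DEC_AND_TEST(var, extra_clobbers...) \
({ \
	bool __zero; \
	asm volatile ("decl %0" \
		      : "+m" (var), "=@ccz" (__zero) \
		      : : CLOBBERS_MEM(extra_clobbers)); \
	__zero; \
})

int main(void)
{
	int refs = 1;

	/* Callers whose slow path needs an extra clobber just append it. */
	if (DEC_AND_TEST(refs, "cx"))
		puts("hit zero");
	return 0;
}
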
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index d6baf23..5c019d2 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[];
#if defined(CONFIG_X86_64)
extern char __end_rodata_hpage_align[];
+extern char __entry_trampoline_start[], __entry_trampoline_end[];
#endif
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a418976..f75bff8 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -177,16 +177,6 @@ static inline int wbinvd_on_all_cpus(void)
extern unsigned disabled_cpus;
#ifdef CONFIG_X86_LOCAL_APIC
-
-#ifndef CONFIG_X86_64
-static inline int logical_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-}
-
-#endif
-
extern int hard_smp_processor_id(void);
#else /* CONFIG_X86_LOCAL_APIC */
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 4fc1e9d..4617a2b 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -27,13 +27,8 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
-# ifdef CONFIG_X86_5LEVEL
-# define MAX_PHYSADDR_BITS 52
-# define MAX_PHYSMEM_BITS 52
-# else
-# define MAX_PHYSADDR_BITS 44
-# define MAX_PHYSMEM_BITS 46
-# endif
+# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
+# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
#endif
#endif /* CONFIG_SPARSEMEM */
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f737068..133d942 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -87,8 +87,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl);
-extern unsigned int code_bytes;
-
/* The form of the top of the frame on the stack */
struct stack_frame {
struct stack_frame *next_frame;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 0487ac0..93b462e 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -60,7 +60,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u32 intercept_dr;
u32 intercept_exceptions;
u64 intercept;
- u8 reserved_1[42];
+ u8 reserved_1[40];
+ u16 pause_filter_thresh;
u16 pause_filter_count;
u64 iopm_base_pa;
u64 msrpm_base_pa;
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb..ff6c92e 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
{
}
#endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs);
-
#endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
deleted file mode 100644
index 82c34ee..0000000
--- a/arch/x86/include/asm/sys_ia32.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * sys_ia32.h - Linux ia32 syscall interfaces
- *
- * Copyright (c) 2008 Jaswinder Singh Rajput
- *
- * This file is released under the GPLv2.
- * See the file COPYING for more details.
- */
-
-#ifndef _ASM_X86_SYS_IA32_H
-#define _ASM_X86_SYS_IA32_H
-
-#ifdef CONFIG_COMPAT
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <linux/types.h>
-#include <linux/signal.h>
-#include <asm/compat.h>
-#include <asm/ia32.h>
-
-/* ia32/sys_ia32.c */
-asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long);
-asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
-
-asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *);
-asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
-asmlinkage long sys32_fstatat(unsigned int, const char __user *,
- struct stat64 __user *, int);
-struct mmap_arg_struct32;
-asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *);
-
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
-
-asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
-asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
-
-long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
-long sys32_vm86_warning(void);
-
-asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
-asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
- unsigned, unsigned, int);
-asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
-asmlinkage long sys32_fallocate(int, int, unsigned,
- unsigned, unsigned, unsigned);
-
-/* ia32/ia32_signal.c */
-asmlinkage long sys32_sigreturn(void);
-asmlinkage long sys32_rt_sigreturn(void);
-
-#endif /* CONFIG_COMPAT */
-
-#endif /* _ASM_X86_SYS_IA32_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index bad25bb..ae6e05f 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -17,6 +17,7 @@
/* Common in X86_32 and X86_64 */
/* kernel/ioport.c */
+long ksys_ioperm(unsigned long from, unsigned long num, int turn_on);
asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
asmlinkage long sys_iopl(unsigned int);
@@ -34,7 +35,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *);
#ifdef CONFIG_X86_32
/* kernel/signal.c */
-asmlinkage unsigned long sys_sigreturn(void);
+asmlinkage long sys_sigreturn(void);
/* kernel/vm86_32.c */
struct vm86_struct;
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index cf5d53c..2701d22 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -31,6 +31,7 @@ static inline cycles_t get_cycles(void)
}
extern struct system_counterval_t convert_art_to_tsc(u64 art);
+extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
extern void tsc_early_delay_calibrate(void);
extern void tsc_init(void);
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index ecb9dde..62c79e2 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -3833,7 +3833,7 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u {
#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR")
#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL
+#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL
#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \
is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 8b67807..5db8b0b 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
+#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index fc2f082..ce8b4da 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -122,12 +122,24 @@ struct x86_init_pci {
* @guest_late_init: guest late init
* @x2apic_available: X2APIC detection
* @init_mem_mapping: setup early mappings during init_mem_mapping()
+ * @init_after_bootmem: guest init after boot allocator is finished
*/
struct x86_hyper_init {
void (*init_platform)(void);
void (*guest_late_init)(void);
bool (*x2apic_available)(void);
void (*init_mem_mapping)(void);
+ void (*init_after_bootmem)(void);
+};
+
+/**
+ * struct x86_init_acpi - x86 ACPI init functions
+ * @get_root_pointer: get RSDP address
+ * @reduced_hw_early_init: hardware reduced platform early init
+ */
+struct x86_init_acpi {
+ u64 (*get_root_pointer)(void);
+ void (*reduced_hw_early_init)(void);
};
/**
@@ -144,6 +156,7 @@ struct x86_init_ops {
struct x86_init_iommu iommu;
struct x86_init_pci pci;
struct x86_hyper_init hyper;
+ struct x86_init_acpi acpi;
};
/**
@@ -274,16 +287,16 @@ struct x86_msi_ops {
void (*restore_msi_irqs)(struct pci_dev *dev);
};
-struct x86_io_apic_ops {
- unsigned int (*read) (unsigned int apic, unsigned int reg);
- void (*disable)(void);
+struct x86_apic_ops {
+ unsigned int (*io_apic_read) (unsigned int apic, unsigned int reg);
+ void (*restore)(void);
};
extern struct x86_init_ops x86_init;
extern struct x86_cpuinit_ops x86_cpuinit;
extern struct x86_platform_ops x86_platform;
extern struct x86_msi_ops x86_msi;
-extern struct x86_io_apic_ops x86_io_apic_ops;
+extern struct x86_apic_ops x86_apic_ops;
extern void x86_early_init_platform_quirks(void);
extern void x86_init_noop(void);
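
Editorial illustration (not part of the patch): the new x86_init.acpi ops give guests and hardware-reduced platforms a hook for supplying the RSDP and for early init. A hedged, kernel-context sketch of how a guest platform might install its own callbacks; every my_guest_* name is hypothetical and exists only for illustration:

static u64 my_guest_rsdp_paddr;		/* hypothetical, filled in from boot data */

static u64 __init my_guest_get_root_pointer(void)
{
	return my_guest_rsdp_paddr;
}

static void __init my_guest_init_platform(void)
{
	x86_init.acpi.get_root_pointer = my_guest_get_root_pointer;
	/* .reduced_hw_early_init keeps its default, acpi_generic_reduced_hw_init(),
	 * unless the platform needs something different. */
}
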
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index f3a9604..c535c2f 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -354,8 +354,25 @@ struct kvm_xcrs {
__u64 padding[16];
};
-/* definition of registers in kvm_run */
+#define KVM_SYNC_X86_REGS (1UL << 0)
+#define KVM_SYNC_X86_SREGS (1UL << 1)
+#define KVM_SYNC_X86_EVENTS (1UL << 2)
+
+#define KVM_SYNC_X86_VALID_FIELDS \
+ (KVM_SYNC_X86_REGS| \
+ KVM_SYNC_X86_SREGS| \
+ KVM_SYNC_X86_EVENTS)
+
+/* kvm_sync_regs struct included by kvm_run struct */
struct kvm_sync_regs {
+ /* Members of this structure are potentially malicious.
+ * Care must be taken by code reading, and especially interpreting,
+ * data fields from them inside KVM to prevent TOCTOU and
+ * double-fetch types of vulnerabilities.
+ */
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
};
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
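
Editorial illustration (not part of the patch): the KVM_SYNC_X86_* bits above let userspace mirror registers through the shared kvm_run page instead of issuing separate GET/SET ioctls. A hedged userspace sketch of the intended flow; it assumes an already-configured vCPU fd whose kvm_run area is mmap'd as run and that the host advertises the sync-regs capability, and error handling is omitted:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static void run_with_synced_regs(int vcpu_fd, struct kvm_run *run)
{
	/* Ask KVM to copy GPRs and segment registers out on every exit ... */
	run->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
	ioctl(vcpu_fd, KVM_RUN, 0);

	/* ... and to take modified values back in on the next entry. */
	run->s.regs.regs.rip += 2;		/* e.g. step over an emulated insn */
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;
	ioctl(vcpu_fd, KVM_RUN, 0);
}
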
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 7a2ade4..4c851eb 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -3,15 +3,16 @@
#define _UAPI_ASM_X86_KVM_PARA_H
#include <linux/types.h>
-#include <asm/hyperv.h>
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
-/* This CPUID returns a feature bitmap in eax. Before enabling a particular
- * paravirtualization, the appropriate feature bit should be checked.
+/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
+ * a particular paravirtualization, the appropriate feature bit should
+ * be checked in eax. The performance hint feature bit should be checked
+ * in edx.
*/
#define KVM_CPUID_FEATURES 0x40000001
#define KVM_FEATURE_CLOCKSOURCE 0
@@ -26,6 +27,9 @@
#define KVM_FEATURE_PV_EOI 6
#define KVM_FEATURE_PV_UNHALT 7
#define KVM_FEATURE_PV_TLB_FLUSH 9
+#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
+
+#define KVM_HINTS_DEDICATED 0
/* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored.
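
Editorial illustration (not part of the patch): since the feature leaf now carries bits in both EAX and EDX, a probe has to look at both registers. A standalone userspace sketch (x86 only) that checks the KVM signature at 0x40000000 first and then reads the leaf described above:

#include <cpuid.h>
#include <stdio.h>

#define KVM_CPUID_FEATURES	 0x40000001
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_HINTS_DEDICATED	 0

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid(0x40000000, eax, ebx, ecx, edx);
	if (ebx != 0x4b4d564b)		/* "KVMK" -- not running under KVM */
		return 1;

	__cpuid(KVM_CPUID_FEATURES, eax, ebx, ecx, edx);
	printf("PV TLB flush (EAX bit %d): %s\n", KVM_FEATURE_PV_TLB_FLUSH,
	       (eax >> KVM_FEATURE_PV_TLB_FLUSH) & 1 ? "yes" : "no");
	printf("dedicated-vCPU hint (EDX bit %d): %s\n", KVM_HINTS_DEDICATED,
	       (edx >> KVM_HINTS_DEDICATED) & 1 ? "yes" : "no");
	return 0;
}
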
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 91723461..955c2a2 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -5,31 +5,36 @@
#include <linux/types.h>
#include <linux/ioctl.h>
-/* Fields are zero when not available */
+/*
+ * Fields are zero when not available. Also, this struct is shared with
+ * userspace mcelog and thus must keep existing fields at current offsets.
+ * Only add new fields to the end of the structure.
+ */
struct mce {
- __u64 status;
- __u64 misc;
- __u64 addr;
- __u64 mcgstatus;
- __u64 ip;
- __u64 tsc; /* cpu time stamp counter */
- __u64 time; /* wall time_t when error was detected */
- __u8 cpuvendor; /* cpu vendor as encoded in system.h */
- __u8 inject_flags; /* software inject flags */
- __u8 severity;
+ __u64 status; /* Bank's MCi_STATUS MSR */
+ __u64 misc; /* Bank's MCi_MISC MSR */
+ __u64 addr; /* Bank's MCi_ADDR MSR */
+ __u64 mcgstatus; /* Machine Check Global Status MSR */
+ __u64 ip; /* Instruction Pointer when the error happened */
+ __u64 tsc; /* CPU time stamp counter */
+ __u64 time; /* Wall time_t when error was detected */
+ __u8 cpuvendor; /* Kernel's X86_VENDOR enum */
+ __u8 inject_flags; /* Software inject flags */
+ __u8 severity; /* Error severity */
__u8 pad;
- __u32 cpuid; /* CPUID 1 EAX */
- __u8 cs; /* code segment */
- __u8 bank; /* machine check bank */
- __u8 cpu; /* cpu number; obsolete; use extcpu now */
- __u8 finished; /* entry is valid */
- __u32 extcpu; /* linux cpu number that detected the error */
- __u32 socketid; /* CPU socket ID */
- __u32 apicid; /* CPU initial apic ID */
- __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
- __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
- __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
- __u64 ppin; /* Protected Processor Inventory Number */
+ __u32 cpuid; /* CPUID 1 EAX */
+ __u8 cs; /* Code segment */
+ __u8 bank; /* Machine check bank reporting the error */
+ __u8 cpu; /* CPU number; obsoleted by extcpu */
+ __u8 finished; /* Entry is valid */
+ __u32 extcpu; /* Linux CPU number that detected the error */
+ __u32 socketid; /* CPU socket ID */
+ __u32 apicid; /* CPU initial APIC ID */
+ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
+ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
+ __u64 ppin; /* Protected Processor Inventory Number */
+ __u32 microcode; /* Microcode revision */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 29786c8..02d6f5c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+obj-y += alternative.o i8253.o hw_breakpoint.o
obj-y += tsc.o tsc_msr.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
@@ -146,6 +146,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
- obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
+ obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2aa9209..7a37d93 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
*
* We initialize the Hardware-reduced ACPI model here:
*/
+void __init acpi_generic_reduced_hw_init(void)
+{
+ /*
+ * Override x86_init functions and bypass legacy PIC in
+ * hardware reduced ACPI mode.
+ */
+ x86_init.timers.timer_init = x86_init_noop;
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+ legacy_pic = &null_legacy_pic;
+}
+
static void __init acpi_reduced_hw_init(void)
{
- if (acpi_gbl_reduced_hardware) {
- /*
- * Override x86_init functions and bypass legacy pic
- * in Hardware-reduced ACPI mode
- */
- x86_init.timers.timer_init = x86_init_noop;
- x86_init.irqs.pre_vector_init = x86_init_noop;
- legacy_pic = &null_legacy_pic;
- }
+ if (acpi_gbl_reduced_hardware)
+ x86_init.acpi.reduced_hw_early_init();
}
/*
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index ecd486c..f299d8a 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -480,30 +480,21 @@ static void *
gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
gfp_t flag, unsigned long attrs)
{
- dma_addr_t paddr;
- unsigned long align_mask;
- struct page *page;
-
- if (force_iommu && !(flag & GFP_DMA)) {
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- align_mask = (1UL << get_order(size)) - 1;
- paddr = dma_map_area(dev, page_to_phys(page), size,
- DMA_BIDIRECTIONAL, align_mask);
-
- flush_gart();
- if (paddr != bad_dma_addr) {
- *dma_addr = paddr;
- return page_address(page);
- }
- __free_pages(page, get_order(size));
- } else
- return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
- attrs);
+ void *vaddr;
+
+ vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+ if (!vaddr ||
+ !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+ return vaddr;
+ *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
+ DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
+ flush_gart();
+ if (unlikely(*dma_addr == bad_dma_addr))
+ goto out_free;
+ return vaddr;
+out_free:
+ dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
return NULL;
}
@@ -513,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs)
{
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
- dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+ dma_direct_free(dev, size, vaddr, dma_addr, attrs);
}
static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -705,7 +696,7 @@ static const struct dma_map_ops gart_dma_ops = {
.alloc = gart_alloc_coherent,
.free = gart_free_coherent,
.mapping_error = gart_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
static void gart_iommu_shutdown(void)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b203af0..2aabd4c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1408,22 +1408,69 @@ static void lapic_setup_esr(void)
oldvalue, value);
}
+static void apic_pending_intr_clear(void)
+{
+ long long max_loops = cpu_khz ? cpu_khz : 1000000;
+ unsigned long long tsc = 0, ntsc;
+ unsigned int queued;
+ unsigned long value;
+ int i, j, acked = 0;
+
+ if (boot_cpu_has(X86_FEATURE_TSC))
+ tsc = rdtsc();
+ /*
+ * After a crash, we no longer service the interrupts and a pending
+ * interrupt from previous kernel might still have ISR bit set.
+ *
+ * Most probably the CPU has serviced that pending interrupt by now, but
+ * it might not have done the ack_APIC_irq() because it thought the
+ * interrupt came from the i8259 as an ExtInt. The LAPIC did not get an EOI,
+ * so it does not clear the ISR bit and the CPU thinks it has already
+ * serviced the interrupt. Hence a vector might get locked. This was noticed
+ * for the timer IRQ (vector 0x31). Issue an extra EOI to clear the ISR.
+ */
+ do {
+ queued = 0;
+ for (i = APIC_ISR_NR - 1; i >= 0; i--)
+ queued |= apic_read(APIC_IRR + i*0x10);
+
+ for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+ value = apic_read(APIC_ISR + i*0x10);
+ for_each_set_bit(j, &value, 32) {
+ ack_APIC_irq();
+ acked++;
+ }
+ }
+ if (acked > 256) {
+ pr_err("LAPIC pending interrupts after %d EOI\n", acked);
+ break;
+ }
+ if (queued) {
+ if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
+ ntsc = rdtsc();
+ max_loops = (cpu_khz << 10) - (ntsc - tsc);
+ } else {
+ max_loops--;
+ }
+ }
+ } while (queued && max_loops > 0);
+ WARN_ON(max_loops <= 0);
+}
+
/**
* setup_local_APIC - setup the local APIC
*
* Used to setup local APIC while initializing BSP or bringing up APs.
* Always called with preemption disabled.
*/
-void setup_local_APIC(void)
+static void setup_local_APIC(void)
{
int cpu = smp_processor_id();
- unsigned int value, queued;
- int i, j, acked = 0;
- unsigned long long tsc = 0, ntsc;
- long long max_loops = cpu_khz ? cpu_khz : 1000000;
+ unsigned int value;
+#ifdef CONFIG_X86_32
+ int logical_apicid, ldr_apicid;
+#endif
- if (boot_cpu_has(X86_FEATURE_TSC))
- tsc = rdtsc();
if (disable_apic) {
disable_ioapic_support();
@@ -1460,11 +1507,11 @@ void setup_local_APIC(void)
* initialized during get_smp_config(), make sure it matches the
* actual value.
*/
- i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
- WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+ logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+ WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid);
/* always use the value from LDR */
- early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
- logical_smp_processor_id();
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
#endif
/*
@@ -1475,45 +1522,7 @@ void setup_local_APIC(void)
value &= ~APIC_TPRI_MASK;
apic_write(APIC_TASKPRI, value);
- /*
- * After a crash, we no longer service the interrupts and a pending
- * interrupt from previous kernel might still have ISR bit set.
- *
- * Most probably by now CPU has serviced that pending interrupt and
- * it might not have done the ack_APIC_irq() because it thought,
- * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
- * does not clear the ISR bit and cpu thinks it has already serivced
- * the interrupt. Hence a vector might get locked. It was noticed
- * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
- */
- do {
- queued = 0;
- for (i = APIC_ISR_NR - 1; i >= 0; i--)
- queued |= apic_read(APIC_IRR + i*0x10);
-
- for (i = APIC_ISR_NR - 1; i >= 0; i--) {
- value = apic_read(APIC_ISR + i*0x10);
- for (j = 31; j >= 0; j--) {
- if (value & (1<<j)) {
- ack_APIC_irq();
- acked++;
- }
- }
- }
- if (acked > 256) {
- printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
- acked);
- break;
- }
- if (queued) {
- if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
- ntsc = rdtsc();
- max_loops = (cpu_khz << 10) - (ntsc - tsc);
- } else
- max_loops--;
- }
- } while (queued && max_loops > 0);
- WARN_ON(max_loops <= 0);
+ apic_pending_intr_clear();
/*
* Now that we are all set up, enable the APIC
@@ -1570,7 +1579,7 @@ void setup_local_APIC(void)
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!cpu && (pic_mode || !value)) {
+ if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
value = APIC_DM_EXTINT;
apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
} else {
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8ad2e41..7553819 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -587,7 +587,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
mpc_ioapic_id(apic), pin);
}
-static void clear_IO_APIC (void)
+void clear_IO_APIC (void)
{
int apic, pin;
@@ -1410,7 +1410,7 @@ void __init enable_IO_APIC(void)
clear_IO_APIC();
}
-void native_disable_io_apic(void)
+void native_restore_boot_irq_mode(void)
{
/*
* If the i8259 is routed through an IOAPIC
@@ -1438,20 +1438,12 @@ void native_disable_io_apic(void)
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
+void restore_boot_irq_mode(void)
{
- /*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
if (!nr_legacy_irqs())
return;
- x86_io_apic_ops.disable();
+ x86_apic_ops.restore();
}
#ifdef CONFIG_X86_32
@@ -1603,7 +1595,7 @@ static void __init delay_with_tsc(void)
do {
rep_nop();
now = rdtsc();
- } while ((now - start) < 40000000000UL / HZ &&
+ } while ((now - start) < 40000000000ULL / HZ &&
time_before_eq(jiffies, end));
}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 3cc471b..bb6f7a2 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
struct irq_desc *desc = irq_data_to_desc(irqd);
+ bool managed = irqd_affinity_is_managed(irqd);
lockdep_assert_held(&vector_lock);
trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
apicd->cpu);
- /* Setup the vector move, if required */
- if (apicd->vector && cpu_online(apicd->cpu)) {
+ /*
+ * If there is no vector associated or if the associated vector is
+ * the shutdown vector, which is associated to make PCI/MSI
+ * shutdown mode work, then there is nothing to release. Clear out
+ * prev_vector for this and the offlined target case.
+ */
+ apicd->prev_vector = 0;
+ if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR)
+ goto setnew;
+ /*
+ * If the target CPU of the previous vector is online, then mark
+ * the vector as move in progress and store it for cleanup when the
+ * first interrupt on the new vector arrives. If the target CPU is
+ * offline then the regular release mechanism via the cleanup
+ * vector is not possible and the vector can be immediately freed
+ * in the underlying matrix allocator.
+ */
+ if (cpu_online(apicd->cpu)) {
apicd->move_in_progress = true;
apicd->prev_vector = apicd->vector;
apicd->prev_cpu = apicd->cpu;
} else {
- apicd->prev_vector = 0;
+ irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector,
+ managed);
}
+setnew:
apicd->vector = newvec;
apicd->cpu = newcpu;
BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index f8d9d69..e2829bf 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -14,7 +14,7 @@ int x2apic_phys;
static struct apic apic_x2apic_phys;
-static int set_x2apic_phys_mode(char *arg)
+static int __init set_x2apic_phys_mode(char *arg)
{
x2apic_phys = 1;
return 0;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 570e8bb..a66229f 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f0e6456..12bc0a1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -716,7 +716,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
static void init_amd_gh(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_MMCONF_FAM10H
/* do this for boot cpu */
if (c == &boot_cpu_data)
check_enable_amd_mmconf_dmi();
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b5..bfca937 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ retpoline_auto:
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
+
+ /*
+ * Retpoline means the kernel is safe because it has no indirect
+ * branches. But firmware isn't, so use IBRS to protect that.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
}
#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
spectre_v2_module_string());
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 824aee0..4702fbd 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -487,7 +487,7 @@ void load_percpu_segment(int cpu)
loadsegment(fs, __KERNEL_PERCPU);
#else
__loadsegment_simple(gs, 0);
- wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
+ wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#endif
load_stack_canary_segment();
}
@@ -1398,6 +1398,7 @@ __setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
/*
* The following percpu variables are hot. Align current_task to
@@ -1749,3 +1750,33 @@ static int __init init_cpu_syscore(void)
return 0;
}
core_initcall(init_cpu_syscore);
+
+/*
+ * The microcode loader calls this after a late microcode load to recheck
+ * features, but only when microcode has actually been updated. The caller
+ * holds microcode_mutex and the CPU hotplug lock.
+ */
+void microcode_check(void)
+{
+ struct cpuinfo_x86 info;
+
+ perf_check_microcode();
+
+ /* Reload CPUID max function as it might've changed. */
+ info.cpuid_level = cpuid_eax(0);
+
+ /*
+ * Copy all capability leaves to pick up the synthetic ones so that
+ * memcmp() below doesn't fail on that. The ones coming from CPUID will
+ * get overwritten in get_cpu_cap().
+ */
+ memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability));
+
+ get_cpu_cap(&info);
+
+ if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)))
+ return;
+
+ pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n");
+ pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n");
+}
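
A minimal user-space sketch of the comparison microcode_check() performs (snapshot the capability words, redo detection, memcmp the two arrays) follows. The word count and the fake read_caps() helper are illustrative assumptions, not the kernel's NCAPINTS layout or get_cpu_cap():

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define NCAP_WORDS 20               /* illustrative; the kernel uses NCAPINTS */

/* Pretend CPUID-derived words; in the kernel this is get_cpu_cap(). It leaves
 * the last, "synthetic" word alone, which is why the memcpy below matters. */
static void read_caps(uint32_t caps[NCAP_WORDS], uint32_t seed)
{
	for (int i = 0; i < NCAP_WORDS - 1; i++)
		caps[i] = seed ^ (uint32_t)i;
}

int main(void)
{
	uint32_t boot_caps[NCAP_WORDS] = { 0 }, new_caps[NCAP_WORDS];

	read_caps(boot_caps, 0x1234);           /* features seen at boot */
	boot_caps[NCAP_WORDS - 1] = 0xdead;     /* synthetic feature bits */

	/* Start from the boot copy so synthetic bits survive the compare. */
	memcpy(new_caps, boot_caps, sizeof(new_caps));
	read_caps(new_caps, 0x1235);            /* features after a (simulated) update */

	if (memcmp(new_caps, boot_caps, sizeof(new_caps)))
		puts("CPU features changed after microcode load");
	else
		puts("no feature change detected");
	return 0;
}
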
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d19e903..b9693b8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
/*
* Early microcode releases for the Spectre v2 mitigation were broken.
* Information taken from:
- * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf
+ * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf
* - https://kb.vmware.com/s/article/52345
* - Microcode revisions observed in the wild
* - Release note from 20180108 microcode release
@@ -123,7 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
{ INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 },
{ INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
{ INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
- { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 },
{ INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 },
{ INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b },
{ INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 },
@@ -144,6 +143,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
{
int i;
+ /*
+ * We know that hypervisors lie to us about the microcode version, so
+ * we may as well hope that they are running the correct version.
+ */
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return false;
+
for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
if (c->x86_model == spectre_bad_microcodes[i].model &&
c->x86_stepping == spectre_bad_microcodes[i].stepping)
@@ -503,6 +509,90 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
}
}
+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void init_intel_energy_perf(struct cpuinfo_x86 *c)
{
u64 epb;
@@ -673,6 +763,9 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c);
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme(c);
+
init_intel_energy_perf(c);
init_intel_misc_features(c);
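
The TME_ACTIVATE helpers added above are plain bit-field extractions. As a standalone illustration, decoding works as in the sketch below; the sample MSR value is made up, and the macros are restated here with extra parentheses for safety:

#include <stdio.h>
#include <stdint.h>

#define TME_ACTIVATE_LOCKED(x)       ((x) & 0x1)
#define TME_ACTIVATE_ENABLED(x)      ((x) & 0x2)
#define TME_ACTIVATE_POLICY(x)       (((x) >> 4) & 0xf)     /* bits 7:4 */
#define TME_ACTIVATE_KEYID_BITS(x)   (((x) >> 32) & 0xf)    /* bits 35:32 */
#define TME_ACTIVATE_CRYPTO_ALGS(x)  (((x) >> 48) & 0xffff) /* bits 63:48 */

int main(void)
{
	/* Hypothetical MSR_IA32_TME_ACTIVATE value: locked, enabled,
	 * AES-XTS-128 policy, 3 KeyID bits, AES-XTS-128 supported. */
	uint64_t val = (1ULL << 0) | (1ULL << 1) | (0ULL << 4) |
		       (3ULL << 32) | (1ULL << 48);

	printf("locked=%d enabled=%d policy=%llu keyid_bits=%llu algs=%#llx\n",
	       TME_ACTIVATE_LOCKED(val) ? 1 : 0,
	       TME_ACTIVATE_ENABLED(val) ? 1 : 0,
	       (unsigned long long)TME_ACTIVATE_POLICY(val),
	       (unsigned long long)TME_ACTIVATE_KEYID_BITS(val),
	       (unsigned long long)TME_ACTIVATE_CRYPTO_ALGS(val));

	/* Number of usable KeyIDs, as in detect_tme(): 2^keyid_bits - 1. */
	printf("nr_keyids=%llu\n",
	       (unsigned long long)((1ULL << TME_ACTIVATE_KEYID_BITS(val)) - 1));
	return 0;
}
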
diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c
new file mode 100644
index 0000000..0771a90
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_pconfig.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel PCONFIG instruction support.
+ *
+ * Copyright (C) 2017 Intel Corporation
+ *
+ * Author:
+ * Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/intel_pconfig.h>
+
+#define PCONFIG_CPUID 0x1b
+
+#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1)
+
+/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */
+enum {
+ PCONFIG_CPUID_SUBLEAF_INVALID = 0,
+ PCONFIG_CPUID_SUBLEAF_TARGETID = 1,
+};
+
+/* Bitmask of supported targets */
+static u64 targets_supported __read_mostly;
+
+int pconfig_target_supported(enum pconfig_target target)
+{
+ /*
+ * We would need to re-think the implementation once we get > 64
+ * PCONFIG targets. Spec allows up to 2^32 targets.
+ */
+ BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64);
+
+ if (WARN_ON_ONCE(target >= 64))
+ return 0;
+ return targets_supported & (1ULL << target);
+}
+
+static int __init intel_pconfig_init(void)
+{
+ int subleaf;
+
+ if (!boot_cpu_has(X86_FEATURE_PCONFIG))
+ return 0;
+
+ /*
+ * Scan subleafs of PCONFIG CPUID leaf.
+ *
+ * Subleaves of the same type need not be consecutive.
+ *
+ * Stop on the first invalid subleaf type. All subleaves after the first
+ * invalid one are invalid too.
+ */
+ for (subleaf = 0; subleaf < INT_MAX; subleaf++) {
+ struct cpuid_regs regs;
+
+ cpuid_count(PCONFIG_CPUID, subleaf,
+ &regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
+
+ switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) {
+ case PCONFIG_CPUID_SUBLEAF_INVALID:
+ /* Stop on the first invalid subleaf */
+ goto out;
+ case PCONFIG_CPUID_SUBLEAF_TARGETID:
+ /* Mark supported PCONFIG targets */
+ if (regs.ebx < 64)
+ targets_supported |= (1ULL << regs.ebx);
+ if (regs.ecx < 64)
+ targets_supported |= (1ULL << regs.ecx);
+ if (regs.edx < 64)
+ targets_supported |= (1ULL << regs.edx);
+ break;
+ default:
+ /* Unknown CPUID.PCONFIG subleaf: ignore */
+ break;
+ }
+ }
+out:
+ return 0;
+}
+arch_initcall(intel_pconfig_init);
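
The subleaf scan in intel_pconfig_init() can be exercised from user space with the compiler's cpuid intrinsics. This is only a sketch and assumes a CPU that actually reports the PCONFIG leaf (0x1b), which most currently do not:

#include <stdio.h>
#include <cpuid.h>

#define PCONFIG_CPUID              0x1b
#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1)

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int max_leaf = __get_cpuid_max(0, NULL);

	if (max_leaf < PCONFIG_CPUID) {
		puts("CPUID leaf 0x1b not reported");
		return 0;
	}

	for (unsigned int subleaf = 0; ; subleaf++) {
		__cpuid_count(PCONFIG_CPUID, subleaf, eax, ebx, ecx, edx);

		unsigned int type = eax & PCONFIG_CPUID_SUBLEAF_MASK;

		if (type == 0)          /* invalid subleaf terminates the scan */
			break;
		if (type == 1)          /* TARGETID: ebx/ecx/edx carry target IDs */
			printf("subleaf %u: targets %u %u %u\n",
			       subleaf, ebx, ecx, edx);
	}
	return 0;
}
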
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index bdab7d2..fca759d 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
goto out_common_fail;
}
closid = ret;
+ ret = 0;
rdtgrp->closid = closid;
list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 231ad23..475cb4f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -491,7 +491,7 @@ static void do_inject(void)
unsigned int cpu = i_mce.extcpu;
u8 b = i_mce.bank;
- rdtscll(i_mce.tsc);
+ i_mce.tsc = rdtsc_ordered();
if (i_mce.misc)
i_mce.status |= MCI_STATUS_MISCV;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index e956eb2..374d1aa 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -113,8 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb) { }
static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
-extern struct mca_config mca_cfg;
-
#ifndef CONFIG_X86_64
/*
* On 32-bit systems it would be difficult to safely unmap a poison page
@@ -130,4 +128,61 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {}
#define mce_unmap_kpfn mce_unmap_kpfn
#endif
+struct mca_config {
+ bool dont_log_ce;
+ bool cmci_disabled;
+ bool ignore_ce;
+
+ __u64 lmce_disabled : 1,
+ disabled : 1,
+ ser : 1,
+ recovery : 1,
+ bios_cmci_threshold : 1,
+ __reserved : 59;
+
+ u8 banks;
+ s8 bootlog;
+ int tolerant;
+ int monarch_timeout;
+ int panic_timeout;
+ u32 rip_msr;
+};
+
+extern struct mca_config mca_cfg;
+
+struct mce_vendor_flags {
+ /*
+ * When set, indicates that overflow conditions are not fatal.
+ */
+ __u64 overflow_recov : 1,
+
+ /*
+ * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and
+ * Recovery. It indicates support for data poisoning in HW and deferred
+ * error interrupts.
+ */
+ succor : 1,
+
+ /*
+ * (AMD) SMCA: This bit indicates support for Scalable MCA which expands
+ * the register space for each MCA bank and also increases number of
+ * banks. Also, to accommodate the new banks and registers, the MCA
+ * register space is moved to a new MSR range.
+ */
+ smca : 1,
+
+ __reserved_0 : 61;
+};
+
+extern struct mce_vendor_flags mce_flags;
+
+struct mca_msr_regs {
+ u32 (*ctl) (int bank);
+ u32 (*status) (int bank);
+ u32 (*addr) (int bank);
+ u32 (*misc) (int bank);
+};
+
+extern struct mca_msr_regs msr_ops;
+
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8ff94d1..42cf288 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -56,6 +56,9 @@
static DEFINE_MUTEX(mce_log_mutex);
+/* sysfs synchronization */
+static DEFINE_MUTEX(mce_sysfs_mutex);
+
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
@@ -130,6 +133,8 @@ void mce_setup(struct mce *m)
if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
rdmsrl(MSR_PPIN, m->ppin);
+
+ m->microcode = boot_cpu_data.microcode;
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -262,13 +267,15 @@ static void __print_mce(struct mce *m)
*/
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
- cpu_data(m->extcpu).microcode);
+ m->microcode);
}
static void print_mce(struct mce *m)
{
__print_mce(m);
- pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
+
+ if (m->cpuvendor != X86_VENDOR_AMD)
+ pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -1088,19 +1095,7 @@ static void mce_unmap_kpfn(unsigned long pfn)
* a legal address.
*/
-/*
- * Build time check to see if we have a spare virtual bit. Don't want
- * to leave this until run time because most developers don't have a
- * system that can exercise this code path. This will only become a
- * problem if/when we move beyond 5-level page tables.
- *
- * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD)
- */
-#if PGDIR_SHIFT + 9 < 63
decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63));
-#else
-#error "no unused virtual bit available"
-#endif
if (set_memory_np(decoy_addr, 1))
pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
@@ -1511,7 +1506,7 @@ static int __mcheck_cpu_cap_init(void)
mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
if (cap & MCG_SER_P)
- mca_cfg.ser = true;
+ mca_cfg.ser = 1;
return 0;
}
@@ -1819,12 +1814,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
return;
}
if (mce_gen_pool_init()) {
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
return;
}
@@ -1902,11 +1897,11 @@ static int __init mcheck_enable(char *str)
if (*str == '=')
str++;
if (!strcmp(str, "off"))
- cfg->disabled = true;
+ cfg->disabled = 1;
else if (!strcmp(str, "no_cmci"))
cfg->cmci_disabled = true;
else if (!strcmp(str, "no_lmce"))
- cfg->lmce_disabled = true;
+ cfg->lmce_disabled = 1;
else if (!strcmp(str, "dont_log_ce"))
cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
@@ -1914,9 +1909,9 @@ static int __init mcheck_enable(char *str)
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
- cfg->bios_cmci_threshold = true;
+ cfg->bios_cmci_threshold = 1;
else if (!strcmp(str, "recovery"))
- cfg->recovery = true;
+ cfg->recovery = 1;
else if (isdigit(str[0])) {
if (get_option(&str, &cfg->tolerant) == 2)
get_option(&str, &(cfg->monarch_timeout));
@@ -2086,6 +2081,7 @@ static ssize_t set_ignore_ce(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
@@ -2098,6 +2094,8 @@ static ssize_t set_ignore_ce(struct device *s,
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2110,6 +2108,7 @@ static ssize_t set_cmci_disabled(struct device *s,
if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
+ mutex_lock(&mce_sysfs_mutex);
if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
@@ -2121,6 +2120,8 @@ static ssize_t set_cmci_disabled(struct device *s,
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
+ mutex_unlock(&mce_sysfs_mutex);
+
return size;
}
@@ -2128,8 +2129,19 @@ static ssize_t store_int_with_restart(struct device *s,
struct device_attribute *attr,
const char *buf, size_t size)
{
- ssize_t ret = device_store_int(s, attr, buf, size);
+ unsigned long old_check_interval = check_interval;
+ ssize_t ret = device_store_ulong(s, attr, buf, size);
+
+ if (check_interval == old_check_interval)
+ return ret;
+
+ if (check_interval < 1)
+ check_interval = 1;
+
+ mutex_lock(&mce_sysfs_mutex);
mce_restart();
+ mutex_unlock(&mce_sysfs_mutex);
+
return ret;
}
@@ -2333,6 +2345,12 @@ static __init int mcheck_init_device(void)
{
int err;
+ /*
+ * Check if we have a spare virtual bit. This will only become
+ * a problem if/when we move beyond 5-level page tables.
+ */
+ MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63);
+
if (!mce_available(&boot_cpu_data)) {
err = -EIO;
goto err_out;
@@ -2381,7 +2399,7 @@ device_initcall_sync(mcheck_init_device);
*/
static int __init mcheck_disable(char *str)
{
- mca_cfg.disabled = true;
+ mca_cfg.disabled = 1;
return 1;
}
__setup("nomce", mcheck_disable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 0f32ad2..f7666ee 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = {
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" },
+ [SMCA_RESERVED] = { "reserved", "Reserved" },
[SMCA_EX] = { "execution_unit", "Execution Unit" },
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
@@ -110,14 +111,14 @@ const char *smca_get_long_name(enum smca_bank_types t)
}
EXPORT_SYMBOL_GPL(smca_get_long_name);
-static enum smca_bank_types smca_get_bank_type(struct mce *m)
+static enum smca_bank_types smca_get_bank_type(unsigned int bank)
{
struct smca_bank *b;
- if (m->bank >= N_SMCA_BANK_TYPES)
+ if (bank >= MAX_NR_BANKS)
return N_SMCA_BANK_TYPES;
- b = &smca_banks[m->bank];
+ b = &smca_banks[bank];
if (!b->hwid)
return N_SMCA_BANK_TYPES;
@@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(struct mce *m)
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
+ /* Reserved type */
+ { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
+
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
@@ -427,35 +431,58 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
+static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
+ unsigned int block)
+{
+ u32 low, high;
+ u32 addr = 0;
+
+ if (smca_get_bank_type(bank) == SMCA_RESERVED)
+ return addr;
+
+ if (!block)
+ return MSR_AMD64_SMCA_MCx_MISC(bank);
+
+ /*
+ * For SMCA-enabled processors, the BLKPTR field of the first MISC register
+ * (MCx_MISC0) indicates the presence of an additional MISC register set (MISC1-4).
+ */
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ return addr;
+
+ if (!(low & MCI_CONFIG_MCAX))
+ return addr;
+
+ if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ (low & MASK_BLKPTR_LO))
+ return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+
+ return addr;
+}
+
static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
- if (mce_flags.smca) {
- if (!block) {
- addr = MSR_AMD64_SMCA_MCx_MISC(bank);
- } else {
- /*
- * For SMCA enabled processors, BLKPTR field of the
- * first MISC register (MCx_MISC0) indicates presence of
- * additional MISC register set (MISC1-4).
- */
- u32 low, high;
+ if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
+ return addr;
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
- return addr;
+ /* Get address from already initialized block. */
+ if (per_cpu(threshold_banks, cpu)) {
+ struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank];
- if (!(low & MCI_CONFIG_MCAX))
- return addr;
+ if (bankp && bankp->blocks) {
+ struct threshold_block *blockp = &bankp->blocks[block];
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
- (low & MASK_BLKPTR_LO))
- addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
+ if (blockp)
+ return blockp->address;
}
- return addr;
}
+ if (mce_flags.smca)
+ return smca_get_block_address(cpu, bank, block);
+
/* Fall back to method we used for older processors: */
switch (block) {
case 0:
@@ -760,7 +787,7 @@ bool amd_mce_is_memory_error(struct mce *m)
u8 xec = (m->status >> 16) & 0x1f;
if (mce_flags.smca)
- return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0;
+ return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0;
return m->bank == 4 && xec == 0x8;
}
@@ -1063,7 +1090,7 @@ static struct kobj_type threshold_ktype = {
static const char *get_name(unsigned int bank, struct threshold_block *b)
{
- unsigned int bank_type;
+ enum smca_bank_types bank_type;
if (!mce_flags.smca) {
if (b && bank == 4)
@@ -1072,11 +1099,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}
- if (!smca_banks[bank].hwid)
+ bank_type = smca_get_bank_type(bank);
+ if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
- bank_type = smca_banks[bank].hwid->bank_type;
-
if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
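
The smca_get_bank_type() change above is just a bounds-checked lookup keyed by bank number instead of by struct mce. A compact sketch of that pattern; the types and table contents are invented for illustration:

#include <stdio.h>

enum bank_type { TYPE_RESERVED, TYPE_LS, TYPE_IF, TYPE_UMC, N_BANK_TYPES };

#define MAX_NR_BANKS 32

struct bank_desc {
	int valid;               /* stands in for the kernel's hwid pointer */
	enum bank_type type;
};

static struct bank_desc banks[MAX_NR_BANKS] = {
	[0] = { 1, TYPE_LS }, [1] = { 1, TYPE_IF }, [15] = { 1, TYPE_UMC },
};

static enum bank_type get_bank_type(unsigned int bank)
{
	if (bank >= MAX_NR_BANKS || !banks[bank].valid)
		return N_BANK_TYPES;   /* "unknown" sentinel, as in the kernel */
	return banks[bank].type;
}

int main(void)
{
	printf("bank 15 is UMC: %d\n", get_bank_type(15) == TYPE_UMC);
	printf("bank 20 unknown: %d\n", get_bank_type(20) == N_BANK_TYPES);
	return 0;
}
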
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 330b846..0624957 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -58,7 +58,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE];
/*
* Microcode patch container file is prepended to the initrd in cpio
- * format. See Documentation/x86/early-microcode.txt
+ * format. See Documentation/x86/microcode.txt
*/
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
@@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
return -EINVAL;
ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
- if (ret != UCODE_OK)
+ if (ret > UCODE_UPDATED)
return -EINVAL;
return 0;
@@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
return patch_size;
}
-static int apply_microcode_amd(int cpu)
+static enum ucode_state apply_microcode_amd(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
@@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu)
p = find_patch(cpu);
if (!p)
- return 0;
+ return UCODE_NFOUND;
mc_amd = p->data;
uci->mc = p->data;
@@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu)
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
uci->cpu_sig.rev = rev;
- return 0;
+ return UCODE_OK;
}
if (__apply_microcode_amd(mc_amd)) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
cpu, mc_amd->hdr.patch_id);
- return -1;
+ return UCODE_ERROR;
}
pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
mc_amd->hdr.patch_id);
@@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu)
uci->cpu_sig.rev = mc_amd->hdr.patch_id;
c->microcode = mc_amd->hdr.patch_id;
- return 0;
+ return UCODE_UPDATED;
}
static int install_equiv_cpu_table(const u8 *buf)
@@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
static enum ucode_state
load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
{
+ struct ucode_patch *p;
enum ucode_state ret;
/* free old equiv table */
free_equiv_cpu_table();
ret = __load_microcode_amd(family, data, size);
-
- if (ret != UCODE_OK)
+ if (ret != UCODE_OK) {
cleanup();
+ return ret;
+ }
-#ifdef CONFIG_X86_32
- /* save BSP's matching patch for early load */
- if (save) {
- struct ucode_patch *p = find_patch(0);
- if (p) {
- memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
- memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
- PATCH_MAX_SIZE));
- }
+ p = find_patch(0);
+ if (!p) {
+ return ret;
+ } else {
+ if (boot_cpu_data.microcode == p->patch_id)
+ return ret;
+
+ ret = UCODE_NEW;
}
-#endif
+
+ /* save BSP's matching patch for early load */
+ if (!save)
+ return ret;
+
+ memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
+ memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE));
+
return ret;
}
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 319dd65..10c4fc2 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -22,13 +22,16 @@
#define pr_fmt(fmt) "microcode: " fmt
#include <linux/platform_device.h>
+#include <linux/stop_machine.h>
#include <linux/syscore_ops.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
#include <linux/firmware.h>
#include <linux/kernel.h>
+#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
+#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/mm.h>
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
*/
static DEFINE_MUTEX(microcode_mutex);
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
struct cpu_info_ctx {
@@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu)
return ret;
}
-struct apply_microcode_ctx {
- int err;
-};
-
static void apply_microcode_local(void *arg)
{
- struct apply_microcode_ctx *ctx = arg;
+ enum ucode_state *err = arg;
- ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+ *err = microcode_ops->apply_microcode(smp_processor_id());
}
static int apply_microcode_on_target(int cpu)
{
- struct apply_microcode_ctx ctx = { .err = 0 };
+ enum ucode_state err;
int ret;
- ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
- if (!ret)
- ret = ctx.err;
-
+ ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1);
+ if (!ret) {
+ if (err == UCODE_ERROR)
+ ret = 1;
+ }
return ret;
}
@@ -489,31 +494,124 @@ static void __exit microcode_dev_exit(void)
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;
-static int reload_for_cpu(int cpu)
+/*
+ * Late loading dance. Why the heavy-handed stop_machine() effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ * is loading microcode in order to avoid any negative interactions caused by
+ * the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ * requirement can be relaxed in the future. Right now, this is conservative
+ * and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
+static int check_online_cpus(void)
{
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- enum ucode_state ustate;
- int err = 0;
+ if (num_online_cpus() == num_present_cpus())
+ return 0;
- if (!uci->valid)
- return err;
+ pr_err("Not all CPUs online, aborting microcode update.\n");
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
- if (ustate == UCODE_OK)
- apply_microcode_on_target(cpu);
- else
- if (ustate == UCODE_ERROR)
- err = -EINVAL;
- return err;
+ return -EINVAL;
+}
+
+static atomic_t late_cpus_in;
+static atomic_t late_cpus_out;
+
+static int __wait_for_cpus(atomic_t *t, long long timeout)
+{
+ int all_cpus = num_online_cpus();
+
+ atomic_inc(t);
+
+ while (atomic_read(t) < all_cpus) {
+ if (timeout < SPINUNIT) {
+ pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+ all_cpus - atomic_read(t));
+ return 1;
+ }
+
+ ndelay(SPINUNIT);
+ timeout -= SPINUNIT;
+
+ touch_nmi_watchdog();
+ }
+ return 0;
+}
+
+/*
+ * Returns:
+ * < 0 - on error
+ * 0 - no update done
+ * 1 - microcode was updated
+ */
+static int __reload_late(void *info)
+{
+ int cpu = smp_processor_id();
+ enum ucode_state err;
+ int ret = 0;
+
+ /*
+ * Wait for all CPUs to arrive. A load will not be attempted unless all
+ * CPUs show up.
+ */
+ if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC))
+ return -1;
+
+ spin_lock(&update_lock);
+ apply_microcode_local(&err);
+ spin_unlock(&update_lock);
+
+ if (err > UCODE_NFOUND) {
+ pr_warn("Error reloading microcode on CPU %d\n", cpu);
+ return -1;
+ /* siblings return UCODE_OK because their engine got updated already */
+ } else if (err == UCODE_UPDATED || err == UCODE_OK) {
+ ret = 1;
+ } else {
+ return ret;
+ }
+
+ /*
+ * Increase the wait timeout to a safe value here since we're
+ * serializing the microcode update and that could take a while on a
+ * large number of CPUs. And that is fine as the *actual* timeout will
+ * be determined by the last CPU to finish updating and thus cut short.
+ */
+ if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus()))
+ panic("Timeout during microcode update!\n");
+
+ return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+ int ret;
+
+ atomic_set(&late_cpus_in, 0);
+ atomic_set(&late_cpus_out, 0);
+
+ ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+ if (ret > 0)
+ microcode_check();
+
+ return ret;
}
static ssize_t reload_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t size)
{
+ enum ucode_state tmp_ret = UCODE_OK;
+ int bsp = boot_cpu_data.cpu_index;
unsigned long val;
- int cpu;
- ssize_t ret = 0, tmp_ret;
+ ssize_t ret = 0;
ret = kstrtoul(buf, 0, &val);
if (ret)
@@ -522,23 +620,24 @@ static ssize_t reload_store(struct device *dev,
if (val != 1)
return size;
+ tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true);
+ if (tmp_ret != UCODE_NEW)
+ return size;
+
get_online_cpus();
- mutex_lock(&microcode_mutex);
- for_each_online_cpu(cpu) {
- tmp_ret = reload_for_cpu(cpu);
- if (tmp_ret != 0)
- pr_warn("Error reloading microcode on CPU %d\n", cpu);
- /* save retval of the first encountered reload error */
- if (!ret)
- ret = tmp_ret;
- }
- if (!ret)
- perf_check_microcode();
+ ret = check_online_cpus();
+ if (ret)
+ goto put;
+
+ mutex_lock(&microcode_mutex);
+ ret = microcode_reload_late();
mutex_unlock(&microcode_mutex);
+
+put:
put_online_cpus();
- if (!ret)
+ if (ret >= 0)
ret = size;
return ret;
@@ -606,10 +705,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
if (system_state != SYSTEM_RUNNING)
return UCODE_NFOUND;
- ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
- refresh_fw);
-
- if (ustate == UCODE_OK) {
+ ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, refresh_fw);
+ if (ustate == UCODE_NEW) {
pr_debug("CPU%d updated upon init\n", cpu);
apply_microcode_on_target(cpu);
}
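
The late-load path above is a two-phase barrier built from two atomics: each CPU bumps late_cpus_in and spins until everyone has arrived, performs the serialized update, then repeats the dance on late_cpus_out. A user-space sketch of the same shape, using pthreads and C11 atomics with an arbitrary thread count and no timeout handling:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int cpus_in, cpus_out;
static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;

static void wait_for_all(atomic_int *t)
{
	atomic_fetch_add(t, 1);
	while (atomic_load(t) < NCPUS)
		;       /* the kernel also ndelay()s, times out and pets the NMI watchdog */
}

static void *reload_one(void *arg)
{
	long cpu = (long)arg;

	wait_for_all(&cpus_in);             /* phase 1: everyone arrives */

	pthread_mutex_lock(&update_lock);   /* serialized "microcode write" */
	printf("cpu %ld: applying update\n", cpu);
	pthread_mutex_unlock(&update_lock);

	wait_for_all(&cpus_out);            /* phase 2: everyone is done */
	return NULL;
}

int main(void)
{
	pthread_t th[NCPUS];

	for (long i = 0; i < NCPUS; i++)
		pthread_create(&th[i], NULL, reload_one, (void *)i);
	for (int i = 0; i < NCPUS; i++)
		pthread_join(th[i], NULL);
	return 0;
}
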
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index a15db2b..32b8e57 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early)
if (!mc)
return 0;
+ /*
+ * Save us the MSR write below - which is a particularly expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ return UCODE_OK;
+ }
+
+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
return 0;
}
-static int apply_microcode_intel(int cpu)
+static enum ucode_state apply_microcode_intel(int cpu)
{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
- struct ucode_cpu_info *uci;
- struct cpuinfo_x86 *c;
static int prev_rev;
u32 rev;
/* We should bind the task to the CPU */
if (WARN_ON(raw_smp_processor_id() != cpu))
- return -1;
+ return UCODE_ERROR;
- uci = ucode_cpu_info + cpu;
- mc = uci->mc;
+ /* Look for a newer patch in our cache: */
+ mc = find_patch(uci);
if (!mc) {
- /* Look for a newer patch in our cache: */
- mc = find_patch(uci);
+ mc = uci->mc;
if (!mc)
- return 0;
+ return UCODE_NFOUND;
}
+ /*
+ * Save us the MSR write below - which is a particularly expensive
+ * operation - when the other hyperthread has updated the microcode
+ * already.
+ */
+ rev = intel_get_microcode_revision();
+ if (rev >= mc->hdr.rev) {
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+ return UCODE_OK;
+ }
+
+ /*
+ * Writeback and invalidate caches before updating microcode to avoid
+ * internal issues depending on what the microcode is updating.
+ */
+ native_wbinvd();
+
/* write microcode via MSR 0x79 */
wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits);
@@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu)
if (rev != mc->hdr.rev) {
pr_err("CPU%d update to revision 0x%x failed\n",
cpu, mc->hdr.rev);
- return -1;
+ return UCODE_ERROR;
}
if (rev != prev_rev) {
@@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu)
prev_rev = rev;
}
- c = &cpu_data(cpu);
-
uci->cpu_sig.rev = rev;
c->microcode = rev;
- return 0;
+ return UCODE_UPDATED;
}
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
@@ -830,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
unsigned int leftover = size;
unsigned int curr_mc_size = 0, new_mc_size = 0;
unsigned int csig, cpf;
+ enum ucode_state ret = UCODE_OK;
while (leftover) {
struct microcode_header_intel mc_header;
@@ -871,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
new_mc = mc;
new_mc_size = mc_size;
mc = NULL; /* trigger new vmalloc */
+ ret = UCODE_NEW;
}
ucode_ptr += mc_size;
@@ -900,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
- return UCODE_OK;
+ return ret;
}
static int get_ucode_fw(void *to, const void *from, size_t n)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9340f41..031082c 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -22,7 +22,7 @@
#include <linux/kexec.h>
#include <asm/processor.h>
#include <asm/hypervisor.h>
-#include <asm/hyperv.h>
+#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
#include <asm/desc.h>
#include <asm/irq_regs.h>
@@ -37,6 +37,7 @@ EXPORT_SYMBOL_GPL(ms_hyperv);
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
+static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
static void (*hv_crash_handler)(struct pt_regs *regs);
@@ -69,6 +70,41 @@ void hv_remove_vmbus_irq(void)
EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
+/*
+ * Routines to do per-architecture handling of stimer0
+ * interrupts when in Direct Mode
+ */
+
+__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ entering_irq();
+ inc_irq_stat(hyperv_stimer0_count);
+ if (hv_stimer0_handler)
+ hv_stimer0_handler();
+ ack_APIC_irq();
+
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
+
+int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
+{
+ *vector = HYPERV_STIMER0_VECTOR;
+ *irq = 0; /* Unused on x86/x64 */
+ hv_stimer0_handler = handler;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq);
+
+void hv_remove_stimer0_irq(int irq)
+{
+ /* We have no way to deallocate the interrupt gate */
+ hv_stimer0_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq);
+
void hv_setup_kexec_handler(void (*handler)(void))
{
hv_kexec_handler = handler;
@@ -180,8 +216,8 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
- ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS);
- ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS);
+ ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
+ ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
@@ -189,11 +225,12 @@ static void __init ms_hyperv_init_platform(void)
/*
* Extract host information.
*/
- if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) {
- hv_host_info_eax = cpuid_eax(HVCPUID_VERSION);
- hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION);
- hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION);
- hv_host_info_edx = cpuid_edx(HVCPUID_VERSION);
+ if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >=
+ HYPERV_CPUID_VERSION) {
+ hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
+ hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
+ hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
+ hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION);
pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
hv_host_info_eax, hv_host_info_ebx >> 16,
@@ -207,6 +244,11 @@ static void __init ms_hyperv_init_platform(void)
x86_platform.calibrate_cpu = hv_get_tsc_khz;
}
+ if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
+ ms_hyperv.nested_features =
+ cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
+ }
+
#ifdef CONFIG_X86_LOCAL_APIC
if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS &&
ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) {
@@ -257,6 +299,10 @@ static void __init ms_hyperv_init_platform(void)
alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR,
hyperv_reenlightenment_vector);
+ /* Setup the IDT for stimer0 */
+ if (ms_hyperv.misc_features & HV_X64_STIMER_DIRECT_MODE_AVAILABLE)
+ alloc_intr_gate(HYPERV_STIMER0_VECTOR,
+ hv_stimer0_callback_vector);
#endif
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 0931a10..1d300f9 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,6 +40,7 @@
#include <linux/notifier.h>
#include <linux/uaccess.h>
#include <linux/gfp.h>
+#include <linux/completion.h>
#include <asm/processor.h>
#include <asm/msr.h>
@@ -47,19 +48,27 @@
static struct class *cpuid_class;
static enum cpuhp_state cpuhp_cpuid_state;
+struct cpuid_regs_done {
+ struct cpuid_regs regs;
+ struct completion done;
+};
+
static void cpuid_smp_cpuid(void *cmd_block)
{
- struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
+ struct cpuid_regs_done *cmd = cmd_block;
+
+ cpuid_count(cmd->regs.eax, cmd->regs.ecx,
+ &cmd->regs.eax, &cmd->regs.ebx,
+ &cmd->regs.ecx, &cmd->regs.edx);
- cpuid_count(cmd->eax, cmd->ecx,
- &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
+ complete(&cmd->done);
}
static ssize_t cpuid_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
char __user *tmp = buf;
- struct cpuid_regs cmd;
+ struct cpuid_regs_done cmd;
int cpu = iminor(file_inode(file));
u64 pos = *ppos;
ssize_t bytes = 0;
@@ -68,19 +77,28 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
if (count % 16)
return -EINVAL; /* Invalid chunk size */
+ init_completion(&cmd.done);
for (; count; count -= 16) {
- cmd.eax = pos;
- cmd.ecx = pos >> 32;
- err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
+ call_single_data_t csd = {
+ .func = cpuid_smp_cpuid,
+ .info = &cmd,
+ };
+
+ cmd.regs.eax = pos;
+ cmd.regs.ecx = pos >> 32;
+
+ err = smp_call_function_single_async(cpu, &csd);
if (err)
break;
- if (copy_to_user(tmp, &cmd, 16)) {
+ wait_for_completion(&cmd.done);
+ if (copy_to_user(tmp, &cmd.regs, 16)) {
err = -EFAULT;
break;
}
tmp += 16;
bytes += 16;
*ppos = ++pos;
+ reinit_completion(&cmd.done);
}
return bytes ? bytes : err;
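
The cpuid_read() rework pairs an asynchronous cross-CPU call with a completion rather than issuing a synchronous IPI for every 16-byte chunk. The submit-then-wait shape, with a condition variable standing in for struct completion, looks roughly like this; the worker, the request layout, and all names are illustrative:

#include <pthread.h>
#include <stdio.h>

struct completion {
	pthread_mutex_t lock;
	pthread_cond_t  cond;
	int             done;
};

struct cpuid_req {
	unsigned int eax, ebx, ecx, edx;
	struct completion done;
};

static void complete(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	c->done = 1;
	pthread_cond_signal(&c->cond);
	pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
	pthread_mutex_lock(&c->lock);
	while (!c->done)
		pthread_cond_wait(&c->cond, &c->lock);
	c->done = 0;                    /* re-arm, like reinit_completion() */
	pthread_mutex_unlock(&c->lock);
}

/* Stands in for cpuid_smp_cpuid() running on the target CPU. */
static void *remote_worker(void *arg)
{
	struct cpuid_req *req = arg;

	req->ebx = req->eax + 1;        /* fake "CPUID" result */
	complete(&req->done);
	return NULL;
}

int main(void)
{
	struct cpuid_req req = {
		.eax  = 7,
		.done = { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 },
	};
	pthread_t worker;

	pthread_create(&worker, NULL, remote_worker, &req);  /* "async IPI" */
	wait_for_completion(&req.done);                      /* block until answered */
	printf("ebx=%u\n", req.ebx);

	pthread_join(worker, NULL);
	return 0;
}
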
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 10e74d4..1f66804 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -199,9 +199,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_X86_IO_APIC
/* Prevent crash_kexec() from deadlocking on ioapic_lock. */
ioapic_zap_locks();
- disable_IO_APIC();
+ clear_IO_APIC();
#endif
lapic_shutdown();
+ restore_boot_irq_mode();
#ifdef CONFIG_HPET_TIMER
hpet_disable();
#endif
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 25de5f6..f39f3a0 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -11,6 +11,7 @@
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/of_irq.h>
+#include <linux/libfdt.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/of_pci.h>
@@ -130,34 +131,52 @@ static void __init dtb_setup_hpet(void)
#endif
}
+#ifdef CONFIG_X86_LOCAL_APIC
+
+static void __init dtb_cpu_setup(void)
+{
+ struct device_node *dn;
+ u32 apic_id, version;
+ int ret;
+
+ version = GET_APIC_VERSION(apic_read(APIC_LVR));
+ for_each_node_by_type(dn, "cpu") {
+ ret = of_property_read_u32(dn, "reg", &apic_id);
+ if (ret < 0) {
+ pr_warn("%pOF: missing local APIC ID\n", dn);
+ continue;
+ }
+ generic_processor_info(apic_id, version);
+ }
+}
+
static void __init dtb_lapic_setup(void)
{
-#ifdef CONFIG_X86_LOCAL_APIC
struct device_node *dn;
struct resource r;
+ unsigned long lapic_addr = APIC_DEFAULT_PHYS_BASE;
int ret;
dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic");
- if (!dn)
- return;
-
- ret = of_address_to_resource(dn, 0, &r);
- if (WARN_ON(ret))
- return;
+ if (dn) {
+ ret = of_address_to_resource(dn, 0, &r);
+ if (WARN_ON(ret))
+ return;
+ lapic_addr = r.start;
+ }
/* Did the boot loader setup the local APIC ? */
if (!boot_cpu_has(X86_FEATURE_APIC)) {
- if (apic_force_enable(r.start))
+ if (apic_force_enable(lapic_addr))
return;
}
smp_found_config = 1;
pic_mode = 1;
- register_lapic_address(r.start);
- generic_processor_info(boot_cpu_physical_apicid,
- GET_APIC_VERSION(apic_read(APIC_LVR)));
-#endif
+ register_lapic_address(lapic_addr);
}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
#ifdef CONFIG_X86_IO_APIC
static unsigned int ioapic_id;
@@ -194,19 +213,22 @@ static struct of_ioapic_type of_ioapic_type[] =
static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
- struct of_phandle_args *irq_data = (void *)arg;
+ struct irq_fwspec *fwspec = (struct irq_fwspec *)arg;
struct of_ioapic_type *it;
struct irq_alloc_info tmp;
+ int type_index;
- if (WARN_ON(irq_data->args_count < 2))
+ if (WARN_ON(fwspec->param_count < 2))
return -EINVAL;
- if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type))
+
+ type_index = fwspec->param[1];
+ if (type_index >= ARRAY_SIZE(of_ioapic_type))
return -EINVAL;
- it = &of_ioapic_type[irq_data->args[1]];
+ it = &of_ioapic_type[type_index];
ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity);
tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
- tmp.ioapic_pin = irq_data->args[0];
+ tmp.ioapic_pin = fwspec->param[0];
return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp);
}
@@ -255,11 +277,14 @@ static void __init dtb_ioapic_setup(void) {}
static void __init dtb_apic_setup(void)
{
+#ifdef CONFIG_X86_LOCAL_APIC
dtb_lapic_setup();
+ dtb_cpu_setup();
+#endif
dtb_ioapic_setup();
}
-#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_OF_EARLY_FLATTREE
static void __init x86_flattree_get_config(void)
{
u32 size, map_len;
@@ -270,14 +295,15 @@ static void __init x86_flattree_get_config(void)
map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
- initial_boot_params = dt = early_memremap(initial_dtb, map_len);
- size = of_get_flat_dt_size();
+ dt = early_memremap(initial_dtb, map_len);
+ size = fdt_totalsize(dt);
if (map_len < size) {
early_memunmap(dt, map_len);
- initial_boot_params = dt = early_memremap(initial_dtb, size);
+ dt = early_memremap(initial_dtb, size);
map_len = size;
}
+ early_init_dt_verify(dt);
unflatten_and_copy_device_tree();
early_memunmap(dt, map_len);
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index a2d8a39..18fa9d7 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -24,7 +24,7 @@
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
-unsigned int code_bytes = 64;
+static unsigned int code_bytes = 64;
static int die_counter;
bool in_task_stack(unsigned long *stack, struct task_struct *task,
@@ -375,3 +375,50 @@ static int __init code_bytes_setup(char *s)
return 1;
}
__setup("code_bytes=", code_bytes_setup);
+
+void show_regs(struct pt_regs *regs)
+{
+ bool all = true;
+ int i;
+
+ show_regs_print_info(KERN_DEFAULT);
+
+ if (IS_ENABLED(CONFIG_X86_32))
+ all = !user_mode(regs);
+
+ __show_regs(regs, all);
+
+ /*
+ * When in-kernel, we also print out the stack and code at the
+ * time of the fault.
+ */
+ if (!user_mode(regs)) {
+ unsigned int code_prologue = code_bytes * 43 / 64;
+ unsigned int code_len = code_bytes;
+ unsigned char c;
+ u8 *ip;
+
+ show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
+
+ printk(KERN_DEFAULT "Code: ");
+
+ ip = (u8 *)regs->ip - code_prologue;
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
+ /* try starting at IP */
+ ip = (u8 *)regs->ip;
+ code_len = code_len - code_prologue + 1;
+ }
+ for (i = 0; i < code_len; i++, ip++) {
+ if (ip < (u8 *)PAGE_OFFSET ||
+ probe_kernel_address(ip, c)) {
+ pr_cont(" Bad RIP value.");
+ break;
+ }
+ if (ip == (u8 *)regs->ip)
+ pr_cont("<%02x> ", c);
+ else
+ pr_cont("%02x ", c);
+ }
+ }
+ pr_cont("\n");
+}
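
The consolidated show_regs() keeps the long-standing heuristic: dump code_bytes bytes around the faulting instruction, starting code_bytes * 43 / 64 bytes before it, and bracket the byte at IP. A user-space imitation over an arbitrary function is sketched below; reading code bytes through a cast function pointer is implementation-defined and purely illustrative:

#include <stdio.h>

static unsigned int code_bytes = 64;

static void dump_code(const unsigned char *ip)
{
	unsigned int code_prologue = code_bytes * 43 / 64;
	const unsigned char *p = ip - code_prologue;

	printf("Code: ");
	for (unsigned int i = 0; i < code_bytes; i++, p++) {
		if (p == ip)
			printf("<%02x> ", *p);  /* mark the "faulting" byte */
		else
			printf("%02x ", *p);
	}
	printf("\n");
}

int main(void)
{
	/* Dump the bytes around main() itself as a stand-in for regs->ip. */
	dump_code((const unsigned char *)main + 8);
	return 0;
}
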
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 04170f6..cd53f30 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -127,45 +127,3 @@ unknown:
info->type = STACK_TYPE_UNKNOWN;
return -EINVAL;
}
-
-void show_regs(struct pt_regs *regs)
-{
- int i;
-
- show_regs_print_info(KERN_EMERG);
- __show_regs(regs, !user_mode(regs));
-
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
- */
- if (!user_mode(regs)) {
- unsigned int code_prologue = code_bytes * 43 / 64;
- unsigned int code_len = code_bytes;
- unsigned char c;
- u8 *ip;
-
- show_trace_log_lvl(current, regs, NULL, KERN_EMERG);
-
- pr_emerg("Code:");
-
- ip = (u8 *)regs->ip - code_prologue;
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
- /* try starting at IP */
- ip = (u8 *)regs->ip;
- code_len = code_len - code_prologue + 1;
- }
- for (i = 0; i < code_len; i++, ip++) {
- if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
- pr_cont(" Bad EIP value.");
- break;
- }
- if (ip == (u8 *)regs->ip)
- pr_cont(" <%02x>", c);
- else
- pr_cont(" %02x", c);
- }
- }
- pr_cont("\n");
-}
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 563e28d..5cdb9e8 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -149,45 +149,3 @@ unknown:
info->type = STACK_TYPE_UNKNOWN;
return -EINVAL;
}
-
-void show_regs(struct pt_regs *regs)
-{
- int i;
-
- show_regs_print_info(KERN_DEFAULT);
- __show_regs(regs, 1);
-
- /*
- * When in-kernel, we also print out the stack and code at the
- * time of the fault..
- */
- if (!user_mode(regs)) {
- unsigned int code_prologue = code_bytes * 43 / 64;
- unsigned int code_len = code_bytes;
- unsigned char c;
- u8 *ip;
-
- show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
-
- printk(KERN_DEFAULT "Code: ");
-
- ip = (u8 *)regs->ip - code_prologue;
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
- /* try starting at IP */
- ip = (u8 *)regs->ip;
- code_len = code_len - code_prologue + 1;
- }
- for (i = 0; i < code_len; i++, ip++) {
- if (ip < (u8 *)PAGE_OFFSET ||
- probe_kernel_address(ip, c)) {
- pr_cont(" Bad RIP value.");
- break;
- }
- if (ip == (u8 *)regs->ip)
- pr_cont("<%02x> ", c);
- else
- pr_cont("%02x ", c);
- }
- }
- pr_cont("\n");
-}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 71c11ad..6a2cb14 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p)
} else if (*p == '!') {
start_at = memparse(p+1, &p);
e820__range_add(start_at, mem_size, E820_TYPE_PRAM);
+ } else if (*p == '%') {
+ enum e820_type from = 0, to = 0;
+
+ start_at = memparse(p + 1, &p);
+ if (*p == '-')
+ from = simple_strtoull(p + 1, &p, 0);
+ if (*p == '+')
+ to = simple_strtoull(p + 1, &p, 0);
+ if (*p != '\0')
+ return -EINVAL;
+ if (from && to)
+ e820__range_update(start_at, mem_size, from, to);
+ else if (to)
+ e820__range_add(start_at, mem_size, to);
+ else if (from)
+ e820__range_remove(start_at, mem_size, from, 1);
+ else
+ e820__range_remove(start_at, mem_size, 0, 0);
} else {
e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
}
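
The new '%' branch implements a memmap=nn[KMG]%start-oldtype+newtype option: update a range from one e820 type to another, add it if only '+type' is given, or remove it if only '-type' is given. A small user-space parser following the same structure is sketched below; memparse() and the e820 calls are mocked, and only the syntax handling mirrors the hunk above:

#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-in for the kernel's memparse(): number with optional K/M/G. */
static unsigned long long memparse(const char *s, char **end)
{
	unsigned long long v = strtoull(s, end, 0);

	switch (**end) {
	case 'G': v <<= 10; /* fall through */
	case 'M': v <<= 10; /* fall through */
	case 'K': v <<= 10; (*end)++; break;
	}
	return v;
}

static int parse_memmap_percent(char *p, unsigned long long mem_size)
{
	unsigned long long start, from = 0, to = 0;

	start = memparse(p + 1, &p);        /* p points at '%' on entry */
	if (*p == '-')
		from = strtoull(p + 1, &p, 0);
	if (*p == '+')
		to = strtoull(p + 1, &p, 0);
	if (*p != '\0')
		return -1;

	if (from && to)
		printf("update %#llx+%#llx: type %llu -> %llu\n", start, mem_size, from, to);
	else if (to)
		printf("add    %#llx+%#llx as type %llu\n", start, mem_size, to);
	else if (from)
		printf("remove %#llx+%#llx of type %llu\n", start, mem_size, from);
	else
		printf("remove %#llx+%#llx (any type)\n", start, mem_size);
	return 0;
}

int main(void)
{
	/* e.g. the tail of memmap=64K%4G-1+12 after the size has been parsed */
	char arg[] = "%0x100000000-1+12";

	return parse_memmap_percent(arg, 64 * 1024);
}
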
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7ba5d81..0c855de 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -32,6 +32,11 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
+#ifdef CONFIG_X86_5LEVEL
+#undef pgtable_l5_enabled
+#define pgtable_l5_enabled __pgtable_l5_enabled
+#endif
+
/*
* Manage page tables very early on.
*/
@@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
+#ifdef CONFIG_X86_5LEVEL
+unsigned int __pgtable_l5_enabled __ro_after_init;
+EXPORT_SYMBOL(__pgtable_l5_enabled);
+unsigned int pgdir_shift __ro_after_init = 39;
+EXPORT_SYMBOL(pgdir_shift);
+unsigned int ptrs_per_p4d __ro_after_init = 1;
+EXPORT_SYMBOL(ptrs_per_p4d);
+#endif
+
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
+unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
+EXPORT_SYMBOL(page_offset_base);
+unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
+EXPORT_SYMBOL(vmalloc_base);
+unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
+EXPORT_SYMBOL(vmemmap_base);
+#endif
+
#define __head __section(.head.text)
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
@@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
return ptr - (void *)_text + (void *)physaddr;
}
+static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr)
+{
+ return fixup_pointer(ptr, physaddr);
+}
+
+#ifdef CONFIG_X86_5LEVEL
+static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
+{
+ return fixup_pointer(ptr, physaddr);
+}
+
+static bool __head check_la57_support(unsigned long physaddr)
+{
+ if (native_cpuid_eax(0) < 7)
+ return false;
+
+ if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return false;
+
+ *fixup_int(&pgtable_l5_enabled, physaddr) = 1;
+ *fixup_int(&pgdir_shift, physaddr) = 48;
+ *fixup_int(&ptrs_per_p4d, physaddr) = 512;
+ *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
+ *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5;
+ *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5;
+
+ return true;
+}
+#else
+static bool __head check_la57_support(unsigned long physaddr)
+{
+ return false;
+}
+#endif
+
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
@@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr,
p4dval_t *p4d;
pudval_t *pud;
pmdval_t *pmd, pmd_entry;
+ bool la57;
int i;
unsigned int *next_pgt_ptr;
+ la57 = check_la57_support(physaddr);
+
/* Is the address too large? */
if (physaddr >> MAX_PHYSMEM_BITS)
for (;;);
@@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Fixup the physical addresses in the page table */
pgd = fixup_pointer(&early_top_pgt, physaddr);
- pgd[pgd_index(__START_KERNEL_map)] += load_delta;
-
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p = pgd + pgd_index(__START_KERNEL_map);
+ if (la57)
+ *p = (unsigned long)level4_kernel_pgt;
+ else
+ *p = (unsigned long)level3_kernel_pgt;
+ *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta;
+
+ if (la57) {
p4d = fixup_pointer(&level4_kernel_pgt, physaddr);
p4d[511] += load_delta;
}
@@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (la57) {
p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr);
i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
@@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
* Fixup phys_base - remove the memory encryption mask to obtain
* the true physical address.
*/
- p = fixup_pointer(&phys_base, physaddr);
- *p += load_delta - sme_get_me_mask();
+ *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask();
/* Encrypt the kernel and related (if SME is active) */
sme_encrypt_kernel(bp);
@@ -206,7 +271,7 @@ again:
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (!pgtable_l5_enabled)
p4d_p = pgd_p;
else if (pgd)
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
@@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
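
fixup_pointer(), fixup_long() and fixup_int() all express the same relocation: before paging reflects the kernel's link-time layout, a symbol's current location is symbol - _text + physaddr. The arithmetic in isolation, with made-up addresses rather than real kernel ones:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative values, not real kernel addresses. */
	uint64_t link_text   = 0xffffffff81000000ULL;  /* where _text is linked */
	uint64_t phys_load   = 0x0000000004000000ULL;  /* where it actually landed */
	uint64_t some_symbol = 0xffffffff81234560ULL;  /* link-time address of a variable */

	/* fixup_pointer(): ptr - (void *)_text + (void *)physaddr */
	uint64_t runtime = some_symbol - link_text + phys_load;

	printf("symbol is currently accessible at %#llx\n",
	       (unsigned long long)runtime);
	return 0;
}
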
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 04a625f..48385c1 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -23,6 +23,7 @@
#include <asm/nops.h>
#include "../entry/calling.h"
#include <asm/export.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -38,12 +39,12 @@
*
*/
+#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
-PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
-#endif
+L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
+L4_START_KERNEL = l4_index(__START_KERNEL_map)
+
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
@@ -124,7 +125,10 @@ ENTRY(secondary_startup_64)
/* Enable PAE mode, PGE and LA57 */
movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
+ testl $1, __pgtable_l5_enabled(%rip)
+ jz 1f
orl $X86_CR4_LA57, %ecx
+1:
#endif
movq %rcx, %cr4
@@ -134,6 +138,7 @@ ENTRY(secondary_startup_64)
/* Ensure I am executing from virtual addresses */
movq $1f, %rax
+ ANNOTATE_RETPOLINE_SAFE
jmp *%rax
1:
UNWIND_HINT_EMPTY
@@ -372,12 +377,7 @@ GLOBAL(name)
__INITDATA
NEXT_PGD_PAGE(early_top_pgt)
- .fill 511,8,0
-#ifdef CONFIG_X86_5LEVEL
- .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
-#else
- .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
-#endif
+ .fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts)
@@ -388,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts)
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
- .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
+ .org init_top_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
- .org init_top_pgt + PGD_START_KERNEL*8, 0
+ .org init_top_pgt + L4_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 56d99be..2c3a1b4 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -140,6 +140,9 @@ static const __initconst struct idt_data apic_idts[] = {
# ifdef CONFIG_IRQ_WORK
INTG(IRQ_WORK_VECTOR, irq_work_interrupt),
# endif
+#ifdef CONFIG_X86_UV
+ INTG(UV_BAU_MESSAGE, uv_bau_message_intr1),
+#endif
INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt),
INTG(ERROR_APIC_VECTOR, error_interrupt),
#endif
@@ -160,7 +163,6 @@ static const __initconst struct idt_data early_pf_idts[] = {
*/
static const __initconst struct idt_data dbg_idts[] = {
INTG(X86_TRAP_DB, debug),
- INTG(X86_TRAP_BP, int3),
};
#endif
@@ -183,7 +185,6 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
static const __initconst struct idt_data ist_idts[] = {
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
- SISTG(X86_TRAP_BP, int3, DEBUG_STACK),
ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK),
#ifdef CONFIG_X86_MCE
ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 2f72330..0fe1c87 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -23,7 +23,7 @@
/*
* this changes the io permissions bitmap in the current task.
*/
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
struct thread_struct *t = &current->thread;
struct tss_struct *tss;
@@ -96,6 +96,11 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
return 0;
}
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
+{
+ return ksys_ioperm(from, num, turn_on);
+}
+
/*
* sys_iopl has to be used when you want to access the IO ports
* beyond the 0x3ff range: to get the full 65536 ports bitmapped
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 45fb4d2..328d027 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -150,6 +150,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
irq_stats(j)->irq_hv_reenlightenment_count);
seq_puts(p, " Hyper-V reenlightenment interrupts\n");
}
+ if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
+ seq_printf(p, "%*s: ", prec, "HVS");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ",
+ irq_stats(j)->hyperv_stimer0_count);
+ seq_puts(p, " Hyper-V stimer0 interrupts\n");
+ }
#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index a539410..772196c 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -61,9 +61,14 @@ void __init init_ISA_irqs(void)
struct irq_chip *chip = legacy_pic->chip;
int i;
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+ /*
+ * Try to set up the through-local-APIC virtual wire mode earlier.
+ *
+ * On some 32-bit UP machines whose APIC has been disabled by the BIOS
+ * and then re-enabled via "lapic", boot hangs without this call.
+ */
init_bsp_APIC();
-#endif
+
legacy_pic->init(0);
for (i = 0; i < nr_legacy_irqs(); i++)
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index b68fd89..fa183a1 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -124,6 +124,14 @@ static int __init jailhouse_pci_arch_init(void)
if (pcibios_last_bus < 0)
pcibios_last_bus = 0xff;
+#ifdef CONFIG_PCI_MMCONFIG
+ if (setup_data.pci_mmconfig_base) {
+ pci_mmconfig_add(0, 0, pcibios_last_bus,
+ setup_data.pci_mmconfig_base);
+ pci_mmcfg_arch_init();
+ }
+#endif
+
return 0;
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index bd36f3c..0715f82 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler);
bool arch_within_kprobe_blacklist(unsigned long addr)
{
+ bool is_in_entry_trampoline_section = false;
+
+#ifdef CONFIG_X86_64
+ is_in_entry_trampoline_section =
+ (addr >= (unsigned long)__entry_trampoline_start &&
+ addr < (unsigned long)__entry_trampoline_end);
+#endif
return (addr >= (unsigned long)__kprobes_text_start &&
addr < (unsigned long)__kprobes_text_end) ||
(addr >= (unsigned long)__entry_text_start &&
- addr < (unsigned long)__entry_text_end);
+ addr < (unsigned long)__entry_text_end) ||
+ is_in_entry_trampoline_section;
}
int __init arch_init_kprobes(void)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4e37d1a..7867417 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -49,7 +49,7 @@
static int kvmapf = 1;
-static int parse_no_kvmapf(char *arg)
+static int __init parse_no_kvmapf(char *arg)
{
kvmapf = 0;
return 0;
@@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg)
early_param("no-kvmapf", parse_no_kvmapf);
static int steal_acc = 1;
-static int parse_no_stealacc(char *arg)
+static int __init parse_no_stealacc(char *arg)
{
steal_acc = 0;
return 0;
@@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static int kvmclock_vsyscall = 1;
-static int parse_no_kvmclock_vsyscall(char *arg)
+static int __init parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
@@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void)
#endif
pa |= KVM_ASYNC_PF_ENABLED;
- /* Async page fault support for L1 hypervisor is optional */
- if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
- (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
- wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
+ if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
+ pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
+
+ wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
smp_processor_id());
@@ -454,6 +454,13 @@ static void __init sev_map_percpu_data(void)
}
#ifdef CONFIG_SMP
+static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
+{
+ native_smp_prepare_cpus(max_cpus);
+ if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ static_branch_disable(&virt_spin_lock_key);
+}
+
static void __init kvm_smp_prepare_boot_cpu(void)
{
/*
@@ -545,7 +552,9 @@ static void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock;
}
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH))
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME))
pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others;
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -555,6 +564,7 @@ static void __init kvm_guest_init(void)
kvm_setup_vsyscall_timeinfo();
#ifdef CONFIG_SMP
+ smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
@@ -604,6 +614,11 @@ unsigned int kvm_arch_para_features(void)
return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}
+unsigned int kvm_arch_para_hints(void)
+{
+ return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
+}
+
static uint32_t __init kvm_detect(void)
{
return kvm_cpuid_base();
@@ -633,7 +648,9 @@ static __init int kvm_setup_pv_tlb_flush(void)
{
int cpu;
- if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) {
+ if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
+ !kvm_para_has_hint(KVM_HINTS_DEDICATED) &&
+ kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu),
GFP_KERNEL, cpu_to_node(cpu));
@@ -728,6 +745,9 @@ void __init kvm_spinlock_init(void)
if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
return;
+ if (kvm_para_has_hint(KVM_HINTS_DEDICATED))
+ return;
+
__pv_init_lock_hash();
pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
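The kvm.c changes above gate paravirt spinlocks and PV TLB flushing on the new KVM_HINTS_DEDICATED hint, which kvm_arch_para_hints() reads from EDX of the KVM_CPUID_FEATURES leaf. A minimal guest-side sketch built on those helpers (in-tree code would normally just call kvm_para_has_hint(); this stand-alone form is only for illustration):

static bool guest_has_dedicated_pcpus(void)
{
	if (!kvm_para_available())
		return false;
	/* KVM_HINTS_DEDICATED is a bit number in the hints word. */
	return kvm_arch_para_hints() & (1u << KVM_HINTS_DEDICATED);
}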
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index edfede7..60cdec6 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -195,11 +195,11 @@ void machine_kexec(struct kimage *image)
/*
* We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
- * paths already have calls to disable_IO_APIC() in
- * one form or other. kexec jump path also need
- * one.
+ * paths already have calls to restore_boot_irq_mode()
+ * in one form or another. The kexec jump path also needs one.
*/
- disable_IO_APIC();
+ clear_IO_APIC();
+ restore_boot_irq_mode();
#endif
}
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 1f790cf..93bd4fb 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -293,11 +293,11 @@ void machine_kexec(struct kimage *image)
/*
* We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
- * paths already have calls to disable_IO_APIC() in
- * one form or other. kexec jump path also need
- * one.
+ * paths already have calls to restore_boot_irq_mode()
+ * in one form or another. The kexec jump path also needs one.
*/
- disable_IO_APIC();
+ clear_IO_APIC();
+ restore_boot_irq_mode();
#endif
}
@@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
+ VMCOREINFO_NUMBER(pgtable_l5_enabled);
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
@@ -542,6 +543,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
value -= (u64)address;
*(u32 *)location = value;
break;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index da0c160..f58336a 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
goto overflow;
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 35c461f..bbfc8b1 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/crash_dump.h>
#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/bitmap.h>
#include <linux/pci_ids.h>
#include <linux/pci.h>
@@ -445,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size,
npages = size >> PAGE_SHIFT;
order = get_order(size);
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
/* alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(flag, order);
if (!ret)
@@ -493,7 +492,7 @@ static const struct dma_map_ops calgary_dma_ops = {
.map_page = calgary_map_page,
.unmap_page = calgary_unmap_page,
.mapping_error = calgary_mapping_error,
- .dma_supported = x86_dma_supported,
+ .dma_supported = dma_direct_supported,
};
static inline void __iomem * busno_to_bbar(unsigned char num)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index df7ab02..77625b6 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -6,7 +6,6 @@
#include <linux/bootmem.h>
#include <linux/gfp.h>
#include <linux/pci.h>
-#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
@@ -18,7 +17,7 @@
static int forbid_dac __read_mostly;
-const struct dma_map_ops *dma_ops = &nommu_dma_ops;
+const struct dma_map_ops *dma_ops = &dma_direct_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -76,70 +75,12 @@ void __init pci_iommu_alloc(void)
}
}
}
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs)
-{
- unsigned long dma_mask;
- struct page *page;
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- dma_addr_t addr;
-
- dma_mask = dma_alloc_coherent_mask(dev, flag);
-
-again:
- page = NULL;
- /* CMA can be used only in the context which permits sleeping */
- if (gfpflags_allow_blocking(flag)) {
- page = dma_alloc_from_contiguous(dev, count, get_order(size),
- flag);
- if (page) {
- addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dma_mask) {
- dma_release_from_contiguous(dev, page, count);
- page = NULL;
- }
- }
- }
- /* fallback */
- if (!page)
- page = alloc_pages_node(dev_to_node(dev), flag, get_order(size));
- if (!page)
- return NULL;
-
- addr = phys_to_dma(dev, page_to_phys(page));
- if (addr + size > dma_mask) {
- __free_pages(page, get_order(size));
-
- if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
- flag = (flag & ~GFP_DMA32) | GFP_DMA;
- goto again;
- }
-
- return NULL;
- }
- memset(page_address(page), 0, size);
- *dma_addr = addr;
- return page_address(page);
-}
-
-void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, unsigned long attrs)
-{
- unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct page *page = virt_to_page(vaddr);
-
- if (!dma_release_from_contiguous(dev, page, count))
- free_pages((unsigned long)vaddr, get_order(size));
-}
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
if (!*dev)
*dev = &x86_dma_fallback_dev;
- *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
-
if (!is_device_dma_capable(*dev))
return false;
return true;
@@ -245,16 +186,6 @@ int arch_dma_supported(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(arch_dma_supported);
-int x86_dma_supported(struct device *dev, u64 mask)
-{
- /* Copied from i386. Doesn't make much sense, because it will
- only work for pci_alloc_coherent.
- The caller just has to use GFP_DMA in this case. */
- if (mask < DMA_BIT_MASK(24))
- return 0;
- return 1;
-}
-
static int __init pci_iommu_init(void)
{
struct iommu_table_entry *p;
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 618285e..ac7ea3a 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
WARN_ON(size == 0);
if (!check_addr("map_single", dev, bus, size))
return NOMMU_MAPPING_ERROR;
- flush_write_buffers();
return bus;
}
@@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
return 0;
s->dma_length = s->length;
}
- flush_write_buffers();
return nents;
}
-static void nommu_sync_single_for_device(struct device *dev,
- dma_addr_t addr, size_t size,
- enum dma_data_direction dir)
-{
- flush_write_buffers();
-}
-
-
-static void nommu_sync_sg_for_device(struct device *dev,
- struct scatterlist *sg, int nelems,
- enum dma_data_direction dir)
-{
- flush_write_buffers();
-}
-
static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return dma_addr == NOMMU_MAPPING_ERROR;
@@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = {
.free = dma_generic_free_coherent,
.map_sg = nommu_map_sg,
.map_page = nommu_map_page,
- .sync_single_for_device = nommu_sync_single_for_device,
- .sync_sg_for_device = nommu_sync_sg_for_device,
.is_phys = 1,
.mapping_error = nommu_mapping_error,
.dma_supported = x86_dma_supported,
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 0ee0f8f3..6615836 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,52 +17,6 @@
int swiotlb __read_mostly;
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs)
-{
- void *vaddr;
-
- /*
- * Don't print a warning when the first allocation attempt fails.
- * swiotlb_alloc_coherent() will print a warning when the DMA
- * memory allocation ultimately failed.
- */
- flags |= __GFP_NOWARN;
-
- vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags,
- attrs);
- if (vaddr)
- return vaddr;
-
- return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
- swiotlb_free_coherent(dev, size, vaddr, dma_addr);
- else
- dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
- .mapping_error = swiotlb_dma_mapping_error,
- .alloc = x86_swiotlb_alloc_coherent,
- .free = x86_swiotlb_free_coherent,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .dma_supported = NULL,
-};
-
/*
* pci_swiotlb_detect_override - set swiotlb to 1 if necessary
*
@@ -112,7 +66,7 @@ void __init pci_swiotlb_init(void)
{
if (swiotlb) {
swiotlb_init(0);
- dma_ops = &x86_swiotlb_dma_ops;
+ dma_ops = &swiotlb_dma_ops;
}
}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9eb448c..4b100fe 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -205,6 +205,20 @@ static __always_inline void save_fsgs(struct task_struct *task)
save_base_legacy(task, task->thread.gsindex, GS);
}
+#if IS_ENABLED(CONFIG_KVM)
+/*
+ * While a process is running, current->thread.fsbase and current->thread.gsbase
+ * may not match the corresponding CPU registers (see save_base_legacy()). KVM
+ * wants an efficient way to save and restore FSBASE and GSBASE.
+ * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE.
+ */
+void save_fsgs_for_kvm(void)
+{
+ save_fsgs(current);
+}
+EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
+#endif
+
static __always_inline void loadseg(enum which_selector which,
unsigned short sel)
{
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2126b9d..725624b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -666,7 +666,7 @@ void native_machine_shutdown(void)
* Even without the erratum, it still makes sense to quiet IO APIC
* before disabling Local APIC.
*/
- disable_IO_APIC();
+ clear_IO_APIC();
#endif
#ifdef CONFIG_SMP
@@ -680,6 +680,7 @@ void native_machine_shutdown(void)
#endif
lapic_shutdown();
+ restore_boot_irq_mode();
#ifdef CONFIG_HPET_TIMER
hpet_disable();
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 69ac9cb..f7b82ed 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -41,11 +41,11 @@ EXPORT_SYMBOL(rtc_lock);
*/
int mach_set_rtc_mmss(const struct timespec *now)
{
- unsigned long nowtime = now->tv_sec;
+ unsigned long long nowtime = now->tv_sec;
struct rtc_time tm;
int retval = 0;
- rtc_time_to_tm(nowtime, &tm);
+ rtc_time64_to_tm(nowtime, &tm);
if (!rtc_valid_tm(&tm)) {
retval = mc146818_set_time(&tm);
if (retval)
@@ -53,7 +53,7 @@ int mach_set_rtc_mmss(const struct timespec *now)
__func__, retval);
} else {
printk(KERN_ERR
- "%s: Invalid RTC value: write of %lx to RTC failed\n",
+ "%s: Invalid RTC value: write of %llx to RTC failed\n",
__func__, nowtime);
retval = -EINVAL;
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1ae67e9..6285697 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -189,9 +189,7 @@ struct ist_info ist_info;
#endif
#else
-struct cpuinfo_x86 boot_cpu_data __read_mostly = {
- .x86_phys_bits = MAX_PHYSMEM_BITS,
-};
+struct cpuinfo_x86 boot_cpu_data __read_mostly;
EXPORT_SYMBOL(boot_cpu_data);
#endif
@@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p)
__flush_tlb_all();
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
+ boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS;
#endif
/*
@@ -1204,20 +1203,13 @@ void __init setup_arch(char **cmdline_p)
kasan_init();
-#ifdef CONFIG_X86_32
- /* sync back kernel address range */
- clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
/*
- * sync back low identity map too. It is used for example
- * in the 32-bit EFI stub.
+ * Sync back kernel address range.
+ *
+ * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+ * this call?
*/
- clone_pgd_range(initial_page_table,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+ sync_initial_page_table();
tboot_probe();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 497aa76..ea554f8 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void)
/* Setup cpu initialized, callin, callout masks */
setup_cpu_local_masks();
-#ifdef CONFIG_X86_32
/*
* Sync back kernel address range again. We already did this in
* setup_arch(), but percpu data also needs to be available in
* the smpboot asm. We can't reliably pick up percpu mappings
* using vmalloc_fault(), because exception dispatch needs
* percpu data.
+ *
+ * FIXME: Can the later sync in setup_cpu_entry_areas() replace
+ * this call?
*/
- clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
-
- /*
- * sync back low identity map too. It is used for example
- * in the 32-bit EFI stub.
- */
- clone_pgd_range(initial_page_table,
- swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
-#endif
+ sync_initial_page_table();
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4cdc0b2..da270b9 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -25,6 +25,7 @@
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
#include <linux/context_tracking.h>
+#include <linux/syscalls.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -601,7 +602,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
-asmlinkage unsigned long sys_sigreturn(void)
+SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct sigframe __user *frame;
@@ -633,7 +634,7 @@ badframe:
}
#endif /* CONFIG_X86_32 */
-asmlinkage long sys_rt_sigreturn(void)
+SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index ac057f9..df92605 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -26,8 +26,8 @@ static inline void signal_compat_build_tests(void)
* new fields are handled in copy_siginfo_to_user32()!
*/
BUILD_BUG_ON(NSIGILL != 11);
- BUILD_BUG_ON(NSIGFPE != 13);
- BUILD_BUG_ON(NSIGSEGV != 4);
+ BUILD_BUG_ON(NSIGFPE != 14);
+ BUILD_BUG_ON(NSIGSEGV != 7);
BUILD_BUG_ON(NSIGBUS != 5);
BUILD_BUG_ON(NSIGTRAP != 4);
BUILD_BUG_ON(NSIGCHLD != 6);
@@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void)
BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int));
#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name))
+ BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8);
+
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code) != 8);
/*
* Ensure that the size of each si_field never changes.
* If it does, it is a sign that the
@@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void)
CHECK_CSI_SIZE (_kill, 2*sizeof(int));
CHECK_SI_SIZE (_kill, 2*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+
CHECK_CSI_OFFSET(_timer);
CHECK_CSI_SIZE (_timer, 3*sizeof(int));
CHECK_SI_SIZE (_timer, 6*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_rt);
CHECK_CSI_SIZE (_rt, 3*sizeof(int));
CHECK_SI_SIZE (_rt, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14);
+
CHECK_CSI_OFFSET(_sigchld);
CHECK_CSI_SIZE (_sigchld, 5*sizeof(int));
CHECK_SI_SIZE (_sigchld, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime) != 0x1C);
+
#ifdef CONFIG_X86_X32_ABI
CHECK_CSI_OFFSET(_sigchld_x32);
CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int));
/* no _sigchld_x32 in the generic siginfo_t */
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) != 0x20);
#endif
CHECK_CSI_OFFSET(_sigfault);
CHECK_CSI_SIZE (_sigfault, 4*sizeof(int));
CHECK_SI_SIZE (_sigfault, 8*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18);
+
+ BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
+
CHECK_CSI_OFFSET(_sigpoll);
CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int));
CHECK_SI_SIZE (_sigpoll, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x18);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd) != 0x10);
+
CHECK_CSI_OFFSET(_sigsys);
CHECK_CSI_SIZE (_sigsys, 3*sizeof(int));
CHECK_SI_SIZE (_sigsys, 4*sizeof(int));
+ BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x18);
+ BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x1C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall) != 0x10);
+ BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch) != 0x14);
+
/* any new si_fields should be added here */
}
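The BUILD_BUG_ON() additions above pin the siginfo_t and compat_siginfo_t layouts at compile time so an accidental ABI change breaks the build rather than userspace. The technique is generic; a self-contained sketch with a made-up structure (offsets assume a 64-bit build):

struct example_abi {
	int  id;		/* expected at offset 0 */
	int  flags;		/* expected at offset 4 */
	long payload;		/* expected at offset 8 */
};

static inline void example_abi_build_tests(void)
{
	BUILD_BUG_ON(offsetof(struct example_abi, id)      != 0);
	BUILD_BUG_ON(offsetof(struct example_abi, flags)   != 4);
	BUILD_BUG_ON(offsetof(struct example_abi, payload) != 8);
	BUILD_BUG_ON(sizeof(struct example_abi)            != 16);
}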
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9eee25d..ff99e2b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1437,6 +1437,7 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(topology_sibling_cpumask(cpu));
cpumask_clear(topology_core_cpumask(cpu));
c->cpu_core_id = 0;
+ c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
recompute_smt_state();
}
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 676774b..a3f15ed 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -97,7 +97,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
if (off & ~PAGE_MASK)
goto out;
- error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+ error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
out:
return error;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3d9b230..03f3d76 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -577,7 +577,6 @@ do_general_protection(struct pt_regs *regs, long error_code)
}
NOKPROBE_SYMBOL(do_general_protection);
-/* May run on IST stack. */
dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -592,6 +591,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
if (poke_int3_handler(regs))
return;
+ /*
+ * Use ist_enter despite the fact that we don't use an IST stack.
+ * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
+ * mode or even during context tracking state changes.
+ *
+ * This means that we can't schedule. That's okay.
+ */
ist_enter(regs);
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
@@ -609,15 +615,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
SIGTRAP) == NOTIFY_STOP)
goto exit;
- /*
- * Let others (NMI) know that the debug stack is in use
- * as we may switch to the interrupt stack.
- */
- debug_stack_usage_inc();
cond_local_irq_enable(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
cond_local_irq_disable(regs);
- debug_stack_usage_dec();
+
exit:
ist_exit(regs);
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index fb43027..ef32297 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1179,6 +1179,45 @@ struct system_counterval_t convert_art_to_tsc(u64 art)
}
EXPORT_SYMBOL(convert_art_to_tsc);
+/**
+ * convert_art_ns_to_tsc() - Convert ART in nanoseconds to TSC.
+ * @art_ns: ART (Always Running Timer) in unit of nanoseconds
+ *
+ * PTM requires all timestamps to be in units of nanoseconds. When user
+ * software requests a cross-timestamp, this function converts system timestamp
+ * to TSC.
+ *
+ * This is valid when the CPU feature flag X86_FEATURE_TSC_KNOWN_FREQ is set,
+ * indicating that tsc_khz is derived from CPUID[15H]. Drivers should check
+ * that this flag is set before conversion to TSC is attempted.
+ *
+ * Return:
+ * struct system_counterval_t - system counter value with the pointer to the
+ * corresponding clocksource
+ * @cycles: System counter value
+ * @cs: Clocksource corresponding to system counter value. Used
+ * by timekeeping code to verify comparability of two cycle
+ * values.
+ */
+
+struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns)
+{
+ u64 tmp, res, rem;
+
+ rem = do_div(art_ns, USEC_PER_SEC);
+
+ res = art_ns * tsc_khz;
+ tmp = rem * tsc_khz;
+
+ do_div(tmp, USEC_PER_SEC);
+ res += tmp;
+
+ return (struct system_counterval_t) { .cs = art_related_clocksource,
+ .cycles = res};
+}
+EXPORT_SYMBOL(convert_art_ns_to_tsc);
+
+
static void tsc_refine_calibration_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
/**
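The do_div() split in convert_art_ns_to_tsc() above computes cycles = art_ns * tsc_khz / USEC_PER_SEC in two parts so the intermediate product cannot overflow 64 bits. A worked example with illustrative numbers (tsc_khz = 2,000,000, i.e. a 2 GHz TSC; art_ns = 1,500,000,250): the quotient part gives 1500 * 2,000,000 = 3,000,000,000 cycles and the remainder part adds 250 * 2,000,000 / 1,000,000 = 500, for 3,000,000,500 cycles total. The naive single-expression form below is equivalent but overflow-prone for large art_ns, which is exactly what the split avoids:

/* Sketch only: same math as above, without overflow protection. */
static inline u64 art_ns_to_cycles_naive(u64 art_ns, u64 tsc_khz)
{
	return art_ns * tsc_khz / 1000000;	/* USEC_PER_SEC */
}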
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 1f9188f..feb28fe 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -5,7 +5,6 @@
#include <asm/unwind.h>
#include <asm/orc_types.h>
#include <asm/orc_lookup.h>
-#include <asm/sections.h>
#define orc_warn(fmt, ...) \
printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__)
@@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip)
}
/* vmlinux .init slow lookup: */
- if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext)
+ if (init_kernel_text(ip))
return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
__stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5edb27f..9d0b5af 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
return;
check_vip:
- if (VEFLAGS & X86_EFLAGS_VIP) {
+ if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) ==
+ (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) {
save_v86_state(regs, VM86_STI);
return;
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9b138a0..795f3a8 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -102,7 +102,6 @@ SECTIONS
_stext = .;
/* bootstrapping code */
HEAD_TEXT
- . = ALIGN(8);
TEXT_TEXT
SCHED_TEXT
CPUIDLE_TEXT
@@ -118,9 +117,11 @@ SECTIONS
#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(__entry_trampoline_start) = .;
_entry_trampoline = .;
*(.entry_trampoline)
. = ALIGN(PAGE_SIZE);
+ VMLINUX_SYMBOL(__entry_trampoline_end) = .;
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif
@@ -198,7 +199,7 @@ SECTIONS
. = __vvar_beginning_hack + PAGE_SIZE;
} :data
- . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
+ . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
/* Init code and data - will be freed after init */
. = ALIGN(PAGE_SIZE);
@@ -366,8 +367,8 @@ SECTIONS
. = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */
_end = .;
- STABS_DEBUG
- DWARF_DEBUG
+ STABS_DEBUG
+ DWARF_DEBUG
/* Sections to be discarded */
DISCARDS
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 1151ccd..3ab8676 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -8,6 +8,7 @@
#include <linux/export.h>
#include <linux/pci.h>
+#include <asm/acpi.h>
#include <asm/bios_ebda.h>
#include <asm/paravirt.h>
#include <asm/pci_x86.h>
@@ -26,10 +27,11 @@
void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
-int __init iommu_init_noop(void) { return 0; }
-void iommu_shutdown_noop(void) { }
-bool __init bool_x86_init_noop(void) { return false; }
-void x86_op_int_noop(int cpu) { }
+static int __init iommu_init_noop(void) { return 0; }
+static void iommu_shutdown_noop(void) { }
+static bool __init bool_x86_init_noop(void) { return false; }
+static void x86_op_int_noop(int cpu) { }
+static u64 u64_x86_init_noop(void) { return 0; }
/*
* The platform setup functions are preset with the default functions
@@ -90,6 +92,12 @@ struct x86_init_ops x86_init __initdata = {
.guest_late_init = x86_init_noop,
.x2apic_available = bool_x86_init_noop,
.init_mem_mapping = x86_init_noop,
+ .init_after_bootmem = x86_init_noop,
+ },
+
+ .acpi = {
+ .get_root_pointer = u64_x86_init_noop,
+ .reduced_hw_early_init = acpi_generic_reduced_hw_init,
},
};
@@ -146,7 +154,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
}
#endif
-struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
- .read = native_io_apic_read,
- .disable = native_disable_io_apic,
+struct x86_apic_ops x86_apic_ops __ro_after_init = {
+ .io_apic_read = native_io_apic_read,
+ .restore = native_restore_boot_irq_mode,
};
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index a0c5a69..82055b9 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
return -EINVAL;
}
+ best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ if (kvm_hlt_in_guest(vcpu->kvm) && best &&
+ (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
+ best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+
/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu);
@@ -370,7 +375,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
- F(TOPOEXT);
+ F(TOPOEXT) | F(PERFCTR_CORE);
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
@@ -607,7 +612,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
(1 << KVM_FEATURE_PV_UNHALT) |
- (1 << KVM_FEATURE_PV_TLB_FLUSH);
+ (1 << KVM_FEATURE_PV_TLB_FLUSH) |
+ (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d91eaeb..b3705ae 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,6 +30,7 @@
#include "x86.h"
#include "tss.h"
#include "mmu.h"
+#include "pmu.h"
/*
* Operand types
@@ -2887,6 +2888,9 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
return ctxt->ops->cpl(ctxt) > iopl;
}
+#define VMWARE_PORT_VMPORT (0x5658)
+#define VMWARE_PORT_VMRPC (0x5659)
+
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
u16 port, u16 len)
{
@@ -2898,6 +2902,14 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
unsigned mask = (1 << len) - 1;
unsigned long base;
+ /*
+ * VMware allows access to these ports even if they are denied
+ * by the TSS I/O permission bitmap. Mimic that behavior.
+ */
+ if (enable_vmware_backdoor &&
+ ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC)))
+ return true;
+
ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR);
if (!tr_seg.p)
return false;
@@ -4282,6 +4294,13 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
+ /*
+ * VMware allows access to these Pseudo-PMCs even when read via RDPMC
+ * in Ring3 when CR4.PCE=0.
+ */
+ if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx))
+ return X86EMUL_CONTINUE;
+
if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
ctxt->ops->check_pmc(ctxt, rcx))
return emulate_gp(ctxt, 0);
@@ -4498,6 +4517,10 @@ static const struct gprefix pfx_0f_2b = {
ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
};
+static const struct gprefix pfx_0f_10_0f_11 = {
+ I(Unaligned, em_mov), I(Unaligned, em_mov), N, N,
+};
+
static const struct gprefix pfx_0f_28_0f_29 = {
I(Aligned, em_mov), I(Aligned, em_mov), N, N,
};
@@ -4709,7 +4732,9 @@ static const struct opcode twobyte_table[256] = {
DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N,
N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N,
/* 0x10 - 0x1F */
- N, N, N, N, N, N, N, N,
+ GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11),
+ GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11),
+ N, N, N, N, N, N,
D(ImplicitOps | ModRM | SrcMem | NoAccess),
N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess),
/* 0x20 - 0x2F */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index dc97f25..98618e3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -29,6 +29,7 @@
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
+#include <linux/eventfd.h>
#include <asm/apicdef.h>
#include <trace/events/kvm.h>
@@ -74,13 +75,38 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
return false;
}
+static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
+ int vector)
+{
+ if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
+ return;
+
+ if (synic_has_vector_connected(synic, vector))
+ __set_bit(vector, synic->vec_bitmap);
+ else
+ __clear_bit(vector, synic->vec_bitmap);
+
+ if (synic_has_vector_auto_eoi(synic, vector))
+ __set_bit(vector, synic->auto_eoi_bitmap);
+ else
+ __clear_bit(vector, synic->auto_eoi_bitmap);
+}
+
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
u64 data, bool host)
{
- int vector;
+ int vector, old_vector;
+ bool masked;
vector = data & HV_SYNIC_SINT_VECTOR_MASK;
- if (vector < 16 && !host)
+ masked = data & HV_SYNIC_SINT_MASKED;
+
+ /*
+ * Valid vectors are 16-255; however, nested Hyper-V attempts to write
+ * the default '0x10000' value on boot and this should not #GP. We also
+ * need to allow zero-initializing the register from the host.
+ */
+ if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
return 1;
/*
* Guest may configure multiple SINTs to use the same vector, so
@@ -88,18 +114,13 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
* bitmap of vectors with auto-eoi behavior. The bitmaps are
* updated here, and atomically queried on fast paths.
*/
+ old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;
atomic64_set(&synic->sint[sint], data);
- if (synic_has_vector_connected(synic, vector))
- __set_bit(vector, synic->vec_bitmap);
- else
- __clear_bit(vector, synic->vec_bitmap);
+ synic_update_vector(synic, old_vector);
- if (synic_has_vector_auto_eoi(synic, vector))
- __set_bit(vector, synic->auto_eoi_bitmap);
- else
- __clear_bit(vector, synic->auto_eoi_bitmap);
+ synic_update_vector(synic, vector);
/* Load SynIC vectors into EOI exit bitmap */
kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
@@ -736,6 +757,9 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_RESET:
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
r = true;
break;
}
@@ -981,6 +1005,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
kvm_make_request(KVM_REQ_HV_RESET, vcpu);
}
break;
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ hv->hv_reenlightenment_control = data;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ hv->hv_tsc_emulation_control = data;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
+ hv->hv_tsc_emulation_status = data;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
@@ -1009,17 +1042,17 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
hv->vp_index = (u32)data;
break;
- case HV_X64_MSR_APIC_ASSIST_PAGE: {
+ case HV_X64_MSR_VP_ASSIST_PAGE: {
u64 gfn;
unsigned long addr;
- if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
+ if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv->hv_vapic = data;
if (kvm_lapic_enable_pv_eoi(vcpu, 0))
return 1;
break;
}
- gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+ gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
if (kvm_is_error_hva(addr))
return 1;
@@ -1105,6 +1138,15 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case HV_X64_MSR_RESET:
data = 0;
break;
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ data = hv->hv_reenlightenment_control;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ data = hv->hv_tsc_emulation_control;
+ break;
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
+ data = hv->hv_tsc_emulation_status;
+ break;
default:
vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
return 1;
@@ -1129,7 +1171,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
- case HV_X64_MSR_APIC_ASSIST_PAGE:
+ case HV_X64_MSR_VP_ASSIST_PAGE:
data = hv->hv_vapic;
break;
case HV_X64_MSR_VP_RUNTIME:
@@ -1226,10 +1268,47 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
return 1;
}
+static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
+{
+ struct eventfd_ctx *eventfd;
+
+ if (unlikely(!fast)) {
+ int ret;
+ gpa_t gpa = param;
+
+ if ((gpa & (__alignof__(param) - 1)) ||
+ offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
+ return HV_STATUS_INVALID_ALIGNMENT;
+
+ ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
+ if (ret < 0)
+ return HV_STATUS_INVALID_ALIGNMENT;
+ }
+
+ /*
+ * Per spec, bits 32-47 contain the extra "flag number". However, we
+ * have no use for it, and in all known use cases it is zero, so just
+ * report lookup failure if it isn't.
+ */
+ if (param & 0xffff00000000ULL)
+ return HV_STATUS_INVALID_PORT_ID;
+ /* remaining bits are reserved-zero */
+ if (param & ~KVM_HYPERV_CONN_ID_MASK)
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ /* conn_to_evt is protected by vcpu->kvm->srcu */
+ eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param);
+ if (!eventfd)
+ return HV_STATUS_INVALID_PORT_ID;
+
+ eventfd_signal(eventfd, 1);
+ return HV_STATUS_SUCCESS;
+}
+
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
- u64 param, ingpa, outgpa, ret;
- uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
+ u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
+ uint16_t code, rep_idx, rep_cnt;
bool fast, longmode;
/*
@@ -1268,7 +1347,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
/* Hypercall continuation is not supported yet */
if (rep_cnt || rep_idx) {
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ ret = HV_STATUS_INVALID_HYPERCALL_CODE;
goto set_result;
}
@@ -1276,11 +1355,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
kvm_vcpu_on_spin(vcpu, true);
break;
- case HVCALL_POST_MESSAGE:
case HVCALL_SIGNAL_EVENT:
+ ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
+ if (ret != HV_STATUS_INVALID_PORT_ID)
+ break;
+ /* maybe userspace knows this conn_id: fall through */
+ case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
if (!vcpu_to_synic(vcpu)->active) {
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
}
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
@@ -1292,12 +1375,79 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
kvm_hv_hypercall_complete_userspace;
return 0;
default:
- res = HV_STATUS_INVALID_HYPERCALL_CODE;
+ ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
}
set_result:
- ret = res | (((u64)rep_done & 0xfff) << 32);
kvm_hv_hypercall_set_result(vcpu, ret);
return 1;
}
+
+void kvm_hv_init_vm(struct kvm *kvm)
+{
+ mutex_init(&kvm->arch.hyperv.hv_lock);
+ idr_init(&kvm->arch.hyperv.conn_to_evt);
+}
+
+void kvm_hv_destroy_vm(struct kvm *kvm)
+{
+ struct eventfd_ctx *eventfd;
+ int i;
+
+ idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i)
+ eventfd_ctx_put(eventfd);
+ idr_destroy(&kvm->arch.hyperv.conn_to_evt);
+}
+
+static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
+{
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+ struct eventfd_ctx *eventfd;
+ int ret;
+
+ eventfd = eventfd_ctx_fdget(fd);
+ if (IS_ERR(eventfd))
+ return PTR_ERR(eventfd);
+
+ mutex_lock(&hv->hv_lock);
+ ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
+ GFP_KERNEL);
+ mutex_unlock(&hv->hv_lock);
+
+ if (ret >= 0)
+ return 0;
+
+ if (ret == -ENOSPC)
+ ret = -EEXIST;
+ eventfd_ctx_put(eventfd);
+ return ret;
+}
+
+static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
+{
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+ struct eventfd_ctx *eventfd;
+
+ mutex_lock(&hv->hv_lock);
+ eventfd = idr_remove(&hv->conn_to_evt, conn_id);
+ mutex_unlock(&hv->hv_lock);
+
+ if (!eventfd)
+ return -ENOENT;
+
+ synchronize_srcu(&kvm->srcu);
+ eventfd_ctx_put(eventfd);
+ return 0;
+}
+
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
+{
+ if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
+ (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
+ return -EINVAL;
+
+ if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
+ return kvm_hv_eventfd_deassign(kvm, args->conn_id);
+ return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
+}
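kvm_vm_ioctl_hv_eventfd() above is reached from the KVM_HYPERV_EVENTFD VM ioctl, letting a VMM bind a Hyper-V connection id to an eventfd so HVCALL_SIGNAL_EVENT is handled without a userspace exit. A hedged userspace sketch of the expected call sequence (error handling omitted; the struct fields are the ones introduced by this series):

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int hv_listen_on_conn(int vm_fd, __u32 conn_id)
{
	struct kvm_hyperv_eventfd hvevfd = {
		.conn_id = conn_id,	/* must fit KVM_HYPERV_CONN_ID_MASK */
		.fd	 = eventfd(0, EFD_CLOEXEC),
		.flags	 = 0,		/* KVM_HYPERV_EVENTFD_DEASSIGN unbinds */
	};

	if (ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hvevfd) < 0)
		return -1;
	return hvevfd.fd;		/* poll/read this fd for guest signals */
}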
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index e637631..837465d 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -88,4 +88,8 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock);
+void kvm_hv_init_vm(struct kvm *kvm);
+void kvm_hv_destroy_vm(struct kvm *kvm);
+int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
+
#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index f171051..faa2648 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -73,8 +73,19 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
*/
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
{
+ /*
+ * FIXME: interrupt.injected represents an interrupt whose
+ * side effects have already been applied (e.g. the bit has
+ * already been moved from IRR to ISR). Therefore, it is
+ * incorrect to rely on interrupt.injected to know if there
+ * is a pending interrupt in the user-mode LAPIC.
+ * This leads to nVMX/nSVM not being able to distinguish
+ * whether it should exit from L2 to L1 on EXTERNAL_INTERRUPT
+ * because of a pending interrupt or re-inject an already
+ * injected interrupt.
+ */
if (!lapic_in_kernel(v))
- return v->arch.interrupt.pending;
+ return v->arch.interrupt.injected;
if (kvm_cpu_has_extint(v))
return 1;
@@ -91,8 +102,19 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
+ /*
+ * FIXME: interrupt.injected represents an interrupt whose
+ * side effects have already been applied (e.g. the bit has
+ * already been moved from IRR to ISR). Therefore, it is
+ * incorrect to rely on interrupt.injected to know if there
+ * is a pending interrupt in the user-mode LAPIC.
+ * This leads to nVMX/nSVM not being able to distinguish
+ * whether it should exit from L2 to L1 on EXTERNAL_INTERRUPT
+ * because of a pending interrupt or re-inject an already
+ * injected interrupt.
+ */
if (!lapic_in_kernel(v))
- return v->arch.interrupt.pending;
+ return v->arch.interrupt.injected;
if (kvm_cpu_has_extint(v))
return 1;
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f500293..9619dcc 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -41,7 +41,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
if (!test_bit(VCPU_EXREG_PDPTR,
(unsigned long *)&vcpu->arch.regs_avail))
- kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
+ kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR);
return vcpu->arch.walk_mmu->pdptrs[index];
}
@@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
{
vcpu->arch.hflags &= ~HF_GUEST_MASK;
+
+ if (vcpu->arch.load_eoi_exitmap_pending) {
+ vcpu->arch.load_eoi_exitmap_pending = false;
+ kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+ }
}
static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 924ac8c..70dcb55 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -321,8 +321,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
+ /*
+ * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has no
+ * EOI register. Some buggy OSes (e.g. Windows with the Hyper-V role)
+ * disable EOI broadcast in the LAPIC without checking the IOAPIC
+ * version first, so level-triggered interrupts would never get EOIed
+ * in the IOAPIC.
+ */
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+ if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+ !ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
}
@@ -2002,14 +2010,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
{
- struct kvm_lapic *apic;
+ struct kvm_lapic *apic = vcpu->arch.apic;
int i;
- apic_debug("%s\n", __func__);
+ if (!apic)
+ return;
- ASSERT(vcpu);
- apic = vcpu->arch.apic;
- ASSERT(apic != NULL);
+ apic_debug("%s\n", __func__);
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2165,7 +2172,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
- kvm_lapic_reset(vcpu, false);
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
return 0;
@@ -2569,7 +2575,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
pe = xchg(&apic->pending_events, 0);
if (test_bit(KVM_APIC_INIT, &pe)) {
- kvm_lapic_reset(vcpu, true);
kvm_vcpu_reset(vcpu, true);
if (kvm_vcpu_is_bsp(apic->vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 56c3601..edce055 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -109,7 +109,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+ return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
}
int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 46ff304..8494dba 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2770,8 +2770,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
else
pte_access &= ~ACC_WRITE_MASK;
+ if (!kvm_is_mmio_pfn(pfn))
+ spte |= shadow_me_mask;
+
spte |= (u64)pfn << PAGE_SHIFT;
- spte |= shadow_me_mask;
if (pte_access & ACC_WRITE_MASK) {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 5abae72..6288e9d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -452,14 +452,21 @@ error:
* done by is_rsvd_bits_set() above.
*
* We set up the value of exit_qualification to inject:
- * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation
+ * [2:0] - Derive from the access bits. The exit_qualification might be
+ * out of date if it is serving an EPT misconfiguration.
* [5:3] - Calculated by the page walk of the guest EPT page tables
* [7:8] - Derived from [7:8] of real exit_qualification
*
* The other bits are set to 0.
*/
if (!(errcode & PFERR_RSVD_MASK)) {
- vcpu->arch.exit_qualification &= 0x187;
+ vcpu->arch.exit_qualification &= 0x180;
+ if (write_fault)
+ vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE;
+ if (user_fault)
+ vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ;
+ if (fetch_fault)
+ vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR;
vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3;
}
#endif
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 026db42..58ead7d 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -244,12 +244,49 @@ int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx)
return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx);
}
+bool is_vmware_backdoor_pmc(u32 pmc_idx)
+{
+ switch (pmc_idx) {
+ case VMWARE_BACKDOOR_PMC_HOST_TSC:
+ case VMWARE_BACKDOOR_PMC_REAL_TIME:
+ case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
+ return true;
+ }
+ return false;
+}
+
+static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
+{
+ u64 ctr_val;
+
+ switch (idx) {
+ case VMWARE_BACKDOOR_PMC_HOST_TSC:
+ ctr_val = rdtsc();
+ break;
+ case VMWARE_BACKDOOR_PMC_REAL_TIME:
+ ctr_val = ktime_get_boot_ns();
+ break;
+ case VMWARE_BACKDOOR_PMC_APPARENT_TIME:
+ ctr_val = ktime_get_boot_ns() +
+ vcpu->kvm->arch.kvmclock_offset;
+ break;
+ default:
+ return 1;
+ }
+
+ *data = ctr_val;
+ return 0;
+}
+
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
{
bool fast_mode = idx & (1u << 31);
struct kvm_pmc *pmc;
u64 ctr_val;
+ if (is_vmware_backdoor_pmc(idx))
+ return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
+
pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx);
if (!pmc)
return 1;
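The pmu.c hunk above services the VMware backdoor pseudo-PMC indices 0x10000-0x10002 (host TSC, boot-based real time in ns, and the same plus the kvmclock offset) regardless of the usual counter validation. A hedged guest-side sketch of reading one with RDPMC; this is only meaningful when the host runs with the enable_vmware_backdoor parameter referenced above:

#include <stdint.h>

static inline uint64_t rdpmc64(uint32_t idx)
{
	uint32_t lo, hi;

	/* RDPMC: counter index in ECX, result in EDX:EAX. */
	asm volatile("rdpmc" : "=a"(lo), "=d"(hi) : "c"(idx));
	return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
	uint64_t host_tsc = rdpmc64(0x10000);	/* VMWARE_BACKDOOR_PMC_HOST_TSC */

	return host_tsc ? 0 : 1;
}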
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index a9a62b9..ba8898e 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -9,6 +9,10 @@
/* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */
#define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf)
+#define VMWARE_BACKDOOR_PMC_HOST_TSC 0x10000
+#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001
+#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002
+
struct kvm_event_hw_type_mapping {
u8 eventsel;
u8 unit_mask;
@@ -114,6 +118,8 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+bool is_vmware_backdoor_pmc(u32 pmc_idx);
+
extern struct kvm_pmu_ops intel_pmu_ops;
extern struct kvm_pmu_ops amd_pmu_ops;
#endif /* __KVM_X86_PMU_H */
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index cd94443..1495a73 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -19,6 +19,21 @@
#include "lapic.h"
#include "pmu.h"
+enum pmu_type {
+ PMU_TYPE_COUNTER = 0,
+ PMU_TYPE_EVNTSEL,
+};
+
+enum index {
+ INDEX_ZERO = 0,
+ INDEX_ONE,
+ INDEX_TWO,
+ INDEX_THREE,
+ INDEX_FOUR,
+ INDEX_FIVE,
+ INDEX_ERROR,
+};
+
/* duplicated from amd_perfmon_event_map, K7 and above should work. */
static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
@@ -31,6 +46,88 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
[7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
};
+static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type)
+{
+ struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
+ if (type == PMU_TYPE_COUNTER)
+ return MSR_F15H_PERF_CTR;
+ else
+ return MSR_F15H_PERF_CTL;
+ } else {
+ if (type == PMU_TYPE_COUNTER)
+ return MSR_K7_PERFCTR0;
+ else
+ return MSR_K7_EVNTSEL0;
+ }
+}
+
+static enum index msr_to_index(u32 msr)
+{
+ switch (msr) {
+ case MSR_F15H_PERF_CTL0:
+ case MSR_F15H_PERF_CTR0:
+ case MSR_K7_EVNTSEL0:
+ case MSR_K7_PERFCTR0:
+ return INDEX_ZERO;
+ case MSR_F15H_PERF_CTL1:
+ case MSR_F15H_PERF_CTR1:
+ case MSR_K7_EVNTSEL1:
+ case MSR_K7_PERFCTR1:
+ return INDEX_ONE;
+ case MSR_F15H_PERF_CTL2:
+ case MSR_F15H_PERF_CTR2:
+ case MSR_K7_EVNTSEL2:
+ case MSR_K7_PERFCTR2:
+ return INDEX_TWO;
+ case MSR_F15H_PERF_CTL3:
+ case MSR_F15H_PERF_CTR3:
+ case MSR_K7_EVNTSEL3:
+ case MSR_K7_PERFCTR3:
+ return INDEX_THREE;
+ case MSR_F15H_PERF_CTL4:
+ case MSR_F15H_PERF_CTR4:
+ return INDEX_FOUR;
+ case MSR_F15H_PERF_CTL5:
+ case MSR_F15H_PERF_CTR5:
+ return INDEX_FIVE;
+ default:
+ return INDEX_ERROR;
+ }
+}
+
+static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
+ enum pmu_type type)
+{
+ switch (msr) {
+ case MSR_F15H_PERF_CTL0:
+ case MSR_F15H_PERF_CTL1:
+ case MSR_F15H_PERF_CTL2:
+ case MSR_F15H_PERF_CTL3:
+ case MSR_F15H_PERF_CTL4:
+ case MSR_F15H_PERF_CTL5:
+ case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
+ if (type != PMU_TYPE_EVNTSEL)
+ return NULL;
+ break;
+ case MSR_F15H_PERF_CTR0:
+ case MSR_F15H_PERF_CTR1:
+ case MSR_F15H_PERF_CTR2:
+ case MSR_F15H_PERF_CTR3:
+ case MSR_F15H_PERF_CTR4:
+ case MSR_F15H_PERF_CTR5:
+ case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
+ if (type != PMU_TYPE_COUNTER)
+ return NULL;
+ break;
+ default:
+ return NULL;
+ }
+
+ return &pmu->gp_counters[msr_to_index(msr)];
+}
+
static unsigned amd_find_arch_event(struct kvm_pmu *pmu,
u8 event_select,
u8 unit_mask)
@@ -64,7 +161,18 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc)
static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
{
- return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0);
+ unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER);
+ struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) {
+ /*
+ * The idx is contiguous. The MSRs are not. The counter MSRs
+ * are interleaved with the event select MSRs.
+ */
+ pmc_idx *= 2;
+ }
+
+ return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
}
/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
@@ -96,8 +204,8 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int ret = false;
- ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
- get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+ ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) ||
+ get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
return ret;
}
@@ -107,14 +215,14 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct kvm_pmc *pmc;
- /* MSR_K7_PERFCTRn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
+ /* MSR_PERFCTRn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
*data = pmc_read_counter(pmc);
return 0;
}
- /* MSR_K7_EVNTSELn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+ /* MSR_EVNTSELn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
if (pmc) {
*data = pmc->eventsel;
return 0;
@@ -130,14 +238,14 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u32 msr = msr_info->index;
u64 data = msr_info->data;
- /* MSR_K7_PERFCTRn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0);
+ /* MSR_PERFCTRn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
if (pmc) {
pmc->counter += data - pmc_read_counter(pmc);
return 0;
}
- /* MSR_K7_EVNTSELn */
- pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
+ /* MSR_EVNTSELn */
+ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL);
if (pmc) {
if (data == pmc->eventsel)
return 0;
@@ -154,7 +262,11 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
- pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+ if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+ pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE;
+ else
+ pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
+
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
pmu->reserved_bits = 0xffffffff00200000ull;
/* not applicable to AMD; but clean them to prevent any fall out */
@@ -169,7 +281,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
+ BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+
+ for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
pmu->gp_counters[i].type = KVM_PMC_GP;
pmu->gp_counters[i].vcpu = vcpu;
pmu->gp_counters[i].idx = i;
@@ -181,7 +295,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
int i;
- for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
+ for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
struct kvm_pmc *pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3e488a..b58787d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -131,6 +131,28 @@ static const u32 host_save_user_msrs[] = {
#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
+struct kvm_sev_info {
+ bool active; /* SEV enabled guest */
+ unsigned int asid; /* ASID used for this guest */
+ unsigned int handle; /* SEV firmware handle */
+ int fd; /* SEV device fd */
+ unsigned long pages_locked; /* Number of pages locked */
+ struct list_head regions_list; /* List of registered regions */
+};
+
+struct kvm_svm {
+ struct kvm kvm;
+
+ /* Struct members for AVIC */
+ u32 avic_vm_id;
+ u32 ldr_mode;
+ struct page *avic_logical_id_table_page;
+ struct page *avic_physical_id_table_page;
+ struct hlist_node hnode;
+
+ struct kvm_sev_info sev_info;
+};
+
struct kvm_vcpu;
struct nested_state {
@@ -178,6 +200,8 @@ struct vcpu_svm {
uint64_t sysenter_eip;
uint64_t tsc_aux;
+ u64 msr_decfg;
+
u64 next_rip;
u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
@@ -274,6 +298,54 @@ static bool npt_enabled = true;
static bool npt_enabled;
#endif
+/*
+ * These two parameters are used to configure the controls for Pause-Loop Exiting:
+ * pause_filter_count: On processors that support Pause filtering (indicated
+ * by CPUID Fn8000_000A_EDX), the VMCB provides a 16-bit pause filter
+ * count value. On VMRUN this value is loaded into an internal counter.
+ * Each time a pause instruction is executed, this counter is decremented
+ * until it reaches zero, at which time a #VMEXIT is generated if pause
+ * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
+ * Intercept Filtering for more details.
+ * This parameter also indicates whether PLE logic is enabled.
+ *
+ * pause_filter_thresh: In addition, some processor families support advanced
+ * pause filtering (indicated by CPUID Fn8000_000A_EDX), which places an
+ * upper bound on the amount of time a guest is allowed to execute in a
+ * pause loop. In this mode, a 16-bit pause filter threshold field is added
+ * to the VMCB. The threshold value is a cycle count that is used to reset
+ * the pause counter. As with simple pause filtering, VMRUN loads the pause
+ * count value from the VMCB into an internal counter. Then, on each pause
+ * instruction the hardware checks the elapsed number of cycles since
+ * the most recent pause instruction against the pause filter threshold.
+ * If the elapsed cycle count is greater than the pause filter threshold,
+ * then the internal pause count is reloaded from the VMCB and execution
+ * continues. If the elapsed cycle count is less than the pause filter
+ * threshold, then the internal pause count is decremented. If the count
+ * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
+ * triggered. If advanced pause filtering is supported and the pause filter
+ * threshold field is set to zero, the filter operates in the simpler,
+ * count-only mode.
+ */
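/*
 * A compact sketch of the hardware behaviour described above, paraphrasing
 * the APM description rather than quoting it (this is not KVM code):
 *
 *	on each PAUSE:
 *		if (pause_filter_thresh &&
 *		    cycles_since_previous_pause > pause_filter_thresh)
 *			counter = pause_filter_count;	// new spin, reload
 *		else if (--counter <= 0 && pause_intercept_enabled)
 *			#VMEXIT(SVM_EXIT_PAUSE);	// tight loop detected
 */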
+
+static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
+module_param(pause_filter_thresh, ushort, 0444);
+
+static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
+module_param(pause_filter_count, ushort, 0444);
+
+/* Default doubles per-vcpu window every exit. */
+static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
+module_param(pause_filter_count_grow, ushort, 0444);
+
+/* Default resets per-vcpu window every exit to pause_filter_count. */
+static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(pause_filter_count_shrink, ushort, 0444);
+
+/* Default is to compute the maximum so we can never overflow. */
+static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
+module_param(pause_filter_count_max, ushort, 0444);
+
/* allow nested paging (virtualized MMU) for all guests */
static int npt = true;
module_param(npt, int, S_IRUGO);
@@ -300,6 +372,8 @@ module_param(vgif, int, 0444);
static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
module_param(sev, int, 0444);
+static u8 rsm_ins_bytes[] = "\x0f\xaa";
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
static void svm_complete_interrupts(struct vcpu_svm *svm);
@@ -348,6 +422,12 @@ struct enc_region {
unsigned long size;
};
+
+static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
+{
+ return container_of(kvm, struct kvm_svm, kvm);
+}
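/*
 * to_kvm_svm() is a plain container_of() downcast: struct kvm is embedded as
 * the first member of struct kvm_svm, which svm_vm_alloc() below hands back
 * to generic code. Accesses that used to go through kvm->arch (for example
 * kvm->arch.sev_info) therefore become to_kvm_svm(kvm)->sev_info in the
 * hunks that follow.
 */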
+
static inline bool svm_sev_enabled(void)
{
return max_sev_asid;
@@ -355,14 +435,14 @@ static inline bool svm_sev_enabled(void)
static inline bool sev_guest(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->active;
}
static inline int sev_get_asid(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return sev->asid;
}
@@ -1079,7 +1159,7 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
}
/* Note:
- * This hash table is used to map VM_ID to a struct kvm_arch,
+ * This hash table is used to map VM_ID to a struct kvm_svm,
* when handling AMD IOMMU GALOG notification to schedule in
* a particular vCPU.
*/
@@ -1096,7 +1176,7 @@ static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
static int avic_ga_log_notifier(u32 ga_tag)
{
unsigned long flags;
- struct kvm_arch *ka = NULL;
+ struct kvm_svm *kvm_svm;
struct kvm_vcpu *vcpu = NULL;
u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
@@ -1104,13 +1184,10 @@ static int avic_ga_log_notifier(u32 ga_tag)
pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
- struct kvm *kvm = container_of(ka, struct kvm, arch);
- struct kvm_arch *vm_data = &kvm->arch;
-
- if (vm_data->avic_vm_id != vm_id)
+ hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
+ if (kvm_svm->avic_vm_id != vm_id)
continue;
- vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
+ vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
break;
}
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
@@ -1168,6 +1245,42 @@ err:
return rc;
}
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ int old = control->pause_filter_count;
+
+ control->pause_filter_count = __grow_ple_window(old,
+ pause_filter_count,
+ pause_filter_count_grow,
+ pause_filter_count_max);
+
+ if (control->pause_filter_count != old)
+ mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+
+ trace_kvm_ple_window_grow(vcpu->vcpu_id,
+ control->pause_filter_count, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ int old = control->pause_filter_count;
+
+ control->pause_filter_count =
+ __shrink_ple_window(old,
+ pause_filter_count,
+ pause_filter_count_shrink,
+ pause_filter_count);
+ if (control->pause_filter_count != old)
+ mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+
+ trace_kvm_ple_window_shrink(vcpu->vcpu_id,
+ control->pause_filter_count, old);
+}
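/*
 * A rough worked example with the module-parameter defaults above, assuming
 * __grow_ple_window()/__shrink_ple_window() behave like their VMX
 * counterparts (scale by the modifier when it is smaller than the base,
 * clamp to the max/min argument, and fall back to the base when the modifier
 * is 0), and assuming KVM_SVM_DEFAULT_PLE_WINDOW keeps the 3000 that
 * init_vmcb() used to hard-code (see the hunk below):
 *
 *	PAUSE exit (pause_interception): 3000 -> 6000 -> 12000 -> ...,
 *	                                 capped at pause_filter_count_max
 *	sched-in   (svm_sched_in):       any value -> 3000, since the default
 *	                                 shrink factor of 0 resets the window
 */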
+
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -1198,6 +1311,14 @@ static __init int svm_hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 32;
}
+ /* Check for pause filtering support */
+ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
+ pause_filter_count = 0;
+ pause_filter_thresh = 0;
+ } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
+ pause_filter_thresh = 0;
+ }
+
if (nested) {
printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
@@ -1324,10 +1445,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
static void avic_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb;
- struct kvm_arch *vm_data = &svm->vcpu.kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
- phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page));
- phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page));
+ phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
+ phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
@@ -1359,6 +1480,14 @@ static void init_vmcb(struct vcpu_svm *svm)
set_exception_intercept(svm, MC_VECTOR);
set_exception_intercept(svm, AC_VECTOR);
set_exception_intercept(svm, DB_VECTOR);
+ /*
+ * Guest accesses to VMware backdoor ports could legitimately
+ * trigger #GP because of the TSS I/O permission bitmap.
+ * We intercept those #GPs and allow the accesses anyway,
+ * as VMware does.
+ */
+ if (enable_vmware_backdoor)
+ set_exception_intercept(svm, GP_VECTOR);
set_intercept(svm, INTERCEPT_INTR);
set_intercept(svm, INTERCEPT_NMI);
@@ -1367,7 +1496,6 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_RDPMC);
set_intercept(svm, INTERCEPT_CPUID);
set_intercept(svm, INTERCEPT_INVD);
- set_intercept(svm, INTERCEPT_HLT);
set_intercept(svm, INTERCEPT_INVLPG);
set_intercept(svm, INTERCEPT_INVLPGA);
set_intercept(svm, INTERCEPT_IOIO_PROT);
@@ -1383,12 +1511,16 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
set_intercept(svm, INTERCEPT_XSETBV);
+ set_intercept(svm, INTERCEPT_RSM);
- if (!kvm_mwait_in_guest()) {
+ if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
set_intercept(svm, INTERCEPT_MONITOR);
set_intercept(svm, INTERCEPT_MWAIT);
}
+ if (!kvm_hlt_in_guest(svm->vcpu.kvm))
+ set_intercept(svm, INTERCEPT_HLT);
+
control->iopm_base_pa = __sme_set(iopm_base);
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
control->int_ctl = V_INTR_MASKING_MASK;
@@ -1444,9 +1576,13 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0;
- if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
- control->pause_filter_count = 3000;
+ if (pause_filter_count) {
+ control->pause_filter_count = pause_filter_count;
+ if (pause_filter_thresh)
+ control->pause_filter_thresh = pause_filter_thresh;
set_intercept(svm, INTERCEPT_PAUSE);
+ } else {
+ clr_intercept(svm, INTERCEPT_PAUSE);
}
if (kvm_vcpu_apicv_active(&svm->vcpu))
@@ -1483,12 +1619,12 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
unsigned int index)
{
u64 *avic_physical_id_table;
- struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
return NULL;
- avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page);
+ avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
return &avic_physical_id_table[index];
}
@@ -1571,7 +1707,7 @@ static void __sev_asid_free(int asid)
static void sev_asid_free(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
__sev_asid_free(sev->asid);
}
@@ -1611,7 +1747,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long ulen, unsigned long *n,
int write)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
unsigned long npages, npinned, size;
unsigned long locked, lock_limit;
struct page **pages;
@@ -1662,7 +1798,7 @@ err:
static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
unsigned long npages)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
release_pages(pages, npages);
kvfree(pages);
@@ -1700,9 +1836,20 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
kfree(region);
}
+static struct kvm *svm_vm_alloc(void)
+{
+ struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
+ return &kvm_svm->kvm;
+}
+
+static void svm_vm_free(struct kvm *kvm)
+{
+ kfree(to_kvm_svm(kvm));
+}
+
static void sev_vm_destroy(struct kvm *kvm)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct list_head *head = &sev->regions_list;
struct list_head *pos, *q;
@@ -1731,18 +1878,18 @@ static void sev_vm_destroy(struct kvm *kvm)
static void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
- struct kvm_arch *vm_data = &kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
if (!avic)
return;
- if (vm_data->avic_logical_id_table_page)
- __free_page(vm_data->avic_logical_id_table_page);
- if (vm_data->avic_physical_id_table_page)
- __free_page(vm_data->avic_physical_id_table_page);
+ if (kvm_svm->avic_logical_id_table_page)
+ __free_page(kvm_svm->avic_logical_id_table_page);
+ if (kvm_svm->avic_physical_id_table_page)
+ __free_page(kvm_svm->avic_physical_id_table_page);
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- hash_del(&vm_data->hnode);
+ hash_del(&kvm_svm->hnode);
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
@@ -1756,10 +1903,10 @@ static int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
int err = -ENOMEM;
- struct kvm_arch *vm_data = &kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+ struct kvm_svm *k2;
struct page *p_page;
struct page *l_page;
- struct kvm_arch *ka;
u32 vm_id;
if (!avic)
@@ -1770,7 +1917,7 @@ static int avic_vm_init(struct kvm *kvm)
if (!p_page)
goto free_avic;
- vm_data->avic_physical_id_table_page = p_page;
+ kvm_svm->avic_physical_id_table_page = p_page;
clear_page(page_address(p_page));
/* Allocating logical APIC ID table (4KB) */
@@ -1778,7 +1925,7 @@ static int avic_vm_init(struct kvm *kvm)
if (!l_page)
goto free_avic;
- vm_data->avic_logical_id_table_page = l_page;
+ kvm_svm->avic_logical_id_table_page = l_page;
clear_page(page_address(l_page));
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
@@ -1790,15 +1937,13 @@ static int avic_vm_init(struct kvm *kvm)
}
/* Is it still in use? Only possible if wrapped at least once */
if (next_vm_id_wrapped) {
- hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
- struct kvm *k2 = container_of(ka, struct kvm, arch);
- struct kvm_arch *vd2 = &k2->arch;
- if (vd2->avic_vm_id == vm_id)
+ hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
+ if (k2->avic_vm_id == vm_id)
goto again;
}
}
- vm_data->avic_vm_id = vm_id;
- hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
+ kvm_svm->avic_vm_id = vm_id;
+ hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
return 0;
@@ -1902,6 +2047,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 dummy;
u32 eax = 1;
+ vcpu->arch.microcode_version = 0x01000065;
svm->spec_ctrl = 0;
if (!init_event) {
@@ -2529,14 +2675,7 @@ static int bp_interception(struct vcpu_svm *svm)
static int ud_interception(struct vcpu_svm *svm)
{
- int er;
-
- er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
- if (er == EMULATE_USER_EXIT)
- return 0;
- if (er != EMULATE_DONE)
- kvm_queue_exception(&svm->vcpu, UD_VECTOR);
- return 1;
+ return handle_ud(&svm->vcpu);
}
static int ac_interception(struct vcpu_svm *svm)
@@ -2545,6 +2684,23 @@ static int ac_interception(struct vcpu_svm *svm)
return 1;
}
+static int gp_interception(struct vcpu_svm *svm)
+{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
+ u32 error_code = svm->vmcb->control.exit_info_1;
+ int er;
+
+ WARN_ON_ONCE(!enable_vmware_backdoor);
+
+ er = emulate_instruction(vcpu,
+ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
+ else if (er != EMULATE_DONE)
+ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+ return 1;
+}
+
static bool is_erratum_383(void)
{
int err, i;
@@ -2633,7 +2789,7 @@ static int io_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
- int size, in, string, ret;
+ int size, in, string;
unsigned port;
++svm->vcpu.stat.io_exits;
@@ -2645,16 +2801,8 @@ static int io_interception(struct vcpu_svm *svm)
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
svm->next_rip = svm->vmcb->control.exit_info_2;
- ret = kvm_skip_emulated_instruction(&svm->vcpu);
- /*
- * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
- * KVM_EXIT_DEBUG here.
- */
- if (in)
- return kvm_fast_pio_in(vcpu, size, port) && ret;
- else
- return kvm_fast_pio_out(vcpu, size, port) && ret;
+ return kvm_fast_pio(&svm->vcpu, size, port, in);
}
static int nmi_interception(struct vcpu_svm *svm)
@@ -3699,6 +3847,12 @@ static int emulate_on_interception(struct vcpu_svm *svm)
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
+static int rsm_interception(struct vcpu_svm *svm)
+{
+ return x86_emulate_instruction(&svm->vcpu, 0, 0,
+ rsm_ins_bytes, 2) == EMULATE_DONE;
+}
+
static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
@@ -3860,6 +4014,22 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
+static int svm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ msr->data = 0;
+
+ switch (msr->index) {
+ case MSR_F10H_DECFG:
+ if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
+ msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3935,9 +4105,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->spec_ctrl;
break;
- case MSR_IA32_UCODE_REV:
- msr_info->data = 0x01000065;
- break;
case MSR_F15H_IC_CFG: {
int family, model;
@@ -3955,6 +4122,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0x1E;
}
break;
+ case MSR_F10H_DECFG:
+ msr_info->data = svm->msr_decfg;
+ break;
default:
return kvm_get_msr_common(vcpu, msr_info);
}
@@ -4133,6 +4303,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_F10H_DECFG: {
+ struct kvm_msr_entry msr_entry;
+
+ msr_entry.index = msr->index;
+ if (svm_get_msr_feature(&msr_entry))
+ return 1;
+
+ /* Check the supported bits */
+ if (data & ~msr_entry.data)
+ return 1;
+
+ /* Don't allow the guest to change a bit, #GP */
+ if (!msr->host_initiated && (data ^ msr_entry.data))
+ return 1;
+
+ svm->msr_decfg = data;
+ break;
+ }
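/*
 * Example of the checks above, assuming the host advertises
 * MSR_F10H_DECFG_LFENCE_SERIALIZE through svm_get_msr_feature():
 *	guest WRMSR(DECFG, LFENCE_SERIALIZE) -> accepted (matches host value)
 *	any   WRMSR(DECFG, unknown bit set)  -> #GP (data & ~msr_entry.data)
 *	guest WRMSR(DECFG, 0)                -> #GP (guests may not flip bits;
 *	                                        host-initiated writes may)
 */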
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
@@ -4187,6 +4375,9 @@ static int pause_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu;
bool in_kernel = (svm_get_cpl(vcpu) == 0);
+ if (pause_filter_thresh)
+ grow_ple_window(vcpu);
+
kvm_vcpu_on_spin(vcpu, in_kernel);
return 1;
}
@@ -4277,7 +4468,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
- struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
int index;
u32 *logical_apic_id_table;
int dlid = GET_APIC_LOGICAL_ID(ldr);
@@ -4299,7 +4490,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
index = (cluster << 2) + apic;
}
- logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+ logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
return &logical_apic_id_table[index];
}
@@ -4379,7 +4570,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- struct kvm_arch *vm_data = &vcpu->kvm->arch;
+ struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
u32 mod = (dfr >> 28) & 0xf;
@@ -4388,11 +4579,11 @@ static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
* If this changes, we need to flush the AVIC logical
* APID id table.
*/
- if (vm_data->ldr_mode == mod)
+ if (kvm_svm->ldr_mode == mod)
return 0;
- clear_page(page_address(vm_data->avic_logical_id_table_page));
- vm_data->ldr_mode = mod;
+ clear_page(page_address(kvm_svm->avic_logical_id_table_page));
+ kvm_svm->ldr_mode = mod;
if (svm->ldr_reg)
avic_handle_ldr_update(vcpu);
@@ -4512,6 +4703,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
[SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception,
[SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception,
+ [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception,
[SVM_EXIT_INTR] = intr_interception,
[SVM_EXIT_NMI] = nmi_interception,
[SVM_EXIT_SMI] = nop_on_interception,
@@ -4541,7 +4733,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
[SVM_EXIT_NPF] = npf_interception,
- [SVM_EXIT_RSM] = emulate_on_interception,
+ [SVM_EXIT_RSM] = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception,
};
@@ -4560,6 +4752,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
+ pr_err("%-20s%d\n", "pause filter threshold:",
+ control->pause_filter_thresh);
pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
@@ -5027,7 +5221,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
/* Try to enable guest_mode in IRTE */
pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
AVIC_HPA_MASK);
- pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
+ pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
svm->vcpu.vcpu_id);
pi.is_guest_mode = true;
pi.vcpu_data = &vcpu_info;
@@ -5191,6 +5385,11 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return 0;
}
+static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+ return 0;
+}
+
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -5355,7 +5554,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (svm->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5464,11 +5663,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
if (svm->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -5492,14 +5691,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
- kvm_before_handle_nmi(&svm->vcpu);
+ kvm_before_interrupt(&svm->vcpu);
stgi();
/* Any pending NMI will happen here */
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
- kvm_after_handle_nmi(&svm->vcpu);
+ kvm_after_interrupt(&svm->vcpu);
sync_cr8_to_lapic(vcpu);
@@ -5875,6 +6074,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
+ if (pause_filter_thresh)
+ shrink_ple_window(vcpu);
}
static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
@@ -5991,7 +6192,7 @@ static int sev_asid_new(void)
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
int asid, ret;
ret = -EBUSY;
@@ -6056,14 +6257,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error)
static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
return __sev_issue_cmd(sev->fd, id, data, error);
}
static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_start *start;
struct kvm_sev_launch_start params;
void *dh_blob, *session_blob;
@@ -6161,7 +6362,7 @@ static int get_num_contig_pages(int idx, struct page **inpages,
static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_launch_update_data params;
struct sev_data_launch_update_data *data;
struct page **inpages;
@@ -6236,16 +6437,18 @@ e_free:
static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ void __user *measure = (void __user *)(uintptr_t)argp->data;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_measure *data;
struct kvm_sev_launch_measure params;
+ void __user *p = NULL;
void *blob = NULL;
int ret;
if (!sev_guest(kvm))
return -ENOTTY;
- if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+ if (copy_from_user(&params, measure, sizeof(params)))
return -EFAULT;
data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -6256,17 +6459,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!params.len)
goto cmd;
- if (params.uaddr) {
+ p = (void __user *)(uintptr_t)params.uaddr;
+ if (p) {
if (params.len > SEV_FW_BLOB_MAX_SIZE) {
ret = -EINVAL;
goto e_free;
}
- if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
- ret = -EFAULT;
- goto e_free;
- }
-
ret = -ENOMEM;
blob = kmalloc(params.len, GFP_KERNEL);
if (!blob)
@@ -6290,13 +6489,13 @@ cmd:
goto e_free_blob;
if (blob) {
- if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+ if (copy_to_user(p, blob, params.len))
ret = -EFAULT;
}
done:
params.len = data->len;
- if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+ if (copy_to_user(measure, &params, sizeof(params)))
ret = -EFAULT;
e_free_blob:
kfree(blob);
@@ -6307,7 +6506,7 @@ e_free:
static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_finish *data;
int ret;
@@ -6327,7 +6526,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct kvm_sev_guest_status params;
struct sev_data_guest_status *data;
int ret;
@@ -6359,7 +6558,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
unsigned long dst, int size,
int *error, bool enc)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_dbg *data;
int ret;
@@ -6591,13 +6790,13 @@ err:
static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct sev_data_launch_secret *data;
struct kvm_sev_launch_secret params;
struct page **pages;
void *blob, *hdr;
unsigned long n;
- int ret;
+ int ret, offset;
if (!sev_guest(kvm))
return -ENOTTY;
@@ -6623,6 +6822,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (!data)
goto e_unpin_memory;
+ offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ data->guest_address = __sme_page_pa(pages[0]) + offset;
+ data->guest_len = params.guest_len;
+
blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
if (IS_ERR(blob)) {
ret = PTR_ERR(blob);
@@ -6637,8 +6840,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
ret = PTR_ERR(hdr);
goto e_free_blob;
}
- data->trans_address = __psp_pa(blob);
- data->trans_len = params.trans_len;
+ data->hdr_address = __psp_pa(hdr);
+ data->hdr_len = params.hdr_len;
data->handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
@@ -6711,7 +6914,7 @@ out:
static int svm_register_enc_region(struct kvm *kvm,
struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct enc_region *region;
int ret = 0;
@@ -6753,7 +6956,7 @@ e_free:
static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
- struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
struct list_head *head = &sev->regions_list;
struct enc_region *i;
@@ -6811,6 +7014,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_free = svm_free_vcpu,
.vcpu_reset = svm_vcpu_reset,
+ .vm_alloc = svm_vm_alloc,
+ .vm_free = svm_vm_free,
.vm_init = avic_vm_init,
.vm_destroy = svm_vm_destroy,
@@ -6821,6 +7026,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.vcpu_unblocking = svm_vcpu_unblocking,
.update_bp_intercept = update_bp_intercept,
+ .get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
.get_segment_base = svm_get_segment_base,
@@ -6876,6 +7082,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.apicv_post_state_restore = avic_post_state_restore,
.set_tss_addr = svm_set_tss_addr,
+ .set_identity_map_addr = svm_set_identity_map_addr,
.get_tdp_level = get_npt_level,
.get_mt_mask = svm_get_mt_mask,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126..aafcc98 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -52,9 +52,11 @@
#include <asm/irq_remapping.h>
#include <asm/mmu_context.h>
#include <asm/nospec-branch.h>
+#include <asm/mshyperv.h>
#include "trace.h"
#include "pmu.h"
+#include "vmx_evmcs.h"
#define __ex(x) __kvm_handle_fault_on_reboot(x)
#define __ex_clear(x, reg) \
@@ -130,13 +132,15 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
-#define KVM_VM_CR0_ALWAYS_ON \
- (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
+#define KVM_VM_CR0_ALWAYS_ON \
+ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \
+ X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
+#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -165,34 +169,33 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
-#define KVM_VMX_DEFAULT_PLE_GAP 128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
-#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
-#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
-#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
- INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
-static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
-module_param(ple_gap, int, S_IRUGO);
-
-static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
-module_param(ple_window, int, S_IRUGO);
+static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
+module_param(ple_window, uint, 0444);
/* Default doubles per-vcpu window every exit. */
-static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
-module_param(ple_window_grow, int, S_IRUGO);
+static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, uint, 0444);
/* Default resets per-vcpu window every exit to ple_window. */
-static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
-module_param(ple_window_shrink, int, S_IRUGO);
+static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, uint, 0444);
/* Default is to compute the maximum so we can never overflow. */
-static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
-module_param(ple_window_max, int, S_IRUGO);
+static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, uint, 0444);
extern const ulong vmx_return;
+struct kvm_vmx {
+ struct kvm kvm;
+
+ unsigned int tss_addr;
+ bool ept_identity_pagetable_done;
+ gpa_t ept_identity_map_addr;
+};
+
#define NR_AUTOLOAD_MSRS 8
struct vmcs {
@@ -424,6 +427,35 @@ struct __packed vmcs12 {
*/
#define VMCS12_MAX_FIELD_INDEX 0x17
+struct nested_vmx_msrs {
+ /*
+ * We only store the "true" versions of the VMX capability MSRs. We
+ * generate the "non-true" versions by setting the must-be-1 bits
+ * according to the SDM.
+ */
+ u32 procbased_ctls_low;
+ u32 procbased_ctls_high;
+ u32 secondary_ctls_low;
+ u32 secondary_ctls_high;
+ u32 pinbased_ctls_low;
+ u32 pinbased_ctls_high;
+ u32 exit_ctls_low;
+ u32 exit_ctls_high;
+ u32 entry_ctls_low;
+ u32 entry_ctls_high;
+ u32 misc_low;
+ u32 misc_high;
+ u32 ept_caps;
+ u32 vpid_caps;
+ u64 basic;
+ u64 cr0_fixed0;
+ u64 cr0_fixed1;
+ u64 cr4_fixed0;
+ u64 cr4_fixed1;
+ u64 vmcs_enum;
+ u64 vmfunc_controls;
+};
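/*
 * A brief note on how this struct is instantiated after the refactoring:
 * each vCPU keeps its own copy as vmx->nested.msrs (replacing the long list
 * of nested_vmx_* fields removed below), and vmcs_config gains a global
 * "nested" template. Field accesses shorten accordingly, e.g.
 *
 *	to_vmx(vcpu)->nested.nested_vmx_misc_low
 * becomes
 *	to_vmx(vcpu)->nested.msrs.misc_low
 *
 * as in the nested_cpu_vmx_misc_cr3_count() hunk further down.
 */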
+
/*
* The nested_vmx structure is part of vcpu_vmx, and holds information we need
* for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -475,32 +507,7 @@ struct nested_vmx {
u16 vpid02;
u16 last_vpid;
- /*
- * We only store the "true" versions of the VMX capability MSRs. We
- * generate the "non-true" versions by setting the must-be-1 bits
- * according to the SDM.
- */
- u32 nested_vmx_procbased_ctls_low;
- u32 nested_vmx_procbased_ctls_high;
- u32 nested_vmx_secondary_ctls_low;
- u32 nested_vmx_secondary_ctls_high;
- u32 nested_vmx_pinbased_ctls_low;
- u32 nested_vmx_pinbased_ctls_high;
- u32 nested_vmx_exit_ctls_low;
- u32 nested_vmx_exit_ctls_high;
- u32 nested_vmx_entry_ctls_low;
- u32 nested_vmx_entry_ctls_high;
- u32 nested_vmx_misc_low;
- u32 nested_vmx_misc_high;
- u32 nested_vmx_ept_caps;
- u32 nested_vmx_vpid_caps;
- u64 nested_vmx_basic;
- u64 nested_vmx_cr0_fixed0;
- u64 nested_vmx_cr0_fixed1;
- u64 nested_vmx_cr4_fixed0;
- u64 nested_vmx_cr4_fixed1;
- u64 nested_vmx_vmcs_enum;
- u64 nested_vmx_vmfunc_controls;
+ struct nested_vmx_msrs msrs;
/* SMM related state */
struct {
@@ -691,6 +698,11 @@ enum segment_cache_field {
SEG_FIELD_NR = 4
};
+static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+{
+ return container_of(kvm, struct kvm_vmx, kvm);
+}
+
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
{
return container_of(vcpu, struct vcpu_vmx, vcpu);
@@ -953,6 +965,7 @@ static struct vmcs_config {
u32 cpu_based_2nd_exec_ctrl;
u32 vmexit_ctrl;
u32 vmentry_ctrl;
+ struct nested_vmx_msrs nested;
} vmcs_config;
static struct vmx_capability {
@@ -999,6 +1012,169 @@ static const u32 vmx_msr_index[] = {
MSR_EFER, MSR_TSC_AUX, MSR_STAR,
};
+DEFINE_STATIC_KEY_FALSE(enable_evmcs);
+
+#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs))
+
+#define KVM_EVMCS_VERSION 1
+
+#if IS_ENABLED(CONFIG_HYPERV)
+static bool __read_mostly enlightened_vmcs = true;
+module_param(enlightened_vmcs, bool, 0444);
+
+static inline void evmcs_write64(unsigned long field, u64 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u64 *)((char *)current_evmcs + offset) = value;
+
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline void evmcs_write32(unsigned long field, u32 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u32 *)((char *)current_evmcs + offset) = value;
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline void evmcs_write16(unsigned long field, u16 value)
+{
+ u16 clean_field;
+ int offset = get_evmcs_offset(field, &clean_field);
+
+ if (offset < 0)
+ return;
+
+ *(u16 *)((char *)current_evmcs + offset) = value;
+ current_evmcs->hv_clean_fields &= ~clean_field;
+}
+
+static inline u64 evmcs_read64(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u64 *)((char *)current_evmcs + offset);
+}
+
+static inline u32 evmcs_read32(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u32 *)((char *)current_evmcs + offset);
+}
+
+static inline u16 evmcs_read16(unsigned long field)
+{
+ int offset = get_evmcs_offset(field, NULL);
+
+ if (offset < 0)
+ return 0;
+
+ return *(u16 *)((char *)current_evmcs + offset);
+}
+
+static void evmcs_load(u64 phys_addr)
+{
+ struct hv_vp_assist_page *vp_ap =
+ hv_get_vp_assist_page(smp_processor_id());
+
+ vp_ap->current_nested_vmcs = phys_addr;
+ vp_ap->enlighten_vmentry = 1;
+}
+
+static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
+{
+ /*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ * POSTED_INTR_NV = 0x00000002,
+ * GUEST_INTR_STATUS = 0x00000810,
+ * APIC_ACCESS_ADDR = 0x00002014,
+ * POSTED_INTR_DESC_ADDR = 0x00002016,
+ * EOI_EXIT_BITMAP0 = 0x0000201c,
+ * EOI_EXIT_BITMAP1 = 0x0000201e,
+ * EOI_EXIT_BITMAP2 = 0x00002020,
+ * EOI_EXIT_BITMAP3 = 0x00002022,
+ */
+ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &=
+ ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &=
+ ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &=
+ ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
+
+ /*
+ * GUEST_PML_INDEX = 0x00000812,
+ * PML_ADDRESS = 0x0000200e,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
+
+ /* VM_FUNCTION_CONTROL = 0x00002018, */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
+
+ /*
+ * EPTP_LIST_ADDRESS = 0x00002024,
+ * VMREAD_BITMAP = 0x00002026,
+ * VMWRITE_BITMAP = 0x00002028,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
+
+ /*
+ * TSC_MULTIPLIER = 0x00002032,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
+
+ /*
+ * PLE_GAP = 0x00004020,
+ * PLE_WINDOW = 0x00004022,
+ */
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
+
+ /*
+ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ */
+ vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+ /*
+ * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ */
+ vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+ vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+
+ /*
+ * Currently unsupported in KVM:
+ * GUEST_IA32_RTIT_CTL = 0x00002814,
+ */
+}
+#else /* !IS_ENABLED(CONFIG_HYPERV) */
+static inline void evmcs_write64(unsigned long field, u64 value) {}
+static inline void evmcs_write32(unsigned long field, u32 value) {}
+static inline void evmcs_write16(unsigned long field, u16 value) {}
+static inline u64 evmcs_read64(unsigned long field) { return 0; }
+static inline u32 evmcs_read32(unsigned long field) { return 0; }
+static inline u16 evmcs_read16(unsigned long field) { return 0; }
+static inline void evmcs_load(u64 phys_addr) {}
+static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
+#endif /* IS_ENABLED(CONFIG_HYPERV) */
+
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -1031,6 +1207,11 @@ static inline bool is_invalid_opcode(u32 intr_info)
return is_exception_n(intr_info, UD_VECTOR);
}
+static inline bool is_gp_fault(u32 intr_info)
+{
+ return is_exception_n(intr_info, GP_VECTOR);
+}
+
static inline bool is_external_interrupt(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1044,6 +1225,13 @@ static inline bool is_machine_check(u32 intr_info)
(INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK);
}
+/* Undocumented: icebp/int1 */
+static inline bool is_icebp(u32 intr_info)
+{
+ return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+ == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK);
+}
+
static inline bool cpu_has_vmx_msr_bitmap(void)
{
return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS;
@@ -1313,7 +1501,7 @@ static inline bool report_flexpriority(void)
static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
{
- return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+ return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
}
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
@@ -1334,6 +1522,16 @@ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
PIN_BASED_VMX_PREEMPTION_TIMER;
}
+static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
+}
+
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
+{
+ return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+}
+
static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
{
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1472,6 +1670,9 @@ static void vmcs_load(struct vmcs *vmcs)
u64 phys_addr = __pa(vmcs);
u8 error;
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_load(phys_addr);
+
asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0"
: "=qm"(error) : "a"(&phys_addr), "m"(phys_addr)
: "cc", "memory");
@@ -1645,18 +1846,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
static __always_inline u16 vmcs_read16(unsigned long field)
{
vmcs_check16(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read16(field);
return __vmcs_readl(field);
}
static __always_inline u32 vmcs_read32(unsigned long field)
{
vmcs_check32(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read32(field);
return __vmcs_readl(field);
}
static __always_inline u64 vmcs_read64(unsigned long field)
{
vmcs_check64(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read64(field);
#ifdef CONFIG_X86_64
return __vmcs_readl(field);
#else
@@ -1667,6 +1874,8 @@ static __always_inline u64 vmcs_read64(unsigned long field)
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
vmcs_checkl(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_read64(field);
return __vmcs_readl(field);
}
@@ -1690,18 +1899,27 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val
static __always_inline void vmcs_write16(unsigned long field, u16 value)
{
vmcs_check16(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write16(field, value);
+
__vmcs_writel(field, value);
}
static __always_inline void vmcs_write32(unsigned long field, u32 value)
{
vmcs_check32(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write32(field, value);
+
__vmcs_writel(field, value);
}
static __always_inline void vmcs_write64(unsigned long field, u64 value)
{
vmcs_check64(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write64(field, value);
+
__vmcs_writel(field, value);
#ifndef CONFIG_X86_64
asm volatile ("");
@@ -1712,6 +1930,9 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
static __always_inline void vmcs_writel(unsigned long field, unsigned long value)
{
vmcs_checkl(field);
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write64(field, value);
+
__vmcs_writel(field, value);
}
@@ -1719,6 +1940,9 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_clear_bits does not support 64-bit fields");
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write32(field, evmcs_read32(field) & ~mask);
+
__vmcs_writel(field, __vmcs_readl(field) & ~mask);
}
@@ -1726,6 +1950,9 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
{
BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000,
"vmcs_set_bits does not support 64-bit fields");
+ if (static_branch_unlikely(&enable_evmcs))
+ return evmcs_write32(field, evmcs_read32(field) | mask);
+
__vmcs_writel(field, __vmcs_readl(field) | mask);
}
@@ -1857,6 +2084,14 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << DB_VECTOR) | (1u << AC_VECTOR);
+ /*
+ * Guest accesses to VMware backdoor ports could legitimately
+ * trigger #GP because of the TSS I/O permission bitmap.
+ * We intercept those #GPs and allow the accesses anyway,
+ * as VMware does.
+ */
+ if (enable_vmware_backdoor)
+ eb |= (1u << GP_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -2122,6 +2357,9 @@ static unsigned long segment_base(u16 selector)
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+#ifdef CONFIG_X86_64
+ int cpu = raw_smp_processor_id();
+#endif
int i;
if (vmx->host_state.loaded)
@@ -2134,7 +2372,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
*/
vmx->host_state.ldt_sel = kvm_read_ldt();
vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
+
+#ifdef CONFIG_X86_64
+ save_fsgs_for_kvm();
+ vmx->host_state.fs_sel = current->thread.fsindex;
+ vmx->host_state.gs_sel = current->thread.gsindex;
+#else
savesegment(fs, vmx->host_state.fs_sel);
+ savesegment(gs, vmx->host_state.gs_sel);
+#endif
if (!(vmx->host_state.fs_sel & 7)) {
vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
vmx->host_state.fs_reload_needed = 0;
@@ -2142,7 +2388,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
vmcs_write16(HOST_FS_SELECTOR, 0);
vmx->host_state.fs_reload_needed = 1;
}
- savesegment(gs, vmx->host_state.gs_sel);
if (!(vmx->host_state.gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
else {
@@ -2153,20 +2398,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
savesegment(ds, vmx->host_state.ds_sel);
savesegment(es, vmx->host_state.es_sel);
-#endif
-#ifdef CONFIG_X86_64
- vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
- vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
-#else
- vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
- vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
-#endif
+ vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+ vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
-#ifdef CONFIG_X86_64
- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+ vmx->msr_host_kernel_gs_base = current->thread.gsbase;
if (is_long_mode(&vmx->vcpu))
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
+#else
+ vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
+ vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
#endif
if (boot_cpu_has(X86_FEATURE_MPX))
rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
@@ -2525,6 +2766,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
return 0;
}
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Ensure that we clear the HLT state in the VMCS. We don't need to
+ * explicitly skip the instruction because if the HLT state is set,
+ * then the instruction is already executing and RIP has already been
+ * advanced.
+ */
+ if (kvm_hlt_in_guest(vcpu->kvm) &&
+ vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+ vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
static void vmx_queue_exception(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2547,6 +2801,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
return;
}
+ WARN_ON_ONCE(vmx->emulation_required);
+
if (kvm_exception_is_soft(nr)) {
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
@@ -2555,6 +2811,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
intr_info |= INTR_TYPE_HARD_EXCEPTION;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+
+ vmx_clear_hlt(vcpu);
}
static bool vmx_rdtscp_supported(void)
@@ -2682,8 +2940,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
* bit in the high half is on if the corresponding bit in the control field
* may be on. See also vmx_control_verify().
*/
-static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
+static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
{
+ if (!nested) {
+ memset(msrs, 0, sizeof(*msrs));
+ return;
+ }
+
/*
* Note that as a general rule, the high half of the MSRs (bits in
* the control fields which may be 1) should be initialized by the
@@ -2701,70 +2964,68 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
/* pin-based controls */
rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
- vmx->nested.nested_vmx_pinbased_ctls_low,
- vmx->nested.nested_vmx_pinbased_ctls_high);
- vmx->nested.nested_vmx_pinbased_ctls_low |=
+ msrs->pinbased_ctls_low,
+ msrs->pinbased_ctls_high);
+ msrs->pinbased_ctls_low |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_pinbased_ctls_high &=
+ msrs->pinbased_ctls_high &=
PIN_BASED_EXT_INTR_MASK |
PIN_BASED_NMI_EXITING |
- PIN_BASED_VIRTUAL_NMIS;
- vmx->nested.nested_vmx_pinbased_ctls_high |=
+ PIN_BASED_VIRTUAL_NMIS |
+ (apicv ? PIN_BASED_POSTED_INTR : 0);
+ msrs->pinbased_ctls_high |=
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
PIN_BASED_VMX_PREEMPTION_TIMER;
- if (kvm_vcpu_apicv_active(&vmx->vcpu))
- vmx->nested.nested_vmx_pinbased_ctls_high |=
- PIN_BASED_POSTED_INTR;
/* exit controls */
rdmsr(MSR_IA32_VMX_EXIT_CTLS,
- vmx->nested.nested_vmx_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high);
- vmx->nested.nested_vmx_exit_ctls_low =
+ msrs->exit_ctls_low,
+ msrs->exit_ctls_high);
+ msrs->exit_ctls_low =
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_exit_ctls_high &=
+ msrs->exit_ctls_high &=
#ifdef CONFIG_X86_64
VM_EXIT_HOST_ADDR_SPACE_SIZE |
#endif
VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
- vmx->nested.nested_vmx_exit_ctls_high |=
+ msrs->exit_ctls_high |=
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
if (kvm_mpx_supported())
- vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+ msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* We support free control of debug control saving. */
- vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+ msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
- vmx->nested.nested_vmx_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high);
- vmx->nested.nested_vmx_entry_ctls_low =
+ msrs->entry_ctls_low,
+ msrs->entry_ctls_high);
+ msrs->entry_ctls_low =
VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_entry_ctls_high &=
+ msrs->entry_ctls_high &=
#ifdef CONFIG_X86_64
VM_ENTRY_IA32E_MODE |
#endif
VM_ENTRY_LOAD_IA32_PAT;
- vmx->nested.nested_vmx_entry_ctls_high |=
+ msrs->entry_ctls_high |=
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
if (kvm_mpx_supported())
- vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+ msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* We support free control of debug control loading. */
- vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+ msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
- vmx->nested.nested_vmx_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high);
- vmx->nested.nested_vmx_procbased_ctls_low =
+ msrs->procbased_ctls_low,
+ msrs->procbased_ctls_high);
+ msrs->procbased_ctls_low =
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
- vmx->nested.nested_vmx_procbased_ctls_high &=
+ msrs->procbased_ctls_high &=
CPU_BASED_VIRTUAL_INTR_PENDING |
CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
@@ -2784,12 +3045,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* can use it to avoid exits to L1 - even when L0 runs L2
* without MSR bitmaps.
*/
- vmx->nested.nested_vmx_procbased_ctls_high |=
+ msrs->procbased_ctls_high |=
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
CPU_BASED_USE_MSR_BITMAPS;
/* We support free control of CR3 access interception. */
- vmx->nested.nested_vmx_procbased_ctls_low &=
+ msrs->procbased_ctls_low &=
~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
/*
@@ -2797,10 +3058,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* depend on CPUID bits, they are added later by vmx_cpuid_update.
*/
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
- vmx->nested.nested_vmx_secondary_ctls_low,
- vmx->nested.nested_vmx_secondary_ctls_high);
- vmx->nested.nested_vmx_secondary_ctls_low = 0;
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ msrs->secondary_ctls_low,
+ msrs->secondary_ctls_high);
+ msrs->secondary_ctls_low = 0;
+ msrs->secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
@@ -2810,33 +3071,33 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
- vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+ msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
if (cpu_has_vmx_ept_execute_only())
- vmx->nested.nested_vmx_ept_caps |=
+ msrs->ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
- vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+ msrs->ept_caps &= vmx_capability.ept;
+ msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_1GB_PAGE_BIT;
if (enable_ept_ad_bits) {
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_PML;
- vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+ msrs->ept_caps |= VMX_EPT_AD_BIT;
}
}
if (cpu_has_vmx_vmfunc()) {
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_VMFUNC;
/*
* Advertise EPTP switching unconditionally
* since we emulate it
*/
if (enable_ept)
- vmx->nested.nested_vmx_vmfunc_controls =
+ msrs->vmfunc_controls =
VMX_VMFUNC_EPTP_SWITCHING;
}
@@ -2847,25 +3108,25 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* not failing the single-context invvpid, and it is worse.
*/
if (enable_vpid) {
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_VPID;
- vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+ msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
VMX_VPID_EXTENT_SUPPORTED_MASK;
}
if (enable_unrestricted_guest)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ msrs->secondary_ctls_high |=
SECONDARY_EXEC_UNRESTRICTED_GUEST;
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
- vmx->nested.nested_vmx_misc_low,
- vmx->nested.nested_vmx_misc_high);
- vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
- vmx->nested.nested_vmx_misc_low |=
+ msrs->misc_low,
+ msrs->misc_high);
+ msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
+ msrs->misc_low |=
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
VMX_MISC_ACTIVITY_HLT;
- vmx->nested.nested_vmx_misc_high = 0;
+ msrs->misc_high = 0;
/*
* This MSR reports some information about VMX support. We
@@ -2873,14 +3134,14 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* guest, and the VMCS structure we give it - not about the
* VMX support of the underlying hardware.
*/
- vmx->nested.nested_vmx_basic =
+ msrs->basic =
VMCS12_REVISION |
VMX_BASIC_TRUE_CTLS |
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
if (cpu_has_vmx_basic_inout())
- vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
+ msrs->basic |= VMX_BASIC_INOUT;
/*
* These MSRs specify bits which the guest must keep fixed on
@@ -2889,15 +3150,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
*/
#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
- vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
- vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
+ msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
+ msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
/* These MSRs specify bits which the guest must keep fixed off. */
- rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
- rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
+ rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
/* highest index: VMX_PREEMPTION_TIMER_VALUE */
- vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
+ msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
}
/*
@@ -2934,7 +3195,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
/* reserved */
BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
- u64 vmx_basic = vmx->nested.nested_vmx_basic;
+ u64 vmx_basic = vmx->nested.msrs.basic;
if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
return -EINVAL;
@@ -2953,7 +3214,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
return -EINVAL;
- vmx->nested.nested_vmx_basic = data;
+ vmx->nested.msrs.basic = data;
return 0;
}
@@ -2965,24 +3226,24 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
switch (msr_index) {
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
- lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
- highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
+ lowp = &vmx->nested.msrs.pinbased_ctls_low;
+ highp = &vmx->nested.msrs.pinbased_ctls_high;
break;
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
- lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
- highp = &vmx->nested.nested_vmx_procbased_ctls_high;
+ lowp = &vmx->nested.msrs.procbased_ctls_low;
+ highp = &vmx->nested.msrs.procbased_ctls_high;
break;
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
- lowp = &vmx->nested.nested_vmx_exit_ctls_low;
- highp = &vmx->nested.nested_vmx_exit_ctls_high;
+ lowp = &vmx->nested.msrs.exit_ctls_low;
+ highp = &vmx->nested.msrs.exit_ctls_high;
break;
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
- lowp = &vmx->nested.nested_vmx_entry_ctls_low;
- highp = &vmx->nested.nested_vmx_entry_ctls_high;
+ lowp = &vmx->nested.msrs.entry_ctls_low;
+ highp = &vmx->nested.msrs.entry_ctls_high;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
- lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
- highp = &vmx->nested.nested_vmx_secondary_ctls_high;
+ lowp = &vmx->nested.msrs.secondary_ctls_low;
+ highp = &vmx->nested.msrs.secondary_ctls_high;
break;
default:
BUG();
@@ -3013,13 +3274,13 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
GENMASK_ULL(13, 9) | BIT_ULL(31);
u64 vmx_misc;
- vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
- vmx->nested.nested_vmx_misc_high);
+ vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+ vmx->nested.msrs.misc_high);
if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
return -EINVAL;
- if ((vmx->nested.nested_vmx_pinbased_ctls_high &
+ if ((vmx->nested.msrs.pinbased_ctls_high &
PIN_BASED_VMX_PREEMPTION_TIMER) &&
vmx_misc_preemption_timer_rate(data) !=
vmx_misc_preemption_timer_rate(vmx_misc))
@@ -3034,8 +3295,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
return -EINVAL;
- vmx->nested.nested_vmx_misc_low = data;
- vmx->nested.nested_vmx_misc_high = data >> 32;
+ vmx->nested.msrs.misc_low = data;
+ vmx->nested.msrs.misc_high = data >> 32;
return 0;
}
@@ -3043,15 +3304,15 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
{
u64 vmx_ept_vpid_cap;
- vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
- vmx->nested.nested_vmx_vpid_caps);
+ vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+ vmx->nested.msrs.vpid_caps);
/* Every bit is either reserved or a feature bit. */
if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
return -EINVAL;
- vmx->nested.nested_vmx_ept_caps = data;
- vmx->nested.nested_vmx_vpid_caps = data >> 32;
+ vmx->nested.msrs.ept_caps = data;
+ vmx->nested.msrs.vpid_caps = data >> 32;
return 0;
}
@@ -3061,10 +3322,10 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
switch (msr_index) {
case MSR_IA32_VMX_CR0_FIXED0:
- msr = &vmx->nested.nested_vmx_cr0_fixed0;
+ msr = &vmx->nested.msrs.cr0_fixed0;
break;
case MSR_IA32_VMX_CR4_FIXED0:
- msr = &vmx->nested.nested_vmx_cr4_fixed0;
+ msr = &vmx->nested.msrs.cr4_fixed0;
break;
default:
BUG();
@@ -3128,7 +3389,7 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
case MSR_IA32_VMX_EPT_VPID_CAP:
return vmx_restore_vmx_ept_vpid_cap(vmx, data);
case MSR_IA32_VMX_VMCS_ENUM:
- vmx->nested.nested_vmx_vmcs_enum = data;
+ vmx->nested.msrs.vmcs_enum = data;
return 0;
default:
/*
@@ -3139,77 +3400,75 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
}
/* Returns 0 on success, non-0 otherwise. */
-static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
switch (msr_index) {
case MSR_IA32_VMX_BASIC:
- *pdata = vmx->nested.nested_vmx_basic;
+ *pdata = msrs->basic;
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_pinbased_ctls_low,
- vmx->nested.nested_vmx_pinbased_ctls_high);
+ msrs->pinbased_ctls_low,
+ msrs->pinbased_ctls_high);
if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
case MSR_IA32_VMX_PROCBASED_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high);
+ msrs->procbased_ctls_low,
+ msrs->procbased_ctls_high);
if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
case MSR_IA32_VMX_EXIT_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high);
+ msrs->exit_ctls_low,
+ msrs->exit_ctls_high);
if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
case MSR_IA32_VMX_ENTRY_CTLS:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high);
+ msrs->entry_ctls_low,
+ msrs->entry_ctls_high);
if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_MISC:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_misc_low,
- vmx->nested.nested_vmx_misc_high);
+ msrs->misc_low,
+ msrs->misc_high);
break;
case MSR_IA32_VMX_CR0_FIXED0:
- *pdata = vmx->nested.nested_vmx_cr0_fixed0;
+ *pdata = msrs->cr0_fixed0;
break;
case MSR_IA32_VMX_CR0_FIXED1:
- *pdata = vmx->nested.nested_vmx_cr0_fixed1;
+ *pdata = msrs->cr0_fixed1;
break;
case MSR_IA32_VMX_CR4_FIXED0:
- *pdata = vmx->nested.nested_vmx_cr4_fixed0;
+ *pdata = msrs->cr4_fixed0;
break;
case MSR_IA32_VMX_CR4_FIXED1:
- *pdata = vmx->nested.nested_vmx_cr4_fixed1;
+ *pdata = msrs->cr4_fixed1;
break;
case MSR_IA32_VMX_VMCS_ENUM:
- *pdata = vmx->nested.nested_vmx_vmcs_enum;
+ *pdata = msrs->vmcs_enum;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
*pdata = vmx_control_msr(
- vmx->nested.nested_vmx_secondary_ctls_low,
- vmx->nested.nested_vmx_secondary_ctls_high);
+ msrs->secondary_ctls_low,
+ msrs->secondary_ctls_high);
break;
case MSR_IA32_VMX_EPT_VPID_CAP:
- *pdata = vmx->nested.nested_vmx_ept_caps |
- ((u64)vmx->nested.nested_vmx_vpid_caps << 32);
+ *pdata = msrs->ept_caps |
+ ((u64)msrs->vpid_caps << 32);
break;
case MSR_IA32_VMX_VMFUNC:
- *pdata = vmx->nested.nested_vmx_vmfunc_controls;
+ *pdata = msrs->vmfunc_controls;
break;
default:
return 1;
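(Aside, not part of the patch.) The reads above all funnel through vmx_control_msr(), which packs a VMX capability MSR from its two halves: the allowed-0 settings in the low 32 bits and the allowed-1 settings in the high 32 bits. A minimal standalone sketch of that packing convention, using hypothetical control values:

#include <stdint.h>
#include <stdio.h>

/* Assumed behaviour of the in-kernel vmx_control_msr() helper. */
static uint64_t vmx_control_msr(uint32_t low, uint32_t high)
{
	return low | ((uint64_t)high << 32);
}

int main(void)
{
	/* Hypothetical values, purely for illustration. */
	uint32_t entry_ctls_low  = 0x000011ff;	/* controls that must be 1 */
	uint32_t entry_ctls_high = 0x0001ffff;	/* controls that may be 1  */

	printf("MSR_IA32_VMX_TRUE_ENTRY_CTLS = %#018llx\n",
	       (unsigned long long)vmx_control_msr(entry_ctls_low,
						    entry_ctls_high));
	return 0;
}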
@@ -3226,6 +3485,20 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
return !(val & ~valid_bits);
}
+static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ switch (msr->index) {
+ case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+ if (!nested)
+ return 1;
+ return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
@@ -3297,7 +3570,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
return 1;
- return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
+ return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
+ &msr_info->data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
@@ -3590,6 +3864,14 @@ static int hardware_enable(void)
if (cr4_read_shadow() & X86_CR4_VMXE)
return -EBUSY;
+ /*
+ * This can happen if we hot-added a CPU but failed to allocate
+ * VP assist page for it.
+ */
+ if (static_branch_unlikely(&enable_evmcs) &&
+ !hv_get_vp_assist_page(cpu))
+ return -EFAULT;
+
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
@@ -3688,6 +3970,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
+ memset(vmcs_conf, 0, sizeof(*vmcs_conf));
min = CPU_BASED_HLT_EXITING |
#ifdef CONFIG_X86_64
CPU_BASED_CR8_LOAD_EXITING |
@@ -3698,13 +3981,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
+ CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;
- if (!kvm_mwait_in_guest())
- min |= CPU_BASED_MWAIT_EXITING |
- CPU_BASED_MONITOR_EXITING;
-
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3823,7 +4104,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->size = vmx_msr_high & 0x1fff;
vmcs_conf->order = get_order(vmcs_conf->size);
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
- vmcs_conf->revision_id = vmx_msr_low;
+
+ /* KVM supports Enlightened VMCS v1 only */
+ if (static_branch_unlikely(&enable_evmcs))
+ vmcs_conf->revision_id = KVM_EVMCS_VERSION;
+ else
+ vmcs_conf->revision_id = vmx_msr_low;
vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
@@ -3831,6 +4117,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
vmcs_conf->vmexit_ctrl = _vmexit_control;
vmcs_conf->vmentry_ctrl = _vmentry_control;
+ if (static_branch_unlikely(&enable_evmcs))
+ evmcs_sanitize_exec_ctrls(vmcs_conf);
+
cpu_has_load_ia32_efer =
allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS,
VM_ENTRY_LOAD_IA32_EFER)
@@ -4150,6 +4439,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
{
unsigned long flags;
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
@@ -4165,13 +4455,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
* Very old userspace does not call KVM_SET_TSS_ADDR before entering
* vcpu. Warn the user that an update is overdue.
*/
- if (!vcpu->kvm->arch.tss_addr)
+ if (!kvm_vmx->tss_addr)
printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
"called before entering vcpu\n");
vmx_segment_cache_clear(vmx);
- vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
+ vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
@@ -4279,7 +4569,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
{
- if (enable_ept && is_paging(vcpu))
+ if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
}
@@ -4327,11 +4617,11 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
- u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
- u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
+ if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_UNRESTRICTED_GUEST &&
nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
@@ -4341,16 +4631,16 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
- u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
- u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
return fixed_bits_valid(val, fixed0, fixed1);
}
static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
- u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
- u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
+ u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
return fixed_bits_valid(val, fixed0, fixed1);
}
@@ -4416,7 +4706,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
- if (enable_ept)
+ if (enable_ept && !enable_unrestricted_guest)
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -4457,10 +4747,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (enable_ept) {
eptp = construct_eptp(vcpu, cr3);
vmcs_write64(EPT_POINTER, eptp);
- if (is_paging(vcpu) || is_guest_mode(vcpu))
+ if (enable_unrestricted_guest || is_paging(vcpu) ||
+ is_guest_mode(vcpu))
guest_cr3 = kvm_read_cr3(vcpu);
else
- guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
+ guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
ept_load_pdptrs(vcpu);
}
@@ -4475,17 +4766,22 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* is in force while we are in guest mode. Do not let guests control
* this bit, even if host CR4.MCE == 0.
*/
- unsigned long hw_cr4 =
- (cr4_read_shadow() & X86_CR4_MCE) |
- (cr4 & ~X86_CR4_MCE) |
- (to_vmx(vcpu)->rmode.vm86_active ?
- KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+ unsigned long hw_cr4;
+
+ hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
+ if (enable_unrestricted_guest)
+ hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
+ else if (to_vmx(vcpu)->rmode.vm86_active)
+ hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
+ else
+ hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
- } else
+ } else if (!is_guest_mode(vcpu) ||
+ !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
@@ -4504,16 +4800,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
vcpu->arch.cr4 = cr4;
- if (enable_ept) {
- if (!is_paging(vcpu)) {
- hw_cr4 &= ~X86_CR4_PAE;
- hw_cr4 |= X86_CR4_PSE;
- } else if (!(cr4 & X86_CR4_PAE)) {
- hw_cr4 &= ~X86_CR4_PAE;
+
+ if (!enable_unrestricted_guest) {
+ if (enable_ept) {
+ if (!is_paging(vcpu)) {
+ hw_cr4 &= ~X86_CR4_PAE;
+ hw_cr4 |= X86_CR4_PSE;
+ } else if (!(cr4 & X86_CR4_PAE)) {
+ hw_cr4 &= ~X86_CR4_PAE;
+ }
}
- }
- if (!enable_unrestricted_guest && !is_paging(vcpu))
/*
* SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
* hardware. To emulate this behavior, SMEP/SMAP/PKU needs
@@ -4525,7 +4822,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* If enable_unrestricted_guest, the CPU automatically
* disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
*/
- hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+ if (!is_paging(vcpu))
+ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+ }
vmcs_writel(CR4_READ_SHADOW, cr4);
vmcs_writel(GUEST_CR4, hw_cr4);
@@ -4893,7 +5192,7 @@ static int init_rmode_tss(struct kvm *kvm)
int idx, r;
idx = srcu_read_lock(&kvm->srcu);
- fn = kvm->arch.tss_addr >> PAGE_SHIFT;
+ fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
if (r < 0)
goto out;
@@ -4919,22 +5218,23 @@ out:
static int init_rmode_identity_map(struct kvm *kvm)
{
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
int i, idx, r = 0;
kvm_pfn_t identity_map_pfn;
u32 tmp;
- /* Protect kvm->arch.ept_identity_pagetable_done. */
+ /* Protect kvm_vmx->ept_identity_pagetable_done. */
mutex_lock(&kvm->slots_lock);
- if (likely(kvm->arch.ept_identity_pagetable_done))
+ if (likely(kvm_vmx->ept_identity_pagetable_done))
goto out2;
- if (!kvm->arch.ept_identity_map_addr)
- kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
- identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+ if (!kvm_vmx->ept_identity_map_addr)
+ kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+ identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
- kvm->arch.ept_identity_map_addr, PAGE_SIZE);
+ kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
if (r < 0)
goto out2;
@@ -4951,7 +5251,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
if (r < 0)
goto out;
}
- kvm->arch.ept_identity_pagetable_done = true;
+ kvm_vmx->ept_identity_pagetable_done = true;
out:
srcu_read_unlock(&kvm->srcu, idx);
@@ -5487,6 +5787,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
exec_control |= CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_INVLPG_EXITING;
+ if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+ exec_control &= ~(CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING);
+ if (kvm_hlt_in_guest(vmx->vcpu.kvm))
+ exec_control &= ~CPU_BASED_HLT_EXITING;
return exec_control;
}
@@ -5520,7 +5825,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
}
if (!enable_unrestricted_guest)
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
- if (!ple_gap)
+ if (kvm_pause_in_guest(vmx->vcpu.kvm))
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (!kvm_vcpu_apicv_active(vcpu))
exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -5552,10 +5857,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (xsaves_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_XSAVES;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_XSAVES;
}
}
@@ -5567,10 +5872,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (rdtscp_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_RDTSCP;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_RDTSCP;
}
}
@@ -5588,10 +5893,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (invpcid_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_INVPCID;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_ENABLE_INVPCID;
}
}
@@ -5603,10 +5908,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (rdrand_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_RDRAND_EXITING;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_RDRAND_EXITING;
}
}
@@ -5618,10 +5923,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
if (nested) {
if (rdseed_enabled)
- vmx->nested.nested_vmx_secondary_ctls_high |=
+ vmx->nested.msrs.secondary_ctls_high |=
SECONDARY_EXEC_RDSEED_EXITING;
else
- vmx->nested.nested_vmx_secondary_ctls_high &=
+ vmx->nested.msrs.secondary_ctls_high &=
~SECONDARY_EXEC_RDSEED_EXITING;
}
}
@@ -5683,7 +5988,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
}
- if (ple_gap) {
+ if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
vmcs_write32(PLE_GAP, ple_gap);
vmx->ple_window = ple_window;
vmx->ple_window_dirty = true;
@@ -5765,6 +6070,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->rmode.vm86_active = 0;
vmx->spec_ctrl = 0;
+ vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -5847,6 +6153,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
update_exception_bitmap(vcpu);
vpid_sync_context(vmx->vpid);
+ if (init_event)
+ vmx_clear_hlt(vcpu);
}
/*
@@ -5871,8 +6179,7 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
{
- return get_vmcs12(vcpu)->pin_based_vm_exec_control &
- PIN_BASED_NMI_EXITING;
+ return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
}
static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -5918,6 +6225,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
} else
intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+
+ vmx_clear_hlt(vcpu);
}
static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -5948,6 +6257,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+
+ vmx_clear_hlt(vcpu);
}
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -6010,14 +6321,23 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
+ if (enable_unrestricted_guest)
+ return 0;
+
ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
PAGE_SIZE * 3);
if (ret)
return ret;
- kvm->arch.tss_addr = addr;
+ to_kvm_vmx(kvm)->tss_addr = addr;
return init_rmode_tss(kvm);
}
+static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+ to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
+ return 0;
+}
+
static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
{
switch (vec) {
@@ -6120,19 +6440,24 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_nmi(intr_info))
return 1; /* already handled by vmx_vcpu_run() */
- if (is_invalid_opcode(intr_info)) {
- er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
- if (er == EMULATE_USER_EXIT)
- return 0;
- if (er != EMULATE_DONE)
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
+ if (is_invalid_opcode(intr_info))
+ return handle_ud(vcpu);
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
+ WARN_ON_ONCE(!enable_vmware_backdoor);
+ er = emulate_instruction(vcpu,
+ EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
+ else if (er != EMULATE_DONE)
+ kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+ return 1;
+ }
+
/*
* The #PF with PFEC.RSVD = 1 indicates the guest is accessing
	 * MMIO; it is better to report an internal error.
@@ -6171,7 +6496,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
- if (!(dr6 & ~DR6_RESERVED)) /* icebp */
+ if (is_icebp(intr_info))
skip_emulated_instruction(vcpu);
kvm_queue_exception(vcpu, DB_VECTOR);
@@ -6218,28 +6543,22 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
static int handle_io(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
- int size, in, string, ret;
+ int size, in, string;
unsigned port;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
string = (exit_qualification & 16) != 0;
- in = (exit_qualification & 8) != 0;
++vcpu->stat.io_exits;
- if (string || in)
+ if (string)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
+ in = (exit_qualification & 8) != 0;
- ret = kvm_skip_emulated_instruction(vcpu);
-
- /*
- * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
- * KVM_EXIT_DEBUG here.
- */
- return kvm_fast_pio_out(vcpu, size, port) && ret;
+ return kvm_fast_pio(vcpu, size, port, in);
}
static void
@@ -6330,6 +6649,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
err = handle_set_cr0(vcpu, val);
return kvm_complete_insn_gp(vcpu, err);
case 3:
+ WARN_ON_ONCE(enable_unrestricted_guest);
err = kvm_set_cr3(vcpu, val);
return kvm_complete_insn_gp(vcpu, err);
case 4:
@@ -6362,6 +6682,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
case 1: /*mov from cr*/
switch (cr) {
case 3:
+ WARN_ON_ONCE(enable_unrestricted_guest);
val = kvm_read_cr3(vcpu);
kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
@@ -6755,7 +7076,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
{
- int ret;
gpa_t gpa;
/*
@@ -6783,17 +7103,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
NULL, 0) == EMULATE_DONE;
}
- ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
- if (ret >= 0)
- return ret;
-
- /* It is the real ept misconfig */
- WARN_ON(1);
-
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
-
- return 0;
+ return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
}
static int handle_nmi_window(struct kvm_vcpu *vcpu)
@@ -6816,6 +7126,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
bool intr_window_requested;
unsigned count = 130;
+ /*
+ * We should never reach the point where we are emulating L2
+ * due to invalid guest state as that means we incorrectly
+ * allowed a nested VMEntry with an invalid vmcs12.
+ */
+ WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
+
cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
@@ -6834,12 +7151,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
goto out;
}
- if (err != EMULATE_DONE) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return 0;
- }
+ if (err != EMULATE_DONE)
+ goto emulation_error;
+
+ if (vmx->emulation_required && !vmx->rmode.vm86_active &&
+ vcpu->arch.exception.pending)
+ goto emulation_error;
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
@@ -6855,34 +7172,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
out:
return ret;
-}
-
-static int __grow_ple_window(int val)
-{
- if (ple_window_grow < 1)
- return ple_window;
-
- val = min(val, ple_window_actual_max);
-
- if (ple_window_grow < ple_window)
- val *= ple_window_grow;
- else
- val += ple_window_grow;
-
- return val;
-}
-
-static int __shrink_ple_window(int val, int modifier, int minimum)
-{
- if (modifier < 1)
- return ple_window;
-
- if (modifier < ple_window)
- val /= modifier;
- else
- val -= modifier;
- return max(val, minimum);
+emulation_error:
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return 0;
}
static void grow_ple_window(struct kvm_vcpu *vcpu)
@@ -6890,7 +7185,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
int old = vmx->ple_window;
- vmx->ple_window = __grow_ple_window(old);
+ vmx->ple_window = __grow_ple_window(old, ple_window,
+ ple_window_grow,
+ ple_window_max);
if (vmx->ple_window != old)
vmx->ple_window_dirty = true;
@@ -6903,8 +7200,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
int old = vmx->ple_window;
- vmx->ple_window = __shrink_ple_window(old,
- ple_window_shrink, ple_window);
+ vmx->ple_window = __shrink_ple_window(old, ple_window,
+ ple_window_shrink,
+ ple_window);
if (vmx->ple_window != old)
vmx->ple_window_dirty = true;
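(Aside, not part of the patch.) The VMX-private __grow_ple_window()/__shrink_ple_window() helpers removed above now live in shared code and take the base, modifier and clamp as explicit arguments, as the updated call sites show. A hedged reconstruction of the presumed shared helpers, inferred from the deleted VMX-local versions and the new call sites (the exact signatures are an assumption):

/*
 * Sketch only: growth/shrink is multiplicative when the modifier is
 * smaller than the base, additive otherwise, and the result is clamped.
 */
static inline int __grow_ple_window(int val, int base, int modifier, int max)
{
	if (modifier < 1)
		return base;

	if (modifier < base)
		val *= modifier;
	else
		val += modifier;

	return val > max ? max : val;
}

static inline int __shrink_ple_window(int val, int base, int modifier, int min)
{
	if (modifier < 1)
		return base;

	if (modifier < base)
		val /= modifier;
	else
		val -= modifier;

	return val < min ? min : val;
}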
@@ -6913,21 +7211,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
}
/*
- * ple_window_actual_max is computed to be one grow_ple_window() below
- * ple_window_max. (See __grow_ple_window for the reason.)
- * This prevents overflows, because ple_window_max is int.
- * ple_window_max effectively rounded down to a multiple of ple_window_grow in
- * this process.
- * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
- */
-static void update_ple_window_actual_max(void)
-{
- ple_window_actual_max =
- __shrink_ple_window(max(ple_window_max, ple_window),
- ple_window_grow, INT_MIN);
-}
-
-/*
* Handler for POSTED_INTERRUPT_WAKEUP_VECTOR.
*/
static void wakeup_handler(void)
@@ -6946,7 +7229,7 @@ static void wakeup_handler(void)
spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
}
-void vmx_enable_tdp(void)
+static void vmx_enable_tdp(void)
{
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
@@ -7047,8 +7330,6 @@ static __init int hardware_setup(void)
else
kvm_disable_tdp();
- update_ple_window_actual_max();
-
/*
* Only enable PML when hardware supports PML feature, and both EPT
* and EPT A/D bit features are enabled -- PML depends on them to work.
@@ -7080,6 +7361,7 @@ static __init int hardware_setup(void)
init_vmcs_shadow_fields();
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+ nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
kvm_mce_cap_supported |= MCG_LMCE_P;
@@ -7108,7 +7390,7 @@ static __exit void hardware_unsetup(void)
*/
static int handle_pause(struct kvm_vcpu *vcpu)
{
- if (ple_gap)
+ if (!kvm_pause_in_guest(vcpu->kvm))
grow_ple_window(vcpu);
/*
@@ -7940,9 +8222,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
u64 eptp, gpa;
} operand;
- if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+ if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_EPT) ||
- !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+ !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -7953,7 +8235,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
- types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+ types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
@@ -8004,9 +8286,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
u64 gla;
} operand;
- if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+ if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
- !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) {
+ !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -8017,7 +8299,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
- types = (vmx->nested.nested_vmx_vpid_caps &
+ types = (vmx->nested.msrs.vpid_caps &
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
if (type >= 32 || !(types & (1 << type))) {
@@ -8111,11 +8393,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
/* Check for memory type validity */
switch (address & VMX_EPTP_MT_MASK) {
case VMX_EPTP_MT_UC:
- if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
+ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
return false;
break;
case VMX_EPTP_MT_WB:
- if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
+ if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
return false;
break;
default:
@@ -8132,7 +8414,7 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
/* AD, if set, should be supported */
if (address & VMX_EPTP_AD_ENABLE_BIT) {
- if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
+ if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
return false;
}
@@ -8776,7 +9058,8 @@ static void dump_vmcs(void)
pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n",
vmcs_read64(GUEST_IA32_DEBUGCTL),
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS));
- if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if (cpu_has_load_perf_global_ctrl &&
+ vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL));
if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS)
@@ -8812,7 +9095,8 @@ static void dump_vmcs(void)
pr_err("EFER = 0x%016llx PAT = 0x%016llx\n",
vmcs_read64(HOST_IA32_EFER),
vmcs_read64(HOST_IA32_PAT));
- if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if (cpu_has_load_perf_global_ctrl &&
+ vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
pr_err("PerfGlobCtl = 0x%016llx\n",
vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
@@ -9164,9 +9448,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
/* We need to handle NMIs before interrupts are enabled */
if (is_nmi(exit_intr_info)) {
- kvm_before_handle_nmi(&vmx->vcpu);
+ kvm_before_interrupt(&vmx->vcpu);
asm("int $2");
- kvm_after_handle_nmi(&vmx->vcpu);
+ kvm_after_interrupt(&vmx->vcpu);
}
}
@@ -9389,7 +9673,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned long cr3, cr4;
+ unsigned long cr3, cr4, evmcs_rsp;
/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!enable_vnmi &&
@@ -9452,9 +9736,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* being speculatively taken.
*/
if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
+
+ evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
+ (unsigned long)&current_evmcs->host_rsp : 0;
+
asm(
/* Store host registers */
"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9463,15 +9751,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
"je 1f \n\t"
"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
+ /* Avoid VMWRITE when Enlightened VMCS is in use */
+ "test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
+ "jz 2f \n\t"
+ "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
+ "jmp 1f \n\t"
+ "2: \n\t"
__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
"1: \n\t"
/* Reload cr2 if changed */
"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
"mov %%cr2, %%" _ASM_DX " \n\t"
"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
- "je 2f \n\t"
+ "je 3f \n\t"
"mov %%" _ASM_AX", %%cr2 \n\t"
- "2: \n\t"
+ "3: \n\t"
		/* Check if vmlaunch or vmresume is needed */
"cmpl $0, %c[launched](%0) \n\t"
/* Load guest registers. Don't clobber flags. */
@@ -9540,7 +9834,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
".popsection"
- : : "c"(vmx), "d"((unsigned long)HOST_RSP),
+ : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
[fail]"i"(offsetof(struct vcpu_vmx, fail)),
[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
@@ -9565,10 +9859,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
[wordsize]"i"(sizeof(ulong))
: "cc", "memory"
#ifdef CONFIG_X86_64
- , "rax", "rbx", "rdi", "rsi"
+ , "rax", "rbx", "rdi"
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#else
- , "eax", "ebx", "edi", "esi"
+ , "eax", "ebx", "edi"
#endif
);
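(Aside, not part of the patch.) The new "S"(evmcs_rsp) input is either 0 or a pointer to current_evmcs->host_rsp, so the inserted test/jz/mov sequence amounts to the C logic below, sketched with stand-in stubs:

#define HOST_RSP 0x00006c14UL	/* VMCS field encoding for HOST_RSP */

static void vmcs_writel(unsigned long field, unsigned long value)
{
	(void)field; (void)value;	/* stand-in for the real VMWRITE wrapper */
}

/*
 * evmcs_rsp mirrors the asm input above: it is &current_evmcs->host_rsp
 * when the enlightened-VMCS static key is on, NULL otherwise.
 */
static void update_host_rsp(unsigned long *evmcs_rsp, unsigned long host_rsp)
{
	if (evmcs_rsp)
		*evmcs_rsp = host_rsp;			/* plain store, no VMWRITE */
	else
		vmcs_writel(HOST_RSP, host_rsp);	/* unchanged VMWRITE path */
}

The asm additionally skips both paths when host_rsp has not changed since the last VM entry, which this sketch omits.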
@@ -9587,15 +9881,20 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
+ /* All fields are clean at this point */
+ if (static_branch_unlikely(&enable_evmcs))
+ current_evmcs->hv_clean_fields |=
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (vmx->host_debugctlmsr)
update_debugctlmsr(vmx->host_debugctlmsr);
@@ -9632,14 +9931,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
__write_pkru(vmx->host_pkru);
}
- /*
- * the KVM_REQ_EVENT optimization bit is only on for one entry, and if
- * we did not inject a still-pending event to L1 now because of
- * nested_run_pending, we need to re-enable this bit.
- */
- if (vmx->nested.nested_run_pending)
- kvm_make_request(KVM_REQ_EVENT, vcpu);
-
vmx->nested.nested_run_pending = 0;
vmx->idt_vectoring_info = 0;
@@ -9656,6 +9947,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
}
STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
+static struct kvm *vmx_vm_alloc(void)
+{
+ struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
+ return &kvm_vmx->kvm;
+}
+
+static void vmx_vm_free(struct kvm *kvm)
+{
+ kfree(to_kvm_vmx(kvm));
+}
+
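(Aside, not part of the patch.) vmx_vm_alloc()/vmx_vm_free() rely on struct kvm_vmx and to_kvm_vmx(), which this patch adds outside the lines shown here. A hedged sketch of their presumed shape, with the field list inferred from the tss_addr/ept_identity_* conversions earlier in the diff:

#include <linux/kvm_host.h>

/* Assumed wrapper: struct kvm embedded first, so &kvm_vmx->kvm and
 * to_kvm_vmx() are plain container_of() conversions. */
struct kvm_vmx {
	struct kvm kvm;

	unsigned int tss_addr;
	bool ept_identity_pagetable_done;
	gpa_t ept_identity_map_addr;
};

static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{
	return container_of(kvm, struct kvm_vmx, kvm);
}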
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -9763,14 +10065,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
goto free_vmcs;
}
- if (enable_ept) {
+ if (enable_ept && !enable_unrestricted_guest) {
err = init_rmode_identity_map(kvm);
if (err)
goto free_vmcs;
}
if (nested) {
- nested_vmx_setup_ctls_msrs(vmx);
+ nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
+ kvm_vcpu_apicv_active(&vmx->vcpu));
vmx->nested.vpid02 = allocate_vpid();
}
@@ -9803,6 +10106,13 @@ free_vcpu:
return ERR_PTR(err);
}
+static int vmx_vm_init(struct kvm *kvm)
+{
+ if (!ple_gap)
+ kvm->arch.pause_in_guest = true;
+ return 0;
+}
+
static void __init vmx_check_processor_compat(void *rtn)
{
struct vmcs_config vmcs_conf;
@@ -9810,6 +10120,7 @@ static void __init vmx_check_processor_compat(void *rtn)
*(int *)rtn = 0;
if (setup_vmcs_config(&vmcs_conf) < 0)
*(int *)rtn = -EIO;
+ nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -9897,12 +10208,12 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_cpuid_entry2 *entry;
- vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
- vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+ vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
+ vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
if (entry && (entry->_reg & (_cpuid_mask))) \
- vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \
+ vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \
} while (0)
entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
@@ -9999,7 +10310,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
- to_vmx(vcpu)->nested.nested_vmx_ept_caps &
+ to_vmx(vcpu)->nested.msrs.ept_caps &
VMX_EPT_EXECUTE_ONLY_BIT,
nested_ept_ad_enabled(vcpu));
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
@@ -10696,6 +11007,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control, vmcs12_exec_ctrl;
+ if (vmx->nested.dirty_vmcs12) {
+ prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
+ vmx->nested.dirty_vmcs12 = false;
+ }
+
/*
* First, the fields that are shadowed. This must be kept in sync
* with vmx_shadow_fields.h.
@@ -10933,9 +11249,14 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
/* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
vmx_set_efer(vcpu, vcpu->arch.efer);
- if (vmx->nested.dirty_vmcs12) {
- prepare_vmcs02_full(vcpu, vmcs12, from_vmentry);
- vmx->nested.dirty_vmcs12 = false;
+ /*
+ * Guest state is invalid and unrestricted guest is disabled,
+ * which means L1 attempted VMEntry to L2 with invalid state.
+ * Fail the VMEntry.
+ */
+ if (vmx->emulation_required) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return 1;
}
/* Shadow page tables on either EPT or shadow page tables. */
@@ -10951,6 +11272,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
return 0;
}
+static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
+{
+ if (!nested_cpu_has_nmi_exiting(vmcs12) &&
+ nested_cpu_has_virtual_nmis(vmcs12))
+ return -EINVAL;
+
+ if (!nested_cpu_has_virtual_nmis(vmcs12) &&
+ nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
+ return -EINVAL;
+
+ return 0;
+}
+
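(Aside, not part of the patch.) nested_vmx_check_nmi_controls() enforces the architectural consistency rules: "virtual NMIs" requires "NMI exiting", and "NMI-window exiting" (CPU_BASED_VIRTUAL_NMI_PENDING) requires "virtual NMIs". The helpers it uses are presumably thin accessors over vmcs12's pin-based controls; nested_cpu_has_nmi_exiting() can be read off the nested_exit_on_nmi() conversion earlier in the diff, and the virtual-NMIs variant is assumed to be analogous:

/* Presumed accessors over the pin-based VM-execution controls in vmcs12. */
static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
{
	return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
}

static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
{
	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
}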
static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -10978,26 +11312,29 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
- vmx->nested.nested_vmx_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high) ||
+ vmx->nested.msrs.procbased_ctls_low,
+ vmx->nested.msrs.procbased_ctls_high) ||
(nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
!vmx_control_verify(vmcs12->secondary_vm_exec_control,
- vmx->nested.nested_vmx_secondary_ctls_low,
- vmx->nested.nested_vmx_secondary_ctls_high)) ||
+ vmx->nested.msrs.secondary_ctls_low,
+ vmx->nested.msrs.secondary_ctls_high)) ||
!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
- vmx->nested.nested_vmx_pinbased_ctls_low,
- vmx->nested.nested_vmx_pinbased_ctls_high) ||
+ vmx->nested.msrs.pinbased_ctls_low,
+ vmx->nested.msrs.pinbased_ctls_high) ||
!vmx_control_verify(vmcs12->vm_exit_controls,
- vmx->nested.nested_vmx_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high) ||
+ vmx->nested.msrs.exit_ctls_low,
+ vmx->nested.msrs.exit_ctls_high) ||
!vmx_control_verify(vmcs12->vm_entry_controls,
- vmx->nested.nested_vmx_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high))
+ vmx->nested.msrs.entry_ctls_low,
+ vmx->nested.msrs.entry_ctls_high))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ if (nested_vmx_check_nmi_controls(vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_cpu_has_vmfunc(vmcs12)) {
if (vmcs12->vm_function_control &
- ~vmx->nested.nested_vmx_vmfunc_controls)
+ ~vmx->nested.msrs.vmfunc_controls)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
if (nested_cpu_has_eptp_switching(vmcs12)) {
@@ -11199,7 +11536,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (ret)
return ret;
- if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+ /*
+ * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken
+ * by event injection, halt vcpu.
+ */
+ if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) &&
+ !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK))
return kvm_vcpu_halt(vcpu);
vmx->nested.nested_run_pending = 1;
@@ -11274,7 +11616,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
} else if (vcpu->arch.nmi_injected) {
vmcs12->idt_vectoring_info_field =
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
- } else if (vcpu->arch.interrupt.pending) {
+ } else if (vcpu->arch.interrupt.injected) {
nr = vcpu->arch.interrupt.nr;
idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
@@ -11922,7 +12264,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
- if (ple_gap)
+ if (!kvm_pause_in_guest(vcpu->kvm))
shrink_ple_window(vcpu);
}
@@ -12240,6 +12582,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
vmx->nested.smm.vmxon = vmx->nested.vmxon;
vmx->nested.vmxon = false;
+ vmx_clear_hlt(vcpu);
return 0;
}
@@ -12281,6 +12624,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_accelerated_tpr = report_flexpriority,
.cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+ .vm_init = vmx_vm_init,
+ .vm_alloc = vmx_vm_alloc,
+ .vm_free = vmx_vm_free,
+
.vcpu_create = vmx_create_vcpu,
.vcpu_free = vmx_free_vcpu,
.vcpu_reset = vmx_vcpu_reset,
@@ -12290,6 +12637,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.vcpu_put = vmx_vcpu_put,
.update_bp_intercept = update_exception_bitmap,
+ .get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
.get_segment_base = vmx_get_segment_base,
@@ -12347,6 +12695,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
.set_tss_addr = vmx_set_tss_addr,
+ .set_identity_map_addr = vmx_set_identity_map_addr,
.get_tdp_level = get_ept_level,
.get_mt_mask = vmx_get_mt_mask,
@@ -12405,7 +12754,38 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
static int __init vmx_init(void)
{
- int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+ int r;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ /*
+	 * Enlightened VMCS usage should be recommended by the hypervisor
+	 * and the host needs to support eVMCS v1 or above. eVMCS support
+	 * can also be disabled with the 'enlightened_vmcs' module parameter.
+ */
+ if (enlightened_vmcs &&
+ ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
+ (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
+ KVM_EVMCS_VERSION) {
+ int cpu;
+
+ /* Check that we have assist pages on all online CPUs */
+ for_each_online_cpu(cpu) {
+ if (!hv_get_vp_assist_page(cpu)) {
+ enlightened_vmcs = false;
+ break;
+ }
+ }
+
+ if (enlightened_vmcs) {
+ pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
+ static_branch_enable(&enable_evmcs);
+ }
+ } else {
+ enlightened_vmcs = false;
+ }
+#endif
+
+ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
__alignof__(struct vcpu_vmx), THIS_MODULE);
if (r)
return r;
@@ -12426,6 +12806,29 @@ static void __exit vmx_exit(void)
#endif
kvm_exit();
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (static_branch_unlikely(&enable_evmcs)) {
+ int cpu;
+ struct hv_vp_assist_page *vp_ap;
+ /*
+ * Reset everything to support using non-enlightened VMCS
+ * access later (e.g. when we reload the module with
+ * enlightened_vmcs=0)
+ */
+ for_each_online_cpu(cpu) {
+ vp_ap = hv_get_vp_assist_page(cpu);
+
+ if (!vp_ap)
+ continue;
+
+ vp_ap->current_nested_vmcs = 0;
+ vp_ap->enlighten_vmentry = 0;
+ }
+
+ static_branch_disable(&enable_evmcs);
+ }
+#endif
}
module_init(vmx_init)
diff --git a/arch/x86/kvm/vmx_evmcs.h b/arch/x86/kvm/vmx_evmcs.h
new file mode 100644
index 0000000..210a884
--- /dev/null
+++ b/arch/x86/kvm/vmx_evmcs.h
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_EVMCS_H
+#define __KVM_X86_VMX_EVMCS_H
+
+#include <asm/hyperv-tlfs.h>
+
+#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n)))))
+#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x)
+#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \
+ {EVMCS1_OFFSET(name), clean_field}
+
+struct evmcs_field {
+ u16 offset;
+ u16 clean_field;
+};
+
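(Aside, not part of the new file.) Each EVMCS1_FIELD() entry in the table that follows is keyed by ROL16(field, 6), i.e. the 16-bit VMCS field encoding rotated so related fields index a compact range, and stores the byte offset of the matching member of struct hv_enlightened_vmcs together with its clean-field bit. A hedged sketch of how such a table is typically consumed (the actual lookup helper lives outside the lines shown here and would follow the table definition; the name and error handling are assumptions):

/* Illustrative lookup over the vmcs_field_to_evmcs_1[] table below. */
static inline int evmcs_field_offset(unsigned long field, u16 *clean_field)
{
	unsigned int index = ROL16(field, 6);
	const struct evmcs_field *evmcs_field;

	if (index >= ARRAY_SIZE(vmcs_field_to_evmcs_1))
		return -1;	/* field not present in eVMCS v1 */

	evmcs_field = &vmcs_field_to_evmcs_1[index];
	if (clean_field)
		*clean_field = evmcs_field->clean_field;

	return evmcs_field->offset;
}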
+static const struct evmcs_field vmcs_field_to_evmcs_1[] = {
+ /* 64 bit rw */
+ EVMCS1_FIELD(GUEST_RIP, guest_rip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(GUEST_RSP, guest_rsp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR0, host_cr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR3, host_cr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CR4, host_cr4,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_RIP, host_rip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+ EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP),
+ EVMCS1_FIELD(MSR_BITMAP, msr_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP),
+ EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(TSC_OFFSET, tsc_offset,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+ EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR0, guest_cr0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR3, guest_cr3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_CR4, guest_cr4,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(GUEST_DR7, guest_dr7,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR),
+ EVMCS1_FIELD(HOST_FS_BASE, host_fs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_GS_BASE, host_gs_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_TR_BASE, host_tr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(HOST_RSP, host_rsp,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER),
+ EVMCS1_FIELD(EPT_POINTER, ept_pointer,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+ EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2),
+
+ /* 64 bit read only */
+ EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ /*
+ * Not defined in KVM:
+ *
+ * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+	 * EVMCS1_FIELD(0x00006406, exit_io_instruction_edi,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip,
+ * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE);
+ */
+ EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+ /*
+ * No mask defined in the spec as Hyper-V doesn't currently support
+ * these. Future proof by resetting the whole clean field mask on
+ * access.
+ */
+ EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+ /* 32 bit rw */
+ EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC),
+ EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC),
+ EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN),
+ EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY),
+ EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ vm_entry_exception_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT),
+ EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1),
+ EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+ EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1),
+
+ /* 32 bit read only */
+ EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+ EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE),
+
+ /* No mask defined in the spec (not used) */
+ EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+ EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL),
+
+ /* 16 bit rw */
+ EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1),
+ EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2),
+ EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id,
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT),
+};
+
+static __always_inline int get_evmcs_offset(unsigned long field,
+ u16 *clean_field)
+{
+ unsigned int index = ROL16(field, 6);
+ const struct evmcs_field *evmcs_field;
+
+ if (unlikely(index >= ARRAY_SIZE(vmcs_field_to_evmcs_1))) {
+ WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n",
+ field);
+ return -ENOENT;
+ }
+
+ evmcs_field = &vmcs_field_to_evmcs_1[index];
+
+ if (clean_field)
+ *clean_field = evmcs_field->clean_field;
+
+ return evmcs_field->offset;
+}
+
+#undef ROL16
+
+#endif /* __KVM_X86_VMX_EVMCS_H */
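The table is indexed by rotating the architectural VMCS field encoding left by six bits, which is why get_evmcs_offset() recomputes ROL16(field, 6) before bounds-checking against the array size. A worked example of that indexing, using the standard GUEST_RIP encoding (0x681e) purely as an illustration:

	/* Illustrative only: the ROL16(field, 6) indexing done by hand. */
	u16 field = 0x681e;                              /* GUEST_RIP encoding */
	u16 index = (u16)((field << 6) | (field >> 10)); /* == ROL16(field, 6) == 0x079a */
	/*
	 * The designated initializers keep vmcs_field_to_evmcs_1 sparse, so only
	 * encodings listed in the table carry a meaningful offset/clean_field pair.
	 */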
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a0b54..b2ff74b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -102,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+static void store_regs(struct kvm_vcpu *vcpu);
+static int sync_regs(struct kvm_vcpu *vcpu);
struct kvm_x86_ops *kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -140,6 +142,13 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
+bool __read_mostly enable_vmware_backdoor = false;
+module_param(enable_vmware_backdoor, bool, S_IRUGO);
+EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
+
+static bool __read_mostly force_emulation_prefix = false;
+module_param(force_emulation_prefix, bool, S_IRUGO);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -1032,7 +1041,11 @@ static u32 emulated_msrs[] = {
HV_X64_MSR_VP_RUNTIME,
HV_X64_MSR_SCONTROL,
HV_X64_MSR_STIMER0_CONFIG,
- HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+ HV_X64_MSR_VP_ASSIST_PAGE,
+ HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
+ HV_X64_MSR_TSC_EMULATION_STATUS,
+
+ MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
MSR_KVM_PV_EOI_EN,
MSR_IA32_TSC_ADJUST,
@@ -1049,6 +1062,64 @@ static u32 emulated_msrs[] = {
static unsigned num_emulated_msrs;
+/*
+ * List of msr numbers which are used to expose MSR-based features that
+ * can be used by a hypervisor to validate requested CPU features.
+ */
+static u32 msr_based_features[] = {
+ MSR_IA32_VMX_BASIC,
+ MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+ MSR_IA32_VMX_PINBASED_CTLS,
+ MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+ MSR_IA32_VMX_PROCBASED_CTLS,
+ MSR_IA32_VMX_TRUE_EXIT_CTLS,
+ MSR_IA32_VMX_EXIT_CTLS,
+ MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+ MSR_IA32_VMX_ENTRY_CTLS,
+ MSR_IA32_VMX_MISC,
+ MSR_IA32_VMX_CR0_FIXED0,
+ MSR_IA32_VMX_CR0_FIXED1,
+ MSR_IA32_VMX_CR4_FIXED0,
+ MSR_IA32_VMX_CR4_FIXED1,
+ MSR_IA32_VMX_VMCS_ENUM,
+ MSR_IA32_VMX_PROCBASED_CTLS2,
+ MSR_IA32_VMX_EPT_VPID_CAP,
+ MSR_IA32_VMX_VMFUNC,
+
+ MSR_F10H_DECFG,
+ MSR_IA32_UCODE_REV,
+};
+
+static unsigned int num_msr_based_features;
+
+static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+ switch (msr->index) {
+ case MSR_IA32_UCODE_REV:
+ rdmsrl(msr->index, msr->data);
+ break;
+ default:
+ if (kvm_x86_ops->get_msr_feature(msr))
+ return 1;
+ }
+ return 0;
+}
+
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+ struct kvm_msr_entry msr;
+ int r;
+
+ msr.index = index;
+ r = kvm_get_msr_feature(&msr);
+ if (r)
+ return r;
+
+ *data = msr.data;
+
+ return 0;
+}
+
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
{
if (efer & efer_reserved_bits)
@@ -2222,7 +2293,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr) {
case MSR_AMD64_NB_CFG:
- case MSR_IA32_UCODE_REV:
case MSR_IA32_UCODE_WRITE:
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
@@ -2230,6 +2300,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_DC_CFG:
break;
+ case MSR_IA32_UCODE_REV:
+ if (msr_info->host_initiated)
+ vcpu->arch.microcode_version = data;
+ break;
case MSR_EFER:
return set_efer(vcpu, data);
case MSR_K7_HWCR:
@@ -2390,6 +2464,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_set_msr_common(vcpu, msr, data,
msr_info->host_initiated);
case MSR_IA32_BBL_CR_CTL3:
@@ -2516,6 +2593,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_AMD64_DC_CFG:
msr_info->data = 0;
break;
+ case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -2525,7 +2603,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = 0;
break;
case MSR_IA32_UCODE_REV:
- msr_info->data = 0x100000000ULL;
+ msr_info->data = vcpu->arch.microcode_version;
break;
case MSR_MTRRcap:
case 0x200 ... 0x2ff:
@@ -2619,6 +2697,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
case HV_X64_MSR_CRASH_CTL:
case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+ case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_CONTROL:
+ case HV_X64_MSR_TSC_EMULATION_STATUS:
return kvm_hv_get_msr_common(vcpu,
msr_info->index, &msr_info->data);
break;
@@ -2680,13 +2761,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
int (*do_msr)(struct kvm_vcpu *vcpu,
unsigned index, u64 *data))
{
- int i, idx;
+ int i;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
for (i = 0; i < msrs->nmsrs; ++i)
if (do_msr(vcpu, entries[i].index, &entries[i].data))
break;
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
return i;
}
@@ -2737,9 +2816,15 @@ out:
return r;
}
+static inline bool kvm_can_mwait_in_guest(void)
+{
+ return boot_cpu_has(X86_FEATURE_MWAIT) &&
+ !boot_cpu_has_bug(X86_BUG_MONITOR);
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
- int r;
+ int r = 0;
switch (ext) {
case KVM_CAP_IRQCHIP:
@@ -2769,6 +2854,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_SYNIC:
case KVM_CAP_HYPERV_SYNIC2:
case KVM_CAP_HYPERV_VP_INDEX:
+ case KVM_CAP_HYPERV_EVENTFD:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2785,13 +2871,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_GET_MSR_FEATURES:
r = 1;
break;
+ case KVM_CAP_SYNC_REGS:
+ r = KVM_SYNC_X86_VALID_FIELDS;
+ break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
- case KVM_CAP_X86_GUEST_MWAIT:
- r = kvm_mwait_in_guest();
+ case KVM_CAP_X86_DISABLE_EXITS:
+ r |= KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE;
+ if (kvm_can_mwait_in_guest())
+ r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
@@ -2832,7 +2924,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_X2APIC_API_VALID_FLAGS;
break;
default:
- r = 0;
break;
}
return r;
@@ -2899,6 +2990,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
goto out;
r = 0;
break;
+ case KVM_GET_MSR_FEATURE_INDEX_LIST: {
+ struct kvm_msr_list __user *user_msr_list = argp;
+ struct kvm_msr_list msr_list;
+ unsigned int n;
+
+ r = -EFAULT;
+ if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+ goto out;
+ n = msr_list.nmsrs;
+ msr_list.nmsrs = num_msr_based_features;
+ if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+ goto out;
+ r = -E2BIG;
+ if (n < msr_list.nmsrs)
+ goto out;
+ r = -EFAULT;
+ if (copy_to_user(user_msr_list->indices, &msr_based_features,
+ num_msr_based_features * sizeof(u32)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_GET_MSRS:
+ r = msr_io(NULL, argp, do_get_msr_feature, 1);
+ break;
}
default:
r = -EINVAL;
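For context, the new KVM_GET_MSR_FEATURE_INDEX_LIST / KVM_GET_MSRS pair above is driven from userspace against the system /dev/kvm fd. A hedged sketch of that flow, with error handling omitted; the function name, the fd, and the 64-entry bound are illustrative assumptions:

	#include <linux/kvm.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>

	static __u64 read_first_feature_msr(int kvm_fd)
	{
		struct kvm_msr_list *list = calloc(1, sizeof(*list) + 64 * sizeof(__u32));
		struct kvm_msrs *msrs = calloc(1, sizeof(*msrs) + sizeof(struct kvm_msr_entry));

		list->nmsrs = 64;
		ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);

		msrs->nmsrs = 1;
		msrs->entries[0].index = list->indices[0];
		ioctl(kvm_fd, KVM_GET_MSRS, msrs);	/* system fd, routed to do_get_msr_feature() */

		return msrs->entries[0].data;
	}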
@@ -3199,7 +3315,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->exception.error_code = vcpu->arch.exception.error_code;
events->interrupt.injected =
- vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
+ vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = 0;
events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
@@ -3252,7 +3368,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.exception.has_error_code = events->exception.has_error_code;
vcpu->arch.exception.error_code = events->exception.error_code;
- vcpu->arch.interrupt.pending = events->interrupt.injected;
+ vcpu->arch.interrupt.injected = events->interrupt.injected;
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
@@ -3636,12 +3752,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = 0;
break;
}
- case KVM_GET_MSRS:
+ case KVM_GET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_get_msr, 1);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
- case KVM_SET_MSRS:
+ }
+ case KVM_SET_MSRS: {
+ int idx = srcu_read_lock(&vcpu->kvm->srcu);
r = msr_io(vcpu, argp, do_set_msr, 0);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
+ }
case KVM_TPR_ACCESS_REPORTING: {
struct kvm_tpr_access_ctl tac;
@@ -3845,8 +3967,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
u64 ident_addr)
{
- kvm->arch.ept_identity_map_addr = ident_addr;
- return 0;
+ return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr);
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
@@ -4106,6 +4227,20 @@ split_irqchip_unlock:
r = 0;
break;
+ case KVM_CAP_X86_DISABLE_EXITS:
+ r = -EINVAL;
+ if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
+ break;
+
+ if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+ kvm_can_mwait_in_guest())
+ kvm->arch.mwait_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
+ kvm->arch.hlt_in_guest = true;
+ if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
+ kvm->arch.pause_in_guest = true;
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
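A VMM opts into the new behaviour per VM with KVM_ENABLE_CAP on the VM fd. A minimal sketch, assuming vm_fd is an already-created VM; flag names as spelled in this patch:

	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_X86_DISABLE_EXITS,
		.args[0] = KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE,
	};

	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* HLT/PAUSE now run natively in the guest */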
@@ -4410,6 +4545,15 @@ set_identity_unlock:
r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
break;
}
+ case KVM_HYPERV_EVENTFD: {
+ struct kvm_hyperv_eventfd hvevfd;
+
+ r = -EFAULT;
+ if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
+ goto out;
+ r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -4464,6 +4608,19 @@ static void kvm_init_msr_list(void)
j++;
}
num_emulated_msrs = j;
+
+ for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ struct kvm_msr_entry msr;
+
+ msr.index = msr_based_features[i];
+ if (kvm_get_msr_feature(&msr))
+ continue;
+
+ if (j < i)
+ msr_based_features[j] = msr_based_features[i];
+ j++;
+ }
+ num_msr_based_features = j;
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -4686,6 +4843,30 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
+int handle_ud(struct kvm_vcpu *vcpu)
+{
+ int emul_type = EMULTYPE_TRAP_UD;
+ enum emulation_result er;
+ char sig[5]; /* ud2; .ascii "kvm" */
+ struct x86_exception e;
+
+ if (force_emulation_prefix &&
+ kvm_read_guest_virt(&vcpu->arch.emulate_ctxt,
+ kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 &&
+ memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+ kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
+ emul_type = 0;
+ }
+
+ er = emulate_instruction(vcpu, emul_type);
+ if (er == EMULATE_USER_EXIT)
+ return 0;
+ if (er != EMULATE_DONE)
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+EXPORT_SYMBOL_GPL(handle_ud);
+
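The five-byte signature is simply a ud2 followed by the ASCII bytes "kvm"; with the (default-off) force_emulation_prefix parameter set, a guest can push the instruction that follows the prefix through KVM's emulator. A guest-side sketch, illustrative only:

	unsigned int eax = 0, ebx, ecx, edx;

	/* Requires the host to load KVM with kvm.force_emulation_prefix=1. */
	asm volatile("ud2; .ascii \"kvm\"; cpuid"
		     : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx));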
static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t gpa, bool write)
{
@@ -5527,27 +5708,27 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
kvm_rip_write(vcpu, ctxt->eip);
kvm_set_rflags(vcpu, ctxt->eflags);
- if (irq == NMI_VECTOR)
- vcpu->arch.nmi_pending = 0;
- else
- vcpu->arch.interrupt.pending = false;
-
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
-static int handle_emulation_failure(struct kvm_vcpu *vcpu)
+static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
{
int r = EMULATE_DONE;
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
+
+ if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
+ return EMULATE_FAIL;
+
if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
r = EMULATE_USER_EXIT;
}
+
kvm_queue_exception(vcpu, UD_VECTOR);
return r;
@@ -5791,6 +5972,37 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
return false;
}
+static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
+{
+ switch (ctxt->opcode_len) {
+ case 1:
+ switch (ctxt->b) {
+ case 0xe4: /* IN */
+ case 0xe5:
+ case 0xec:
+ case 0xed:
+ case 0xe6: /* OUT */
+ case 0xe7:
+ case 0xee:
+ case 0xef:
+ case 0x6c: /* INS */
+ case 0x6d:
+ case 0x6e: /* OUTS */
+ case 0x6f:
+ return true;
+ }
+ break;
+ case 2:
+ switch (ctxt->b) {
+ case 0x33: /* RDPMC */
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
+
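These opcodes cover the classic VMware backdoor convention, where the guest issues an I/O instruction on port 0x5658 with a magic value in EAX rather than talking to a normal device. A hedged guest-side sketch of the handshake; command 10 is conventionally the "get version" request:

	unsigned int eax = 0x564d5868;	/* backdoor magic "VMXh" */
	unsigned int ebx = ~0u;
	unsigned int ecx = 10;		/* get-version command   */
	unsigned int edx = 0x5658;	/* backdoor I/O port     */

	asm volatile("inl %%dx, %%eax"
		     : "+a"(eax), "+b"(ebx), "+c"(ecx), "+d"(edx));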
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2,
int emulation_type,
@@ -5843,10 +6055,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return EMULATE_DONE;
if (emulation_type & EMULTYPE_SKIP)
return EMULATE_FAIL;
- return handle_emulation_failure(vcpu);
+ return handle_emulation_failure(vcpu, emulation_type);
}
}
+ if ((emulation_type & EMULTYPE_VMWARE) &&
+ !is_vmware_backdoor_opcode(ctxt))
+ return EMULATE_FAIL;
+
if (emulation_type & EMULTYPE_SKIP) {
kvm_rip_write(vcpu, ctxt->_eip);
if (ctxt->eflags & X86_EFLAGS_RF)
@@ -5878,7 +6094,7 @@ restart:
emulation_type))
return EMULATE_DONE;
- return handle_emulation_failure(vcpu);
+ return handle_emulation_failure(vcpu, emulation_type);
}
if (ctxt->have_exception) {
@@ -5931,7 +6147,8 @@ restart:
}
EXPORT_SYMBOL_GPL(x86_emulate_instruction);
-int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
+static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
+ unsigned short port)
{
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
@@ -5940,7 +6157,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
vcpu->arch.pio.count = 0;
return ret;
}
-EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
{
@@ -5964,7 +6180,8 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
return 1;
}
-int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size,
+ unsigned short port)
{
unsigned long val;
int ret;
@@ -5983,7 +6200,21 @@ int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
return 0;
}
-EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
+int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in)
+{
+ int ret = kvm_skip_emulated_instruction(vcpu);
+
+ /*
+ * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ if (in)
+ return kvm_fast_pio_in(vcpu, size, port) && ret;
+ else
+ return kvm_fast_pio_out(vcpu, size, port) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio);
static int kvmclock_cpu_down_prep(unsigned int cpu)
{
@@ -6161,7 +6392,8 @@ static void kvm_timer_init(void)
kvmclock_cpu_online, kvmclock_cpu_down_prep);
}
-static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu);
int kvm_is_in_guest(void)
{
@@ -6194,18 +6426,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
.get_guest_ip = kvm_get_guest_ip,
};
-void kvm_before_handle_nmi(struct kvm_vcpu *vcpu)
-{
- __this_cpu_write(current_vcpu, vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_before_handle_nmi);
-
-void kvm_after_handle_nmi(struct kvm_vcpu *vcpu)
-{
- __this_cpu_write(current_vcpu, NULL);
-}
-EXPORT_SYMBOL_GPL(kvm_after_handle_nmi);
-
static void kvm_set_mmio_spte_mask(void)
{
u64 mask;
@@ -6559,27 +6779,36 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
int r;
/* try to reinject previous events if any */
- if (vcpu->arch.exception.injected) {
- kvm_x86_ops->queue_exception(vcpu);
- return 0;
- }
+ if (vcpu->arch.exception.injected)
+ kvm_x86_ops->queue_exception(vcpu);
/*
- * Exceptions must be injected immediately, or the exception
- * frame will have the address of the NMI or interrupt handler.
+ * Do not inject an NMI or interrupt if there is a pending
+ * exception. Exceptions and interrupts are recognized at
+ * instruction boundaries, i.e. the start of an instruction.
+ * Trap-like exceptions, e.g. #DB, have higher priority than
+ * NMIs and interrupts, i.e. traps are recognized before an
+ * NMI/interrupt that's pending on the same instruction.
+ * Fault-like exceptions, e.g. #GP and #PF, are the lowest
+ * priority, but are only generated (pended) during instruction
+ * execution, i.e. a pending fault-like exception means the
+ * fault occurred on the *previous* instruction and must be
+ * serviced prior to recognizing any new events in order to
+ * fully complete the previous instruction.
*/
- if (!vcpu->arch.exception.pending) {
- if (vcpu->arch.nmi_injected) {
+ else if (!vcpu->arch.exception.pending) {
+ if (vcpu->arch.nmi_injected)
kvm_x86_ops->set_nmi(vcpu);
- return 0;
- }
-
- if (vcpu->arch.interrupt.pending) {
+ else if (vcpu->arch.interrupt.injected)
kvm_x86_ops->set_irq(vcpu);
- return 0;
- }
}
+ /*
+ * Call check_nested_events() even if we reinjected a previous event
+ * in order for the caller to determine if it should require an
+ * immediate exit from L2 to L1 due to pending L1 events.
+ */
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
if (r != 0)
@@ -6592,6 +6821,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code);
+ WARN_ON_ONCE(vcpu->arch.exception.injected);
vcpu->arch.exception.pending = false;
vcpu->arch.exception.injected = true;
@@ -6606,7 +6836,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
}
kvm_x86_ops->queue_exception(vcpu);
- } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
+ }
+
+ /* Don't consider new event if we re-injected an event */
+ if (kvm_event_needs_reinjection(vcpu))
+ return 0;
+
+ if (vcpu->arch.smi_pending && !is_smm(vcpu) &&
+ kvm_x86_ops->smi_allowed(vcpu)) {
vcpu->arch.smi_pending = false;
++vcpu->arch.smi_count;
enter_smm(vcpu);
@@ -6900,8 +7137,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
{
- u64 eoi_exit_bitmap[4];
-
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
@@ -6914,6 +7149,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_x86_ops->sync_pir_to_irr(vcpu);
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
+
+ if (is_guest_mode(vcpu))
+ vcpu->arch.load_eoi_exitmap_pending = true;
+ else
+ kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
+}
+
+static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
+{
+ u64 eoi_exit_bitmap[4];
+
+ if (!kvm_apic_hw_enabled(vcpu->arch.apic))
+ return;
+
bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
vcpu_to_synic(vcpu)->vec_bitmap, 256);
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
@@ -7028,6 +7277,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
vcpu_scan_ioapic(vcpu);
+ if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu))
+ vcpu_load_eoi_exitmap(vcpu);
if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
kvm_vcpu_reload_apic_access_page(vcpu);
if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
@@ -7206,7 +7457,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_put_guest_xcr0(vcpu);
+ kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
+ kvm_after_interrupt(vcpu);
++vcpu->stat.exits;
@@ -7415,7 +7668,6 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
-
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -7441,6 +7693,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
goto out;
}
+ if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (vcpu->run->kvm_dirty_regs) {
+ r = sync_regs(vcpu);
+ if (r != 0)
+ goto out;
+ }
+
/* re-sync apic's tpr */
if (!lapic_in_kernel(vcpu)) {
if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
@@ -7465,6 +7728,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
out:
kvm_put_guest_fpu(vcpu);
+ if (vcpu->run->kvm_valid_regs)
+ store_regs(vcpu);
post_kvm_run_save(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -7472,10 +7737,8 @@ out:
return r;
}
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
-
if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
/*
* We are here if userspace calls get_regs() in the middle of
@@ -7508,15 +7771,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->rip = kvm_rip_read(vcpu);
regs->rflags = kvm_get_rflags(vcpu);
+}
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ __get_regs(vcpu, regs);
vcpu_put(vcpu);
return 0;
}
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
- vcpu_load(vcpu);
-
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
@@ -7545,7 +7811,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.exception.pending = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_load(vcpu);
+ __set_regs(vcpu, regs);
vcpu_put(vcpu);
return 0;
}
@@ -7560,13 +7831,10 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
}
EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct desc_ptr dt;
- vcpu_load(vcpu);
-
kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@ -7594,10 +7862,16 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
- if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
+ if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
set_bit(vcpu->arch.interrupt.nr,
(unsigned long *)sregs->interrupt_bitmap);
+}
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ vcpu_load(vcpu);
+ __get_sregs(vcpu, sregs);
vcpu_put(vcpu);
return 0;
}
@@ -7669,7 +7943,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
}
EXPORT_SYMBOL_GPL(kvm_task_switch);
-int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
/*
@@ -7692,8 +7966,7 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return 0;
}
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
- struct kvm_sregs *sregs)
+static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
@@ -7701,8 +7974,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct desc_ptr dt;
int ret = -EINVAL;
- vcpu_load(vcpu);
-
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
(sregs->cr4 & X86_CR4_OSXSAVE))
goto out;
@@ -7781,6 +8052,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
ret = 0;
out:
+ return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
+{
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = __set_sregs(vcpu, sregs);
vcpu_put(vcpu);
return ret;
}
@@ -7907,6 +8188,45 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
return 0;
}
+static void store_regs(struct kvm_vcpu *vcpu)
+{
+ BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
+
+ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
+ __get_regs(vcpu, &vcpu->run->s.regs.regs);
+
+ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
+ __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
+
+ if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
+ kvm_vcpu_ioctl_x86_get_vcpu_events(
+ vcpu, &vcpu->run->s.regs.events);
+}
+
+static int sync_regs(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
+ return -EINVAL;
+
+ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
+ __set_regs(vcpu, &vcpu->run->s.regs.regs);
+ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
+ }
+ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
+ if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+ return -EINVAL;
+ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
+ }
+ if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
+ if (kvm_vcpu_ioctl_x86_set_vcpu_events(
+ vcpu, &vcpu->run->s.regs.events))
+ return -EINVAL;
+ vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
+ }
+
+ return 0;
+}
+
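Userspace drives this per KVM_RUN through the shared run area instead of separate get/set ioctls. A minimal sketch, assuming vcpu_fd and the mmap'ed kvm_run already exist:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static void run_with_synced_rip(int vcpu_fd, struct kvm_run *run, __u64 rip)
	{
		run->s.regs.regs.rip = rip;
		run->kvm_dirty_regs = KVM_SYNC_X86_REGS;	/* consumed by sync_regs() */
		ioctl(vcpu_fd, KVM_RUN, 0);
		/* store_regs() has refreshed run->s.regs for every bit in kvm_valid_regs */
	}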
static void fx_init(struct kvm_vcpu *vcpu)
{
fpstate_init(&vcpu->arch.guest_fpu.state);
@@ -8017,6 +8337,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
{
+ kvm_lapic_reset(vcpu, init_event);
+
vcpu->arch.hflags = 0;
vcpu->arch.smi_pending = 0;
@@ -8360,7 +8682,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
- mutex_init(&kvm->arch.hyperv.hv_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
@@ -8369,6 +8690,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
+ kvm_hv_init_vm(kvm);
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
@@ -8460,10 +8782,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
return r;
}
- if (!size) {
- r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
- WARN_ON(r < 0);
- }
+ if (!size)
+ vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
return 0;
}
@@ -8501,6 +8821,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
+ kvm_hv_destroy_vm(kvm);
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b91215d..7d35ce6 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,12 +2,48 @@
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H
-#include <asm/processor.h>
-#include <asm/mwait.h>
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
+#define KVM_DEFAULT_PLE_GAP 128
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_DEFAULT_PLE_WINDOW_GROW 2
+#define KVM_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX
+#define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX
+#define KVM_SVM_DEFAULT_PLE_WINDOW 3000
+
+static inline unsigned int __grow_ple_window(unsigned int val,
+ unsigned int base, unsigned int modifier, unsigned int max)
+{
+ u64 ret = val;
+
+ if (modifier < 1)
+ return base;
+
+ if (modifier < base)
+ ret *= modifier;
+ else
+ ret += modifier;
+
+ return min(ret, (u64)max);
+}
+
+static inline unsigned int __shrink_ple_window(unsigned int val,
+ unsigned int base, unsigned int modifier, unsigned int min)
+{
+ if (modifier < 1)
+ return base;
+
+ if (modifier < base)
+ val /= modifier;
+ else
+ val -= modifier;
+
+ return max(val, min);
+}
+
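A quick sanity check of the arithmetic with the VMX defaults (base 4096, grow 2, shrink 0): growth is multiplicative while the modifier is below the base and additive otherwise, and a shrink modifier below one resets to the base.

	unsigned int grown  = __grow_ple_window(4096, 4096, 2, UINT_MAX);    /* 8192: modifier < base, multiply   */
	unsigned int added  = __grow_ple_window(4096, 4096, 8192, UINT_MAX); /* 12288: modifier >= base, add      */
	unsigned int shrunk = __shrink_ple_window(8192, 4096, 0, 128);       /* 4096: modifier < 1, reset to base */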
#define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
@@ -19,19 +55,19 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
bool soft)
{
- vcpu->arch.interrupt.pending = true;
+ vcpu->arch.interrupt.injected = true;
vcpu->arch.interrupt.soft = soft;
vcpu->arch.interrupt.nr = vector;
}
static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
{
- vcpu->arch.interrupt.pending = false;
+ vcpu->arch.interrupt.injected = false;
}
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending ||
+ return vcpu->arch.exception.injected || vcpu->arch.interrupt.injected ||
vcpu->arch.nmi_injected;
}
@@ -205,8 +241,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
return !(kvm->arch.disabled_quirks & quirk);
}
-void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
-void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_set_pending_timer(struct kvm_vcpu *vcpu);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
@@ -221,6 +255,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception);
+int handle_ud(struct kvm_vcpu *vcpu);
+
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
@@ -242,6 +278,8 @@ extern unsigned int min_timer_period_us;
extern unsigned int lapic_timer_advance_ns;
+extern bool enable_vmware_backdoor;
+
extern struct static_key kvm_no_apic_vcpu;
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
@@ -264,10 +302,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})
-static inline bool kvm_mwait_in_guest(void)
+#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HTL (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
+#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
+ KVM_X86_DISABLE_EXITS_HTL | \
+ KVM_X86_DISABLE_EXITS_PAUSE)
+
+static inline bool kvm_mwait_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.mwait_in_guest;
+}
+
+static inline bool kvm_hlt_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.hlt_in_guest;
+}
+
+static inline bool kvm_pause_in_guest(struct kvm *kvm)
+{
+ return kvm->arch.pause_in_guest;
+}
+
+DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
+
+static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
+{
+ __this_cpu_write(current_vcpu, vcpu);
+}
+
+static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
- return boot_cpu_has(X86_FEATURE_MWAIT) &&
- !boot_cpu_has_bug(X86_BUG_MONITOR);
+ __this_cpu_write(current_vcpu, NULL);
}
#endif
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700..25a972c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 81b1635..88acd34 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,6 +1,4 @@
#include <linux/linkage.h>
-#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
#include <asm/export.h>
/*
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 693cce0..fee8b9c 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -2,6 +2,7 @@
#include <linux/export.h>
#include <linux/preempt.h>
#include <linux/smp.h>
+#include <linux/completion.h>
#include <asm/msr.h>
static void __rdmsr_on_cpu(void *info)
@@ -143,13 +144,19 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
}
EXPORT_SYMBOL(wrmsr_on_cpus);
+struct msr_info_completion {
+ struct msr_info msr;
+ struct completion done;
+};
+
/* These "safe" variants are slower and should be used when the target MSR
may not actually exist. */
static void __rdmsr_safe_on_cpu(void *info)
{
- struct msr_info *rv = info;
+ struct msr_info_completion *rv = info;
- rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+ rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, &rv->msr.reg.h);
+ complete(&rv->done);
}
static void __wrmsr_safe_on_cpu(void *info)
@@ -161,17 +168,26 @@ static void __wrmsr_safe_on_cpu(void *info)
int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
{
+ struct msr_info_completion rv;
+ call_single_data_t csd = {
+ .func = __rdmsr_safe_on_cpu,
+ .info = &rv,
+ };
int err;
- struct msr_info rv;
memset(&rv, 0, sizeof(rv));
+ init_completion(&rv.done);
+ rv.msr.msr_no = msr_no;
- rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
- *l = rv.reg.l;
- *h = rv.reg.h;
+ err = smp_call_function_single_async(cpu, &csd);
+ if (!err) {
+ wait_for_completion(&rv.done);
+ err = rv.msr.err;
+ }
+ *l = rv.msr.reg.l;
+ *h = rv.msr.reg.h;
- return err ? err : rv.err;
+ return err;
}
EXPORT_SYMBOL(rdmsr_safe_on_cpu);
@@ -209,16 +225,13 @@ EXPORT_SYMBOL(wrmsrl_safe_on_cpu);
int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q)
{
+ u32 low, high;
int err;
- struct msr_info rv;
- memset(&rv, 0, sizeof(rv));
+ err = rdmsr_safe_on_cpu(cpu, msr_no, &low, &high);
+ *q = (u64)high << 32 | low;
- rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
- *q = rv.reg.q;
-
- return err ? err : rv.err;
+ return err;
}
EXPORT_SYMBOL(rdmsrl_safe_on_cpu);
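The calling convention is unchanged by the async rework: a non-zero return still means either the cross-CPU call or the MSR read itself failed. An illustrative in-kernel use:

	u64 val;

	if (rdmsrl_safe_on_cpu(1, MSR_IA32_TSC, &val))	/* MSR 0x10, chosen only as an example */
		pr_warn("TSC not readable on CPU 1\n");
	else
		pr_info("TSC on CPU 1: %llu\n", (unsigned long long)val);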
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3..c909961 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
GENERATE_THUNK(r14)
GENERATE_THUNK(r15)
#endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
- mov $(\nr / 2), %_ASM_BX
- .align 16
-771:
- call 772f
-773: /* speculation trap */
- pause
- lfence
- jmp 773b
- .align 16
-772:
- call 774f
-775: /* speculation trap */
- pause
- lfence
- jmp 775b
- .align 16
-774:
- dec %_ASM_BX
- jnz 771b
- add $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-
-ENTRY(__fill_rsb)
- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
- ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
- ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 27e9e90..4b101dd 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,12 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
-# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c
-KCOV_INSTRUMENT_tlb.o := n
-KCOV_INSTRUMENT_mem_encrypt.o := n
+# Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_mem_encrypt.o := n
+KCOV_INSTRUMENT_mem_encrypt_identity.o := n
-KASAN_SANITIZE_mem_encrypt.o := n
+KASAN_SANITIZE_mem_encrypt.o := n
+KASAN_SANITIZE_mem_encrypt_identity.o := n
ifdef CONFIG_FUNCTION_TRACER
-CFLAGS_REMOVE_mem_encrypt.o = -pg
+CFLAGS_REMOVE_mem_encrypt.o = -pg
+CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
@@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
CFLAGS_setup_nx.o := $(nostackp)
+CFLAGS_mem_encrypt_identity.o := $(nostackp)
CFLAGS_fault.o := -I$(src)/../include/asm/trace
@@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o
obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index b9283cc..476d810 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void)
for_each_possible_cpu(cpu)
setup_cpu_entry_area(cpu);
+
+ /*
+ * This is the last essential update to swapper_pg_dir which needs
+ * to be synchronized to initial_page_table on 32-bit.
+ */
+ sync_initial_page_table();
}
diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c
index 421f266..225fe2f 100644
--- a/arch/x86/mm/debug_pagetables.c
+++ b/arch/x86/mm/debug_pagetables.c
@@ -1,4 +1,5 @@
#include <linux/debugfs.h>
+#include <linux/efi.h>
#include <linux/module.h>
#include <linux/seq_file.h>
#include <asm/pgtable.h>
@@ -72,6 +73,30 @@ static const struct file_operations ptdump_curusr_fops = {
};
#endif
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+static struct dentry *pe_efi;
+
+static int ptdump_show_efi(struct seq_file *m, void *v)
+{
+ if (efi_mm.pgd)
+ ptdump_walk_pgd_level_debugfs(m, efi_mm.pgd, false);
+ return 0;
+}
+
+static int ptdump_open_efi(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show_efi, NULL);
+}
+
+static const struct file_operations ptdump_efi_fops = {
+ .owner = THIS_MODULE,
+ .open = ptdump_open_efi,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
static struct dentry *dir, *pe_knl, *pe_curknl;
static int __init pt_dump_debug_init(void)
@@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void)
if (!pe_curusr)
goto err;
#endif
+
+#if defined(CONFIG_EFI) && defined(CONFIG_X86_64)
+ pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops);
+ if (!pe_efi)
+ goto err;
+#endif
+
return 0;
err:
debugfs_remove_recursive(dir);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 2a4849e..62a7e9f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -29,6 +29,7 @@
struct pg_state {
int level;
pgprot_t current_prot;
+ pgprotval_t effective_prot;
unsigned long start_address;
unsigned long current_address;
const struct addr_marker *marker;
@@ -85,11 +86,15 @@ static struct addr_marker address_markers[] = {
[VMALLOC_START_NR] = { 0UL, "vmalloc() Area" },
[VMEMMAP_START_NR] = { 0UL, "Vmemmap" },
#ifdef CONFIG_KASAN
- [KASAN_SHADOW_START_NR] = { KASAN_SHADOW_START, "KASAN shadow" },
- [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" },
+ /*
+ * These fields get initialized with the (dynamic)
+ * KASAN_SHADOW_{START,END} values in pt_dump_init().
+ */
+ [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" },
+ [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" },
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
- [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" },
+ [LDT_NR] = { 0UL, "LDT remap" },
#endif
[CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
#ifdef CONFIG_X86_ESPFIX64
@@ -231,9 +236,9 @@ static unsigned long normalize_addr(unsigned long u)
* print what we collected so far.
*/
static void note_page(struct seq_file *m, struct pg_state *st,
- pgprot_t new_prot, int level)
+ pgprot_t new_prot, pgprotval_t new_eff, int level)
{
- pgprotval_t prot, cur;
+ pgprotval_t prot, cur, eff;
static const char units[] = "BKMGTPE";
/*
@@ -243,23 +248,24 @@ static void note_page(struct seq_file *m, struct pg_state *st,
*/
prot = pgprot_val(new_prot);
cur = pgprot_val(st->current_prot);
+ eff = st->effective_prot;
if (!st->level) {
/* First entry */
st->current_prot = new_prot;
+ st->effective_prot = new_eff;
st->level = level;
st->marker = address_markers;
st->lines = 0;
pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n",
st->marker->name);
- } else if (prot != cur || level != st->level ||
+ } else if (prot != cur || new_eff != eff || level != st->level ||
st->current_address >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
int width = sizeof(unsigned long) * 2;
- pgprotval_t pr = pgprot_val(st->current_prot);
- if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) {
+ if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) {
WARN_ONCE(1,
"x86/mm: Found insecure W+X mapping at address %p/%pS\n",
(void *)st->start_address,
@@ -313,21 +319,30 @@ static void note_page(struct seq_file *m, struct pg_state *st,
st->start_address = st->current_address;
st->current_prot = new_prot;
+ st->effective_prot = new_eff;
st->level = level;
}
}
-static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P)
+static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2)
+{
+ return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) |
+ ((prot1 | prot2) & _PAGE_NX);
+}
+
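The rule encoded above: USER and RW must be granted at every level of the walk, while NX set at any level wins. For example:

	pgprotval_t upper = _PAGE_USER | _PAGE_RW;
	pgprotval_t leaf  = _PAGE_RW | _PAGE_NX;

	/*
	 * effective_prot(upper, leaf) == _PAGE_RW | _PAGE_NX: USER is dropped
	 * because the leaf lacks it, NX is inherited because the leaf sets it.
	 */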
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
pte_t *start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
start = (pte_t *)pmd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PTE; i++) {
prot = pte_flags(*start);
+ eff = effective_prot(eff_in, prot);
st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
- note_page(m, st, __pgprot(prot), 5);
+ note_page(m, st, __pgprot(prot), eff, 5);
start++;
}
}
@@ -344,12 +359,10 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
void *pt)
{
if (__pa(pt) == __pa(kasan_zero_pmd) ||
-#ifdef CONFIG_X86_5LEVEL
- __pa(pt) == __pa(kasan_zero_p4d) ||
-#endif
+ (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
__pa(pt) == __pa(kasan_zero_pud)) {
pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
- note_page(m, st, __pgprot(prot), 5);
+ note_page(m, st, __pgprot(prot), 0, 5);
return true;
}
return false;
@@ -364,42 +377,45 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
#if PTRS_PER_PMD > 1
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
pmd_t *start, *pmd_start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
for (i = 0; i < PTRS_PER_PMD; i++) {
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!pmd_none(*start)) {
+ prot = pmd_flags(*start);
+ eff = effective_prot(eff_in, prot);
if (pmd_large(*start) || !pmd_present(*start)) {
- prot = pmd_flags(*start);
- note_page(m, st, __pgprot(prot), 4);
+ note_page(m, st, __pgprot(prot), eff, 4);
} else if (!kasan_page_table(m, st, pmd_start)) {
- walk_pte_level(m, st, *start,
+ walk_pte_level(m, st, *start, eff,
P + i * PMD_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 4);
+ note_page(m, st, __pgprot(0), 0, 4);
start++;
}
}
#else
-#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
+#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif
#if PTRS_PER_PUD > 1
-static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
pud_t *start, *pud_start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
pud_t *prev_pud = NULL;
pud_start = start = (pud_t *)p4d_page_vaddr(addr);
@@ -407,15 +423,16 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
for (i = 0; i < PTRS_PER_PUD; i++) {
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!pud_none(*start)) {
+ prot = pud_flags(*start);
+ eff = effective_prot(eff_in, prot);
if (pud_large(*start) || !pud_present(*start)) {
- prot = pud_flags(*start);
- note_page(m, st, __pgprot(prot), 3);
+ note_page(m, st, __pgprot(prot), eff, 3);
} else if (!kasan_page_table(m, st, pud_start)) {
- walk_pmd_level(m, st, *start,
+ walk_pmd_level(m, st, *start, eff,
P + i * PUD_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 3);
+ note_page(m, st, __pgprot(0), 0, 3);
prev_pud = start;
start++;
@@ -423,43 +440,43 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
}
#else
-#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p)
+#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p)
#define p4d_large(a) pud_large(__pud(p4d_val(a)))
#define p4d_none(a) pud_none(__pud(p4d_val(a)))
#endif
-#if PTRS_PER_P4D > 1
-
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
+static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
+ pgprotval_t eff_in, unsigned long P)
{
int i;
p4d_t *start, *p4d_start;
- pgprotval_t prot;
+ pgprotval_t prot, eff;
+
+ if (PTRS_PER_P4D == 1)
+ return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P);
p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);
for (i = 0; i < PTRS_PER_P4D; i++) {
st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
if (!p4d_none(*start)) {
+ prot = p4d_flags(*start);
+ eff = effective_prot(eff_in, prot);
if (p4d_large(*start) || !p4d_present(*start)) {
- prot = p4d_flags(*start);
- note_page(m, st, __pgprot(prot), 2);
+ note_page(m, st, __pgprot(prot), eff, 2);
} else if (!kasan_page_table(m, st, p4d_start)) {
- walk_pud_level(m, st, *start,
+ walk_pud_level(m, st, *start, eff,
P + i * P4D_LEVEL_MULT);
}
} else
- note_page(m, st, __pgprot(0), 2);
+ note_page(m, st, __pgprot(0), 0, 2);
start++;
}
}
-#else
-#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p)
-#define pgd_large(a) p4d_large(__p4d(pgd_val(a)))
-#define pgd_none(a) p4d_none(__p4d(pgd_val(a)))
-#endif
+#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
+#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
static inline bool is_hypervisor_range(int idx)
{
@@ -483,7 +500,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
#else
pgd_t *start = swapper_pg_dir;
#endif
- pgprotval_t prot;
+ pgprotval_t prot, eff;
int i;
struct pg_state st = {};
@@ -499,15 +516,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start) && !is_hypervisor_range(i)) {
+ prot = pgd_flags(*start);
+#ifdef CONFIG_X86_PAE
+ eff = _PAGE_USER | _PAGE_RW;
+#else
+ eff = prot;
+#endif
if (pgd_large(*start) || !pgd_present(*start)) {
- prot = pgd_flags(*start);
- note_page(m, &st, __pgprot(prot), 1);
+ note_page(m, &st, __pgprot(prot), eff, 1);
} else {
- walk_p4d_level(m, &st, *start,
+ walk_p4d_level(m, &st, *start, eff,
i * PGD_LEVEL_MULT);
}
} else
- note_page(m, &st, __pgprot(0), 1);
+ note_page(m, &st, __pgprot(0), 0, 1);
cond_resched();
start++;
@@ -515,7 +537,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
/* Flush out the last page */
st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
- note_page(m, &st, __pgprot(0), 0);
+ note_page(m, &st, __pgprot(0), 0, 0);
if (!checkwx)
return;
if (st.wx_pages)
@@ -570,6 +592,13 @@ static int __init pt_dump_init(void)
address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+ address_markers[LDT_NR].start_address = LDT_BASE_ADDR;
+#endif
+#ifdef CONFIG_KASAN
+ address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+ address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 800de81..73bd8c9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (!pmd_k)
return -1;
- if (pmd_huge(*pmd_k))
+ if (pmd_large(*pmd_k))
return 0;
pte_k = pte_offset_kernel(pmd_k, address);
@@ -417,11 +417,11 @@ void vmalloc_sync_all(void)
*/
static noinline int vmalloc_fault(unsigned long address)
{
- pgd_t *pgd, *pgd_ref;
- p4d_t *p4d, *p4d_ref;
- pud_t *pud, *pud_ref;
- pmd_t *pmd, *pmd_ref;
- pte_t *pte, *pte_ref;
+ pgd_t *pgd, *pgd_k;
+ p4d_t *p4d, *p4d_k;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
/* Make sure we are in vmalloc area: */
if (!(address >= VMALLOC_START && address < VMALLOC_END))
@@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address)
* case just flush:
*/
pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address);
- pgd_ref = pgd_offset_k(address);
- if (pgd_none(*pgd_ref))
+ pgd_k = pgd_offset_k(address);
+ if (pgd_none(*pgd_k))
return -1;
- if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (pgtable_l5_enabled) {
if (pgd_none(*pgd)) {
- set_pgd(pgd, *pgd_ref);
+ set_pgd(pgd, *pgd_k);
arch_flush_lazy_mmu_mode();
} else {
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k));
}
}
/* With 4-level paging, copying happens on the p4d level. */
p4d = p4d_offset(pgd, address);
- p4d_ref = p4d_offset(pgd_ref, address);
- if (p4d_none(*p4d_ref))
+ p4d_k = p4d_offset(pgd_k, address);
+ if (p4d_none(*p4d_k))
return -1;
- if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) {
- set_p4d(p4d, *p4d_ref);
+ if (p4d_none(*p4d) && !pgtable_l5_enabled) {
+ set_p4d(p4d, *p4d_k);
arch_flush_lazy_mmu_mode();
} else {
- BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
+ BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k));
}
- /*
- * Below here mismatches are bugs because these lower tables
- * are shared:
- */
BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4);
pud = pud_offset(p4d, address);
- pud_ref = pud_offset(p4d_ref, address);
- if (pud_none(*pud_ref))
+ if (pud_none(*pud))
return -1;
- if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref))
- BUG();
-
- if (pud_huge(*pud))
+ if (pud_large(*pud))
return 0;
pmd = pmd_offset(pud, address);
- pmd_ref = pmd_offset(pud_ref, address);
- if (pmd_none(*pmd_ref))
+ if (pmd_none(*pmd))
return -1;
- if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref))
- BUG();
-
- if (pmd_huge(*pmd))
+ if (pmd_large(*pmd))
return 0;
- pte_ref = pte_offset_kernel(pmd_ref, address);
- if (!pte_present(*pte_ref))
- return -1;
-
pte = pte_offset_kernel(pmd, address);
-
- /*
- * Don't use pte_page here, because the mappings can point
- * outside mem_map, and the NUMA hash lookup cannot handle
- * that:
- */
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
+ if (!pte_present(*pte))
+ return -1;
return 0;
}
@@ -699,7 +677,6 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "paging request");
printk(KERN_CONT " at %px\n", (void *) address);
- printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
dump_pagetable(address);
}
@@ -1248,10 +1225,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
tsk = current;
mm = tsk->mm;
- /*
- * Detect and handle instructions that would cause a page fault for
- * both a tracked kernel page and a userspace page.
- */
prefetchw(&mm->mmap_sem);
if (unlikely(kmmio_fault(regs, address)))
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ab33a32..9aa22be 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (pgtable_l5_enabled) {
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else {
/*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 79cb066..8008db2 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
}
#endif /* CONFIG_HIGHMEM */
+void __init sync_initial_page_table(void)
+{
+ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ /*
+ * Sync back the low identity map too. It is used, for example,

+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+}
+
void __init native_pagetable_init(void)
{
unsigned long pfn, va;
@@ -763,6 +778,7 @@ void __init mem_init(void)
free_all_bootmem();
after_bootmem = 1;
+ x86_init.hyper.init_after_bootmem();
mem_init_print_info(NULL);
printk(KERN_INFO "virtual kernel memory layout:\n"
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 8b72923..66de40e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str)
}
__setup("noexec32=", nonx32_setup);
-/*
- * When memory was added make sure all the processes MM have
- * suitable PGD entries in the local PGD level page.
- */
-#ifdef CONFIG_X86_5LEVEL
-void sync_global_pgds(unsigned long start, unsigned long end)
+static void sync_global_pgds_l5(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(&pgd_lock);
}
}
-#else
-void sync_global_pgds(unsigned long start, unsigned long end)
+
+static void sync_global_pgds_l4(unsigned long start, unsigned long end)
{
unsigned long addr;
@@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
* With folded p4d, pgd_none() is always false, we need to
* handle synchronization on p4d level.
*/
- BUILD_BUG_ON(pgd_none(*pgd_ref));
+ MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
p4d_ref = p4d_offset(pgd_ref, addr);
if (p4d_none(*p4d_ref))
@@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
spin_unlock(&pgd_lock);
}
}
-#endif
+
+/*
+ * When memory was added make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+ if (pgtable_l5_enabled)
+ sync_global_pgds_l5(start, end);
+ else
+ sync_global_pgds_l4(start, end);
+}
/*
* NOTE: This function is marked __ref because it calls __init function
@@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (!pgtable_l5_enabled)
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
@@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask);
spin_lock(&init_mm.page_table_lock);
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
pgd_populate(&init_mm, pgd, p4d);
else
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
@@ -800,17 +806,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
#define PAGE_INUSE 0xFD
-static void __meminit free_pagetable(struct page *page, int order,
- struct vmem_altmap *altmap)
+static void __meminit free_pagetable(struct page *page, int order)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
- if (altmap) {
- vmem_altmap_free(altmap, nr_pages);
- return;
- }
-
/* bootmem page has reserved flag */
if (PageReserved(page)) {
__ClearPageReserved(page);
@@ -826,9 +826,17 @@ static void __meminit free_pagetable(struct page *page, int order,
free_pages((unsigned long)page_address(page), order);
}
-static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
+static void __meminit free_hugepage_table(struct page *page,
struct vmem_altmap *altmap)
{
+ if (altmap)
+ vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE);
+ else
+ free_pagetable(page, get_order(PMD_SIZE));
+}
+
+static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
pte_t *pte;
int i;
@@ -839,14 +847,13 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
}
/* free a pte table */
- free_pagetable(pmd_page(*pmd), 0, altmap);
+ free_pagetable(pmd_page(*pmd), 0);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
- struct vmem_altmap *altmap)
+static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
pmd_t *pmd;
int i;
@@ -858,14 +865,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
}
/* free a pmd table */
- free_pagetable(pud_page(*pud), 0, altmap);
+ free_pagetable(pud_page(*pud), 0);
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
spin_unlock(&init_mm.page_table_lock);
}
-static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
- struct vmem_altmap *altmap)
+static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
pud_t *pud;
int i;
@@ -877,7 +883,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
}
/* free a pud table */
- free_pagetable(p4d_page(*p4d), 0, altmap);
+ free_pagetable(p4d_page(*p4d), 0);
spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d);
spin_unlock(&init_mm.page_table_lock);
@@ -885,7 +891,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
- struct vmem_altmap *altmap, bool direct)
+ bool direct)
{
unsigned long next, pages = 0;
pte_t *pte;
@@ -916,7 +922,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
* freed when offlining, or simply not in use.
*/
if (!direct)
- free_pagetable(pte_page(*pte), 0, altmap);
+ free_pagetable(pte_page(*pte), 0);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -939,7 +945,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
page_addr = page_address(pte_page(*pte));
if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
- free_pagetable(pte_page(*pte), 0, altmap);
+ free_pagetable(pte_page(*pte), 0);
spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte);
@@ -974,9 +980,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE)) {
if (!direct)
- free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE),
- altmap);
+ free_hugepage_table(pmd_page(*pmd),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -989,9 +994,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
page_addr = page_address(pmd_page(*pmd));
if (!memchr_inv(page_addr, PAGE_INUSE,
PMD_SIZE)) {
- free_pagetable(pmd_page(*pmd),
- get_order(PMD_SIZE),
- altmap);
+ free_hugepage_table(pmd_page(*pmd),
+ altmap);
spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd);
@@ -1003,8 +1007,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
}
pte_base = (pte_t *)pmd_page_vaddr(*pmd);
- remove_pte_table(pte_base, addr, next, altmap, direct);
- free_pte_table(pte_base, pmd, altmap);
+ remove_pte_table(pte_base, addr, next, direct);
+ free_pte_table(pte_base, pmd);
}
/* Call free_pmd_table() in remove_pud_table(). */
@@ -1033,8 +1037,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PUD_SIZE)) {
if (!direct)
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE),
- altmap);
+ get_order(PUD_SIZE));
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1048,8 +1051,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE,
PUD_SIZE)) {
free_pagetable(pud_page(*pud),
- get_order(PUD_SIZE),
- altmap);
+ get_order(PUD_SIZE));
spin_lock(&init_mm.page_table_lock);
pud_clear(pud);
@@ -1062,7 +1064,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
pmd_base = pmd_offset(pud, 0);
remove_pmd_table(pmd_base, addr, next, direct, altmap);
- free_pmd_table(pmd_base, pud, altmap);
+ free_pmd_table(pmd_base, pud);
}
if (direct)
@@ -1093,8 +1095,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
- if (CONFIG_PGTABLE_LEVELS == 5)
- free_pud_table(pud_base, p4d, altmap);
+ if (pgtable_l5_enabled)
+ free_pud_table(pud_base, p4d);
}
if (direct)
@@ -1183,6 +1185,7 @@ void __init mem_init(void)
/* this will put all memory onto the freelists */
free_all_bootmem();
after_bootmem = 1;
+ x86_init.hyper.init_after_bootmem();
/*
* Must be done after boot memory is put on freelist, because here we
@@ -1326,14 +1329,39 @@ int kern_addr_valid(unsigned long addr)
return pfn_valid(pte_pfn(*pte));
}
+/*
+ * Block size is the minimum amount of memory which can be hotplugged or
+ * hotremoved. It must be a power of two and must be equal to or larger than
+ * MIN_MEMORY_BLOCK_SIZE.
+ */
+#define MAX_BLOCK_SIZE (2UL << 30)
+
+/* Amount of RAM needed to start using large blocks */
+#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
+
static unsigned long probe_memory_block_size(void)
{
- unsigned long bz = MIN_MEMORY_BLOCK_SIZE;
+ unsigned long boot_mem_end = max_pfn << PAGE_SHIFT;
+ unsigned long bz;
- /* if system is UV or has 64GB of RAM or more, use large blocks */
- if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30)))
- bz = 2UL << 30; /* 2GB */
+ /* If this is a UV system, always use the 2G block size */
+ if (is_uv_system()) {
+ bz = MAX_BLOCK_SIZE;
+ goto done;
+ }
+ /* Use the regular block size if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */
+ if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) {
+ bz = MIN_MEMORY_BLOCK_SIZE;
+ goto done;
+ }
+
+ /* Find the largest allowed block size that aligns to memory end */
+ for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
+ if (IS_ALIGNED(boot_mem_end, bz))
+ break;
+ }
+done:
pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20);
return bz;
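
The probe_memory_block_size() rework above keeps MIN_MEMORY_BLOCK_SIZE on machines with less than 64GB of RAM and otherwise walks down from MAX_BLOCK_SIZE until it finds a power-of-two block size that divides the end of boot memory. A small userspace model of that search, ignoring the UV special case and assuming the x86-64 minimum block size of 128MB, shows which size falls out for a couple of hypothetical memory ends:

	#include <stdio.h>

	#define MIN_BLOCK	(128UL << 20)	/* assumed MIN_MEMORY_BLOCK_SIZE */
	#define MAX_BLOCK	(2UL << 30)	/* mirrors MAX_BLOCK_SIZE above */
	#define LARGE_THRESH	(64UL << 30)	/* mirrors MEM_SIZE_FOR_LARGE_BLOCK */

	static unsigned long model_block_size(unsigned long boot_mem_end)
	{
		unsigned long bz;

		if (boot_mem_end < LARGE_THRESH)
			return MIN_BLOCK;
		/* Largest power-of-two size (capped at 2GB) aligned to the memory end */
		for (bz = MAX_BLOCK; bz > MIN_BLOCK; bz >>= 1)
			if (!(boot_mem_end & (bz - 1)))
				break;
		return bz;
	}

	int main(void)
	{
		printf("%luMB\n", model_block_size(66UL << 30) >> 20);	/* 2048MB */
		printf("%luMB\n", model_block_size(65UL << 30) >> 20);	/* 1024MB */
		return 0;
	}
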
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index af6f2f9..d8ff013 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,6 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
+
+#ifdef CONFIG_X86_5LEVEL
+/* Too early to use cpu_feature_enabled() */
+#define pgtable_l5_enabled __pgtable_l5_enabled
+#endif
+
#include <linux/bootmem.h>
#include <linux/kasan.h>
#include <linux/kdebug.h>
@@ -19,7 +25,7 @@
extern struct range pfn_mapped[E820_MAX_ENTRIES];
-static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
static __init void *early_alloc(size_t size, int nid, bool panic)
{
@@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start,
* With folded p4d, pgd_clear() is nop, use p4d_clear()
* instead.
*/
- if (CONFIG_PGTABLE_LEVELS < 5)
- p4d_clear(p4d_offset(pgd, start));
- else
+ if (pgtable_l5_enabled)
pgd_clear(pgd);
+ else
+ p4d_clear(p4d_offset(pgd, start));
}
pgd = pgd_offset_k(start);
@@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{
unsigned long p4d;
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (!pgtable_l5_enabled)
return (p4d_t *)pgd;
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
@@ -272,7 +278,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);
- for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
+ for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt);
@@ -303,7 +309,7 @@ void __init kasan_init(void)
* bunch of things like kernel code, modules, EFI mapping, etc.
* We need to take extra steps to not overwrite them.
*/
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (pgtable_l5_enabled) {
void *ptr;
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
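
The #define block at the top of the kasan_init_64.c hunk substitutes the raw __pgtable_l5_enabled variable because this code runs before the cpufeature machinery is usable. The regular definition is not part of these hunks; presumably it is along the lines of:

	/* Assumed shape of the normal definition, not taken from this patch. */
	#define pgtable_l5_enabled	cpu_feature_enabled(X86_FEATURE_LA57)
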
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index aedebd2..615cc03 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -34,23 +34,12 @@
#define TB_SHIFT 40
/*
- * Virtual address start and end range for randomization.
- *
* The end address could depend on more configuration options to make the
* highest amount of space for randomization available, but that's too hard
* to keep straight and caused issues already.
*/
-static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
-/* Default values */
-unsigned long page_offset_base = __PAGE_OFFSET_BASE;
-EXPORT_SYMBOL(page_offset_base);
-unsigned long vmalloc_base = __VMALLOC_BASE;
-EXPORT_SYMBOL(vmalloc_base);
-unsigned long vmemmap_base = __VMEMMAP_BASE;
-EXPORT_SYMBOL(vmemmap_base);
-
/*
* Memory regions randomized by KASLR (except modules that use a separate logic
* earlier during boot). The list is ordered based on virtual addresses. This
@@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region {
unsigned long *base;
unsigned long size_tb;
} kaslr_regions[] = {
- { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ },
- { &vmalloc_base, VMALLOC_SIZE_TB },
+ { &page_offset_base, 0 },
+ { &vmalloc_base, 0 },
{ &vmemmap_base, 1 },
};
@@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void)
void __init kernel_randomize_memory(void)
{
size_t i;
- unsigned long vaddr = vaddr_start;
+ unsigned long vaddr_start, vaddr;
unsigned long rand, memory_tb;
struct rnd_state rand_state;
unsigned long remain_entropy;
+ vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
+ vaddr = vaddr_start;
+
/*
* These BUILD_BUG_ON checks ensure the memory layout is consistent
* with the vaddr_start/vaddr_end variables. These checks are very
@@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void)
if (!kaslr_memory_enabled())
return;
+ kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT);
+ kaslr_regions[1].size_tb = VMALLOC_SIZE_TB;
+
/*
* Update Physical memory mapping to available and
* add padding if needed (especially for memory hotplug support).
@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
entropy = (rand % (entropy + 1)) & P4D_MASK;
else
entropy = (rand % (entropy + 1)) & PUD_MASK;
@@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
vaddr = round_up(vaddr + 1, P4D_SIZE);
else
vaddr = round_up(vaddr + 1, PUD_SIZE);
@@ -217,7 +212,7 @@ void __meminit init_trampoline(void)
return;
}
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (pgtable_l5_enabled)
init_trampoline_p4d();
else
init_trampoline_pud();
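
With the kaslr.c hunks above, the region sizes are filled in at runtime and every randomized base is aligned to P4D_SIZE (512GB) under 5-level paging or PUD_SIZE (1GB) under 4-level paging. A rough userspace model of the per-region step, in which the names, the entropy budget and the libc PRNG are stand-ins rather than the kernel's own code, looks like this:

	#include <stdint.h>
	#include <stdlib.h>

	#define PUD_UNIT	(1ULL << 30)	/* 1GB alignment, 4-level paging */
	#define P4D_UNIT	(1ULL << 39)	/* 512GB alignment, 5-level paging */

	/* Pick a randomized, aligned base for one region and return the
	 * aligned starting point for the next region. */
	static uint64_t place_region(uint64_t vaddr, uint64_t region_size,
				     uint64_t budget, int l5, uint64_t *base)
	{
		uint64_t unit = l5 ? P4D_UNIT : PUD_UNIT;
		uint64_t rnd  = ((uint64_t)random() % (budget + 1)) & ~(unit - 1);

		*base = vaddr + rnd;
		vaddr = *base + region_size;
		return (vaddr + unit) & ~(unit - 1);	/* round_up(vaddr + 1, unit) */
	}
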
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 1a53071..b2de398 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -25,17 +25,12 @@
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
-#include <asm/sections.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include "mm_internal.h"
-static char sme_cmdline_arg[] __initdata = "mem_encrypt";
-static char sme_cmdline_on[] __initdata = "on";
-static char sme_cmdline_off[] __initdata = "off";
-
/*
* Since SME related variables are set early in the boot process they must
* reside in the .data section so as not to be zeroed out when the .bss
@@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask);
DEFINE_STATIC_KEY_FALSE(sev_enable_key);
EXPORT_SYMBOL_GPL(sev_enable_key);
-static bool sev_enabled __section(.data);
+bool sev_enabled __section(.data);
/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
@@ -200,67 +195,6 @@ void __init sme_early_init(void)
swiotlb_force = SWIOTLB_FORCE;
}
-static void *sev_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- unsigned long dma_mask;
- unsigned int order;
- struct page *page;
- void *vaddr = NULL;
-
- dma_mask = dma_alloc_coherent_mask(dev, gfp);
- order = get_order(size);
-
- /*
- * Memory will be memset to zero after marking decrypted, so don't
- * bother clearing it before.
- */
- gfp &= ~__GFP_ZERO;
-
- page = alloc_pages_node(dev_to_node(dev), gfp, order);
- if (page) {
- dma_addr_t addr;
-
- /*
- * Since we will be clearing the encryption bit, check the
- * mask with it already cleared.
- */
- addr = __sme_clr(phys_to_dma(dev, page_to_phys(page)));
- if ((addr + size) > dma_mask) {
- __free_pages(page, get_order(size));
- } else {
- vaddr = page_address(page);
- *dma_handle = addr;
- }
- }
-
- if (!vaddr)
- vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp);
-
- if (!vaddr)
- return NULL;
-
- /* Clear the SME encryption bit for DMA use if not swiotlb area */
- if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) {
- set_memory_decrypted((unsigned long)vaddr, 1 << order);
- memset(vaddr, 0, PAGE_SIZE << order);
- *dma_handle = __sme_clr(*dma_handle);
- }
-
- return vaddr;
-}
-
-static void sev_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_handle, unsigned long attrs)
-{
- /* Set the SME encryption bit for re-use if not swiotlb area */
- if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle)))
- set_memory_encrypted((unsigned long)vaddr,
- 1 << get_order(size));
-
- swiotlb_free_coherent(dev, size, vaddr, dma_handle);
-}
-
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
pgprot_t old_prot, new_prot;
@@ -413,20 +347,6 @@ bool sev_active(void)
}
EXPORT_SYMBOL(sev_active);
-static const struct dma_map_ops sev_dma_ops = {
- .alloc = sev_alloc,
- .free = sev_free,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
-};
-
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void)
{
@@ -437,12 +357,11 @@ void __init mem_encrypt_init(void)
swiotlb_update_mem_attributes();
/*
- * With SEV, DMA operations cannot use encryption. New DMA ops
- * are required in order to mark the DMA areas as decrypted or
- * to use bounce buffers.
+ * With SEV, DMA operations cannot use encryption, we need to use
+ * SWIOTLB to bounce buffer DMA operation.
*/
if (sev_active())
- dma_ops = &sev_dma_ops;
+ dma_ops = &swiotlb_dma_ops;
/*
* With SEV, we need to unroll the rep string I/O instructions.
@@ -455,582 +374,3 @@ void __init mem_encrypt_init(void)
: "Secure Memory Encryption (SME)");
}
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
-{
- WARN(PAGE_ALIGN(size) != size,
- "size is not page-aligned (%#lx)\n", size);
-
- /* Make the SWIOTLB buffer area decrypted */
- set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
-}
-
-struct sme_populate_pgd_data {
- void *pgtable_area;
- pgd_t *pgd;
-
- pmdval_t pmd_flags;
- pteval_t pte_flags;
- unsigned long paddr;
-
- unsigned long vaddr;
- unsigned long vaddr_end;
-};
-
-static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
-{
- unsigned long pgd_start, pgd_end, pgd_size;
- pgd_t *pgd_p;
-
- pgd_start = ppd->vaddr & PGDIR_MASK;
- pgd_end = ppd->vaddr_end & PGDIR_MASK;
-
- pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
-
- pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
-
- memset(pgd_p, 0, pgd_size);
-}
-
-#define PGD_FLAGS _KERNPG_TABLE_NOENC
-#define P4D_FLAGS _KERNPG_TABLE_NOENC
-#define PUD_FLAGS _KERNPG_TABLE_NOENC
-#define PMD_FLAGS _KERNPG_TABLE_NOENC
-
-#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
-
-#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
-#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
- (_PAGE_PAT | _PAGE_PWT))
-
-#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
-
-#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
-
-#define PTE_FLAGS_DEC PTE_FLAGS
-#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
- (_PAGE_PAT | _PAGE_PWT))
-
-#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
-
-static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
-{
- pgd_t *pgd_p;
- p4d_t *p4d_p;
- pud_t *pud_p;
- pmd_t *pmd_p;
-
- pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
- if (native_pgd_val(*pgd_p)) {
- if (IS_ENABLED(CONFIG_X86_5LEVEL))
- p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
- else
- pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
- } else {
- pgd_t pgd;
-
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p = ppd->pgtable_area;
- memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
- ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
-
- pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
- } else {
- pud_p = ppd->pgtable_area;
- memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
-
- pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
- }
- native_set_pgd(pgd_p, pgd);
- }
-
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_p += p4d_index(ppd->vaddr);
- if (native_p4d_val(*p4d_p)) {
- pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
- } else {
- p4d_t p4d;
-
- pud_p = ppd->pgtable_area;
- memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
- ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
-
- p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
- native_set_p4d(p4d_p, p4d);
- }
- }
-
- pud_p += pud_index(ppd->vaddr);
- if (native_pud_val(*pud_p)) {
- if (native_pud_val(*pud_p) & _PAGE_PSE)
- return NULL;
-
- pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
- } else {
- pud_t pud;
-
- pmd_p = ppd->pgtable_area;
- memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
- ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
-
- pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
- native_set_pud(pud_p, pud);
- }
-
- return pmd_p;
-}
-
-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
-{
- pmd_t *pmd_p;
-
- pmd_p = sme_prepare_pgd(ppd);
- if (!pmd_p)
- return;
-
- pmd_p += pmd_index(ppd->vaddr);
- if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
- native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
-}
-
-static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
-{
- pmd_t *pmd_p;
- pte_t *pte_p;
-
- pmd_p = sme_prepare_pgd(ppd);
- if (!pmd_p)
- return;
-
- pmd_p += pmd_index(ppd->vaddr);
- if (native_pmd_val(*pmd_p)) {
- if (native_pmd_val(*pmd_p) & _PAGE_PSE)
- return;
-
- pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
- } else {
- pmd_t pmd;
-
- pte_p = ppd->pgtable_area;
- memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
- ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
-
- pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
- native_set_pmd(pmd_p, pmd);
- }
-
- pte_p += pte_index(ppd->vaddr);
- if (!native_pte_val(*pte_p))
- native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
-}
-
-static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
-{
- while (ppd->vaddr < ppd->vaddr_end) {
- sme_populate_pgd_large(ppd);
-
- ppd->vaddr += PMD_PAGE_SIZE;
- ppd->paddr += PMD_PAGE_SIZE;
- }
-}
-
-static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
-{
- while (ppd->vaddr < ppd->vaddr_end) {
- sme_populate_pgd(ppd);
-
- ppd->vaddr += PAGE_SIZE;
- ppd->paddr += PAGE_SIZE;
- }
-}
-
-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
- pmdval_t pmd_flags, pteval_t pte_flags)
-{
- unsigned long vaddr_end;
-
- ppd->pmd_flags = pmd_flags;
- ppd->pte_flags = pte_flags;
-
- /* Save original end value since we modify the struct value */
- vaddr_end = ppd->vaddr_end;
-
- /* If start is not 2MB aligned, create PTE entries */
- ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
- __sme_map_range_pte(ppd);
-
- /* Create PMD entries */
- ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
- __sme_map_range_pmd(ppd);
-
- /* If end is not 2MB aligned, create PTE entries */
- ppd->vaddr_end = vaddr_end;
- __sme_map_range_pte(ppd);
-}
-
-static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
-{
- __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
-}
-
-static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
-{
- __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
-}
-
-static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
-{
- __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
-}
-
-static unsigned long __init sme_pgtable_calc(unsigned long len)
-{
- unsigned long p4d_size, pud_size, pmd_size, pte_size;
- unsigned long total;
-
- /*
- * Perform a relatively simplistic calculation of the pagetable
- * entries that are needed. Those mappings will be covered mostly
- * by 2MB PMD entries so we can conservatively calculate the required
- * number of P4D, PUD and PMD structures needed to perform the
- * mappings. For mappings that are not 2MB aligned, PTE mappings
- * would be needed for the start and end portion of the address range
- * that fall outside of the 2MB alignment. This results in, at most,
- * two extra pages to hold PTE entries for each range that is mapped.
- * Incrementing the count for each covers the case where the addresses
- * cross entries.
- */
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
- p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
- pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- } else {
- p4d_size = 0;
- pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- }
- pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
- pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
- pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
-
- total = p4d_size + pud_size + pmd_size + pte_size;
-
- /*
- * Now calculate the added pagetable structures needed to populate
- * the new pagetables.
- */
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
- p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
- p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
- pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- } else {
- p4d_size = 0;
- pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
- pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
- }
- pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
- pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
-
- total += p4d_size + pud_size + pmd_size;
-
- return total;
-}
-
-void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
-{
- unsigned long workarea_start, workarea_end, workarea_len;
- unsigned long execute_start, execute_end, execute_len;
- unsigned long kernel_start, kernel_end, kernel_len;
- unsigned long initrd_start, initrd_end, initrd_len;
- struct sme_populate_pgd_data ppd;
- unsigned long pgtable_area_len;
- unsigned long decrypted_base;
-
- if (!sme_active())
- return;
-
- /*
- * Prepare for encrypting the kernel and initrd by building new
- * pagetables with the necessary attributes needed to encrypt the
- * kernel in place.
- *
- * One range of virtual addresses will map the memory occupied
- * by the kernel and initrd as encrypted.
- *
- * Another range of virtual addresses will map the memory occupied
- * by the kernel and initrd as decrypted and write-protected.
- *
- * The use of write-protect attribute will prevent any of the
- * memory from being cached.
- */
-
- /* Physical addresses gives us the identity mapped virtual addresses */
- kernel_start = __pa_symbol(_text);
- kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
- kernel_len = kernel_end - kernel_start;
-
- initrd_start = 0;
- initrd_end = 0;
- initrd_len = 0;
-#ifdef CONFIG_BLK_DEV_INITRD
- initrd_len = (unsigned long)bp->hdr.ramdisk_size |
- ((unsigned long)bp->ext_ramdisk_size << 32);
- if (initrd_len) {
- initrd_start = (unsigned long)bp->hdr.ramdisk_image |
- ((unsigned long)bp->ext_ramdisk_image << 32);
- initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
- initrd_len = initrd_end - initrd_start;
- }
-#endif
-
- /* Set the encryption workarea to be immediately after the kernel */
- workarea_start = kernel_end;
-
- /*
- * Calculate required number of workarea bytes needed:
- * executable encryption area size:
- * stack page (PAGE_SIZE)
- * encryption routine page (PAGE_SIZE)
- * intermediate copy buffer (PMD_PAGE_SIZE)
- * pagetable structures for the encryption of the kernel
- * pagetable structures for workarea (in case not currently mapped)
- */
- execute_start = workarea_start;
- execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
- execute_len = execute_end - execute_start;
-
- /*
- * One PGD for both encrypted and decrypted mappings and a set of
- * PUDs and PMDs for each of the encrypted and decrypted mappings.
- */
- pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
- pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
- if (initrd_len)
- pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
-
- /* PUDs and PMDs needed in the current pagetables for the workarea */
- pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
-
- /*
- * The total workarea includes the executable encryption area and
- * the pagetable area. The start of the workarea is already 2MB
- * aligned, align the end of the workarea on a 2MB boundary so that
- * we don't try to create/allocate PTE entries from the workarea
- * before it is mapped.
- */
- workarea_len = execute_len + pgtable_area_len;
- workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
-
- /*
- * Set the address to the start of where newly created pagetable
- * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
- * structures are created when the workarea is added to the current
- * pagetables and when the new encrypted and decrypted kernel
- * mappings are populated.
- */
- ppd.pgtable_area = (void *)execute_end;
-
- /*
- * Make sure the current pagetable structure has entries for
- * addressing the workarea.
- */
- ppd.pgd = (pgd_t *)native_read_cr3_pa();
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start;
- ppd.vaddr_end = workarea_end;
- sme_map_range_decrypted(&ppd);
-
- /* Flush the TLB - no globals so cr3 is enough */
- native_write_cr3(__native_read_cr3());
-
- /*
- * A new pagetable structure is being built to allow for the kernel
- * and initrd to be encrypted. It starts with an empty PGD that will
- * then be populated with new PUDs and PMDs as the encrypted and
- * decrypted kernel mappings are created.
- */
- ppd.pgd = ppd.pgtable_area;
- memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
- ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
-
- /*
- * A different PGD index/entry must be used to get different
- * pagetable entries for the decrypted mapping. Choose the next
- * PGD index and convert it to a virtual address to be used as
- * the base of the mapping.
- */
- decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
- if (initrd_len) {
- unsigned long check_base;
-
- check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
- decrypted_base = max(decrypted_base, check_base);
- }
- decrypted_base <<= PGDIR_SHIFT;
-
- /* Add encrypted kernel (identity) mappings */
- ppd.paddr = kernel_start;
- ppd.vaddr = kernel_start;
- ppd.vaddr_end = kernel_end;
- sme_map_range_encrypted(&ppd);
-
- /* Add decrypted, write-protected kernel (non-identity) mappings */
- ppd.paddr = kernel_start;
- ppd.vaddr = kernel_start + decrypted_base;
- ppd.vaddr_end = kernel_end + decrypted_base;
- sme_map_range_decrypted_wp(&ppd);
-
- if (initrd_len) {
- /* Add encrypted initrd (identity) mappings */
- ppd.paddr = initrd_start;
- ppd.vaddr = initrd_start;
- ppd.vaddr_end = initrd_end;
- sme_map_range_encrypted(&ppd);
- /*
- * Add decrypted, write-protected initrd (non-identity) mappings
- */
- ppd.paddr = initrd_start;
- ppd.vaddr = initrd_start + decrypted_base;
- ppd.vaddr_end = initrd_end + decrypted_base;
- sme_map_range_decrypted_wp(&ppd);
- }
-
- /* Add decrypted workarea mappings to both kernel mappings */
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start;
- ppd.vaddr_end = workarea_end;
- sme_map_range_decrypted(&ppd);
-
- ppd.paddr = workarea_start;
- ppd.vaddr = workarea_start + decrypted_base;
- ppd.vaddr_end = workarea_end + decrypted_base;
- sme_map_range_decrypted(&ppd);
-
- /* Perform the encryption */
- sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
- kernel_len, workarea_start, (unsigned long)ppd.pgd);
-
- if (initrd_len)
- sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
- initrd_len, workarea_start,
- (unsigned long)ppd.pgd);
-
- /*
- * At this point we are running encrypted. Remove the mappings for
- * the decrypted areas - all that is needed for this is to remove
- * the PGD entry/entries.
- */
- ppd.vaddr = kernel_start + decrypted_base;
- ppd.vaddr_end = kernel_end + decrypted_base;
- sme_clear_pgd(&ppd);
-
- if (initrd_len) {
- ppd.vaddr = initrd_start + decrypted_base;
- ppd.vaddr_end = initrd_end + decrypted_base;
- sme_clear_pgd(&ppd);
- }
-
- ppd.vaddr = workarea_start + decrypted_base;
- ppd.vaddr_end = workarea_end + decrypted_base;
- sme_clear_pgd(&ppd);
-
- /* Flush the TLB - no globals so cr3 is enough */
- native_write_cr3(__native_read_cr3());
-}
-
-void __init __nostackprotector sme_enable(struct boot_params *bp)
-{
- const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
- unsigned int eax, ebx, ecx, edx;
- unsigned long feature_mask;
- bool active_by_default;
- unsigned long me_mask;
- char buffer[16];
- u64 msr;
-
- /* Check for the SME/SEV support leaf */
- eax = 0x80000000;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- if (eax < 0x8000001f)
- return;
-
-#define AMD_SME_BIT BIT(0)
-#define AMD_SEV_BIT BIT(1)
- /*
- * Set the feature mask (SME or SEV) based on whether we are
- * running under a hypervisor.
- */
- eax = 1;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
-
- /*
- * Check for the SME/SEV feature:
- * CPUID Fn8000_001F[EAX]
- * - Bit 0 - Secure Memory Encryption support
- * - Bit 1 - Secure Encrypted Virtualization support
- * CPUID Fn8000_001F[EBX]
- * - Bits 5:0 - Pagetable bit position used to indicate encryption
- */
- eax = 0x8000001f;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- if (!(eax & feature_mask))
- return;
-
- me_mask = 1UL << (ebx & 0x3f);
-
- /* Check if memory encryption is enabled */
- if (feature_mask == AMD_SME_BIT) {
- /* For SME, check the SYSCFG MSR */
- msr = __rdmsr(MSR_K8_SYSCFG);
- if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
- return;
- } else {
- /* For SEV, check the SEV MSR */
- msr = __rdmsr(MSR_AMD64_SEV);
- if (!(msr & MSR_AMD64_SEV_ENABLED))
- return;
-
- /* SEV state cannot be controlled by a command line option */
- sme_me_mask = me_mask;
- sev_enabled = true;
- return;
- }
-
- /*
- * Fixups have not been applied to phys_base yet and we're running
- * identity mapped, so we must obtain the address to the SME command
- * line argument data using rip-relative addressing.
- */
- asm ("lea sme_cmdline_arg(%%rip), %0"
- : "=r" (cmdline_arg)
- : "p" (sme_cmdline_arg));
- asm ("lea sme_cmdline_on(%%rip), %0"
- : "=r" (cmdline_on)
- : "p" (sme_cmdline_on));
- asm ("lea sme_cmdline_off(%%rip), %0"
- : "=r" (cmdline_off)
- : "p" (sme_cmdline_off));
-
- if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
- active_by_default = true;
- else
- active_by_default = false;
-
- cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
- ((u64)bp->ext_cmd_line_ptr << 32));
-
- cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
-
- if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
- sme_me_mask = me_mask;
- else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
- sme_me_mask = 0;
- else
- sme_me_mask = active_by_default ? me_mask : 0;
-}
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 01f682c..40a6085 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -15,6 +15,7 @@
#include <asm/page.h>
#include <asm/processor-flags.h>
#include <asm/msr-index.h>
+#include <asm/nospec-branch.h>
.text
.code64
@@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute)
movq %rax, %r8 /* Workarea encryption routine */
addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */
+ ANNOTATE_RETPOLINE_SAFE
call *%rax /* Call the encryption routine */
pop %r12
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
new file mode 100644
index 0000000..1b2197d
--- /dev/null
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -0,0 +1,564 @@
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+/*
+ * Special hack: we have to be careful, because no indirections are
+ * allowed here, and paravirt_ops is a kind of one. As it will only run on
+ * bare metal anyway, we just keep it from happening. (This list needs to
+ * be extended when new paravirt and debugging variants are added.)
+ */
+#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_SPINLOCKS
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mem_encrypt.h>
+
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/cmdline.h>
+
+#include "mm_internal.h"
+
+#define PGD_FLAGS _KERNPG_TABLE_NOENC
+#define P4D_FLAGS _KERNPG_TABLE_NOENC
+#define PUD_FLAGS _KERNPG_TABLE_NOENC
+#define PMD_FLAGS _KERNPG_TABLE_NOENC
+
+#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
+#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
+
+#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC PTE_FLAGS
+#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+ (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC)
+
+struct sme_populate_pgd_data {
+ void *pgtable_area;
+ pgd_t *pgd;
+
+ pmdval_t pmd_flags;
+ pteval_t pte_flags;
+ unsigned long paddr;
+
+ unsigned long vaddr;
+ unsigned long vaddr_end;
+};
+
+static char sme_cmdline_arg[] __initdata = "mem_encrypt";
+static char sme_cmdline_on[] __initdata = "on";
+static char sme_cmdline_off[] __initdata = "off";
+
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
+{
+ unsigned long pgd_start, pgd_end, pgd_size;
+ pgd_t *pgd_p;
+
+ pgd_start = ppd->vaddr & PGDIR_MASK;
+ pgd_end = ppd->vaddr_end & PGDIR_MASK;
+
+ pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t);
+
+ pgd_p = ppd->pgd + pgd_index(ppd->vaddr);
+
+ memset(pgd_p, 0, pgd_size);
+}
+
+static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = ppd->pgd + pgd_index(ppd->vaddr);
+ if (pgd_none(*pgd)) {
+ p4d = ppd->pgtable_area;
+ memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D);
+ ppd->pgtable_area += sizeof(*p4d) * PTRS_PER_P4D;
+ set_pgd(pgd, __pgd(PGD_FLAGS | __pa(p4d)));
+ }
+
+ p4d = p4d_offset(pgd, ppd->vaddr);
+ if (p4d_none(*p4d)) {
+ pud = ppd->pgtable_area;
+ memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD);
+ ppd->pgtable_area += sizeof(*pud) * PTRS_PER_PUD;
+ set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud)));
+ }
+
+ pud = pud_offset(p4d, ppd->vaddr);
+ if (pud_none(*pud)) {
+ pmd = ppd->pgtable_area;
+ memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD);
+ ppd->pgtable_area += sizeof(*pmd) * PTRS_PER_PMD;
+ set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
+ }
+
+ if (pud_large(*pud))
+ return NULL;
+
+ return pud;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pud = sme_prepare_pgd(ppd);
+ if (!pud)
+ return;
+
+ pmd = pmd_offset(pud, ppd->vaddr);
+ if (pmd_large(*pmd))
+ return;
+
+ set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
+}
+
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pud = sme_prepare_pgd(ppd);
+ if (!pud)
+ return;
+
+ pmd = pmd_offset(pud, ppd->vaddr);
+ if (pmd_none(*pmd)) {
+ pte = ppd->pgtable_area;
+ memset(pte, 0, sizeof(*pte) * PTRS_PER_PTE);
+ ppd->pgtable_area += sizeof(*pte) * PTRS_PER_PTE;
+ set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte)));
+ }
+
+ if (pmd_large(*pmd))
+ return;
+
+ pte = pte_offset_map(pmd, ppd->vaddr);
+ if (pte_none(*pte))
+ set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd_large(ppd);
+
+ ppd->vaddr += PMD_PAGE_SIZE;
+ ppd->paddr += PMD_PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+ while (ppd->vaddr < ppd->vaddr_end) {
+ sme_populate_pgd(ppd);
+
+ ppd->vaddr += PAGE_SIZE;
+ ppd->paddr += PAGE_SIZE;
+ }
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+ pmdval_t pmd_flags, pteval_t pte_flags)
+{
+ unsigned long vaddr_end;
+
+ ppd->pmd_flags = pmd_flags;
+ ppd->pte_flags = pte_flags;
+
+ /* Save original end value since we modify the struct value */
+ vaddr_end = ppd->vaddr_end;
+
+ /* If start is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+ __sme_map_range_pte(ppd);
+
+ /* Create PMD entries */
+ ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+ __sme_map_range_pmd(ppd);
+
+ /* If end is not 2MB aligned, create PTE entries */
+ ppd->vaddr_end = vaddr_end;
+ __sme_map_range_pte(ppd);
+}
+
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
+}
+
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
+}
+
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
+{
+ __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
+}
+
+static unsigned long __init sme_pgtable_calc(unsigned long len)
+{
+ unsigned long entries = 0, tables = 0;
+
+ /*
+ * Perform a relatively simplistic calculation of the pagetable
+ * entries that are needed. Those mappings will be covered mostly
+ * by 2MB PMD entries so we can conservatively calculate the required
+ * number of P4D, PUD and PMD structures needed to perform the
+ * mappings. For mappings that are not 2MB aligned, PTE mappings
+ * would be needed for the start and end portion of the address range
+ * that fall outside of the 2MB alignment. This results in, at most,
+ * two extra pages to hold PTE entries for each range that is mapped.
+ * Incrementing the count for each covers the case where the addresses
+ * cross entries.
+ */
+
+ /* PGDIR_SIZE is equal to P4D_SIZE on a 4-level machine. */
+ if (PTRS_PER_P4D > 1)
+ entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * sizeof(p4d_t) * PTRS_PER_P4D;
+ entries += (DIV_ROUND_UP(len, P4D_SIZE) + 1) * sizeof(pud_t) * PTRS_PER_PUD;
+ entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * sizeof(pmd_t) * PTRS_PER_PMD;
+ entries += 2 * sizeof(pte_t) * PTRS_PER_PTE;
+
+ /*
+ * Now calculate the added pagetable structures needed to populate
+ * the new pagetables.
+ */
+
+ if (PTRS_PER_P4D > 1)
+ tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * sizeof(p4d_t) * PTRS_PER_P4D;
+ tables += DIV_ROUND_UP(entries, P4D_SIZE) * sizeof(pud_t) * PTRS_PER_PUD;
+ tables += DIV_ROUND_UP(entries, PUD_SIZE) * sizeof(pmd_t) * PTRS_PER_PMD;
+
+ return entries + tables;
+}
+
+void __init sme_encrypt_kernel(struct boot_params *bp)
+{
+ unsigned long workarea_start, workarea_end, workarea_len;
+ unsigned long execute_start, execute_end, execute_len;
+ unsigned long kernel_start, kernel_end, kernel_len;
+ unsigned long initrd_start, initrd_end, initrd_len;
+ struct sme_populate_pgd_data ppd;
+ unsigned long pgtable_area_len;
+ unsigned long decrypted_base;
+
+ if (!sme_active())
+ return;
+
+ /*
+ * Prepare for encrypting the kernel and initrd by building new
+ * pagetables with the necessary attributes needed to encrypt the
+ * kernel in place.
+ *
+ * One range of virtual addresses will map the memory occupied
+ * by the kernel and initrd as encrypted.
+ *
+ * Another range of virtual addresses will map the memory occupied
+ * by the kernel and initrd as decrypted and write-protected.
+ *
+ * The use of the write-protect attribute will prevent any of the
+ * memory from being cached.
+ */
+
+ /* Physical addresses give us the identity-mapped virtual addresses */
+ kernel_start = __pa_symbol(_text);
+ kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+ kernel_len = kernel_end - kernel_start;
+
+ initrd_start = 0;
+ initrd_end = 0;
+ initrd_len = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ initrd_len = (unsigned long)bp->hdr.ramdisk_size |
+ ((unsigned long)bp->ext_ramdisk_size << 32);
+ if (initrd_len) {
+ initrd_start = (unsigned long)bp->hdr.ramdisk_image |
+ ((unsigned long)bp->ext_ramdisk_image << 32);
+ initrd_end = PAGE_ALIGN(initrd_start + initrd_len);
+ initrd_len = initrd_end - initrd_start;
+ }
+#endif
+
+ /* Set the encryption workarea to be immediately after the kernel */
+ workarea_start = kernel_end;
+
+ /*
+ * Calculate the required number of workarea bytes:
+ * executable encryption area size:
+ * stack page (PAGE_SIZE)
+ * encryption routine page (PAGE_SIZE)
+ * intermediate copy buffer (PMD_PAGE_SIZE)
+ * pagetable structures for the encryption of the kernel
+ * pagetable structures for workarea (in case not currently mapped)
+ */
+ execute_start = workarea_start;
+ execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+ execute_len = execute_end - execute_start;
+
+ /*
+ * One PGD for both encrypted and decrypted mappings and a set of
+ * PUDs and PMDs for each of the encrypted and decrypted mappings.
+ */
+ pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
+ pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+ if (initrd_len)
+ pgtable_area_len += sme_pgtable_calc(initrd_len) * 2;
+
+ /* PUDs and PMDs needed in the current pagetables for the workarea */
+ pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
+
+ /*
+ * The total workarea includes the executable encryption area and
+ * the pagetable area. The start of the workarea is already 2MB
+ * aligned, align the end of the workarea on a 2MB boundary so that
+ * we don't try to create/allocate PTE entries from the workarea
+ * before it is mapped.
+ */
+ workarea_len = execute_len + pgtable_area_len;
+ workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
+
+ /*
+ * Set the address to the start of where newly created pagetable
+ * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
+ * structures are created when the workarea is added to the current
+ * pagetables and when the new encrypted and decrypted kernel
+ * mappings are populated.
+ */
+ ppd.pgtable_area = (void *)execute_end;
+
+ /*
+ * Make sure the current pagetable structure has entries for
+ * addressing the workarea.
+ */
+ ppd.pgd = (pgd_t *)native_read_cr3_pa();
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
+
+ /* Flush the TLB - no globals so cr3 is enough */
+ native_write_cr3(__native_read_cr3());
+
+ /*
+ * A new pagetable structure is being built to allow for the kernel
+ * and initrd to be encrypted. It starts with an empty PGD that will
+ * then be populated with new PUDs and PMDs as the encrypted and
+ * decrypted kernel mappings are created.
+ */
+ ppd.pgd = ppd.pgtable_area;
+ memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD);
+ ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD;
+
+ /*
+ * A different PGD index/entry must be used to get different
+ * pagetable entries for the decrypted mapping. Choose the next
+ * PGD index and convert it to a virtual address to be used as
+ * the base of the mapping.
+ */
+ decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+ if (initrd_len) {
+ unsigned long check_base;
+
+ check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1);
+ decrypted_base = max(decrypted_base, check_base);
+ }
+ decrypted_base <<= PGDIR_SHIFT;
+
+ /* Add encrypted kernel (identity) mappings */
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start;
+ ppd.vaddr_end = kernel_end;
+ sme_map_range_encrypted(&ppd);
+
+ /* Add decrypted, write-protected kernel (non-identity) mappings */
+ ppd.paddr = kernel_start;
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
+
+ if (initrd_len) {
+ /* Add encrypted initrd (identity) mappings */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start;
+ ppd.vaddr_end = initrd_end;
+ sme_map_range_encrypted(&ppd);
+ /*
+ * Add decrypted, write-protected initrd (non-identity) mappings
+ */
+ ppd.paddr = initrd_start;
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_map_range_decrypted_wp(&ppd);
+ }
+
+ /* Add decrypted workarea mappings to both kernel mappings */
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start;
+ ppd.vaddr_end = workarea_end;
+ sme_map_range_decrypted(&ppd);
+
+ ppd.paddr = workarea_start;
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_map_range_decrypted(&ppd);
+
+ /* Perform the encryption */
+ sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
+ kernel_len, workarea_start, (unsigned long)ppd.pgd);
+
+ if (initrd_len)
+ sme_encrypt_execute(initrd_start, initrd_start + decrypted_base,
+ initrd_len, workarea_start,
+ (unsigned long)ppd.pgd);
+
+ /*
+ * At this point we are running encrypted. Remove the mappings for
+ * the decrypted areas - all that is needed for this is to remove
+ * the PGD entry/entries.
+ */
+ ppd.vaddr = kernel_start + decrypted_base;
+ ppd.vaddr_end = kernel_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+
+ if (initrd_len) {
+ ppd.vaddr = initrd_start + decrypted_base;
+ ppd.vaddr_end = initrd_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+ }
+
+ ppd.vaddr = workarea_start + decrypted_base;
+ ppd.vaddr_end = workarea_end + decrypted_base;
+ sme_clear_pgd(&ppd);
+
+ /* Flush the TLB - no globals so cr3 is enough */
+ native_write_cr3(__native_read_cr3());
+}
+
+void __init sme_enable(struct boot_params *bp)
+{
+ const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off;
+ unsigned int eax, ebx, ecx, edx;
+ unsigned long feature_mask;
+ bool active_by_default;
+ unsigned long me_mask;
+ char buffer[16];
+ u64 msr;
+
+ /* Check for the SME/SEV support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+ return;
+
+#define AMD_SME_BIT BIT(0)
+#define AMD_SEV_BIT BIT(1)
+ /*
+ * Set the feature mask (SME or SEV) based on whether we are
+ * running under a hypervisor.
+ */
+ eax = 1;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ feature_mask = (ecx & BIT(31)) ? AMD_SEV_BIT : AMD_SME_BIT;
+
+ /*
+ * Check for the SME/SEV feature:
+ * CPUID Fn8000_001F[EAX]
+ * - Bit 0 - Secure Memory Encryption support
+ * - Bit 1 - Secure Encrypted Virtualization support
+ * CPUID Fn8000_001F[EBX]
+ * - Bits 5:0 - Pagetable bit position used to indicate encryption
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (!(eax & feature_mask))
+ return;
+
+ me_mask = 1UL << (ebx & 0x3f);
+
+ /* Check if memory encryption is enabled */
+ if (feature_mask == AMD_SME_BIT) {
+ /* For SME, check the SYSCFG MSR */
+ msr = __rdmsr(MSR_K8_SYSCFG);
+ if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+ return;
+ } else {
+ /* For SEV, check the SEV MSR */
+ msr = __rdmsr(MSR_AMD64_SEV);
+ if (!(msr & MSR_AMD64_SEV_ENABLED))
+ return;
+
+ /* SEV state cannot be controlled by a command line option */
+ sme_me_mask = me_mask;
+ sev_enabled = true;
+ return;
+ }
+
+ /*
+ * Fixups have not been applied to phys_base yet and we're running
+ * identity mapped, so we must obtain the address to the SME command
+ * line argument data using rip-relative addressing.
+ */
+ asm ("lea sme_cmdline_arg(%%rip), %0"
+ : "=r" (cmdline_arg)
+ : "p" (sme_cmdline_arg));
+ asm ("lea sme_cmdline_on(%%rip), %0"
+ : "=r" (cmdline_on)
+ : "p" (sme_cmdline_on));
+ asm ("lea sme_cmdline_off(%%rip), %0"
+ : "=r" (cmdline_off)
+ : "p" (sme_cmdline_off));
+
+ if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT))
+ active_by_default = true;
+ else
+ active_by_default = false;
+
+ cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
+ ((u64)bp->ext_cmd_line_ptr << 32));
+
+ cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+
+ if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
+ sme_me_mask = me_mask;
+ else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
+ sme_me_mask = 0;
+ else
+ sme_me_mask = active_by_default ? me_mask : 0;
+}
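/*
 * Illustrative sketch (not part of the patch): the same CPUID probe that
 * sme_enable() performs above, written as a freestanding helper. The leaf
 * and bit layout come from the comments above (Fn8000_001F: EAX bit 0 = SME,
 * EAX bit 1 = SEV, EBX[5:0] = C-bit position); the function name and return
 * convention are made up for the example. The kernel additionally picks
 * between SME and SEV via the hypervisor bit (leaf 1, ECX[31]) and the
 * SYSCFG/SEV MSRs, which this sketch omits.
 */
#include <cpuid.h>
#include <stdio.h>

static int probe_mem_encrypt(unsigned int *c_bit)
{
	unsigned int eax, ebx, ecx, edx;

	/* __get_cpuid() returns 0 if leaf 0x8000001f is not implemented. */
	if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
		return 0;
	if (!(eax & 3))			/* neither SME (bit 0) nor SEV (bit 1) */
		return 0;

	*c_bit = ebx & 0x3f;		/* pagetable bit that marks a page encrypted */
	return (eax & 2) ? 2 : 1;	/* 2 = SEV capable, 1 = SME only */
}

int main(void)
{
	unsigned int c_bit;
	int kind = probe_mem_encrypt(&c_bit);

	if (kind)
		printf("%s capable, C-bit at position %u\n",
		       kind == 2 ? "SEV" : "SME", c_bit);
	return 0;
}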
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 155ecba..48c5912 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void)
return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
-static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size,
+ struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap;
unsigned long gap_min, gap_max;
@@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd,
* process VM image, sets up which VM layout function to use:
*/
static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
- unsigned long random_factor, unsigned long task_size)
+ unsigned long random_factor, unsigned long task_size,
+ struct rlimit *rlim_stack)
{
*legacy_base = mmap_legacy_base(random_factor, task_size);
if (mmap_is_legacy())
*base = *legacy_base;
else
- *base = mmap_base(random_factor, task_size);
+ *base = mmap_base(random_factor, task_size, rlim_stack);
}
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
if (mmap_is_legacy())
mm->get_unmapped_area = arch_get_unmapped_area;
@@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
- arch_rnd(mmap64_rnd_bits), task_size_64bit(0));
+ arch_rnd(mmap64_rnd_bits), task_size_64bit(0),
+ rlim_stack);
#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
/*
@@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* mmap_base, the compat syscall uses mmap_compat_base.
*/
arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
- arch_rnd(mmap32_rnd_bits), task_size_32bit());
+ arch_rnd(mmap32_rnd_bits), task_size_32bit(),
+ rlim_stack);
#endif
}
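/*
 * Minimal sketch of what mmap_base() does with the rlim_stack value the
 * hunk above now passes in (illustrative only; the 128 MB minimum and the
 * 5/6-of-task-size maximum are assumptions for this sketch, not taken from
 * the diff, and the kernel additionally page-aligns the result).
 */
static unsigned long mmap_base_sketch(unsigned long rnd, unsigned long task_size,
				      unsigned long stack_rlim_cur,
				      unsigned long stack_pad)
{
	unsigned long gap = stack_rlim_cur;		/* was rlimit(RLIMIT_STACK) */
	unsigned long gap_min = 128UL << 20;		/* assumed lower bound */
	unsigned long gap_max = (task_size / 6) * 5;	/* assumed upper bound */

	/* Room for stack randomization and the guard gap, guarding overflow. */
	if (gap + stack_pad > gap)
		gap += stack_pad;

	if (gap < gap_min)
		gap = gap_min;
	else if (gap > gap_max)
		gap = gap_max;

	/* Top of the top-down mmap area: 'gap' below task_size, minus ASLR. */
	return task_size - gap - rnd;
}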
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index aca6295..e8a4a09 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end)
}
printk(KERN_CONT "\n");
}
-
-unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
- unsigned long end_pfn)
-{
- unsigned long nr_pages = end_pfn - start_pfn;
-
- if (!nr_pages)
- return 0;
-
- return (nr_pages + 1) * sizeof(struct page);
-}
#endif
extern unsigned long highend_pfn, highstart_pfn;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 004abf9..34cda7e 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
+
+/**
+ * pud_free_pmd_page - Clear pud entry and free pmd page.
+ * @pud: Pointer to a PUD.
+ *
+ * Context: The pud range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pud_free_pmd_page(pud_t *pud)
+{
+ pmd_t *pmd;
+ int i;
+
+ if (pud_none(*pud))
+ return 1;
+
+ pmd = (pmd_t *)pud_page_vaddr(*pud);
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ if (!pmd_free_pte_page(&pmd[i]))
+ return 0;
+
+ pud_clear(pud);
+ free_page((unsigned long)pmd);
+
+ return 1;
+}
+
+/**
+ * pmd_free_pte_page - Clear pmd entry and free pte page.
+ * @pmd: Pointer to a PMD.
+ *
+ * Context: The pmd range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+ pte_t *pte;
+
+ if (pmd_none(*pmd))
+ return 1;
+
+ pte = (pte_t *)pmd_page_vaddr(*pmd);
+ pmd_clear(pmd);
+ free_page((unsigned long)pte);
+
+ return 1;
+}
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
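/*
 * Illustrative caller sketch (not from this diff): the huge-mapping path the
 * two helpers above exist for. Before installing a PMD-sized mapping over a
 * range that previously carried a PTE table, the stale table page must be
 * freed, otherwise the huge mapping is skipped. The function and parameter
 * names here are hypothetical.
 */
static int try_set_huge_pmd(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
{
	/*
	 * Context, as documented above: the range has already been unmapped
	 * and the TLB purged, so freeing the old PTE page is safe.
	 */
	if (!pmd_free_pte_page(pmd))
		return 0;	/* keep using 4 KiB mappings instead */

	return pmd_set_huge(pmd, phys, prot);
}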
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index ce38f16..631507f 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void)
}
/*
- * Clone the ESPFIX P4D into the user space visinble page table
+ * Clone the ESPFIX P4D into the user space visible page table
*/
static void __init pti_setup_espfix64(void)
{
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 7f1a513..e055d1a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
unsigned long sp = current_stack_pointer;
pgd_t *pgd = pgd_offset(mm, sp);
- if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (pgtable_l5_enabled) {
if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp);
@@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
{
int cpu;
- struct flush_tlb_info info = {
+ struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
.mm = mm,
};
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4923d92..b7251541 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,10 +11,11 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
-#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
#include <linux/bpf.h>
+#include <asm/set_memory.h>
+#include <asm/nospec-branch.h>
+
/*
* assembly code in arch/x86/net/bpf_jit.S
*/
@@ -60,7 +61,12 @@ static bool is_imm8(int value)
static bool is_simm32(s64 value)
{
- return value == (s64) (s32) value;
+ return value == (s64)(s32)value;
+}
+
+static bool is_uimm32(u64 value)
+{
+ return value == (u64)(u32)value;
}
/* mov dst, src */
@@ -97,16 +103,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
#define X86_JLE 0x7E
#define X86_JG 0x7F
-static void bpf_flush_icache(void *start, void *end)
-{
- mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
- set_fs(old_fs);
-}
-
#define CHOOSE_LOAD_FUNC(K, func) \
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
@@ -211,7 +207,7 @@ struct jit_context {
/* emit x64 prologue code for BPF program and check its size.
* bpf_tail_call helper will skip it while jumping into another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth)
+static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
int cnt = 0;
@@ -246,18 +242,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
/* mov qword ptr [rbp+24],r15 */
EMIT4(0x4C, 0x89, 0x7D, 24);
- /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls
- * we need to reset the counter to 0. It's done in two instructions,
- * resetting rax register to 0 (xor on eax gets 0 extended), and
- * moving it to the counter location.
- */
+ if (!ebpf_from_cbpf) {
+ /* Clear the tail call counter (tail_call_cnt): for eBPF tail
+ * calls we need to reset the counter to 0. It's done in two
+ * instructions, resetting rax register to 0, and moving it
+ * to the counter location.
+ */
- /* xor eax, eax */
- EMIT2(0x31, 0xc0);
- /* mov qword ptr [rbp+32], rax */
- EMIT4(0x48, 0x89, 0x45, 32);
+ /* xor eax, eax */
+ EMIT2(0x31, 0xc0);
+ /* mov qword ptr [rbp+32], rax */
+ EMIT4(0x48, 0x89, 0x45, 32);
+
+ BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
+ }
- BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
*pprog = prog;
}
@@ -290,7 +289,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
label1 = cnt;
@@ -299,7 +298,7 @@ static void emit_bpf_tail_call(u8 **pprog)
*/
EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 32
+#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
@@ -313,7 +312,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* goto out;
*/
EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
label3 = cnt;
@@ -326,7 +325,7 @@ static void emit_bpf_tail_call(u8 **pprog)
* rdi == ctx (1st arg)
* rax == prog->bpf_func + prologue_size
*/
- EMIT2(0xFF, 0xE0); /* jmp rax */
+ RETPOLINE_RAX_BPF_JIT();
/* out: */
BUILD_BUG_ON(cnt - label1 != OFFSET1);
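/*
 * Offset arithmetic behind the OFFSET1/2/3 changes above: the 2-byte
 * indirect "jmp rax" (0xFF 0xE0) is replaced by the retpoline sequence,
 * so each forward jump to the "out:" label loses those 2 bytes and gains
 * RETPOLINE_RAX_BPF_JIT_SIZE bytes:
 *
 *	OFFSET1: 43 -> (43 - 2) + RETPOLINE_RAX_BPF_JIT_SIZE
 *	OFFSET2: 32 -> (32 - 2) + RETPOLINE_RAX_BPF_JIT_SIZE
 *	OFFSET3: 10 -> (10 - 2) + RETPOLINE_RAX_BPF_JIT_SIZE
 */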
@@ -355,6 +354,86 @@ static void emit_load_skb_data_hlen(u8 **pprog)
*pprog = prog;
}
+static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
+ u32 dst_reg, const u32 imm32)
+{
+ u8 *prog = *pprog;
+ u8 b1, b2, b3;
+ int cnt = 0;
+
+ /* optimization: if imm32 is positive, use 'mov %eax, imm32'
+ * (which zero-extends imm32) to save 2 bytes.
+ */
+ if (sign_propagate && (s32)imm32 < 0) {
+ /* 'mov %rax, imm32' sign extends imm32 */
+ b1 = add_1mod(0x48, dst_reg);
+ b2 = 0xC7;
+ b3 = 0xC0;
+ EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
+ goto done;
+ }
+
+ /* optimization: if imm32 is zero, use 'xor %eax, %eax'
+ * to save 3 bytes.
+ */
+ if (imm32 == 0) {
+ if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ b2 = 0x31; /* xor */
+ b3 = 0xC0;
+ EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
+ goto done;
+ }
+
+ /* mov %eax, imm32 */
+ if (is_ereg(dst_reg))
+ EMIT1(add_1mod(0x40, dst_reg));
+ EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+done:
+ *pprog = prog;
+}
+
+static void emit_mov_imm64(u8 **pprog, u32 dst_reg,
+ const u32 imm32_hi, const u32 imm32_lo)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) {
+ /* For emitting a plain u32, where the sign bit must not be
+ * propagated, LLVM tends to load an imm64 over a mov32
+ * directly, so save a couple of bytes by just doing
+ * 'mov %eax, imm32' instead.
+ */
+ emit_mov_imm32(&prog, false, dst_reg, imm32_lo);
+ } else {
+ /* movabsq %rax, imm64 */
+ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+ EMIT(imm32_lo, 4);
+ EMIT(imm32_hi, 4);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ if (is64) {
+ /* mov dst, src */
+ EMIT_mov(dst_reg, src_reg);
+ } else {
+ /* mov32 dst, src */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT1(add_2mod(0x40, dst_reg, src_reg));
+ EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ }
+
+ *pprog = prog;
+}
+
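/*
 * Illustrative sketch (not part of the patch): the encoded byte counts
 * behind the "save 2 bytes" / "save 3 bytes" comments in the helpers
 * above, for a destination register that needs no REX extension (e.g. rax).
 */
static int mov_imm64_encoded_len(unsigned long long imm)
{
	if (imm == 0)
		return 2;	/* xor eax, eax       : 31 C0 */
	if (imm == (unsigned int)imm)
		return 5;	/* mov eax, imm32     : B8 imm32 (zero-extends) */
	if ((long long)imm == (int)imm)
		return 7;	/* mov rax, imm32     : 48 C7 C0 imm32 (sign-extends) */
	return 10;		/* movabs rax, imm64  : 48 B8 imm64 */
}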
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -368,7 +447,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int proglen = 0;
u8 *prog = temp;
- emit_prologue(&prog, bpf_prog->aux->stack_depth);
+ emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ bpf_prog_was_classic(bpf_prog));
if (seen_ld_abs)
emit_load_skb_data_hlen(&prog);
@@ -377,7 +457,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
const s32 imm32 = insn->imm;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
- u8 b1 = 0, b2 = 0, b3 = 0;
+ u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
bool reload_skb_data;
@@ -413,16 +493,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
break;
- /* mov dst, src */
case BPF_ALU64 | BPF_MOV | BPF_X:
- EMIT_mov(dst_reg, src_reg);
- break;
-
- /* mov32 dst, src */
case BPF_ALU | BPF_MOV | BPF_X:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT1(add_2mod(0x40, dst_reg, src_reg));
- EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
+ emit_mov_reg(&prog,
+ BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, src_reg);
break;
/* neg dst */
@@ -485,58 +560,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
break;
case BPF_ALU64 | BPF_MOV | BPF_K:
- /* optimization: if imm32 is positive,
- * use 'mov eax, imm32' (which zero-extends imm32)
- * to save 2 bytes
- */
- if (imm32 < 0) {
- /* 'mov rax, imm32' sign extends imm32 */
- b1 = add_1mod(0x48, dst_reg);
- b2 = 0xC7;
- b3 = 0xC0;
- EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
- break;
- }
-
case BPF_ALU | BPF_MOV | BPF_K:
- /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
- * to save 3 bytes.
- */
- if (imm32 == 0) {
- if (is_ereg(dst_reg))
- EMIT1(add_2mod(0x40, dst_reg, dst_reg));
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
- break;
- }
-
- /* mov %eax, imm32 */
- if (is_ereg(dst_reg))
- EMIT1(add_1mod(0x40, dst_reg));
- EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
+ emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64,
+ dst_reg, imm32);
break;
case BPF_LD | BPF_IMM | BPF_DW:
- /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
- * to save 7 bytes.
- */
- if (insn[0].imm == 0 && insn[1].imm == 0) {
- b1 = add_2mod(0x48, dst_reg, dst_reg);
- b2 = 0x31; /* xor */
- b3 = 0xC0;
- EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));
-
- insn++;
- i++;
- break;
- }
-
- /* movabsq %rax, imm64 */
- EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
- EMIT(insn[0].imm, 4);
- EMIT(insn[1].imm, 4);
-
+ emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm);
insn++;
i++;
break;
@@ -593,36 +623,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_ALU | BPF_MUL | BPF_X:
case BPF_ALU64 | BPF_MUL | BPF_K:
case BPF_ALU64 | BPF_MUL | BPF_X:
- EMIT1(0x50); /* push rax */
- EMIT1(0x52); /* push rdx */
+ {
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+
+ if (dst_reg != BPF_REG_0)
+ EMIT1(0x50); /* push rax */
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x52); /* push rdx */
/* mov r11, dst_reg */
EMIT_mov(AUX_REG, dst_reg);
if (BPF_SRC(insn->code) == BPF_X)
- /* mov rax, src_reg */
- EMIT_mov(BPF_REG_0, src_reg);
+ emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
else
- /* mov rax, imm32 */
- EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
+ emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
- if (BPF_CLASS(insn->code) == BPF_ALU64)
+ if (is64)
EMIT1(add_1mod(0x48, AUX_REG));
else if (is_ereg(AUX_REG))
EMIT1(add_1mod(0x40, AUX_REG));
/* mul(q) r11 */
EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
- /* mov r11, rax */
- EMIT_mov(AUX_REG, BPF_REG_0);
-
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
-
- /* mov dst_reg, r11 */
- EMIT_mov(dst_reg, AUX_REG);
+ if (dst_reg != BPF_REG_3)
+ EMIT1(0x5A); /* pop rdx */
+ if (dst_reg != BPF_REG_0) {
+ /* mov dst_reg, rax */
+ EMIT_mov(dst_reg, BPF_REG_0);
+ EMIT1(0x58); /* pop rax */
+ }
break;
-
+ }
/* shifts */
case BPF_ALU | BPF_LSH | BPF_K:
case BPF_ALU | BPF_RSH | BPF_K:
@@ -640,7 +672,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_RSH: b3 = 0xE8; break;
case BPF_ARSH: b3 = 0xF8; break;
}
- EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
+
+ if (imm32 == 1)
+ EMIT2(0xD1, add_1reg(b3, dst_reg));
+ else
+ EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
break;
case BPF_ALU | BPF_LSH | BPF_X:
@@ -1187,7 +1223,7 @@ skip_init_addrs:
* may converge on the last pass. In such case do one more
* pass to emit the final image
*/
- for (pass = 0; pass < 10 || image; pass++) {
+ for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
if (proglen <= 0) {
image = NULL;
@@ -1214,13 +1250,13 @@ skip_init_addrs:
}
}
oldproglen = proglen;
+ cond_resched();
}
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, proglen, pass + 1, image);
if (image) {
- bpf_flush_icache(header, image + proglen);
if (!prog->is_func || extra_pass) {
bpf_jit_binary_lock_ro(header);
} else {
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 174c597..a7a7677 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -460,7 +460,7 @@ static int nmi_setup(void)
goto fail;
for_each_possible_cpu(cpu) {
- if (!cpu)
+ if (!IS_ENABLED(CONFIG_SMP) || !cpu)
continue;
memcpy(per_cpu(cpu_msrs, cpu).counters,
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 7df49c4..5559dca 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -140,12 +140,10 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = {
void __init pci_acpi_crs_quirks(void)
{
- int year;
+ int year = dmi_get_bios_year();
- if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) {
- if (iomem_resource.end <= 0xffffffff)
- pci_use_crs = false;
- }
+ if (year >= 0 && year < 2008 && iomem_resource.end <= 0xffffffff)
+ pci_use_crs = false;
dmi_check_system(pci_crs_quirks);
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 2d95033..a51074c 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -195,14 +195,13 @@ static const struct pci_raw_ops pci_direct_conf2 = {
static int __init pci_sanity_check(const struct pci_raw_ops *o)
{
u32 x = 0;
- int year, devfn;
+ int devfn;
if (pci_probe & PCI_NO_CHECKS)
return 1;
/* Assume Type 1 works for newer systems.
This handles machines that don't have anything on PCI Bus 0. */
- dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL);
- if (year >= 2001)
+ if (dmi_get_bios_year() >= 2001)
return 1;
for (devfn = 0; devfn < 0x100; devfn++) {
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 1cb01ab..dfbe6ac 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -4,6 +4,7 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/pci.h>
+#include <asm/jailhouse_para.h>
#include <asm/pci_x86.h>
/*
@@ -34,13 +35,14 @@ int __init pci_legacy_init(void)
void pcibios_scan_specific_bus(int busn)
{
+ int stride = jailhouse_paravirt() ? 1 : 8;
int devfn;
u32 l;
if (pci_find_bus(0, busn))
return;
- for (devfn = 0; devfn < 256; devfn += 8) {
+ for (devfn = 0; devfn < 256; devfn += stride) {
if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) &&
l != 0x0000 && l != 0xffff) {
DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 96684d0..7389db5 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -94,8 +94,8 @@ static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
return new;
}
-static struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
- int end, u64 addr)
+struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start,
+ int end, u64 addr)
{
struct pci_mmcfg_region *new;
@@ -547,19 +547,14 @@ static void __init pci_mmcfg_reject_broken(int early)
static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
struct acpi_mcfg_allocation *cfg)
{
- int year;
-
if (cfg->address < 0xFFFFFFFF)
return 0;
if (!strncmp(mcfg->header.oem_id, "SGI", 3))
return 0;
- if (mcfg->header.revision >= 1) {
- if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) &&
- year >= 2010)
- return 0;
- }
+ if ((mcfg->header.revision >= 1) && (dmi_get_bios_year() >= 2010))
+ return 0;
pr_err(PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx "
"is above 4GB, ignored\n", cfg->pci_segment,
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 75577c1..7a5bafb 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
return p;
}
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- * returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: Size of the buffer
- * @dma_handle: DMA address
- * @flags: memory flags
- */
-static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flags,
- unsigned long attrs)
-{
- void *vaddr;
-
- vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
- *dma_handle = p2a(*dma_handle, to_pci_dev(dev));
- return vaddr;
-}
-
-/* We have our own dma_ops: the same as swiotlb but from alloc (above) */
-static const struct dma_map_ops sta2x11_dma_ops = {
- .alloc = sta2x11_swiotlb_alloc_coherent,
- .free = x86_swiotlb_free_coherent,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
- .dma_supported = x86_dma_supported,
-};
-
/* At setup time, we use our own ops if the device is a ConneXt one */
static void sta2x11_setup_pdev(struct pci_dev *pdev)
{
@@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev)
return;
pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1);
- pdev->dev.dma_ops = &sta2x11_dma_ops;
+ pdev->dev.dma_ops = &swiotlb_dma_ops;
+ pdev->dev.archdata.is_sta2x11 = true;
/* We must enable all devices as master, for audio DMA to work */
pci_set_master(pdev);
@@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
struct sta2x11_mapping *map;
- if (dev->dma_ops != &sta2x11_dma_ops) {
+ if (!dev->archdata.is_sta2x11) {
if (!dev->dma_mask)
return false;
return addr + size - 1 <= *dev->dma_mask;
@@ -243,13 +207,13 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
}
/**
- * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
+ * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device
* @dev: device for a PCI device
* @paddr: Physical address
*/
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
{
- if (dev->dma_ops != &sta2x11_dma_ops)
+ if (!dev->archdata.is_sta2x11)
return paddr;
return p2a(paddr, to_pci_dev(dev));
}
@@ -259,9 +223,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
* @dev: device for a PCI device
* @daddr: STA2x11 AMBA DMA address
*/
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
{
- if (dev->dma_ops != &sta2x11_dma_ops)
+ if (!dev->archdata.is_sta2x11)
return daddr;
return a2p(daddr, to_pci_dev(dev));
}
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index d49d3be..034813d 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -109,18 +109,7 @@ static int punit_dev_state_show(struct seq_file *seq_file, void *unused)
return 0;
}
-
-static int punit_dev_state_open(struct inode *inode, struct file *file)
-{
- return single_open(file, punit_dev_state_show, inode->i_private);
-}
-
-static const struct file_operations punit_dev_state_ops = {
- .open = punit_dev_state_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(punit_dev_state);
static struct dentry *punit_dbg_file;
@@ -132,9 +121,9 @@ static int punit_dbgfs_register(struct punit_device *punit_device)
if (!punit_dbg_file)
return -ENXIO;
- dev_state = debugfs_create_file("dev_power_state", S_IFREG | S_IRUGO,
+ dev_state = debugfs_create_file("dev_power_state", 0444,
punit_dbg_file, punit_device,
- &punit_dev_state_ops);
+ &punit_dev_state_fops);
if (!dev_state) {
pr_err("punit_dev_state register failed\n");
debugfs_remove(punit_dbg_file);
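/*
 * Roughly what DEFINE_SHOW_ATTRIBUTE(punit_dev_state) provides (sketch of
 * the <linux/seq_file.h> helper this hunk and the later imr.c hunk rely on;
 * the exact macro body may differ between kernel versions):
 */
static int punit_dev_state_open(struct inode *inode, struct file *file)
{
	return single_open(file, punit_dev_state_show, inode->i_private);
}

static const struct file_operations punit_dev_state_fops = {
	.owner	 = THIS_MODULE,
	.open	 = punit_dev_state_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};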
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c310a82..bed7e7f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -27,12 +27,14 @@
#include <linux/ioport.h>
#include <linux/mc146818rtc.h>
#include <linux/efi.h>
+#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/ucs2_string.h>
#include <linux/mem_encrypt.h>
+#include <linux/sched/task.h>
#include <asm/setup.h>
#include <asm/page.h>
@@ -81,9 +83,8 @@ pgd_t * __init efi_call_phys_prolog(void)
int n_pgds, i, j;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
- save_pgd = (pgd_t *)__read_cr3();
- write_cr3((unsigned long)efi_scratch.efi_pgt);
- goto out;
+ efi_switch_mm(&efi_mm);
+ return NULL;
}
early_code_mapping_set_exec(1);
@@ -155,8 +156,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
pud_t *pud;
if (!efi_enabled(EFI_OLD_MEMMAP)) {
- write_cr3((unsigned long)save_pgd);
- __flush_tlb_all();
+ efi_switch_mm(efi_scratch.prev_mm);
return;
}
@@ -190,7 +190,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
early_code_mapping_set_exec(0);
}
-static pgd_t *efi_pgd;
+EXPORT_SYMBOL_GPL(efi_mm);
/*
* We need our own copy of the higher levels of the page tables
@@ -203,7 +203,7 @@ static pgd_t *efi_pgd;
*/
int __init efi_alloc_page_tables(void)
{
- pgd_t *pgd;
+ pgd_t *pgd, *efi_pgd;
p4d_t *p4d;
pud_t *pud;
gfp_t gfp_mask;
@@ -225,12 +225,16 @@ int __init efi_alloc_page_tables(void)
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud) {
- if (CONFIG_PGTABLE_LEVELS > 4)
+ if (pgtable_l5_enabled)
free_page((unsigned long) pgd_page_vaddr(*pgd));
- free_page((unsigned long)efi_pgd);
+ free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
return -ENOMEM;
}
+ efi_mm.pgd = efi_pgd;
+ mm_init_cpumask(&efi_mm);
+ init_new_context(NULL, &efi_mm);
+
return 0;
}
@@ -243,6 +247,7 @@ void efi_sync_low_kernel_mappings(void)
pgd_t *pgd_k, *pgd_efi;
p4d_t *p4d_k, *p4d_efi;
pud_t *pud_k, *pud_efi;
+ pgd_t *efi_pgd = efi_mm.pgd;
if (efi_enabled(EFI_OLD_MEMMAP))
return;
@@ -255,8 +260,8 @@ void efi_sync_low_kernel_mappings(void)
* only span a single PGD entry and that the entry also maps
* other important kernel regions.
*/
- BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
- BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
+ MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
+ MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
(EFI_VA_END & PGDIR_MASK));
pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
@@ -336,20 +341,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
unsigned long pfn, text, pf;
struct page *page;
unsigned npages;
- pgd_t *pgd;
+ pgd_t *pgd = efi_mm.pgd;
if (efi_enabled(EFI_OLD_MEMMAP))
return 0;
/*
- * Since the PGD is encrypted, set the encryption mask so that when
- * this value is loaded into cr3 the PGD will be decrypted during
- * the pagetable walk.
- */
- efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
- pgd = efi_pgd;
-
- /*
* It can happen that the physical address of new_memmap lands in memory
* which is not mapped in the EFI page table. Therefore we need to go
* and ident-map those pages containing the map before calling
@@ -362,8 +359,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
return 1;
}
- efi_scratch.use_pgd = true;
-
/*
* Certain firmware versions are way too sentimental and still believe
* they are exclusive and unquestionable owners of the first physical page,
@@ -417,7 +412,7 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va)
{
unsigned long flags = _PAGE_RW;
unsigned long pfn;
- pgd_t *pgd = efi_pgd;
+ pgd_t *pgd = efi_mm.pgd;
if (!(md->attribute & EFI_MEMORY_WB))
flags |= _PAGE_PCD;
@@ -521,7 +516,7 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len)
static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf)
{
unsigned long pfn;
- pgd_t *pgd = efi_pgd;
+ pgd_t *pgd = efi_mm.pgd;
int err1, err2;
/* Update the 1:1 mapping */
@@ -618,10 +613,26 @@ void __init efi_dump_pagetable(void)
if (efi_enabled(EFI_OLD_MEMMAP))
ptdump_walk_pgd_level(NULL, swapper_pg_dir);
else
- ptdump_walk_pgd_level(NULL, efi_pgd);
+ ptdump_walk_pgd_level(NULL, efi_mm.pgd);
#endif
}
+/*
+ * Makes the calling thread switch to/from efi_mm context. Can be used
+ * for SetVirtualAddressMap(), i.e. current->active_mm == init_mm, as well
+ * as during EFI runtime calls, i.e. current->active_mm == current_mm.
+ * We are not mm_dropping()/mm_grabbing() any mm, because we are not
+ * losing/creating any references.
+ */
+void efi_switch_mm(struct mm_struct *mm)
+{
+ task_lock(current);
+ efi_scratch.prev_mm = current->active_mm;
+ current->active_mm = mm;
+ switch_mm(efi_scratch.prev_mm, mm, NULL);
+ task_unlock(current);
+}
+
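/*
 * Usage sketch (wrapper name is hypothetical): every EFI pagetable switch
 * in this diff is now a paired efi_switch_mm() call instead of a raw cr3
 * write plus TLB flush, mirroring efi_call_phys_prolog()/epilog() and
 * efi_thunk_set_virtual_address_map() above and below.
 */
static efi_status_t efi_call_with_efi_mm(efi_status_t (*call)(void))
{
	efi_status_t status;

	efi_switch_mm(&efi_mm);		/* saves current->active_mm in efi_scratch.prev_mm */
	status = call();
	efi_switch_mm(efi_scratch.prev_mm);

	return status;
}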
#ifdef CONFIG_EFI_MIXED
extern efi_status_t efi64_thunk(u32, ...);
@@ -675,16 +686,13 @@ efi_status_t efi_thunk_set_virtual_address_map(
efi_sync_low_kernel_mappings();
local_irq_save(flags);
- efi_scratch.prev_cr3 = __read_cr3();
- write_cr3((unsigned long)efi_scratch.efi_pgt);
- __flush_tlb_all();
+ efi_switch_mm(&efi_mm);
func = (u32)(unsigned long)phys_set_virtual_address_map;
status = efi64_thunk(func, memory_map_size, descriptor_size,
descriptor_version, virtual_map);
- write_cr3(efi_scratch.prev_cr3);
- __flush_tlb_all();
+ efi_switch_mm(efi_scratch.prev_mm);
local_irq_restore(flags);
return status;
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 189b218..46c58b0 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -33,7 +33,7 @@ ENTRY(efi64_thunk)
* Switch to 1:1 mapped 32-bit stack pointer.
*/
movq %rsp, efi_saved_sp(%rip)
- movq efi_scratch+25(%rip), %rsp
+ movq efi_scratch(%rip), %rsp
/*
* Calculate the physical address of the kernel text.
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 5b513cc..36c1f8b 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -75,7 +75,7 @@ struct quark_security_header {
u32 rsvd[2];
};
-static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 };
+static const efi_char16_t efi_dummy_name[] = L"DUMMY";
static bool efi_no_storage_paranoia;
@@ -105,7 +105,8 @@ early_param("efi_no_storage_paranoia", setup_storage_paranoia);
*/
void efi_delete_dummy_variable(void)
{
- efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+ efi.set_variable((efi_char16_t *)efi_dummy_name,
+ &EFI_DUMMY_GUID,
EFI_VARIABLE_NON_VOLATILE |
EFI_VARIABLE_BOOTSERVICE_ACCESS |
EFI_VARIABLE_RUNTIME_ACCESS,
@@ -177,12 +178,13 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size,
* that by attempting to use more space than is available.
*/
unsigned long dummy_size = remaining_size + 1024;
- void *dummy = kzalloc(dummy_size, GFP_ATOMIC);
+ void *dummy = kzalloc(dummy_size, GFP_KERNEL);
if (!dummy)
return EFI_OUT_OF_RESOURCES;
- status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID,
+ status = efi.set_variable((efi_char16_t *)efi_dummy_name,
+ &EFI_DUMMY_GUID,
EFI_VARIABLE_NON_VOLATILE |
EFI_VARIABLE_BOOTSERVICE_ACCESS |
EFI_VARIABLE_RUNTIME_ACCESS,
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 2c67bae..2ebdf31 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -79,7 +79,7 @@ static void intel_mid_power_off(void)
static void intel_mid_reboot(void)
{
- intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0);
+ intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
}
static unsigned long __init intel_mid_calibrate_tsc(void)
@@ -199,6 +199,12 @@ void __init x86_intel_mid_early_setup(void)
legacy_pic = &null_legacy_pic;
+ /*
+ * Do nothing for now, as everything needed is done in
+ * x86_intel_mid_early_setup() below.
+ */
+ x86_init.acpi.reduced_hw_early_init = x86_init_noop;
+
pm_power_off = intel_mid_power_off;
machine_ops.emergency_restart = intel_mid_reboot;
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index 17d6d22..49828c2 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -224,25 +224,7 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused)
mutex_unlock(&idev->lock);
return ret;
}
-
-/**
- * imr_state_open - debugfs open callback.
- *
- * @inode: pointer to struct inode.
- * @file: pointer to struct file.
- * @return: result of single open.
- */
-static int imr_state_open(struct inode *inode, struct file *file)
-{
- return single_open(file, imr_dbgfs_state_show, inode->i_private);
-}
-
-static const struct file_operations imr_state_ops = {
- .open = imr_state_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(imr_dbgfs_state);
/**
* imr_debugfs_register - register debugfs hooks.
@@ -252,8 +234,8 @@ static const struct file_operations imr_state_ops = {
*/
static int imr_debugfs_register(struct imr_device *idev)
{
- idev->file = debugfs_create_file("imr_state", S_IFREG | S_IRUGO, NULL,
- idev, &imr_state_ops);
+ idev->file = debugfs_create_file("imr_state", 0444, NULL, idev,
+ &imr_dbgfs_state_fops);
return PTR_ERR_OR_ZERO(idev->file);
}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index db77e087..b36caae 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2255,8 +2255,6 @@ static int __init uv_bau_init(void)
init_uvhub(uvhub, vector, uv_base_pnode);
}
- alloc_intr_gate(vector, uv_bau_message_intr1);
-
for_each_possible_blade(uvhub) {
if (uv_blade_nr_possible_cpus(uvhub)) {
unsigned long val;
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 0ef5e520..74a5329 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -50,7 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
{
pmd_t *pmd;
pud_t *pud;
- p4d_t *p4d;
+ p4d_t *p4d = NULL;
/*
* The new mapping only has to cover the page containing the image
@@ -66,7 +66,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
* tables used by the image kernel.
*/
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (pgtable_l5_enabled) {
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
if (!p4d)
return -ENOMEM;
@@ -84,7 +84,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ if (p4d) {
set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
} else {
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 2f15a2a..d70c15d 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -16,7 +16,7 @@ KCOV_INSTRUMENT := n
# in turn leaves some undefined symbols like __fentry__ in purgatory and not
# sure how to relocate those. Like kexec-tools, use custom flags.
-KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large
KBUILD_CFLAGS += -m$(BITS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index de53bd1..24bb759 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -102,7 +102,7 @@ ENTRY(startup_32)
* don't we'll eventually crash trying to execute encrypted
* instructions.
*/
- bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
+ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
jnc .Ldone
movl $MSR_K8_SYSCFG, %ecx
rdmsr
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5d73c44..220e978 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
break;
case R_X86_64_PC32:
+ case R_X86_64_PLT32:
/*
* PC relative relocations don't need to be adjusted unless
* referencing a percpu symbol.
+ *
+ * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32.
*/
if (is_percpu_sym(sym, symname))
add_reloc(&relocs32neg, offset);
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index b7d7340..f31e5d9 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -30,11 +30,7 @@
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb() rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
#define dma_rmb() barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
#define dma_wmb() barrier()
#include <asm-generic/barrier.h>
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index f605825..c1f98f3 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -18,9 +18,6 @@ config XEN_PV
bool "Xen PV guest support"
default y
depends on XEN
- # XEN_PV is not ready to work with 5-level paging.
- # Changes to hypervisor are also required.
- depends on !X86_5LEVEL
select XEN_HAVE_PVMMU
select XEN_HAVE_VPMU
help
@@ -79,6 +76,4 @@ config XEN_DEBUG_FS
config XEN_PVH
bool "Support for running as a PVH guest"
depends on XEN && XEN_PVHVM && ACPI
- # Pre-built page tables are not ready to handle 5-level paging.
- depends on !X86_5LEVEL
def_bool n
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index de58533..2163888 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -215,7 +215,7 @@ static void __init xen_apic_check(void)
}
void __init xen_init_apic(void)
{
- x86_io_apic_ops.read = xen_io_apic_read;
+ x86_apic_ops.io_apic_read = xen_io_apic_read;
/* On PV guests the APIC CPUID bit is disabled so none of the
* routines end up executing. */
if (!xen_initial_domain())
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index c047f42..3c2c253 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
- add_preferred_console("tty", 0, NULL);
- add_preferred_console("hvc", 0, NULL);
if (pci_xen)
x86_init.pci.arch_init = pci_xen_init;
} else {
@@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
xen_boot_params_init_edd();
}
+
+ add_preferred_console("tty", 0, NULL);
+ add_preferred_console("hvc", 0, NULL);
+
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
pci_probe &= ~PCI_PROBE_BIOS;
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index 436c4f0..aa1c6a68 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -6,6 +6,7 @@
#include <asm/io_apic.h>
#include <asm/hypervisor.h>
#include <asm/e820/api.h>
+#include <asm/x86_init.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>
@@ -16,15 +17,20 @@
/*
* PVH variables.
*
- * xen_pvh and pvh_bootparams need to live in data segment since they
- * are used after startup_{32|64}, which clear .bss, are invoked.
+ * xen_pvh, pvh_bootparams and pvh_start_info need to live in the data segment
+ * since they are used after startup_{32|64}, which clear .bss, have been invoked.
*/
bool xen_pvh __attribute__((section(".data"))) = 0;
struct boot_params pvh_bootparams __attribute__((section(".data")));
+struct hvm_start_info pvh_start_info __attribute__((section(".data")));
-struct hvm_start_info pvh_start_info;
unsigned int pvh_start_info_sz = sizeof(pvh_start_info);
+static u64 pvh_get_root_pointer(void)
+{
+ return pvh_start_info.rsdp_paddr;
+}
+
static void __init init_pvh_bootparams(void)
{
struct xen_memory_map memmap;
@@ -71,6 +77,8 @@ static void __init init_pvh_bootparams(void)
*/
pvh_bootparams.hdr.version = 0x212;
pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
+
+ x86_init.acpi.get_root_pointer = pvh_get_root_pointer;
}
/*
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index aae88fe..486c0a3 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */
static phys_addr_t xen_pt_base, xen_pt_size __initdata;
+static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);
+
/*
* Just beyond the highest usermode address. STACK_TOP_MAX has a
* redzone above it, so round it up to a PGD boundary.
@@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr)
}
+/*
+ * During early boot all page table pages are pinned, but we do not have struct
+ * pages, so return true until struct pages are ready.
+ */
static bool xen_page_pinned(void *ptr)
{
- struct page *page = virt_to_page(ptr);
+ if (static_branch_likely(&xen_struct_pages_ready)) {
+ struct page *page = virt_to_page(ptr);
- return PagePinned(page);
+ return PagePinned(page);
+ }
+ return true;
}
static void xen_extend_mmu_update(const struct mmu_update *update)
@@ -538,6 +547,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+__visible p4dval_t xen_p4d_val(p4d_t p4d)
+{
+ return pte_mfn_to_pfn(p4d.p4d);
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val);
+
+__visible p4d_t xen_make_p4d(p4dval_t p4d)
+{
+ p4d = pte_pfn_to_mfn(p4d);
+
+ return native_make_p4d(p4d);
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
+#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_X86_64 */
static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
@@ -820,11 +845,6 @@ void xen_mm_pin_all(void)
spin_unlock(&pgd_lock);
}
-/*
- * The init_mm pagetable is really pinned as soon as its created, but
- * that's before we have page structures to store the bits. So do all
- * the book-keeping now.
- */
static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
enum pt_level level)
{
@@ -832,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
return 0;
}
-static void __init xen_mark_init_mm_pinned(void)
+/*
+ * The init_mm pagetable is really pinned as soon as it's created, but
+ * that's before we have page structures to store the bits. So do all
+ * the book-keeping now, once struct pages for allocated pages are
+ * initialized. This happens only after free_all_bootmem() is called.
+ */
+static void __init xen_after_bootmem(void)
{
+ static_branch_enable(&xen_struct_pages_ready);
+#ifdef CONFIG_X86_64
+ SetPagePinned(virt_to_page(level3_user_vsyscall));
+#endif
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
@@ -1607,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
unsigned level)
{
- bool pinned = PagePinned(virt_to_page(mm->pgd));
+ bool pinned = xen_page_pinned(mm->pgd);
trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
if (pinned) {
struct page *page = pfn_to_page(pfn);
- SetPagePinned(page);
+ if (static_branch_likely(&xen_struct_pages_ready))
+ SetPagePinned(page);
if (!PageHighMem(page)) {
xen_mc_batch();
@@ -2348,9 +2379,7 @@ static void __init xen_post_allocator_init(void)
#ifdef CONFIG_X86_64
pv_mmu_ops.write_cr3 = &xen_write_cr3;
- SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
- xen_mark_init_mm_pinned();
}
static void xen_leave_lazy_mmu(void)
@@ -2411,6 +2440,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.alloc_pud = xen_alloc_pmd_init,
.release_pud = xen_release_pmd_init,
+
+#if CONFIG_PGTABLE_LEVELS >= 5
+ .p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
+ .make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
+#endif
#endif /* CONFIG_X86_64 */
.activate_mm = xen_activate_mm,
@@ -2429,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
void __init xen_init_mmu_ops(void)
{
x86_init.paging.pagetable_init = xen_pagetable_init;
+ x86_init.hyper.init_after_bootmem = xen_after_bootmem;
pv_mmu_ops = xen_mmu_ops;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index d9f96cc..1d83152 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,12 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/tick.h>
+#include <linux/percpu-defs.h>
#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/events.h>
+#include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>
#include <asm/fixmap.h>
@@ -15,6 +18,8 @@
#include "mmu.h"
#include "pmu.h"
+static DEFINE_PER_CPU(u64, spec_ctrl);
+
void xen_arch_pre_suspend(void)
{
xen_save_time_memory_area();
@@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled)
static void xen_vcpu_notify_restore(void *data)
{
+ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+ wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl));
+
/* Boot processor notified via generic timekeeping_resume() */
if (smp_processor_id() == 0)
return;
@@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data)
static void xen_vcpu_notify_suspend(void *data)
{
+ u64 tmp;
+
tick_suspend_local();
+
+ if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) {
+ rdmsrl(MSR_IA32_SPEC_CTRL, tmp);
+ this_cpu_write(spec_ctrl, tmp);
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
}
void xen_arch_resume(void)