diff options
Diffstat (limited to 'arch/x86')
256 files changed, 9092 insertions, 8608 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1236b1..d234cca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,7 +54,6 @@ config X86 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 - select ARCH_HAS_PHYS_TO_DMA select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT @@ -83,6 +82,7 @@ config X86 select CLOCKSOURCE_VALIDATE_LAST_CYCLE select CLOCKSOURCE_WATCHDOG select DCACHE_WORD_ACCESS + select DMA_DIRECT_OPS select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT select GENERIC_CLOCKEVENTS @@ -393,17 +393,6 @@ config X86_FEATURE_NAMES If in doubt, say Y. -config X86_FAST_FEATURE_TESTS - bool "Fast CPU feature tests" if EMBEDDED - default y - ---help--- - Some fast-paths in the kernel depend on the capabilities of the CPU. - Say Y here for the kernel to patch in the appropriate code at runtime - based on the capabilities of the CPU. The infrastructure for patching - code at runtime takes up some additional space; space-constrained - embedded systems may wish to say N here to produce smaller, slightly - slower code. - config X86_X2APIC bool "Support x2apic" depends on X86_LOCAL_APIC && X86_64 && (IRQ_REMAP || HYPERVISOR_GUEST) @@ -430,6 +419,7 @@ config GOLDFISH config RETPOLINE bool "Avoid speculative indirect branches in kernel" default y + select STACK_VALIDATION if HAVE_STACK_VALIDATION help Compile kernel with the retpoline compiler options to guard against kernel-to-user data leaks by avoiding speculative indirect @@ -690,6 +680,7 @@ config X86_SUPPORTS_MEMORY_FAILURE config STA2X11 bool "STA2X11 Companion Chip Support" depends on X86_32_NON_STANDARD && PCI + select ARCH_HAS_PHYS_TO_DMA select X86_DEV_DMA_OPS select X86_DMA_REMAP select SWIOTLB @@ -1305,7 +1296,7 @@ config MICROCODE the Linux kernel. The preferred method to load microcode from a detached initrd is described - in Documentation/x86/early-microcode.txt. 
For that you need to enable + in Documentation/x86/microcode.txt. For that you need to enable CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the initrd for microcode blobs. @@ -1471,6 +1462,8 @@ config X86_PAE config X86_5LEVEL bool "Enable 5-level page tables support" + select DYNAMIC_MEMORY_LAYOUT + select SPARSEMEM_VMEMMAP depends on X86_64 ---help--- 5-level paging enables access to larger address space: @@ -1479,8 +1472,8 @@ config X86_5LEVEL It will be supported by future Intel CPUs. - Note: a kernel with this option enabled can only be booted - on machines that support the feature. + A kernel with the option enabled can be booted on machines that + support 4- or 5-level paging. See Documentation/x86/x86_64/5level-paging.txt for more information. @@ -1605,10 +1598,6 @@ config ARCH_HAVE_MEMORY_PRESENT def_bool y depends on X86_32 && DISCONTIGMEM -config NEED_NODE_MEMMAP_SIZE - def_bool y - depends on X86_32 && (DISCONTIGMEM || SPARSEMEM) - config ARCH_FLATMEM_ENABLE def_bool y depends on X86_32 && !NUMA @@ -2184,10 +2173,17 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. +config DYNAMIC_MEMORY_LAYOUT + bool + ---help--- + This option makes base addresses of vmalloc and vmemmap as well as + __PAGE_OFFSET movable during boot. + config RANDOMIZE_MEMORY bool "Randomize the kernel memory sections" depends on X86_64 depends on RANDOMIZE_BASE + select DYNAMIC_MEMORY_LAYOUT default RANDOMIZE_BASE ---help--- Randomizes the base virtual address of kernel memory sections @@ -2306,7 +2302,7 @@ choice it can be used to assist security vulnerability exploitation. This setting can be changed at boot time via the kernel command - line parameter vsyscall=[native|emulate|none]. + line parameter vsyscall=[emulate|none]. On a system with recent enough glibc (2.14 or newer) and no static binaries, you can say None without a performance penalty @@ -2314,15 +2310,6 @@ choice If unsure, select "Emulate". 
- config LEGACY_VSYSCALL_NATIVE - bool "Native" - help - Actual executable code is located in the fixed vsyscall - address mapping, implementing time() efficiently. Since - this makes the mapping executable, it can be used during - security vulnerability exploitation (traditionally as - ROP gadgets). This configuration is not recommended. - config LEGACY_VSYSCALL_EMULATE bool "Emulate" help @@ -2640,8 +2627,10 @@ config PCI_DIRECT depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC || PCI_GOMMCONFIG)) config PCI_MMCONFIG - def_bool y - depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY) + bool "Support mmconfig PCI config space access" if X86_64 + default y + depends on PCI && (ACPI || SFI || JAILHOUSE_GUEST) + depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG) config PCI_OLPC def_bool y @@ -2656,9 +2645,9 @@ config PCI_DOMAINS def_bool y depends on PCI -config PCI_MMCONFIG - bool "Support mmconfig PCI config space access" - depends on X86_64 && PCI && ACPI +config MMCONF_FAM10H + def_bool y + depends on X86_64 && PCI_MMCONFIG && ACPI config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT @@ -2676,11 +2665,13 @@ config PCI_CNB20LE_QUIRK source "drivers/pci/Kconfig" config ISA_BUS - bool "ISA-style bus support on modern systems" if EXPERT - select ISA_BUS_API + bool "ISA bus support on modern systems" if EXPERT help - Enables ISA-style drivers on modern systems. This is necessary to - support PC/104 devices on X86_64 platforms. + Expose ISA bus device drivers and options available for selection and + configuration. Enable this option if your target machine has an ISA + bus. ISA is an older system, displaced by PCI and newer bus + architectures -- if your target machine is modern, it probably does + not have an ISA bus. If unsure, say N. 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8b8d229..638411f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT default "4" if MELAN || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX -config X86_PPRO_FENCE - bool "PentiumPro memory ordering errata workaround" - depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1 - ---help--- - Old PentiumPro multiprocessor systems had errata that could cause - memory operations to violate the x86 ordering standard in rare cases. - Enabling this option will attempt to work around some (but not all) - occurrences of this problem, at the cost of much heavier spinlock and - memory barrier operations. - - If unsure, say n here. Even distro kernels should think twice before - enabling this: there are few systems, and an unlikely bug. - config X86_F00F_BUG def_bool y depends on M586MMX || M586TSC || M586 || M486 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index fad5516..60135cb 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -31,8 +31,7 @@ endif CODE16GCC_CFLAGS := -m32 -Wa,$(srctree)/arch/x86/boot/code16gcc.h M16_CFLAGS := $(call cc-option, -m16, $(CODE16GCC_CFLAGS)) -REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -D__KERNEL__ \ - -DDISABLE_BRANCH_PROFILING \ +REALMODE_CFLAGS := $(M16_CFLAGS) -g -Os -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse @@ -181,6 +180,10 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER endif endif +ifndef CC_HAVE_ASM_GOTO + $(error Compiler lacks asm-goto support.) +endif + # # Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a # GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). 
There's no way @@ -223,6 +226,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) LDFLAGS := -m elf_$(UTS_MACHINE) +# +# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to +# the linker to force 2MB page size regardless of the default page size used +# by the linker. +# +ifdef CONFIG_X86_64 +LDFLAGS += $(call ld-option, -z max-page-size=0x200000) +endif + # Speed up the build KBUILD_CFLAGS += -pipe # Workaround for a gcc prelease that unfortunately was shipped in a suse release @@ -232,10 +244,9 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE - RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) - ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE - endif +ifneq ($(RETPOLINE_CFLAGS),) + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +endif endif archscripts: scripts_basic diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f25e153..fa42f89 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -26,7 +26,7 @@ KCOV_INSTRUMENT := n targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 -KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ -O2 +KBUILD_CFLAGS := -m$(BITS) -O2 KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC) KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 @@ -78,7 +78,7 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o ifdef CONFIG_X86_64 - vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o + vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o 
vmlinux-objs-y += $(obj)/pgtable_64.o endif diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 353e20c..47d3eff 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -421,9 +421,10 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) } } +static const efi_char16_t apple[] = L"Apple"; + static void setup_quirks(struct boot_params *boot_params) { - efi_char16_t const apple[] = { 'A', 'p', 'p', 'l', 'e', 0 }; efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long) efi_table_attr(efi_system_table, fw_vendor, sys_table); @@ -439,7 +440,7 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) struct efi_uga_draw_protocol *uga = NULL, *first_uga; efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; - u32 *handles = (u32 *)uga_handle;; + u32 *handles = (u32 *)uga_handle; efi_status_t status = EFI_INVALID_PARAMETER; int i; @@ -484,7 +485,7 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) struct efi_uga_draw_protocol *uga = NULL, *first_uga; efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; unsigned long nr_ugas; - u64 *handles = (u64 *)uga_handle;; + u64 *handles = (u64 *)uga_handle; efi_status_t status = EFI_INVALID_PARAMETER; int i; diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fc313e2..fca012b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -33,6 +33,7 @@ #include <asm/processor-flags.h> #include <asm/asm-offsets.h> #include <asm/bootparam.h> +#include "pgtable.h" /* * Locally defined symbols should be marked hidden: @@ -304,55 +305,77 @@ ENTRY(startup_64) /* Set up the stack */ leaq boot_stack_end(%rbx), %rsp -#ifdef CONFIG_X86_5LEVEL /* - * Check if we need to enable 5-level paging. - * RSI holds real mode data and need to be preserved across - * a function call. 
+ * At this point we are in long mode with 4-level paging enabled, + * but we might want to enable 5-level paging or vice versa. + * + * The problem is that we cannot do it directly. Setting or clearing + * CR4.LA57 in long mode would trigger #GP. So we need to switch off + * long mode and paging first. + * + * We also need a trampoline in lower memory to switch over from + * 4- to 5-level paging for cases when the bootloader puts the kernel + * above 4G, but didn't enable 5-level paging for us. + * + * The same trampoline can be used to switch from 5- to 4-level paging + * mode, like when starting 4-level paging kernel via kexec() when + * original kernel worked in 5-level paging mode. + * + * For the trampoline, we need the top page table to reside in lower + * memory as we don't have a way to load 64-bit values into CR3 in + * 32-bit mode. + * + * We go through the trampoline even if we don't have to: if we're + * already in a desired paging mode. This way the trampoline code gets + * tested on every boot. */ - pushq %rsi - call l5_paging_required - popq %rsi - /* If l5_paging_required() returned zero, we're done here. */ - cmpq $0, %rax - je lvl5 + /* Make sure we have GDT with 32-bit code segment */ + leaq gdt(%rip), %rax + movq %rax, gdt64+2(%rip) + lgdt gdt64(%rip) /* - * At this point we are in long mode with 4-level paging enabled, - * but we want to enable 5-level paging. + * paging_prepare() sets up the trampoline and checks if we need to + * enable 5-level paging. * - * The problem is that we cannot do it directly. Setting LA57 in - * long mode would trigger #GP. So we need to switch off long mode - * first. + * Address of the trampoline is returned in RAX. + * Non zero RDX on return means we need to enable 5-level paging. * - * NOTE: This is not going to work if bootloader put us above 4G - * limit. - * - * The first step is go into compatibility mode. + * RSI holds real mode data and needs to be preserved across + * this function call. 
*/ + pushq %rsi + call paging_prepare + popq %rsi - /* Clear additional page table */ - leaq lvl5_pgtable(%rbx), %rdi - xorq %rax, %rax - movq $(PAGE_SIZE/8), %rcx - rep stosq + /* Save the trampoline address in RCX */ + movq %rax, %rcx /* - * Setup current CR3 as the first and only entry in a new top level - * page table. + * Load the address of trampoline_return() into RDI. + * It will be used by the trampoline to return to the main code. */ - movq %cr3, %rdi - leaq 0x7 (%rdi), %rax - movq %rax, lvl5_pgtable(%rbx) + leaq trampoline_return(%rip), %rdi /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ pushq $__KERNEL32_CS - leaq compatible_mode(%rip), %rax + leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax pushq %rax lretq -lvl5: -#endif +trampoline_return: + /* Restore the stack, the 32-bit trampoline uses its own stack */ + leaq boot_stack_end(%rbx), %rsp + + /* + * cleanup_trampoline() would restore trampoline memory. + * + * RSI holds real mode data and needs to be preserved across + * this function call. + */ + pushq %rsi + call cleanup_trampoline + popq %rsi /* Zero EFLAGS */ pushq $0 @@ -490,46 +513,82 @@ relocated: jmp *%rax .code32 -#ifdef CONFIG_X86_5LEVEL -compatible_mode: - /* Setup data and stack segments */ +/* + * This is the 32-bit trampoline that will be copied over to low memory. + * + * RDI contains the return address (might be above 4G). + * ECX contains the base address of the trampoline memory. + * Non zero RDX on return means we need to enable 5-level paging. 
+ */ +ENTRY(trampoline_32bit_src) + /* Set up data and stack segments */ movl $__KERNEL_DS, %eax movl %eax, %ds movl %eax, %ss + /* Set up new stack */ + leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - /* Point CR3 to 5-level paging */ - leal lvl5_pgtable(%ebx), %eax - movl %eax, %cr3 + /* Check what paging mode we want to be in after the trampoline */ + cmpl $0, %edx + jz 1f - /* Enable PAE and LA57 mode */ + /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ + movl %cr4, %eax + testl $X86_CR4_LA57, %eax + jnz 3f + jmp 2f +1: + /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ movl %cr4, %eax - orl $(X86_CR4_PAE | X86_CR4_LA57), %eax + testl $X86_CR4_LA57, %eax + jz 3f +2: + /* Point CR3 to the trampoline's new top level page table */ + leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax + movl %eax, %cr3 +3: + /* Enable PAE and LA57 (if required) paging modes */ + movl $X86_CR4_PAE, %eax + cmpl $0, %edx + jz 1f + orl $X86_CR4_LA57, %eax +1: movl %eax, %cr4 - /* Calculate address we are running at */ - call 1f -1: popl %edi - subl $1b, %edi + /* Calculate address of paging_enabled() once we are executing in the trampoline */ + leal paging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax - /* Prepare stack for far return to Long Mode */ + /* Prepare the stack for far return to Long Mode */ pushl $__KERNEL_CS - leal lvl5(%edi), %eax - push %eax + pushl %eax - /* Enable paging back */ + /* Enable paging again */ movl $(X86_CR0_PG | X86_CR0_PE), %eax movl %eax, %cr0 lret -#endif + .code64 +paging_enabled: + /* Return from the trampoline */ + jmp *%rdi + + /* + * The trampoline code has a size limit. + * Make sure we fail to compile if the trampoline code grows + * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes. 
+ */ + .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE + + .code32 no_longmode: - /* This isn't an x86-64 CPU so hang */ + /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: hlt jmp 1b @@ -537,6 +596,11 @@ no_longmode: #include "../../kernel/verify_cpu.S" .data +gdt64: + .word gdt_end - gdt + .long 0 + .word 0 + .quad 0 gdt: .word gdt_end - gdt .long gdt @@ -585,7 +649,3 @@ boot_stack_end: .balign 4096 pgtable: .fill BOOT_PGT_SIZE, 1, 0 -#ifdef CONFIG_X86_5LEVEL -lvl5_pgtable: - .fill PAGE_SIZE, 1, 0 -#endif diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 8199a61..66e42a0 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -46,6 +46,12 @@ #define STATIC #include <linux/decompress/mm.h> +#ifdef CONFIG_X86_5LEVEL +unsigned int pgtable_l5_enabled __ro_after_init; +unsigned int pgdir_shift __ro_after_init = 39; +unsigned int ptrs_per_p4d __ro_after_init = 1; +#endif + extern unsigned long get_cmd_line_ptr(void); /* Simplified build-specific string for starting entropy. */ @@ -723,6 +729,14 @@ void choose_random_location(unsigned long input, return; } +#ifdef CONFIG_X86_5LEVEL + if (__read_cr4() & X86_CR4_LA57) { + pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; + } +#endif + boot_params->hdr.loadflags |= KASLR_FLAG; /* Prepare to add new identity pagetables on demand. */ diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/kaslr_64.c index b5e5e02..522d114 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -16,13 +16,6 @@ #define __pa(x) ((unsigned long)(x)) #define __va(x) ((void *)((unsigned long)(x))) -/* - * The pgtable.h and mm/ident_map.c includes make use of the SME related - * information which is not used in the compressed image support. Un-define - * the SME support to avoid any compile and link errors. 
- */ -#undef CONFIG_AMD_MEM_ENCRYPT - /* No PAGE_TABLE_ISOLATION support needed either: */ #undef CONFIG_PAGE_TABLE_ISOLATION @@ -85,13 +78,14 @@ static struct x86_mapping_info mapping_info; /* Locates and clears a region for a new top level page table. */ void initialize_identity_maps(void) { - unsigned long sev_me_mask = get_sev_encryption_mask(); + /* If running as an SEV guest, the encryption mask is required. */ + set_sev_encryption_mask(); /* Init mapping_info with run-time function/buffer pointers. */ mapping_info.alloc_pgt_page = alloc_pgt_page; mapping_info.context = &pgt_data; - mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sev_me_mask; - mapping_info.kernpg_flag = _KERNPG_TABLE | sev_me_mask; + mapping_info.page_flag = __PAGE_KERNEL_LARGE_EXEC | sme_me_mask; + mapping_info.kernpg_flag = _KERNPG_TABLE; /* * It should be impossible for this not to already be true, diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index 54f5f66..eaa843a 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -88,9 +88,7 @@ ENTRY(get_sev_encryption_bit) ENDPROC(get_sev_encryption_bit) .code64 -ENTRY(get_sev_encryption_mask) - xor %rax, %rax - +ENTRY(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT push %rbp push %rdx @@ -101,9 +99,7 @@ ENTRY(get_sev_encryption_mask) testl %eax, %eax jz .Lno_sev_mask - xor %rdx, %rdx - bts %rax, %rdx /* Create the encryption mask */ - mov %rdx, %rax /* ... 
and return it */ + bts %rax, sme_me_mask(%rip) /* Create the encryption mask */ .Lno_sev_mask: movq %rbp, %rsp /* Restore original stack pointer */ @@ -112,9 +108,16 @@ ENTRY(get_sev_encryption_mask) pop %rbp #endif + xor %rax, %rax ret -ENDPROC(get_sev_encryption_mask) +ENDPROC(set_sev_encryption_mask) .data enc_bit: .int 0xffffffff + +#ifdef CONFIG_AMD_MEM_ENCRYPT + .balign 8 +GLOBAL(sme_me_mask) + .quad 0 +#endif diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 98761a1..8dd1d5c 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -14,6 +14,7 @@ #include "misc.h" #include "error.h" +#include "pgtable.h" #include "../string.h" #include "../voffset.h" @@ -169,16 +170,6 @@ void __puthex(unsigned long value) } } -static bool l5_supported(void) -{ - /* Check if leaf 7 is supported. */ - if (native_cpuid_eax(0) < 7) - return 0; - - /* Check if la57 is supported. */ - return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)); -} - #if CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) @@ -309,6 +300,10 @@ static void parse_elf(void *output) switch (phdr->p_type) { case PT_LOAD: +#ifdef CONFIG_X86_64 + if ((phdr->p_align % 0x200000) != 0) + error("Alignment of LOAD segment isn't multiple of 2MB"); +#endif #ifdef CONFIG_RELOCATABLE dest = output; dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR); @@ -372,12 +367,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, console_init(); debug_putstr("early console in extract_kernel\n"); - if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) { - error("This linux kernel as configured requires 5-level paging\n" - "This CPU does not support the required 'cr4.la57' feature\n" - "Unable to boot - please use a kernel appropriate for your CPU\n"); - } - free_mem_ptr = heap; /* Heap */ free_mem_end_ptr = heap + BOOT_HEAP_SIZE; @@ -388,6 +377,11 @@ asmlinkage __visible void 
*extract_kernel(void *rmode, memptr heap, debug_putaddr(output_len); debug_putaddr(kernel_total_size); +#ifdef CONFIG_X86_64 + /* Report address of 32-bit trampoline */ + debug_putaddr(trampoline_32bit); +#endif + /* * The memory hole needed for the kernel is the larger of either * the entire decompressed kernel plus relocation table, or the diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9d323dc..9e11be4 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -12,6 +12,11 @@ #undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN +#ifdef CONFIG_X86_5LEVEL +/* cpu_feature_enabled() cannot be used that early */ +#define pgtable_l5_enabled __pgtable_l5_enabled +#endif + #include <linux/linkage.h> #include <linux/screen_info.h> #include <linux/elf.h> @@ -109,6 +114,6 @@ static inline void console_init(void) { } #endif -unsigned long get_sev_encryption_mask(void); +void set_sev_encryption_mask(void); #endif diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h new file mode 100644 index 0000000..91f7563 --- /dev/null +++ b/arch/x86/boot/compressed/pgtable.h @@ -0,0 +1,20 @@ +#ifndef BOOT_COMPRESSED_PAGETABLE_H +#define BOOT_COMPRESSED_PAGETABLE_H + +#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) + +#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 + +#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE +#define TRAMPOLINE_32BIT_CODE_SIZE 0x60 + +#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE + +#ifndef __ASSEMBLER__ + +extern unsigned long *trampoline_32bit; + +extern void trampoline_32bit_src(void *return_ptr); + +#endif /* __ASSEMBLER__ */ +#endif /* BOOT_COMPRESSED_PAGETABLE_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index b4469a3..32af1cb 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,4 +1,6 @@ #include <asm/processor.h> +#include "pgtable.h" +#include "../string.h" /* * 
__force_order is used by special_insns.h asm code to force instruction @@ -9,20 +11,144 @@ */ unsigned long __force_order; -int l5_paging_required(void) +#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */ +#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */ + +struct paging_config { + unsigned long trampoline_start; + unsigned long l5_required; +}; + +/* Buffer to preserve trampoline memory */ +static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; + +/* + * The page table is going to be used instead of page table in the trampoline + * memory. + * + * It must not be in BSS as BSS is cleared after cleanup_trampoline(). + */ +static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); + +/* + * Trampoline address will be printed by extract_kernel() for debugging + * purposes. + * + * Avoid putting the pointer into .bss as it will be cleared between + * paging_prepare() and extract_kernel(). + */ +unsigned long *trampoline_32bit __section(.data); + +struct paging_config paging_prepare(void) { - /* Check if leaf 7 is supported. */ + struct paging_config paging_config = {}; + unsigned long bios_start, ebda_start; + + /* + * Check if LA57 is desired and supported. + * + * There are two parts to the check: + * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y + * - if the machine supports 5-level paging: + * + CPUID leaf 7 is supported + * + the leaf has the feature bit set + * + * That's a substitute for boot_cpu_has() in early boot code. + */ + if (IS_ENABLED(CONFIG_X86_5LEVEL) && + native_cpuid_eax(0) >= 7 && + (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { + paging_config.l5_required = 1; + } + + /* + * Find a suitable spot for the trampoline. + * This code is based on reserve_bios_regions(). 
+ */ + + ebda_start = *(unsigned short *)0x40e << 4; + bios_start = *(unsigned short *)0x413 << 10; - if (native_cpuid_eax(0) < 7) - return 0; + if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX) + bios_start = BIOS_START_MAX; + + if (ebda_start > BIOS_START_MIN && ebda_start < bios_start) + bios_start = ebda_start; + + /* Place the trampoline just below the end of low memory, aligned to 4k */ + paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE; + paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE); + + trampoline_32bit = (unsigned long *)paging_config.trampoline_start; + + /* Preserve trampoline memory */ + memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); + + /* Clear trampoline memory first */ + memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); + + /* Copy trampoline code in place */ + memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); + + /* + * The code below prepares page table in trampoline memory. + * + * The new page table will be used by trampoline code for switching + * from 4- to 5-level paging or vice versa. + * + * If switching is not required, the page table is unused: trampoline + * code wouldn't touch CR3. + */ + + /* + * We are not going to use the page table in trampoline memory if we + * are already in the desired paging mode. + */ + if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + goto out; + + if (paging_config.l5_required) { + /* + * For 4- to 5-level paging transition, set up current CR3 as + * the first and the only entry in a new top-level page table. + */ + trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; + } else { + unsigned long src; + + /* + * For 5- to 4-level paging transition, copy page table pointed + * by first entry in the current top-level page table as our + * new top-level page table. 
+ * + * We cannot just point to the page table from trampoline as it + * may be above 4G. + */ + src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; + memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), + (void *)src, PAGE_SIZE); + } + +out: + return paging_config; +} + +void cleanup_trampoline(void) +{ + void *trampoline_pgtable; - /* Check if la57 is supported. */ - if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) - return 0; + trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET; - /* Check if 5-level paging has already been enabled. */ - if (native_read_cr4() & X86_CR4_LA57) - return 0; + /* + * Move the top level page table out of trampoline memory, + * if it's there. + */ + if ((void *)__native_read_cr3() == trampoline_pgtable) { + memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE); + native_write_cr3((unsigned long)top_pgtable); + } - return 1; + /* Restore trampoline memory */ + memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); } diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 12e8484..e762ef4 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -94,23 +94,30 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define STACK_OFFSET 8*3 -#define HashKey 16*0 // store HashKey <<1 mod poly here -#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here -#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here -#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here -#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64 + +#define AadHash 16*0 +#define AadLen 16*1 +#define InLen (16*1)+8 +#define PBlockEncKey 16*2 +#define OrigIV 16*3 +#define CurCount 16*4 +#define PBlockLen 16*5 +#define HashKey 16*6 // store HashKey <<1 mod poly here +#define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here +#define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here +#define HashKey_4 16*9 // 
store HashKey^4 <<1 mod poly here +#define HashKey_k 16*10 // store XOR of High 64 bits and Low 64 // bits of HashKey <<1 mod poly here //(for Karatsuba purposes) -#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64 +#define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64 // bits of HashKey^2 <<1 mod poly here // (for Karatsuba purposes) -#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64 +#define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64 // bits of HashKey^3 <<1 mod poly here // (for Karatsuba purposes) -#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64 +#define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64 // bits of HashKey^4 <<1 mod poly here // (for Karatsuba purposes) -#define VARIABLE_OFFSET 16*8 #define arg1 rdi #define arg2 rsi @@ -118,10 +125,11 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define arg4 rcx #define arg5 r8 #define arg6 r9 -#define arg7 STACK_OFFSET+8(%r14) -#define arg8 STACK_OFFSET+16(%r14) -#define arg9 STACK_OFFSET+24(%r14) -#define arg10 STACK_OFFSET+32(%r14) +#define arg7 STACK_OFFSET+8(%rsp) +#define arg8 STACK_OFFSET+16(%rsp) +#define arg9 STACK_OFFSET+24(%rsp) +#define arg10 STACK_OFFSET+32(%rsp) +#define arg11 STACK_OFFSET+40(%rsp) #define keysize 2*15*16(%arg1) #endif @@ -171,6 +179,332 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define TKEYP T1 #endif +.macro FUNC_SAVE + push %r12 + push %r13 + push %r14 +# +# states of %xmm registers %xmm6:%xmm15 not saved +# all %xmm registers are clobbered +# +.endm + + +.macro FUNC_RESTORE + pop %r14 + pop %r13 + pop %r12 +.endm + +# Precompute hashkeys. +# Input: Hash subkey. +# Output: HashKeys stored in gcm_context_data. Only needs to be called +# once per key. +# clobbers r12, and tmp xmm registers. 
+.macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7 + mov \SUBKEY, %r12 + movdqu (%r12), \TMP3 + movdqa SHUF_MASK(%rip), \TMP2 + PSHUFB_XMM \TMP2, \TMP3 + + # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa \TMP3, \TMP2 + psllq $1, \TMP3 + psrlq $63, \TMP2 + movdqa \TMP2, \TMP1 + pslldq $8, \TMP2 + psrldq $8, \TMP1 + por \TMP2, \TMP3 + + # reduce HashKey<<1 + + pshufd $0x24, \TMP1, \TMP2 + pcmpeqd TWOONE(%rip), \TMP2 + pand POLY(%rip), \TMP2 + pxor \TMP2, \TMP3 + movdqa \TMP3, HashKey(%arg2) + + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%arg2) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%arg2) + + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%arg2) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%arg2) +.endm + +# GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding. 
+# Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 +.macro GCM_INIT Iv SUBKEY AAD AADLEN + mov \AADLEN, %r11 + mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length + xor %r11, %r11 + mov %r11, InLen(%arg2) # ctx_data.in_length = 0 + mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0 + mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 + mov \Iv, %rax + movdqu (%rax), %xmm0 + movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv + + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm0 + movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv + + PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + movdqa HashKey(%arg2), %xmm13 + + CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \ + %xmm4, %xmm5, %xmm6 +.endm + +# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context +# struct has been initialized by GCM_INIT. +# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK +# Clobbers rax, r10-r13, and xmm0-xmm15 +.macro GCM_ENC_DEC operation + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%arg2), %xmm13 + add %arg5, InLen(%arg2) + + xor %r11, %r11 # initialise the data pointer offset as zero + PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation + + sub %r11, %arg5 # sub partial block data used + mov %arg5, %r13 # save the number of bytes + + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) + mov %r13, %r12 + # Encrypt/Decrypt first few blocks + + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_\@ + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_\@ + je _initial_num_blocks_is_2_\@ +_initial_num_blocks_is_3_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation + sub $48, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_2_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation + 
sub $32, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_1_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation + sub $16, %r13 + jmp _initial_blocks_\@ +_initial_num_blocks_is_0_\@: + INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation +_initial_blocks_\@: + + # Main loop - Encrypt/Decrypt remaining blocks + + cmp $0, %r13 + je _zero_cipher_left_\@ + sub $64, %r13 + je _four_cipher_left_\@ +_crypt_by_4_\@: + GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \ + %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \ + %xmm7, %xmm8, enc + add $64, %r11 + sub $64, %r13 + jne _crypt_by_4_\@ +_four_cipher_left_\@: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_\@: + movdqu %xmm8, AadHash(%arg2) + movdqu %xmm0, CurCount(%arg2) + + mov %arg5, %r13 + and $15, %r13 # %r13 = arg5 (mod 16) + je _multiple_of_16_bytes_\@ + + mov %r13, PBlockLen(%arg2) + + # Handle the last <16 Byte block separately + paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + movdqu %xmm0, CurCount(%arg2) + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + movdqu %xmm0, PBlockEncKey(%arg2) + + cmp $16, %arg5 + jge _large_enough_update_\@ + + lea (%arg4,%r11,1), %r10 + mov %r13, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 + jmp _data_read_\@ + +_large_enough_update_\@: + sub $16, %r11 + add %r13, %r11 + + # receive the last <16 Byte block + movdqu (%arg4, %r11, 1), %xmm1 + + sub %r13, %r11 + add $16, %r11 + + lea SHIFT_MASK+16(%rip), %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (r13 is the number of bytes in plaintext mod 16) + sub %r13, %r12 + # get the appropriate shuffle mask + movdqu (%r12), %xmm2 + # shift right 
16-r13 bytes + PSHUFB_XMM %xmm2, %xmm1 + +_data_read_\@: + lea ALL_F+16(%rip), %r12 + sub %r13, %r12 + +.ifc \operation, dec + movdqa %xmm1, %xmm2 +.endif + pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn) + movdqu (%r12), %xmm1 + # get the appropriate mask to mask out top 16-r13 bytes of xmm0 + pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 +.ifc \operation, dec + pand %xmm1, %xmm2 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10 ,%xmm2 + + pxor %xmm2, %xmm8 +.else + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10,%xmm0 + + pxor %xmm0, %xmm8 +.endif + + movdqu %xmm8, AadHash(%arg2) +.ifc \operation, enc + # GHASH computation for the last <16 byte block + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm0 back to output as ciphertext + PSHUFB_XMM %xmm10, %xmm0 +.endif + + # Output %r13 bytes + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + mov %rax, (%arg3 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + mov %al, (%arg3, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_multiple_of_16_bytes_\@: +.endm + +# GCM_COMPLETE Finishes update of tag of last partial block +# Output: Authorization Tag (AUTH_TAG) +# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15 +.macro GCM_COMPLETE AUTHTAG AUTHTAGLEN + movdqu AadHash(%arg2), %xmm8 + movdqu HashKey(%arg2), %xmm13 + + mov PBlockLen(%arg2), %r12 + + cmp $0, %r12 + je _partial_done\@ + + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + +_partial_done\@: + mov AadLen(%arg2), %r12 # %r13 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + movd %r12d, %xmm15 # len(A) in %xmm15 + mov InLen(%arg2), %r12 + shl $3, %r12 # len(C) in bits (*128) + MOVQ_R64_XMM %r12, %xmm1 + + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # 
final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 + + movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) + pxor %xmm8, %xmm0 +_return_T_\@: + mov \AUTHTAG, %r10 # %r10 = authTag + mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_\@ + cmp $8, %r11 + jl _T_4_\@ +_T_8_\@: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 + psrldq $8, %xmm0 + cmp $0, %r11 + je _return_T_done_\@ +_T_4_\@: + movd %xmm0, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + psrldq $4, %xmm0 + cmp $0, %r11 + je _return_T_done_\@ +_T_123_\@: + movd %xmm0, %eax + cmp $2, %r11 + jl _T_1_\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done_\@ + add $2, %r10 + sar $16, %eax +_T_1_\@: + mov %al, (%r10) + jmp _return_T_done_\@ +_T_16_\@: + movdqu %xmm0, (%r10) +_return_T_done_\@: +.endm #ifdef __x86_64__ /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) @@ -264,232 +598,188 @@ _read_next_byte_lt8_\@: _done_read_partial_block_\@: .endm -/* -* if a = number of total plaintext bytes -* b = floor(a/16) -* num_initial_blocks = b mod 4 -* encrypt the initial num_initial_blocks blocks and apply ghash on -* the ciphertext -* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers -* are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified -*/ - - -.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg7, %r10 # %r10 = AAD - mov arg8, %r11 # %r11 = aadLen - pxor %xmm\i, %xmm\i - pxor \XMM2, \XMM2 +# CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted. 
+# clobbers r10-11, xmm14 +.macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \ + TMP6 TMP7 + MOVADQ SHUF_MASK(%rip), %xmm14 + mov \AAD, %r10 # %r10 = AAD + mov \AADLEN, %r11 # %r11 = aadLen + pxor \TMP7, \TMP7 + pxor \TMP6, \TMP6 cmp $16, %r11 - jl _get_AAD_rest\num_initial_blocks\operation -_get_AAD_blocks\num_initial_blocks\operation: - movdqu (%r10), %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor %xmm\i, \XMM2 - GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + jl _get_AAD_rest\@ +_get_AAD_blocks\@: + movdqu (%r10), \TMP7 + PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP7, \TMP6 + GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 add $16, %r10 sub $16, %r11 cmp $16, %r11 - jge _get_AAD_blocks\num_initial_blocks\operation + jge _get_AAD_blocks\@ - movdqu \XMM2, %xmm\i + movdqu \TMP6, \TMP7 /* read the last <16B of AAD */ -_get_AAD_rest\num_initial_blocks\operation: +_get_AAD_rest\@: cmp $0, %r11 - je _get_AAD_done\num_initial_blocks\operation - - READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor \XMM2, %xmm\i - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + je _get_AAD_done\@ -_get_AAD_done\num_initial_blocks\operation: - xor %r11, %r11 # initialise the data pointer offset as zero - # start AES for num_initial_blocks blocks - - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - PSHUFB_XMM %xmm14, \XMM0 + READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 + PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data + pxor \TMP6, \TMP7 + GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 + movdqu \TMP7, \TMP6 -.if (\i == 5) || (\i == 6) || (\i == 7) - MOVADQ ONE(%RIP),\TMP1 - MOVADQ (%arg1),\TMP2 -.irpc index, \i_seq - paddd \TMP1, \XMM0 # INCR Y0 - movdqa \XMM0, %xmm\index - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap - pxor \TMP2, %xmm\index -.endr - lea 0x10(%arg1),%r10 - mov keysize,%eax - shr 
$2,%eax # 128->4, 192->6, 256->8 - add $5,%eax # 128->9, 192->11, 256->13 - -aes_loop_initial_dec\num_initial_blocks: - MOVADQ (%r10),\TMP1 -.irpc index, \i_seq - AESENC \TMP1, %xmm\index -.endr - add $16,%r10 - sub $1,%eax - jnz aes_loop_initial_dec\num_initial_blocks - - MOVADQ (%r10), \TMP1 -.irpc index, \i_seq - AESENCLAST \TMP1, %xmm\index # Last Round -.endr -.irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 - pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) - # write back plaintext/ciphertext for num_initial_blocks - add $16, %r11 - - movdqa \TMP1, %xmm\index - PSHUFB_XMM %xmm14, %xmm\index - # prepare plaintext/ciphertext for GHASH computation -.endr -.endif - - # apply GHASH on num_initial_blocks blocks +_get_AAD_done\@: + movdqu \TMP6, AadHash(%arg2) +.endm -.if \i == 5 - pxor %xmm5, %xmm6 - GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 6 - pxor %xmm6, %xmm7 - GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 -.elseif \i == 7 - pxor %xmm7, %xmm8 - GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks +# between update calls. 
+# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK +# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context +# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13 +.macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ + AAD_HASH operation + mov PBlockLen(%arg2), %r13 + cmp $0, %r13 + je _partial_block_done_\@ # Leave Macro if no partial blocks + # Read in input data without over reading + cmp $16, \PLAIN_CYPH_LEN + jl _fewer_than_16_bytes_\@ + movups (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm + jmp _data_read_\@ + +_fewer_than_16_bytes_\@: + lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 + mov \PLAIN_CYPH_LEN, %r12 + READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1 + + mov PBlockLen(%arg2), %r13 + +_data_read_\@: # Finished reading in data + + movdqu PBlockEncKey(%arg2), %xmm9 + movdqu HashKey(%arg2), %xmm13 + + lea SHIFT_MASK(%rip), %r12 + + # adjust the shuffle mask pointer to be able to shift r13 bytes + # r16-r13 is the number of bytes in plaintext mod 16) + add %r13, %r12 + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes + +.ifc \operation, dec + movdqa %xmm1, %xmm3 + pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_1_\@ + sub %r10, %r12 +_no_extra_mask_1_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9 + + pand %xmm1, %xmm3 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm3 + PSHUFB_XMM %xmm2, %xmm3 + pxor %xmm3, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_1_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, 
%xmm11, %xmm5, %xmm6 + xor %rax,%rax + + mov %rax, PBlockLen(%arg2) + jmp _dec_done_\@ +_partial_incomplete_1_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_dec_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) +.else + pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn) + + mov \PLAIN_CYPH_LEN, %r10 + add %r13, %r10 + # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling + sub $16, %r10 + # Determine if if partial block is not being filled and + # shift mask accordingly + jge _no_extra_mask_2_\@ + sub %r10, %r12 +_no_extra_mask_2_\@: + + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out bottom r13 bytes of xmm9 + pand %xmm1, %xmm9 + + movdqa SHUF_MASK(%rip), %xmm1 + PSHUFB_XMM %xmm1, %xmm9 + PSHUFB_XMM %xmm2, %xmm9 + pxor %xmm9, \AAD_HASH + + cmp $0, %r10 + jl _partial_incomplete_2_\@ + + # GHASH computation for the last <16 Byte block + GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 + xor %rax,%rax + + mov %rax, PBlockLen(%arg2) + jmp _encode_done_\@ +_partial_incomplete_2_\@: + add \PLAIN_CYPH_LEN, PBlockLen(%arg2) +_encode_done_\@: + movdqu \AAD_HASH, AadHash(%arg2) + + movdqa SHUF_MASK(%rip), %xmm10 + # shuffle xmm9 back to output as ciphertext + PSHUFB_XMM %xmm10, %xmm9 + PSHUFB_XMM %xmm2, %xmm9 .endif - cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation - # no need for precomputed values -/* -* -* Precomputations for HashKey parallel with encryption of first 4 blocks. 
-* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i -*/ - MOVADQ ONE(%rip), \TMP1 - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM1 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM2 - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM3 - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - - paddd \TMP1, \XMM0 # INCR Y0 - MOVADQ \XMM0, \XMM4 - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - - MOVADQ 0(%arg1),\TMP1 - pxor \TMP1, \XMM1 - pxor \TMP1, \XMM2 - pxor \TMP1, \XMM3 - pxor \TMP1, \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) -.irpc index, 1234 # do 4 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) -.irpc index, 56789 # do next 5 rounds - movaps 0x10*\index(%arg1), \TMP1 - AESENC \TMP1, \XMM1 - AESENC \TMP1, \XMM2 - AESENC \TMP1, \XMM3 - AESENC \TMP1, \XMM4 -.endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) - lea 0xa0(%arg1),%r10 - mov keysize,%eax - shr $2,%eax # 128->4, 192->6, 256->8 - sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_pre_dec_done\num_initial_blocks - -aes_loop_pre_dec\num_initial_blocks: - MOVADQ (%r10),\TMP2 -.irpc index, 1234 - AESENC \TMP2, 
%xmm\index -.endr - add $16,%r10 - sub $1,%eax - jnz aes_loop_pre_dec\num_initial_blocks - -aes_loop_pre_dec_done\num_initial_blocks: - MOVADQ (%r10), \TMP2 - AESENCLAST \TMP2, \XMM1 - AESENCLAST \TMP2, \XMM2 - AESENCLAST \TMP2, \XMM3 - AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM1 - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM1 - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM2 - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM2 - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM3 - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM3 - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 - pxor \TMP1, \XMM4 - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) - movdqa \TMP1, \XMM4 - add $64, %r11 - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap - pxor \XMMDst, \XMM1 -# combine GHASHed value with the corresponding ciphertext - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap - -_initial_blocks_done\num_initial_blocks\operation: - -.endm + # output encrypted Bytes + cmp $0, %r10 + jl _partial_fill_\@ + mov %r13, %r12 + mov $16, %r13 + # Set r13 to be the number of bytes to write out + sub %r12, %r13 + jmp _count_set_\@ +_partial_fill_\@: + mov \PLAIN_CYPH_LEN, %r13 +_count_set_\@: + movdqa %xmm9, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_\@ + mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $8, \DATA_OFFSET + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_\@: + movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) + add $1, \DATA_OFFSET + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_\@ +_partial_block_done_\@: +.endm # PARTIAL_BLOCK /* * if a = number of total plaintext bytes @@ -499,49 +789,19 @@ _initial_blocks_done\num_initial_blocks\operation: * the ciphertext * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers 
* are clobbered -* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +* arg1, %arg2, %arg3 are used as a pointer only, not modified */ -.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation - MOVADQ SHUF_MASK(%rip), %xmm14 - mov arg7, %r10 # %r10 = AAD - mov arg8, %r11 # %r11 = aadLen - pxor %xmm\i, %xmm\i - pxor \XMM2, \XMM2 +.macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ + XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + MOVADQ SHUF_MASK(%rip), %xmm14 - cmp $16, %r11 - jl _get_AAD_rest\num_initial_blocks\operation -_get_AAD_blocks\num_initial_blocks\operation: - movdqu (%r10), %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor %xmm\i, \XMM2 - GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - add $16, %r10 - sub $16, %r11 - cmp $16, %r11 - jge _get_AAD_blocks\num_initial_blocks\operation + movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0 - movdqu \XMM2, %xmm\i - - /* read the last <16B of AAD */ -_get_AAD_rest\num_initial_blocks\operation: - cmp $0, %r11 - je _get_AAD_done\num_initial_blocks\operation - - READ_PARTIAL_BLOCK %r10, %r11, \TMP1, %xmm\i - PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data - pxor \XMM2, %xmm\i - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 - -_get_AAD_done\num_initial_blocks\operation: - xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), \XMM0 # XMM0 = Y0 - PSHUFB_XMM %xmm14, \XMM0 + movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0 .if (\i == 5) || (\i == 6) || (\i == 7) @@ -549,7 +809,11 @@ _get_AAD_done\num_initial_blocks\operation: MOVADQ 0(%arg1),\TMP2 .irpc index, \i_seq paddd \TMP1, \XMM0 # INCR Y0 +.ifc \operation, dec + movdqa \XMM0, %xmm\index +.else MOVADQ \XMM0, %xmm\index +.endif PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap pxor \TMP2, %xmm\index .endr @@ -558,25 
+822,29 @@ _get_AAD_done\num_initial_blocks\operation: shr $2,%eax # 128->4, 192->6, 256->8 add $5,%eax # 128->9, 192->11, 256->13 -aes_loop_initial_enc\num_initial_blocks: +aes_loop_initial_\@: MOVADQ (%r10),\TMP1 .irpc index, \i_seq AESENC \TMP1, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_initial_enc\num_initial_blocks + jnz aes_loop_initial_\@ MOVADQ (%r10), \TMP1 .irpc index, \i_seq AESENCLAST \TMP1, %xmm\index # Last Round .endr .irpc index, \i_seq - movdqu (%arg3 , %r11, 1), \TMP1 + movdqu (%arg4 , %r11, 1), \TMP1 pxor \TMP1, %xmm\index - movdqu %xmm\index, (%arg2 , %r11, 1) + movdqu %xmm\index, (%arg3 , %r11, 1) # write back plaintext/ciphertext for num_initial_blocks add $16, %r11 + +.ifc \operation, dec + movdqa \TMP1, %xmm\index +.endif PSHUFB_XMM %xmm14, %xmm\index # prepare plaintext/ciphertext for GHASH computation @@ -602,7 +870,7 @@ aes_loop_initial_enc\num_initial_blocks: GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 .endif cmp $64, %r13 - jl _initial_blocks_done\num_initial_blocks\operation + jl _initial_blocks_done\@ # no need for precomputed values /* * @@ -631,17 +899,6 @@ aes_loop_initial_enc\num_initial_blocks: pxor \TMP1, \XMM2 pxor \TMP1, \XMM3 pxor \TMP1, \XMM4 - movdqa \TMP3, \TMP5 - pshufd $78, \TMP3, \TMP1 - pxor \TMP3, \TMP1 - movdqa \TMP1, HashKey_k(%rsp) - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^2<<1 (mod poly) - movdqa \TMP5, HashKey_2(%rsp) -# HashKey_2 = HashKey^2<<1 (mod poly) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_2_k(%rsp) .irpc index, 1234 # do 4 rounds movaps 0x10*\index(%arg1), \TMP1 AESENC \TMP1, \XMM1 @@ -649,12 +906,6 @@ aes_loop_initial_enc\num_initial_blocks: AESENC \TMP1, \XMM3 AESENC \TMP1, \XMM4 .endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_3(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_3_k(%rsp) .irpc index, 56789 # 
do next 5 rounds movaps 0x10*\index(%arg1), \TMP1 AESENC \TMP1, \XMM1 @@ -662,45 +913,56 @@ aes_loop_initial_enc\num_initial_blocks: AESENC \TMP1, \XMM3 AESENC \TMP1, \XMM4 .endr - GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 -# TMP5 = HashKey^3<<1 (mod poly) - movdqa \TMP5, HashKey_4(%rsp) - pshufd $78, \TMP5, \TMP1 - pxor \TMP5, \TMP1 - movdqa \TMP1, HashKey_4_k(%rsp) lea 0xa0(%arg1),%r10 mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_pre_enc_done\num_initial_blocks + jz aes_loop_pre_done\@ -aes_loop_pre_enc\num_initial_blocks: +aes_loop_pre_\@: MOVADQ (%r10),\TMP2 .irpc index, 1234 AESENC \TMP2, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_pre_enc\num_initial_blocks + jnz aes_loop_pre_\@ -aes_loop_pre_enc_done\num_initial_blocks: +aes_loop_pre_done\@: MOVADQ (%r10), \TMP2 AESENCLAST \TMP2, \XMM1 AESENCLAST \TMP2, \XMM2 AESENCLAST \TMP2, \XMM3 AESENCLAST \TMP2, \XMM4 - movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + movdqu 16*0(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM1 - movdqu 16*1(%arg3 , %r11 , 1), \TMP1 +.ifc \operation, dec + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM1 +.endif + movdqu 16*1(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM2 - movdqu 16*2(%arg3 , %r11 , 1), \TMP1 +.ifc \operation, dec + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM2 +.endif + movdqu 16*2(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM3 - movdqu 16*3(%arg3 , %r11 , 1), \TMP1 +.ifc \operation, dec + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM3 +.endif + movdqu 16*3(%arg4 , %r11 , 1), \TMP1 pxor \TMP1, \XMM4 - movdqu \XMM1, 16*0(%arg2 , %r11 , 1) - movdqu \XMM2, 16*1(%arg2 , %r11 , 1) - movdqu \XMM3, 16*2(%arg2 , %r11 , 1) - movdqu \XMM4, 16*3(%arg2 , %r11 , 1) +.ifc \operation, dec + movdqu \XMM4, 16*3(%arg3 , %r11 , 1) + movdqa \TMP1, \XMM4 +.else + movdqu \XMM1, 16*0(%arg3 , %r11 , 1) + movdqu \XMM2, 16*1(%arg3 , %r11 , 1) + movdqu \XMM3, 16*2(%arg3 , %r11 , 1) + movdqu 
\XMM4, 16*3(%arg3 , %r11 , 1) +.endif add $64, %r11 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap @@ -710,14 +972,14 @@ aes_loop_pre_enc_done\num_initial_blocks: PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap -_initial_blocks_done\num_initial_blocks\operation: +_initial_blocks_done\@: .endm /* * encrypt 4 blocks at a time * ghash the 4 previously encrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified +* arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ .macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ @@ -735,7 +997,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -754,7 +1016,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 + movdqa HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -769,7 +1031,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -782,7 +1044,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 + movdqa HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -796,7 +1058,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 
operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -812,7 +1074,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 + movdqa HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -830,7 +1092,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -842,37 +1104,37 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_enc_done + jz aes_loop_par_enc_done\@ -aes_loop_par_enc: +aes_loop_par_enc\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 AESENC \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_enc + jnz aes_loop_par_enc\@ -aes_loop_par_enc_done: +aes_loop_par_enc_done\@: MOVADQ (%r10), \TMP3 AESENCLAST \TMP3, \XMM1 # Round 10 AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 + movdqa HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 + movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu 16(%arg3,%r11,1), \TMP3 + movdqu 16(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK - movdqu 32(%arg3,%r11,1), \TMP3 + movdqu 32(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu 48(%arg3,%r11,1), \TMP3 + movdqu 48(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, (%arg2,%r11,1) # 
Write to the ciphertext buffer - movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap @@ -925,7 +1187,7 @@ aes_loop_par_enc_done: /* * decrypt 4 blocks at a time * ghash the 4 previously decrypted ciphertext blocks -* arg1, %arg2, %arg3 are used as pointers only, not modified +* arg1, %arg3, %arg4 are used as pointers only, not modified * %r11 is the data offset value */ .macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ @@ -943,7 +1205,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pshufd $78, \XMM5, \TMP6 pxor \XMM5, \TMP6 paddd ONE(%rip), \XMM0 # INCR CNT - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 movdqa \XMM0, \XMM1 paddd ONE(%rip), \XMM0 # INCR CNT @@ -962,7 +1224,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 pxor (%arg1), \XMM4 - movdqa HashKey_4_k(%rsp), \TMP5 + movdqa HashKey_4_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) movaps 0x10(%arg1), \TMP1 AESENC \TMP1, \XMM1 # Round 1 @@ -977,7 +1239,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM6, \TMP1 pshufd $78, \XMM6, \TMP2 pxor \XMM6, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 movaps 0x30(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 3 @@ -990,7 +1252,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 
XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_3_k(%rsp), \TMP5 + movdqa HashKey_3_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x50(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 5 @@ -1004,7 +1266,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM7, \TMP1 pshufd $78, \XMM7, \TMP2 pxor \XMM7, \TMP2 - movdqa HashKey_2(%rsp ), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 # Multiply TMP5 * HashKey using karatsuba @@ -1020,7 +1282,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation AESENC \TMP3, \XMM2 AESENC \TMP3, \XMM3 AESENC \TMP3, \XMM4 - movdqa HashKey_2_k(%rsp), \TMP5 + movdqa HashKey_2_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movaps 0x80(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 8 @@ -1038,7 +1300,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM8, \TMP1 pshufd $78, \XMM8, \TMP2 pxor \XMM8, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 movaps 0x90(%arg1), \TMP3 AESENC \TMP3, \XMM1 # Round 9 @@ -1050,40 +1312,40 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation mov keysize,%eax shr $2,%eax # 128->4, 192->6, 256->8 sub $4,%eax # 128->0, 192->2, 256->4 - jz aes_loop_par_dec_done + jz aes_loop_par_dec_done\@ -aes_loop_par_dec: +aes_loop_par_dec\@: MOVADQ (%r10),\TMP3 .irpc index, 1234 AESENC \TMP3, %xmm\index .endr add $16,%r10 sub $1,%eax - jnz aes_loop_par_dec + jnz aes_loop_par_dec\@ -aes_loop_par_dec_done: +aes_loop_par_dec_done\@: MOVADQ (%r10), \TMP3 AESENCLAST \TMP3, \XMM1 # last round AESENCLAST \TMP3, \XMM2 AESENCLAST \TMP3, \XMM3 AESENCLAST \TMP3, \XMM4 - movdqa HashKey_k(%rsp), \TMP5 + movdqa HashKey_k(%arg2), \TMP5 PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) - movdqu (%arg3,%r11,1), \TMP3 + movdqu (%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK - movdqu \XMM1, 
(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM1 - movdqu 16(%arg3,%r11,1), \TMP3 + movdqu 16(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK - movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM2 - movdqu 32(%arg3,%r11,1), \TMP3 + movdqu 32(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK - movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM3 - movdqu 48(%arg3,%r11,1), \TMP3 + movdqu 48(%arg4,%r11,1), \TMP3 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK - movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer + movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM4 PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap @@ -1143,10 +1405,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM1, \TMP6 pshufd $78, \XMM1, \TMP2 pxor \XMM1, \TMP2 - movdqa HashKey_4(%rsp), \TMP5 + movdqa HashKey_4(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 - movdqa HashKey_4_k(%rsp), \TMP4 + movdqa HashKey_4_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqa \XMM1, \XMMDst movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 @@ -1156,10 +1418,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM2, \TMP1 pshufd $78, \XMM2, \TMP2 pxor \XMM2, \TMP2 - movdqa HashKey_3(%rsp), \TMP5 + movdqa HashKey_3(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 - movdqa HashKey_3_k(%rsp), \TMP4 + movdqa HashKey_3_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM2, \XMMDst @@ -1171,10 +1433,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM3, \TMP1 pshufd $78, \XMM3, \TMP2 pxor 
\XMM3, \TMP2 - movdqa HashKey_2(%rsp), \TMP5 + movdqa HashKey_2(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 - movdqa HashKey_2_k(%rsp), \TMP4 + movdqa HashKey_2_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM3, \XMMDst @@ -1184,10 +1446,10 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst movdqa \XMM4, \TMP1 pshufd $78, \XMM4, \TMP2 pxor \XMM4, \TMP2 - movdqa HashKey(%rsp), \TMP5 + movdqa HashKey(%arg2), \TMP5 PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 - movdqa HashKey_k(%rsp), \TMP4 + movdqa HashKey_k(%arg2), \TMP4 PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) pxor \TMP1, \TMP6 pxor \XMM4, \XMMDst @@ -1256,6 +1518,8 @@ _esb_loop_\@: .endm /***************************************************************************** * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data * u8 *out, // Plaintext output. Encrypt in-place is allowed. * const u8 *in, // Ciphertext input * u64 plaintext_len, // Length of data in bytes for decryption. 
@@ -1333,195 +1597,20 @@ _esb_loop_\@: * *****************************************************************************/ ENTRY(aesni_gcm_dec) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -/* -* states of %xmm registers %xmm6:%xmm15 not saved -* all %xmm registers are clobbered -*/ - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp # align rsp to 64 bytes - mov %arg6, %r12 - movdqu (%r12), %xmm13 # %xmm13 = HashKey - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - -# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # Reduction - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) - - - # Decrypt first few blocks - - movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) - mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext - and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) - mov %r13, %r12 - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_decrypt - cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_decrypt - je _initial_num_blocks_is_2_decrypt -_initial_num_blocks_is_3_decrypt: - INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec - sub $48, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_2_decrypt: - INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec - sub $32, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_1_decrypt: - INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec - sub $16, %r13 - jmp _initial_blocks_decrypted -_initial_num_blocks_is_0_decrypt: - INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, 
%xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec -_initial_blocks_decrypted: - cmp $0, %r13 - je _zero_cipher_left_decrypt - sub $64, %r13 - je _four_cipher_left_decrypt -_decrypt_by_4: - GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec - add $64, %r11 - sub $64, %r13 - jne _decrypt_by_4 -_four_cipher_left_decrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_decrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_decrypt - - # Handle the last <16 byte block separately - - paddd ONE(%rip), %xmm0 # increment CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) - - lea (%arg3,%r11,1), %r10 - mov %r13, %r12 - READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 - - lea ALL_F+16(%rip), %r12 - sub %r13, %r12 - movdqa %xmm1, %xmm2 - pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) - movdqu (%r12), %xmm1 - # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 - pand %xmm1, %xmm2 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10 ,%xmm2 - - pxor %xmm2, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - - # output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_decrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_decrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_decrypt -_multiple_of_16_bytes_decrypt: - mov arg8, %r12 # %r13 = aadLen (number of bytes) - shl $3, %r12 # convert into number of bits - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, 
%xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 + FUNC_SAVE - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) - pxor %xmm8, %xmm0 -_return_T_decrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_decrypt - cmp $8, %r11 - jl _T_4_decrypt -_T_8_decrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - psrldq $8, %xmm0 - cmp $0, %r11 - je _return_T_done_decrypt -_T_4_decrypt: - movd %xmm0, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - psrldq $4, %xmm0 - cmp $0, %r11 - je _return_T_done_decrypt -_T_123_decrypt: - movd %xmm0, %eax - cmp $2, %r11 - jl _T_1_decrypt - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done_decrypt - add $2, %r10 - sar $16, %eax -_T_1_decrypt: - mov %al, (%r10) - jmp _return_T_done_decrypt -_T_16_decrypt: - movdqu %xmm0, (%r10) -_return_T_done_decrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 + GCM_INIT %arg6, arg7, arg8, arg9 + GCM_ENC_DEC dec + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE ret ENDPROC(aesni_gcm_dec) /***************************************************************************** * void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data +* // Context data * u8 *out, // Ciphertext output. Encrypt in-place is allowed. * const u8 *in, // Plaintext input * u64 plaintext_len, // Length of data in bytes for encryption. 
@@ -1596,195 +1685,78 @@ ENDPROC(aesni_gcm_dec) * poly = x^128 + x^127 + x^126 + x^121 + 1 ***************************************************************************/ ENTRY(aesni_gcm_enc) - push %r12 - push %r13 - push %r14 - mov %rsp, %r14 -# -# states of %xmm registers %xmm6:%xmm15 not saved -# all %xmm registers are clobbered -# - sub $VARIABLE_OFFSET, %rsp - and $~63, %rsp - mov %arg6, %r12 - movdqu (%r12), %xmm13 - movdqa SHUF_MASK(%rip), %xmm2 - PSHUFB_XMM %xmm2, %xmm13 - - -# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) - - movdqa %xmm13, %xmm2 - psllq $1, %xmm13 - psrlq $63, %xmm2 - movdqa %xmm2, %xmm1 - pslldq $8, %xmm2 - psrldq $8, %xmm1 - por %xmm2, %xmm13 - - # reduce HashKey<<1 - - pshufd $0x24, %xmm1, %xmm2 - pcmpeqd TWOONE(%rip), %xmm2 - pand POLY(%rip), %xmm2 - pxor %xmm2, %xmm13 - movdqa %xmm13, HashKey(%rsp) - mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) - and $-16, %r13 - mov %r13, %r12 + FUNC_SAVE - # Encrypt first few blocks + GCM_INIT %arg6, arg7, arg8, arg9 + GCM_ENC_DEC enc - and $(3<<4), %r12 - jz _initial_num_blocks_is_0_encrypt - cmp $(2<<4), %r12 - jb _initial_num_blocks_is_1_encrypt - je _initial_num_blocks_is_2_encrypt -_initial_num_blocks_is_3_encrypt: - INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc - sub $48, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_2_encrypt: - INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc - sub $32, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_1_encrypt: - INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc - sub $16, %r13 - jmp _initial_blocks_encrypted -_initial_num_blocks_is_0_encrypt: - INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ -%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, 
%xmm6, 8, 0, enc -_initial_blocks_encrypted: - - # Main loop - Encrypt remaining blocks - - cmp $0, %r13 - je _zero_cipher_left_encrypt - sub $64, %r13 - je _four_cipher_left_encrypt -_encrypt_by_4_encrypt: - GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ -%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc - add $64, %r11 - sub $64, %r13 - jne _encrypt_by_4_encrypt -_four_cipher_left_encrypt: - GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ -%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 -_zero_cipher_left_encrypt: - mov %arg4, %r13 - and $15, %r13 # %r13 = arg4 (mod 16) - je _multiple_of_16_bytes_encrypt - - # Handle the last <16 Byte block separately - paddd ONE(%rip), %xmm0 # INCR CNT to get Yn - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 - - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) - - lea (%arg3,%r11,1), %r10 - mov %r13, %r12 - READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1 - - lea ALL_F+16(%rip), %r12 - sub %r13, %r12 - pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) - movdqu (%r12), %xmm1 - # get the appropriate mask to mask out top 16-r13 bytes of xmm0 - pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10,%xmm0 + GCM_COMPLETE arg10, arg11 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_enc) - pxor %xmm0, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # GHASH computation for the last <16 byte block - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm0 +/***************************************************************************** +* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. 
+* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len) // Length of AAD in bytes. +*/ +ENTRY(aesni_gcm_init) + FUNC_SAVE + GCM_INIT %arg3, %arg4,%arg5, %arg6 + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_init) - # shuffle xmm0 back to output as ciphertext +/***************************************************************************** +* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +*/ +ENTRY(aesni_gcm_enc_update) + FUNC_SAVE + GCM_ENC_DEC enc + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_enc_update) - # Output %r13 bytes - MOVQ_R64_XMM %xmm0, %rax - cmp $8, %r13 - jle _less_than_8_bytes_left_encrypt - mov %rax, (%arg2 , %r11, 1) - add $8, %r11 - psrldq $8, %xmm0 - MOVQ_R64_XMM %xmm0, %rax - sub $8, %r13 -_less_than_8_bytes_left_encrypt: - mov %al, (%arg2, %r11, 1) - add $1, %r11 - shr $8, %rax - sub $1, %r13 - jne _less_than_8_bytes_left_encrypt -_multiple_of_16_bytes_encrypt: - mov arg8, %r12 # %r12 = addLen (number of bytes) - shl $3, %r12 - movd %r12d, %xmm15 # len(A) in %xmm15 - shl $3, %arg4 # len(C) in bits (*128) - MOVQ_R64_XMM %arg4, %xmm1 - pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 - pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) - pxor %xmm15, %xmm8 - GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 - # final GHASH computation - movdqa SHUF_MASK(%rip), %xmm10 - PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap +/***************************************************************************** +* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *out, // Ciphertext output. 
Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +*/ +ENTRY(aesni_gcm_dec_update) + FUNC_SAVE + GCM_ENC_DEC dec + FUNC_RESTORE + ret +ENDPROC(aesni_gcm_dec_update) - mov %arg5, %rax # %rax = *Y0 - movdqu (%rax), %xmm0 # %xmm0 = Y0 - ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) - pxor %xmm8, %xmm0 -_return_T_encrypt: - mov arg9, %r10 # %r10 = authTag - mov arg10, %r11 # %r11 = auth_tag_len - cmp $16, %r11 - je _T_16_encrypt - cmp $8, %r11 - jl _T_4_encrypt -_T_8_encrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) - add $8, %r10 - sub $8, %r11 - psrldq $8, %xmm0 - cmp $0, %r11 - je _return_T_done_encrypt -_T_4_encrypt: - movd %xmm0, %eax - mov %eax, (%r10) - add $4, %r10 - sub $4, %r11 - psrldq $4, %xmm0 - cmp $0, %r11 - je _return_T_done_encrypt -_T_123_encrypt: - movd %xmm0, %eax - cmp $2, %r11 - jl _T_1_encrypt - mov %ax, (%r10) - cmp $2, %r11 - je _return_T_done_encrypt - add $2, %r10 - sar $16, %eax -_T_1_encrypt: - mov %al, (%r10) - jmp _return_T_done_encrypt -_T_16_encrypt: - movdqu %xmm0, (%r10) -_return_T_done_encrypt: - mov %r14, %rsp - pop %r14 - pop %r13 - pop %r12 +/***************************************************************************** +* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* struct gcm_context_data *data, +* // context data +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. 
+*/ +ENTRY(aesni_gcm_finalize) + FUNC_SAVE + GCM_COMPLETE %arg3 %arg4 + FUNC_RESTORE ret -ENDPROC(aesni_gcm_enc) +ENDPROC(aesni_gcm_finalize) #endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 34cf1c1..acbe7e8 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -72,6 +72,21 @@ struct aesni_xts_ctx { u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; }; +#define GCM_BLOCK_LEN 16 + +struct gcm_context_data { + /* init, update and finalize context data */ + u8 aad_hash[GCM_BLOCK_LEN]; + u64 aad_length; + u64 in_length; + u8 partial_block_enc_key[GCM_BLOCK_LEN]; + u8 orig_IV[GCM_BLOCK_LEN]; + u8 current_counter[GCM_BLOCK_LEN]; + u64 partial_block_len; + u64 unused; + u8 hash_keys[GCM_BLOCK_LEN * 8]; +}; + asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -105,6 +120,7 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * struct gcm_context_data. May be uninitialized. * u8 *out, Ciphertext output. Encrypt in-place is allowed. * const u8 *in, Plaintext input * unsigned long plaintext_len, Length of data in bytes for encryption. @@ -117,13 +133,15 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, * unsigned long auth_tag_len), Authenticated Tag Length in bytes. * Valid values are 16 (most likely), 12 or 8. */ -asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, +asmlinkage void aesni_gcm_enc(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); /* asmlinkage void aesni_gcm_dec() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * struct gcm_context_data. 
May be uninitialized. * u8 *out, Plaintext output. Decrypt in-place is allowed. * const u8 *in, Ciphertext input * unsigned long ciphertext_len, Length of data in bytes for decryption. @@ -137,11 +155,28 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, * unsigned long auth_tag_len) Authenticated Tag Length in bytes. * Valid values are 16 (most likely), 12 or 8. */ -asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, +asmlinkage void aesni_gcm_dec(void *ctx, + struct gcm_context_data *gdata, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); +/* Scatter / Gather routines, with args similar to above */ +asmlinkage void aesni_gcm_init(void *ctx, + struct gcm_context_data *gdata, + u8 *iv, + u8 *hash_subkey, const u8 *aad, + unsigned long aad_len); +asmlinkage void aesni_gcm_enc_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, unsigned long plaintext_len); +asmlinkage void aesni_gcm_dec_update(void *ctx, + struct gcm_context_data *gdata, u8 *out, + const u8 *in, + unsigned long ciphertext_len); +asmlinkage void aesni_gcm_finalize(void *ctx, + struct gcm_context_data *gdata, + u8 *auth_tag, unsigned long auth_tag_len); #ifdef CONFIG_AS_AVX asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv, @@ -167,15 +202,17 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx(void *ctx, u8 *out, +static void aesni_gcm_enc_avx(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){ - aesni_gcm_enc(ctx, out, in, plaintext_len, iv, 
hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_enc(ctx, data, out, in, + plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, @@ -183,15 +220,17 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out, } } -static void aesni_gcm_dec_avx(void *ctx, u8 *out, +static void aesni_gcm_dec_avx(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_dec(ctx, data, out, in, + ciphertext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, @@ -218,15 +257,17 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len); -static void aesni_gcm_enc_avx2(void *ctx, u8 *out, +static void aesni_gcm_enc_avx2(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long plaintext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, - aad_len, auth_tag, auth_tag_len); + aesni_gcm_enc(ctx, data, out, in, + plaintext_len, iv, hash_subkey, aad, + aad_len, auth_tag, auth_tag_len); } else if (plaintext_len < AVX_GEN4_OPTSIZE) { 
aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, @@ -238,15 +279,17 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out, } } -static void aesni_gcm_dec_avx2(void *ctx, u8 *out, +static void aesni_gcm_dec_avx2(void *ctx, + struct gcm_context_data *data, u8 *out, const u8 *in, unsigned long ciphertext_len, u8 *iv, u8 *hash_subkey, const u8 *aad, unsigned long aad_len, u8 *auth_tag, unsigned long auth_tag_len) { struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx; if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) { - aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, - aad, aad_len, auth_tag, auth_tag_len); + aesni_gcm_dec(ctx, data, out, in, + ciphertext_len, iv, hash_subkey, + aad, aad_len, auth_tag, auth_tag_len); } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, @@ -259,15 +302,19 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out, } #endif -static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out, - const u8 *in, unsigned long plaintext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); +static void (*aesni_gcm_enc_tfm)(void *ctx, + struct gcm_context_data *data, u8 *out, + const u8 *in, unsigned long plaintext_len, + u8 *iv, u8 *hash_subkey, const u8 *aad, + unsigned long aad_len, u8 *auth_tag, + unsigned long auth_tag_len); -static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out, - const u8 *in, unsigned long ciphertext_len, u8 *iv, - u8 *hash_subkey, const u8 *aad, unsigned long aad_len, - u8 *auth_tag, unsigned long auth_tag_len); +static void (*aesni_gcm_dec_tfm)(void *ctx, + struct gcm_context_data *data, u8 *out, + const u8 *in, unsigned long ciphertext_len, + u8 *iv, u8 *hash_subkey, const u8 *aad, + unsigned long aad_len, u8 *auth_tag, + unsigned long auth_tag_len); static inline 
struct aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) @@ -744,6 +791,127 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, return 0; } +static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, + unsigned int assoclen, u8 *hash_subkey, + u8 *iv, void *aes_ctx) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + struct gcm_context_data data AESNI_ALIGN_ATTR; + struct scatter_walk dst_sg_walk = {}; + unsigned long left = req->cryptlen; + unsigned long len, srclen, dstlen; + struct scatter_walk assoc_sg_walk; + struct scatter_walk src_sg_walk; + struct scatterlist src_start[2]; + struct scatterlist dst_start[2]; + struct scatterlist *src_sg; + struct scatterlist *dst_sg; + u8 *src, *dst, *assoc; + u8 *assocmem = NULL; + u8 authTag[16]; + + if (!enc) + left -= auth_tag_len; + + /* Linearize assoc, if not already linear */ + if (req->src->length >= assoclen && req->src->length && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length < PAGE_SIZE)) { + scatterwalk_start(&assoc_sg_walk, req->src); + assoc = scatterwalk_map(&assoc_sg_walk); + } else { + /* assoc can be any length, so must be on heap */ + assocmem = kmalloc(assoclen, GFP_ATOMIC); + if (unlikely(!assocmem)) + return -ENOMEM; + assoc = assocmem; + + scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0); + } + + src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen); + scatterwalk_start(&src_sg_walk, src_sg); + if (req->src != req->dst) { + dst_sg = scatterwalk_ffwd(dst_start, req->dst, req->assoclen); + scatterwalk_start(&dst_sg_walk, dst_sg); + } + + kernel_fpu_begin(); + aesni_gcm_init(aes_ctx, &data, iv, + hash_subkey, assoc, assoclen); + if (req->src != req->dst) { + while (left) { + src = scatterwalk_map(&src_sg_walk); + dst = scatterwalk_map(&dst_sg_walk); + srclen = scatterwalk_clamp(&src_sg_walk, left); + dstlen = scatterwalk_clamp(&dst_sg_walk, left); + 
len = min(srclen, dstlen); + if (len) { + if (enc) + aesni_gcm_enc_update(aes_ctx, &data, + dst, src, len); + else + aesni_gcm_dec_update(aes_ctx, &data, + dst, src, len); + } + left -= len; + + scatterwalk_unmap(src); + scatterwalk_unmap(dst); + scatterwalk_advance(&src_sg_walk, len); + scatterwalk_advance(&dst_sg_walk, len); + scatterwalk_done(&src_sg_walk, 0, left); + scatterwalk_done(&dst_sg_walk, 1, left); + } + } else { + while (left) { + dst = src = scatterwalk_map(&src_sg_walk); + len = scatterwalk_clamp(&src_sg_walk, left); + if (len) { + if (enc) + aesni_gcm_enc_update(aes_ctx, &data, + src, src, len); + else + aesni_gcm_dec_update(aes_ctx, &data, + src, src, len); + } + left -= len; + scatterwalk_unmap(src); + scatterwalk_advance(&src_sg_walk, len); + scatterwalk_done(&src_sg_walk, 1, left); + } + } + aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len); + kernel_fpu_end(); + + if (!assocmem) + scatterwalk_unmap(assoc); + else + kfree(assocmem); + + if (!enc) { + u8 authTagMsg[16]; + + /* Copy out original authTag */ + scatterwalk_map_and_copy(authTagMsg, req->src, + req->assoclen + req->cryptlen - + auth_tag_len, + auth_tag_len, 0); + + /* Compare generated tag with passed in tag. */ + return crypto_memneq(authTagMsg, authTag, auth_tag_len) ? 
+ -EBADMSG : 0; + } + + /* Copy in the authTag */ + scatterwalk_map_and_copy(authTag, req->dst, + req->assoclen + req->cryptlen, + auth_tag_len, 1); + + return 0; +} + static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, u8 *hash_subkey, u8 *iv, void *aes_ctx) { @@ -753,7 +921,14 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, unsigned long auth_tag_len = crypto_aead_authsize(tfm); struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; + struct gcm_context_data data AESNI_ALIGN_ATTR; + if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || + aesni_gcm_enc_tfm == aesni_gcm_enc || + req->cryptlen < AVX_GEN2_OPTSIZE) { + return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, + aes_ctx); + } if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || req->src->offset + req->src->length <= PAGE_SIZE) && @@ -782,7 +957,7 @@ static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, } kernel_fpu_begin(); - aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv, + aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv, hash_subkey, assoc, assoclen, dst + req->cryptlen, auth_tag_len); kernel_fpu_end(); @@ -817,8 +992,15 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, u8 authTag[16]; struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; + struct gcm_context_data data AESNI_ALIGN_ATTR; int retval = 0; + if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 || + aesni_gcm_enc_tfm == aesni_gcm_enc || + req->cryptlen < AVX_GEN2_OPTSIZE) { + return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, + aes_ctx); + } tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); if (sg_is_last(req->src) && @@ -849,7 +1031,7 @@ static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, kernel_fpu_begin(); - aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, + 
aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv, hash_subkey, assoc, assoclen, authTag, auth_tag_len); kernel_fpu_end(); diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index f9eca34..3e0c07c 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -25,13 +25,13 @@ * */ -#include <asm/processor.h> +#include <crypto/algapi.h> #include <crypto/blowfish.h> +#include <crypto/internal/skcipher.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/algapi.h> /* regular block cipher functions */ asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src, @@ -77,20 +77,28 @@ static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, +static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return blowfish_setkey(&tfm->base, key, keylen); +} + +static int ecb_crypt(struct skcipher_request *req, void (*fn)(struct bf_ctx *, u8 *, const u8 *), void (*fn_4way)(struct bf_ctx *, u8 *, const u8 *)) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; unsigned int nbytes; int err; - err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; /* Process four block batch */ if (nbytes >= bsize * 4) { @@ -116,34 +124,25 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); 
done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_enc_blk, blowfish_enc_blk_4way); + return ecb_crypt(req, blowfish_enc_blk, blowfish_enc_blk_4way); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, blowfish_dec_blk, blowfish_dec_blk_4way); + return ecb_crypt(req, blowfish_dec_blk, blowfish_dec_blk_4way); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_encrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -164,27 +163,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, return nbytes; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, 
&walk, nbytes); + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct bf_ctx *ctx, + struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -245,24 +244,25 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) +static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[BF_BLOCK_SIZE]; @@ -276,10 +276,8 @@ static void ctr_crypt_final(struct bf_ctx *ctx, struct blkcipher_walk *walk) crypto_inc(ctrblk, BF_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk) { - struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = BF_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -332,29 +330,30 @@ done: return nbytes; } -static int 
ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct bf_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __ctr_crypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } - if (walk.nbytes) { - ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); - err = blkcipher_walk_done(desc, &walk, 0); + if (nbytes) { + ctr_crypt_final(ctx, &walk); + err = skcipher_walk_done(&walk, 0); } return err; } -static struct crypto_alg bf_algs[4] = { { +static struct crypto_alg bf_cipher_alg = { .cra_name = "blowfish", .cra_driver_name = "blowfish-asm", .cra_priority = 200, @@ -372,66 +371,50 @@ static struct crypto_alg bf_algs[4] = { { .cia_decrypt = blowfish_decrypt, } } -}, { - .cra_name = "ecb(blowfish)", - .cra_driver_name = "ecb-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .setkey = blowfish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, +}; + +static struct skcipher_alg bf_skcipher_algs[] = { + { + .base.cra_name = "ecb(blowfish)", + .base.cra_driver_name = "ecb-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + 
.base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(blowfish)", + .base.cra_driver_name = "cbc-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = BF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(blowfish)", + .base.cra_driver_name = "ctr-blowfish-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct bf_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = BF_MIN_KEY_SIZE, + .max_keysize = BF_MAX_KEY_SIZE, + .ivsize = BF_BLOCK_SIZE, + .chunksize = BF_BLOCK_SIZE, + .setkey = blowfish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, }, -}, { - .cra_name = "cbc(blowfish)", - .cra_driver_name = "cbc-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = BF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = blowfish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(blowfish)", - .cra_driver_name = "ctr-blowfish-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct bf_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = BF_MIN_KEY_SIZE, - .max_keysize = BF_MAX_KEY_SIZE, - .ivsize = BF_BLOCK_SIZE, - .setkey = 
blowfish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; static bool is_blacklisted_cpu(void) { @@ -456,6 +439,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init init(void) { + int err; + if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "blowfish-x86_64: performance on this CPU " @@ -464,12 +449,23 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs)); + err = crypto_register_alg(&bf_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); + if (err) + crypto_unregister_alg(&bf_cipher_alg); + + return err; } static void __exit fini(void) { - crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs)); + crypto_unregister_alg(&bf_cipher_alg); + crypto_unregister_skciphers(bf_skcipher_algs, + ARRAY_SIZE(bf_skcipher_algs)); } module_init(init); diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 60907c1..d4992e4 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -10,18 +10,15 @@ * */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/ablk_helper.h> -#include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> -#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> +#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/xts.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 #define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32 @@ -150,413 +147,120 @@ static const struct common_glue_ctx camellia_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist 
*src, unsigned int nbytes) +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, + &tfm->base.crt_flags); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_enc, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); -} - -static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) -{ - return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, - CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, - nbytes); -} - -static inline void camellia_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); -} - -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, - &tfm->crt_flags); -} - -struct crypt_priv { - struct camellia_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; 
- int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_32way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - } - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx->ctx, srcdst, srcdst); + return glue_ecb_req_128bit(&camellia_dec, req); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_32way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS; - } - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx->ctx, srcdst, srcdst); + return 
glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_ctr_req_128bit(&camellia_ctr, req); } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = 
crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_enc_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&camellia_dec_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cmll_algs[10] = { { - .cra_name = "__ecb-camellia-aesni-avx2", - .cra_driver_name = "__driver-ecb-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-camellia-aesni-avx2", - .cra_driver_name = "__driver-cbc-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - 
.max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-camellia-aesni-avx2", - .cra_driver_name = "__driver-ctr-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-camellia-aesni-avx2", - .cra_driver_name = "__driver-lrw-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-camellia-aesni-avx2", - .cra_driver_name = "__driver-xts-camellia-aesni-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = 
xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-aesni-avx2", - .cra_priority 
= 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-aesni-avx2", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + 
.base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(camellia)", + .base.cra_driver_name = "__ctr-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(camellia)", + .base.cra_driver_name = "__xts-camellia-aesni-avx2", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, + .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = xts_camellia_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; static int __init camellia_aesni_init(void) { @@ -576,12 +280,15 @@ static int __init camellia_aesni_init(void) return -ENODEV; } - return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } static void __exit camellia_aesni_fini(void) { - crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } module_init(camellia_aesni_init); diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index d96429d..d09f652 
100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -10,18 +10,15 @@ * */ -#include <linux/module.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/ablk_helper.h> -#include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> -#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> +#include <crypto/algapi.h> +#include <crypto/internal/simd.h> +#include <crypto/xts.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> #define CAMELLIA_AESNI_PARALLEL_BLOCKS 16 @@ -154,401 +151,142 @@ static const struct common_glue_ctx camellia_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); + return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen, + &tfm->base.crt_flags); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&camellia_enc, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); + return 
glue_ecb_req_128bit(&camellia_dec, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(CAMELLIA_BLOCK_SIZE, - CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled, - nbytes); + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static inline void camellia_fpu_end(bool fpu_enabled) +static int ctr_crypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_ctr_req_128bit(&camellia_ctr, req); } -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) +int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, - &tfm->crt_flags); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); } +EXPORT_SYMBOL_GPL(xts_camellia_setkey); -struct crypt_priv { - struct camellia_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); 
- - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } - - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&camellia_enc_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { - camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { - camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; - nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&camellia_dec_xts, req, + XTS_TWEAK_CAST(camellia_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx 
= crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->camellia_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - camellia_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&camellia_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&camellia_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(camellia_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg cmll_algs[10] = { { - .cra_name = "__ecb-camellia-aesni", - .cra_driver_name = 
"__driver-ecb-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-camellia-aesni", - .cra_driver_name = "__driver-cbc-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-camellia-aesni", - .cra_driver_name = "__driver-ctr-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-camellia-aesni", - .cra_driver_name = "__driver-lrw-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = 
lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-camellia-aesni", - .cra_driver_name = "__driver-xts-camellia-aesni", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = 
CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = 
CAMELLIA_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg camellia_algs[] = { + { + .base.cra_name = "__ecb(camellia)", + .base.cra_driver_name = "__ecb-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(camellia)", + .base.cra_driver_name = "__cbc-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(camellia)", + .base.cra_driver_name = "__ctr-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(camellia)", + .base.cra_driver_name = "__xts-camellia-aesni", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE, + .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE, + .ivsize = 
CAMELLIA_BLOCK_SIZE, + .setkey = xts_camellia_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)]; static int __init camellia_aesni_init(void) { @@ -567,12 +305,15 @@ static int __init camellia_aesni_init(void) return -ENODEV; } - return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + return simd_register_skciphers_compat(camellia_algs, + ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } static void __exit camellia_aesni_fini(void) { - crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs)); + simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs), + camellia_simd_algs); } module_init(camellia_aesni_init); diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index af4840a..dcd5e0f7 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -23,15 +23,12 @@ * */ -#include <asm/processor.h> #include <asm/unaligned.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include <crypto/algapi.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> @@ -1272,13 +1269,19 @@ int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, } EXPORT_SYMBOL_GPL(__camellia_setkey); -static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key, +static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { - return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len, + return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len, &tfm->crt_flags); } +static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + return camellia_setkey(&tfm->base, key, key_len); +} + void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src) { u128 iv = *src; @@ -1373,188 +1376,33 @@ static const struct 
common_glue_ctx camellia_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes); -} - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_enc_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = CAMELLIA_BLOCK_SIZE; - struct camellia_ctx *ctx = priv; - int i; - - while (nbytes >= 2 * bsize) { - camellia_dec_blk_2way(ctx, srcdst, srcdst); - srcdst += bsize * 2; - nbytes -= bsize * 2; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - camellia_dec_blk(ctx, srcdst, srcdst); -} - -int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ 
- struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __camellia_setkey(&ctx->camellia_ctx, key, - keylen - CAMELLIA_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, - key + keylen - CAMELLIA_BLOCK_SIZE); -} -EXPORT_SYMBOL_GPL(lrw_camellia_setkey); - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[2 * 4]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->camellia_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); + return glue_ecb_req_128bit(&camellia_enc, req); } -void lrw_camellia_exit_tfm(struct crypto_tfm *tfm) +static int ecb_decrypt(struct skcipher_request *req) { - struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); + return glue_ecb_req_128bit(&camellia_dec, req); } -EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm); -int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int cbc_encrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = 
__camellia_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(camellia_enc_blk), + req); } -EXPORT_SYMBOL_GPL(xts_camellia_setkey); -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = encrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); + return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[2 * 4]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); + return glue_ctr_req_128bit(&camellia_ctr, req); } -static struct crypto_alg camellia_algs[6] = { { +static struct crypto_alg camellia_cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-asm", .cra_priority = 200, @@ -1572,109 +1420,50 @@ static struct crypto_alg camellia_algs[6] = { { .cia_decrypt = camellia_decrypt } } -}, { - .cra_name = "ecb(camellia)", - .cra_driver_name = "ecb-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 
CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .setkey = camellia_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(camellia)", - .cra_driver_name = "cbc-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(camellia)", - .cra_driver_name = "ctr-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct camellia_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = camellia_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "lrw(camellia)", - .cra_driver_name = "lrw-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_camellia_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .max_keysize = CAMELLIA_MAX_KEY_SIZE + - CAMELLIA_BLOCK_SIZE, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey 
= lrw_camellia_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "xts(camellia)", - .cra_driver_name = "xts-camellia-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = CAMELLIA_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct camellia_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, - .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2, - .ivsize = CAMELLIA_BLOCK_SIZE, - .setkey = xts_camellia_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -} }; +}; + +static struct skcipher_alg camellia_skcipher_algs[] = { + { + .base.cra_name = "ecb(camellia)", + .base.cra_driver_name = "ecb-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(camellia)", + .base.cra_driver_name = "cbc-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .setkey = camellia_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(camellia)", + .base.cra_driver_name = "ctr-camellia-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct camellia_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAMELLIA_MIN_KEY_SIZE, + .max_keysize = CAMELLIA_MAX_KEY_SIZE, + .ivsize = CAMELLIA_BLOCK_SIZE, + .chunksize = CAMELLIA_BLOCK_SIZE, + .setkey = 
camellia_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; static bool is_blacklisted_cpu(void) { @@ -1700,6 +1489,8 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init init(void) { + int err; + if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "camellia-x86_64: performance on this CPU " @@ -1708,12 +1499,23 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); + err = crypto_register_alg(&camellia_cipher_alg); + if (err) + return err; + + err = crypto_register_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); + if (err) + crypto_unregister_alg(&camellia_cipher_alg); + + return err; } static void __exit fini(void) { - crypto_unregister_algs(camellia_algs, ARRAY_SIZE(camellia_algs)); + crypto_unregister_alg(&camellia_cipher_alg); + crypto_unregister_skciphers(camellia_skcipher_algs, + ARRAY_SIZE(camellia_skcipher_algs)); } module_init(init); diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index dbea602..4103474 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -21,18 +21,14 @@ * */ -#include <linux/module.h> -#include <linux/hardirq.h> -#include <linux/types.h> -#include <linux/crypto.h> -#include <linux/err.h> -#include <crypto/ablk_helper.h> +#include <asm/crypto/glue_helper.h> #include <crypto/algapi.h> #include <crypto/cast5.h> -#include <crypto/cryptd.h> -#include <crypto/ctr.h> -#include <asm/fpu/api.h> -#include <asm/crypto/glue_helper.h> +#include <crypto/internal/simd.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/types.h> #define CAST5_PARALLEL_BLOCKS 16 @@ -45,10 +41,17 @@ asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, __be64 *iv); -static inline bool cast5_fpu_begin(bool 
fpu_enabled, unsigned int nbytes) +static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + return cast5_setkey(&tfm->base, key, keylen); +} + +static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk, + unsigned int nbytes) { return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); + walk, fpu_enabled, nbytes); } static inline void cast5_fpu_end(bool fpu_enabled) @@ -56,29 +59,28 @@ static inline void cast5_fpu_end(bool fpu_enabled) return glue_fpu_end(fpu_enabled); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - bool enc) +static int ecb_crypt(struct skcipher_request *req, bool enc) { bool fpu_enabled = false; - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src); int err; - fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way; - - err = blkcipher_walk_virt(desc, walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + fn = (enc) ? 
cast5_ecb_enc_16way : cast5_ecb_dec_16way; do { fn(ctx, wdst, wsrc); @@ -103,76 +105,58 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, true); + return ecb_crypt(req, true); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, false); + return ecb_crypt(req, false); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static int cbc_encrypt(struct skcipher_request *req) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; - unsigned int nbytes = walk->nbytes; - u64 *src = (u64 *)walk->src.virt.addr; - u64 *dst = (u64 *)walk->dst.virt.addr; - u64 *iv = (u64 *)walk->iv; - - do { - *dst = *src ^ *iv; - __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u64 *)walk->iv = *iv; - return nbytes; -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, 
nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + u64 *src = (u64 *)walk.src.virt.addr; + u64 *dst = (u64 *)walk.dst.virt.addr; + u64 *iv = (u64 *)walk.iv; + + do { + *dst = *src ^ *iv; + __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + src++; + dst++; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk.iv = *iv; + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct cast5_ctx *ctx, + struct skcipher_walk *walk) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -224,31 +208,29 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); bool fpu_enabled = false; - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); return err; } -static void ctr_crypt_final(struct blkcipher_desc *desc, - struct 
blkcipher_walk *walk) +static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); u8 *ctrblk = walk->iv; u8 keystream[CAST5_BLOCK_SIZE]; u8 *src = walk->src.virt.addr; @@ -261,10 +243,9 @@ static void ctr_crypt_final(struct blkcipher_desc *desc, crypto_inc(ctrblk, CAST5_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct skcipher_walk *walk, + struct cast5_ctx *ctx) { - struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -307,162 +288,80 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm); bool fpu_enabled = false; - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes); + nbytes = __ctr_crypt(&walk, ctx); + err = skcipher_walk_done(&walk, nbytes); } cast5_fpu_end(fpu_enabled); if (walk.nbytes) { - ctr_crypt_final(desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); + ctr_crypt_final(&walk, ctx); + err = skcipher_walk_done(&walk, 0); } return err; } +static struct skcipher_alg cast5_algs[] = { + { + .base.cra_name = 
"__ecb(cast5)", + .base.cra_driver_name = "__ecb-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast5)", + .base.cra_driver_name = "__cbc-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST5_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(cast5)", + .base.cra_driver_name = "__ctr-cast5-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct cast5_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .chunksize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; -static struct crypto_alg cast5_algs[6] = { { - .cra_name = "__ecb-cast5-avx", - .cra_driver_name = "__driver-ecb-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = cast5_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-cast5-avx", - 
.cra_driver_name = "__driver-cbc-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = cast5_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-cast5-avx", - .cra_driver_name = "__driver-ctr-cast5-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cast5_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = cast5_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "ecb(cast5)", - .cra_driver_name = "ecb-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(cast5)", - .cra_driver_name = "cbc-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST5_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = 
ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(cast5)", - .cra_driver_name = "ctr-cast5-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST5_MIN_KEY_SIZE, - .max_keysize = CAST5_MAX_KEY_SIZE, - .ivsize = CAST5_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -} }; +static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)]; static int __init cast5_init(void) { @@ -474,12 +373,15 @@ static int __init cast5_init(void) return -ENODEV; } - return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); + return simd_register_skciphers_compat(cast5_algs, + ARRAY_SIZE(cast5_algs), + cast5_simd_algs); } static void __exit cast5_exit(void) { - crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); + simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs), + cast5_simd_algs); } module_init(cast5_init); diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 50e6847..9fb66b5 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -24,19 +24,13 @@ */ #include <linux/module.h> -#include <linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> #include <crypto/cast6.h> -#include <crypto/cryptd.h> -#include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> +#include <crypto/internal/simd.h> 
#include <crypto/xts.h> -#include <asm/fpu/api.h> #include <asm/crypto/glue_helper.h> #define CAST6_PARALLEL_BLOCKS 8 @@ -56,6 +50,12 @@ asmlinkage void cast6_xts_enc_8way(struct cast6_ctx *ctx, u8 *dst, asmlinkage void cast6_xts_dec_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int cast6_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return cast6_setkey(&tfm->base, key, keylen); +} + static void cast6_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv) { glue_xts_crypt_128bit_one(ctx, dst, src, iv, @@ -157,164 +157,30 @@ static const struct common_glue_ctx cast6_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc, - dst, src, nbytes); -} - -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&cast6_enc, req); } -static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS, - 
NULL, fpu_enabled, nbytes); + return glue_ecb_req_128bit(&cast6_dec, req); } -static inline void cast6_fpu_end(bool fpu_enabled) +static int cbc_encrypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__cast6_encrypt), + req); } -struct crypt_priv { - struct cast6_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - const unsigned int bsize = CAST6_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { - cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __cast6_encrypt(ctx->ctx, srcdst, srcdst); + return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - const unsigned int bsize = CAST6_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { - cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __cast6_decrypt(ctx->ctx, srcdst, srcdst); -} - -struct cast6_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct cast6_ctx cast6_ctx; -}; - -static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned 
int nbytes) -{ - struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAST6_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->cast6_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - cast6_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[CAST6_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->cast6_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - cast6_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); + return glue_ctr_req_128bit(&cast6_ctr, req); } struct cast6_xts_ctx { @@ -322,14 +188,14 @@ struct cast6_xts_ctx { struct cast6_ctx crypt_ctx; }; -static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) +static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) { - struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; int err; - err = xts_check_key(tfm, key, keylen); + err = xts_verify_key(tfm, key, keylen); if (err) return err; @@ -343,245 +209,87 @@ static int 
xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, flags); } -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&cast6_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&cast6_enc_xts, req, + XTS_TWEAK_CAST(__cast6_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - return glue_xts_crypt_128bit(&cast6_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__cast6_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); + return glue_xts_req_128bit(&cast6_dec_xts, req, + XTS_TWEAK_CAST(__cast6_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static struct crypto_alg cast6_algs[10] = { { - .cra_name = "__ecb-cast6-avx", - .cra_driver_name = "__driver-ecb-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = cast6_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-cast6-avx", - .cra_driver_name = "__driver-cbc-cast6-avx", - 
.cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = cast6_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-cast6-avx", - .cra_driver_name = "__driver-ctr-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cast6_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = cast6_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-cast6-avx", - .cra_driver_name = "__driver-lrw-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE + - CAST6_BLOCK_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE + - CAST6_BLOCK_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = lrw_cast6_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-cast6-avx", - .cra_driver_name = "__driver-xts-cast6-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct cast6_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - 
.blkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE * 2, - .max_keysize = CAST6_MAX_KEY_SIZE * 2, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = xts_cast6_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(cast6)", - .cra_driver_name = "ecb-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(cast6)", - .cra_driver_name = "cbc-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(cast6)", - .cra_driver_name = "ctr-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - 
.geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(cast6)", - .cra_driver_name = "lrw-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE + - CAST6_BLOCK_SIZE, - .max_keysize = CAST6_MAX_KEY_SIZE + - CAST6_BLOCK_SIZE, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(cast6)", - .cra_driver_name = "xts-cast6-avx", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = CAST6_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = CAST6_MIN_KEY_SIZE * 2, - .max_keysize = CAST6_MAX_KEY_SIZE * 2, - .ivsize = CAST6_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg cast6_algs[] = { + { + .base.cra_name = "__ecb(cast6)", + .base.cra_driver_name = "__ecb-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(cast6)", + .base.cra_driver_name = "__cbc-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = 
sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(cast6)", + .base.cra_driver_name = "__ctr-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct cast6_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .chunksize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(cast6)", + .base.cra_driver_name = "__xts-cast6-avx", + .base.cra_priority = 200, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = CAST6_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct cast6_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * CAST6_MIN_KEY_SIZE, + .max_keysize = 2 * CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = xts_cast6_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *cast6_simd_algs[ARRAY_SIZE(cast6_algs)]; static int __init cast6_init(void) { @@ -593,12 +301,15 @@ static int __init cast6_init(void) return -ENODEV; } - return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); + return simd_register_skciphers_compat(cast6_algs, + ARRAY_SIZE(cast6_algs), + cast6_simd_algs); } static void __exit cast6_exit(void) { - crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); + simd_unregister_skciphers(cast6_algs, ARRAY_SIZE(cast6_algs), + cast6_simd_algs); } module_init(cast6_init); diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 30c0a37..5c610d4 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -20,13 +20,13 @@ 
* */ -#include <asm/processor.h> +#include <crypto/algapi.h> #include <crypto/des.h> +#include <crypto/internal/skcipher.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/algapi.h> struct des3_ede_x86_ctx { u32 enc_expkey[DES3_EDE_EXPKEY_WORDS]; @@ -83,18 +83,18 @@ static void des3_ede_x86_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) des3_ede_dec_blk(crypto_tfm_ctx(tfm), dst, src); } -static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, - const u32 *expkey) +static int ecb_crypt(struct skcipher_request *req, const u32 *expkey) { - unsigned int bsize = DES3_EDE_BLOCK_SIZE; + const unsigned int bsize = DES3_EDE_BLOCK_SIZE; + struct skcipher_walk walk; unsigned int nbytes; int err; - err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + u8 *wsrc = walk.src.virt.addr; + u8 *wdst = walk.dst.virt.addr; /* Process four block batch */ if (nbytes >= bsize * 3) { @@ -121,36 +121,31 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, ctx->enc_expkey); + return ecb_crypt(req, ctx->enc_expkey); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct 
scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); - blkcipher_walk_init(&walk, dst, src, nbytes); - return ecb_crypt(desc, &walk, ctx->dec_expkey); + return ecb_crypt(req, ctx->dec_expkey); } -static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -171,27 +166,27 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, return nbytes; } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_encrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_encrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } -static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __cbc_decrypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = 
DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; u64 *src = (u64 *)walk->src.virt.addr; @@ -250,25 +245,26 @@ done: return nbytes; } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __cbc_decrypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __cbc_decrypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } return err; } static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, - struct blkcipher_walk *walk) + struct skcipher_walk *walk) { u8 *ctrblk = walk->iv; u8 keystream[DES3_EDE_BLOCK_SIZE]; @@ -282,10 +278,9 @@ static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx, crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE); } -static unsigned int __ctr_crypt(struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx, + struct skcipher_walk *walk) { - struct des3_ede_x86_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); unsigned int bsize = DES3_EDE_BLOCK_SIZE; unsigned int nbytes = walk->nbytes; __be64 *src = (__be64 *)walk->src.virt.addr; @@ -333,23 +328,24 @@ done: return nbytes; } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_walk walk; + unsigned int nbytes; int 
err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, DES3_EDE_BLOCK_SIZE); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) { - nbytes = __ctr_crypt(desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + nbytes = __ctr_crypt(ctx, &walk); + err = skcipher_walk_done(&walk, nbytes); } - if (walk.nbytes) { - ctr_crypt_final(crypto_blkcipher_ctx(desc->tfm), &walk); - err = blkcipher_walk_done(desc, &walk, 0); + if (nbytes) { + ctr_crypt_final(ctx, &walk); + err = skcipher_walk_done(&walk, 0); } return err; @@ -381,7 +377,14 @@ static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key, return 0; } -static struct crypto_alg des3_ede_algs[4] = { { +static int des3_ede_x86_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, + unsigned int keylen) +{ + return des3_ede_x86_setkey(&tfm->base, key, keylen); +} + +static struct crypto_alg des3_ede_cipher = { .cra_name = "des3_ede", .cra_driver_name = "des3_ede-asm", .cra_priority = 200, @@ -399,66 +402,50 @@ static struct crypto_alg des3_ede_algs[4] = { { .cia_decrypt = des3_ede_x86_decrypt, } } -}, { - .cra_name = "ecb(des3_ede)", - .cra_driver_name = "ecb-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(des3_ede)", - .cra_driver_name = "cbc-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = DES3_EDE_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - 
.cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(des3_ede)", - .cra_driver_name = "ctr-des3_ede-asm", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct des3_ede_x86_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .ivsize = DES3_EDE_BLOCK_SIZE, - .setkey = des3_ede_x86_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -} }; +}; + +static struct skcipher_alg des3_ede_skciphers[] = { + { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "ecb-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cbc-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(des3_ede)", + .base.cra_driver_name = "ctr-des3_ede-asm", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize 
= DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .chunksize = DES3_EDE_BLOCK_SIZE, + .setkey = des3_ede_x86_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + } +}; static bool is_blacklisted_cpu(void) { @@ -483,17 +470,30 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init des3_ede_x86_init(void) { + int err; + if (!force && is_blacklisted_cpu()) { pr_info("des3_ede-x86_64: performance on this CPU would be suboptimal: disabling des3_ede-x86_64.\n"); return -ENODEV; } - return crypto_register_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); + err = crypto_register_alg(&des3_ede_cipher); + if (err) + return err; + + err = crypto_register_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); + if (err) + crypto_unregister_alg(&des3_ede_cipher); + + return err; } static void __exit des3_ede_x86_fini(void) { - crypto_unregister_algs(des3_ede_algs, ARRAY_SIZE(des3_ede_algs)); + crypto_unregister_alg(&des3_ede_cipher); + crypto_unregister_skciphers(des3_ede_skciphers, + ARRAY_SIZE(des3_ede_skciphers)); } module_init(des3_ede_x86_init); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index d61e579..a78ef99 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -29,313 +29,212 @@ #include <crypto/b128ops.h> #include <crypto/gf128mul.h> #include <crypto/internal/skcipher.h> -#include <crypto/lrw.h> #include <crypto/xts.h> #include <asm/crypto/glue_helper.h> -static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) { - void *ctx = crypto_blkcipher_ctx(desc->tfm); + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); const unsigned int bsize = 128 / 8; - unsigned int nbytes, i, func_bytes; + struct skcipher_walk walk; bool fpu_enabled = false; + unsigned int nbytes; int err; - 
err = blkcipher_walk_virt(desc, walk); + err = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk->nbytes)) { - u8 *wsrc = walk->src.virt.addr; - u8 *wdst = walk->dst.virt.addr; + while ((nbytes = walk.nbytes)) { + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + unsigned int func_bytes; + unsigned int i; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - + &walk, fpu_enabled, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; - /* Process multi-block batch */ - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.ecb(ctx, wdst, - wsrc); + if (nbytes < func_bytes) + continue; - wsrc += func_bytes; - wdst += func_bytes; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); + /* Process multi-block batch */ + do { + gctx->funcs[i].fn_u.ecb(ctx, dst, src); + src += func_bytes; + dst += func_bytes; + nbytes -= func_bytes; + } while (nbytes >= func_bytes); - if (nbytes < bsize) - goto done; - } + if (nbytes < bsize) + break; } - -done: - err = blkcipher_walk_done(desc, walk, nbytes); + err = skcipher_walk_done(&walk, nbytes); } glue_fpu_end(fpu_enabled); return err; } +EXPORT_SYMBOL_GPL(glue_ecb_req_128bit); -int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, + struct skcipher_request *req) { - struct blkcipher_walk walk; - - blkcipher_walk_init(&walk, dst, src, nbytes); - return __glue_ecb_crypt_128bit(gctx, desc, &walk); -} -EXPORT_SYMBOL_GPL(glue_ecb_crypt_128bit); - -static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - void *ctx = crypto_blkcipher_ctx(desc->tfm); + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); const unsigned int bsize = 128 / 8; - unsigned int 
nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 *iv = (u128 *)walk->iv; - - do { - u128_xor(dst, src, iv); - fn(ctx, (u8 *)dst, (u8 *)dst); - iv = dst; - - src += 1; - dst += 1; - nbytes -= bsize; - } while (nbytes >= bsize); - - *(u128 *)walk->iv = *iv; - return nbytes; -} - -int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct blkcipher_walk walk; + struct skcipher_walk walk; + unsigned int nbytes; int err; - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); + err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes)) { - nbytes = __glue_cbc_encrypt_128bit(fn, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + const u128 *src = (u128 *)walk.src.virt.addr; + u128 *dst = (u128 *)walk.dst.virt.addr; + u128 *iv = (u128 *)walk.iv; + + do { + u128_xor(dst, src, iv); + fn(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + src++; + dst++; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u128 *)walk.iv = *iv; + err = skcipher_walk_done(&walk, nbytes); } - return err; } -EXPORT_SYMBOL_GPL(glue_cbc_encrypt_128bit); +EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit); -static unsigned int -__glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) { - void *ctx = crypto_blkcipher_ctx(desc->tfm); + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - u128 last_iv; - unsigned int num_blocks, func_bytes; - unsigned int i; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; 
+ int err; + + err = skcipher_walk_virt(&walk, req, false); - /* Start of the last block. */ - src += nbytes / bsize - 1; - dst += nbytes / bsize - 1; + while ((nbytes = walk.nbytes)) { + const u128 *src = walk.src.virt.addr; + u128 *dst = walk.dst.virt.addr; + unsigned int func_bytes, num_blocks; + unsigned int i; + u128 last_iv; - last_iv = *src; + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; + last_iv = *src; - /* Process multi-block batch */ - if (nbytes >= func_bytes) { + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; + + if (nbytes < func_bytes) + continue; + + /* Process multi-block batch */ do { - nbytes -= func_bytes - bsize; src -= num_blocks - 1; dst -= num_blocks - 1; gctx->funcs[i].fn_u.cbc(ctx, dst, src); - nbytes -= bsize; + nbytes -= func_bytes; if (nbytes < bsize) goto done; - u128_xor(dst, dst, src - 1); - src -= 1; - dst -= 1; + u128_xor(dst, dst, --src); + dst--; } while (nbytes >= func_bytes); } - } - done: - u128_xor(dst, dst, (u128 *)walk->iv); - *(u128 *)walk->iv = last_iv; - - return nbytes; -} - -int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt(desc, &walk); - - while ((nbytes = walk.nbytes)) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + u128_xor(dst, dst, (u128 
*)walk.iv); + *(u128 *)walk.iv = last_iv; + err = skcipher_walk_done(&walk, nbytes); } glue_fpu_end(fpu_enabled); return err; } -EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); +EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit); -static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) +int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req) { - void *ctx = crypto_blkcipher_ctx(desc->tfm); - u8 *src = (u8 *)walk->src.virt.addr; - u8 *dst = (u8 *)walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - le128 ctrblk; - u128 tmp; + void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + const unsigned int bsize = 128 / 8; + struct skcipher_walk walk; + bool fpu_enabled = false; + unsigned int nbytes; + int err; - be128_to_le128(&ctrblk, (be128 *)walk->iv); + err = skcipher_walk_virt(&walk, req, false); - memcpy(&tmp, src, nbytes); - fn_ctr(ctx, &tmp, &tmp, &ctrblk); - memcpy(dst, &tmp, nbytes); + while ((nbytes = walk.nbytes) >= bsize) { + const u128 *src = walk.src.virt.addr; + u128 *dst = walk.dst.virt.addr; + unsigned int func_bytes, num_blocks; + unsigned int i; + le128 ctrblk; - le128_to_be128((be128 *)walk->iv, &ctrblk); -} + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, nbytes); -static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - void *ctx = crypto_blkcipher_ctx(desc->tfm); - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - le128 ctrblk; - unsigned int num_blocks, func_bytes; - unsigned int i; + be128_to_le128(&ctrblk, (be128 *)walk.iv); - be128_to_le128(&ctrblk, (be128 *)walk->iv); + for (i = 0; i < gctx->num_funcs; i++) { + num_blocks = gctx->funcs[i].num_blocks; + func_bytes = bsize * num_blocks; 
- /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - func_bytes = bsize * num_blocks; + if (nbytes < func_bytes) + continue; - if (nbytes >= func_bytes) { + /* Process multi-block batch */ do { gctx->funcs[i].fn_u.ctr(ctx, dst, src, &ctrblk); - src += num_blocks; dst += num_blocks; nbytes -= func_bytes; } while (nbytes >= func_bytes); if (nbytes < bsize) - goto done; + break; } - } - -done: - le128_to_be128((be128 *)walk->iv, &ctrblk); - return nbytes; -} - -int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, bsize); - while ((nbytes = walk.nbytes) >= bsize) { - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); - nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); - err = blkcipher_walk_done(desc, &walk, nbytes); + le128_to_be128((be128 *)walk.iv, &ctrblk); + err = skcipher_walk_done(&walk, nbytes); } glue_fpu_end(fpu_enabled); - if (walk.nbytes) { - glue_ctr_crypt_final_128bit( - gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} -EXPORT_SYMBOL_GPL(glue_ctr_crypt_128bit); - -static unsigned int __glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - void *ctx, - struct blkcipher_desc *desc, - struct blkcipher_walk *walk) -{ - const unsigned int bsize = 128 / 8; - unsigned int nbytes = walk->nbytes; - u128 *src = (u128 *)walk->src.virt.addr; - u128 *dst = (u128 *)walk->dst.virt.addr; - unsigned int num_blocks, func_bytes; - unsigned int i; - - /* Process multi-block batch */ - for (i = 0; i < gctx->num_funcs; i++) { - num_blocks = gctx->funcs[i].num_blocks; - 
func_bytes = bsize * num_blocks; - - if (nbytes >= func_bytes) { - do { - gctx->funcs[i].fn_u.xts(ctx, dst, src, - (le128 *)walk->iv); + if (nbytes) { + le128 ctrblk; + u128 tmp; - src += num_blocks; - dst += num_blocks; - nbytes -= func_bytes; - } while (nbytes >= func_bytes); + be128_to_le128(&ctrblk, (be128 *)walk.iv); + memcpy(&tmp, walk.src.virt.addr, nbytes); + gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, &tmp, &tmp, + &ctrblk); + memcpy(walk.dst.virt.addr, &tmp, nbytes); + le128_to_be128((be128 *)walk.iv, &ctrblk); - if (nbytes < bsize) - goto done; - } + err = skcipher_walk_done(&walk, 0); } -done: - return nbytes; + return err; } +EXPORT_SYMBOL_GPL(glue_ctr_req_128bit); static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx, void *ctx, @@ -372,46 +271,6 @@ done: return nbytes; } -/* for implementations implementing faster XTS IV generator */ -int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - void (*tweak_fn)(void *ctx, u8 *dst, const u8 *src), - void *tweak_ctx, void *crypt_ctx) -{ - const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; - struct blkcipher_walk walk; - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - - err = blkcipher_walk_virt(desc, &walk); - nbytes = walk.nbytes; - if (!nbytes) - return err; - - /* set minimum length to bsize, for tweak_fn */ - fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, - nbytes < bsize ? 
bsize : nbytes); - - /* calculate first value of T */ - tweak_fn(tweak_ctx, walk.iv, walk.iv); - - while (nbytes) { - nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); - - err = blkcipher_walk_done(desc, &walk, nbytes); - nbytes = walk.nbytes; - } - - glue_fpu_end(fpu_enabled); - - return err; -} -EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); - int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, common_glue_func_t tweak_fn, void *tweak_ctx, @@ -429,9 +288,9 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, return err; /* set minimum length to bsize, for tweak_fn */ - fpu_enabled = glue_skwalk_fpu_begin(bsize, gctx->fpu_blocks_limit, - &walk, fpu_enabled, - nbytes < bsize ? bsize : nbytes); + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + &walk, fpu_enabled, + nbytes < bsize ? bsize : nbytes); /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 870f6d8..03347b1 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -14,15 +14,12 @@ #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> +#include <crypto/internal/simd.h> #include <crypto/serpent.h> -#include <asm/fpu/api.h> -#include <asm/crypto/serpent-avx.h> +#include <crypto/xts.h> #include <asm/crypto/glue_helper.h> +#include <asm/crypto/serpent-avx.h> #define SERPENT_AVX2_PARALLEL_BLOCKS 16 @@ -40,6 +37,12 @@ asmlinkage void serpent_xts_enc_16way(struct serpent_ctx *ctx, u8 *dst, asmlinkage void serpent_xts_dec_16way(struct serpent_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, 
keylen); +} + static const struct common_glue_ctx serpent_enc = { .num_funcs = 3, .fpu_blocks_limit = 8, @@ -136,403 +139,113 @@ static const struct common_glue_ctx serpent_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&serpent_ctr, req); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - /* since reusing AVX functions, starts using FPU at 8 parallel blocks */ - return 
glue_fpu_begin(SERPENT_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes); -} + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); -static inline void serpent_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); + return glue_xts_req_128bit(&serpent_enc_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { - serpent_ecb_enc_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { - serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&serpent_dec_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes >= SERPENT_AVX2_PARALLEL_BLOCKS * bsize) { - serpent_ecb_dec_16way(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_AVX2_PARALLEL_BLOCKS; - } - - while 
(nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { - serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); - srcdst += bsize * SERPENT_PARALLEL_BLOCKS; - nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_AVX2_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int 
xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg srp_algs[10] = { { - .cra_name = "__ecb-serpent-avx2", - .cra_driver_name = "__driver-ecb-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[0].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-avx2", - .cra_driver_name = "__driver-cbc-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[1].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-avx2", - .cra_driver_name = "__driver-ctr-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[2].cra_list), - .cra_u = { - .blkcipher = { - 
.min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-serpent-avx2", - .cra_driver_name = "__driver-lrw-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[3].cra_list), - .cra_exit = lrw_serpent_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-serpent-avx2", - .cra_driver_name = "__driver-xts-serpent-avx2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[4].cra_list), - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[5].cra_list), - .cra_init = ablk_init, - .cra_exit = 
ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[6].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[7].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[8].cra_list), - .cra_init = ablk_init, - .cra_exit = 
ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-avx2", - .cra_priority = 600, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(srp_algs[9].cra_list), - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + 
.base.cra_driver_name = "__ctr-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(serpent)", + .base.cra_driver_name = "__xts-serpent-avx2", + .base.cra_priority = 600, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, + .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init init(void) { @@ -548,12 +261,15 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(srp_algs, ARRAY_SIZE(srp_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit fini(void) { - crypto_unregister_algs(srp_algs, ARRAY_SIZE(srp_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(init); diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 6f778d3..458567e 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -24,21 +24,15 @@ */ #include <linux/module.h> -#include <linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> +#include <crypto/internal/simd.h> #include <crypto/serpent.h> -#include 
<crypto/cryptd.h> -#include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/fpu/api.h> -#include <asm/crypto/serpent-avx.h> #include <asm/crypto/glue_helper.h> +#include <asm/crypto/serpent-avx.h> /* 8-way parallel cipher functions */ asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst, @@ -91,6 +85,31 @@ void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) } EXPORT_SYMBOL_GPL(serpent_xts_dec); +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + +int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); +} +EXPORT_SYMBOL_GPL(xts_serpent_setkey); static const struct common_glue_ctx serpent_enc = { .num_funcs = 2, @@ -170,423 +189,113 @@ static const struct common_glue_ctx serpent_dec_xts = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int cbc_encrypt(struct 
blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_ctr_req_128bit(&serpent_ctr, req); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int xts_encrypt(struct skcipher_request *req) { - return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); -} + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); -static inline void serpent_fpu_end(bool fpu_enabled) -{ - glue_fpu_end(fpu_enabled); + return glue_xts_req_128bit(&serpent_enc_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst 
+= bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - - SERPENT_BLOCK_SIZE); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - SERPENT_BLOCK_SIZE); -} -EXPORT_SYMBOL_GPL(lrw_serpent_setkey); - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; + return glue_xts_req_128bit(&serpent_dec_xts, req, + XTS_TWEAK_CAST(__serpent_encrypt), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) 
-{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -void lrw_serpent_exit_tfm(struct crypto_tfm *tfm) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} -EXPORT_SYMBOL_GPL(lrw_serpent_exit_tfm); - -int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} -EXPORT_SYMBOL_GPL(xts_serpent_setkey); - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&serpent_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(__serpent_encrypt), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg serpent_algs[10] = { { - .cra_name 
= "__ecb-serpent-avx", - .cra_driver_name = "__driver-ecb-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-avx", - .cra_driver_name = "__driver-cbc-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-avx", - .cra_driver_name = "__driver-ctr-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-serpent-avx", - .cra_driver_name = "__driver-lrw-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - 
.cra_exit = lrw_serpent_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-serpent-avx", - .cra_driver_name = "__driver-xts-serpent-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - 
.ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-avx", - .cra_priority = 500, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = 
ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = "__ctr-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(serpent)", + .base.cra_driver_name = "__xts-serpent-avx", + .base.cra_priority = 500, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * SERPENT_MIN_KEY_SIZE, + .max_keysize = 2 * SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = xts_serpent_setkey, + .encrypt = 
xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init serpent_init(void) { @@ -598,12 +307,15 @@ static int __init serpent_init(void) return -ENODEV; } - return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit serpent_exit(void) { - crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(serpent_init); diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index ac0e831..3dafe13 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -30,21 +30,22 @@ */ #include <linux/module.h> -#include <linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> -#include <crypto/serpent.h> -#include <crypto/cryptd.h> #include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> +#include <crypto/internal/simd.h> +#include <crypto/serpent.h> #include <asm/crypto/serpent-sse2.h> #include <asm/crypto/glue_helper.h> +static int serpent_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen); +} + static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) { u128 ivs[SERPENT_PARALLEL_BLOCKS - 1]; @@ -139,464 +140,79 @@ static const struct common_glue_ctx serpent_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&serpent_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, 
struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&serpent_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__serpent_encrypt), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&serpent_enc, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&serpent_dec_cbc, desc, dst, src, - nbytes); + return glue_ecb_req_128bit(&serpent_dec, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&serpent_ctr, desc, dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(__serpent_encrypt), + req); } -static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS, - NULL, fpu_enabled, nbytes); + return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req); } -static inline void serpent_fpu_end(bool fpu_enabled) +static int ctr_crypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_ctr_req_128bit(&serpent_ctr, req); } -struct crypt_priv { - struct serpent_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - 
serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_encrypt(ctx->ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = SERPENT_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { - serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - __serpent_decrypt(ctx->ctx, srcdst, srcdst); -} - -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; - -static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __serpent_setkey(&ctx->serpent_ctx, key, keylen - - SERPENT_BLOCK_SIZE); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - - SERPENT_BLOCK_SIZE); -} - -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[SERPENT_PARALLEL_BLOCKS]; - struct 
crypt_priv crypt_ctx = { - .ctx = &ctx->serpent_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static void lrw_exit_tfm(struct crypto_tfm *tfm) -{ - struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} - -struct serpent_xts_ctx { - struct serpent_ctx tweak_ctx; - struct serpent_ctx crypt_ctx; +static struct skcipher_alg serpent_algs[] = { + { + .base.cra_name = "__ecb(serpent)", + .base.cra_driver_name = "__ecb-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(serpent)", + .base.cra_driver_name = "__cbc-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = SERPENT_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + .ivsize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(serpent)", + .base.cra_driver_name = "__ctr-serpent-sse2", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct serpent_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = SERPENT_MIN_KEY_SIZE, + .max_keysize = SERPENT_MAX_KEY_SIZE, + 
.ivsize = SERPENT_BLOCK_SIZE, + .chunksize = SERPENT_BLOCK_SIZE, + .setkey = serpent_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, }; -static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2); -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[SERPENT_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->crypt_ctx, - .fpu_enabled = false, - }; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt), - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = xts_crypt(desc, dst, src, nbytes, &req); - 
serpent_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static struct crypto_alg serpent_algs[10] = { { - .cra_name = "__ecb-serpent-sse2", - .cra_driver_name = "__driver-ecb-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-serpent-sse2", - .cra_driver_name = "__driver-cbc-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = serpent_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-serpent-sse2", - .cra_driver_name = "__driver-ctr-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct serpent_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = serpent_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-serpent-sse2", - .cra_driver_name = "__driver-lrw-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = 
sizeof(struct serpent_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = lrw_serpent_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-serpent-sse2", - .cra_driver_name = "__driver-xts-serpent-sse2", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct serpent_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE * 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = xts_serpent_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(serpent)", - .cra_driver_name = "ecb-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(serpent)", - .cra_driver_name = "cbc-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = 
ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(serpent)", - .cra_driver_name = "ctr-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(serpent)", - .cra_driver_name = "lrw-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .max_keysize = SERPENT_MAX_KEY_SIZE + - SERPENT_BLOCK_SIZE, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(serpent)", - .cra_driver_name = "xts-serpent-sse2", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = SERPENT_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = SERPENT_MIN_KEY_SIZE 
* 2, - .max_keysize = SERPENT_MAX_KEY_SIZE * 2, - .ivsize = SERPENT_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -} }; +static struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)]; static int __init serpent_sse2_init(void) { @@ -605,12 +221,15 @@ static int __init serpent_sse2_init(void) return -ENODEV; } - return crypto_register_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + return simd_register_skciphers_compat(serpent_algs, + ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } static void __exit serpent_sse2_exit(void) { - crypto_unregister_algs(serpent_algs, ARRAY_SIZE(serpent_algs)); + simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs), + serpent_simd_algs); } module_init(serpent_sse2_init); diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c index acf9fdf..e17655f 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb.c +++ b/arch/x86/crypto/sha1-mb/sha1_mb.c @@ -106,13 +106,6 @@ static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) (struct sha1_mb_mgr *state); -static inline void sha1_init_digest(uint32_t *digest) -{ - static const uint32_t initial_digest[SHA1_DIGEST_LENGTH] = {SHA1_H0, - SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len) { @@ -244,11 +237,8 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, uint32_t len, int flags) { - if (flags & (~HASH_ENTIRE)) { - /* - * User should not pass anything other than FIRST, UPDATE, or - * LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -259,24 +249,12 @@ static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, return ctx; } - if 
((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; return ctx; } - - if (flags & HASH_FIRST) { - /* Init digest */ - sha1_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* * If we made it here, there were no errors during this call to * submit diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h index 13590cc..9454bd1 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h +++ b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha1_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index 7926a22..4c46ac1 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -106,14 +106,6 @@ static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) (struct sha256_mb_mgr *state); -inline void sha256_init_digest(uint32_t *digest) -{ - static const uint32_t initial_digest[SHA256_DIGEST_LENGTH] = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7}; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], uint64_t total_len) { @@ -245,10 +237,8 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, uint32_t len, int flags) { - if (flags & (~HASH_ENTIRE)) { - /* User should not pass anything 
other than FIRST, UPDATE - * or LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; return ctx; } @@ -259,23 +249,12 @@ static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, return ctx; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; return ctx; } - if (flags & HASH_FIRST) { - /* Init digest */ - sha256_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* If we made it here, there was no error during this call to submit */ ctx->error = HASH_CTX_ERROR_NONE; diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h index aabb303..7c43254 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h +++ b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha256_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index 458409b..39e2bbd 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -107,15 +107,6 @@ static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) (struct sha512_mb_mgr *state); -inline void sha512_init_digest(uint64_t *digest) -{ - static const uint64_t initial_digest[SHA512_DIGEST_LENGTH] = { - SHA512_H0, SHA512_H1, SHA512_H2, - SHA512_H3, SHA512_H4, 
SHA512_H5, - SHA512_H6, SHA512_H7 }; - memcpy(digest, initial_digest, sizeof(initial_digest)); -} - inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], uint64_t total_len) { @@ -263,11 +254,8 @@ static struct sha512_hash_ctx mgr = cstate->mgr; spin_lock_irqsave(&cstate->work_lock, irqflags); - if (flags & (~HASH_ENTIRE)) { - /* - * User should not pass anything other than FIRST, UPDATE, or - * LAST - */ + if (flags & ~(HASH_UPDATE | HASH_LAST)) { + /* User should not pass anything other than UPDATE or LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; goto unlock; } @@ -278,24 +266,12 @@ static struct sha512_hash_ctx goto unlock; } - if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + if (ctx->status & HASH_CTX_STS_COMPLETE) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; goto unlock; } - - if (flags & HASH_FIRST) { - /* Init digest */ - sha512_init_digest(ctx->job.result_digest); - - /* Reset byte counter */ - ctx->total_length = 0; - - /* Clear extra blocks */ - ctx->partial_block_buffer_length = 0; - } - /* * If we made it here, there were no errors during this call to * submit diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h index e4653f5..e5c465b 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h +++ b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h @@ -57,11 +57,9 @@ #include "sha512_mb_mgr.h" #define HASH_UPDATE 0x00 -#define HASH_FIRST 0x01 -#define HASH_LAST 0x02 -#define HASH_ENTIRE 0x03 -#define HASH_DONE 0x04 -#define HASH_FINAL 0x08 +#define HASH_LAST 0x01 +#define HASH_DONE 0x02 +#define HASH_FINAL 0x04 #define HASH_CTX_STS_IDLE 0x00 #define HASH_CTX_STS_PROCESSING 0x01 diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index b7a3904..66d9892 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -24,24 +24,15 @@ */ #include <linux/module.h> -#include 
<linux/hardirq.h> #include <linux/types.h> #include <linux/crypto.h> #include <linux/err.h> -#include <crypto/ablk_helper.h> #include <crypto/algapi.h> +#include <crypto/internal/simd.h> #include <crypto/twofish.h> -#include <crypto/cryptd.h> -#include <crypto/b128ops.h> -#include <crypto/ctr.h> -#include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/fpu/api.h> -#include <asm/crypto/twofish.h> #include <asm/crypto/glue_helper.h> -#include <crypto/scatterwalk.h> -#include <linux/workqueue.h> -#include <linux/spinlock.h> +#include <asm/crypto/twofish.h> #define TWOFISH_PARALLEL_BLOCKS 8 @@ -61,6 +52,12 @@ asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst, asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst, const u8 *src, le128 *iv); +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { @@ -79,6 +76,31 @@ static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv) GLUE_FUNC_CAST(twofish_dec_blk)); } +struct twofish_xts_ctx { + struct twofish_ctx tweak_ctx; + struct twofish_ctx crypt_ctx; +}; + +static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 *flags = &tfm->base.crt_flags; + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + /* first half of xts-key is for crypt */ + err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} static const struct common_glue_ctx twofish_enc = { .num_funcs = 3, @@ -170,389 +192,113 @@ static const struct common_glue_ctx twofish_dec_xts = { } } }; -static int ecb_encrypt(struct 
blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); + return glue_ecb_req_128bit(&twofish_enc, req); } -static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); + return glue_ecb_req_128bit(&twofish_dec, req); } -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), + req); } -static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes) +static int cbc_decrypt(struct skcipher_request *req) { - return glue_fpu_begin(TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS, NULL, - fpu_enabled, nbytes); + return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static inline void twofish_fpu_end(bool fpu_enabled) +static int ctr_crypt(struct skcipher_request *req) { - glue_fpu_end(fpu_enabled); + return glue_ctr_req_128bit(&twofish_ctr, req); } -struct crypt_priv { - struct twofish_ctx *ctx; - bool fpu_enabled; -}; - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int 
xts_encrypt(struct skcipher_request *req) { - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) - twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - nbytes %= bsize * 3; - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&twofish_enc_xts, req, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int xts_decrypt(struct skcipher_request *req) { - const unsigned int bsize = TF_BLOCK_SIZE; - struct crypt_priv *ctx = priv; - int i; - - ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes); - - if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { - twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); - return; - } + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) - twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); - - nbytes %= bsize * 3; - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx->ctx, srcdst, srcdst); + return glue_xts_req_128bit(&twofish_dec_xts, req, + XTS_TWEAK_CAST(twofish_enc_blk), + &ctx->tweak_ctx, &ctx->crypt_ctx); } -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - 
.fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = encrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[TWOFISH_PARALLEL_BLOCKS]; - struct crypt_priv crypt_ctx = { - .ctx = &ctx->twofish_ctx, - .fpu_enabled = false, - }; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &crypt_ctx, - .crypt_fn = decrypt_callback, - }; - int ret; - - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - ret = lrw_crypt(desc, dst, src, nbytes, &req); - twofish_fpu_end(crypt_ctx.fpu_enabled); - - return ret; -} - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - - return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes, - XTS_TWEAK_CAST(twofish_enc_blk), - &ctx->tweak_ctx, &ctx->crypt_ctx); -} - -static struct crypto_alg twofish_algs[10] = { { - .cra_name = "__ecb-twofish-avx", - .cra_driver_name = "__driver-ecb-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = 
sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "__cbc-twofish-avx", - .cra_driver_name = "__driver-cbc-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "__ctr-twofish-avx", - .cra_driver_name = "__driver-ctr-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "__lrw-twofish-avx", - .cra_driver_name = "__driver-lrw-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = 
lrw_decrypt, - }, - }, -}, { - .cra_name = "__xts-twofish-avx", - .cra_driver_name = "__driver-xts-twofish-avx", - .cra_priority = 0, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, - }, -}, { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = __ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct 
async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_encrypt, - .geniv = "chainiv", - }, - }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + - TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + - TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-avx", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - }, +static struct skcipher_alg twofish_algs[] = { + { + .base.cra_name = "__ecb(twofish)", + .base.cra_driver_name = "__ecb-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, 
+ .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "__cbc(twofish)", + .base.cra_driver_name = "__cbc-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "__ctr(twofish)", + .base.cra_driver_name = "__ctr-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .chunksize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, { + .base.cra_name = "__xts(twofish)", + .base.cra_driver_name = "__xts-twofish-avx", + .base.cra_priority = 400, + .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_xts_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = 2 * TF_MIN_KEY_SIZE, + .max_keysize = 2 * TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = xts_twofish_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, }, -} }; +}; + +static struct simd_skcipher_alg *twofish_simd_algs[ARRAY_SIZE(twofish_algs)]; static int __init twofish_init(void) { @@ -563,12 +309,15 @@ static int __init twofish_init(void) return -ENODEV; } - return crypto_register_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); + return simd_register_skciphers_compat(twofish_algs, + ARRAY_SIZE(twofish_algs), + twofish_simd_algs); } static void __exit twofish_exit(void) { - 
crypto_unregister_algs(twofish_algs, ARRAY_SIZE(twofish_algs)); + simd_unregister_skciphers(twofish_algs, ARRAY_SIZE(twofish_algs), + twofish_simd_algs); } module_init(twofish_init); diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 243e90a..5714855 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -20,22 +20,26 @@ * */ -#include <asm/processor.h> +#include <asm/crypto/glue_helper.h> +#include <asm/crypto/twofish.h> +#include <crypto/algapi.h> +#include <crypto/b128ops.h> +#include <crypto/internal/skcipher.h> +#include <crypto/twofish.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/algapi.h> -#include <crypto/twofish.h> -#include <crypto/b128ops.h> -#include <asm/crypto/twofish.h> -#include <asm/crypto/glue_helper.h> -#include <crypto/lrw.h> -#include <crypto/xts.h> EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); +static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return twofish_setkey(&tfm->base, key, keylen); +} + static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, const u8 *src) { @@ -151,284 +155,74 @@ static const struct common_glue_ctx twofish_dec_cbc = { } } }; -static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); -} - -static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); -} - -static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, - dst, src, nbytes); -} - 
-static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, - nbytes); -} - -static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); -} - -static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +static int ecb_encrypt(struct skcipher_request *req) { - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_enc_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_enc_blk(ctx, srcdst, srcdst); -} - -static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) -{ - const unsigned int bsize = TF_BLOCK_SIZE; - struct twofish_ctx *ctx = priv; - int i; - - if (nbytes == 3 * bsize) { - twofish_dec_blk_3way(ctx, srcdst, srcdst); - return; - } - - for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) - twofish_dec_blk(ctx, srcdst, srcdst); -} - -int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - int err; - - err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, - &tfm->crt_flags); - if (err) - return err; - - return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); + return glue_ecb_req_128bit(&twofish_enc, req); } -EXPORT_SYMBOL_GPL(lrw_twofish_setkey); -static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ecb_decrypt(struct skcipher_request *req) { - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - 
.crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = encrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); + return glue_ecb_req_128bit(&twofish_dec, req); } -static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int cbc_encrypt(struct skcipher_request *req) { - struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - be128 buf[3]; - struct lrw_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .table_ctx = &ctx->lrw_table, - .crypt_ctx = &ctx->twofish_ctx, - .crypt_fn = decrypt_callback, - }; - - return lrw_crypt(desc, dst, src, nbytes, &req); + return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), + req); } -void lrw_twofish_exit_tfm(struct crypto_tfm *tfm) +static int cbc_decrypt(struct skcipher_request *req) { - struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); - - lrw_free_table(&ctx->lrw_table); -} -EXPORT_SYMBOL_GPL(lrw_twofish_exit_tfm); - -int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - int err; - - err = xts_check_key(tfm, key, keylen); - if (err) - return err; - - /* first half of xts-key is for crypt */ - err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, - flags); -} -EXPORT_SYMBOL_GPL(xts_twofish_setkey); - -static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) -{ - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = encrypt_callback, - }; - - return 
xts_crypt(desc, dst, src, nbytes, &req); + return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); } -static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int ctr_crypt(struct skcipher_request *req) { - struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); - le128 buf[3]; - struct xts_crypt_req req = { - .tbuf = buf, - .tbuflen = sizeof(buf), - - .tweak_ctx = &ctx->tweak_ctx, - .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), - .crypt_ctx = &ctx->crypt_ctx, - .crypt_fn = decrypt_callback, - }; - - return xts_crypt(desc, dst, src, nbytes, &req); + return glue_ctr_req_128bit(&twofish_ctr, req); } -static struct crypto_alg tf_algs[5] = { { - .cra_name = "ecb(twofish)", - .cra_driver_name = "ecb-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .setkey = twofish_setkey, - .encrypt = ecb_encrypt, - .decrypt = ecb_decrypt, - }, - }, -}, { - .cra_name = "cbc(twofish)", - .cra_driver_name = "cbc-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = cbc_encrypt, - .decrypt = cbc_decrypt, - }, - }, -}, { - .cra_name = "ctr(twofish)", - .cra_driver_name = "ctr-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 0, - .cra_type = 
&crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE, - .max_keysize = TF_MAX_KEY_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = twofish_setkey, - .encrypt = ctr_crypt, - .decrypt = ctr_crypt, - }, - }, -}, { - .cra_name = "lrw(twofish)", - .cra_driver_name = "lrw-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_lrw_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_exit = lrw_twofish_exit_tfm, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, - .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, - .ivsize = TF_BLOCK_SIZE, - .setkey = lrw_twofish_setkey, - .encrypt = lrw_encrypt, - .decrypt = lrw_decrypt, - }, - }, -}, { - .cra_name = "xts(twofish)", - .cra_driver_name = "xts-twofish-3way", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_xts_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_blkcipher_type, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = TF_MIN_KEY_SIZE * 2, - .max_keysize = TF_MAX_KEY_SIZE * 2, - .ivsize = TF_BLOCK_SIZE, - .setkey = xts_twofish_setkey, - .encrypt = xts_encrypt, - .decrypt = xts_decrypt, - }, +static struct skcipher_alg tf_skciphers[] = { + { + .base.cra_name = "ecb(twofish)", + .base.cra_driver_name = "ecb-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, { + .base.cra_name = "cbc(twofish)", + .base.cra_driver_name = "cbc-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = TF_BLOCK_SIZE, + 
.base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, { + .base.cra_name = "ctr(twofish)", + .base.cra_driver_name = "ctr-twofish-3way", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct twofish_ctx), + .base.cra_module = THIS_MODULE, + .min_keysize = TF_MIN_KEY_SIZE, + .max_keysize = TF_MAX_KEY_SIZE, + .ivsize = TF_BLOCK_SIZE, + .chunksize = TF_BLOCK_SIZE, + .setkey = twofish_setkey_skcipher, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, }, -} }; +}; static bool is_blacklisted_cpu(void) { @@ -478,12 +272,13 @@ static int __init init(void) return -ENODEV; } - return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); + return crypto_register_skciphers(tf_skciphers, + ARRAY_SIZE(tf_skciphers)); } static void __exit fini(void) { - crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); + crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); } module_init(init); diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index dce7092..be63330 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -97,7 +97,7 @@ For 32-bit we have the following conventions - kernel is built with #define SIZEOF_PTREGS 21*8 -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 /* * Push registers and sanitize registers of values that a * speculation attack might otherwise want to exploit. The @@ -105,32 +105,41 @@ For 32-bit we have the following conventions - kernel is built with * could be put to use in a speculative execution gadget. 
* Interleave XOR with PUSH for better uop scheduling: */ + .if \save_ret + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ + movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ + .else pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ + .endif pushq \rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq \rax /* pt_regs->ax */ pushq %r8 /* pt_regs->r8 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq %r9 /* pt_regs->r9 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq %r10 /* pt_regs->r10 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq %r11 /* pt_regs->r11 */ - xorq %r11, %r11 /* nospec r11*/ + xorl %r11d, %r11d /* nospec r11*/ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx*/ pushq %rbp /* pt_regs->rbp */ xorl %ebp, %ebp /* nospec rbp*/ pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12*/ + xorl %r12d, %r12d /* nospec r12*/ pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13*/ + xorl %r13d, %r13d /* nospec r13*/ pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14*/ + xorl %r14d, %r14d /* nospec r14*/ pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15*/ + xorl %r15d, %r15d /* nospec r15*/ UNWIND_HINT_REGS + .if \save_ret + pushq %rsi /* return address on top of stack */ + .endif .endm .macro POP_REGS pop_rdi=1 skip_r11rcx=0 @@ -172,12 +181,7 @@ For 32-bit we have the following conventions - kernel is built with */ .macro ENCODE_FRAME_POINTER ptregs_offset=0 #ifdef CONFIG_FRAME_POINTER - .if \ptregs_offset - leaq \ptregs_offset(%rsp), %rbp - .else - mov %rsp, %rbp - .endif - orq $0x1, %rbp + leaq 1+\ptregs_offset(%rsp), %rbp #endif .endm diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 16c2c02..bef8e2b 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -252,8 +252,7 @@ ENTRY(__switch_to_asm) * exist, 
overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %ebx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ @@ -903,6 +902,9 @@ BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, BUILD_INTERRUPT3(hyperv_reenlightenment_vector, HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_intr) +BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR, + hv_stimer0_vector_handler) + #endif /* CONFIG_HYPERV */ ENTRY(page_fault) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8971bd6..b0a4649 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -55,7 +55,7 @@ END(native_usergs_sysret64) .macro TRACE_IRQS_FLAGS flags:req #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, \flags /* interrupts off? */ + btl $9, \flags /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: @@ -260,8 +260,13 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) * Change top bits to match most significant bit (47th or 56th bit * depending on paging mode) in the address. */ +#ifdef CONFIG_X86_5LEVEL + ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \ + "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57 +#else shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx +#endif /* If this changed %rcx, it was not canonical */ cmpq %rcx, %r11 @@ -364,8 +369,7 @@ ENTRY(__switch_to_asm) * exist, overwrite the RSB with entries which capture * speculative execution to prevent attack. */ - /* Clobbers %rbx */ - FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW #endif /* restore callee-saved registers */ @@ -449,9 +453,19 @@ END(irq_entries_start) * * The invariant is that, if irq_count != -1, then the IRQ stack is in use. 
*/ -.macro ENTER_IRQ_STACK regs=1 old_rsp +.macro ENTER_IRQ_STACK regs=1 old_rsp save_ret=0 DEBUG_ENTRY_ASSERT_IRQS_OFF + + .if \save_ret + /* + * If save_ret is set, the original stack contains one additional + * entry -- the return address. Therefore, move the address one + * entry below %rsp to \old_rsp. + */ + leaq 8(%rsp), \old_rsp + .else movq %rsp, \old_rsp + .endif .if \regs UNWIND_HINT_REGS base=\old_rsp @@ -497,6 +511,15 @@ END(irq_entries_start) .if \regs UNWIND_HINT_REGS indirect=1 .endif + + .if \save_ret + /* + * Push the return address to the stack. This return address can + * be found at the "real" original RSP, which was offset by 8 at + * the beginning of this macro. + */ + pushq -8(\old_rsp) + .endif .endm /* @@ -520,27 +543,65 @@ END(irq_entries_start) .endm /* - * Interrupt entry/exit. - * - * Interrupt entry points save only callee clobbered registers in fast path. + * Interrupt entry helper function. * - * Entry runs with interrupts off. + * Entry runs with interrupts off. Stack layout at entry: + * +----------------------------------------------------+ + * | regs->ss | + * | regs->rsp | + * | regs->eflags | + * | regs->cs | + * | regs->ip | + * +----------------------------------------------------+ + * | regs->orig_ax = ~(interrupt number) | + * +----------------------------------------------------+ + * | return address | + * +----------------------------------------------------+ */ - -/* 0(%rsp): ~(interrupt number) */ - .macro interrupt func +ENTRY(interrupt_entry) + UNWIND_HINT_FUNC + ASM_CLAC cld - testb $3, CS-ORIG_RAX(%rsp) + testb $3, CS-ORIG_RAX+8(%rsp) jz 1f SWAPGS - call switch_to_thread_stack + + /* + * Switch to the thread stack. The IRET frame and orig_ax are + * on the stack, as well as the return address. RDI..R12 are + * not (yet) on the stack and space has not (yet) been + * allocated for them. + */ + pushq %rdi + + /* Need to switch before accessing the thread stack. 
*/ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + + /* + * We have RDI, return address, and orig_ax on the stack on + * top of the IRET frame. That means offset=24 + */ + UNWIND_HINT_IRET_REGS base=%rdi offset=24 + + pushq 7*8(%rdi) /* regs->ss */ + pushq 6*8(%rdi) /* regs->rsp */ + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ + UNWIND_HINT_FUNC + + movq (%rdi), %rdi 1: - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 - testb $3, CS(%rsp) + testb $3, CS+8(%rsp) jz 1f /* @@ -548,7 +609,7 @@ END(irq_entries_start) * * We need to tell lockdep that IRQs are off. We can't do this until * we fix gsbase, and we should do it before enter_from_user_mode - * (which can take locks). Since TRACE_IRQS_OFF idempotent, + * (which can take locks). Since TRACE_IRQS_OFF is idempotent, * the simplest way to handle it is to just call it twice if * we enter from user mode. There's no reason to optimize this since * TRACE_IRQS_OFF is a no-op if lockdep is off. @@ -558,12 +619,15 @@ END(irq_entries_start) CALL_enter_from_user_mode 1: - ENTER_IRQ_STACK old_rsp=%rdi + ENTER_IRQ_STACK old_rsp=%rdi save_ret=1 /* We entered an interrupt context - irqs are off: */ TRACE_IRQS_OFF - call \func /* rdi points to pt_regs */ - .endm + ret +END(interrupt_entry) + + +/* Interrupt entry/exit. 
*/ /* * The interrupt stubs push (~vector+0x80) onto the stack and @@ -571,9 +635,10 @@ END(irq_entries_start) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: - ASM_CLAC addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */ - interrupt do_IRQ + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call do_IRQ /* rdi points to pt_regs */ /* 0(%rsp): old RSP */ ret_from_intr: DISABLE_INTERRUPTS(CLBR_ANY) @@ -766,10 +831,11 @@ END(common_interrupt) .macro apicinterrupt3 num sym do_sym ENTRY(\sym) UNWIND_HINT_IRET_REGS - ASM_CLAC pushq $~(\num) .Lcommon_\sym: - interrupt \do_sym + call interrupt_entry + UNWIND_HINT_REGS indirect=1 + call \do_sym /* rdi points to pt_regs */ jmp ret_from_intr END(\sym) .endm @@ -832,34 +898,6 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt */ #define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8) -/* - * Switch to the thread stack. This is called with the IRET frame and - * orig_ax on the stack. (That is, RDI..R12 are not on the stack and - * space has not been allocated for them.) - */ -ENTRY(switch_to_thread_stack) - UNWIND_HINT_FUNC - - pushq %rdi - /* Need to switch before accessing the thread stack. 
*/ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi - movq %rsp, %rdi - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI - - pushq 7*8(%rdi) /* regs->ss */ - pushq 6*8(%rdi) /* regs->rsp */ - pushq 5*8(%rdi) /* regs->eflags */ - pushq 4*8(%rdi) /* regs->cs */ - pushq 3*8(%rdi) /* regs->ip */ - pushq 2*8(%rdi) /* regs->orig_ax */ - pushq 8(%rdi) /* return address */ - UNWIND_HINT_FUNC - - movq (%rdi), %rdi - ret -END(switch_to_thread_stack) - .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ENTRY(\sym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 @@ -875,12 +913,8 @@ ENTRY(\sym) pushq $-1 /* ORIG_RAX: no syscall to restart */ .endif - /* Save all registers in pt_regs */ - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER - .if \paranoid < 2 - testb $3, CS(%rsp) /* If coming from userspace, switch stacks */ + testb $3, CS-ORIG_RAX(%rsp) /* If coming from userspace, switch stacks */ jnz .Lfrom_usermode_switch_stack_\@ .endif @@ -1106,10 +1140,13 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \ hyperv_reenlightenment_vector hyperv_reenlightenment_intr + +apicinterrupt3 HYPERV_STIMER0_VECTOR \ + hv_stimer0_callback_vector hv_stimer0_vector_handler #endif /* CONFIG_HYPERV */ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK -idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK +idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 #ifdef CONFIG_XEN @@ -1130,13 +1167,15 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif /* - * Switch gs if needed. + * Save all registers in pt_regs, and switch gs if needed. * Use slow, but surefire "are we in kernel?" check. 
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise */ ENTRY(paranoid_entry) UNWIND_HINT_FUNC cld + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 movl $1, %ebx movl $MSR_GS_BASE, %ecx rdmsr @@ -1181,12 +1220,14 @@ ENTRY(paranoid_exit) END(paranoid_exit) /* - * Switch gs if needed. + * Save all registers in pt_regs, and switch GS if needed. * Return: EBX=0: came from user mode; EBX=1: otherwise */ ENTRY(error_entry) - UNWIND_HINT_REGS offset=8 + UNWIND_HINT_FUNC cld + PUSH_AND_CLEAR_REGS save_ret=1 + ENCODE_FRAME_POINTER 8 testb $3, CS+8(%rsp) jz .Lerror_kernelspace @@ -1577,8 +1618,6 @@ end_repeat_nmi: * frame to point back to repeat_nmi. */ pushq $-1 /* ORIG_RAX: no syscall to restart */ - PUSH_AND_CLEAR_REGS - ENCODE_FRAME_POINTER /* * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index fd65e01..08425c4 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -85,25 +85,25 @@ ENTRY(entry_SYSENTER_compat) pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq 
$0 /* pt_regs->r15 = 0 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* @@ -224,25 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe) pushq %rbp /* pt_regs->cx (stashed in bp) */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp (will be overwritten) */ xorl %ebp, %ebp /* nospec rbp */ pushq $0 /* pt_regs->r12 = 0 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq $0 /* pt_regs->r13 = 0 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq $0 /* pt_regs->r14 = 0 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq $0 /* pt_regs->r15 = 0 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ /* * User mode is traced as though IRQs are on, and SYSENTER @@ -298,9 +298,9 @@ sysret32_from_system_call: */ SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9 - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d swapgs sysretl END(entry_SYSCALL_compat) @@ -347,36 +347,47 @@ ENTRY(entry_INT80_compat) */ movl %eax, %eax + /* switch to thread stack expects orig_ax and rdi to be pushed */ pushq %rax /* pt_regs->orig_ax */ + pushq %rdi /* pt_regs->di */ - /* switch to thread stack expects orig_ax to be pushed */ - call switch_to_thread_stack + /* Need to switch before accessing the thread stack. 
*/ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - pushq %rdi /* pt_regs->di */ + pushq 6*8(%rdi) /* regs->ss */ + pushq 5*8(%rdi) /* regs->rsp */ + pushq 4*8(%rdi) /* regs->eflags */ + pushq 3*8(%rdi) /* regs->cs */ + pushq 2*8(%rdi) /* regs->ip */ + pushq 1*8(%rdi) /* regs->orig_ax */ + + pushq (%rdi) /* pt_regs->di */ pushq %rsi /* pt_regs->si */ pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ pushq $0 /* pt_regs->r8 = 0 */ - xorq %r8, %r8 /* nospec r8 */ + xorl %r8d, %r8d /* nospec r8 */ pushq $0 /* pt_regs->r9 = 0 */ - xorq %r9, %r9 /* nospec r9 */ + xorl %r9d, %r9d /* nospec r9 */ pushq $0 /* pt_regs->r10 = 0 */ - xorq %r10, %r10 /* nospec r10 */ + xorl %r10d, %r10d /* nospec r10 */ pushq $0 /* pt_regs->r11 = 0 */ - xorq %r11, %r11 /* nospec r11 */ + xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ pushq %rbp /* pt_regs->rbp */ xorl %ebp, %ebp /* nospec rbp */ pushq %r12 /* pt_regs->r12 */ - xorq %r12, %r12 /* nospec r12 */ + xorl %r12d, %r12d /* nospec r12 */ pushq %r13 /* pt_regs->r13 */ - xorq %r13, %r13 /* nospec r13 */ + xorl %r13d, %r13d /* nospec r13 */ pushq %r14 /* pt_regs->r14 */ - xorq %r14, %r14 /* nospec r14 */ + xorl %r14d, %r14d /* nospec r14 */ pushq %r15 /* pt_regs->r15 */ - xorq %r15, %r15 /* nospec r15 */ + xorl %r15d, %r15d /* nospec r15 */ cld /* @@ -393,15 +404,3 @@ ENTRY(entry_INT80_compat) TRACE_IRQS_ON jmp swapgs_restore_regs_and_return_to_usermode END(entry_INT80_compat) - -ENTRY(stub32_clone) - /* - * The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr). - * The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val). 
- * - * The native 64-bit kernel's sys_clone() implements the latter, - * so we need to swap arguments here before calling it: - */ - xchg %r8, %rcx - jmp sys_clone -ENDPROC(stub32_clone) diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 448ac21..c58f75b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -8,12 +8,12 @@ # 0 i386 restart_syscall sys_restart_syscall 1 i386 exit sys_exit -2 i386 fork sys_fork sys_fork +2 i386 fork sys_fork 3 i386 read sys_read 4 i386 write sys_write 5 i386 open sys_open compat_sys_open 6 i386 close sys_close -7 i386 waitpid sys_waitpid sys32_waitpid +7 i386 waitpid sys_waitpid 8 i386 creat sys_creat 9 i386 link sys_link 10 i386 unlink sys_unlink @@ -78,7 +78,7 @@ 69 i386 ssetmask sys_ssetmask 70 i386 setreuid sys_setreuid16 71 i386 setregid sys_setregid16 -72 i386 sigsuspend sys_sigsuspend sys_sigsuspend +72 i386 sigsuspend sys_sigsuspend 73 i386 sigpending sys_sigpending compat_sys_sigpending 74 i386 sethostname sys_sethostname 75 i386 setrlimit sys_setrlimit compat_sys_setrlimit @@ -96,7 +96,7 @@ 87 i386 swapon sys_swapon 88 i386 reboot sys_reboot 89 i386 readdir sys_old_readdir compat_sys_old_readdir -90 i386 mmap sys_old_mmap sys32_mmap +90 i386 mmap sys_old_mmap compat_sys_x86_mmap 91 i386 munmap sys_munmap 92 i386 truncate sys_truncate compat_sys_truncate 93 i386 ftruncate sys_ftruncate compat_sys_ftruncate @@ -126,7 +126,7 @@ 117 i386 ipc sys_ipc compat_sys_ipc 118 i386 fsync sys_fsync 119 i386 sigreturn sys_sigreturn sys32_sigreturn -120 i386 clone sys_clone stub32_clone +120 i386 clone sys_clone compat_sys_x86_clone 121 i386 setdomainname sys_setdomainname 122 i386 uname sys_newuname 123 i386 modify_ldt sys_modify_ldt @@ -137,7 +137,7 @@ 128 i386 init_module sys_init_module 129 i386 delete_module sys_delete_module 130 i386 get_kernel_syms -131 i386 quotactl sys_quotactl sys32_quotactl +131 i386 quotactl sys_quotactl 
compat_sys_quotactl32 132 i386 getpgid sys_getpgid 133 i386 fchdir sys_fchdir 134 i386 bdflush sys_bdflush @@ -186,8 +186,8 @@ 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 i386 rt_sigsuspend sys_rt_sigsuspend -180 i386 pread64 sys_pread64 sys32_pread -181 i386 pwrite64 sys_pwrite64 sys32_pwrite +180 i386 pread64 sys_pread64 compat_sys_x86_pread +181 i386 pwrite64 sys_pwrite64 compat_sys_x86_pwrite 182 i386 chown sys_chown16 183 i386 getcwd sys_getcwd 184 i386 capget sys_capget @@ -196,14 +196,14 @@ 187 i386 sendfile sys_sendfile compat_sys_sendfile 188 i386 getpmsg 189 i386 putpmsg -190 i386 vfork sys_vfork sys_vfork +190 i386 vfork sys_vfork 191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit 192 i386 mmap2 sys_mmap_pgoff -193 i386 truncate64 sys_truncate64 sys32_truncate64 -194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64 -195 i386 stat64 sys_stat64 sys32_stat64 -196 i386 lstat64 sys_lstat64 sys32_lstat64 -197 i386 fstat64 sys_fstat64 sys32_fstat64 +193 i386 truncate64 sys_truncate64 compat_sys_x86_truncate64 +194 i386 ftruncate64 sys_ftruncate64 compat_sys_x86_ftruncate64 +195 i386 stat64 sys_stat64 compat_sys_x86_stat64 +196 i386 lstat64 sys_lstat64 compat_sys_x86_lstat64 +197 i386 fstat64 sys_fstat64 compat_sys_x86_fstat64 198 i386 lchown32 sys_lchown 199 i386 getuid32 sys_getuid 200 i386 getgid32 sys_getgid @@ -231,7 +231,7 @@ # 222 is unused # 223 is unused 224 i386 gettid sys_gettid -225 i386 readahead sys_readahead sys32_readahead +225 i386 readahead sys_readahead compat_sys_x86_readahead 226 i386 setxattr sys_setxattr 227 i386 lsetxattr sys_lsetxattr 228 i386 fsetxattr sys_fsetxattr @@ -256,7 +256,7 @@ 247 i386 io_getevents sys_io_getevents compat_sys_io_getevents 248 i386 io_submit sys_io_submit compat_sys_io_submit 249 i386 io_cancel sys_io_cancel -250 i386 fadvise64 sys_fadvise64 sys32_fadvise64 +250 i386 fadvise64 sys_fadvise64 
compat_sys_x86_fadvise64 # 251 is available for reuse (was briefly sys_set_zone_reclaim) 252 i386 exit_group sys_exit_group 253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie @@ -278,7 +278,7 @@ 269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 270 i386 tgkill sys_tgkill 271 i386 utimes sys_utimes compat_sys_utimes -272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64 +272 i386 fadvise64_64 sys_fadvise64_64 compat_sys_x86_fadvise64_64 273 i386 vserver 274 i386 mbind sys_mbind 275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy @@ -306,7 +306,7 @@ 297 i386 mknodat sys_mknodat 298 i386 fchownat sys_fchownat 299 i386 futimesat sys_futimesat compat_sys_futimesat -300 i386 fstatat64 sys_fstatat64 sys32_fstatat +300 i386 fstatat64 sys_fstatat64 compat_sys_x86_fstatat 301 i386 unlinkat sys_unlinkat 302 i386 renameat sys_renameat 303 i386 linkat sys_linkat @@ -320,7 +320,7 @@ 311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list 312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list 313 i386 splice sys_splice -314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range +314 i386 sync_file_range sys_sync_file_range compat_sys_x86_sync_file_range 315 i386 tee sys_tee 316 i386 vmsplice sys_vmsplice compat_sys_vmsplice 317 i386 move_pages sys_move_pages compat_sys_move_pages @@ -330,7 +330,7 @@ 321 i386 signalfd sys_signalfd compat_sys_signalfd 322 i386 timerfd_create sys_timerfd_create 323 i386 eventfd sys_eventfd -324 i386 fallocate sys_fallocate sys32_fallocate +324 i386 fallocate sys_fallocate compat_sys_x86_fallocate 325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime 326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime 327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4 diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c index 7780bbf..9242b28 100644 --- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c 
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c @@ -5,8 +5,6 @@ #undef CONFIG_OPTIMIZE_INLINING #endif -#undef CONFIG_X86_PPRO_FENCE - #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 577fa8a..317be36 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -42,10 +42,8 @@ #define CREATE_TRACE_POINTS #include "vsyscall_trace.h" -static enum { EMULATE, NATIVE, NONE } vsyscall_mode = -#if defined(CONFIG_LEGACY_VSYSCALL_NATIVE) - NATIVE; -#elif defined(CONFIG_LEGACY_VSYSCALL_NONE) +static enum { EMULATE, NONE } vsyscall_mode = +#ifdef CONFIG_LEGACY_VSYSCALL_NONE NONE; #else EMULATE; @@ -56,8 +54,6 @@ static int __init vsyscall_setup(char *str) if (str) { if (!strcmp("emulate", str)) vsyscall_mode = EMULATE; - else if (!strcmp("native", str)) - vsyscall_mode = NATIVE; else if (!strcmp("none", str)) vsyscall_mode = NONE; else @@ -139,10 +135,6 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) WARN_ON_ONCE(address != regs->ip); - /* This should be unreachable in NATIVE mode. */ - if (WARN_ON(vsyscall_mode == NATIVE)) - return false; - if (vsyscall_mode == NONE) { warn_bad_vsyscall(KERN_INFO, regs, "vsyscall attempted with vsyscall=none"); @@ -355,7 +347,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root) set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER)); p4d = p4d_offset(pgd, VSYSCALL_ADDR); #if CONFIG_PGTABLE_LEVELS >= 5 - p4d->p4d |= _PAGE_USER; + set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER)); #endif pud = pud_offset(p4d, VSYSCALL_ADDR); set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER)); @@ -370,9 +362,7 @@ void __init map_vsyscall(void) if (vsyscall_mode != NONE) { __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, - vsyscall_mode == NATIVE - ? 
PAGE_KERNEL_VSYSCALL - : PAGE_KERNEL_VVAR); + PAGE_KERNEL_VVAR); set_vsyscall_pgtable_user_bits(swapper_pg_dir); } diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 140d332..a6006e7 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -48,7 +48,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE; +DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key); u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -990,7 +990,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, if (!dogrp) return n; - list_for_each_entry(event, &leader->sibling_list, group_entry) { + for_each_sibling_event(event, leader) { if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF) continue; @@ -1156,16 +1156,13 @@ int x86_perf_event_set_period(struct perf_event *event) per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) || - local64_read(&hwc->prev_count) != (u64)-left) { - /* - * The hw event starts counting from this event offset, - * mark it to be able to extra future deltas: - */ - local64_set(&hwc->prev_count, (u64)-left); + /* + * The hw event starts counting from this event offset, + * mark it to be able to extra future deltas: + */ + local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); - } + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Due to erratum on certan cpu we need @@ -1884,6 +1881,8 @@ early_initcall(init_hw_perf_events); static inline void x86_pmu_read(struct perf_event *event) { + if (x86_pmu.read) + return x86_pmu.read(event); x86_perf_event_update(event); } @@ -2118,7 +2117,8 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (READ_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc) && + !(event->hw.flags & 
PERF_X86_EVENT_LARGE_PEBS)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; @@ -2206,9 +2206,9 @@ static ssize_t set_attr_rdpmc(struct device *cdev, * but only root can trigger it, so it's okay. */ if (val == 2) - static_key_slow_inc(&rdpmc_always_available); + static_branch_inc(&rdpmc_always_available_key); else - static_key_slow_dec(&rdpmc_always_available); + static_branch_dec(&rdpmc_always_available_key); on_each_cpu(refresh_pce, NULL, 1); } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 56457cb..607bf56 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2060,6 +2060,14 @@ static void intel_pmu_del_event(struct perf_event *event) intel_pmu_pebs_del(event); } +static void intel_pmu_read_event(struct perf_event *event) +{ + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_auto_reload_read(event); + else + x86_perf_event_update(event); +} + static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - INTEL_PMC_IDX_FIXED; @@ -2201,16 +2209,23 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) int bit, loops; u64 status; int handled; + int pmu_enabled; cpuc = this_cpu_ptr(&cpu_hw_events); /* + * Save the PMU state. + * It needs to be restored when leaving the handler. + */ + pmu_enabled = cpuc->enabled; + /* * No known reason to not always do late ACK, * but just in case do it opt-in. */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); + cpuc->enabled = 0; __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2320,7 +2335,8 @@ again: done: /* Only restore PMU state when it's active. See x86_pmu_disable(). 
*/ - if (cpuc->enabled) + cpuc->enabled = pmu_enabled; + if (pmu_enabled) __intel_pmu_enable_all(0, true); intel_bts_enable_local(); @@ -2952,9 +2968,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event) return intel_pebs_aliases_precdist(event); } -static unsigned long intel_pmu_free_running_flags(struct perf_event *event) +static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) { - unsigned long flags = x86_pmu.free_running_flags; + unsigned long flags = x86_pmu.large_pebs_flags; if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; @@ -2976,8 +2992,8 @@ static int intel_pmu_hw_config(struct perf_event *event) if (!event->attr.freq) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_free_running_flags(event))) - event->hw.flags |= PERF_X86_EVENT_FREERUNNING; + ~intel_pmu_large_pebs_flags(event))) + event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); @@ -3188,13 +3204,13 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * Therefore the effective (average) period matches the requested period, * despite coarser hardware granularity. 
*/ -static unsigned bdw_limit_period(struct perf_event *event, unsigned left) +static u64 bdw_limit_period(struct perf_event *event, u64 left) { if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xc0, .umask=0x01)) { if (left < 128) left = 128; - left &= ~0x3fu; + left &= ~0x3fULL; } return left; } @@ -3460,7 +3476,7 @@ static __initconst const struct x86_pmu core_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32-bit width, @@ -3495,6 +3511,7 @@ static __initconst const struct x86_pmu intel_pmu = { .disable = intel_pmu_disable_event, .add = intel_pmu_add_event, .del = intel_pmu_del_event, + .read = intel_pmu_read_event, .hw_config = intel_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, @@ -3502,7 +3519,7 @@ static __initconst const struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, - .free_running_flags = PEBS_FREERUNNING_FLAGS, + .large_pebs_flags = LARGE_PEBS_FLAGS, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 72db066..9aca448 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,50 +40,51 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM + * Available model: SLM,AMT,GLM,CNL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM, + CNL * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 
0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, + * SKL,KNL,GLM,CNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 - * Available model: SNB,IVB,HSW,BDW,SKL + * Available model: SNB,IVB,HSW,BDW,SKL,CNL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM + * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL - * GLM + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, + * GLM,CNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW - * SKL,KNL,GLM + * SKL,KNL,GLM,CNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 - * Available model: HSW ULT only + * Available model: HSW ULT,CNL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 - * Available model: HSW ULT only + * Available model: HSW ULT,CNL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. 
* perf code: 0x06 - * Available model: HSW ULT, GLM + * Available model: HSW ULT,GLM,CNL * Scope: Package (physical package) * */ @@ -486,6 +487,21 @@ static const struct cstate_model hswult_cstates __initconst = { BIT(PERF_CSTATE_PKG_C10_RES), }; +static const struct cstate_model cnl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + static const struct cstate_model slm_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | BIT(PERF_CSTATE_CORE_C6_RES), @@ -557,6 +573,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_MOBILE, cnl_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 18c25ab..da67801 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -935,7 +935,7 @@ void intel_pmu_pebs_add(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs++; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs++; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ -975,7 +975,7 @@ void intel_pmu_pebs_del(struct perf_event *event) bool needed_cb = pebs_needs_sched_cb(cpuc); cpuc->n_pebs--; - if (hwc->flags & PERF_X86_EVENT_FREERUNNING) + if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS) cpuc->n_large_pebs--; pebs_update_state(needed_cb, cpuc, event->ctx->pmu); @@ 
-1153,6 +1153,7 @@ static void setup_pebs_sample_data(struct perf_event *event, if (pebs == NULL) return; + regs->flags &= ~PERF_EFLAGS_EXACT; sample_type = event->attr.sample_type; dsrc = sample_type & PERF_SAMPLE_DATA_SRC; @@ -1197,7 +1198,6 @@ static void setup_pebs_sample_data(struct perf_event *event, */ *regs = *iregs; regs->flags = pebs->flags; - set_linear_ip(regs, pebs->ip); if (sample_type & PERF_SAMPLE_REGS_INTR) { regs->ax = pebs->ax; @@ -1233,13 +1233,22 @@ static void setup_pebs_sample_data(struct perf_event *event, #endif } - if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { - regs->ip = pebs->real_ip; - regs->flags |= PERF_EFLAGS_EXACT; - } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs)) - regs->flags |= PERF_EFLAGS_EXACT; - else - regs->flags &= ~PERF_EFLAGS_EXACT; + if (event->attr.precise_ip > 1) { + /* Haswell and later have the eventing IP, so use it: */ + if (x86_pmu.intel_cap.pebs_format >= 2) { + set_linear_ip(regs, pebs->real_ip); + regs->flags |= PERF_EFLAGS_EXACT; + } else { + /* Otherwise use PEBS off-by-1 IP: */ + set_linear_ip(regs, pebs->ip); + + /* ... and try to fix it up using the LBR entries: */ + if (intel_pmu_pebs_fixup_ip(regs)) + regs->flags |= PERF_EFLAGS_EXACT; + } + } else + set_linear_ip(regs, pebs->ip); + if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) && x86_pmu.intel_cap.pebs_format >= 1) @@ -1306,17 +1315,93 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) return NULL; } +void intel_pmu_auto_reload_read(struct perf_event *event) +{ + WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)); + + perf_pmu_disable(event->pmu); + intel_pmu_drain_pebs_buffer(); + perf_pmu_enable(event->pmu); +} + +/* + * Special variant of intel_pmu_save_and_restart() for auto-reload. 
+ */ +static int +intel_pmu_save_and_restart_reload(struct perf_event *event, int count) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - x86_pmu.cntval_bits; + u64 period = hwc->sample_period; + u64 prev_raw_count, new_raw_count; + s64 new, old; + + WARN_ON(!period); + + /* + * drain_pebs() only happens when the PMU is disabled. + */ + WARN_ON(this_cpu_read(cpu_hw_events.enabled)); + + prev_raw_count = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new_raw_count); + local64_set(&hwc->prev_count, new_raw_count); + + /* + * Since the counter increments a negative counter value and + * overflows on the sign switch, giving the interval: + * + * [-period, 0] + * + * the difference between two consecutive reads is: + * + * A) value2 - value1; + * when no overflows have happened in between, + * + * B) (0 - value1) + (value2 - (-period)); + * when one overflow happened in between, + * + * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period)); + * when @n overflows happened in between. + * + * Here A) is the obvious difference, B) is the extension to the + * discrete interval, where the first term is to the top of the + * interval and the second term is from the bottom of the next + * interval and C) the extension to multiple intervals, where the + * middle term is the whole intervals covered. 
+ * + * An equivalent of C, by reduction, is: + * + * value2 - value1 + n * period + */ + new = ((s64)(new_raw_count << shift) >> shift); + old = ((s64)(prev_raw_count << shift) >> shift); + local64_add(new - old + count * period, &event->count); + + perf_event_update_userpage(event); + + return 0; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *base, void *top, int bit, int count) { + struct hw_perf_event *hwc = &event->hw; struct perf_sample_data data; struct pt_regs regs; void *at = get_next_pebs_record_by_bit(base, top, bit); - if (!intel_pmu_save_and_restart(event) && - !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)) + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + /* + * Now, auto-reload is only enabled in fixed period mode. + * The reload value is always hwc->sample_period. + * May need to change it, if auto-reload is enabled in + * freq mode later. + */ + intel_pmu_save_and_restart_reload(event, count); + } else if (!intel_pmu_save_and_restart(event)) return; while (count > 1) { @@ -1368,8 +1453,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; n = top - at; - if (n <= 0) + if (n <= 0) { + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); return; + } __intel_pmu_pebs_event(event, iregs, at, top, 0, n); } @@ -1392,8 +1480,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ds->pebs_index = ds->pebs_buffer_base; - if (unlikely(base >= top)) + if (unlikely(base >= top)) { + /* + * The drain_pebs() could be called twice in a short period + * for auto-reload event in pmu::read(). No + * overflows have happened in between. + * It needs to call intel_pmu_save_and_restart_reload() to + * update the event->count for this case. 
+ */ + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, + x86_pmu.max_pebs_events) { + event = cpuc->events[bit]; + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + } return; + } for (at = base; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; @@ -1530,7 +1632,7 @@ void __init intel_ds_init(void) x86_pmu.pebs_record_size = sizeof(struct pebs_record_skl); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - x86_pmu.free_running_flags |= PERF_SAMPLE_TIME; + x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME; break; default: diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 81fd41d..3b99394 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1186,8 +1186,12 @@ static int pt_event_addr_filters_validate(struct list_head *filters) int range = 0; list_for_each_entry(filter, filters, entry) { - /* PT doesn't support single address triggers */ - if (!filter->range || !filter->size) + /* + * PT doesn't support single address triggers and + * 'start' filters. + */ + if (!filter->size || + filter->action == PERF_ADDR_FILTER_ACTION_START) return -EOPNOTSUPP; if (!filter->inode) { @@ -1227,7 +1231,10 @@ static void pt_event_addr_filters_sync(struct perf_event *event) filters->filter[range].msr_a = msr_a; filters->filter[range].msr_b = msr_b; - filters->filter[range].config = filter->filter ? 
1 : 2; + if (filter->action == PERF_ADDR_FILTER_ACTION_FILTER) + filters->filter[range].config = 1; + else + filters->filter[range].config = 2; range++; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index a2efb49..32f3e94 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -774,6 +774,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_CANNONLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init), diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7874c98..a7956fc 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -354,7 +354,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, if (!dogrp) return n; - list_for_each_entry(event, &leader->sibling_list, group_entry) { + for_each_sibling_event(event, leader) { if (!is_box_event(box, event) || event->state <= PERF_EVENT_STATE_OFF) continue; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 6d8044a..c98b943 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3343,6 +3343,7 @@ static struct extra_reg skx_uncore_cha_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x9134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x35, 0xff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x36, 0xff, 0x8), + SNBEP_CBO_EVENT_EXTRA_REG(0x38, 0xff, 0x3), EVENT_EXTRA_END }; @@ -3562,24 +3563,27 @@ static struct intel_uncore_type *skx_msr_uncores[] = { NULL, }; +/* + * To determine the number of CHAs, it should read bits 27:0 in the CAPID6 + * register which located at Device 30, Function 3, Offset 0x9C. PCI ID 0x2083. 
+ */ +#define SKX_CAPID6 0x9c +#define SKX_CHA_BIT_MASK GENMASK(27, 0) + static int skx_count_chabox(void) { - struct pci_dev *chabox_dev = NULL; - int bus, count = 0; + struct pci_dev *dev = NULL; + u32 val = 0; - while (1) { - chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev); - if (!chabox_dev) - break; - if (count == 0) - bus = chabox_dev->bus->number; - if (bus != chabox_dev->bus->number) - break; - count++; - } + dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2083, dev); + if (!dev) + goto out; - pci_dev_put(chabox_dev); - return count; + pci_read_config_dword(dev, SKX_CAPID6, &val); + val &= SKX_CHA_BIT_MASK; +out: + pci_dev_put(dev); + return hweight32(val); } void skx_uncore_cpu_init(void) @@ -3606,7 +3610,7 @@ static struct intel_uncore_type skx_uncore_imc = { }; static struct attribute *skx_upi_uncore_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_umask_ext.attr, &format_attr_edge.attr, &format_attr_inv.attr, diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 18e2628..e7edf19 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -188,10 +188,11 @@ static inline u64 msr_read_counter(struct perf_event *event) if (event->hw.event_base) rdmsrl(event->hw.event_base, now); else - rdtscll(now); + now = rdtsc_ordered(); return now; } + static void msr_event_update(struct perf_event *event) { u64 prev, now; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 78f91ec..9f37114 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -69,7 +69,7 @@ struct event_constraint { #define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */ #define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */ #define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */ -#define PERF_X86_EVENT_FREERUNNING 0x0800 /* use freerunning PEBS */ +#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */ struct amd_nb { @@ -88,7 
+88,7 @@ struct amd_nb { * REGS_USER can be handled for events limited to ring 3. * */ -#define PEBS_FREERUNNING_FLAGS \ +#define LARGE_PEBS_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ @@ -520,6 +520,7 @@ struct x86_pmu { void (*disable)(struct perf_event *); void (*add)(struct perf_event *); void (*del)(struct perf_event *); + void (*read)(struct perf_event *event); int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; @@ -557,7 +558,7 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; bool late_ack; - unsigned (*limit_period)(struct perf_event *event, unsigned l); + u64 (*limit_period)(struct perf_event *event, u64 l); /* * sysfs attrs @@ -608,7 +609,7 @@ struct x86_pmu { struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); int max_pebs_events; - unsigned long free_running_flags; + unsigned long large_pebs_flags; /* * Intel LBR @@ -923,6 +924,8 @@ void intel_pmu_pebs_disable_all(void); void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in); +void intel_pmu_auto_reload_read(struct perf_event *event); + void intel_ds_init(void); void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2edc49e..cfecc22 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -21,7 +21,7 @@ #include <asm/apic.h> #include <asm/desc.h> #include <asm/hypervisor.h> -#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <linux/version.h> #include <linux/vmalloc.h> @@ -88,11 +88,15 @@ EXPORT_SYMBOL_GPL(hyperv_cs); u32 *hv_vp_index; EXPORT_SYMBOL_GPL(hv_vp_index); +struct hv_vp_assist_page **hv_vp_assist_page; +EXPORT_SYMBOL_GPL(hv_vp_assist_page); + u32 
hv_max_vp_index; static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; + struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; hv_get_vp_index(msr_vp_index); @@ -101,6 +105,22 @@ static int hv_cpu_init(unsigned int cpu) if (msr_vp_index > hv_max_vp_index) hv_max_vp_index = msr_vp_index; + if (!hv_vp_assist_page) + return 0; + + if (!*hvp) + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + + if (*hvp) { + u64 val; + + val = vmalloc_to_pfn(*hvp); + val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val); + } + return 0; } @@ -198,6 +218,9 @@ static int hv_cpu_die(unsigned int cpu) struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; + if (hv_vp_assist_page && hv_vp_assist_page[cpu]) + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); + if (hv_reenlightenment_cb == NULL) return 0; @@ -224,6 +247,7 @@ void hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; + int cpuhp; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -241,9 +265,17 @@ void hyperv_init(void) if (!hv_vp_index) return; - if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", - hv_cpu_init, hv_cpu_die) < 0) + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), GFP_KERNEL); + if (!hv_vp_assist_page) { + ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; goto free_vp_index; + } + + cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", + hv_cpu_init, hv_cpu_die); + if (cpuhp < 0) + goto free_vp_assist_page; /* * Setup the hypercall page and enable hypercalls. 
@@ -256,7 +288,7 @@ void hyperv_init(void) hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - goto free_vp_index; + goto remove_cpuhp_state; } rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); @@ -304,6 +336,11 @@ register_msr_cs: return; +remove_cpuhp_state: + cpuhp_remove_state(cpuhp); +free_vp_assist_page: + kfree(hv_vp_assist_page); + hv_vp_assist_page = NULL; free_vp_index: kfree(hv_vp_index); hv_vp_index = NULL; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 41c6718..86b1341 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -33,7 +33,6 @@ #include <asm/vdso.h> #include <asm/sigframe.h> #include <asm/sighandling.h> -#include <asm/sys_ia32.h> #include <asm/smap.h> /* diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 96cd33b..11ef7b7 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -41,27 +41,28 @@ #include <linux/highuid.h> #include <linux/sysctl.h> #include <linux/slab.h> +#include <linux/sched/task.h> #include <asm/mman.h> #include <asm/types.h> #include <linux/uaccess.h> #include <linux/atomic.h> #include <asm/vgtod.h> -#include <asm/sys_ia32.h> +#include <asm/ia32.h> #define AA(__x) ((unsigned long)(__x)) -asmlinkage long sys32_truncate64(const char __user *filename, - unsigned long offset_low, - unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_truncate64, const char __user *, filename, + unsigned long, offset_low, unsigned long, offset_high) { - return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low); + return ksys_truncate(filename, + ((loff_t) offset_high << 32) | offset_low); } -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low, - unsigned long offset_high) +COMPAT_SYSCALL_DEFINE3(x86_ftruncate64, unsigned int, fd, + unsigned long, offset_low, unsigned long, offset_high) { - return sys_ftruncate(fd, ((loff_t) 
offset_high << 32) | offset_low); + return ksys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low); } /* @@ -96,8 +97,8 @@ static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat) return 0; } -asmlinkage long sys32_stat64(const char __user *filename, - struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_stat64, const char __user *, filename, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); @@ -107,8 +108,8 @@ asmlinkage long sys32_stat64(const char __user *filename, return ret; } -asmlinkage long sys32_lstat64(const char __user *filename, - struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_lstat64, const char __user *, filename, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -117,7 +118,8 @@ asmlinkage long sys32_lstat64(const char __user *filename, return ret; } -asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(x86_fstat64, unsigned int, fd, + struct stat64 __user *, statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -126,8 +128,9 @@ asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf) return ret; } -asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(x86_fstatat, unsigned int, dfd, + const char __user *, filename, + struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -153,7 +156,7 @@ struct mmap_arg_struct32 { unsigned int offset; }; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) +COMPAT_SYSCALL_DEFINE1(x86_mmap, struct mmap_arg_struct32 __user *, arg) { struct mmap_arg_struct32 a; @@ -163,29 +166,23 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg) if (a.offset & ~PAGE_MASK) return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + return ksys_mmap_pgoff(a.addr, a.len, 
a.prot, a.flags, a.fd, a.offset>>PAGE_SHIFT); } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, - int options) -{ - return compat_sys_wait4(pid, stat_addr, options, NULL); -} - /* warning: next two assume little endian */ -asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, - u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pread, unsigned int, fd, char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) { - return sys_pread64(fd, ubuf, count, - ((loff_t)AA(poshi) << 32) | AA(poslo)); + return ksys_pread64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); } -asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, - u32 count, u32 poslo, u32 poshi) +COMPAT_SYSCALL_DEFINE5(x86_pwrite, unsigned int, fd, const char __user *, ubuf, + u32, count, u32, poslo, u32, poshi) { - return sys_pwrite64(fd, ubuf, count, - ((loff_t)AA(poshi) << 32) | AA(poslo)); + return ksys_pwrite64(fd, ubuf, count, + ((loff_t)AA(poshi) << 32) | AA(poslo)); } @@ -193,40 +190,53 @@ asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf, * Some system calls that need sign extended arguments. This could be * done by a generic wrapper. 
*/ -long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, - __u32 len_low, __u32 len_high, int advice) +COMPAT_SYSCALL_DEFINE6(x86_fadvise64_64, int, fd, __u32, offset_low, + __u32, offset_high, __u32, len_low, __u32, len_high, + int, advice) +{ + return ksys_fadvise64_64(fd, + (((u64)offset_high)<<32) | offset_low, + (((u64)len_high)<<32) | len_low, + advice); +} + +COMPAT_SYSCALL_DEFINE4(x86_readahead, int, fd, unsigned int, off_lo, + unsigned int, off_hi, size_t, count) { - return sys_fadvise64_64(fd, - (((u64)offset_high)<<32) | offset_low, - (((u64)len_high)<<32) | len_low, - advice); + return ksys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); } -asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi, - size_t count) +COMPAT_SYSCALL_DEFINE6(x86_sync_file_range, int, fd, unsigned int, off_low, + unsigned int, off_hi, unsigned int, n_low, + unsigned int, n_hi, int, flags) { - return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count); + return ksys_sync_file_range(fd, + ((u64)off_hi << 32) | off_low, + ((u64)n_hi << 32) | n_low, flags); } -asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi, - unsigned n_low, unsigned n_hi, int flags) +COMPAT_SYSCALL_DEFINE5(x86_fadvise64, int, fd, unsigned int, offset_lo, + unsigned int, offset_hi, size_t, len, int, advice) { - return sys_sync_file_range(fd, - ((u64)off_hi << 32) | off_low, - ((u64)n_hi << 32) | n_low, flags); + return ksys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, + len, advice); } -asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi, - size_t len, int advice) +COMPAT_SYSCALL_DEFINE6(x86_fallocate, int, fd, int, mode, + unsigned int, offset_lo, unsigned int, offset_hi, + unsigned int, len_lo, unsigned int, len_hi) { - return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo, - len, advice); + return ksys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, + ((u64)len_hi << 32) | len_lo); } 
-asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo, - unsigned offset_hi, unsigned len_lo, - unsigned len_hi) +/* + * The 32-bit clone ABI is CONFIG_CLONE_BACKWARDS + */ +COMPAT_SYSCALL_DEFINE5(x86_clone, unsigned long, clone_flags, + unsigned long, newsp, int __user *, parent_tidptr, + unsigned long, tls_val, int __user *, child_tidptr) { - return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo, - ((u64)len_hi << 32) | len_lo); + return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, + tls_val); } diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 1188172..a303d7b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -31,6 +31,7 @@ #include <asm/mmu.h> #include <asm/mpspec.h> #include <asm/realmode.h> +#include <asm/x86_init.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -133,6 +134,14 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_GET_ROOT_POINTER +static inline u64 acpi_arch_get_root_pointer(void) +{ + return x86_init.acpi.get_root_pointer(); +} + +void acpi_generic_reduced_hw_init(void); + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -142,6 +151,8 @@ static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } static inline void disable_acpi(void) { } +static inline void acpi_generic_reduced_hw_init(void) { } + #endif /* !CONFIG_ACPI */ #define ARCH_HAS_POWER_INIT 1 diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index cf5961c..4cd6a3b 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -218,13 +218,11 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) 
\ -{ \ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input); \ -} + [new2] "i" (newfunc2), ## input) /* * use this macro(s) if you need more than one output parameter diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 9872277..40a3d36 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -138,7 +138,6 @@ extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); extern void apic_intr_mode_init(void); -extern void setup_local_APIC(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); @@ -183,6 +182,7 @@ static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } +static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } @@ -304,12 +304,6 @@ struct apic { u32 irq_delivery_mode; u32 irq_dest_mode; - /* Functions and data related to vector allocation */ - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, - const struct cpumask *mask); - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); u32 (*calc_dest_apicid)(unsigned int cpu); /* ICR related functions */ @@ -499,17 +493,7 @@ extern void default_setup_apic_routing(void); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); -extern int 
default_cpu_mask_to_apicid(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); extern bool default_check_apicid_used(physid_mask_t *map, int apicid); -extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask); -extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask); extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 4d4015d..c356098 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -7,6 +7,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_APM_H #define _ASM_X86_MACH_DEFAULT_APM_H +#include <asm/nospec-branch.h> + #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ "pushl %%ds\n\t" \ @@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. */ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, "=S" (*esi) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); } static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, @@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, * N.B. We do NOT need a cld after the BIOS call * because we always save and restore the flags. 
*/ + firmware_restrict_branch_speculation_start(); __asm__ __volatile__(APM_DO_ZERO_SEGS "pushl %%edi\n\t" "pushl %%ebp\n\t" @@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, "=S" (si) : "a" (func), "b" (ebx_in), "c" (ecx_in) : "memory", "cc"); + firmware_restrict_branch_speculation_end(); return error; } diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 4d11161..1908214 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -38,7 +38,4 @@ INDIRECT_THUNK(dx) INDIRECT_THUNK(si) INDIRECT_THUNK(di) INDIRECT_THUNK(bp) -asmlinkage void __fill_rsb(void); -asmlinkage void __clear_rsb(void); - #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 72759f1..0db6bec 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -17,36 +17,40 @@ #define ATOMIC_INIT(i) { (i) } /** - * atomic_read - read atomic variable + * arch_atomic_read - read atomic variable * @v: pointer of type atomic_t * * Atomically reads the value of @v. */ -static __always_inline int atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { + /* + * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, + * it's non-inlined function that increases binary size and stack usage. + */ return READ_ONCE((v)->counter); } /** - * atomic_set - set atomic variable + * arch_atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value * * Atomically sets the value of @v to @i. */ -static __always_inline void atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { WRITE_ONCE(v->counter, i); } /** - * atomic_add - add integer to atomic variable + * arch_atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v. 
*/ -static __always_inline void atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -54,13 +58,13 @@ static __always_inline void atomic_add(int i, atomic_t *v) } /** - * atomic_sub - subtract integer from atomic variable + * arch_atomic_sub - subtract integer from atomic variable * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v. */ -static __always_inline void atomic_sub(int i, atomic_t *v) +static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -68,7 +72,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) } /** - * atomic_sub_and_test - subtract value from variable and test result + * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * @@ -76,63 +80,63 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** - * atomic_inc - increment atomic variable + * arch_atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. */ -static __always_inline void atomic_inc(atomic_t *v) +static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); } /** - * atomic_dec - decrement atomic variable + * arch_atomic_dec - decrement atomic variable * @v: pointer of type atomic_t * * Atomically decrements @v by 1. 
*/ -static __always_inline void atomic_dec(atomic_t *v) +static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); } /** - * atomic_dec_and_test - decrement and test + * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -static __always_inline bool atomic_dec_and_test(atomic_t *v) +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** - * atomic_inc_and_test - increment and test + * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ -static __always_inline bool atomic_inc_and_test(atomic_t *v) +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** - * atomic_add_negative - add and test if negative + * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * @@ -140,65 +144,65 @@ static __always_inline bool atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. 
*/ -static __always_inline bool atomic_add_negative(int i, atomic_t *v) +static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** - * atomic_add_return - add integer and return + * arch_atomic_add_return - add integer and return * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } /** - * atomic_sub_return - subtract integer and return + * arch_atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract * * Atomically subtracts @i from @v and returns @v - @i */ -static __always_inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { - return atomic_add_return(-i, v); + return arch_atomic_add_return(-i, v); } -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) +#define arch_atomic_inc_return(v) (arch_atomic_add_return(1, v)) +#define arch_atomic_dec_return(v) (arch_atomic_sub_return(1, v)) -static __always_inline int atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } -static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } -static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { - return cmpxchg(&v->counter, old, new); + return arch_cmpxchg(&v->counter, old, new); } -#define atomic_try_cmpxchg atomic_try_cmpxchg -static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, 
int new) +#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg +static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return try_cmpxchg(&v->counter, old, new); } -static inline int atomic_xchg(atomic_t *v, int new) +static inline int arch_atomic_xchg(atomic_t *v, int new) { return xchg(&v->counter, new); } -static inline void atomic_and(int i, atomic_t *v) +static inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) @@ -206,16 +210,16 @@ static inline void atomic_and(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_and(int i, atomic_t *v) +static inline int arch_atomic_fetch_and(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val & i)); + do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } -static inline void atomic_or(int i, atomic_t *v) +static inline void arch_atomic_or(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) @@ -223,16 +227,16 @@ static inline void atomic_or(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_or(int i, atomic_t *v) +static inline int arch_atomic_fetch_or(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val | i)); + do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } -static inline void atomic_xor(int i, atomic_t *v) +static inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) @@ -240,17 +244,17 @@ static inline void atomic_xor(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_xor(int i, atomic_t *v) +static inline int arch_atomic_fetch_xor(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val ^ i)); + do { } while 
(!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } /** - * __atomic_add_unless - add unless the number is already a given value + * __arch_atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -258,14 +262,14 @@ static inline int atomic_fetch_xor(int i, atomic_t *v) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u) { - int c = atomic_read(v); + int c = arch_atomic_read(v); do { if (unlikely(c == u)) break; - } while (!atomic_try_cmpxchg(v, &c, c + a)); + } while (!arch_atomic_try_cmpxchg(v, &c, c + a)); return c; } @@ -276,4 +280,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) # include <asm/atomic64_64.h> #endif +#include <asm-generic/atomic-instrumented.h> + #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 97c46b8..92212bf 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -62,7 +62,7 @@ ATOMIC64_DECL(add_unless); #undef ATOMIC64_EXPORT /** - * atomic64_cmpxchg - cmpxchg atomic64 variable + * arch_atomic64_cmpxchg - cmpxchg atomic64 variable * @v: pointer to type atomic64_t * @o: expected value * @n: new value @@ -71,20 +71,21 @@ ATOMIC64_DECL(add_unless); * the old value. 
*/ -static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) +static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, + long long n) { - return cmpxchg64(&v->counter, o, n); + return arch_cmpxchg64(&v->counter, o, n); } /** - * atomic64_xchg - xchg atomic64 variable + * arch_atomic64_xchg - xchg atomic64 variable * @v: pointer to type atomic64_t * @n: value to assign * * Atomically xchgs the value of @v to @n and returns * the old value. */ -static inline long long atomic64_xchg(atomic64_t *v, long long n) +static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) { long long o; unsigned high = (unsigned)(n >> 32); @@ -96,13 +97,13 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n) } /** - * atomic64_set - set atomic64 variable + * arch_atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: value to assign * * Atomically sets the value of @v to @n. */ -static inline void atomic64_set(atomic64_t *v, long long i) +static inline void arch_atomic64_set(atomic64_t *v, long long i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; @@ -112,26 +113,26 @@ static inline void atomic64_set(atomic64_t *v, long long i) } /** - * atomic64_read - read atomic64 variable + * arch_atomic64_read - read atomic64 variable * @v: pointer to type atomic64_t * * Atomically reads the value of @v and returns it. 
*/ -static inline long long atomic64_read(const atomic64_t *v) +static inline long long arch_atomic64_read(const atomic64_t *v) { long long r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); return r; - } +} /** - * atomic64_add_return - add and return + * arch_atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns @i + *@v */ -static inline long long atomic64_add_return(long long i, atomic64_t *v) +static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) { alternative_atomic64(add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -142,7 +143,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) /* * Other variants with different arithmetic operators: */ -static inline long long atomic64_sub_return(long long i, atomic64_t *v) +static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) { alternative_atomic64(sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -150,7 +151,7 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v) return i; } -static inline long long atomic64_inc_return(atomic64_t *v) +static inline long long arch_atomic64_inc_return(atomic64_t *v) { long long a; alternative_atomic64(inc_return, "=&A" (a), @@ -158,7 +159,7 @@ static inline long long atomic64_inc_return(atomic64_t *v) return a; } -static inline long long atomic64_dec_return(atomic64_t *v) +static inline long long arch_atomic64_dec_return(atomic64_t *v) { long long a; alternative_atomic64(dec_return, "=&A" (a), @@ -167,13 +168,13 @@ static inline long long atomic64_dec_return(atomic64_t *v) } /** - * atomic64_add - add integer to atomic64 variable + * arch_atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v. 
*/ -static inline long long atomic64_add(long long i, atomic64_t *v) +static inline long long arch_atomic64_add(long long i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -182,13 +183,13 @@ static inline long long atomic64_add(long long i, atomic64_t *v) } /** - * atomic64_sub - subtract the atomic64 variable + * arch_atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v. */ -static inline long long atomic64_sub(long long i, atomic64_t *v) +static inline long long arch_atomic64_sub(long long i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -197,7 +198,7 @@ static inline long long atomic64_sub(long long i, atomic64_t *v) } /** - * atomic64_sub_and_test - subtract value from variable and test result + * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer to type atomic64_t * @@ -205,46 +206,46 @@ static inline long long atomic64_sub(long long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic64_sub_and_test(long long i, atomic64_t *v) +static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v) { - return atomic64_sub_return(i, v) == 0; + return arch_atomic64_sub_return(i, v) == 0; } /** - * atomic64_inc - increment atomic64 variable + * arch_atomic64_inc - increment atomic64 variable * @v: pointer to type atomic64_t * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } /** - * atomic64_dec - decrement atomic64 variable + * arch_atomic64_dec - decrement atomic64 variable * @v: pointer to type atomic64_t * * Atomically decrements @v by 1. 
*/ -static inline void atomic64_dec(atomic64_t *v) +static inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } /** - * atomic64_dec_and_test - decrement and test + * arch_atomic64_dec_and_test - decrement and test * @v: pointer to type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic64_dec_and_test(atomic64_t *v) +static inline int arch_atomic64_dec_and_test(atomic64_t *v) { - return atomic64_dec_return(v) == 0; + return arch_atomic64_dec_return(v) == 0; } /** @@ -255,13 +256,13 @@ static inline int atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic64_inc_and_test(atomic64_t *v) +static inline int arch_atomic64_inc_and_test(atomic64_t *v) { - return atomic64_inc_return(v) == 0; + return arch_atomic64_inc_return(v) == 0; } /** - * atomic64_add_negative - add and test if negative + * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer to type atomic64_t * @@ -269,13 +270,13 @@ static inline int atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic64_add_negative(long long i, atomic64_t *v) +static inline int arch_atomic64_add_negative(long long i, atomic64_t *v) { - return atomic64_add_return(i, v) < 0; + return arch_atomic64_add_return(i, v) < 0; } /** - * atomic64_add_unless - add unless the number is a given value + * arch_atomic64_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -283,7 +284,8 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. 
* Returns non-zero if the add was done, zero otherwise. */ -static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, + long long u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); @@ -294,7 +296,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) } -static inline int atomic64_inc_not_zero(atomic64_t *v) +static inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; alternative_atomic64(inc_not_zero, "=&a" (r), @@ -302,7 +304,7 @@ static inline int atomic64_inc_not_zero(atomic64_t *v) return r; } -static inline long long atomic64_dec_if_positive(atomic64_t *v) +static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) { long long r; alternative_atomic64(dec_if_positive, "=&A" (r), @@ -313,70 +315,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void atomic64_and(long long i, atomic64_t *v) +static inline void arch_atomic64_and(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; } -static inline long long atomic64_fetch_and(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; return old; } -static inline void atomic64_or(long long i, atomic64_t *v) +static inline void arch_atomic64_or(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; } -static inline long long atomic64_fetch_or(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) { 
long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; return old; } -static inline void atomic64_xor(long long i, atomic64_t *v) +static inline void arch_atomic64_xor(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; } -static inline long long atomic64_fetch_xor(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; return old; } -static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c + i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) c = old; return old; } -#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) +#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v)) #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 738495c..6106b59 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -11,37 +11,37 @@ #define ATOMIC64_INIT(i) { (i) } /** - * atomic64_read - read atomic64 variable + * arch_atomic64_read - read atomic64 variable * @v: pointer of type atomic64_t * * Atomically reads the value of @v. * Doesn't imply a read memory barrier. 
*/ -static inline long atomic64_read(const atomic64_t *v) +static inline long arch_atomic64_read(const atomic64_t *v) { return READ_ONCE((v)->counter); } /** - * atomic64_set - set atomic64 variable + * arch_atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: required value * * Atomically sets the value of @v to @i. */ -static inline void atomic64_set(atomic64_t *v, long i) +static inline void arch_atomic64_set(atomic64_t *v, long i) { WRITE_ONCE(v->counter, i); } /** - * atomic64_add - add integer to atomic64 variable + * arch_atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v. */ -static __always_inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void arch_atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -49,13 +49,13 @@ static __always_inline void atomic64_add(long i, atomic64_t *v) } /** - * atomic64_sub - subtract the atomic64 variable + * arch_atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v. */ -static inline void atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) @@ -63,7 +63,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) } /** - * atomic64_sub_and_test - subtract value from variable and test result + * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer to type atomic64_t * @@ -71,18 +71,18 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. 
*/ -static inline bool atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** - * atomic64_inc - increment atomic64 variable + * arch_atomic64_inc - increment atomic64 variable * @v: pointer to type atomic64_t * * Atomically increments @v by 1. */ -static __always_inline void atomic64_inc(atomic64_t *v) +static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -90,12 +90,12 @@ static __always_inline void atomic64_inc(atomic64_t *v) } /** - * atomic64_dec - decrement atomic64 variable + * arch_atomic64_dec - decrement atomic64 variable * @v: pointer to type atomic64_t * * Atomically decrements @v by 1. */ -static __always_inline void atomic64_dec(atomic64_t *v) +static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -103,33 +103,33 @@ static __always_inline void atomic64_dec(atomic64_t *v) } /** - * atomic64_dec_and_test - decrement and test + * arch_atomic64_dec_and_test - decrement and test * @v: pointer to type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -static inline bool atomic64_dec_and_test(atomic64_t *v) +static inline bool arch_atomic64_dec_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** - * atomic64_inc_and_test - increment and test + * arch_atomic64_inc_and_test - increment and test * @v: pointer to type atomic64_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. 
*/ -static inline bool atomic64_inc_and_test(atomic64_t *v) +static inline bool arch_atomic64_inc_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** - * atomic64_add_negative - add and test if negative + * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer to type atomic64_t * @@ -137,59 +137,59 @@ static inline bool atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline bool atomic64_add_negative(long i, atomic64_t *v) +static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** - * atomic64_add_return - add and return + * arch_atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } -static inline long atomic64_sub_return(long i, atomic64_t *v) +static inline long arch_atomic64_sub_return(long i, atomic64_t *v) { - return atomic64_add_return(-i, v); + return arch_atomic64_add_return(-i, v); } -static inline long atomic64_fetch_add(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) { return xadd(&v->counter, i); } -static inline long atomic64_fetch_sub(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) { return xadd(&v->counter, -i); } -#define atomic64_inc_return(v) (atomic64_add_return(1, (v))) -#define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) +#define arch_atomic64_inc_return(v) (arch_atomic64_add_return(1, (v))) +#define arch_atomic64_dec_return(v) (arch_atomic64_sub_return(1, (v))) -static inline long atomic64_cmpxchg(atomic64_t *v, long old, long 
new) +static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) { - return cmpxchg(&v->counter, old, new); + return arch_cmpxchg(&v->counter, old, new); } -#define atomic64_try_cmpxchg atomic64_try_cmpxchg -static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) { return try_cmpxchg(&v->counter, old, new); } -static inline long atomic64_xchg(atomic64_t *v, long new) +static inline long arch_atomic64_xchg(atomic64_t *v, long new) { return xchg(&v->counter, new); } /** - * atomic64_add_unless - add unless the number is a given value + * arch_atomic64_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -197,37 +197,37 @@ static inline long atomic64_xchg(atomic64_t *v, long new) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) +static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u) { - s64 c = atomic64_read(v); + s64 c = arch_atomic64_read(v); do { if (unlikely(c == u)) return false; - } while (!atomic64_try_cmpxchg(v, &c, c + a)); + } while (!arch_atomic64_try_cmpxchg(v, &c, c + a)); return true; } -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0) /* - * atomic64_dec_if_positive - decrement by 1 if old value positive + * arch_atomic64_dec_if_positive - decrement by 1 if old value positive * @v: pointer of type atomic_t * * The function returns the old value of *v minus 1, even if * the atomic variable, v, was not decremented. 
*/ -static inline long atomic64_dec_if_positive(atomic64_t *v) +static inline long arch_atomic64_dec_if_positive(atomic64_t *v) { - s64 dec, c = atomic64_read(v); + s64 dec, c = arch_atomic64_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; - } while (!atomic64_try_cmpxchg(v, &c, dec)); + } while (!arch_atomic64_try_cmpxchg(v, &c, dec)); return dec; } -static inline void atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) @@ -235,16 +235,16 @@ static inline void atomic64_and(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_and(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val & i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } -static inline void atomic64_or(long i, atomic64_t *v) +static inline void arch_atomic64_or(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) @@ -252,16 +252,16 @@ static inline void atomic64_or(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_or(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val | i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } -static inline void atomic64_xor(long i, atomic64_t *v) +static inline void arch_atomic64_xor(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) @@ -269,12 +269,12 @@ static inline void atomic64_xor(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_xor(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = 
arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val ^ i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e1259f0..042b5e8 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ "lfence", X86_FEATURE_LFENCE_RDTSC) -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else #define dma_rmb() barrier() -#endif #define dma_wmb() barrier() #ifdef CONFIG_X86_32 @@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#if defined(CONFIG_X86_PPRO_FENCE) - -/* - * For this option x86 doesn't have a strong TSO memory - * model and we should fall back to full barriers. 
- */ - -#define __smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -#define __smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - ___p1; \ -}) - -#else /* regular x86 TSO memory ordering */ - #define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ @@ -107,8 +79,6 @@ do { \ ___p1; \ }) -#endif - /* Atomic operations are already serializing on x86 */ #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 3fa0398..9f645ba 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -78,7 +78,7 @@ set_bit(long nr, volatile unsigned long *addr) : "iq" ((u8)CONST_MASK(nr)) : "memory"); } else { - asm volatile(LOCK_PREFIX "bts %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr) : "memory"); } } @@ -94,7 +94,7 @@ set_bit(long nr, volatile unsigned long *addr) */ static __always_inline void __set_bit(long nr, volatile unsigned long *addr) { - asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory"); + asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory"); } /** @@ -115,7 +115,7 @@ clear_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)~CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btr %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -137,7 +137,7 @@ static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *ad static __always_inline void __clear_bit(long nr, volatile unsigned long *addr) { - asm volatile("btr %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr)); } static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr) @@ -182,7 
+182,7 @@ static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long * */ static __always_inline void __change_bit(long nr, volatile unsigned long *addr) { - asm volatile("btc %1,%0" : ADDR : "Ir" (nr)); + asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr)); } /** @@ -201,7 +201,7 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) : CONST_MASK_ADDR(nr, addr) : "iq" ((u8)CONST_MASK(nr))); } else { - asm volatile(LOCK_PREFIX "btc %1,%0" + asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0" : BITOP_ADDR(addr) : "Ir" (nr)); } @@ -217,7 +217,8 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) */ static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), + *addr, "Ir", nr, "%0", c); } /** @@ -246,7 +247,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * { bool oldbit; - asm("bts %2,%1" + asm(__ASM_SIZE(bts) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -263,7 +264,8 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long * */ static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), + *addr, "Ir", nr, "%0", c); } /** @@ -286,7 +288,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long { bool oldbit; - asm volatile("btr %2,%1" + asm volatile(__ASM_SIZE(btr) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); @@ -298,7 +300,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon { bool oldbit; - asm volatile("btc %2,%1" + asm volatile(__ASM_SIZE(btc) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); @@ -316,7 +318,8 @@ static __always_inline bool 
__test_and_change_bit(long nr, volatile unsigned lon */ static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); + GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), + *addr, "Ir", nr, "%0", c); } static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) @@ -329,7 +332,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l { bool oldbit; - asm volatile("bt %2,%1" + asm volatile(__ASM_SIZE(bt) " %2,%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 56bd436..e3efd8a 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -145,13 +145,13 @@ extern void __add_wrong_size(void) # include <asm/cmpxchg_64.h> #endif -#define cmpxchg(ptr, old, new) \ +#define arch_cmpxchg(ptr, old, new) \ __cmpxchg(ptr, old, new, sizeof(*(ptr))) -#define sync_cmpxchg(ptr, old, new) \ +#define arch_sync_cmpxchg(ptr, old, new) \ __sync_cmpxchg(ptr, old, new, sizeof(*(ptr))) -#define cmpxchg_local(ptr, old, new) \ +#define arch_cmpxchg_local(ptr, old, new) \ __cmpxchg_local(ptr, old, new, sizeof(*(ptr))) @@ -221,7 +221,7 @@ extern void __add_wrong_size(void) #define __try_cmpxchg(ptr, pold, new, size) \ __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) -#define try_cmpxchg(ptr, pold, new) \ +#define try_cmpxchg(ptr, pold, new) \ __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) /* @@ -250,10 +250,10 @@ extern void __add_wrong_size(void) __ret; \ }) -#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) -#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ +#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ __cmpxchg_double(, p1, p2, o1, o2, n1, n2) #endif /* ASM_X86_CMPXCHG_H */ 
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 1732704..1a2eafca 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -36,10 +36,10 @@ static inline void set_64bit(volatile u64 *ptr, u64 value) } #ifdef CONFIG_X86_CMPXCHG64 -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ (unsigned long long)(n))) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ (unsigned long long)(n))) #endif @@ -76,7 +76,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) * to simulate the cmpxchg8b on the 80386 and 80486 CPU. */ -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ @@ -93,7 +93,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) __ret; }) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 03cad19..bfca3b3 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -7,13 +7,13 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) *ptr = val; } -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg_local((ptr), (o), (n)); \ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 736771c..b27da96 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -140,7 +140,6 @@ extern void 
clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These will statically patch the target code for additional @@ -196,13 +195,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#else -/* - * Fall back to dynamic for gcc versions which don't support asm goto. Should be - * a minority now anyway. - */ -#define static_cpu_has(bit) boot_cpu_has(bit) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0dfe4d3..d554c11 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -213,6 +213,7 @@ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -315,6 +316,7 @@ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ #define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ #define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ @@ -327,6 +329,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural 
Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h index 10f8d59..a5d86fc 100644 --- a/arch/x86/include/asm/crypto/camellia.h +++ b/arch/x86/include/asm/crypto/camellia.h @@ -2,8 +2,9 @@ #ifndef ASM_X86_CAMELLIA_H #define ASM_X86_CAMELLIA_H -#include <linux/kernel.h> +#include <crypto/b128ops.h> #include <linux/crypto.h> +#include <linux/kernel.h> #define CAMELLIA_MIN_KEY_SIZE 16 #define CAMELLIA_MAX_KEY_SIZE 32 @@ -11,16 +12,13 @@ #define CAMELLIA_TABLE_BYTE_LEN 272 #define CAMELLIA_PARALLEL_BLOCKS 2 +struct crypto_skcipher; + struct camellia_ctx { u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)]; u32 key_length; }; -struct camellia_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct camellia_ctx camellia_ctx; -}; - struct camellia_xts_ctx { struct camellia_ctx tweak_ctx; struct camellia_ctx crypt_ctx; @@ -30,11 +28,7 @@ extern int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len, u32 *flags); -extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); -extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); /* regular block cipher functions */ diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 553a03d..d181863 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ 
b/arch/x86/include/asm/crypto/glue_helper.h @@ -45,7 +45,7 @@ struct common_glue_ctx { }; static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, - struct blkcipher_desc *desc, + struct skcipher_walk *walk, bool fpu_enabled, unsigned int nbytes) { if (likely(fpu_blocks_limit < 0)) @@ -61,33 +61,6 @@ static inline bool glue_fpu_begin(unsigned int bsize, int fpu_blocks_limit, if (nbytes < bsize * (unsigned int)fpu_blocks_limit) return false; - if (desc) { - /* prevent sleeping if FPU is in use */ - desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - } - - kernel_fpu_begin(); - return true; -} - -static inline bool glue_skwalk_fpu_begin(unsigned int bsize, - int fpu_blocks_limit, - struct skcipher_walk *walk, - bool fpu_enabled, unsigned int nbytes) -{ - if (likely(fpu_blocks_limit < 0)) - return false; - - if (fpu_enabled) - return true; - - /* - * Vector-registers are only used when chunk to be processed is large - * enough, so do not enable FPU until it is necessary. - */ - if (nbytes < bsize * (unsigned int)fpu_blocks_limit) - return false; - /* prevent sleeping if FPU is in use */ skcipher_walk_atomise(walk); @@ -126,41 +99,17 @@ static inline void le128_inc(le128 *i) i->b = cpu_to_le64(b); } -extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - -extern int glue_cbc_encrypt_128bit(const common_glue_func_t fn, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - -extern int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, - unsigned int nbytes); - -extern int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes); - -extern int glue_xts_crypt_128bit(const struct 
common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); - -extern int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, - struct blkcipher_desc *desc, - struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes, - common_glue_func_t tweak_fn, void *tweak_ctx, - void *crypt_ctx); +extern int glue_ecb_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + +extern int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn, + struct skcipher_request *req); + +extern int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); + +extern int glue_ctr_req_128bit(const struct common_glue_ctx *gctx, + struct skcipher_request *req); extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx, struct skcipher_request *req, diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h index c958b7b..db7c9cc 100644 --- a/arch/x86/include/asm/crypto/serpent-avx.h +++ b/arch/x86/include/asm/crypto/serpent-avx.h @@ -2,15 +2,13 @@ #ifndef ASM_X86_SERPENT_AVX_H #define ASM_X86_SERPENT_AVX_H -#include <linux/crypto.h> +#include <crypto/b128ops.h> #include <crypto/serpent.h> +#include <linux/types.h> -#define SERPENT_PARALLEL_BLOCKS 8 +struct crypto_skcipher; -struct serpent_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct serpent_ctx serpent_ctx; -}; +#define SERPENT_PARALLEL_BLOCKS 8 struct serpent_xts_ctx { struct serpent_ctx tweak_ctx; @@ -38,12 +36,7 @@ extern void __serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, extern void serpent_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv); extern void serpent_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void 
lrw_serpent_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key, +extern int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); #endif diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h index 65bb80a..f618bf2 100644 --- a/arch/x86/include/asm/crypto/twofish.h +++ b/arch/x86/include/asm/crypto/twofish.h @@ -4,19 +4,8 @@ #include <linux/crypto.h> #include <crypto/twofish.h> -#include <crypto/lrw.h> #include <crypto/b128ops.h> -struct twofish_lrw_ctx { - struct lrw_table_ctx lrw_table; - struct twofish_ctx twofish_ctx; -}; - -struct twofish_xts_ctx { - struct twofish_ctx tweak_ctx; - struct twofish_ctx crypt_ctx; -}; - /* regular block cipher functions from twofish_x86_64 module */ asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, const u8 *src); @@ -36,12 +25,4 @@ extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, le128 *iv); -extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - -extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm); - -extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen); - #endif /* ASM_X86_TWOFISH_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 5e12c63..a8f6c80 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,6 +6,9 @@ struct dev_archdata { #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif +#ifdef CONFIG_STA2X11 + bool is_sta2x11; +#endif }; #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h index 1295bc6..1a19251 100644 --- a/arch/x86/include/asm/dma-direct.h +++ 
b/arch/x86/include/asm/dma-direct.h @@ -2,29 +2,8 @@ #ifndef ASM_X86_DMA_DIRECT_H #define ASM_X86_DMA_DIRECT_H 1 -#include <linux/mem_encrypt.h> - -#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ #endif /* ASM_X86_DMA_DIRECT_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6277c83..89ce4bf 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -36,37 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask); bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs); - -extern void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - -static inline unsigned long dma_alloc_coherent_mask(struct device *dev, - gfp_t gfp) -{ - unsigned long dma_mask = 0; - - dma_mask = dev->coherent_dma_mask; - if (!dma_mask) - dma_mask = (gfp & GFP_DMA) ? 
DMA_BIT_MASK(24) : DMA_BIT_MASK(32); - - return dma_mask; -} - -static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) -{ - unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); - - if (dma_mask <= DMA_BIT_MASK(24)) - gfp |= GFP_DMA; -#ifdef CONFIG_X86_64 - if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; -#endif - return gfp; -} - #endif diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85f6ccb..cec5fae 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -6,6 +6,8 @@ #include <asm/pgtable.h> #include <asm/processor-flags.h> #include <asm/tlb.h> +#include <asm/nospec-branch.h> +#include <asm/mmu_context.h> /* * We map the EFI regions needed for runtime services non-contiguously, @@ -36,8 +38,18 @@ extern asmlinkage unsigned long efi_call_phys(void *, ...); -#define arch_efi_call_virt_setup() kernel_fpu_begin() -#define arch_efi_call_virt_teardown() kernel_fpu_end() +#define arch_efi_call_virt_setup() \ +({ \ + kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ +}) + +#define arch_efi_call_virt_teardown() \ +({ \ + firmware_restrict_branch_speculation_end(); \ + kernel_fpu_end(); \ +}) + /* * Wrap all the virtual calls in a way that forces the parameters on the stack. @@ -58,14 +70,13 @@ extern asmlinkage u64 efi_call(void *fp, ...); #define efi_call_phys(f, args...) 
efi_call((f), args) /* - * Scratch space used for switching the pagetable in the EFI stub + * struct efi_scratch - Scratch space used while switching to/from efi_mm + * @phys_stack: stack used during EFI Mixed Mode + * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm */ struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; + u64 phys_stack; + struct mm_struct *prev_mm; } __packed; #define arch_efi_call_virt_setup() \ @@ -73,12 +84,10 @@ struct efi_scratch { efi_sync_low_kernel_mappings(); \ preempt_disable(); \ __kernel_fpu_begin(); \ + firmware_restrict_branch_speculation_start(); \ \ - if (efi_scratch.use_pgd) { \ - efi_scratch.prev_cr3 = __read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ - } \ + if (!efi_enabled(EFI_OLD_MEMMAP)) \ + efi_switch_mm(&efi_mm); \ }) #define arch_efi_call_virt(p, f, args...) \ @@ -86,11 +95,10 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - if (efi_scratch.use_pgd) { \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - } \ + if (!efi_enabled(EFI_OLD_MEMMAP)) \ + efi_switch_mm(efi_scratch.prev_mm); \ \ + firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ preempt_enable(); \ }) @@ -131,6 +139,7 @@ extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); +extern void efi_switch_mm(struct mm_struct *mm); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 7c341a7..5ea2afd 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -40,6 +40,7 @@ typedef struct { #endif #if IS_ENABLED(CONFIG_HYPERV) unsigned int irq_hv_reenlightenment_count; + unsigned int hyperv_stimer0_count; #endif } ____cacheline_aligned irq_cpustat_t; diff --git 
a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 2851077..32e666e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); extern asmlinkage void kvm_posted_intr_nested_ipi(void); extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); +extern asmlinkage void uv_bau_message_intr1(void); extern asmlinkage void spurious_interrupt(void); extern asmlinkage void thermal_interrupt(void); diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/asm/hyperv-tlfs.h index 197c2e6..416cb0e 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -1,6 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_X86_HYPERV_H -#define _ASM_X86_HYPERV_H + +/* + * This file contains definitions from Hyper-V Hypervisor Top-Level Functional + * Specification (TLFS): + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs + */ + +#ifndef _ASM_X86_HYPERV_TLFS_H +#define _ASM_X86_HYPERV_TLFS_H #include <linux/types.h> @@ -14,6 +21,7 @@ #define HYPERV_CPUID_FEATURES 0x40000003 #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 #define HYPERV_CPUID_MIN 0x40000005 @@ -77,6 +85,9 @@ /* Crash MSR available */ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) +/* stimer Direct Mode is available */ +#define HV_X64_STIMER_DIRECT_MODE_AVAILABLE (1 << 19) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. 
The format is the same as the partition creation @@ -156,6 +167,9 @@ /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) +/* Recommend using enlightened VMCS */ +#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED (1 << 14) + /* * Crash notification flag. */ @@ -189,7 +203,7 @@ #define HV_X64_MSR_EOI 0x40000070 #define HV_X64_MSR_ICR 0x40000071 #define HV_X64_MSR_TPR 0x40000072 -#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 /* Define synthetic interrupt controller model specific registers. */ #define HV_X64_MSR_SCONTROL 0x40000080 @@ -237,28 +251,77 @@ #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor. + */ +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 guest_physical_address:52; + }; +}; + +/* + * TSC page layout. + */ +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; + u64 reserved2[509]; +}; + +/* + * The guest OS needs to register the guest ID with the hypervisor. + * The guest ID is a 64 bit entity and the structure of this ID is + * specified in the Hyper-V specification: + * + * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx + * + * While the current guideline does not specify how Linux guest ID(s) + * need to be generated, our plan is to publish the guidelines for + * Linux and other guest operating systems that currently are hosted + * on Hyper-V. The implementation here conforms to this yet + * unpublished guidelines. 
+ * + * + * Bit(s) + * 63 - Indicates if the OS is Open Source or not; 1 is Open Source + * 62:56 - Os Type; Linux is 0x100 + * 55:48 - Distro specific identification + * 47:16 - Linux kernel version number + * 15:0 - Distro specific identification + * + * + */ + +#define HV_LINUX_VENDOR_ID 0x8100 + /* TSC emulation after migration */ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 struct hv_reenlightenment_control { - u64 vector:8; - u64 reserved1:8; - u64 enabled:1; - u64 reserved2:15; - u64 target_vp:32; + __u64 vector:8; + __u64 reserved1:8; + __u64 enabled:1; + __u64 reserved2:15; + __u64 target_vp:32; }; #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 struct hv_tsc_emulation_control { - u64 enabled:1; - u64 reserved:63; + __u64 enabled:1; + __u64 reserved:63; }; struct hv_tsc_emulation_status { - u64 inprogress:1; - u64 reserved:63; + __u64 inprogress:1; + __u64 reserved:63; }; #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 @@ -275,10 +338,13 @@ struct hv_tsc_emulation_status { #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d -#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001 -#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12 -#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ - (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +/* Hyper-V Enlightened VMCS version mask in nested features CPUID */ +#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 @@ -298,12 +364,22 @@ enum HV_GENERIC_SET_FORMAT { HV_GENERIC_SET_ALL, }; +#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) +#define HV_HYPERCALL_FAST_BIT BIT(16) +#define HV_HYPERCALL_VARHEAD_OFFSET 17 
+#define HV_HYPERCALL_REP_COMP_OFFSET 32 +#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) +#define HV_HYPERCALL_REP_START_OFFSET 48 +#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) + /* hypercall status code */ #define HV_STATUS_SUCCESS 0 #define HV_STATUS_INVALID_HYPERCALL_CODE 2 #define HV_STATUS_INVALID_HYPERCALL_INPUT 3 #define HV_STATUS_INVALID_ALIGNMENT 4 +#define HV_STATUS_INVALID_PARAMETER 5 #define HV_STATUS_INSUFFICIENT_MEMORY 11 +#define HV_STATUS_INVALID_PORT_ID 17 #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 @@ -318,6 +394,8 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_SINT_COUNT (16) /* Define the expected SynIC version. */ #define HV_SYNIC_VERSION_1 (0x1) +/* Valid SynIC vectors are 16-255. */ +#define HV_SYNIC_FIRST_VALID_VECTOR (16) #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) @@ -412,6 +490,216 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ }; +/* Define virtual processor assist page structure. 
*/ +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved; + __u64 vtl_control[2]; + __u64 nested_enlightenments_control[2]; + __u32 enlighten_vmentry; + __u64 current_nested_vmcs; +}; + +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 
vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + u32 hv_clean_fields; + u32 hv_padding_32; + u32 hv_synthetic_controls; + u32 hv_enlightenments_control; + u32 hv_vp_id; + + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 padding64_5[7]; + u64 xss_exit_bitmap; + u64 padding64_6[7]; +}; + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0 +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP 
BIT(1) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14) +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15) + +#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF + #define HV_STIMER_ENABLE (1ULL << 0) #define HV_STIMER_PERIODIC (1ULL << 1) #define HV_STIMER_LAZY (1ULL << 2) diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h new file mode 100644 index 0000000..3cb002b --- /dev/null +++ b/arch/x86/include/asm/intel_pconfig.h @@ -0,0 +1,65 @@ +#ifndef _ASM_X86_INTEL_PCONFIG_H +#define _ASM_X86_INTEL_PCONFIG_H + +#include <asm/asm.h> +#include <asm/processor.h> + +enum pconfig_target { + INVALID_TARGET = 0, + MKTME_TARGET = 1, + PCONFIG_TARGET_NR +}; + +int pconfig_target_supported(enum pconfig_target target); + +enum pconfig_leaf { + MKTME_KEY_PROGRAM = 0, + PCONFIG_LEAF_INVALID, +}; + +#define PCONFIG ".byte 0x0f, 0x01, 0xc5" + +/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */ + +/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */ +#define MKTME_KEYID_SET_KEY_DIRECT 0 +#define MKTME_KEYID_SET_KEY_RANDOM 1 +#define MKTME_KEYID_CLEAR_KEY 2 +#define MKTME_KEYID_NO_ENCRYPT 3 + +/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */ +#define MKTME_AES_XTS_128 (1 << 8) + +/* 
Return codes from the PCONFIG MKTME_KEY_PROGRAM */ +#define MKTME_PROG_SUCCESS 0 +#define MKTME_INVALID_PROG_CMD 1 +#define MKTME_ENTROPY_ERROR 2 +#define MKTME_INVALID_KEYID 3 +#define MKTME_INVALID_ENC_ALG 4 +#define MKTME_DEVICE_BUSY 5 + +/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */ +struct mktme_key_program { + u16 keyid; + u32 keyid_ctrl; + u8 __rsvd[58]; + u8 key_field_1[64]; + u8 key_field_2[64]; +} __packed __aligned(256); + +static inline int mktme_key_program(struct mktme_key_program *key_program) +{ + unsigned long rax = MKTME_KEY_PROGRAM; + + if (!pconfig_target_supported(MKTME_TARGET)) + return -ENXIO; + + asm volatile(PCONFIG + : "=a" (rax), "=b" (key_program) + : "0" (rax), "1" (key_program) + : "memory", "cc"); + + return rax; +} + +#endif /* _ASM_X86_INTEL_PCONFIG_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 95e9486..f6e5b93 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void); */ #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. 
Accidentally out of order processors (PPro errata #51) - */ - -static inline void flush_write_buffers(void) -{ -#if defined(CONFIG_X86_PPRO_FENCE) - asm volatile("lock; addl $0,0(%%esp)": : :"memory"); -#endif -} - #endif /* __KERNEL__ */ extern void native_io_delay(void); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index a8834dd..fd20a23 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -183,16 +183,17 @@ extern void disable_ioapic_support(void); extern void __init io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); -extern void native_disable_io_apic(void); +extern void native_restore_boot_irq_mode(void); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { - return x86_io_apic_ops.read(apic, reg); + return x86_apic_ops.io_apic_read(apic, reg); } extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); -extern void disable_IO_APIC(void); +extern void clear_IO_APIC(void); +extern void restore_boot_irq_mode(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ @@ -228,10 +229,11 @@ static inline void mp_save_irq(struct mpc_intsrc *m) { } static inline void disable_ioapic_support(void) { } static inline void io_apic_init_mappings(void) { } #define native_io_apic_read NULL -#define native_disable_io_apic NULL +#define native_restore_boot_irq_mode NULL static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } +static inline void restore_boot_irq_mode(void) { } #endif diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 1e5d5d9..baedab8 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,13 +2,10 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int 
iommu_detected; extern int iommu_pass_through; -int x86_dma_supported(struct device *dev, u64 mask); - /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index e71c112..404c5fd 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -106,9 +106,10 @@ #if IS_ENABLED(CONFIG_HYPERV) #define HYPERV_REENLIGHTENMENT_VECTOR 0xee +#define HYPERV_STIMER0_VECTOR 0xed #endif -#define LOCAL_TIMER_VECTOR 0xed +#define LOCAL_TIMER_VECTOR 0xec #define NR_VECTORS 256 diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h index 875b543..b885a96 100644 --- a/arch/x86/include/asm/jailhouse_para.h +++ b/arch/x86/include/asm/jailhouse_para.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL2.0 */ /* - * Jailhouse paravirt_ops implementation + * Jailhouse paravirt detection * * Copyright (c) Siemens AG, 2015-2017 * diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 460991e..db7ba2f 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -5,10 +5,6 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY -extern unsigned long page_offset_base; -extern unsigned long vmalloc_base; -extern unsigned long vmemmap_base; - void kernel_randomize_memory(void); #else static inline void kernel_randomize_memory(void) { } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dd6f57a..949c977 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include <asm/msr-index.h> #include <asm/asm.h> #include <asm/kvm_page_track.h> +#include <asm/hyperv-tlfs.h> #define KVM_MAX_VCPUS 288 #define KVM_SOFT_MAX_VCPUS 240 @@ -73,6 +74,7 @@ #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) +#define 
KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -498,6 +500,7 @@ struct kvm_vcpu_arch { u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool apicv_active; + bool load_eoi_exitmap_pending; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; @@ -507,6 +510,7 @@ struct kvm_vcpu_arch { u64 smi_count; bool tpr_access_reporting; u64 ia32_xss; + u64 microcode_version; /* * Paging state of the vcpu @@ -570,7 +574,7 @@ struct kvm_vcpu_arch { } exception; struct kvm_queued_interrupt { - bool pending; + bool injected; bool soft; u8 nr; } interrupt; @@ -753,6 +757,12 @@ struct kvm_hv { u64 hv_crash_ctl; HV_REFERENCE_TSC_PAGE tsc_ref; + + struct idr conn_to_evt; + + u64 hv_reenlightenment_control; + u64 hv_tsc_emulation_control; + u64 hv_tsc_emulation_status; }; enum kvm_irqchip_mode { @@ -761,15 +771,6 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; -struct kvm_sev_info { - bool active; /* SEV enabled guest */ - unsigned int asid; /* ASID used for this guest */ - unsigned int handle; /* SEV firmware handle */ - int fd; /* SEV device fd */ - unsigned long pages_locked; /* Number of pages locked */ - struct list_head regions_list; /* List of registered regions */ -}; - struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -799,13 +800,13 @@ struct kvm_arch { struct mutex apic_map_lock; struct kvm_apic_map *apic_map; - unsigned int tss_addr; bool apic_access_page_done; gpa_t wall_clock; - bool ept_identity_pagetable_done; - gpa_t ept_identity_map_addr; + bool mwait_in_guest; + bool hlt_in_guest; + bool pause_in_guest; unsigned long irq_sources_bitmap; s64 kvmclock_offset; @@ -848,17 +849,8 @@ struct kvm_arch { bool disabled_lapic_found; - /* Struct members for AVIC */ - u32 avic_vm_id; - u32 ldr_mode; - struct page *avic_logical_id_table_page; - 
struct page *avic_physical_id_table_page; - struct hlist_node hnode; - bool x2apic_format; bool x2apic_broadcast_quirk_disabled; - - struct kvm_sev_info sev_info; }; struct kvm_vm_stat { @@ -935,6 +927,8 @@ struct kvm_x86_ops { bool (*cpu_has_high_real_mode_segbase)(void); void (*cpuid_update)(struct kvm_vcpu *vcpu); + struct kvm *(*vm_alloc)(void); + void (*vm_free)(struct kvm *); int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); @@ -1006,6 +1000,7 @@ struct kvm_x86_ops { void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); + int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); int (*get_lpage_level)(void); @@ -1095,6 +1090,8 @@ struct kvm_x86_ops { int (*mem_enc_op)(struct kvm *kvm, void __user *argp); int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); + + int (*get_msr_feature)(struct kvm_msr_entry *entry); }; struct kvm_arch_async_pf { @@ -1106,6 +1103,17 @@ struct kvm_arch_async_pf { extern struct kvm_x86_ops *kvm_x86_ops; +#define __KVM_HAVE_ARCH_VM_ALLOC +static inline struct kvm *kvm_arch_alloc_vm(void) +{ + return kvm_x86_ops->vm_alloc(); +} + +static inline void kvm_arch_free_vm(struct kvm *kvm) +{ + return kvm_x86_ops->vm_free(kvm); +} + int kvm_mmu_module_init(void); void kvm_mmu_module_exit(void); @@ -1184,6 +1192,8 @@ enum emulation_result { #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_RETRY (1 << 3) #define EMULTYPE_NO_REEXECUTE (1 << 4) +#define EMULTYPE_NO_UD_ON_FAIL (1 << 5) +#define EMULTYPE_VMWARE (1 << 6) int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, void *insn, int insn_len); @@ -1201,8 +1211,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr); struct x86_emulate_ctxt; -int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); -int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port); +int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_vcpu_halt(struct kvm_vcpu *vcpu); @@ -1464,7 +1473,4 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #define put_smstate(type, buf, offset, val) \ *(type *)((buf) + (offset) - 0x7e00) = val -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end); - #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 7b407dd..3aea265 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,6 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, #ifdef CONFIG_KVM_GUEST bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); +unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); @@ -115,6 +116,11 @@ static inline unsigned int kvm_arch_para_features(void) return 0; } +static inline unsigned int kvm_arch_para_hints(void) +{ + return 0; +} + static inline u32 kvm_read_and_reset_pf_reason(void) { return 0; diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 96ea4b5..8c7b3e5 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -138,58 +138,6 @@ struct mce_log_buffer { struct mce entry[MCE_LOG_LEN]; }; -struct mca_config { - bool dont_log_ce; - bool cmci_disabled; - bool lmce_disabled; - bool ignore_ce; - bool disabled; - bool ser; - bool recovery; - bool bios_cmci_threshold; - u8 banks; - s8 bootlog; - int tolerant; - int monarch_timeout; - int panic_timeout; - 
u32 rip_msr; -}; - -struct mce_vendor_flags { - /* - * Indicates that overflow conditions are not fatal, when set. - */ - __u64 overflow_recov : 1, - - /* - * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and - * Recovery. It indicates support for data poisoning in HW and deferred - * error interrupts. - */ - succor : 1, - - /* - * (AMD) SMCA: This bit indicates support for Scalable MCA which expands - * the register space for each MCA bank and also increases number of - * banks. Also, to accommodate the new banks and registers, the MCA - * register space is moved to a new MSR range. - */ - smca : 1, - - __reserved_0 : 61; -}; - -struct mca_msr_regs { - u32 (*ctl) (int bank); - u32 (*status) (int bank); - u32 (*addr) (int bank); - u32 (*misc) (int bank); -}; - -extern struct mce_vendor_flags mce_flags; - -extern struct mca_msr_regs msr_ops; - enum mce_notifier_prios { MCE_PRIO_FIRST = INT_MAX, MCE_PRIO_SRAO = INT_MAX - 1, @@ -346,6 +294,7 @@ enum smca_bank_types { SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ + SMCA_RESERVED, /* Reserved */ SMCA_EX, /* Execution Unit */ SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 22c5f3e..c064383 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -22,6 +22,7 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; +extern bool sev_enabled; void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr, @@ -48,8 +49,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size); - bool sme_active(void); bool sev_active(void); diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 
55520cec..2b7cc53 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -6,20 +6,6 @@ #include <linux/earlycpio.h> #include <linux/initrd.h> -#define native_rdmsr(msr, val1, val2) \ -do { \ - u64 __val = __rdmsr((msr)); \ - (void)((val1) = (u32)__val); \ - (void)((val2) = (u32)(__val >> 32)); \ -} while (0) - -#define native_wrmsr(msr, low, high) \ - __wrmsr(msr, low, high) - -#define native_wrmsrl(msr, val) \ - __wrmsr((msr), (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) - struct ucode_patch { struct list_head plist; void *data; /* Intel uses only this one */ @@ -37,7 +23,13 @@ struct cpu_signature { struct device; -enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; +enum ucode_state { + UCODE_OK = 0, + UCODE_NEW, + UCODE_UPDATED, + UCODE_NFOUND, + UCODE_ERROR, +}; struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, @@ -54,7 +46,7 @@ struct microcode_ops { * are being called. * See also the "Synchronization" section in microcode_core.c. 
*/ - int (*apply_microcode) (int cpu); + enum ucode_state (*apply_microcode) (int cpu); int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); }; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index c931b88..57e3785 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -24,11 +24,12 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, #endif /* !CONFIG_PARAVIRT */ #ifdef CONFIG_PERF_EVENTS -extern struct static_key rdpmc_always_available; + +DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); static inline void load_mm_cr4(struct mm_struct *mm) { - if (static_key_false(&rdpmc_always_available) || + if (static_branch_unlikely(&rdpmc_always_available_key) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else @@ -74,6 +75,7 @@ static inline void *ldt_slot_va(int slot) return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot); #else BUG(); + return (void *)fix_to_virt(FIX_HOLE); #endif } diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 25283f7..b90e796 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -6,90 +6,23 @@ #include <linux/atomic.h> #include <linux/nmi.h> #include <asm/io.h> -#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HVCPUID_VERSION_FEATURES). 
- */ -enum hv_cpuid_function { - HVCPUID_VERSION_FEATURES = 0x00000001, - HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, - HVCPUID_INTERFACE = 0x40000001, - - /* - * The remaining functions depend on the value of - * HVCPUID_INTERFACE - */ - HVCPUID_VERSION = 0x40000002, - HVCPUID_FEATURES = 0x40000003, - HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, - HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, -}; - struct ms_hyperv_info { u32 features; u32 misc_features; u32 hints; + u32 nested_features; u32 max_vp_index; u32 max_lp_index; }; extern struct ms_hyperv_info ms_hyperv; -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor. - */ -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - }; -}; - -/* - * TSC page layout. - */ - -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -}; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 /* - * Generate the guest ID based on the guideline described above. + * Generate the guest ID. 
*/ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, @@ -173,6 +106,19 @@ void hv_remove_kexec_handler(void); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); +/* + * Routines for stimer0 Direct Mode handling. + * On x86/x64, there are no percpu actions to take. + */ +void hv_stimer0_vector_handler(struct pt_regs *regs); +void hv_stimer0_callback_vector(void); +int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)); +void hv_remove_stimer0_irq(int irq); + +static inline void hv_enable_stimer0_percpu_irq(int irq) {} +static inline void hv_disable_stimer0_percpu_irq(int irq) {} + + #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; @@ -215,14 +161,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) return hv_status; } -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) - /* Fast hypercall with 8 bytes of input and no output */ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) { @@ -294,6 +232,15 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, */ extern u32 *hv_vp_index; extern u32 hv_max_vp_index; +extern struct hv_vp_assist_page **hv_vp_assist_page; + +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + if (!hv_vp_assist_page) + return NULL; + + return hv_vp_assist_page[cpu]; +} /** * hv_cpu_number_to_vp_number() - Map CPU to VP. 
@@ -330,6 +277,10 @@ static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; +static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) +{ + return NULL; +} #endif /* CONFIG_HYPERV */ #ifdef CONFIG_HYPERV_TSCPAGE diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index c9084de..53d5b1b 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -353,7 +353,21 @@ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 +#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL +#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2) +#define MSR_F15H_PERF_CTL2 (MSR_F15H_PERF_CTL + 4) +#define MSR_F15H_PERF_CTL3 (MSR_F15H_PERF_CTL + 6) +#define MSR_F15H_PERF_CTL4 (MSR_F15H_PERF_CTL + 8) +#define MSR_F15H_PERF_CTL5 (MSR_F15H_PERF_CTL + 10) + #define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_PERF_CTR0 MSR_F15H_PERF_CTR +#define MSR_F15H_PERF_CTR1 (MSR_F15H_PERF_CTR + 2) +#define MSR_F15H_PERF_CTR2 (MSR_F15H_PERF_CTR + 4) +#define MSR_F15H_PERF_CTR3 (MSR_F15H_PERF_CTR + 6) +#define MSR_F15H_PERF_CTR4 (MSR_F15H_PERF_CTR + 8) +#define MSR_F15H_PERF_CTR5 (MSR_F15H_PERF_CTR + 10) + #define MSR_F15H_NB_PERF_CTL 0xc0010240 #define MSR_F15H_NB_PERF_CTR 0xc0010241 #define MSR_F15H_PTSC 0xc0010280 diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 30df295..04addd6 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -108,6 +108,20 @@ static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) : : "c" (msr), "a"(low), "d" (high) : "memory"); } +#define native_rdmsr(msr, val1, val2) \ +do { \ + u64 __val = __rdmsr((msr)); \ + (void)((val1) = (u32)__val); \ + (void)((val2) = (u32)(__val >> 32)); \ +} while (0) + +#define native_wrmsr(msr, low, high) \ + __wrmsr(msr, low, high) + +#define 
native_wrmsrl(msr, val) \ + __wrmsr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)) + static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; @@ -218,9 +232,6 @@ static __always_inline unsigned long long rdtsc_ordered(void) return rdtsc(); } -/* Deprecated, keep it for a cycle for easier merging: */ -#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0) - static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 76b0585..f928ad9 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -8,6 +8,50 @@ #include <asm/cpufeatures.h> #include <asm/msr-index.h> +/* + * Fill the CPU return stack buffer. + * + * Each entry in the RSB, if used for a speculative 'ret', contains an + * infinite 'pause; lfence; jmp' loop to capture speculative execution. + * + * This is required in various cases for retpoline and IBRS-based + * mitigations for the Spectre variant 2 vulnerability. Sometimes to + * eliminate potentially bogus entries from the RSB, and sometimes + * purely to ensure that it doesn't get empty, which on some CPUs would + * allow predictions from other (unwanted!) sources to be used. + * + * We define a CPP macro such that it can be used from both .S files and + * inline assembly. It's possible to do a .macro and then include that + * from C via asm(".include <asm/nospec-branch.h>") but let's not go there. + */ + +#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ +#define RSB_FILL_LOOPS 16 /* To avoid underflow */ + +/* + * Google experimented with loop-unrolling and this turned out to be + * the optimal version — two calls, each with their own speculation + * trap should their return address end up getting used, in a loop. 
+ */ +#define __FILL_RETURN_BUFFER(reg, nr, sp) \ + mov $(nr/2), reg; \ +771: \ + call 772f; \ +773: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 773b; \ +772: \ + call 774f; \ +775: /* speculation trap */ \ + pause; \ + lfence; \ + jmp 775b; \ +774: \ + dec reg; \ + jnz 771b; \ + add $(BITS_PER_LONG/8) * nr, sp; + #ifdef __ASSEMBLY__ /* @@ -24,6 +68,18 @@ .endm /* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +.macro ANNOTATE_RETPOLINE_SAFE + .Lannotate_\@: + .pushsection .discard.retpoline_safe + _ASM_PTR .Lannotate_\@ + .popsection +.endm + +/* * These are the bare retpoline primitives for indirect jmp and call. * Do not use these directly; they only exist to make the ALTERNATIVE * invocation below less ugly. @@ -59,9 +115,9 @@ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(jmp *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD #else jmp *\reg #endif @@ -70,18 +126,25 @@ .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(call *\reg), \ + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD #else call *\reg #endif .endm -/* This clobbers the BX register */ -.macro FILL_RETURN_BUFFER nr:req ftr:req + /* + * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP + * monstrosity above, manually. 
+ */ +.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req #ifdef CONFIG_RETPOLINE - ALTERNATIVE "", "call __clear_rsb", \ftr + ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE "jmp .Lskip_rsb_\@", \ + __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ + \ftr +.Lskip_rsb_\@: #endif .endm @@ -93,6 +156,12 @@ ".long 999b - .\n\t" \ ".popsection\n\t" +#define ANNOTATE_RETPOLINE_SAFE \ + "999:\n\t" \ + ".pushsection .discard.retpoline_safe\n\t" \ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + #if defined(CONFIG_X86_64) && defined(RETPOLINE) /* @@ -102,6 +171,7 @@ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ "call __x86_indirect_thunk_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE) @@ -113,7 +183,10 @@ * otherwise we'll run out of registers. We don't care about CET * here, anyway. */ -# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ +# define CALL_NOSPEC \ + ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ "901: call 903f;\n" \ @@ -156,25 +229,90 @@ extern char __indirect_thunk_end[]; static inline void vmexit_fill_RSB(void) { #ifdef CONFIG_RETPOLINE - alternative_input("", - "call __fill_rsb", - X86_FEATURE_RETPOLINE, - ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory")); + unsigned long loops; + + asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE + ALTERNATIVE("jmp 910f", + __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)), + X86_FEATURE_RETPOLINE) + "910:" + : "=r" (loops), ASM_CALL_CONSTRAINT + : : "memory" ); #endif } +#define alternative_msr_write(_msr, _val, _feature) \ + asm volatile(ALTERNATIVE("", \ + "movl %[msr], %%ecx\n\t" \ + "movl %[val], %%eax\n\t" \ + "movl $0, %%edx\n\t" \ + "wrmsr", \ + _feature) \ + : : [msr] "i" (_msr), [val] "i" (_val) \ + : "eax", "ecx", "edx", "memory") + static inline void indirect_branch_prediction_barrier(void) { - asm volatile(ALTERNATIVE("", - "movl %[msr], %%ecx\n\t" - "movl %[val], %%eax\n\t" 
- "movl $0, %%edx\n\t" - "wrmsr", - X86_FEATURE_USE_IBPB) - : : [msr] "i" (MSR_IA32_PRED_CMD), - [val] "i" (PRED_CMD_IBPB) - : "eax", "ecx", "edx", "memory"); + alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, + X86_FEATURE_USE_IBPB); } +/* + * With retpoline, we must use IBRS to restrict branch prediction + * before calling into firmware. + * + * (Implemented as CPP macros due to header hell.) + */ +#define firmware_restrict_branch_speculation_start() \ +do { \ + preempt_disable(); \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ +} while (0) + +#define firmware_restrict_branch_speculation_end() \ +do { \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ +} while (0) + #endif /* __ASSEMBLY__ */ + +/* + * Below is used in the eBPF JIT compiler and emits the byte sequence + * for the following assembly: + * + * With retpolines configured: + * + * callq do_rop + * spec_trap: + * pause + * lfence + * jmp spec_trap + * do_rop: + * mov %rax,(%rsp) + * retq + * + * Without retpolines configured: + * + * jmp *%rax + */ +#ifdef CONFIG_RETPOLINE +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ + /* spec_trap: */ \ + EMIT2(0xF3, 0x90); /* pause */ \ + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ + /* do_rop: */ \ + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ + EMIT1(0xC3); /* retq */ +#else +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT2(0xFF, 0xE0); /* jmp *%rax */ +#endif + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index d652a38..939b1cf 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -11,6 +11,10 @@ extern unsigned long max_pfn; extern unsigned long phys_base; +extern unsigned long page_offset_base; +extern 
unsigned long vmalloc_base; +extern unsigned long vmemmap_base; + static inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; @@ -47,7 +51,7 @@ static inline void clear_page(void *page) clear_page_erms, X86_FEATURE_ERMS, "=D" (page), "0" (page) - : "memory", "rax", "rcx"); + : "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e140731..2c5a966 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -37,26 +37,24 @@ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * what Xen requires. */ -#ifdef CONFIG_X86_5LEVEL -#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) -#else -#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) -#endif +#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL) -#ifdef CONFIG_RANDOMIZE_MEMORY +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base #else -#define __PAGE_OFFSET __PAGE_OFFSET_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#ifdef CONFIG_X86_5LEVEL + #define __PHYSICAL_MASK_SHIFT 52 -#define __VIRTUAL_MASK_SHIFT 56 + +#ifdef CONFIG_X86_5LEVEL +#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 
56 : 47) #else -#define __PHYSICAL_MASK_SHIFT 46 #define __VIRTUAL_MASK_SHIFT 47 #endif diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 554841f..9be2bf1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -7,6 +7,7 @@ #ifdef CONFIG_PARAVIRT #include <asm/pgtable_types.h> #include <asm/asm.h> +#include <asm/nospec-branch.h> #include <asm/paravirt_types.h> @@ -567,17 +568,22 @@ static inline p4dval_t p4d_val(p4d_t p4d) return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); } -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { - pgdval_t val = native_pgd_val(pgd); - - PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val); + PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); } -static inline void pgd_clear(pgd_t *pgdp) -{ - set_pgd(pgdp, __pgd(0)); -} +#define set_pgd(pgdp, pgdval) do { \ + if (pgtable_l5_enabled) \ + __set_pgd(pgdp, pgdval); \ + else \ + set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ +} while (0) + +#define pgd_clear(pgdp) do { \ + if (pgtable_l5_enabled) \ + set_pgd(pgdp, __pgd(0)); \ +} while (0) #endif /* CONFIG_PGTABLE_LEVELS == 5 */ @@ -879,23 +885,27 @@ extern void default_banner(void); #define INTERRUPT_RETURN \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret);) #define DISABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #define ENABLE_INTERRUPTS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ 
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #ifdef CONFIG_X86_32 #define GET_CR0_INTO_EAX \ push %ecx; push %edx; \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_read_cr0); \ pop %edx; pop %ecx #else /* !CONFIG_X86_32 */ @@ -917,21 +927,25 @@ extern void default_banner(void); */ #define SWAPGS \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ - call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ ) #define GET_CR2_INTO_RAX \ - call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2) + ANNOTATE_RETPOLINE_SAFE; \ + call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2); #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \ CLBR_NONE, \ - jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64)) + ANNOTATE_RETPOLINE_SAFE; \ + jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64);) #ifdef CONFIG_DEBUG_ENTRY #define SAVE_FLAGS(clobbers) \ PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + ANNOTATE_RETPOLINE_SAFE; \ call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f624f1f..180bc0b 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -43,6 +43,7 @@ #include <asm/desc_defs.h> #include <asm/kmap_types.h> #include <asm/pgtable_types.h> +#include <asm/nospec-branch.h> struct page; struct thread_struct; @@ -392,7 +393,9 @@ int paravirt_disable_iospace(void); * offset into the paravirt_patch_template structure, and can therefore be * freely converted back into a structure offset. 
*/ -#define PARAVIRT_CALL "call *%c[paravirt_opptr];" +#define PARAVIRT_CALL \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%c[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index eb66fa9..959d618 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -151,6 +151,8 @@ extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, phys_addr_t addr); extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end); extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); +extern struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start, + int end, u64 addr); extern struct list_head pci_mmcfg_list; diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index ba3c523..a06b073 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr, { bool oldbit; - asm volatile("bt "__percpu_arg(2)",%1" + asm volatile("btl "__percpu_arg(2)",%1" CC_SET(c) : CC_OUT(c) (oldbit) : "m" (*(unsigned long __percpu *)addr), "Ir" (nr)); diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index aff42e1..263c142 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #if CONFIG_PGTABLE_LEVELS > 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { + if (!pgtable_l5_enabled) + return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } @@ -191,7 +193,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - ___p4d_free_tlb(tlb, p4d); + if (pgtable_l5_enabled) + ___p4d_free_tlb(tlb, 
p4d); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 876b4c7..6a59a6d 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -44,5 +44,6 @@ typedef union { */ #define PTRS_PER_PTE 512 +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 63c2552..89d5c88 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags; #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) -#define pgd_clear(pgd) native_pgd_clear(pgd) +#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d @@ -350,14 +350,14 @@ static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v | set); + return native_make_pmd(v | set); } static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) { pmdval_t v = native_pmd_val(pmd); - return __pmd(v & ~clear); + return native_make_pmd(v & ~clear); } static inline pmd_t pmd_mkold(pmd_t pmd) @@ -409,14 +409,14 @@ static inline pud_t pud_set_flags(pud_t pud, pudval_t set) { pudval_t v = native_pud_val(pud); - return __pud(v | set); + return native_make_pud(v | set); } static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) { pudval_t v = native_pud_val(pud); - return __pud(v & ~clear); + return native_make_pud(v & ~clear); } static inline pud_t pud_mkold(pud_t pud) @@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address) #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { + if (!pgtable_l5_enabled) + return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } @@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) /* to find an entry in a 
page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { + if (!pgtable_l5_enabled) + return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } @@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; + if (!pgtable_l5_enabled) + return 0; + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; @@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { + if (!pgtable_l5_enabled) + return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e554667..88a056b 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -32,6 +32,9 @@ extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +void sync_initial_page_table(void); + +static inline int pgd_large(pgd_t pgd) { return 0; } /* * Define this if things work differently on an i386 and an i486: diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0777e18..e3225e8 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -15,6 +15,8 @@ # include <asm/pgtable-2level_types.h> #endif +#define pgtable_l5_enabled 0 + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9..877bc27 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -28,6 +28,7 @@ extern pgd_t init_top_pgt[]; #define swapper_pg_dir init_top_pgt extern void paging_init(void); +static inline void sync_initial_page_table(void) { } #define pte_ERROR(e) \ pr_err("%s:%d: bad pte 
%p(%016lx)\n", \ @@ -217,29 +218,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { -#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) - p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); -#else - *p4dp = p4d; -#endif + pgd_t pgd; + + if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + *p4dp = p4d; + return; + } + + pgd = native_make_pgd(native_p4d_val(p4d)); + pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd); + *p4dp = native_make_p4d(native_pgd_val(pgd)); } static inline void native_p4d_clear(p4d_t *p4d) { -#ifdef CONFIG_X86_5LEVEL native_set_p4d(p4d, native_make_p4d(0)); -#else - native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)}); -#endif } static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION *pgdp = pti_set_user_pgd(pgdp, pgd); -#else - *pgdp = pgd; -#endif } static inline void native_pgd_clear(pgd_t *pgd) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6b8f73d..d5c21a3 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -20,6 +20,18 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; +#ifdef CONFIG_X86_5LEVEL +extern unsigned int __pgtable_l5_enabled; +#ifndef pgtable_l5_enabled +#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57) +#endif +#else +#define pgtable_l5_enabled 0 +#endif + +extern unsigned int pgdir_shift; +extern unsigned int ptrs_per_p4d; + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 @@ -29,24 +41,28 @@ typedef struct { pteval_t pte; } pte_t; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 48 +#define PGDIR_SHIFT pgdir_shift #define PTRS_PER_PGD 512 /* * 4th level page in 5-level paging case */ -#define P4D_SHIFT 39 -#define PTRS_PER_P4D 512 -#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) 
-#define P4D_MASK (~(P4D_SIZE - 1)) +#define P4D_SHIFT 39 +#define MAX_PTRS_PER_P4D 512 +#define PTRS_PER_P4D ptrs_per_p4d +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) + +#define MAX_POSSIBLE_PHYSMEM_BITS 52 #else /* CONFIG_X86_5LEVEL */ /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 +#define MAX_PTRS_PER_P4D 1 #endif /* CONFIG_X86_5LEVEL */ @@ -82,31 +98,33 @@ typedef struct { pteval_t pte; } pte_t; * range must not overlap with anything except the KASAN shadow area, which * is correct as KASAN disables KASLR. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM (1UL << MAX_PHYSMEM_BITS) -#ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(12800, UL) -# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) -# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -# define LDT_PGD_ENTRY _AC(-112, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) -#else -# define VMALLOC_SIZE_TB _AC(32, UL) -# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) -# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -# define LDT_PGD_ENTRY _AC(-3, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) -#endif +#define LDT_PGD_ENTRY_L4 -3UL +#define LDT_PGD_ENTRY_L5 -112UL +#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) +#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) + +#define __VMALLOC_BASE_L4 0xffffc90000000000 +#define __VMALLOC_BASE_L5 0xffa0000000000000 + +#define VMALLOC_SIZE_TB_L4 32UL +#define VMALLOC_SIZE_TB_L5 12800UL + +#define __VMEMMAP_BASE_L4 0xffffea0000000000 +#define __VMEMMAP_BASE_L5 0xffd4000000000000 -#ifdef CONFIG_RANDOMIZE_MEMORY +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base +# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? 
VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base #else -# define VMALLOC_START __VMALLOC_BASE -# define VMEMMAP_START __VMEMMAP_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +# define VMALLOC_START __VMALLOC_BASE_L4 +# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 +# define VMEMMAP_START __VMEMMAP_BASE_L4 +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) +#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 3696398..acfe755 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,7 +174,6 @@ enum page_cache_mode { #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -206,7 +205,6 @@ enum page_cache_mode { #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC) #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) @@ -323,6 +321,11 @@ static inline pudval_t native_pud_val(pud_t pud) #else #include <asm-generic/pgtable-nopud.h> +static inline pud_t native_make_pud(pudval_t val) +{ + return (pud_t) { .p4d.pgd = native_make_pgd(val) }; +} + static inline 
pudval_t native_pud_val(pud_t pud) { return native_pgd_val(pud.p4d.pgd); @@ -344,6 +347,11 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) #else #include <asm-generic/pgtable-nopmd.h> +static inline pmd_t native_make_pmd(pmdval_t val) +{ + return (pmd_t) { .pud.p4d.pgd = native_make_pgd(val) }; +} + static inline pmdval_t native_pmd_val(pmd_t pmd) { return native_pgd_val(pmd.pud.p4d.pgd); diff --git a/arch/x86/include/asm/platform_sst_audio.h b/arch/x86/include/asm/platform_sst_audio.h index 5973a2f..059823b 100644 --- a/arch/x86/include/asm/platform_sst_audio.h +++ b/arch/x86/include/asm/platform_sst_audio.h @@ -135,6 +135,7 @@ struct sst_platform_info { const struct sst_res_info *res_info; const struct sst_lib_dnld_info *lib_info; const char *platform; + bool streams_lost_on_suspend; }; int add_sst_platform_device(void); #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1bd9ed8..4fa4206 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -407,9 +407,19 @@ union irq_stack_union { DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible; DECLARE_INIT_PER_CPU(irq_stack_union); +static inline unsigned long cpu_kernelmode_gs_base(int cpu) +{ + return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu); +} + DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); extern asmlinkage void ignore_sysret(void); + +#if IS_ENABLED(CONFIG_KVM) +/* Save actual FS/GS selectors and bases to current->thread */ +void save_fsgs_for_kvm(void); +#endif #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR /* @@ -977,4 +987,5 @@ bool xen_set_default_idle(void); void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); +void microcode_check(void); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index 4e44250..4cf11d8 100644 --- a/arch/x86/include/asm/refcount.h +++ 
b/arch/x86/include/asm/refcount.h @@ -17,7 +17,7 @@ #define _REFCOUNT_EXCEPTION \ ".pushsection .text..refcount\n" \ "111:\tlea %[counter], %%" _ASM_CX "\n" \ - "112:\t" ASM_UD0 "\n" \ + "112:\t" ASM_UD2 "\n" \ ASM_UNREACHABLE \ ".popsection\n" \ "113:\n" \ @@ -67,13 +67,13 @@ static __always_inline __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r) { GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "er", i, "%0", e); + r->refs.counter, "er", i, "%0", e, "cx"); } static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r) { GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl", REFCOUNT_CHECK_LT_ZERO, - r->refs.counter, "%0", e); + r->refs.counter, "%0", e, "cx"); } static __always_inline __must_check diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fb3a6de..6847d85 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index f91c365..4914a3e 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -2,8 +2,7 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#define __CLOBBERS_MEM "memory" -#define __CLOBBERS_MEM_CC_CX "memory", "cc", "cx" +#define __CLOBBERS_MEM(clb...) 
"memory", ## clb #if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) @@ -40,18 +39,19 @@ do { \ #endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ - __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM) + __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) -#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc) \ +#define GEN_UNARY_SUFFIXED_RMWcc(op, suffix, var, arg0, cc, clobbers...)\ __GEN_RMWcc(op " " arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX) + __CLOBBERS_MEM(clobbers)) #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0, var, cc, \ - __CLOBBERS_MEM, vcon (val)) + __CLOBBERS_MEM(), vcon (val)) -#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc) \ +#define GEN_BINARY_SUFFIXED_RMWcc(op, suffix, var, vcon, val, arg0, cc, \ + clobbers...) \ __GEN_RMWcc(op __BINARY_RMWcc_ARG arg0 "\n\t" suffix, var, cc, \ - __CLOBBERS_MEM_CC_CX, vcon (val)) + __CLOBBERS_MEM(clobbers), vcon (val)) #endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index d6baf23..5c019d2 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; +extern char __entry_trampoline_start[], __entry_trampoline_end[]; #endif #endif /* _ASM_X86_SECTIONS_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a418976..f75bff8 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -177,16 +177,6 @@ static inline int wbinvd_on_all_cpus(void) extern unsigned disabled_cpus; #ifdef CONFIG_X86_LOCAL_APIC - -#ifndef CONFIG_X86_64 -static inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return 
GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - -#endif - extern int hard_smp_processor_id(void); #else /* CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4fc1e9d..4617a2b 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,13 +27,8 @@ # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# ifdef CONFIG_X86_5LEVEL -# define MAX_PHYSADDR_BITS 52 -# define MAX_PHYSMEM_BITS 52 -# else -# define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 46 -# endif +# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44) +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f737068..133d942 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -87,8 +87,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl); -extern unsigned int code_bytes; - /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0487ac0..93b462e 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -60,7 +60,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_dr; u32 intercept_exceptions; u64 intercept; - u8 reserved_1[42]; + u8 reserved_1[40]; + u16 pause_filter_thresh; u16 pause_filter_count; u64 iopm_base_pa; u64 msrpm_base_pa; diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 1c6a6cb..ff6c92e 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void) { } #endif - -extern void 
*x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs); -extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - #endif /* _ASM_X86_SWIOTLB_H */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h deleted file mode 100644 index 82c34ee..0000000 --- a/arch/x86/include/asm/sys_ia32.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * sys_ia32.h - Linux ia32 syscall interfaces - * - * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#ifndef _ASM_X86_SYS_IA32_H -#define _ASM_X86_SYS_IA32_H - -#ifdef CONFIG_COMPAT - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <linux/types.h> -#include <linux/signal.h> -#include <asm/compat.h> -#include <asm/ia32.h> - -/* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); -asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); - -asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, const char __user *, - struct stat64 __user *, int); -struct mmap_arg_struct32; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); - -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); - -asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); - -long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); -long sys32_vm86_warning(void); - -asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); -asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, 
- unsigned, unsigned, int); -asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); -asmlinkage long sys32_fallocate(int, int, unsigned, - unsigned, unsigned, unsigned); - -/* ia32/ia32_signal.c */ -asmlinkage long sys32_sigreturn(void); -asmlinkage long sys32_rt_sigreturn(void); - -#endif /* CONFIG_COMPAT */ - -#endif /* _ASM_X86_SYS_IA32_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index bad25bb..ae6e05f 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -17,6 +17,7 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); @@ -34,7 +35,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); #ifdef CONFIG_X86_32 /* kernel/signal.c */ -asmlinkage unsigned long sys_sigreturn(void); +asmlinkage long sys_sigreturn(void); /* kernel/vm86_32.c */ struct vm86_struct; diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index cf5d53c..2701d22 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -31,6 +31,7 @@ static inline cycles_t get_cycles(void) } extern struct system_counterval_t convert_art_to_tsc(u64 art); +extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); extern void tsc_early_delay_calibrate(void); extern void tsc_init(void); diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index ecb9dde..62c79e2 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -3833,7 +3833,7 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL 
-#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 8b67807..5db8b0b 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index fc2f082..ce8b4da 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -122,12 +122,24 @@ struct x86_init_pci { * @guest_late_init: guest late init * @x2apic_available: X2APIC detection * @init_mem_mapping: setup early mappings during init_mem_mapping() + * @init_after_bootmem: guest init after boot allocator is finished */ struct x86_hyper_init { void (*init_platform)(void); void (*guest_late_init)(void); bool (*x2apic_available)(void); void (*init_mem_mapping)(void); + void (*init_after_bootmem)(void); +}; + +/** + * struct x86_init_acpi - x86 ACPI init functions + * @get_root_pointer: get RSDP address + * @reduced_hw_early_init: hardware reduced platform early init + */ +struct x86_init_acpi { + u64 (*get_root_pointer)(void); + void (*reduced_hw_early_init)(void); }; /** @@ -144,6 +156,7 @@ struct x86_init_ops { struct x86_init_iommu iommu; struct x86_init_pci pci; struct x86_hyper_init hyper; + struct x86_init_acpi acpi; }; /** @@ -274,16 +287,16 @@ struct x86_msi_ops { void (*restore_msi_irqs)(struct pci_dev 
*dev); }; -struct x86_io_apic_ops { - unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*disable)(void); +struct x86_apic_ops { + unsigned int (*io_apic_read) (unsigned int apic, unsigned int reg); + void (*restore)(void); }; extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; extern struct x86_platform_ops x86_platform; extern struct x86_msi_ops x86_msi; -extern struct x86_io_apic_ops x86_io_apic_ops; +extern struct x86_apic_ops x86_apic_ops; extern void x86_early_init_platform_quirks(void); extern void x86_init_noop(void); diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f3a9604..c535c2f 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -354,8 +354,25 @@ struct kvm_xcrs { __u64 padding[16]; }; -/* definition of registers in kvm_run */ +#define KVM_SYNC_X86_REGS (1UL << 0) +#define KVM_SYNC_X86_SREGS (1UL << 1) +#define KVM_SYNC_X86_EVENTS (1UL << 2) + +#define KVM_SYNC_X86_VALID_FIELDS \ + (KVM_SYNC_X86_REGS| \ + KVM_SYNC_X86_SREGS| \ + KVM_SYNC_X86_EVENTS) + +/* kvm_sync_regs struct included by kvm_run struct */ struct kvm_sync_regs { + /* Members of this structure are potentially malicious. + * Care must be taken by code reading, esp. interpreting, + * data fields from them inside KVM to prevent TOCTOU and + * double-fetch types of vulnerabilities. + */ + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; }; #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 7a2ade4..4c851eb 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -3,15 +3,16 @@ #define _UAPI_ASM_X86_KVM_PARA_H #include <linux/types.h> -#include <asm/hyperv.h> /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. 
*/ #define KVM_CPUID_SIGNATURE 0x40000000 -/* This CPUID returns a feature bitmap in eax. Before enabling a particular - * paravirtualization, the appropriate feature bit should be checked. +/* This CPUID returns two feature bitmaps in eax, edx. Before enabling + * a particular paravirtualization, the appropriate feature bit should + * be checked in eax. The performance hint feature bit should be checked + * in edx. */ #define KVM_CPUID_FEATURES 0x40000001 #define KVM_FEATURE_CLOCKSOURCE 0 @@ -26,6 +27,9 @@ #define KVM_FEATURE_PV_EOI 6 #define KVM_FEATURE_PV_UNHALT 7 #define KVM_FEATURE_PV_TLB_FLUSH 9 +#define KVM_FEATURE_ASYNC_PF_VMEXIT 10 + +#define KVM_HINTS_DEDICATED 0 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 91723461..955c2a2 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -5,31 +5,36 @@ #include <linux/types.h> #include <linux/ioctl.h> -/* Fields are zero when not available */ +/* + * Fields are zero when not available. Also, this struct is shared with + * userspace mcelog and thus must keep existing fields at current offsets. 
+ * Only add new fields to the end of the structure + */ struct mce { - __u64 status; - __u64 misc; - __u64 addr; - __u64 mcgstatus; - __u64 ip; - __u64 tsc; /* cpu time stamp counter */ - __u64 time; /* wall time_t when error was detected */ - __u8 cpuvendor; /* cpu vendor as encoded in system.h */ - __u8 inject_flags; /* software inject flags */ - __u8 severity; + __u64 status; /* Bank's MCi_STATUS MSR */ + __u64 misc; /* Bank's MCi_MISC MSR */ + __u64 addr; /* Bank's MCi_ADDR MSR */ + __u64 mcgstatus; /* Machine Check Global Status MSR */ + __u64 ip; /* Instruction Pointer when the error happened */ + __u64 tsc; /* CPU time stamp counter */ + __u64 time; /* Wall time_t when error was detected */ + __u8 cpuvendor; /* Kernel's X86_VENDOR enum */ + __u8 inject_flags; /* Software inject flags */ + __u8 severity; /* Error severity */ __u8 pad; - __u32 cpuid; /* CPUID 1 EAX */ - __u8 cs; /* code segment */ - __u8 bank; /* machine check bank */ - __u8 cpu; /* cpu number; obsolete; use extcpu now */ - __u8 finished; /* entry is valid */ - __u32 extcpu; /* linux cpu number that detected the error */ - __u32 socketid; /* CPU socket ID */ - __u32 apicid; /* CPU initial apic ID */ - __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ - __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ - __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ - __u64 ppin; /* Protected Processor Inventory Number */ + __u32 cpuid; /* CPUID 1 EAX */ + __u8 cs; /* Code segment */ + __u8 bank; /* Machine check bank reporting the error */ + __u8 cpu; /* CPU number; obsoleted by extcpu */ + __u8 finished; /* Entry is valid */ + __u32 extcpu; /* Linux CPU number that detected the error */ + __u32 socketid; /* CPU socket ID */ + __u32 apicid; /* CPU initial APIC ID */ + __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* 
Protected Processor Inventory Number */ + __u32 microcode; /* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 29786c8..02d6f5c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o obj-$(CONFIG_SYSFS) += ksysfs.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o +obj-y += alternative.o i8253.o hw_breakpoint.o obj-y += tsc.o tsc_msr.o io_delay.o rtc.o obj-y += pci-iommu_table.o obj-y += resource.o @@ -146,6 +146,6 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o - obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2aa9209..7a37d93 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1376,17 +1376,21 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) * * We initialize the Hardware-reduced ACPI model here: */ +void __init acpi_generic_reduced_hw_init(void) +{ + /* + * Override x86_init functions and bypass legacy PIC in + * hardware reduced ACPI mode. 
+ */ + x86_init.timers.timer_init = x86_init_noop; + x86_init.irqs.pre_vector_init = x86_init_noop; + legacy_pic = &null_legacy_pic; +} + static void __init acpi_reduced_hw_init(void) { - if (acpi_gbl_reduced_hardware) { - /* - * Override x86_init functions and bypass legacy pic - * in Hardware-reduced ACPI mode - */ - x86_init.timers.timer_init = x86_init_noop; - x86_init.irqs.pre_vector_init = x86_init_noop; - legacy_pic = &null_legacy_pic; - } + if (acpi_gbl_reduced_hardware) + x86_init.acpi.reduced_hw_early_init(); } /* diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index ecd486c..f299d8a 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -480,30 +480,21 @@ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, unsigned long attrs) { - dma_addr_t paddr; - unsigned long align_mask; - struct page *page; - - if (force_iommu && !(flag & GFP_DMA)) { - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - align_mask = (1UL << get_order(size)) - 1; - paddr = dma_map_area(dev, page_to_phys(page), size, - DMA_BIDIRECTIONAL, align_mask); - - flush_gart(); - if (paddr != bad_dma_addr) { - *dma_addr = paddr; - return page_address(page); - } - __free_pages(page, get_order(size)); - } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag, - attrs); + void *vaddr; + + vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); + if (!vaddr || + !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) + return vaddr; + *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size, + DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1); + flush_gart(); + if (unlikely(*dma_addr == bad_dma_addr)) + goto out_free; + return vaddr; +out_free: + dma_direct_free(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -513,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void 
*vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -705,7 +696,7 @@ static const struct dma_map_ops gart_dma_ops = { .alloc = gart_alloc_coherent, .free = gart_free_coherent, .mapping_error = gart_mapping_error, - .dma_supported = x86_dma_supported, + .dma_supported = dma_direct_supported, }; static void gart_iommu_shutdown(void) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b203af0..2aabd4c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1408,22 +1408,69 @@ static void lapic_setup_esr(void) oldvalue, value); } +static void apic_pending_intr_clear(void) +{ + long long max_loops = cpu_khz ? cpu_khz : 1000000; + unsigned long long tsc = 0, ntsc; + unsigned int queued; + unsigned long value; + int i, j, acked = 0; + + if (boot_cpu_has(X86_FEATURE_TSC)) + tsc = rdtsc(); + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. 
+ */ + do { + queued = 0; + for (i = APIC_ISR_NR - 1; i >= 0; i--) + queued |= apic_read(APIC_IRR + i*0x10); + + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for_each_set_bit(j, &value, 32) { + ack_APIC_irq(); + acked++; + } + } + if (acked > 256) { + pr_err("LAPIC pending interrupts after %d EOI\n", acked); + break; + } + if (queued) { + if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { + ntsc = rdtsc(); + max_loops = (cpu_khz << 10) - (ntsc - tsc); + } else { + max_loops--; + } + } + } while (queued && max_loops > 0); + WARN_ON(max_loops <= 0); +} + /** * setup_local_APIC - setup the local APIC * * Used to setup local APIC while initializing BSP or bringing up APs. * Always called with preemption disabled. */ -void setup_local_APIC(void) +static void setup_local_APIC(void) { int cpu = smp_processor_id(); - unsigned int value, queued; - int i, j, acked = 0; - unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz ? cpu_khz : 1000000; + unsigned int value; +#ifdef CONFIG_X86_32 + int logical_apicid, ldr_apicid; +#endif - if (boot_cpu_has(X86_FEATURE_TSC)) - tsc = rdtsc(); if (disable_apic) { disable_ioapic_support(); @@ -1460,11 +1507,11 @@ void setup_local_APIC(void) * initialized during get_smp_config(), make sure it matches the * actual value. 
*/ - i = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - WARN_ON(i != BAD_APICID && i != logical_smp_processor_id()); + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid); /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - logical_smp_processor_id(); + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; #endif /* @@ -1475,45 +1522,7 @@ void setup_local_APIC(void) value &= ~APIC_TPRI_MASK; apic_write(APIC_TASKPRI, value); - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - do { - queued = 0; - for (i = APIC_ISR_NR - 1; i >= 0; i--) - queued |= apic_read(APIC_IRR + i*0x10); - - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1<<j)) { - ack_APIC_irq(); - acked++; - } - } - } - if (acked > 256) { - printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n", - acked); - break; - } - if (queued) { - if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { - ntsc = rdtsc(); - max_loops = (cpu_khz << 10) - (ntsc - tsc); - } else - max_loops--; - } - } while (queued && max_loops > 0); - WARN_ON(max_loops <= 0); + apic_pending_intr_clear(); /* * Now that we are all set up, enable the APIC @@ -1570,7 +1579,7 @@ void setup_local_APIC(void) * TODO: set up through-local-APIC from through-I/O-APIC? 
--macro */ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!cpu && (pic_mode || !value)) { + if (!cpu && (pic_mode || !value || skip_ioapic_setup)) { value = APIC_DM_EXTINT; apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); } else { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8ad2e41..7553819 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -587,7 +587,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) mpc_ioapic_id(apic), pin); } -static void clear_IO_APIC (void) +void clear_IO_APIC (void) { int apic, pin; @@ -1410,7 +1410,7 @@ void __init enable_IO_APIC(void) clear_IO_APIC(); } -void native_disable_io_apic(void) +void native_restore_boot_irq_mode(void) { /* * If the i8259 is routed through an IOAPIC @@ -1438,20 +1438,12 @@ void native_disable_io_apic(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) +void restore_boot_irq_mode(void) { - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - if (!nr_legacy_irqs()) return; - x86_io_apic_ops.disable(); + x86_apic_ops.restore(); } #ifdef CONFIG_X86_32 @@ -1603,7 +1595,7 @@ static void __init delay_with_tsc(void) do { rep_nop(); now = rdtsc(); - } while ((now - start) < 40000000000UL / HZ && + } while ((now - start) < 40000000000ULL / HZ && time_before_eq(jiffies, end)); } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3cc471b..bb6f7a2 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -134,21 +134,40 @@ static void apic_update_vector(struct irq_data *irqd, unsigned int newvec, { struct apic_chip_data *apicd = apic_chip_data(irqd); struct irq_desc *desc = irq_data_to_desc(irqd); + bool managed = irqd_affinity_is_managed(irqd); lockdep_assert_held(&vector_lock); trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector, apicd->cpu); - /* Setup 
the vector move, if required */ - if (apicd->vector && cpu_online(apicd->cpu)) { + /* + * If there is no vector associated or if the associated vector is + * the shutdown vector, which is associated to make PCI/MSI + * shutdown mode work, then there is nothing to release. Clear out + * prev_vector for this and the offlined target case. + */ + apicd->prev_vector = 0; + if (!apicd->vector || apicd->vector == MANAGED_IRQ_SHUTDOWN_VECTOR) + goto setnew; + /* + * If the target CPU of the previous vector is online, then mark + * the vector as move in progress and store it for cleanup when the + * first interrupt on the new vector arrives. If the target CPU is + * offline then the regular release mechanism via the cleanup + * vector is not possible and the vector can be immediately freed + * in the underlying matrix allocator. + */ + if (cpu_online(apicd->cpu)) { apicd->move_in_progress = true; apicd->prev_vector = apicd->vector; apicd->prev_cpu = apicd->cpu; } else { - apicd->prev_vector = 0; + irq_matrix_free(vector_matrix, apicd->cpu, apicd->vector, + managed); } +setnew: apicd->vector = newvec; apicd->cpu = newcpu; BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec])); diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index f8d9d69..e2829bf 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -14,7 +14,7 @@ int x2apic_phys; static struct apic apic_x2apic_phys; -static int set_x2apic_phys_mode(char *arg) +static int __init set_x2apic_phys_mode(char *arg) { x2apic_phys = 1; return 0; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 570e8bb..a66229f 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o 
obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f0e6456..12bc0a1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -716,7 +716,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c) static void init_amd_gh(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_64 +#ifdef CONFIG_MMCONF_FAM10H /* do this for boot cpu */ if (c == &boot_cpu_data) check_enable_amd_mmconf_dmi(); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d71c8b5..bfca937 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -300,6 +300,15 @@ retpoline_auto: setup_force_cpu_cap(X86_FEATURE_USE_IBPB); pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); } + + /* + * Retpoline means the kernel is safe because it has no indirect + * branches. But firmware isn't, so use IBRS to protect that. + */ + if (boot_cpu_has(X86_FEATURE_IBRS)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } } #undef pr_fmt @@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) return sprintf(buf, "Not affected\n"); - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? 
", IBRS_FW" : "", spectre_v2_module_string()); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 824aee0..4702fbd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -487,7 +487,7 @@ void load_percpu_segment(int cpu) loadsegment(fs, __KERNEL_PERCPU); #else __loadsegment_simple(gs, 0); - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); #endif load_stack_canary_segment(); } @@ -1398,6 +1398,7 @@ __setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __aligned(PAGE_SIZE) __visible; +EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union); /* * The following percpu variables are hot. Align current_task to @@ -1749,3 +1750,33 @@ static int __init init_cpu_syscore(void) return 0; } core_initcall(init_cpu_syscore); + +/* + * The microcode loader calls this upon late microcode load to recheck features, + * only when microcode has been updated. Caller holds microcode_mutex and CPU + * hotplug lock. + */ +void microcode_check(void) +{ + struct cpuinfo_x86 info; + + perf_check_microcode(); + + /* Reload CPUID max function as it might've changed. */ + info.cpuid_level = cpuid_eax(0); + + /* + * Copy all capability leafs to pick up the synthetic ones so that + * memcmp() below doesn't fail on that. The ones coming from CPUID will + * get overwritten in get_cpu_cap(). 
+ */ + memcpy(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability)); + + get_cpu_cap(&info); + + if (!memcmp(&info.x86_capability, &boot_cpu_data.x86_capability, sizeof(info.x86_capability))) + return; + + pr_warn("x86/CPU: CPU features have changed after loading microcode, but might not take effect.\n"); + pr_warn("x86/CPU: Please consider either early loading through initrd/built-in or a potential BIOS update.\n"); +} diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d19e903..b9693b8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -105,7 +105,7 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) /* * Early microcode releases for the Spectre v2 mitigation were broken. * Information taken from; - * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/microcode-update-guidance.pdf + * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf * - https://kb.vmware.com/s/article/52345 * - Microcode revisions observed in the wild * - Release note from 20180108 microcode release @@ -123,7 +123,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_FAM6_KABYLAKE_MOBILE, 0x09, 0x80 }, { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c }, - { INTEL_FAM6_SKYLAKE_DESKTOP, 0x03, 0xc2 }, { INTEL_FAM6_BROADWELL_CORE, 0x04, 0x28 }, { INTEL_FAM6_BROADWELL_GT3E, 0x01, 0x1b }, { INTEL_FAM6_BROADWELL_XEON_D, 0x02, 0x14 }, @@ -144,6 +143,13 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) { int i; + /* + * We know that the hypervisor lie to us on the microcode version so + * we may as well hope that it is running the correct version. 
+ */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) + return false; + for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_model == spectre_bad_microcodes[i].model && c->x86_stepping == spectre_bad_microcodes[i].stepping) @@ -503,6 +509,90 @@ static void detect_vmx_virtcap(struct cpuinfo_x86 *c) } } +#define MSR_IA32_TME_ACTIVATE 0x982 + +/* Helpers to access TME_ACTIVATE MSR */ +#define TME_ACTIVATE_LOCKED(x) (x & 0x1) +#define TME_ACTIVATE_ENABLED(x) (x & 0x2) + +#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */ +#define TME_ACTIVATE_POLICY_AES_XTS_128 0 + +#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ + +#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */ +#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1 + +/* Values for mktme_status (SW only construct) */ +#define MKTME_ENABLED 0 +#define MKTME_DISABLED 1 +#define MKTME_UNINITIALIZED 2 +static int mktme_status = MKTME_UNINITIALIZED; + +static void detect_tme(struct cpuinfo_x86 *c) +{ + u64 tme_activate, tme_policy, tme_crypto_algs; + int keyid_bits = 0, nr_keyids = 0; + static u64 tme_activate_cpu0 = 0; + + rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); + + if (mktme_status != MKTME_UNINITIALIZED) { + if (tme_activate != tme_activate_cpu0) { + /* Broken BIOS? */ + pr_err_once("x86/tme: configuration is inconsistent between CPUs\n"); + pr_err_once("x86/tme: MKTME is not usable\n"); + mktme_status = MKTME_DISABLED; + + /* Proceed. We may need to exclude bits from x86_phys_bits. 
*/ + } + } else { + tme_activate_cpu0 = tme_activate; + } + + if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { + pr_info_once("x86/tme: not enabled by BIOS\n"); + mktme_status = MKTME_DISABLED; + return; + } + + if (mktme_status != MKTME_UNINITIALIZED) + goto detect_keyid_bits; + + pr_info("x86/tme: enabled by BIOS\n"); + + tme_policy = TME_ACTIVATE_POLICY(tme_activate); + if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128) + pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy); + + tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate); + if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) { + pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n", + tme_crypto_algs); + mktme_status = MKTME_DISABLED; + } +detect_keyid_bits: + keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); + nr_keyids = (1UL << keyid_bits) - 1; + if (nr_keyids) { + pr_info_once("x86/mktme: enabled by BIOS\n"); + pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids); + } else { + pr_info_once("x86/mktme: disabled by BIOS\n"); + } + + if (mktme_status == MKTME_UNINITIALIZED) { + /* MKTME is usable */ + mktme_status = MKTME_ENABLED; + } + + /* + * KeyID bits effectively lower the number of physical address + * bits. Update cpuinfo_x86::x86_phys_bits accordingly. + */ + c->x86_phys_bits -= keyid_bits; +} + static void init_intel_energy_perf(struct cpuinfo_x86 *c) { u64 epb; @@ -673,6 +763,9 @@ static void init_intel(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_VMX)) detect_vmx_virtcap(c); + if (cpu_has(c, X86_FEATURE_TME)) + detect_tme(c); + init_intel_energy_perf(c); init_intel_misc_features(c); diff --git a/arch/x86/kernel/cpu/intel_pconfig.c b/arch/x86/kernel/cpu/intel_pconfig.c new file mode 100644 index 0000000..0771a90 --- /dev/null +++ b/arch/x86/kernel/cpu/intel_pconfig.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel PCONFIG instruction support. 
+ * + * Copyright (C) 2017 Intel Corporation + * + * Author: + * Kirill A. Shutemov <kirill.shutemov@linux.intel.com> + */ + +#include <asm/cpufeature.h> +#include <asm/intel_pconfig.h> + +#define PCONFIG_CPUID 0x1b + +#define PCONFIG_CPUID_SUBLEAF_MASK ((1 << 12) - 1) + +/* Subleaf type (EAX) for PCONFIG CPUID leaf (0x1B) */ +enum { + PCONFIG_CPUID_SUBLEAF_INVALID = 0, + PCONFIG_CPUID_SUBLEAF_TARGETID = 1, +}; + +/* Bitmask of supported targets */ +static u64 targets_supported __read_mostly; + +int pconfig_target_supported(enum pconfig_target target) +{ + /* + * We would need to re-think the implementation once we get > 64 + * PCONFIG targets. Spec allows up to 2^32 targets. + */ + BUILD_BUG_ON(PCONFIG_TARGET_NR >= 64); + + if (WARN_ON_ONCE(target >= 64)) + return 0; + return targets_supported & (1ULL << target); +} + +static int __init intel_pconfig_init(void) +{ + int subleaf; + + if (!boot_cpu_has(X86_FEATURE_PCONFIG)) + return 0; + + /* + * Scan subleafs of PCONFIG CPUID leaf. + * + * Subleafs of the same type need not to be consecutive. + * + * Stop on the first invalid subleaf type. All subleafs after the first + * invalid are invalid too. 
+ */ + for (subleaf = 0; subleaf < INT_MAX; subleaf++) { + struct cpuid_regs regs; + + cpuid_count(PCONFIG_CPUID, subleaf, + &regs.eax, &regs.ebx, &regs.ecx, &regs.edx); + + switch (regs.eax & PCONFIG_CPUID_SUBLEAF_MASK) { + case PCONFIG_CPUID_SUBLEAF_INVALID: + /* Stop on the first invalid subleaf */ + goto out; + case PCONFIG_CPUID_SUBLEAF_TARGETID: + /* Mark supported PCONFIG targets */ + if (regs.ebx < 64) + targets_supported |= (1ULL << regs.ebx); + if (regs.ecx < 64) + targets_supported |= (1ULL << regs.ecx); + if (regs.edx < 64) + targets_supported |= (1ULL << regs.edx); + break; + default: + /* Unknown CPUID.PCONFIG subleaf: ignore */ + break; + } + } +out: + return 0; +} +arch_initcall(intel_pconfig_init); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index bdab7d2..fca759d 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1804,6 +1804,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, goto out_common_fail; } closid = ret; + ret = 0; rdtgrp->closid = closid; list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 231ad23..475cb4f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -491,7 +491,7 @@ static void do_inject(void) unsigned int cpu = i_mce.extcpu; u8 b = i_mce.bank; - rdtscll(i_mce.tsc); + i_mce.tsc = rdtsc_ordered(); if (i_mce.misc) i_mce.status |= MCI_STATUS_MISCV; diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index e956eb2..374d1aa 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -113,8 +113,6 @@ static inline void mce_register_injector_chain(struct notifier_block *nb) { } static inline void mce_unregister_injector_chain(struct notifier_block *nb) { } #endif -extern struct
mca_config mca_cfg; - #ifndef CONFIG_X86_64 /* * On 32-bit systems it would be difficult to safely unmap a poison page @@ -130,4 +128,61 @@ static inline void mce_unmap_kpfn(unsigned long pfn) {} #define mce_unmap_kpfn mce_unmap_kpfn #endif +struct mca_config { + bool dont_log_ce; + bool cmci_disabled; + bool ignore_ce; + + __u64 lmce_disabled : 1, + disabled : 1, + ser : 1, + recovery : 1, + bios_cmci_threshold : 1, + __reserved : 59; + + u8 banks; + s8 bootlog; + int tolerant; + int monarch_timeout; + int panic_timeout; + u32 rip_msr; +}; + +extern struct mca_config mca_cfg; + +struct mce_vendor_flags { + /* + * Indicates that overflow conditions are not fatal, when set. + */ + __u64 overflow_recov : 1, + + /* + * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and + * Recovery. It indicates support for data poisoning in HW and deferred + * error interrupts. + */ + succor : 1, + + /* + * (AMD) SMCA: This bit indicates support for Scalable MCA which expands + * the register space for each MCA bank and also increases number of + * banks. Also, to accommodate the new banks and registers, the MCA + * register space is moved to a new MSR range. 
+ */ + smca : 1, + + __reserved_0 : 61; +}; + +extern struct mce_vendor_flags mce_flags; + +struct mca_msr_regs { + u32 (*ctl) (int bank); + u32 (*status) (int bank); + u32 (*addr) (int bank); + u32 (*misc) (int bank); +}; + +extern struct mca_msr_regs msr_ops; + #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8ff94d1..42cf288 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -56,6 +56,9 @@ static DEFINE_MUTEX(mce_log_mutex); +/* sysfs synchronization */ +static DEFINE_MUTEX(mce_sysfs_mutex); + #define CREATE_TRACE_POINTS #include <trace/events/mce.h> @@ -130,6 +133,8 @@ void mce_setup(struct mce *m) if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) rdmsrl(MSR_PPIN, m->ppin); + + m->microcode = boot_cpu_data.microcode; } DEFINE_PER_CPU(struct mce, injectm); @@ -262,13 +267,15 @@ static void __print_mce(struct mce *m) */ pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, - cpu_data(m->extcpu).microcode); + m->microcode); } static void print_mce(struct mce *m) { __print_mce(m); - pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); + + if (m->cpuvendor != X86_VENDOR_AMD) + pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -1088,19 +1095,7 @@ static void mce_unmap_kpfn(unsigned long pfn) * a legal address. */ -/* - * Build time check to see if we have a spare virtual bit. Don't want - * to leave this until run time because most developers don't have a - * system that can exercise this code path. This will only become a - * problem if/when we move beyond 5-level page tables. 
- * - * Hard code "9" here because cpp doesn't grok ilog2(PTRS_PER_PGD) - */ -#if PGDIR_SHIFT + 9 < 63 decoy_addr = (pfn << PAGE_SHIFT) + (PAGE_OFFSET ^ BIT(63)); -#else -#error "no unused virtual bit available" -#endif if (set_memory_np(decoy_addr, 1)) pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn); @@ -1511,7 +1506,7 @@ static int __mcheck_cpu_cap_init(void) mca_cfg.rip_msr = MSR_IA32_MCG_EIP; if (cap & MCG_SER_P) - mca_cfg.ser = true; + mca_cfg.ser = 1; return 0; } @@ -1819,12 +1814,12 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) return; if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { - mca_cfg.disabled = true; + mca_cfg.disabled = 1; return; } if (mce_gen_pool_init()) { - mca_cfg.disabled = true; + mca_cfg.disabled = 1; pr_emerg("Couldn't allocate MCE records pool!\n"); return; } @@ -1902,11 +1897,11 @@ static int __init mcheck_enable(char *str) if (*str == '=') str++; if (!strcmp(str, "off")) - cfg->disabled = true; + cfg->disabled = 1; else if (!strcmp(str, "no_cmci")) cfg->cmci_disabled = true; else if (!strcmp(str, "no_lmce")) - cfg->lmce_disabled = true; + cfg->lmce_disabled = 1; else if (!strcmp(str, "dont_log_ce")) cfg->dont_log_ce = true; else if (!strcmp(str, "ignore_ce")) @@ -1914,9 +1909,9 @@ static int __init mcheck_enable(char *str) else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) cfg->bootlog = (str[0] == 'b'); else if (!strcmp(str, "bios_cmci_threshold")) - cfg->bios_cmci_threshold = true; + cfg->bios_cmci_threshold = 1; else if (!strcmp(str, "recovery")) - cfg->recovery = true; + cfg->recovery = 1; else if (isdigit(str[0])) { if (get_option(&str, &cfg->tolerant) == 2) get_option(&str, &(cfg->monarch_timeout)); @@ -2086,6 +2081,7 @@ static ssize_t set_ignore_ce(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.ignore_ce ^ !!new) { if (new) { /* disable ce features */ @@ -2098,6 +2094,8 @@ static ssize_t set_ignore_ce(struct device 
*s, on_each_cpu(mce_enable_ce, (void *)1, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2110,6 +2108,7 @@ static ssize_t set_cmci_disabled(struct device *s, if (kstrtou64(buf, 0, &new) < 0) return -EINVAL; + mutex_lock(&mce_sysfs_mutex); if (mca_cfg.cmci_disabled ^ !!new) { if (new) { /* disable cmci */ @@ -2121,6 +2120,8 @@ static ssize_t set_cmci_disabled(struct device *s, on_each_cpu(mce_enable_ce, NULL, 1); } } + mutex_unlock(&mce_sysfs_mutex); + return size; } @@ -2128,8 +2129,19 @@ static ssize_t store_int_with_restart(struct device *s, struct device_attribute *attr, const char *buf, size_t size) { - ssize_t ret = device_store_int(s, attr, buf, size); + unsigned long old_check_interval = check_interval; + ssize_t ret = device_store_ulong(s, attr, buf, size); + + if (check_interval == old_check_interval) + return ret; + + if (check_interval < 1) + check_interval = 1; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); + return ret; } @@ -2333,6 +2345,12 @@ static __init int mcheck_init_device(void) { int err; + /* + * Check if we have a spare virtual bit. This will only become + * a problem if/when we move beyond 5-level page tables. 
+ */ + MAYBE_BUILD_BUG_ON(__VIRTUAL_MASK_SHIFT >= 63); + if (!mce_available(&boot_cpu_data)) { err = -EIO; goto err_out; @@ -2381,7 +2399,7 @@ device_initcall_sync(mcheck_init_device); */ static int __init mcheck_disable(char *str) { - mca_cfg.disabled = true; + mca_cfg.disabled = 1; return 1; } __setup("nomce", mcheck_disable); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 0f32ad2..f7666ee 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -82,6 +82,7 @@ static struct smca_bank_name smca_names[] = { [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, [SMCA_DE] = { "decode_unit", "Decode Unit" }, + [SMCA_RESERVED] = { "reserved", "Reserved" }, [SMCA_EX] = { "execution_unit", "Execution Unit" }, [SMCA_FP] = { "floating_point", "Floating Point Unit" }, [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, @@ -110,14 +111,14 @@ const char *smca_get_long_name(enum smca_bank_types t) } EXPORT_SYMBOL_GPL(smca_get_long_name); -static enum smca_bank_types smca_get_bank_type(struct mce *m) +static enum smca_bank_types smca_get_bank_type(unsigned int bank) { struct smca_bank *b; - if (m->bank >= N_SMCA_BANK_TYPES) + if (bank >= MAX_NR_BANKS) return N_SMCA_BANK_TYPES; - b = &smca_banks[m->bank]; + b = &smca_banks[bank]; if (!b->hwid) return N_SMCA_BANK_TYPES; @@ -127,6 +128,9 @@ static enum smca_bank_types smca_get_bank_type(struct mce *m) static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ + /* Reserved type */ + { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 }, + /* ZN Core (HWID=0xB0) MCA types */ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, @@ -427,35 +431,58 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } +static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, + unsigned int 
block) +{ + u32 low, high; + u32 addr = 0; + + if (smca_get_bank_type(bank) == SMCA_RESERVED) + return addr; + + if (!block) + return MSR_AMD64_SMCA_MCx_MISC(bank); + + /* + * For SMCA enabled processors, BLKPTR field of the first MISC register + * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). + */ + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + return addr; + + if (!(low & MCI_CONFIG_MCAX)) + return addr; + + if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + (low & MASK_BLKPTR_LO)) + return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + + return addr; +} + static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block) { u32 addr = 0, offset = 0; - if (mce_flags.smca) { - if (!block) { - addr = MSR_AMD64_SMCA_MCx_MISC(bank); - } else { - /* - * For SMCA enabled processors, BLKPTR field of the - * first MISC register (MCx_MISC0) indicates presence of - * additional MISC register set (MISC1-4). - */ - u32 low, high; + if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) + return addr; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return addr; + /* Get address from already initialized block. 
*/ + if (per_cpu(threshold_banks, cpu)) { + struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank]; - if (!(low & MCI_CONFIG_MCAX)) - return addr; + if (bankp && bankp->blocks) { + struct threshold_block *blockp = &bankp->blocks[block]; - if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && - (low & MASK_BLKPTR_LO)) - addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + if (blockp) + return blockp->address; } - return addr; } + if (mce_flags.smca) + return smca_get_block_address(cpu, bank, block); + /* Fall back to method we used for older processors: */ switch (block) { case 0: @@ -760,7 +787,7 @@ bool amd_mce_is_memory_error(struct mce *m) u8 xec = (m->status >> 16) & 0x1f; if (mce_flags.smca) - return smca_get_bank_type(m) == SMCA_UMC && xec == 0x0; + return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0; return m->bank == 4 && xec == 0x8; } @@ -1063,7 +1090,7 @@ static struct kobj_type threshold_ktype = { static const char *get_name(unsigned int bank, struct threshold_block *b) { - unsigned int bank_type; + enum smca_bank_types bank_type; if (!mce_flags.smca) { if (b && bank == 4) @@ -1072,11 +1099,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return th_names[bank]; } - if (!smca_banks[bank].hwid) + bank_type = smca_get_bank_type(bank); + if (bank_type >= N_SMCA_BANK_TYPES) return NULL; - bank_type = smca_banks[bank].hwid->bank_type; - if (b && bank_type == SMCA_UMC) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) return smca_umc_block_names[b->block]; diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 330b846..0624957 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -58,7 +58,7 @@ static u8 amd_ucode_patch[PATCH_MAX_SIZE]; /* * Microcode patch container file is prepended to the initrd in cpio - * format. See Documentation/x86/early-microcode.txt + * format. 
See Documentation/x86/microcode.txt */ static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; @@ -339,7 +339,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) return -EINVAL; ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size); - if (ret != UCODE_OK) + if (ret > UCODE_UPDATED) return -EINVAL; return 0; @@ -498,7 +498,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -static int apply_microcode_amd(int cpu) +static enum ucode_state apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -512,7 +512,7 @@ static int apply_microcode_amd(int cpu) p = find_patch(cpu); if (!p) - return 0; + return UCODE_NFOUND; mc_amd = p->data; uci->mc = p->data; @@ -523,13 +523,13 @@ static int apply_microcode_amd(int cpu) if (rev >= mc_amd->hdr.patch_id) { c->microcode = rev; uci->cpu_sig.rev = rev; - return 0; + return UCODE_OK; } if (__apply_microcode_amd(mc_amd)) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); - return -1; + return UCODE_ERROR; } pr_info("CPU%d: new patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -537,7 +537,7 @@ static int apply_microcode_amd(int cpu) uci->cpu_sig.rev = mc_amd->hdr.patch_id; c->microcode = mc_amd->hdr.patch_id; - return 0; + return UCODE_UPDATED; } static int install_equiv_cpu_table(const u8 *buf) @@ -683,27 +683,35 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data, static enum ucode_state load_microcode_amd(bool save, u8 family, const u8 *data, size_t size) { + struct ucode_patch *p; enum ucode_state ret; /* free old equiv table */ free_equiv_cpu_table(); ret = __load_microcode_amd(family, data, size); - - if (ret != UCODE_OK) + if (ret != UCODE_OK) { cleanup(); + return ret; + } -#ifdef CONFIG_X86_32 - /* save BSP's matching patch for early load */ - if (save) { - struct ucode_patch *p = find_patch(0); - if (p) { 
- memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); - memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), - PATCH_MAX_SIZE)); - } + p = find_patch(0); + if (!p) { + return ret; + } else { + if (boot_cpu_data.microcode == p->patch_id) + return ret; + + ret = UCODE_NEW; } -#endif + + /* save BSP's matching patch for early load */ + if (!save) + return ret; + + memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); + memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), PATCH_MAX_SIZE)); + return ret; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 319dd65..10c4fc2 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -22,13 +22,16 @@ #define pr_fmt(fmt) "microcode: " fmt #include <linux/platform_device.h> +#include <linux/stop_machine.h> #include <linux/syscore_ops.h> #include <linux/miscdevice.h> #include <linux/capability.h> #include <linux/firmware.h> #include <linux/kernel.h> +#include <linux/delay.h> #include <linux/mutex.h> #include <linux/cpu.h> +#include <linux/nmi.h> #include <linux/fs.h> #include <linux/mm.h> @@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache); */ static DEFINE_MUTEX(microcode_mutex); +/* + * Serialize late loading so that CPUs get updated one-by-one. 
+ */ +static DEFINE_SPINLOCK(update_lock); + struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -373,26 +381,23 @@ static int collect_cpu_info(int cpu) return ret; } -struct apply_microcode_ctx { - int err; -}; - static void apply_microcode_local(void *arg) { - struct apply_microcode_ctx *ctx = arg; + enum ucode_state *err = arg; - ctx->err = microcode_ops->apply_microcode(smp_processor_id()); + *err = microcode_ops->apply_microcode(smp_processor_id()); } static int apply_microcode_on_target(int cpu) { - struct apply_microcode_ctx ctx = { .err = 0 }; + enum ucode_state err; int ret; - ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); - if (!ret) - ret = ctx.err; - + ret = smp_call_function_single(cpu, apply_microcode_local, &err, 1); + if (!ret) { + if (err == UCODE_ERROR) + ret = 1; + } return ret; } @@ -489,31 +494,124 @@ static void __exit microcode_dev_exit(void) /* fake device for request_firmware */ static struct platform_device *microcode_pdev; -static int reload_for_cpu(int cpu) +/* + * Late loading dance. Why the heavy-handed stomp_machine effort? + * + * - HT siblings must be idle and not execute other code while the other sibling + * is loading microcode in order to avoid any negative interactions caused by + * the loading. + * + * - In addition, microcode update on the cores must be serialized until this + * requirement can be relaxed in the future. Right now, this is conservative + * and good. 
+ */ +#define SPINUNIT 100 /* 100 nsec */ + +static int check_online_cpus(void) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; - int err = 0; + if (num_online_cpus() == num_present_cpus()) + return 0; - if (!uci->valid) - return err; + pr_err("Not all CPUs online, aborting microcode update.\n"); - ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - return err; + return -EINVAL; +} + +static atomic_t late_cpus_in; +static atomic_t late_cpus_out; + +static int __wait_for_cpus(atomic_t *t, long long timeout) +{ + int all_cpus = num_online_cpus(); + + atomic_inc(t); + + while (atomic_read(t) < all_cpus) { + if (timeout < SPINUNIT) { + pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n", + all_cpus - atomic_read(t)); + return 1; + } + + ndelay(SPINUNIT); + timeout -= SPINUNIT; + + touch_nmi_watchdog(); + } + return 0; +} + +/* + * Returns: + * < 0 - on error + * 0 - no update done + * 1 - microcode was updated + */ +static int __reload_late(void *info) +{ + int cpu = smp_processor_id(); + enum ucode_state err; + int ret = 0; + + /* + * Wait for all CPUs to arrive. A load will not be attempted unless all + * CPUs show up. + * */ + if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) + return -1; + + spin_lock(&update_lock); + apply_microcode_local(&err); + spin_unlock(&update_lock); + + if (err > UCODE_NFOUND) { + pr_warn("Error reloading microcode on CPU %d\n", cpu); + return -1; + /* siblings return UCODE_OK because their engine got updated already */ + } else if (err == UCODE_UPDATED || err == UCODE_OK) { + ret = 1; + } else { + return ret; + } + + /* + * Increase the wait timeout to a safe value here since we're + * serializing the microcode update and that could take a while on a + * large number of CPUs.
And that is fine as the *actual* timeout will + * be determined by the last CPU finished updating and thus cut short. + */ + if (__wait_for_cpus(&late_cpus_out, NSEC_PER_SEC * num_online_cpus())) + panic("Timeout during microcode update!\n"); + + return ret; +} + +/* + * Reload microcode late on all CPUs. Wait for a sec until they + * all gather together. + */ +static int microcode_reload_late(void) +{ + int ret; + + atomic_set(&late_cpus_in, 0); + atomic_set(&late_cpus_out, 0); + + ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); + if (ret > 0) + microcode_check(); + + return ret; } static ssize_t reload_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { + enum ucode_state tmp_ret = UCODE_OK; + int bsp = boot_cpu_data.cpu_index; unsigned long val; - int cpu; - ssize_t ret = 0, tmp_ret; + ssize_t ret = 0; ret = kstrtoul(buf, 0, &val); if (ret) @@ -522,23 +620,24 @@ static ssize_t reload_store(struct device *dev, if (val != 1) return size; + tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev, true); + if (tmp_ret != UCODE_NEW) + return size; + get_online_cpus(); - mutex_lock(&microcode_mutex); - for_each_online_cpu(cpu) { - tmp_ret = reload_for_cpu(cpu); - if (tmp_ret != 0) - pr_warn("Error reloading microcode on CPU %d\n", cpu); - /* save retval of the first encountered reload error */ - if (!ret) - ret = tmp_ret; - } - if (!ret) - perf_check_microcode(); + ret = check_online_cpus(); + if (ret) + goto put; + + mutex_lock(&microcode_mutex); + ret = microcode_reload_late(); mutex_unlock(&microcode_mutex); + +put: put_online_cpus(); - if (!ret) + if (ret >= 0) ret = size; return ret; @@ -606,10 +705,8 @@ static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, - refresh_fw); - - if (ustate == UCODE_OK) { + ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, 
refresh_fw); + if (ustate == UCODE_NEW) { pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index a15db2b..32b8e57 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -589,6 +589,23 @@ static int apply_microcode_early(struct ucode_cpu_info *uci, bool early) if (!mc) return 0; + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. + */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + return UCODE_OK; + } + + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ native_wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -772,27 +789,44 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static int apply_microcode_intel(int cpu) +static enum ucode_state apply_microcode_intel(int cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_intel *mc; - struct ucode_cpu_info *uci; - struct cpuinfo_x86 *c; static int prev_rev; u32 rev; /* We should bind the task to the CPU */ if (WARN_ON(raw_smp_processor_id() != cpu)) - return -1; + return UCODE_ERROR; - uci = ucode_cpu_info + cpu; - mc = uci->mc; + /* Look for a newer patch in our cache: */ + mc = find_patch(uci); if (!mc) { - /* Look for a newer patch in our cache: */ - mc = find_patch(uci); + mc = uci->mc; if (!mc) - return 0; + return UCODE_NFOUND; } + /* + * Save us the MSR write below - which is a particular expensive + * operation - when the other hyperthread has updated the microcode + * already. 
+ */ + rev = intel_get_microcode_revision(); + if (rev >= mc->hdr.rev) { + uci->cpu_sig.rev = rev; + c->microcode = rev; + return UCODE_OK; + } + + /* + * Writeback and invalidate caches before updating microcode to avoid + * internal issues depending on what the microcode is updating. + */ + native_wbinvd(); + /* write microcode via MSR 0x79 */ wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)mc->bits); @@ -801,7 +835,7 @@ static int apply_microcode_intel(int cpu) if (rev != mc->hdr.rev) { pr_err("CPU%d update to revision 0x%x failed\n", cpu, mc->hdr.rev); - return -1; + return UCODE_ERROR; } if (rev != prev_rev) { @@ -813,12 +847,10 @@ static int apply_microcode_intel(int cpu) prev_rev = rev; } - c = &cpu_data(cpu); - uci->cpu_sig.rev = rev; c->microcode = rev; - return 0; + return UCODE_UPDATED; } static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, @@ -830,6 +862,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, unsigned int leftover = size; unsigned int curr_mc_size = 0, new_mc_size = 0; unsigned int csig, cpf; + enum ucode_state ret = UCODE_OK; while (leftover) { struct microcode_header_intel mc_header; @@ -871,6 +904,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, new_mc = mc; new_mc_size = mc_size; mc = NULL; /* trigger new vmalloc */ + ret = UCODE_NEW; } ucode_ptr += mc_size; @@ -900,7 +934,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); - return UCODE_OK; + return ret; } static int get_ucode_fw(void *to, const void *from, size_t n) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9340f41..031082c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -22,7 +22,7 @@ #include <linux/kexec.h> #include <asm/processor.h> #include <asm/hypervisor.h> 
-#include <asm/hyperv.h> +#include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> #include <asm/desc.h> #include <asm/irq_regs.h> @@ -37,6 +37,7 @@ EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); +static void (*hv_stimer0_handler)(void); static void (*hv_kexec_handler)(void); static void (*hv_crash_handler)(struct pt_regs *regs); @@ -69,6 +70,41 @@ void hv_remove_vmbus_irq(void) EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +/* + * Routines to do per-architecture handling of stimer0 + * interrupts when in Direct Mode + */ + +__visible void __irq_entry hv_stimer0_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + entering_irq(); + inc_irq_stat(hyperv_stimer0_count); + if (hv_stimer0_handler) + hv_stimer0_handler(); + ack_APIC_irq(); + + exiting_irq(); + set_irq_regs(old_regs); +} + +int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void)) +{ + *vector = HYPERV_STIMER0_VECTOR; + *irq = 0; /* Unused on x86/x64 */ + hv_stimer0_handler = handler; + return 0; +} +EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq); + +void hv_remove_stimer0_irq(int irq) +{ + /* We have no way to deallocate the interrupt gate */ + hv_stimer0_handler = NULL; +} +EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq); + void hv_setup_kexec_handler(void (*handler)(void)) { hv_kexec_handler = handler; @@ -180,8 +216,8 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); - ms_hyperv.max_vp_index = cpuid_eax(HVCPUID_IMPLEMENTATION_LIMITS); - ms_hyperv.max_lp_index = cpuid_ebx(HVCPUID_IMPLEMENTATION_LIMITS); + ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS); + ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS); pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n", ms_hyperv.max_vp_index, ms_hyperv.max_lp_index); @@ -189,11 +225,12 @@ static void __init 
ms_hyperv_init_platform(void) /* * Extract host information. */ - if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) { - hv_host_info_eax = cpuid_eax(HVCPUID_VERSION); - hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION); - hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION); - hv_host_info_edx = cpuid_edx(HVCPUID_VERSION); + if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >= + HYPERV_CPUID_VERSION) { + hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); + hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); + hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); + hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n", hv_host_info_eax, hv_host_info_ebx >> 16, @@ -207,6 +244,11 @@ static void __init ms_hyperv_init_platform(void) x86_platform.calibrate_cpu = hv_get_tsc_khz; } + if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) { + ms_hyperv.nested_features = + cpuid_eax(HYPERV_CPUID_NESTED_FEATURES); + } + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { @@ -257,6 +299,10 @@ static void __init ms_hyperv_init_platform(void) alloc_intr_gate(HYPERV_REENLIGHTENMENT_VECTOR, hyperv_reenlightenment_vector); + /* Setup the IDT for stimer0 */ + if (ms_hyperv.misc_features & HV_X64_STIMER_DIRECT_MODE_AVAILABLE) + alloc_intr_gate(HYPERV_STIMER0_VECTOR, + hv_stimer0_callback_vector); #endif } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 0931a10..1d300f9 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -40,6 +40,7 @@ #include <linux/notifier.h> #include <linux/uaccess.h> #include <linux/gfp.h> +#include <linux/completion.h> #include <asm/processor.h> #include <asm/msr.h> @@ -47,19 +48,27 @@ static struct class *cpuid_class; static enum cpuhp_state cpuhp_cpuid_state; +struct cpuid_regs_done { + struct cpuid_regs regs; + struct completion done; +}; + static void 
cpuid_smp_cpuid(void *cmd_block) { - struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block; + struct cpuid_regs_done *cmd = cmd_block; + + cpuid_count(cmd->regs.eax, cmd->regs.ecx, + &cmd->regs.eax, &cmd->regs.ebx, + &cmd->regs.ecx, &cmd->regs.edx); - cpuid_count(cmd->eax, cmd->ecx, - &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx); + complete(&cmd->done); } static ssize_t cpuid_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { char __user *tmp = buf; - struct cpuid_regs cmd; + struct cpuid_regs_done cmd; int cpu = iminor(file_inode(file)); u64 pos = *ppos; ssize_t bytes = 0; @@ -68,19 +77,28 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, if (count % 16) return -EINVAL; /* Invalid chunk size */ + init_completion(&cmd.done); for (; count; count -= 16) { - cmd.eax = pos; - cmd.ecx = pos >> 32; - err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); + call_single_data_t csd = { + .func = cpuid_smp_cpuid, + .info = &cmd, + }; + + cmd.regs.eax = pos; + cmd.regs.ecx = pos >> 32; + + err = smp_call_function_single_async(cpu, &csd); if (err) break; - if (copy_to_user(tmp, &cmd, 16)) { + wait_for_completion(&cmd.done); + if (copy_to_user(tmp, &cmd.regs, 16)) { err = -EFAULT; break; } tmp += 16; bytes += 16; *ppos = ++pos; + reinit_completion(&cmd.done); } return bytes ? bytes : err; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 10e74d4..1f66804 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -199,9 +199,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_X86_IO_APIC /* Prevent crash_kexec() from deadlocking on ioapic_lock. 
*/ ioapic_zap_locks(); - disable_IO_APIC(); + clear_IO_APIC(); #endif lapic_shutdown(); + restore_boot_irq_mode(); #ifdef CONFIG_HPET_TIMER hpet_disable(); #endif diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 25de5f6..f39f3a0 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -11,6 +11,7 @@ #include <linux/of_address.h> #include <linux/of_platform.h> #include <linux/of_irq.h> +#include <linux/libfdt.h> #include <linux/slab.h> #include <linux/pci.h> #include <linux/of_pci.h> @@ -130,34 +131,52 @@ static void __init dtb_setup_hpet(void) #endif } +#ifdef CONFIG_X86_LOCAL_APIC + +static void __init dtb_cpu_setup(void) +{ + struct device_node *dn; + u32 apic_id, version; + int ret; + + version = GET_APIC_VERSION(apic_read(APIC_LVR)); + for_each_node_by_type(dn, "cpu") { + ret = of_property_read_u32(dn, "reg", &apic_id); + if (ret < 0) { + pr_warn("%pOF: missing local APIC ID\n", dn); + continue; + } + generic_processor_info(apic_id, version); + } +} + static void __init dtb_lapic_setup(void) { -#ifdef CONFIG_X86_LOCAL_APIC struct device_node *dn; struct resource r; + unsigned long lapic_addr = APIC_DEFAULT_PHYS_BASE; int ret; dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic"); - if (!dn) - return; - - ret = of_address_to_resource(dn, 0, &r); - if (WARN_ON(ret)) - return; + if (dn) { + ret = of_address_to_resource(dn, 0, &r); + if (WARN_ON(ret)) + return; + lapic_addr = r.start; + } /* Did the boot loader setup the local APIC ? 
*/ if (!boot_cpu_has(X86_FEATURE_APIC)) { - if (apic_force_enable(r.start)) + if (apic_force_enable(lapic_addr)) return; } smp_found_config = 1; pic_mode = 1; - register_lapic_address(r.start); - generic_processor_info(boot_cpu_physical_apicid, - GET_APIC_VERSION(apic_read(APIC_LVR))); -#endif + register_lapic_address(lapic_addr); } +#endif /* CONFIG_X86_LOCAL_APIC */ + #ifdef CONFIG_X86_IO_APIC static unsigned int ioapic_id; @@ -194,19 +213,22 @@ static struct of_ioapic_type of_ioapic_type[] = static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { - struct of_phandle_args *irq_data = (void *)arg; + struct irq_fwspec *fwspec = (struct irq_fwspec *)arg; struct of_ioapic_type *it; struct irq_alloc_info tmp; + int type_index; - if (WARN_ON(irq_data->args_count < 2)) + if (WARN_ON(fwspec->param_count < 2)) return -EINVAL; - if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type)) + + type_index = fwspec->param[1]; + if (type_index >= ARRAY_SIZE(of_ioapic_type)) return -EINVAL; - it = &of_ioapic_type[irq_data->args[1]]; + it = &of_ioapic_type[type_index]; ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity); tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain)); - tmp.ioapic_pin = irq_data->args[0]; + tmp.ioapic_pin = fwspec->param[0]; return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp); } @@ -255,11 +277,14 @@ static void __init dtb_ioapic_setup(void) {} static void __init dtb_apic_setup(void) { +#ifdef CONFIG_X86_LOCAL_APIC dtb_lapic_setup(); + dtb_cpu_setup(); +#endif dtb_ioapic_setup(); } -#ifdef CONFIG_OF_FLATTREE +#ifdef CONFIG_OF_EARLY_FLATTREE static void __init x86_flattree_get_config(void) { u32 size, map_len; @@ -270,14 +295,15 @@ static void __init x86_flattree_get_config(void) map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128); - initial_boot_params = dt = early_memremap(initial_dtb, map_len); - size = of_get_flat_dt_size(); + dt = early_memremap(initial_dtb, 
map_len); + size = fdt_totalsize(dt); if (map_len < size) { early_memunmap(dt, map_len); - initial_boot_params = dt = early_memremap(initial_dtb, size); + dt = early_memremap(initial_dtb, size); map_len = size; } + early_init_dt_verify(dt); unflatten_and_copy_device_tree(); early_memunmap(dt, map_len); } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index a2d8a39..18fa9d7 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -24,7 +24,7 @@ int panic_on_unrecovered_nmi; int panic_on_io_nmi; -unsigned int code_bytes = 64; +static unsigned int code_bytes = 64; static int die_counter; bool in_task_stack(unsigned long *stack, struct task_struct *task, @@ -375,3 +375,50 @@ static int __init code_bytes_setup(char *s) return 1; } __setup("code_bytes=", code_bytes_setup); + +void show_regs(struct pt_regs *regs) +{ + bool all = true; + int i; + + show_regs_print_info(KERN_DEFAULT); + + if (IS_ENABLED(CONFIG_X86_32)) + all = !user_mode(regs); + + __show_regs(regs, all); + + /* + * When in-kernel, we also print out the stack and code at the + * time of the fault.. 
+ */ + if (!user_mode(regs)) { + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + unsigned char c; + u8 *ip; + + show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); + + printk(KERN_DEFAULT "Code: "); + + ip = (u8 *)regs->ip - code_prologue; + if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { + /* try starting at IP */ + ip = (u8 *)regs->ip; + code_len = code_len - code_prologue + 1; + } + for (i = 0; i < code_len; i++, ip++) { + if (ip < (u8 *)PAGE_OFFSET || + probe_kernel_address(ip, c)) { + pr_cont(" Bad RIP value."); + break; + } + if (ip == (u8 *)regs->ip) + pr_cont("<%02x> ", c); + else + pr_cont("%02x ", c); + } + } + pr_cont("\n"); +} diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 04170f6..cd53f30 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -127,45 +127,3 @@ unknown: info->type = STACK_TYPE_UNKNOWN; return -EINVAL; } - -void show_regs(struct pt_regs *regs) -{ - int i; - - show_regs_print_info(KERN_EMERG); - __show_regs(regs, !user_mode(regs)); - - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. 
- */ - if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - show_trace_log_lvl(current, regs, NULL, KERN_EMERG); - - pr_emerg("Code:"); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - pr_cont(" Bad EIP value."); - break; - } - if (ip == (u8 *)regs->ip) - pr_cont(" <%02x>", c); - else - pr_cont(" %02x", c); - } - } - pr_cont("\n"); -} diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 563e28d..5cdb9e8 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -149,45 +149,3 @@ unknown: info->type = STACK_TYPE_UNKNOWN; return -EINVAL; } - -void show_regs(struct pt_regs *regs) -{ - int i; - - show_regs_print_info(KERN_DEFAULT); - __show_regs(regs, 1); - - /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. 
- */ - if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - - show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); - - printk(KERN_DEFAULT "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - pr_cont(" Bad RIP value."); - break; - } - if (ip == (u8 *)regs->ip) - pr_cont("<%02x> ", c); - else - pr_cont("%02x ", c); - } - } - pr_cont("\n"); -} diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 71c11ad..6a2cb14 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -924,6 +924,24 @@ static int __init parse_memmap_one(char *p) } else if (*p == '!') { start_at = memparse(p+1, &p); e820__range_add(start_at, mem_size, E820_TYPE_PRAM); + } else if (*p == '%') { + enum e820_type from = 0, to = 0; + + start_at = memparse(p + 1, &p); + if (*p == '-') + from = simple_strtoull(p + 1, &p, 0); + if (*p == '+') + to = simple_strtoull(p + 1, &p, 0); + if (*p != '\0') + return -EINVAL; + if (from && to) + e820__range_update(start_at, mem_size, from, to); + else if (to) + e820__range_add(start_at, mem_size, to); + else if (from) + e820__range_remove(start_at, mem_size, from, 1); + else + e820__range_remove(start_at, mem_size, 0, 0); } else { e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1); } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7ba5d81..0c855de 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -32,6 +32,11 @@ #include <asm/microcode.h> #include <asm/kasan.h> +#ifdef CONFIG_X86_5LEVEL +#undef pgtable_l5_enabled +#define pgtable_l5_enabled __pgtable_l5_enabled +#endif + /* * Manage page tables very early on. 
*/ @@ -39,6 +44,24 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); +#ifdef CONFIG_X86_5LEVEL +unsigned int __pgtable_l5_enabled __ro_after_init; +EXPORT_SYMBOL(__pgtable_l5_enabled); +unsigned int pgdir_shift __ro_after_init = 39; +EXPORT_SYMBOL(pgdir_shift); +unsigned int ptrs_per_p4d __ro_after_init = 1; +EXPORT_SYMBOL(ptrs_per_p4d); +#endif + +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT +unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4; +EXPORT_SYMBOL(page_offset_base); +unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4; +EXPORT_SYMBOL(vmalloc_base); +unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4; +EXPORT_SYMBOL(vmemmap_base); +#endif + #define __head __section(.head.text) static void __head *fixup_pointer(void *ptr, unsigned long physaddr) @@ -46,6 +69,41 @@ static void __head *fixup_pointer(void *ptr, unsigned long physaddr) return ptr - (void *)_text + (void *)physaddr; } +static unsigned long __head *fixup_long(void *ptr, unsigned long physaddr) +{ + return fixup_pointer(ptr, physaddr); +} + +#ifdef CONFIG_X86_5LEVEL +static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) +{ + return fixup_pointer(ptr, physaddr); +} + +static bool __head check_la57_support(unsigned long physaddr) +{ + if (native_cpuid_eax(0) < 7) + return false; + + if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + return false; + + *fixup_int(&pgtable_l5_enabled, physaddr) = 1; + *fixup_int(&pgdir_shift, physaddr) = 48; + *fixup_int(&ptrs_per_p4d, physaddr) = 512; + *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; + *fixup_long(&vmalloc_base, physaddr) = __VMALLOC_BASE_L5; + *fixup_long(&vmemmap_base, physaddr) = __VMEMMAP_BASE_L5; + + return true; +} +#else +static bool __head check_la57_support(unsigned long physaddr) +{ + return false; +} +#endif 
+ unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { @@ -55,9 +113,12 @@ unsigned long __head __startup_64(unsigned long physaddr, p4dval_t *p4d; pudval_t *pud; pmdval_t *pmd, pmd_entry; + bool la57; int i; unsigned int *next_pgt_ptr; + la57 = check_la57_support(physaddr); + /* Is the address too large? */ if (physaddr >> MAX_PHYSMEM_BITS) for (;;); @@ -81,9 +142,14 @@ unsigned long __head __startup_64(unsigned long physaddr, /* Fixup the physical addresses in the page table */ pgd = fixup_pointer(&early_top_pgt, physaddr); - pgd[pgd_index(__START_KERNEL_map)] += load_delta; - - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + p = pgd + pgd_index(__START_KERNEL_map); + if (la57) + *p = (unsigned long)level4_kernel_pgt; + else + *p = (unsigned long)level3_kernel_pgt; + *p += _PAGE_TABLE_NOENC - __START_KERNEL_map + load_delta; + + if (la57) { p4d = fixup_pointer(&level4_kernel_pgt, physaddr); p4d[511] += load_delta; } @@ -108,7 +174,7 @@ unsigned long __head __startup_64(unsigned long physaddr, pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask(); - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + if (la57) { p4d = fixup_pointer(early_dynamic_pgts[next_early_pgt++], physaddr); i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD; @@ -154,8 +220,7 @@ unsigned long __head __startup_64(unsigned long physaddr, * Fixup phys_base - remove the memory encryption mask to obtain * the true physical address. */ - p = fixup_pointer(&phys_base, physaddr); - *p += load_delta - sme_get_me_mask(); + *fixup_long(&phys_base, physaddr) += load_delta - sme_get_me_mask(); /* Encrypt the kernel and related (if SME is active) */ sme_encrypt_kernel(bp); @@ -206,7 +271,7 @@ again: * critical -- __PAGE_OFFSET would point us back into the dynamic * range and we might end up looping forever... 
*/ - if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + if (!pgtable_l5_enabled) p4d_p = pgd_p; else if (pgd) p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); @@ -322,7 +387,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0); BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0); BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); - BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == + MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04a625f..48385c1 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -23,6 +23,7 @@ #include <asm/nops.h> #include "../entry/calling.h" #include <asm/export.h> +#include <asm/nospec-branch.h> #ifdef CONFIG_PARAVIRT #include <asm/asm-offsets.h> @@ -38,12 +39,12 @@ * */ +#define l4_index(x) (((x) >> 39) & 511) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) -PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) -PGD_START_KERNEL = pgd_index(__START_KERNEL_map) -#endif +L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4) +L4_START_KERNEL = l4_index(__START_KERNEL_map) + L3_START_KERNEL = pud_index(__START_KERNEL_map) .text @@ -124,7 +125,10 @@ ENTRY(secondary_startup_64) /* Enable PAE mode, PGE and LA57 */ movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx #ifdef CONFIG_X86_5LEVEL + testl $1, __pgtable_l5_enabled(%rip) + jz 1f orl $X86_CR4_LA57, %ecx +1: #endif movq %rcx, %cr4 @@ -134,6 +138,7 @@ ENTRY(secondary_startup_64) /* Ensure I am executing from virtual addresses */ movq $1f, %rax + ANNOTATE_RETPOLINE_SAFE jmp *%rax 1: UNWIND_HINT_EMPTY @@ -372,12 +377,7 @@ GLOBAL(name) __INITDATA NEXT_PGD_PAGE(early_top_pgt) - .fill 511,8,0 -#ifdef CONFIG_X86_5LEVEL - .quad level4_kernel_pgt - __START_KERNEL_map 
+ _PAGE_TABLE_NOENC -#else - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC -#endif + .fill 512,8,0 .fill PTI_USER_PGD_FILL,8,0 NEXT_PAGE(early_dynamic_pgts) @@ -388,9 +388,9 @@ NEXT_PAGE(early_dynamic_pgts) #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) NEXT_PGD_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 + .org init_top_pgt + L4_PAGE_OFFSET*8, 0 .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC - .org init_top_pgt + PGD_START_KERNEL*8, 0 + .org init_top_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .fill PTI_USER_PGD_FILL,8,0 diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 56d99be..2c3a1b4 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -140,6 +140,9 @@ static const __initconst struct idt_data apic_idts[] = { # ifdef CONFIG_IRQ_WORK INTG(IRQ_WORK_VECTOR, irq_work_interrupt), # endif +#ifdef CONFIG_X86_UV + INTG(UV_BAU_MESSAGE, uv_bau_message_intr1), +#endif INTG(SPURIOUS_APIC_VECTOR, spurious_interrupt), INTG(ERROR_APIC_VECTOR, error_interrupt), #endif @@ -160,7 +163,6 @@ static const __initconst struct idt_data early_pf_idts[] = { */ static const __initconst struct idt_data dbg_idts[] = { INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_BP, int3), }; #endif @@ -183,7 +185,6 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), - SISTG(X86_TRAP_BP, int3, DEBUG_STACK), ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK), #ifdef CONFIG_X86_MCE ISTG(X86_TRAP_MC, &machine_check, MCE_STACK), diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 2f72330..0fe1c87 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -23,7 +23,7 @@ /* * this changes the io 
permissions bitmap in the current task. */ -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) { struct thread_struct *t = &current->thread; struct tss_struct *tss; @@ -96,6 +96,11 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) return 0; } +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) +{ + return ksys_ioperm(from, num, turn_on); +} + /* * sys_iopl has to be used when you want to access the IO ports * beyond the 0x3ff range: to get the full 65536 ports bitmapped diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 45fb4d2..328d027 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -150,6 +150,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) irq_stats(j)->irq_hv_reenlightenment_count); seq_puts(p, " Hyper-V reenlightenment interrupts\n"); } + if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { + seq_printf(p, "%*s: ", prec, "HVS"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", + irq_stats(j)->hyperv_stimer0_count); + seq_puts(p, " Hyper-V stimer0 interrupts\n"); + } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a539410..772196c 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -61,9 +61,14 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + /* + * Try to set up the through-local-APIC virtual wire mode earlier. + * + * On some 32-bit UP machines, whose APIC has been disabled by BIOS + * and then got re-enabled by "lapic", it hangs at boot time without this. 
+ */ init_bsp_APIC(); -#endif + legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index b68fd89..fa183a1 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -124,6 +124,14 @@ static int __init jailhouse_pci_arch_init(void) if (pcibios_last_bus < 0) pcibios_last_bus = 0xff; +#ifdef CONFIG_PCI_MMCONFIG + if (setup_data.pci_mmconfig_base) { + pci_mmconfig_add(0, 0, pcibios_last_bus, + setup_data.pci_mmconfig_base); + pci_mmcfg_arch_init(); + } +#endif + return 0; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index bd36f3c..0715f82 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1168,10 +1168,18 @@ NOKPROBE_SYMBOL(longjmp_break_handler); bool arch_within_kprobe_blacklist(unsigned long addr) { + bool is_in_entry_trampoline_section = false; + +#ifdef CONFIG_X86_64 + is_in_entry_trampoline_section = + (addr >= (unsigned long)__entry_trampoline_start && + addr < (unsigned long)__entry_trampoline_end); +#endif return (addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end) || (addr >= (unsigned long)__entry_text_start && - addr < (unsigned long)__entry_text_end); + addr < (unsigned long)__entry_text_end) || + is_in_entry_trampoline_section; } int __init arch_init_kprobes(void) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4e37d1a..7867417 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -49,7 +49,7 @@ static int kvmapf = 1; -static int parse_no_kvmapf(char *arg) +static int __init parse_no_kvmapf(char *arg) { kvmapf = 0; return 0; @@ -58,7 +58,7 @@ static int parse_no_kvmapf(char *arg) early_param("no-kvmapf", parse_no_kvmapf); static int steal_acc = 1; -static int parse_no_stealacc(char *arg) +static int __init parse_no_stealacc(char *arg) { steal_acc = 0; return 0; @@ -67,7 +67,7 @@ static int parse_no_stealacc(char *arg) 
early_param("no-steal-acc", parse_no_stealacc); static int kvmclock_vsyscall = 1; -static int parse_no_kvmclock_vsyscall(char *arg) +static int __init parse_no_kvmclock_vsyscall(char *arg) { kvmclock_vsyscall = 0; return 0; @@ -341,10 +341,10 @@ static void kvm_guest_cpu_init(void) #endif pa |= KVM_ASYNC_PF_ENABLED; - /* Async page fault support for L1 hypervisor is optional */ - if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN, - (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0) - wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) + pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; + + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); printk(KERN_INFO"KVM setup async PF for cpu %d\n", smp_processor_id()); @@ -454,6 +454,13 @@ static void __init sev_map_percpu_data(void) } #ifdef CONFIG_SMP +static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) +{ + native_smp_prepare_cpus(max_cpus); + if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + static_branch_disable(&virt_spin_lock_key); +} + static void __init kvm_smp_prepare_boot_cpu(void) { /* @@ -545,7 +552,9 @@ static void __init kvm_guest_init(void) pv_time_ops.steal_clock = kvm_steal_clock; } - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -555,6 +564,7 @@ static void __init kvm_guest_init(void) kvm_setup_vsyscall_timeinfo(); #ifdef CONFIG_SMP + smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online", kvm_cpu_online, kvm_cpu_down_prepare) < 0) @@ -604,6 +614,11 @@ unsigned int kvm_arch_para_features(void) return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); } +unsigned int 
kvm_arch_para_hints(void) +{ + return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); +} + static uint32_t __init kvm_detect(void) { return kvm_cpuid_base(); @@ -633,7 +648,9 @@ static __init int kvm_setup_pv_tlb_flush(void) { int cpu; - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH)) { + if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); @@ -728,6 +745,9 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; + if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + return; + __pv_init_lock_hash(); pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index edfede7..60cdec6 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -195,11 +195,11 @@ void machine_kexec(struct kimage *image) /* * We need to put APICs in legacy mode so that we can * get timer interrupts in second kernel. kexec/kdump - * paths already have calls to disable_IO_APIC() in - * one form or other. kexec jump path also need - * one. + * paths already have calls to restore_boot_irq_mode() + * in one form or other. kexec jump path also need one. */ - disable_IO_APIC(); + clear_IO_APIC(); + restore_boot_irq_mode(); #endif } diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1f790cf..93bd4fb 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -293,11 +293,11 @@ void machine_kexec(struct kimage *image) /* * We need to put APICs in legacy mode so that we can * get timer interrupts in second kernel. 
kexec/kdump - * paths already have calls to disable_IO_APIC() in - * one form or other. kexec jump path also need - * one. + * paths already have calls to restore_boot_irq_mode() + * in one form or other. kexec jump path also need one. */ - disable_IO_APIC(); + clear_IO_APIC(); + restore_boot_irq_mode(); #endif } @@ -350,6 +350,7 @@ void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_NUMBER(phys_base); VMCOREINFO_SYMBOL(init_top_pgt); + VMCOREINFO_NUMBER(pgtable_l5_enabled); #ifdef CONFIG_NUMA VMCOREINFO_SYMBOL(node_data); @@ -542,6 +543,7 @@ int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index da0c160..f58336a 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -191,6 +191,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, goto overflow; break; case R_X86_64_PC32: + case R_X86_64_PLT32: if (*(u32 *)loc != 0) goto invalid_relocation; val -= (u64)loc; diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 35c461f..bbfc8b1 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <linux/crash_dump.h> #include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/bitmap.h> #include <linux/pci_ids.h> #include <linux/pci.h> @@ -445,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, npages = size >> PAGE_SHIFT; order = get_order(size); - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - /* alloc enough pages (and possibly more) */ ret = (void *)__get_free_pages(flag, order); if (!ret) @@ -493,7 +492,7 @@ static const struct dma_map_ops calgary_dma_ops = { .map_page = calgary_map_page, .unmap_page = calgary_unmap_page, .mapping_error = calgary_mapping_error, - .dma_supported = x86_dma_supported, + 
.dma_supported = dma_direct_supported, }; static inline void __iomem * busno_to_bbar(unsigned char num) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index df7ab02..77625b6 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -6,7 +6,6 @@ #include <linux/bootmem.h> #include <linux/gfp.h> #include <linux/pci.h> -#include <linux/kmemleak.h> #include <asm/proto.h> #include <asm/dma.h> @@ -18,7 +17,7 @@ static int forbid_dac __read_mostly; -const struct dma_map_ops *dma_ops = &nommu_dma_ops; +const struct dma_map_ops *dma_ops = &dma_direct_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -76,70 +75,12 @@ void __init pci_iommu_alloc(void) } } } -void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) -{ - unsigned long dma_mask; - struct page *page; - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - dma_addr_t addr; - - dma_mask = dma_alloc_coherent_mask(dev, flag); - -again: - page = NULL; - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(flag)) { - page = dma_alloc_from_contiguous(dev, count, get_order(size), - flag); - if (page) { - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dma_mask) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - } - /* fallback */ - if (!page) - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); - if (!page) - return NULL; - - addr = phys_to_dma(dev, page_to_phys(page)); - if (addr + size > dma_mask) { - __free_pages(page, get_order(size)); - - if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { - flag = (flag & ~GFP_DMA32) | GFP_DMA; - goto again; - } - - return NULL; - } - memset(page_address(page), 0, size); - *dma_addr = addr; - return page_address(page); -} - -void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - 
unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct page *page = virt_to_page(vaddr); - - if (!dma_release_from_contiguous(dev, page, count)) - free_pages((unsigned long)vaddr, get_order(size)); -} bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) { if (!*dev) *dev = &x86_dma_fallback_dev; - *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp); - if (!is_device_dma_capable(*dev)) return false; return true; @@ -245,16 +186,6 @@ int arch_dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(arch_dma_supported); -int x86_dma_supported(struct device *dev, u64 mask) -{ - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_BIT_MASK(24)) - return 0; - return 1; -} - static int __init pci_iommu_init(void) { struct iommu_table_entry *p; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 618285e..ac7ea3a 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) return NOMMU_MAPPING_ERROR; - flush_write_buffers(); return bus; } @@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return 0; s->dma_length = s->length; } - flush_write_buffers(); return nents; } -static void nommu_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - - -static void nommu_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr) { return dma_addr == NOMMU_MAPPING_ERROR; @@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = { .free = dma_generic_free_coherent, .map_sg = nommu_map_sg, 
.map_page = nommu_map_page, - .sync_single_for_device = nommu_sync_single_for_device, - .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, .mapping_error = nommu_mapping_error, .dma_supported = x86_dma_supported, diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ee0f8f3..6615836 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -17,52 +17,6 @@ int swiotlb __read_mostly; -void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) -{ - void *vaddr; - - /* - * Don't print a warning when the first allocation attempt fails. - * swiotlb_alloc_coherent() will print a warning when the DMA - * memory allocation ultimately failed. - */ - flags |= __GFP_NOWARN; - - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, - attrs); - if (vaddr) - return vaddr; - - return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); -} - -void x86_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs) -{ - if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr))) - swiotlb_free_coherent(dev, size, vaddr, dma_addr); - else - dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); -} - -static const struct dma_map_ops x86_swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc = x86_swiotlb_alloc_coherent, - .free = x86_swiotlb_free_coherent, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = NULL, -}; - /* * pci_swiotlb_detect_override - set swiotlb to 1 if necessary * @@ -112,7 +66,7 @@ void __init pci_swiotlb_init(void) { if (swiotlb) { 
swiotlb_init(0); - dma_ops = &x86_swiotlb_dma_ops; + dma_ops = &swiotlb_dma_ops; } } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9eb448c..4b100fe 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -205,6 +205,20 @@ static __always_inline void save_fsgs(struct task_struct *task) save_base_legacy(task, task->thread.gsindex, GS); } +#if IS_ENABLED(CONFIG_KVM) +/* + * While a process is running, current->thread.fsbase and current->thread.gsbase + * may not match the corresponding CPU registers (see save_base_legacy()). KVM + * wants an efficient way to save and restore FSBASE and GSBASE. + * When FSGSBASE extensions are enabled, this will have to use RD{FS,GS}BASE. + */ +void save_fsgs_for_kvm(void) +{ + save_fsgs(current); +} +EXPORT_SYMBOL_GPL(save_fsgs_for_kvm); +#endif + static __always_inline void loadseg(enum which_selector which, unsigned short sel) { diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2126b9d..725624b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -666,7 +666,7 @@ void native_machine_shutdown(void) * Even without the erratum, it still makes sense to quiet IO APIC * before disabling Local APIC.
*/ - disable_IO_APIC(); + clear_IO_APIC(); #endif #ifdef CONFIG_SMP @@ -680,6 +680,7 @@ void native_machine_shutdown(void) #endif lapic_shutdown(); + restore_boot_irq_mode(); #ifdef CONFIG_HPET_TIMER hpet_disable(); diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 69ac9cb..f7b82ed 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -41,11 +41,11 @@ EXPORT_SYMBOL(rtc_lock); */ int mach_set_rtc_mmss(const struct timespec *now) { - unsigned long nowtime = now->tv_sec; + unsigned long long nowtime = now->tv_sec; struct rtc_time tm; int retval = 0; - rtc_time_to_tm(nowtime, &tm); + rtc_time64_to_tm(nowtime, &tm); if (!rtc_valid_tm(&tm)) { retval = mc146818_set_time(&tm); if (retval) @@ -53,7 +53,7 @@ int mach_set_rtc_mmss(const struct timespec *now) __func__, retval); } else { printk(KERN_ERR - "%s: Invalid RTC value: write of %lx to RTC failed\n", + "%s: Invalid RTC value: write of %llx to RTC failed\n", __func__, nowtime); retval = -EINVAL; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 1ae67e9..6285697 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -189,9 +189,7 @@ struct ist_info ist_info; #endif #else -struct cpuinfo_x86 boot_cpu_data __read_mostly = { - .x86_phys_bits = MAX_PHYSMEM_BITS, -}; +struct cpuinfo_x86 boot_cpu_data __read_mostly; EXPORT_SYMBOL(boot_cpu_data); #endif @@ -851,6 +849,7 @@ void __init setup_arch(char **cmdline_p) __flush_tlb_all(); #else printk(KERN_INFO "Command line: %s\n", boot_command_line); + boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS; #endif /* @@ -1204,20 +1203,13 @@ void __init setup_arch(char **cmdline_p) kasan_init(); -#ifdef CONFIG_X86_32 - /* sync back kernel address range */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. + * Sync back kernel address range. 
+ * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); tboot_probe(); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 497aa76..ea554f8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -287,24 +287,15 @@ void __init setup_per_cpu_areas(void) /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); -#ifdef CONFIG_X86_32 /* * Sync back kernel address range again. We already did this in * setup_arch(), but percpu data also needs to be available in * the smpboot asm. We can't reliably pick up percpu mappings * using vmalloc_fault(), because exception dispatch needs * percpu data. + * + * FIXME: Can the later sync in setup_cpu_entry_areas() replace + * this call? */ - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); - - /* - * sync back low identity map too. It is used for example - * in the 32-bit EFI stub. - */ - clone_pgd_range(initial_page_table, - swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); -#endif + sync_initial_page_table(); } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4cdc0b2..da270b9 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -25,6 +25,7 @@ #include <linux/user-return-notifier.h> #include <linux/uprobes.h> #include <linux/context_tracking.h> +#include <linux/syscalls.h> #include <asm/processor.h> #include <asm/ucontext.h> @@ -601,7 +602,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, * Do a signal return; undo the signal stack. 
*/ #ifdef CONFIG_X86_32 -asmlinkage unsigned long sys_sigreturn(void) +SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = current_pt_regs(); struct sigframe __user *frame; @@ -633,7 +634,7 @@ badframe: } #endif /* CONFIG_X86_32 */ -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index ac057f9..df92605 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -26,8 +26,8 @@ static inline void signal_compat_build_tests(void) * new fields are handled in copy_siginfo_to_user32()! */ BUILD_BUG_ON(NSIGILL != 11); - BUILD_BUG_ON(NSIGFPE != 13); - BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGFPE != 14); + BUILD_BUG_ON(NSIGSEGV != 7); BUILD_BUG_ON(NSIGBUS != 5); BUILD_BUG_ON(NSIGTRAP != 4); BUILD_BUG_ON(NSIGCHLD != 6); @@ -43,6 +43,13 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); #define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0); + BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4); + BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8); + + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_signo) != 0); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_errno) != 4); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_code) != 8); /* * Ensure that the size of each si_field never changes. 
* If it does, it is a sign that the @@ -63,36 +70,94 @@ static inline void signal_compat_build_tests(void) CHECK_CSI_SIZE (_kill, 2*sizeof(int)); CHECK_SI_SIZE (_kill, 2*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0xC); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + CHECK_CSI_OFFSET(_timer); CHECK_CSI_SIZE (_timer, 3*sizeof(int)); CHECK_SI_SIZE (_timer, 6*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_tid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_overrun) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14); + CHECK_CSI_OFFSET(_rt); CHECK_CSI_SIZE (_rt, 3*sizeof(int)); CHECK_SI_SIZE (_rt, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_value) != 0x14); + CHECK_CSI_OFFSET(_sigchld); CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x14); + BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x20); + BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x28); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_uid) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_status) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_utime) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_stime) != 0x1C); 
+ #ifdef CONFIG_X86_X32_ABI CHECK_CSI_OFFSET(_sigchld_x32); CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); /* no _sigchld_x32 in the generic siginfo_t */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._utime) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields._sigchld_x32._stime) != 0x20); #endif CHECK_CSI_OFFSET(_sigfault); CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); + + BUILD_BUG_ON(offsetof(siginfo_t, si_lower) != 0x20); + BUILD_BUG_ON(offsetof(siginfo_t, si_upper) != 0x28); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_lower) != 0x14); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_upper) != 0x18); + + BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); + CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_band) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_fd) != 0x10); + CHECK_CSI_OFFSET(_sigsys); CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x1C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_call_addr) != 0x0C); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_syscall) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_arch) != 0x14); + /* any new si_fields should be added here */ } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9eee25d..ff99e2b 100644 --- 
a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1437,6 +1437,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); c->cpu_core_id = 0; + c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); } diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 676774b..a3f15ed 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -97,7 +97,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, if (off & ~PAGE_MASK) goto out; - error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return error; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3d9b230..03f3d76 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -577,7 +577,6 @@ do_general_protection(struct pt_regs *regs, long error_code) } NOKPROBE_SYMBOL(do_general_protection); -/* May run on IST stack. */ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) { #ifdef CONFIG_DYNAMIC_FTRACE @@ -592,6 +591,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) if (poke_int3_handler(regs)) return; + /* + * Use ist_enter despite the fact that we don't use an IST stack. + * We can be called from a kprobe in non-CONTEXT_KERNEL kernel + * mode or even during context tracking state changes. + * + * This means that we can't schedule. That's okay. + */ ist_enter(regs); RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -609,15 +615,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) SIGTRAP) == NOTIFY_STOP) goto exit; - /* - * Let others (NMI) know that the debug stack is in use - * as we may switch to the interrupt stack. 
- */ - debug_stack_usage_inc(); cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); cond_local_irq_disable(regs); - debug_stack_usage_dec(); + exit: ist_exit(regs); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fb43027..ef32297 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1179,6 +1179,45 @@ struct system_counterval_t convert_art_to_tsc(u64 art) } EXPORT_SYMBOL(convert_art_to_tsc); +/** + * convert_art_ns_to_tsc() - Convert ART in nanoseconds to TSC. + * @art_ns: ART (Always Running Timer) in unit of nanoseconds + * + * PTM requires all timestamps to be in units of nanoseconds. When user + * software requests a cross-timestamp, this function converts system timestamp + * to TSC. + * + * This is valid when CPU feature flag X86_FEATURE_TSC_KNOWN_FREQ is set + * indicating the tsc_khz is derived from CPUID[15H]. Drivers should check + * that this flag is set before conversion to TSC is attempted. + * + * Return: + * struct system_counterval_t - system counter value with the pointer to the + * corresponding clocksource + * @cycles: System counter value + * @cs: Clocksource corresponding to system counter value. Used + * by timekeeping code to verify comparability of two cycle + * values.
+ */ + +struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns) +{ + u64 tmp, res, rem; + + rem = do_div(art_ns, USEC_PER_SEC); + + res = art_ns * tsc_khz; + tmp = rem * tsc_khz; + + do_div(tmp, USEC_PER_SEC); + res += tmp; + + return (struct system_counterval_t) { .cs = art_related_clocksource, + .cycles = res}; +} +EXPORT_SYMBOL(convert_art_ns_to_tsc); + + static void tsc_refine_calibration_work(struct work_struct *work); static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); /** diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 1f9188f..feb28fe 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -5,7 +5,6 @@ #include <asm/unwind.h> #include <asm/orc_types.h> #include <asm/orc_lookup.h> -#include <asm/sections.h> #define orc_warn(fmt, ...) \ printk_deferred_once(KERN_WARNING pr_fmt("WARNING: " fmt), ##__VA_ARGS__) @@ -148,7 +147,7 @@ static struct orc_entry *orc_find(unsigned long ip) } /* vmlinux .init slow lookup: */ - if (ip >= (unsigned long)_sinittext && ip < (unsigned long)_einittext) + if (init_kernel_text(ip)) return __orc_find(__start_orc_unwind_ip, __start_orc_unwind, __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5edb27f..9d0b5af 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -727,7 +727,8 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) return; check_vip: - if (VEFLAGS & X86_EFLAGS_VIP) { + if ((VEFLAGS & (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) == + (X86_EFLAGS_VIP | X86_EFLAGS_VIF)) { save_v86_state(regs, VM86_STI); return; } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9b138a0..795f3a8 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -102,7 +102,6 @@ SECTIONS _stext = .; /* bootstrapping code */ HEAD_TEXT - . 
= ALIGN(8); TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT @@ -118,9 +117,11 @@ SECTIONS #ifdef CONFIG_X86_64 . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_start) = .; _entry_trampoline = .; *(.entry_trampoline) . = ALIGN(PAGE_SIZE); + VMLINUX_SYMBOL(__entry_trampoline_end) = .; ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif @@ -198,7 +199,7 @@ SECTIONS . = __vvar_beginning_hack + PAGE_SIZE; } :data - . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); + . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); /* Init code and data - will be freed after init */ . = ALIGN(PAGE_SIZE); @@ -366,8 +367,8 @@ SECTIONS . = ALIGN(PAGE_SIZE); /* keep VO_INIT_SIZE page aligned */ _end = .; - STABS_DEBUG - DWARF_DEBUG + STABS_DEBUG + DWARF_DEBUG /* Sections to be discarded */ DISCARDS diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 1151ccd..3ab8676 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -8,6 +8,7 @@ #include <linux/export.h> #include <linux/pci.h> +#include <asm/acpi.h> #include <asm/bios_ebda.h> #include <asm/paravirt.h> #include <asm/pci_x86.h> @@ -26,10 +27,11 @@ void x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -int __init iommu_init_noop(void) { return 0; } -void iommu_shutdown_noop(void) { } -bool __init bool_x86_init_noop(void) { return false; } -void x86_op_int_noop(int cpu) { } +static int __init iommu_init_noop(void) { return 0; } +static void iommu_shutdown_noop(void) { } +static bool __init bool_x86_init_noop(void) { return false; } +static void x86_op_int_noop(int cpu) { } +static u64 u64_x86_init_noop(void) { return 0; } /* * The platform setup functions are preset with the default functions @@ -90,6 +92,12 @@ struct x86_init_ops x86_init __initdata = { .guest_late_init = x86_init_noop, .x2apic_available = bool_x86_init_noop, .init_mem_mapping = x86_init_noop, + .init_after_bootmem = x86_init_noop, + }, + + .acpi = { + .get_root_pointer = 
u64_x86_init_noop, + .reduced_hw_early_init = acpi_generic_reduced_hw_init, }, }; @@ -146,7 +154,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev) } #endif -struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = { - .read = native_io_apic_read, - .disable = native_disable_io_apic, +struct x86_apic_ops x86_apic_ops __ro_after_init = { + .io_apic_read = native_io_apic_read, + .restore = native_restore_boot_irq_mode, }; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a0c5a69..82055b9 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) return -EINVAL; } + best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + if (kvm_hlt_in_guest(vcpu->kvm) && best && + (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + /* Update physical-address width */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); @@ -370,7 +375,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) | 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) | - F(TOPOEXT); + F(TOPOEXT) | F(PERFCTR_CORE); /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = @@ -607,7 +612,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) | (1 << KVM_FEATURE_PV_UNHALT) | - (1 << KVM_FEATURE_PV_TLB_FLUSH); + (1 << KVM_FEATURE_PV_TLB_FLUSH) | + (1 << KVM_FEATURE_ASYNC_PF_VMEXIT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d91eaeb..b3705ae 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -30,6 +30,7 @@ #include "x86.h" #include "tss.h" #include "mmu.h" +#include "pmu.h" /* * Operand types @@ -2887,6 +2888,9 @@ 
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) return ctxt->ops->cpl(ctxt) > iopl; } +#define VMWARE_PORT_VMPORT (0x5658) +#define VMWARE_PORT_VMRPC (0x5659) + static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, u16 port, u16 len) { @@ -2898,6 +2902,14 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, unsigned mask = (1 << len) - 1; unsigned long base; + /* + * VMware allows access to these ports even if denied + * by TSS I/O permission bitmap. Mimic behavior. + */ + if (enable_vmware_backdoor && + ((port == VMWARE_PORT_VMPORT) || (port == VMWARE_PORT_VMRPC))) + return true; + ops->get_segment(ctxt, &tr, &tr_seg, &base3, VCPU_SREG_TR); if (!tr_seg.p) return false; @@ -4282,6 +4294,13 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) u64 cr4 = ctxt->ops->get_cr(ctxt, 4); u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); + /* + * VMware allows access to these Pseduo-PMCs even when read via RDPMC + * in Ring3 when CR4.PCE=0. + */ + if (enable_vmware_backdoor && is_vmware_backdoor_pmc(rcx)) + return X86EMUL_CONTINUE; + if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || ctxt->ops->check_pmc(ctxt, rcx)) return emulate_gp(ctxt, 0); @@ -4498,6 +4517,10 @@ static const struct gprefix pfx_0f_2b = { ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N, }; +static const struct gprefix pfx_0f_10_0f_11 = { + I(Unaligned, em_mov), I(Unaligned, em_mov), N, N, +}; + static const struct gprefix pfx_0f_28_0f_29 = { I(Aligned, em_mov), I(Aligned, em_mov), N, N, }; @@ -4709,7 +4732,9 @@ static const struct opcode twobyte_table[256] = { DI(ImplicitOps | Priv, invd), DI(ImplicitOps | Priv, wbinvd), N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, /* 0x10 - 0x1F */ - N, N, N, N, N, N, N, N, + GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_10_0f_11), + GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_10_0f_11), + N, N, N, N, N, N, D(ImplicitOps | ModRM | SrcMem | NoAccess), N, N, N, N, N, N, D(ImplicitOps | 
ModRM | SrcMem | NoAccess), /* 0x20 - 0x2F */ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index dc97f25..98618e3 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -29,6 +29,7 @@ #include <linux/kvm_host.h> #include <linux/highmem.h> #include <linux/sched/cputime.h> +#include <linux/eventfd.h> #include <asm/apicdef.h> #include <trace/events/kvm.h> @@ -74,13 +75,38 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, return false; } +static void synic_update_vector(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + if (vector < HV_SYNIC_FIRST_VALID_VECTOR) + return; + + if (synic_has_vector_connected(synic, vector)) + __set_bit(vector, synic->vec_bitmap); + else + __clear_bit(vector, synic->vec_bitmap); + + if (synic_has_vector_auto_eoi(synic, vector)) + __set_bit(vector, synic->auto_eoi_bitmap); + else + __clear_bit(vector, synic->auto_eoi_bitmap); +} + static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data, bool host) { - int vector; + int vector, old_vector; + bool masked; vector = data & HV_SYNIC_SINT_VECTOR_MASK; - if (vector < 16 && !host) + masked = data & HV_SYNIC_SINT_MASKED; + + /* + * Valid vectors are 16-255, however, nested Hyper-V attempts to write + * default '0x10000' value on boot and this should not #GP. We need to + * allow zero-initing the register from host as well. + */ + if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked) return 1; /* * Guest may configure multiple SINTs to use the same vector, so @@ -88,18 +114,13 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, * bitmap of vectors with auto-eoi behavior. The bitmaps are * updated here, and atomically queried on fast paths. 
*/ + old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK; atomic64_set(&synic->sint[sint], data); - if (synic_has_vector_connected(synic, vector)) - __set_bit(vector, synic->vec_bitmap); - else - __clear_bit(vector, synic->vec_bitmap); + synic_update_vector(synic, old_vector); - if (synic_has_vector_auto_eoi(synic, vector)) - __set_bit(vector, synic->auto_eoi_bitmap); - else - __clear_bit(vector, synic->auto_eoi_bitmap); + synic_update_vector(synic, vector); /* Load SynIC vectors into EOI exit bitmap */ kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); @@ -736,6 +757,9 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_RESET: + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + case HV_X64_MSR_TSC_EMULATION_CONTROL: + case HV_X64_MSR_TSC_EMULATION_STATUS: r = true; break; } @@ -981,6 +1005,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, kvm_make_request(KVM_REQ_HV_RESET, vcpu); } break; + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + hv->hv_reenlightenment_control = data; + break; + case HV_X64_MSR_TSC_EMULATION_CONTROL: + hv->hv_tsc_emulation_control = data; + break; + case HV_X64_MSR_TSC_EMULATION_STATUS: + hv->hv_tsc_emulation_status = data; + break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -1009,17 +1042,17 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; hv->vp_index = (u32)data; break; - case HV_X64_MSR_APIC_ASSIST_PAGE: { + case HV_X64_MSR_VP_ASSIST_PAGE: { u64 gfn; unsigned long addr; - if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { + if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { hv->hv_vapic = data; if (kvm_lapic_enable_pv_eoi(vcpu, 0)) return 1; break; } - gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; + gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT; addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); if 
(kvm_is_error_hva(addr)) return 1; @@ -1105,6 +1138,15 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_RESET: data = 0; break; + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + data = hv->hv_reenlightenment_control; + break; + case HV_X64_MSR_TSC_EMULATION_CONTROL: + data = hv->hv_tsc_emulation_control; + break; + case HV_X64_MSR_TSC_EMULATION_STATUS: + data = hv->hv_tsc_emulation_status; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1129,7 +1171,7 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); case HV_X64_MSR_TPR: return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); - case HV_X64_MSR_APIC_ASSIST_PAGE: + case HV_X64_MSR_VP_ASSIST_PAGE: data = hv->hv_vapic; break; case HV_X64_MSR_VP_RUNTIME: @@ -1226,10 +1268,47 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) return 1; } +static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) +{ + struct eventfd_ctx *eventfd; + + if (unlikely(!fast)) { + int ret; + gpa_t gpa = param; + + if ((gpa & (__alignof__(param) - 1)) || + offset_in_page(gpa) + sizeof(param) > PAGE_SIZE) + return HV_STATUS_INVALID_ALIGNMENT; + + ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param)); + if (ret < 0) + return HV_STATUS_INVALID_ALIGNMENT; + } + + /* + * Per spec, bits 32-47 contain the extra "flag number". However, we + * have no use for it, and in all known usecases it is zero, so just + * report lookup failure if it isn't. 
+ */ + if (param & 0xffff00000000ULL) + return HV_STATUS_INVALID_PORT_ID; + /* remaining bits are reserved-zero */ + if (param & ~KVM_HYPERV_CONN_ID_MASK) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + + /* conn_to_evt is protected by vcpu->kvm->srcu */ + eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param); + if (!eventfd) + return HV_STATUS_INVALID_PORT_ID; + + eventfd_signal(eventfd, 1); + return HV_STATUS_SUCCESS; +} + int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { - u64 param, ingpa, outgpa, ret; - uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; + u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; + uint16_t code, rep_idx, rep_cnt; bool fast, longmode; /* @@ -1268,7 +1347,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) /* Hypercall continuation is not supported yet */ if (rep_cnt || rep_idx) { - res = HV_STATUS_INVALID_HYPERCALL_CODE; + ret = HV_STATUS_INVALID_HYPERCALL_CODE; goto set_result; } @@ -1276,11 +1355,15 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) case HVCALL_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu, true); break; - case HVCALL_POST_MESSAGE: case HVCALL_SIGNAL_EVENT: + ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); + if (ret != HV_STATUS_INVALID_PORT_ID) + break; + /* maybe userspace knows this conn_id: fall through */ + case HVCALL_POST_MESSAGE: /* don't bother userspace if it has no way to handle it */ if (!vcpu_to_synic(vcpu)->active) { - res = HV_STATUS_INVALID_HYPERCALL_CODE; + ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; } vcpu->run->exit_reason = KVM_EXIT_HYPERV; @@ -1292,12 +1375,79 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_hv_hypercall_complete_userspace; return 0; default: - res = HV_STATUS_INVALID_HYPERCALL_CODE; + ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; } set_result: - ret = res | (((u64)rep_done & 0xfff) << 32); kvm_hv_hypercall_set_result(vcpu, ret); return 1; } + +void kvm_hv_init_vm(struct kvm *kvm) +{ + mutex_init(&kvm->arch.hyperv.hv_lock); + 
idr_init(&kvm->arch.hyperv.conn_to_evt); +} + +void kvm_hv_destroy_vm(struct kvm *kvm) +{ + struct eventfd_ctx *eventfd; + int i; + + idr_for_each_entry(&kvm->arch.hyperv.conn_to_evt, eventfd, i) + eventfd_ctx_put(eventfd); + idr_destroy(&kvm->arch.hyperv.conn_to_evt); +} + +static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd) +{ + struct kvm_hv *hv = &kvm->arch.hyperv; + struct eventfd_ctx *eventfd; + int ret; + + eventfd = eventfd_ctx_fdget(fd); + if (IS_ERR(eventfd)) + return PTR_ERR(eventfd); + + mutex_lock(&hv->hv_lock); + ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1, + GFP_KERNEL); + mutex_unlock(&hv->hv_lock); + + if (ret >= 0) + return 0; + + if (ret == -ENOSPC) + ret = -EEXIST; + eventfd_ctx_put(eventfd); + return ret; +} + +static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id) +{ + struct kvm_hv *hv = &kvm->arch.hyperv; + struct eventfd_ctx *eventfd; + + mutex_lock(&hv->hv_lock); + eventfd = idr_remove(&hv->conn_to_evt, conn_id); + mutex_unlock(&hv->hv_lock); + + if (!eventfd) + return -ENOENT; + + synchronize_srcu(&kvm->srcu); + eventfd_ctx_put(eventfd); + return 0; +} + +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args) +{ + if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) || + (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK)) + return -EINVAL; + + if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN) + return kvm_hv_eventfd_deassign(kvm, args->conn_id); + return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd); +} diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e637631..837465d 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -88,4 +88,8 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock); +void kvm_hv_init_vm(struct kvm *kvm); +void kvm_hv_destroy_vm(struct kvm *kvm); +int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); + #endif diff --git 
a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index f171051..faa2648 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -73,8 +73,19 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) */ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) { + /* + * FIXME: interrupt.injected represents an interrupt that it's + * side-effects have already been applied (e.g. bit from IRR + * already moved to ISR). Therefore, it is incorrect to rely + * on interrupt.injected to know if there is a pending + * interrupt in the user-mode LAPIC. + * This leads to nVMX/nSVM not be able to distinguish + * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on + * pending interrupt or should re-inject an injected + * interrupt. + */ if (!lapic_in_kernel(v)) - return v->arch.interrupt.pending; + return v->arch.interrupt.injected; if (kvm_cpu_has_extint(v)) return 1; @@ -91,8 +102,19 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { + /* + * FIXME: interrupt.injected represents an interrupt that it's + * side-effects have already been applied (e.g. bit from IRR + * already moved to ISR). Therefore, it is incorrect to rely + * on interrupt.injected to know if there is a pending + * interrupt in the user-mode LAPIC. + * This leads to nVMX/nSVM not be able to distinguish + * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on + * pending interrupt or should re-inject an injected + * interrupt. 
+ */ if (!lapic_in_kernel(v)) - return v->arch.interrupt.pending; + return v->arch.interrupt.injected; if (kvm_cpu_has_extint(v)) return 1; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index f500293..9619dcc 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -41,7 +41,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) if (!test_bit(VCPU_EXREG_PDPTR, (unsigned long *)&vcpu->arch.regs_avail)) - kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR); + kvm_x86_ops->cache_reg(vcpu, (enum kvm_reg)VCPU_EXREG_PDPTR); return vcpu->arch.walk_mmu->pdptrs[index]; } @@ -93,6 +93,11 @@ static inline void enter_guest_mode(struct kvm_vcpu *vcpu) static inline void leave_guest_mode(struct kvm_vcpu *vcpu) { vcpu->arch.hflags &= ~HF_GUEST_MASK; + + if (vcpu->arch.load_eoi_exitmap_pending) { + vcpu->arch.load_eoi_exitmap_pending = false; + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); + } } static inline bool is_guest_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 924ac8c..70dcb55 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -321,8 +321,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + /* + * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) + * which doesn't have EOI register; Some buggy OSes (e.g. Windows with + * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC + * version first and level-triggered interrupts never get EOIed in + * IOAPIC. 
+ */ feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); - if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) + if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && + !ioapic_in_kernel(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); } @@ -2002,14 +2010,13 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { - struct kvm_lapic *apic; + struct kvm_lapic *apic = vcpu->arch.apic; int i; - apic_debug("%s\n", __func__); + if (!apic) + return; - ASSERT(vcpu); - apic = vcpu->arch.apic; - ASSERT(apic != NULL); + apic_debug("%s\n", __func__); /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); @@ -2165,7 +2172,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) */ vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ - kvm_lapic_reset(vcpu, false); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); return 0; @@ -2569,7 +2575,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu) pe = xchg(&apic->pending_events, 0); if (test_bit(KVM_APIC_INIT, &pe)) { - kvm_lapic_reset(vcpu, true); kvm_vcpu_reset(vcpu, true); if (kvm_vcpu_is_bsp(apic->vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 56c3601..edce055 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -109,7 +109,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data); static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) { - return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; + return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; } int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 46ff304..8494dba 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2770,8 +2770,10 @@ static 
int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, else pte_access &= ~ACC_WRITE_MASK; + if (!kvm_is_mmio_pfn(pfn)) + spte |= shadow_me_mask; + spte |= (u64)pfn << PAGE_SHIFT; - spte |= shadow_me_mask; if (pte_access & ACC_WRITE_MASK) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 5abae72..6288e9d 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -452,14 +452,21 @@ error: * done by is_rsvd_bits_set() above. * * We set up the value of exit_qualification to inject: - * [2:0] - Derive from [2:0] of real exit_qualification at EPT violation + * [2:0] - Derive from the access bits. The exit_qualification might be + * out of date if it is serving an EPT misconfiguration. * [5:3] - Calculated by the page walk of the guest EPT page tables * [7:8] - Derived from [7:8] of real exit_qualification * * The other bits are set to 0. */ if (!(errcode & PFERR_RSVD_MASK)) { - vcpu->arch.exit_qualification &= 0x187; + vcpu->arch.exit_qualification &= 0x180; + if (write_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_WRITE; + if (user_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_READ; + if (fetch_fault) + vcpu->arch.exit_qualification |= EPT_VIOLATION_ACC_INSTR; vcpu->arch.exit_qualification |= (pte_access & 0x7) << 3; } #endif diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 026db42..58ead7d 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -244,12 +244,49 @@ int kvm_pmu_is_valid_msr_idx(struct kvm_vcpu *vcpu, unsigned idx) return kvm_x86_ops->pmu_ops->is_valid_msr_idx(vcpu, idx); } +bool is_vmware_backdoor_pmc(u32 pmc_idx) +{ + switch (pmc_idx) { + case VMWARE_BACKDOOR_PMC_HOST_TSC: + case VMWARE_BACKDOOR_PMC_REAL_TIME: + case VMWARE_BACKDOOR_PMC_APPARENT_TIME: + return true; + } + return false; +} + +static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) +{ + u64 ctr_val; + + switch (idx) { + case VMWARE_BACKDOOR_PMC_HOST_TSC: + ctr_val = rdtsc(); + break; + 
case VMWARE_BACKDOOR_PMC_REAL_TIME: + ctr_val = ktime_get_boot_ns(); + break; + case VMWARE_BACKDOOR_PMC_APPARENT_TIME: + ctr_val = ktime_get_boot_ns() + + vcpu->kvm->arch.kvmclock_offset; + break; + default: + return 1; + } + + *data = ctr_val; + return 0; +} + int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) { bool fast_mode = idx & (1u << 31); struct kvm_pmc *pmc; u64 ctr_val; + if (is_vmware_backdoor_pmc(idx)) + return kvm_pmu_rdpmc_vmware(vcpu, idx, data); + pmc = kvm_x86_ops->pmu_ops->msr_idx_to_pmc(vcpu, idx); if (!pmc) return 1; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index a9a62b9..ba8898e 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -9,6 +9,10 @@ /* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */ #define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf) +#define VMWARE_BACKDOOR_PMC_HOST_TSC 0x10000 +#define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001 +#define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002 + struct kvm_event_hw_type_mapping { u8 eventsel; u8 unit_mask; @@ -114,6 +118,8 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_init(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); +bool is_vmware_backdoor_pmc(u32 pmc_idx); + extern struct kvm_pmu_ops intel_pmu_ops; extern struct kvm_pmu_ops amd_pmu_ops; #endif /* __KVM_X86_PMU_H */ diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index cd94443..1495a73 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -19,6 +19,21 @@ #include "lapic.h" #include "pmu.h" +enum pmu_type { + PMU_TYPE_COUNTER = 0, + PMU_TYPE_EVNTSEL, +}; + +enum index { + INDEX_ZERO = 0, + INDEX_ONE, + INDEX_TWO, + INDEX_THREE, + INDEX_FOUR, + INDEX_FIVE, + INDEX_ERROR, +}; + /* duplicated from amd_perfmon_event_map, K7 and above should work. 
*/ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, @@ -31,6 +46,88 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, }; +static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) +{ + struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + + if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { + if (type == PMU_TYPE_COUNTER) + return MSR_F15H_PERF_CTR; + else + return MSR_F15H_PERF_CTL; + } else { + if (type == PMU_TYPE_COUNTER) + return MSR_K7_PERFCTR0; + else + return MSR_K7_EVNTSEL0; + } +} + +static enum index msr_to_index(u32 msr) +{ + switch (msr) { + case MSR_F15H_PERF_CTL0: + case MSR_F15H_PERF_CTR0: + case MSR_K7_EVNTSEL0: + case MSR_K7_PERFCTR0: + return INDEX_ZERO; + case MSR_F15H_PERF_CTL1: + case MSR_F15H_PERF_CTR1: + case MSR_K7_EVNTSEL1: + case MSR_K7_PERFCTR1: + return INDEX_ONE; + case MSR_F15H_PERF_CTL2: + case MSR_F15H_PERF_CTR2: + case MSR_K7_EVNTSEL2: + case MSR_K7_PERFCTR2: + return INDEX_TWO; + case MSR_F15H_PERF_CTL3: + case MSR_F15H_PERF_CTR3: + case MSR_K7_EVNTSEL3: + case MSR_K7_PERFCTR3: + return INDEX_THREE; + case MSR_F15H_PERF_CTL4: + case MSR_F15H_PERF_CTR4: + return INDEX_FOUR; + case MSR_F15H_PERF_CTL5: + case MSR_F15H_PERF_CTR5: + return INDEX_FIVE; + default: + return INDEX_ERROR; + } +} + +static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, + enum pmu_type type) +{ + switch (msr) { + case MSR_F15H_PERF_CTL0: + case MSR_F15H_PERF_CTL1: + case MSR_F15H_PERF_CTL2: + case MSR_F15H_PERF_CTL3: + case MSR_F15H_PERF_CTL4: + case MSR_F15H_PERF_CTL5: + case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: + if (type != PMU_TYPE_EVNTSEL) + return NULL; + break; + case MSR_F15H_PERF_CTR0: + case MSR_F15H_PERF_CTR1: + case MSR_F15H_PERF_CTR2: + case MSR_F15H_PERF_CTR3: + case MSR_F15H_PERF_CTR4: + case MSR_F15H_PERF_CTR5: + case MSR_K7_PERFCTR0 ... 
MSR_K7_PERFCTR3: + if (type != PMU_TYPE_COUNTER) + return NULL; + break; + default: + return NULL; + } + + return &pmu->gp_counters[msr_to_index(msr)]; +} + static unsigned amd_find_arch_event(struct kvm_pmu *pmu, u8 event_select, u8 unit_mask) @@ -64,7 +161,18 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc) static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) { - return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + pmc_idx, MSR_K7_EVNTSEL0); + unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER); + struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + + if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { + /* + * The idx is contiguous. The MSRs are not. The counter MSRs + * are interleaved with the event select MSRs. + */ + pmc_idx *= 2; + } + + return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER); } /* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */ @@ -96,8 +204,8 @@ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int ret = false; - ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) || - get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); + ret = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER) || + get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); return ret; } @@ -107,14 +215,14 @@ static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; - /* MSR_K7_PERFCTRn */ - pmc = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0); + /* MSR_PERFCTRn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { *data = pmc_read_counter(pmc); return 0; } - /* MSR_K7_EVNTSELn */ - pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); + /* MSR_EVNTSELn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { *data = pmc->eventsel; return 0; @@ -130,14 +238,14 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u32 msr = msr_info->index; u64 data = msr_info->data; - /* MSR_K7_PERFCTRn */ - pmc = get_gp_pmc(pmu, msr, 
MSR_K7_PERFCTR0); + /* MSR_PERFCTRn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { pmc->counter += data - pmc_read_counter(pmc); return 0; } - /* MSR_K7_EVNTSELn */ - pmc = get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); + /* MSR_EVNTSELn */ + pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { if (data == pmc->eventsel) return 0; @@ -154,7 +262,11 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; + if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) + pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS_CORE; + else + pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; + pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; pmu->reserved_bits = 0xffffffff00200000ull; /* not applicable to AMD; but clean them to prevent any fall out */ @@ -169,7 +281,9 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - for (i = 0; i < AMD64_NUM_COUNTERS ; i++) { + BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC); + + for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) { pmu->gp_counters[i].type = KVM_PMC_GP; pmu->gp_counters[i].vcpu = vcpu; pmu->gp_counters[i].idx = i; @@ -181,7 +295,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); int i; - for (i = 0; i < AMD64_NUM_COUNTERS; i++) { + for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) { struct kvm_pmc *pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b3e488a..b58787d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -131,6 +131,28 @@ static const u32 host_save_user_msrs[] = { #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) +struct kvm_sev_info { + bool active; /* SEV enabled guest */ + unsigned int asid; /* ASID used for this guest */ + unsigned int handle; /* SEV firmware handle */ + int fd; /* SEV device fd */ + unsigned long pages_locked; /* 
Number of pages locked */ + struct list_head regions_list; /* List of registered regions */ +}; + +struct kvm_svm { + struct kvm kvm; + + /* Struct members for AVIC */ + u32 avic_vm_id; + u32 ldr_mode; + struct page *avic_logical_id_table_page; + struct page *avic_physical_id_table_page; + struct hlist_node hnode; + + struct kvm_sev_info sev_info; +}; + struct kvm_vcpu; struct nested_state { @@ -178,6 +200,8 @@ struct vcpu_svm { uint64_t sysenter_eip; uint64_t tsc_aux; + u64 msr_decfg; + u64 next_rip; u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS]; @@ -274,6 +298,54 @@ static bool npt_enabled = true; static bool npt_enabled; #endif +/* + * These 2 parameters are used to config the controls for Pause-Loop Exiting: + * pause_filter_count: On processors that support Pause filtering(indicated + * by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter + * count value. On VMRUN this value is loaded into an internal counter. + * Each time a pause instruction is executed, this counter is decremented + * until it reaches zero at which time a #VMEXIT is generated if pause + * intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause + * Intercept Filtering for more details. + * This also indicate if ple logic enabled. + * + * pause_filter_thresh: In addition, some processor families support advanced + * pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on + * the amount of time a guest is allowed to execute in a pause loop. + * In this mode, a 16-bit pause filter threshold field is added in the + * VMCB. The threshold value is a cycle count that is used to reset the + * pause counter. As with simple pause filtering, VMRUN loads the pause + * count value from VMCB into an internal counter. Then, on each pause + * instruction the hardware checks the elapsed number of cycles since + * the most recent pause instruction against the pause filter threshold. 
+ * If the elapsed cycle count is greater than the pause filter threshold, + * then the internal pause count is reloaded from the VMCB and execution + * continues. If the elapsed cycle count is less than the pause filter + * threshold, then the internal pause count is decremented. If the count + * value is less than zero and PAUSE intercept is enabled, a #VMEXIT is + * triggered. If advanced pause filtering is supported and pause filter + * threshold field is set to zero, the filter will operate in the simpler, + * count only mode. + */ + +static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP; +module_param(pause_filter_thresh, ushort, 0444); + +static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW; +module_param(pause_filter_count, ushort, 0444); + +/* Default doubles per-vcpu window every exit. */ +static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW; +module_param(pause_filter_count_grow, ushort, 0444); + +/* Default resets per-vcpu window every exit to pause_filter_count. */ +static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; +module_param(pause_filter_count_shrink, ushort, 0444); + +/* Default is to compute the maximum so we can never overflow. 
*/ +static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; +module_param(pause_filter_count_max, ushort, 0444); + /* allow nested paging (virtualized MMU) for all guests */ static int npt = true; module_param(npt, int, S_IRUGO); @@ -300,6 +372,8 @@ module_param(vgif, int, 0444); static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT); module_param(sev, int, 0444); +static u8 rsm_ins_bytes[] = "\x0f\xaa"; + static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); @@ -348,6 +422,12 @@ struct enc_region { unsigned long size; }; + +static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm) +{ + return container_of(kvm, struct kvm_svm, kvm); +} + static inline bool svm_sev_enabled(void) { return max_sev_asid; @@ -355,14 +435,14 @@ static inline bool svm_sev_enabled(void) static inline bool sev_guest(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->active; } static inline int sev_get_asid(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return sev->asid; } @@ -1079,7 +1159,7 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm) } /* Note: - * This hash table is used to map VM_ID to a struct kvm_arch, + * This hash table is used to map VM_ID to a struct kvm_svm, * when handling AMD IOMMU GALOG notification to schedule in * a particular vCPU. 
*/ @@ -1096,7 +1176,7 @@ static DEFINE_SPINLOCK(svm_vm_data_hash_lock); static int avic_ga_log_notifier(u32 ga_tag) { unsigned long flags; - struct kvm_arch *ka = NULL; + struct kvm_svm *kvm_svm; struct kvm_vcpu *vcpu = NULL; u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag); u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag); @@ -1104,13 +1184,10 @@ static int avic_ga_log_notifier(u32 ga_tag) pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); - hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { - struct kvm *kvm = container_of(ka, struct kvm, arch); - struct kvm_arch *vm_data = &kvm->arch; - - if (vm_data->avic_vm_id != vm_id) + hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) { + if (kvm_svm->avic_vm_id != vm_id) continue; - vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); + vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id); break; } spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); @@ -1168,6 +1245,42 @@ err: return rc; } +static void grow_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + int old = control->pause_filter_count; + + control->pause_filter_count = __grow_ple_window(old, + pause_filter_count, + pause_filter_count_grow, + pause_filter_count_max); + + if (control->pause_filter_count != old) + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + + trace_kvm_ple_window_grow(vcpu->vcpu_id, + control->pause_filter_count, old); +} + +static void shrink_ple_window(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control = &svm->vmcb->control; + int old = control->pause_filter_count; + + control->pause_filter_count = + __shrink_ple_window(old, + pause_filter_count, + pause_filter_count_shrink, + pause_filter_count); + if (control->pause_filter_count != old) + mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + + trace_kvm_ple_window_shrink(vcpu->vcpu_id, + 
control->pause_filter_count, old); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1198,6 +1311,14 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } + /* Check for pause filtering support */ + if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { + pause_filter_count = 0; + pause_filter_thresh = 0; + } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { + pause_filter_thresh = 0; + } + if (nested) { printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); @@ -1324,10 +1445,10 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; - struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm); phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page)); - phys_addr_t lpa = __sme_set(page_to_phys(vm_data->avic_logical_id_table_page)); - phys_addr_t ppa = __sme_set(page_to_phys(vm_data->avic_physical_id_table_page)); + phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page)); + phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page)); vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK; vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK; @@ -1359,6 +1480,14 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, MC_VECTOR); set_exception_intercept(svm, AC_VECTOR); set_exception_intercept(svm, DB_VECTOR); + /* + * Guest access to VMware backdoor ports could legitimately + * trigger #GP because of TSS I/O permission bitmap. + * We intercept those #GP and allow access to them anyway + * as VMware does. 
+ */ + if (enable_vmware_backdoor) + set_exception_intercept(svm, GP_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1367,7 +1496,6 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_RDPMC); set_intercept(svm, INTERCEPT_CPUID); set_intercept(svm, INTERCEPT_INVD); - set_intercept(svm, INTERCEPT_HLT); set_intercept(svm, INTERCEPT_INVLPG); set_intercept(svm, INTERCEPT_INVLPGA); set_intercept(svm, INTERCEPT_IOIO_PROT); @@ -1383,12 +1511,16 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); set_intercept(svm, INTERCEPT_XSETBV); + set_intercept(svm, INTERCEPT_RSM); - if (!kvm_mwait_in_guest()) { + if (!kvm_mwait_in_guest(svm->vcpu.kvm)) { set_intercept(svm, INTERCEPT_MONITOR); set_intercept(svm, INTERCEPT_MWAIT); } + if (!kvm_hlt_in_guest(svm->vcpu.kvm)) + set_intercept(svm, INTERCEPT_HLT); + control->iopm_base_pa = __sme_set(iopm_base); control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->int_ctl = V_INTR_MASKING_MASK; @@ -1444,9 +1576,13 @@ static void init_vmcb(struct vcpu_svm *svm) svm->nested.vmcb = 0; svm->vcpu.arch.hflags = 0; - if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { - control->pause_filter_count = 3000; + if (pause_filter_count) { + control->pause_filter_count = pause_filter_count; + if (pause_filter_thresh) + control->pause_filter_thresh = pause_filter_thresh; set_intercept(svm, INTERCEPT_PAUSE); + } else { + clr_intercept(svm, INTERCEPT_PAUSE); } if (kvm_vcpu_apicv_active(&svm->vcpu)) @@ -1483,12 +1619,12 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, unsigned int index) { u64 *avic_physical_id_table; - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); if (index >= AVIC_MAX_PHYSICAL_ID_COUNT) return NULL; - avic_physical_id_table = page_address(vm_data->avic_physical_id_table_page); + avic_physical_id_table = 
page_address(kvm_svm->avic_physical_id_table_page); return &avic_physical_id_table[index]; } @@ -1571,7 +1707,7 @@ static void __sev_asid_free(int asid) static void sev_asid_free(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; __sev_asid_free(sev->asid); } @@ -1611,7 +1747,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, unsigned long ulen, unsigned long *n, int write) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; unsigned long npages, npinned, size; unsigned long locked, lock_limit; struct page **pages; @@ -1662,7 +1798,7 @@ err: static void sev_unpin_memory(struct kvm *kvm, struct page **pages, unsigned long npages) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; release_pages(pages, npages); kvfree(pages); @@ -1700,9 +1836,20 @@ static void __unregister_enc_region_locked(struct kvm *kvm, kfree(region); } +static struct kvm *svm_vm_alloc(void) +{ + struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL); + return &kvm_svm->kvm; +} + +static void svm_vm_free(struct kvm *kvm) +{ + kfree(to_kvm_svm(kvm)); +} + static void sev_vm_destroy(struct kvm *kvm) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct list_head *head = &sev->regions_list; struct list_head *pos, *q; @@ -1731,18 +1878,18 @@ static void sev_vm_destroy(struct kvm *kvm) static void avic_vm_destroy(struct kvm *kvm) { unsigned long flags; - struct kvm_arch *vm_data = &kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); if (!avic) return; - if (vm_data->avic_logical_id_table_page) - __free_page(vm_data->avic_logical_id_table_page); - if (vm_data->avic_physical_id_table_page) - __free_page(vm_data->avic_physical_id_table_page); + if (kvm_svm->avic_logical_id_table_page) + 
__free_page(kvm_svm->avic_logical_id_table_page); + if (kvm_svm->avic_physical_id_table_page) + __free_page(kvm_svm->avic_physical_id_table_page); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); - hash_del(&vm_data->hnode); + hash_del(&kvm_svm->hnode); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); } @@ -1756,10 +1903,10 @@ static int avic_vm_init(struct kvm *kvm) { unsigned long flags; int err = -ENOMEM; - struct kvm_arch *vm_data = &kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(kvm); + struct kvm_svm *k2; struct page *p_page; struct page *l_page; - struct kvm_arch *ka; u32 vm_id; if (!avic) @@ -1770,7 +1917,7 @@ static int avic_vm_init(struct kvm *kvm) if (!p_page) goto free_avic; - vm_data->avic_physical_id_table_page = p_page; + kvm_svm->avic_physical_id_table_page = p_page; clear_page(page_address(p_page)); /* Allocating logical APIC ID table (4KB) */ @@ -1778,7 +1925,7 @@ static int avic_vm_init(struct kvm *kvm) if (!l_page) goto free_avic; - vm_data->avic_logical_id_table_page = l_page; + kvm_svm->avic_logical_id_table_page = l_page; clear_page(page_address(l_page)); spin_lock_irqsave(&svm_vm_data_hash_lock, flags); @@ -1790,15 +1937,13 @@ static int avic_vm_init(struct kvm *kvm) } /* Is it still in use? 
Only possible if wrapped at least once */ if (next_vm_id_wrapped) { - hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) { - struct kvm *k2 = container_of(ka, struct kvm, arch); - struct kvm_arch *vd2 = &k2->arch; - if (vd2->avic_vm_id == vm_id) + hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) { + if (k2->avic_vm_id == vm_id) goto again; } } - vm_data->avic_vm_id = vm_id; - hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id); + kvm_svm->avic_vm_id = vm_id; + hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id); spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags); return 0; @@ -1902,6 +2047,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 dummy; u32 eax = 1; + vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; if (!init_event) { @@ -2529,14 +2675,7 @@ static int bp_interception(struct vcpu_svm *svm) static int ud_interception(struct vcpu_svm *svm) { - int er; - - er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(&svm->vcpu, UD_VECTOR); - return 1; + return handle_ud(&svm->vcpu); } static int ac_interception(struct vcpu_svm *svm) @@ -2545,6 +2684,23 @@ static int ac_interception(struct vcpu_svm *svm) return 1; } +static int gp_interception(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 error_code = svm->vmcb->control.exit_info_1; + int er; + + WARN_ON_ONCE(!enable_vmware_backdoor); + + er = emulate_instruction(vcpu, + EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); + if (er == EMULATE_USER_EXIT) + return 0; + else if (er != EMULATE_DONE) + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; +} + static bool is_erratum_383(void) { int err, i; @@ -2633,7 +2789,7 @@ static int io_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? 
*/ - int size, in, string, ret; + int size, in, string; unsigned port; ++svm->vcpu.stat.io_exits; @@ -2645,16 +2801,8 @@ static int io_interception(struct vcpu_svm *svm) port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; svm->next_rip = svm->vmcb->control.exit_info_2; - ret = kvm_skip_emulated_instruction(&svm->vcpu); - /* - * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ - if (in) - return kvm_fast_pio_in(vcpu, size, port) && ret; - else - return kvm_fast_pio_out(vcpu, size, port) && ret; + return kvm_fast_pio(&svm->vcpu, size, port, in); } static int nmi_interception(struct vcpu_svm *svm) @@ -3699,6 +3847,12 @@ static int emulate_on_interception(struct vcpu_svm *svm) return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; } +static int rsm_interception(struct vcpu_svm *svm) +{ + return x86_emulate_instruction(&svm->vcpu, 0, 0, + rsm_ins_bytes, 2) == EMULATE_DONE; +} + static int rdpmc_interception(struct vcpu_svm *svm) { int err; @@ -3860,6 +4014,22 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } +static int svm_get_msr_feature(struct kvm_msr_entry *msr) +{ + msr->data = 0; + + switch (msr->index) { + case MSR_F10H_DECFG: + if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + break; + default: + return 1; + } + + return 0; +} + static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3935,9 +4105,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->spec_ctrl; break; - case MSR_IA32_UCODE_REV: - msr_info->data = 0x01000065; - break; case MSR_F15H_IC_CFG: { int family, model; @@ -3955,6 +4122,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; + case MSR_F10H_DECFG: + msr_info->data = svm->msr_decfg; + break; default: return kvm_get_msr_common(vcpu, 
msr_info); } @@ -4133,6 +4303,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; + case MSR_F10H_DECFG: { + struct kvm_msr_entry msr_entry; + + msr_entry.index = msr->index; + if (svm_get_msr_feature(&msr_entry)) + return 1; + + /* Check the supported bits */ + if (data & ~msr_entry.data) + return 1; + + /* Don't allow the guest to change a bit, #GP */ + if (!msr->host_initiated && (data ^ msr_entry.data)) + return 1; + + svm->msr_decfg = data; + break; + } case MSR_IA32_APICBASE: if (kvm_vcpu_apicv_active(vcpu)) avic_update_vapic_bar(to_svm(vcpu), data); @@ -4187,6 +4375,9 @@ static int pause_interception(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; bool in_kernel = (svm_get_cpl(vcpu) == 0); + if (pause_filter_thresh) + grow_ple_window(vcpu); + kvm_vcpu_on_spin(vcpu, in_kernel); return 1; } @@ -4277,7 +4468,7 @@ static int avic_incomplete_ipi_interception(struct vcpu_svm *svm) static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) { - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); int index; u32 *logical_apic_id_table; int dlid = GET_APIC_LOGICAL_ID(ldr); @@ -4299,7 +4490,7 @@ static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat) index = (cluster << 2) + apic; } - logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page); + logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page); return &logical_apic_id_table[index]; } @@ -4379,7 +4570,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct kvm_arch *vm_data = &vcpu->kvm->arch; + struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm); u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR); u32 mod = (dfr >> 28) & 0xf; @@ 
-4388,11 +4579,11 @@ static int avic_handle_dfr_update(struct kvm_vcpu *vcpu) * If this changes, we need to flush the AVIC logical * APID id table. */ - if (vm_data->ldr_mode == mod) + if (kvm_svm->ldr_mode == mod) return 0; - clear_page(page_address(vm_data->avic_logical_id_table_page)); - vm_data->ldr_mode = mod; + clear_page(page_address(kvm_svm->avic_logical_id_table_page)); + kvm_svm->ldr_mode = mod; if (svm->ldr_reg) avic_handle_ldr_update(vcpu); @@ -4512,6 +4703,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, + [SVM_EXIT_EXCP_BASE + GP_VECTOR] = gp_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, @@ -4541,7 +4733,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, [SVM_EXIT_NPF] = npf_interception, - [SVM_EXIT_RSM] = emulate_on_interception, + [SVM_EXIT_RSM] = rsm_interception, [SVM_EXIT_AVIC_INCOMPLETE_IPI] = avic_incomplete_ipi_interception, [SVM_EXIT_AVIC_UNACCELERATED_ACCESS] = avic_unaccelerated_access_interception, }; @@ -4560,6 +4752,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); pr_err("%-20s%016llx\n", "intercepts:", control->intercept); pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); + pr_err("%-20s%d\n", "pause filter threshold:", + control->pause_filter_thresh); pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); @@ -5027,7 +5221,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, /* Try to enable guest_mode in IRTE */ pi.base = 
__sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); - pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, + pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id, svm->vcpu.vcpu_id); pi.is_guest_mode = true; pi.vcpu_data = &vcpu_info; @@ -5191,6 +5385,11 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } +static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +{ + return 0; +} + static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5355,7 +5554,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5464,11 +5663,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (svm->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -5492,14 +5691,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) - kvm_before_handle_nmi(&svm->vcpu); + kvm_before_interrupt(&svm->vcpu); stgi(); /* Any pending NMI will happen here */ if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) - kvm_after_handle_nmi(&svm->vcpu); + kvm_after_interrupt(&svm->vcpu); sync_cr8_to_lapic(vcpu); @@ -5875,6 +6074,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) { + if (pause_filter_thresh) + 
shrink_ple_window(vcpu); } static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) @@ -5991,7 +6192,7 @@ static int sev_asid_new(void) static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; int asid, ret; ret = -EBUSY; @@ -6056,14 +6257,14 @@ static int __sev_issue_cmd(int fd, int id, void *data, int *error) static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; return __sev_issue_cmd(sev->fd, id, data, error); } static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_start *start; struct kvm_sev_launch_start params; void *dh_blob, *session_blob; @@ -6161,7 +6362,7 @@ static int get_num_contig_pages(int idx, struct page **inpages, static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp) { unsigned long vaddr, vaddr_end, next_vaddr, npages, size; - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_launch_update_data params; struct sev_data_launch_update_data *data; struct page **inpages; @@ -6236,16 +6437,18 @@ e_free: static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + void __user *measure = (void __user *)(uintptr_t)argp->data; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_measure *data; struct kvm_sev_launch_measure params; + void __user *p = NULL; void *blob = NULL; int ret; if (!sev_guest(kvm)) return -ENOTTY; - if (copy_from_user(¶ms, (void __user *)(uintptr_t)argp->data, sizeof(params))) + if (copy_from_user(¶ms, measure, sizeof(params))) return -EFAULT; data = 
kzalloc(sizeof(*data), GFP_KERNEL); @@ -6256,17 +6459,13 @@ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!params.len) goto cmd; - if (params.uaddr) { + p = (void __user *)(uintptr_t)params.uaddr; + if (p) { if (params.len > SEV_FW_BLOB_MAX_SIZE) { ret = -EINVAL; goto e_free; } - if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) { - ret = -EFAULT; - goto e_free; - } - ret = -ENOMEM; blob = kmalloc(params.len, GFP_KERNEL); if (!blob) @@ -6290,13 +6489,13 @@ cmd: goto e_free_blob; if (blob) { - if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len)) + if (copy_to_user(p, blob, params.len)) ret = -EFAULT; } done: params.len = data->len; - if (copy_to_user((void __user *)(uintptr_t)argp->data, ¶ms, sizeof(params))) + if (copy_to_user(measure, ¶ms, sizeof(params))) ret = -EFAULT; e_free_blob: kfree(blob); @@ -6307,7 +6506,7 @@ e_free: static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_launch_finish *data; int ret; @@ -6327,7 +6526,7 @@ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_guest_status params; struct sev_data_guest_status *data; int ret; @@ -6359,7 +6558,7 @@ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src, unsigned long dst, int size, int *error, bool enc) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_data_dbg *data; int ret; @@ -6591,13 +6790,13 @@ err: static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct 
sev_data_launch_secret *data; struct kvm_sev_launch_secret params; struct page **pages; void *blob, *hdr; unsigned long n; - int ret; + int ret, offset; if (!sev_guest(kvm)) return -ENOTTY; @@ -6623,6 +6822,10 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) if (!data) goto e_unpin_memory; + offset = params.guest_uaddr & (PAGE_SIZE - 1); + data->guest_address = __sme_page_pa(pages[0]) + offset; + data->guest_len = params.guest_len; + blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len); if (IS_ERR(blob)) { ret = PTR_ERR(blob); @@ -6637,8 +6840,8 @@ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp) ret = PTR_ERR(hdr); goto e_free_blob; } - data->trans_address = __psp_pa(blob); - data->trans_len = params.trans_len; + data->hdr_address = __psp_pa(hdr); + data->hdr_len = params.hdr_len; data->handle = sev->handle; ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error); @@ -6711,7 +6914,7 @@ out: static int svm_register_enc_region(struct kvm *kvm, struct kvm_enc_region *range) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct enc_region *region; int ret = 0; @@ -6753,7 +6956,7 @@ e_free: static struct enc_region * find_enc_region(struct kvm *kvm, struct kvm_enc_region *range) { - struct kvm_sev_info *sev = &kvm->arch.sev_info; + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct list_head *head = &sev->regions_list; struct enc_region *i; @@ -6811,6 +7014,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_free = svm_free_vcpu, .vcpu_reset = svm_vcpu_reset, + .vm_alloc = svm_vm_alloc, + .vm_free = svm_vm_free, .vm_init = avic_vm_init, .vm_destroy = svm_vm_destroy, @@ -6821,6 +7026,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .vcpu_unblocking = svm_vcpu_unblocking, .update_bp_intercept = update_bp_intercept, + .get_msr_feature = svm_get_msr_feature, .get_msr = svm_get_msr, .set_msr = 
svm_set_msr, .get_segment_base = svm_get_segment_base, @@ -6876,6 +7082,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .apicv_post_state_restore = avic_post_state_restore, .set_tss_addr = svm_set_tss_addr, + .set_identity_map_addr = svm_set_identity_map_addr, .get_tdp_level = get_npt_level, .get_mt_mask = svm_get_mt_mask, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dec126..aafcc98 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -52,9 +52,11 @@ #include <asm/irq_remapping.h> #include <asm/mmu_context.h> #include <asm/nospec-branch.h> +#include <asm/mshyperv.h> #include "trace.h" #include "pmu.h" +#include "vmx_evmcs.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) #define __ex_clear(x, reg) \ @@ -130,13 +132,15 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #endif #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) -#define KVM_VM_CR0_ALWAYS_ON \ - (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) +#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE +#define KVM_VM_CR0_ALWAYS_ON \ + (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ + X86_CR0_WP | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD) +#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) @@ -165,34 +169,33 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); * Time is measured based on a counter that runs at the same rate as the TSC, * refer SDM volume 3b section 21.6.13 & 22.1.3. 
*/ -#define KVM_VMX_DEFAULT_PLE_GAP 128 -#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 -#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2 -#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0 -#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \ - INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW +static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; -static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; -module_param(ple_gap, int, S_IRUGO); - -static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; -module_param(ple_window, int, S_IRUGO); +static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; +module_param(ple_window, uint, 0444); /* Default doubles per-vcpu window every exit. */ -static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW; -module_param(ple_window_grow, int, S_IRUGO); +static unsigned int ple_window_grow = KVM_DEFAULT_PLE_WINDOW_GROW; +module_param(ple_window_grow, uint, 0444); /* Default resets per-vcpu window every exit to ple_window. */ -static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK; -module_param(ple_window_shrink, int, S_IRUGO); +static unsigned int ple_window_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK; +module_param(ple_window_shrink, uint, 0444); /* Default is to compute the maximum so we can never overflow. */ -static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; -static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; -module_param(ple_window_max, int, S_IRUGO); +static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; +module_param(ple_window_max, uint, 0444); extern const ulong vmx_return; +struct kvm_vmx { + struct kvm kvm; + + unsigned int tss_addr; + bool ept_identity_pagetable_done; + gpa_t ept_identity_map_addr; +}; + #define NR_AUTOLOAD_MSRS 8 struct vmcs { @@ -424,6 +427,35 @@ struct __packed vmcs12 { */ #define VMCS12_MAX_FIELD_INDEX 0x17 +struct nested_vmx_msrs { + /* + * We only store the "true" versions of the VMX capability MSRs. We + * generate the "non-true" versions by setting the must-be-1 bits + * according to the SDM. 
+ */ + u32 procbased_ctls_low; + u32 procbased_ctls_high; + u32 secondary_ctls_low; + u32 secondary_ctls_high; + u32 pinbased_ctls_low; + u32 pinbased_ctls_high; + u32 exit_ctls_low; + u32 exit_ctls_high; + u32 entry_ctls_low; + u32 entry_ctls_high; + u32 misc_low; + u32 misc_high; + u32 ept_caps; + u32 vpid_caps; + u64 basic; + u64 cr0_fixed0; + u64 cr0_fixed1; + u64 cr4_fixed0; + u64 cr4_fixed1; + u64 vmcs_enum; + u64 vmfunc_controls; +}; + /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -475,32 +507,7 @@ struct nested_vmx { u16 vpid02; u16 last_vpid; - /* - * We only store the "true" versions of the VMX capability MSRs. We - * generate the "non-true" versions by setting the must-be-1 bits - * according to the SDM. - */ - u32 nested_vmx_procbased_ctls_low; - u32 nested_vmx_procbased_ctls_high; - u32 nested_vmx_secondary_ctls_low; - u32 nested_vmx_secondary_ctls_high; - u32 nested_vmx_pinbased_ctls_low; - u32 nested_vmx_pinbased_ctls_high; - u32 nested_vmx_exit_ctls_low; - u32 nested_vmx_exit_ctls_high; - u32 nested_vmx_entry_ctls_low; - u32 nested_vmx_entry_ctls_high; - u32 nested_vmx_misc_low; - u32 nested_vmx_misc_high; - u32 nested_vmx_ept_caps; - u32 nested_vmx_vpid_caps; - u64 nested_vmx_basic; - u64 nested_vmx_cr0_fixed0; - u64 nested_vmx_cr0_fixed1; - u64 nested_vmx_cr4_fixed0; - u64 nested_vmx_cr4_fixed1; - u64 nested_vmx_vmcs_enum; - u64 nested_vmx_vmfunc_controls; + struct nested_vmx_msrs msrs; /* SMM related state */ struct { @@ -691,6 +698,11 @@ enum segment_cache_field { SEG_FIELD_NR = 4 }; +static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm) +{ + return container_of(kvm, struct kvm_vmx, kvm); +} + static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) { return container_of(vcpu, struct vcpu_vmx, vcpu); @@ -953,6 +965,7 @@ static struct vmcs_config { u32 cpu_based_2nd_exec_ctrl; u32 vmexit_ctrl; u32 vmentry_ctrl; + struct nested_vmx_msrs 
nested; } vmcs_config; static struct vmx_capability { @@ -999,6 +1012,169 @@ static const u32 vmx_msr_index[] = { MSR_EFER, MSR_TSC_AUX, MSR_STAR, }; +DEFINE_STATIC_KEY_FALSE(enable_evmcs); + +#define current_evmcs ((struct hv_enlightened_vmcs *)this_cpu_read(current_vmcs)) + +#define KVM_EVMCS_VERSION 1 + +#if IS_ENABLED(CONFIG_HYPERV) +static bool __read_mostly enlightened_vmcs = true; +module_param(enlightened_vmcs, bool, 0444); + +static inline void evmcs_write64(unsigned long field, u64 value) +{ + u16 clean_field; + int offset = get_evmcs_offset(field, &clean_field); + + if (offset < 0) + return; + + *(u64 *)((char *)current_evmcs + offset) = value; + + current_evmcs->hv_clean_fields &= ~clean_field; +} + +static inline void evmcs_write32(unsigned long field, u32 value) +{ + u16 clean_field; + int offset = get_evmcs_offset(field, &clean_field); + + if (offset < 0) + return; + + *(u32 *)((char *)current_evmcs + offset) = value; + current_evmcs->hv_clean_fields &= ~clean_field; +} + +static inline void evmcs_write16(unsigned long field, u16 value) +{ + u16 clean_field; + int offset = get_evmcs_offset(field, &clean_field); + + if (offset < 0) + return; + + *(u16 *)((char *)current_evmcs + offset) = value; + current_evmcs->hv_clean_fields &= ~clean_field; +} + +static inline u64 evmcs_read64(unsigned long field) +{ + int offset = get_evmcs_offset(field, NULL); + + if (offset < 0) + return 0; + + return *(u64 *)((char *)current_evmcs + offset); +} + +static inline u32 evmcs_read32(unsigned long field) +{ + int offset = get_evmcs_offset(field, NULL); + + if (offset < 0) + return 0; + + return *(u32 *)((char *)current_evmcs + offset); +} + +static inline u16 evmcs_read16(unsigned long field) +{ + int offset = get_evmcs_offset(field, NULL); + + if (offset < 0) + return 0; + + return *(u16 *)((char *)current_evmcs + offset); +} + +static void evmcs_load(u64 phys_addr) +{ + struct hv_vp_assist_page *vp_ap = + hv_get_vp_assist_page(smp_processor_id()); + + 
vp_ap->current_nested_vmcs = phys_addr; + vp_ap->enlighten_vmentry = 1; +} + +static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) +{ + /* + * Enlightened VMCSv1 doesn't support these: + * + * POSTED_INTR_NV = 0x00000002, + * GUEST_INTR_STATUS = 0x00000810, + * APIC_ACCESS_ADDR = 0x00002014, + * POSTED_INTR_DESC_ADDR = 0x00002016, + * EOI_EXIT_BITMAP0 = 0x0000201c, + * EOI_EXIT_BITMAP1 = 0x0000201e, + * EOI_EXIT_BITMAP2 = 0x00002020, + * EOI_EXIT_BITMAP3 = 0x00002022, + */ + vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + vmcs_conf->cpu_based_2nd_exec_ctrl &= + ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; + vmcs_conf->cpu_based_2nd_exec_ctrl &= + ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmcs_conf->cpu_based_2nd_exec_ctrl &= + ~SECONDARY_EXEC_APIC_REGISTER_VIRT; + + /* + * GUEST_PML_INDEX = 0x00000812, + * PML_ADDRESS = 0x0000200e, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML; + + /* VM_FUNCTION_CONTROL = 0x00002018, */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC; + + /* + * EPTP_LIST_ADDRESS = 0x00002024, + * VMREAD_BITMAP = 0x00002026, + * VMWRITE_BITMAP = 0x00002028, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS; + + /* + * TSC_MULTIPLIER = 0x00002032, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING; + + /* + * PLE_GAP = 0x00004020, + * PLE_WINDOW = 0x00004022, + */ + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; + + /* + * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + */ + vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; + + /* + * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + */ + vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + + /* + * Currently unsupported in KVM: + * GUEST_IA32_RTIT_CTL = 0x00002814, + */ +} +#else /* !IS_ENABLED(CONFIG_HYPERV) */ +static inline 
void evmcs_write64(unsigned long field, u64 value) {} +static inline void evmcs_write32(unsigned long field, u32 value) {} +static inline void evmcs_write16(unsigned long field, u16 value) {} +static inline u64 evmcs_read64(unsigned long field) { return 0; } +static inline u32 evmcs_read32(unsigned long field) { return 0; } +static inline u16 evmcs_read16(unsigned long field) { return 0; } +static inline void evmcs_load(u64 phys_addr) {} +static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} +#endif /* IS_ENABLED(CONFIG_HYPERV) */ + static inline bool is_exception_n(u32 intr_info, u8 vector) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -1031,6 +1207,11 @@ static inline bool is_invalid_opcode(u32 intr_info) return is_exception_n(intr_info, UD_VECTOR); } +static inline bool is_gp_fault(u32 intr_info) +{ + return is_exception_n(intr_info, GP_VECTOR); +} + static inline bool is_external_interrupt(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) @@ -1044,6 +1225,13 @@ static inline bool is_machine_check(u32 intr_info) (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); } +/* Undocumented: icebp/int1 */ +static inline bool is_icebp(u32 intr_info) +{ + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) + == (INTR_TYPE_PRIV_SW_EXCEPTION | INTR_INFO_VALID_MASK); +} + static inline bool cpu_has_vmx_msr_bitmap(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; @@ -1313,7 +1501,7 @@ static inline bool report_flexpriority(void) static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu) { - return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low); + return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low); } static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) @@ -1334,6 +1522,16 @@ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) 
PIN_BASED_VMX_PREEMPTION_TIMER; } +static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12) +{ + return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING; +} + +static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) +{ + return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; +} + static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) { return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); @@ -1472,6 +1670,9 @@ static void vmcs_load(struct vmcs *vmcs) u64 phys_addr = __pa(vmcs); u8 error; + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_load(phys_addr); + asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" : "=qm"(error) : "a"(&phys_addr), "m"(phys_addr) : "cc", "memory"); @@ -1645,18 +1846,24 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) static __always_inline u16 vmcs_read16(unsigned long field) { vmcs_check16(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read16(field); return __vmcs_readl(field); } static __always_inline u32 vmcs_read32(unsigned long field) { vmcs_check32(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read32(field); return __vmcs_readl(field); } static __always_inline u64 vmcs_read64(unsigned long field) { vmcs_check64(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read64(field); #ifdef CONFIG_X86_64 return __vmcs_readl(field); #else @@ -1667,6 +1874,8 @@ static __always_inline u64 vmcs_read64(unsigned long field) static __always_inline unsigned long vmcs_readl(unsigned long field) { vmcs_checkl(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_read64(field); return __vmcs_readl(field); } @@ -1690,18 +1899,27 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val static __always_inline void vmcs_write16(unsigned long field, u16 value) { vmcs_check16(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write16(field, value); + 
__vmcs_writel(field, value); } static __always_inline void vmcs_write32(unsigned long field, u32 value) { vmcs_check32(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write32(field, value); + __vmcs_writel(field, value); } static __always_inline void vmcs_write64(unsigned long field, u64 value) { vmcs_check64(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write64(field, value); + __vmcs_writel(field, value); #ifndef CONFIG_X86_64 asm volatile (""); @@ -1712,6 +1930,9 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value) static __always_inline void vmcs_writel(unsigned long field, unsigned long value) { vmcs_checkl(field); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write64(field, value); + __vmcs_writel(field, value); } @@ -1719,6 +1940,9 @@ static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, "vmcs_clear_bits does not support 64-bit fields"); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write32(field, evmcs_read32(field) & ~mask); + __vmcs_writel(field, __vmcs_readl(field) & ~mask); } @@ -1726,6 +1950,9 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, "vmcs_set_bits does not support 64-bit fields"); + if (static_branch_unlikely(&enable_evmcs)) + return evmcs_write32(field, evmcs_read32(field) | mask); + __vmcs_writel(field, __vmcs_readl(field) | mask); } @@ -1857,6 +2084,14 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); + /* + * Guest access to VMware backdoor ports could legitimately + * trigger #GP because of TSS I/O permission bitmap. + * We intercept those #GP and allow access to them anyway + * as VMware does. 
+ */ + if (enable_vmware_backdoor) + eb |= (1u << GP_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -2122,6 +2357,9 @@ static unsigned long segment_base(u16 selector) static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); +#ifdef CONFIG_X86_64 + int cpu = raw_smp_processor_id(); +#endif int i; if (vmx->host_state.loaded) @@ -2134,7 +2372,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) */ vmx->host_state.ldt_sel = kvm_read_ldt(); vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; + +#ifdef CONFIG_X86_64 + save_fsgs_for_kvm(); + vmx->host_state.fs_sel = current->thread.fsindex; + vmx->host_state.gs_sel = current->thread.gsindex; +#else savesegment(fs, vmx->host_state.fs_sel); + savesegment(gs, vmx->host_state.gs_sel); +#endif if (!(vmx->host_state.fs_sel & 7)) { vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); vmx->host_state.fs_reload_needed = 0; @@ -2142,7 +2388,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) vmcs_write16(HOST_FS_SELECTOR, 0); vmx->host_state.fs_reload_needed = 1; } - savesegment(gs, vmx->host_state.gs_sel); if (!(vmx->host_state.gs_sel & 7)) vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); else { @@ -2153,20 +2398,16 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) #ifdef CONFIG_X86_64 savesegment(ds, vmx->host_state.ds_sel); savesegment(es, vmx->host_state.es_sel); -#endif -#ifdef CONFIG_X86_64 - vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE)); - vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE)); -#else - vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); - vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); -#endif + vmcs_writel(HOST_FS_BASE, current->thread.fsbase); + vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu)); -#ifdef CONFIG_X86_64 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); + 
vmx->msr_host_kernel_gs_base = current->thread.gsbase; if (is_long_mode(&vmx->vcpu)) wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); +#else + vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel)); + vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel)); #endif if (boot_cpu_has(X86_FEATURE_MPX)) rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); @@ -2525,6 +2766,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit return 0; } +static void vmx_clear_hlt(struct kvm_vcpu *vcpu) +{ + /* + * Ensure that we clear the HLT state in the VMCS. We don't need to + * explicitly skip the instruction because if the HLT state is set, + * then the instruction is already executing and RIP has already been + * advanced. + */ + if (kvm_hlt_in_guest(vcpu->kvm) && + vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT) + vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); +} + static void vmx_queue_exception(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2547,6 +2801,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) return; } + WARN_ON_ONCE(vmx->emulation_required); + if (kvm_exception_is_soft(nr)) { vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmx->vcpu.arch.event_exit_inst_len); @@ -2555,6 +2811,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) intr_info |= INTR_TYPE_HARD_EXCEPTION; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); + + vmx_clear_hlt(vcpu); } static bool vmx_rdtscp_supported(void) @@ -2682,8 +2940,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) * bit in the high half is on if the corresponding bit in the control field * may be on. See also vmx_control_verify(). 
*/ -static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) +static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv) { + if (!nested) { + memset(msrs, 0, sizeof(*msrs)); + return; + } + /* * Note that as a general rule, the high half of the MSRs (bits in * the control fields which may be 1) should be initialized by the @@ -2701,70 +2964,68 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) /* pin-based controls */ rdmsr(MSR_IA32_VMX_PINBASED_CTLS, - vmx->nested.nested_vmx_pinbased_ctls_low, - vmx->nested.nested_vmx_pinbased_ctls_high); - vmx->nested.nested_vmx_pinbased_ctls_low |= + msrs->pinbased_ctls_low, + msrs->pinbased_ctls_high); + msrs->pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_pinbased_ctls_high &= + msrs->pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | - PIN_BASED_VIRTUAL_NMIS; - vmx->nested.nested_vmx_pinbased_ctls_high |= + PIN_BASED_VIRTUAL_NMIS | + (apicv ? PIN_BASED_POSTED_INTR : 0); + msrs->pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - if (kvm_vcpu_apicv_active(&vmx->vcpu)) - vmx->nested.nested_vmx_pinbased_ctls_high |= - PIN_BASED_POSTED_INTR; /* exit controls */ rdmsr(MSR_IA32_VMX_EXIT_CTLS, - vmx->nested.nested_vmx_exit_ctls_low, - vmx->nested.nested_vmx_exit_ctls_high); - vmx->nested.nested_vmx_exit_ctls_low = + msrs->exit_ctls_low, + msrs->exit_ctls_high); + msrs->exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_exit_ctls_high &= + msrs->exit_ctls_high &= #ifdef CONFIG_X86_64 VM_EXIT_HOST_ADDR_SPACE_SIZE | #endif VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; - vmx->nested.nested_vmx_exit_ctls_high |= + msrs->exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; if (kvm_mpx_supported()) - vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + 
msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; /* We support free control of debug control saving. */ - vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; + msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, - vmx->nested.nested_vmx_entry_ctls_low, - vmx->nested.nested_vmx_entry_ctls_high); - vmx->nested.nested_vmx_entry_ctls_low = + msrs->entry_ctls_low, + msrs->entry_ctls_high); + msrs->entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_entry_ctls_high &= + msrs->entry_ctls_high &= #ifdef CONFIG_X86_64 VM_ENTRY_IA32E_MODE | #endif VM_ENTRY_LOAD_IA32_PAT; - vmx->nested.nested_vmx_entry_ctls_high |= + msrs->entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); if (kvm_mpx_supported()) - vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; /* We support free control of debug control loading. */ - vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; + msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, - vmx->nested.nested_vmx_procbased_ctls_low, - vmx->nested.nested_vmx_procbased_ctls_high); - vmx->nested.nested_vmx_procbased_ctls_low = + msrs->procbased_ctls_low, + msrs->procbased_ctls_high); + msrs->procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; - vmx->nested.nested_vmx_procbased_ctls_high &= + msrs->procbased_ctls_high &= CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING | @@ -2784,12 +3045,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * can use it to avoid exits to L1 - even when L0 runs L2 * without MSR bitmaps. 
*/ - vmx->nested.nested_vmx_procbased_ctls_high |= + msrs->procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | CPU_BASED_USE_MSR_BITMAPS; /* We support free control of CR3 access interception. */ - vmx->nested.nested_vmx_procbased_ctls_low &= + msrs->procbased_ctls_low &= ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); /* @@ -2797,10 +3058,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * depend on CPUID bits, they are added later by vmx_cpuid_update. */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, - vmx->nested.nested_vmx_secondary_ctls_low, - vmx->nested.nested_vmx_secondary_ctls_high); - vmx->nested.nested_vmx_secondary_ctls_low = 0; - vmx->nested.nested_vmx_secondary_ctls_high &= + msrs->secondary_ctls_low, + msrs->secondary_ctls_high); + msrs->secondary_ctls_low = 0; + msrs->secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | @@ -2810,33 +3071,33 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) if (enable_ept) { /* nested EPT: emulate EPT also to L1 */ - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; - vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | + msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT | VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; if (cpu_has_vmx_ept_execute_only()) - vmx->nested.nested_vmx_ept_caps |= + msrs->ept_caps |= VMX_EPT_EXECUTE_ONLY_BIT; - vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | + msrs->ept_caps &= vmx_capability.ept; + msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | VMX_EPT_1GB_PAGE_BIT; if (enable_ept_ad_bits) { - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_PML; - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; + msrs->ept_caps |= VMX_EPT_AD_BIT; } } if (cpu_has_vmx_vmfunc()) 
{ - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_VMFUNC; /* * Advertise EPTP switching unconditionally * since we emulate it */ if (enable_ept) - vmx->nested.nested_vmx_vmfunc_controls = + msrs->vmfunc_controls = VMX_VMFUNC_EPTP_SWITCHING; } @@ -2847,25 +3108,25 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * not failing the single-context invvpid, and it is worse. */ if (enable_vpid) { - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_ENABLE_VPID; - vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | + msrs->vpid_caps = VMX_VPID_INVVPID_BIT | VMX_VPID_EXTENT_SUPPORTED_MASK; } if (enable_unrestricted_guest) - vmx->nested.nested_vmx_secondary_ctls_high |= + msrs->secondary_ctls_high |= SECONDARY_EXEC_UNRESTRICTED_GUEST; /* miscellaneous data */ rdmsr(MSR_IA32_VMX_MISC, - vmx->nested.nested_vmx_misc_low, - vmx->nested.nested_vmx_misc_high); - vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; - vmx->nested.nested_vmx_misc_low |= + msrs->misc_low, + msrs->misc_high); + msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA; + msrs->misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | VMX_MISC_ACTIVITY_HLT; - vmx->nested.nested_vmx_misc_high = 0; + msrs->misc_high = 0; /* * This MSR reports some information about VMX support. We @@ -2873,14 +3134,14 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. 
*/ - vmx->nested.nested_vmx_basic = + msrs->basic = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); if (cpu_has_vmx_basic_inout()) - vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT; + msrs->basic |= VMX_BASIC_INOUT; /* * These MSRs specify bits which the guest must keep fixed on @@ -2889,15 +3150,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) */ #define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE) #define VMXON_CR4_ALWAYSON X86_CR4_VMXE - vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON; - vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON; + msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON; + msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON; /* These MSRs specify bits which the guest must keep fixed off. */ - rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1); - rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1); + rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); + rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); /* highest index: VMX_PREEMPTION_TIMER_VALUE */ - vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; + msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1; } /* @@ -2934,7 +3195,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | /* reserved */ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); - u64 vmx_basic = vmx->nested.nested_vmx_basic; + u64 vmx_basic = vmx->nested.msrs.basic; if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) return -EINVAL; @@ -2953,7 +3214,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data)) return -EINVAL; - vmx->nested.nested_vmx_basic = data; + vmx->nested.msrs.basic = data; return 0; } @@ -2965,24 +3226,24 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) switch (msr_index) { case 
MSR_IA32_VMX_TRUE_PINBASED_CTLS: - lowp = &vmx->nested.nested_vmx_pinbased_ctls_low; - highp = &vmx->nested.nested_vmx_pinbased_ctls_high; + lowp = &vmx->nested.msrs.pinbased_ctls_low; + highp = &vmx->nested.msrs.pinbased_ctls_high; break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: - lowp = &vmx->nested.nested_vmx_procbased_ctls_low; - highp = &vmx->nested.nested_vmx_procbased_ctls_high; + lowp = &vmx->nested.msrs.procbased_ctls_low; + highp = &vmx->nested.msrs.procbased_ctls_high; break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: - lowp = &vmx->nested.nested_vmx_exit_ctls_low; - highp = &vmx->nested.nested_vmx_exit_ctls_high; + lowp = &vmx->nested.msrs.exit_ctls_low; + highp = &vmx->nested.msrs.exit_ctls_high; break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: - lowp = &vmx->nested.nested_vmx_entry_ctls_low; - highp = &vmx->nested.nested_vmx_entry_ctls_high; + lowp = &vmx->nested.msrs.entry_ctls_low; + highp = &vmx->nested.msrs.entry_ctls_high; break; case MSR_IA32_VMX_PROCBASED_CTLS2: - lowp = &vmx->nested.nested_vmx_secondary_ctls_low; - highp = &vmx->nested.nested_vmx_secondary_ctls_high; + lowp = &vmx->nested.msrs.secondary_ctls_low; + highp = &vmx->nested.msrs.secondary_ctls_high; break; default: BUG(); @@ -3013,13 +3274,13 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) GENMASK_ULL(13, 9) | BIT_ULL(31); u64 vmx_misc; - vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low, - vmx->nested.nested_vmx_misc_high); + vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, + vmx->nested.msrs.misc_high); if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) return -EINVAL; - if ((vmx->nested.nested_vmx_pinbased_ctls_high & + if ((vmx->nested.msrs.pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) && vmx_misc_preemption_timer_rate(data) != vmx_misc_preemption_timer_rate(vmx_misc)) @@ -3034,8 +3295,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc)) return -EINVAL; - 
vmx->nested.nested_vmx_misc_low = data; - vmx->nested.nested_vmx_misc_high = data >> 32; + vmx->nested.msrs.misc_low = data; + vmx->nested.msrs.misc_high = data >> 32; return 0; } @@ -3043,15 +3304,15 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) { u64 vmx_ept_vpid_cap; - vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps, - vmx->nested.nested_vmx_vpid_caps); + vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, + vmx->nested.msrs.vpid_caps); /* Every bit is either reserved or a feature bit. */ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) return -EINVAL; - vmx->nested.nested_vmx_ept_caps = data; - vmx->nested.nested_vmx_vpid_caps = data >> 32; + vmx->nested.msrs.ept_caps = data; + vmx->nested.msrs.vpid_caps = data >> 32; return 0; } @@ -3061,10 +3322,10 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) switch (msr_index) { case MSR_IA32_VMX_CR0_FIXED0: - msr = &vmx->nested.nested_vmx_cr0_fixed0; + msr = &vmx->nested.msrs.cr0_fixed0; break; case MSR_IA32_VMX_CR4_FIXED0: - msr = &vmx->nested.nested_vmx_cr4_fixed0; + msr = &vmx->nested.msrs.cr4_fixed0; break; default: BUG(); @@ -3128,7 +3389,7 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) case MSR_IA32_VMX_EPT_VPID_CAP: return vmx_restore_vmx_ept_vpid_cap(vmx, data); case MSR_IA32_VMX_VMCS_ENUM: - vmx->nested.nested_vmx_vmcs_enum = data; + vmx->nested.msrs.vmcs_enum = data; return 0; default: /* @@ -3139,77 +3400,75 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) } /* Returns 0 on success, non-0 otherwise. 
*/ -static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) +static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - switch (msr_index) { case MSR_IA32_VMX_BASIC: - *pdata = vmx->nested.nested_vmx_basic; + *pdata = msrs->basic; break; case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_pinbased_ctls_low, - vmx->nested.nested_vmx_pinbased_ctls_high); + msrs->pinbased_ctls_low, + msrs->pinbased_ctls_high); if (msr_index == MSR_IA32_VMX_PINBASED_CTLS) *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: case MSR_IA32_VMX_PROCBASED_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_procbased_ctls_low, - vmx->nested.nested_vmx_procbased_ctls_high); + msrs->procbased_ctls_low, + msrs->procbased_ctls_high); if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS) *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: case MSR_IA32_VMX_EXIT_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_exit_ctls_low, - vmx->nested.nested_vmx_exit_ctls_high); + msrs->exit_ctls_low, + msrs->exit_ctls_high); if (msr_index == MSR_IA32_VMX_EXIT_CTLS) *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: case MSR_IA32_VMX_ENTRY_CTLS: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_entry_ctls_low, - vmx->nested.nested_vmx_entry_ctls_high); + msrs->entry_ctls_low, + msrs->entry_ctls_high); if (msr_index == MSR_IA32_VMX_ENTRY_CTLS) *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; break; case MSR_IA32_VMX_MISC: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_misc_low, - vmx->nested.nested_vmx_misc_high); + msrs->misc_low, + msrs->misc_high); break; case MSR_IA32_VMX_CR0_FIXED0: - *pdata = vmx->nested.nested_vmx_cr0_fixed0; + *pdata = msrs->cr0_fixed0; break; case MSR_IA32_VMX_CR0_FIXED1: - *pdata = 
vmx->nested.nested_vmx_cr0_fixed1; + *pdata = msrs->cr0_fixed1; break; case MSR_IA32_VMX_CR4_FIXED0: - *pdata = vmx->nested.nested_vmx_cr4_fixed0; + *pdata = msrs->cr4_fixed0; break; case MSR_IA32_VMX_CR4_FIXED1: - *pdata = vmx->nested.nested_vmx_cr4_fixed1; + *pdata = msrs->cr4_fixed1; break; case MSR_IA32_VMX_VMCS_ENUM: - *pdata = vmx->nested.nested_vmx_vmcs_enum; + *pdata = msrs->vmcs_enum; break; case MSR_IA32_VMX_PROCBASED_CTLS2: *pdata = vmx_control_msr( - vmx->nested.nested_vmx_secondary_ctls_low, - vmx->nested.nested_vmx_secondary_ctls_high); + msrs->secondary_ctls_low, + msrs->secondary_ctls_high); break; case MSR_IA32_VMX_EPT_VPID_CAP: - *pdata = vmx->nested.nested_vmx_ept_caps | - ((u64)vmx->nested.nested_vmx_vpid_caps << 32); + *pdata = msrs->ept_caps | + ((u64)msrs->vpid_caps << 32); break; case MSR_IA32_VMX_VMFUNC: - *pdata = vmx->nested.nested_vmx_vmfunc_controls; + *pdata = msrs->vmfunc_controls; break; default: return 1; @@ -3226,6 +3485,20 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, return !(val & ~valid_bits); } +static int vmx_get_msr_feature(struct kvm_msr_entry *msr) +{ + switch (msr->index) { + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + if (!nested) + return 1; + return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + default: + return 1; + } + + return 0; +} + /* * Reads an msr value (of 'msr_index') into 'pdata'. * Returns 0 on success, non-0 otherwise. @@ -3297,7 +3570,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: if (!nested_vmx_allowed(vcpu)) return 1; - return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data); + return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, + &msr_info->data); case MSR_IA32_XSS: if (!vmx_xsaves_supported()) return 1; @@ -3590,6 +3864,14 @@ static int hardware_enable(void) if (cr4_read_shadow() & X86_CR4_VMXE) return -EBUSY; + /* + * This can happen if we hot-added a CPU but failed to allocate + * VP assist page for it. + */ + if (static_branch_unlikely(&enable_evmcs) && + !hv_get_vp_assist_page(cpu)) + return -EFAULT; + INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); @@ -3688,6 +3970,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) u32 _vmexit_control = 0; u32 _vmentry_control = 0; + memset(vmcs_conf, 0, sizeof(*vmcs_conf)); min = CPU_BASED_HLT_EXITING | #ifdef CONFIG_X86_64 CPU_BASED_CR8_LOAD_EXITING | @@ -3698,13 +3981,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | + CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_RDPMC_EXITING; - if (!kvm_mwait_in_guest()) - min |= CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING; - opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -3823,7 +4104,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) vmcs_conf->size = vmx_msr_high & 0x1fff; vmcs_conf->order = get_order(vmcs_conf->size); vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; - vmcs_conf->revision_id = vmx_msr_low; + + /* KVM supports Enlightened VMCS v1 only */ + if (static_branch_unlikely(&enable_evmcs)) + vmcs_conf->revision_id = KVM_EVMCS_VERSION; + else + vmcs_conf->revision_id = vmx_msr_low; vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control; 
vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control; @@ -3831,6 +4117,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; + if (static_branch_unlikely(&enable_evmcs)) + evmcs_sanitize_exec_ctrls(vmcs_conf); + cpu_has_load_ia32_efer = allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, VM_ENTRY_LOAD_IA32_EFER) @@ -4150,6 +4439,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) { unsigned long flags; struct vcpu_vmx *vmx = to_vmx(vcpu); + struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); @@ -4165,13 +4455,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu) * Very old userspace does not call KVM_SET_TSS_ADDR before entering * vcpu. Warn the user that an update is overdue. */ - if (!vcpu->kvm->arch.tss_addr) + if (!kvm_vmx->tss_addr) printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " "called before entering vcpu\n"); vmx_segment_cache_clear(vmx); - vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr); + vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); @@ -4279,7 +4569,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) static void vmx_decache_cr3(struct kvm_vcpu *vcpu) { - if (enable_ept && is_paging(vcpu)) + if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); } @@ -4327,11 +4617,11 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu) static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { - u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0; - u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1; + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; + u64 fixed1 = 
to_vmx(vcpu)->nested.msrs.cr0_fixed1; struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high & + if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_UNRESTRICTED_GUEST && nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST)) fixed0 &= ~(X86_CR0_PE | X86_CR0_PG); @@ -4341,16 +4631,16 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val) { - u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0; - u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1; + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0; + u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1; return fixed_bits_valid(val, fixed0, fixed1); } static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) { - u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0; - u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1; + u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; + u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; return fixed_bits_valid(val, fixed0, fixed1); } @@ -4416,7 +4706,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (enable_ept) + if (enable_ept && !enable_unrestricted_guest) ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); vmcs_writel(CR0_READ_SHADOW, cr0); @@ -4457,10 +4747,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (enable_ept) { eptp = construct_eptp(vcpu, cr3); vmcs_write64(EPT_POINTER, eptp); - if (is_paging(vcpu) || is_guest_mode(vcpu)) + if (enable_unrestricted_guest || is_paging(vcpu) || + is_guest_mode(vcpu)) guest_cr3 = kvm_read_cr3(vcpu); else - guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr; + guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr; ept_load_pdptrs(vcpu); } @@ -4475,17 +4766,22 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * is in force while we are in guest mode. 
Do not let guests control * this bit, even if host CR4.MCE == 0. */ - unsigned long hw_cr4 = - (cr4_read_shadow() & X86_CR4_MCE) | - (cr4 & ~X86_CR4_MCE) | - (to_vmx(vcpu)->rmode.vm86_active ? - KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); + unsigned long hw_cr4; + + hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); + if (enable_unrestricted_guest) + hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST; + else if (to_vmx(vcpu)->rmode.vm86_active) + hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON; + else + hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) { vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); hw_cr4 &= ~X86_CR4_UMIP; - } else + } else if (!is_guest_mode(vcpu) || + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); @@ -4504,16 +4800,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; vcpu->arch.cr4 = cr4; - if (enable_ept) { - if (!is_paging(vcpu)) { - hw_cr4 &= ~X86_CR4_PAE; - hw_cr4 |= X86_CR4_PSE; - } else if (!(cr4 & X86_CR4_PAE)) { - hw_cr4 &= ~X86_CR4_PAE; + + if (!enable_unrestricted_guest) { + if (enable_ept) { + if (!is_paging(vcpu)) { + hw_cr4 &= ~X86_CR4_PAE; + hw_cr4 |= X86_CR4_PSE; + } else if (!(cr4 & X86_CR4_PAE)) { + hw_cr4 &= ~X86_CR4_PAE; + } } - } - if (!enable_unrestricted_guest && !is_paging(vcpu)) /* * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in * hardware. To emulate this behavior, SMEP/SMAP/PKU needs @@ -4525,7 +4822,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * If enable_unrestricted_guest, the CPU automatically * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0. 
*/ - hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); + if (!is_paging(vcpu)) + hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE); + } vmcs_writel(CR4_READ_SHADOW, cr4); vmcs_writel(GUEST_CR4, hw_cr4); @@ -4893,7 +5192,7 @@ static int init_rmode_tss(struct kvm *kvm) int idx, r; idx = srcu_read_lock(&kvm->srcu); - fn = kvm->arch.tss_addr >> PAGE_SHIFT; + fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT; r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); if (r < 0) goto out; @@ -4919,22 +5218,23 @@ out: static int init_rmode_identity_map(struct kvm *kvm) { + struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm); int i, idx, r = 0; kvm_pfn_t identity_map_pfn; u32 tmp; - /* Protect kvm->arch.ept_identity_pagetable_done. */ + /* Protect kvm_vmx->ept_identity_pagetable_done. */ mutex_lock(&kvm->slots_lock); - if (likely(kvm->arch.ept_identity_pagetable_done)) + if (likely(kvm_vmx->ept_identity_pagetable_done)) goto out2; - if (!kvm->arch.ept_identity_map_addr) - kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; - identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; + if (!kvm_vmx->ept_identity_map_addr) + kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; + identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT; r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, - kvm->arch.ept_identity_map_addr, PAGE_SIZE); + kvm_vmx->ept_identity_map_addr, PAGE_SIZE); if (r < 0) goto out2; @@ -4951,7 +5251,7 @@ static int init_rmode_identity_map(struct kvm *kvm) if (r < 0) goto out; } - kvm->arch.ept_identity_pagetable_done = true; + kvm_vmx->ept_identity_pagetable_done = true; out: srcu_read_unlock(&kvm->srcu, idx); @@ -5487,6 +5787,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) exec_control |= CPU_BASED_CR3_STORE_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_INVLPG_EXITING; + if (kvm_mwait_in_guest(vmx->vcpu.kvm)) + exec_control &= ~(CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING); + if 
(kvm_hlt_in_guest(vmx->vcpu.kvm)) + exec_control &= ~CPU_BASED_HLT_EXITING; return exec_control; } @@ -5520,7 +5825,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) } if (!enable_unrestricted_guest) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; - if (!ple_gap) + if (kvm_pause_in_guest(vmx->vcpu.kvm)) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; if (!kvm_vcpu_apicv_active(vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | @@ -5552,10 +5857,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (xsaves_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_XSAVES; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_XSAVES; } } @@ -5567,10 +5872,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (rdtscp_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_RDTSCP; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_RDTSCP; } } @@ -5588,10 +5893,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (invpcid_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_ENABLE_INVPCID; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_ENABLE_INVPCID; } } @@ -5603,10 +5908,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (rdrand_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_RDRAND_EXITING; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_RDRAND_EXITING; } } @@ -5618,10 +5923,10 @@ static void 
vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) if (nested) { if (rdseed_enabled) - vmx->nested.nested_vmx_secondary_ctls_high |= + vmx->nested.msrs.secondary_ctls_high |= SECONDARY_EXEC_RDSEED_EXITING; else - vmx->nested.nested_vmx_secondary_ctls_high &= + vmx->nested.msrs.secondary_ctls_high &= ~SECONDARY_EXEC_RDSEED_EXITING; } } @@ -5683,7 +5988,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } - if (ple_gap) { + if (!kvm_pause_in_guest(vmx->vcpu.kvm)) { vmcs_write32(PLE_GAP, ple_gap); vmx->ple_window = ple_window; vmx->ple_window_dirty = true; @@ -5765,6 +6070,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; vmx->spec_ctrl = 0; + vcpu->arch.microcode_version = 0x100000000ULL; vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); @@ -5847,6 +6153,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) update_exception_bitmap(vcpu); vpid_sync_context(vmx->vpid); + if (init_event) + vmx_clear_hlt(vcpu); } /* @@ -5871,8 +6179,7 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) { - return get_vmcs12(vcpu)->pin_based_vm_exec_control & - PIN_BASED_NMI_EXITING; + return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -5918,6 +6225,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) } else intr |= INTR_TYPE_EXT_INTR; vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); + + vmx_clear_hlt(vcpu); } static void vmx_inject_nmi(struct kvm_vcpu *vcpu) @@ -5948,6 +6257,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); + + vmx_clear_hlt(vcpu); } static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) @@ -6010,14 +6321,23 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) { int ret; + if 
(enable_unrestricted_guest) + return 0; + ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr, PAGE_SIZE * 3); if (ret) return ret; - kvm->arch.tss_addr = addr; + to_kvm_vmx(kvm)->tss_addr = addr; return init_rmode_tss(kvm); } +static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) +{ + to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr; + return 0; +} + static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) { switch (vec) { @@ -6120,19 +6440,24 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_nmi(intr_info)) return 1; /* already handled by vmx_vcpu_run() */ - if (is_invalid_opcode(intr_info)) { - er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); - if (er == EMULATE_USER_EXIT) - return 0; - if (er != EMULATE_DONE) - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } + if (is_invalid_opcode(intr_info)) + return handle_ud(vcpu); error_code = 0; if (intr_info & INTR_INFO_DELIVER_CODE_MASK) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) { + WARN_ON_ONCE(!enable_vmware_backdoor); + er = emulate_instruction(vcpu, + EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL); + if (er == EMULATE_USER_EXIT) + return 0; + else if (er != EMULATE_DONE) + kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); + return 1; + } + /* * The #PF with PFEC.RSVD = 1 indicates the guest is accessing * MMIO, it is better to report an internal error. 
@@ -6171,7 +6496,7 @@ static int handle_exception(struct kvm_vcpu *vcpu) (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { vcpu->arch.dr6 &= ~15; vcpu->arch.dr6 |= dr6 | DR6_RTM; - if (!(dr6 & ~DR6_RESERVED)) /* icebp */ + if (is_icebp(intr_info)) skip_emulated_instruction(vcpu); kvm_queue_exception(vcpu, DB_VECTOR); @@ -6218,28 +6543,22 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu) static int handle_io(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; - int size, in, string, ret; + int size, in, string; unsigned port; exit_qualification = vmcs_readl(EXIT_QUALIFICATION); string = (exit_qualification & 16) != 0; - in = (exit_qualification & 8) != 0; ++vcpu->stat.io_exits; - if (string || in) + if (string) return emulate_instruction(vcpu, 0) == EMULATE_DONE; port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; + in = (exit_qualification & 8) != 0; - ret = kvm_skip_emulated_instruction(vcpu); - - /* - * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. 
- */ - return kvm_fast_pio_out(vcpu, size, port) && ret; + return kvm_fast_pio(vcpu, size, port, in); } static void @@ -6330,6 +6649,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) err = handle_set_cr0(vcpu, val); return kvm_complete_insn_gp(vcpu, err); case 3: + WARN_ON_ONCE(enable_unrestricted_guest); err = kvm_set_cr3(vcpu, val); return kvm_complete_insn_gp(vcpu, err); case 4: @@ -6362,6 +6682,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) case 1: /*mov from cr*/ switch (cr) { case 3: + WARN_ON_ONCE(enable_unrestricted_guest); val = kvm_read_cr3(vcpu); kvm_register_write(vcpu, reg, val); trace_kvm_cr_read(cr, val); @@ -6755,7 +7076,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { - int ret; gpa_t gpa; /* @@ -6783,17 +7103,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) NULL, 0) == EMULATE_DONE; } - ret = kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); - if (ret >= 0) - return ret; - - /* It is the real ept misconfig */ - WARN_ON(1); - - vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; - vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG; - - return 0; + return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0); } static int handle_nmi_window(struct kvm_vcpu *vcpu) @@ -6816,6 +7126,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) bool intr_window_requested; unsigned count = 130; + /* + * We should never reach the point where we are emulating L2 + * due to invalid guest state as that means we incorrectly + * allowed a nested VMEntry with an invalid vmcs12. 
+ */ + WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); + cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING; @@ -6834,12 +7151,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) goto out; } - if (err != EMULATE_DONE) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + if (err != EMULATE_DONE) + goto emulation_error; + + if (vmx->emulation_required && !vmx->rmode.vm86_active && + vcpu->arch.exception.pending) + goto emulation_error; if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; @@ -6855,34 +7172,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) out: return ret; -} - -static int __grow_ple_window(int val) -{ - if (ple_window_grow < 1) - return ple_window; - - val = min(val, ple_window_actual_max); - - if (ple_window_grow < ple_window) - val *= ple_window_grow; - else - val += ple_window_grow; - - return val; -} - -static int __shrink_ple_window(int val, int modifier, int minimum) -{ - if (modifier < 1) - return ple_window; - - if (modifier < ple_window) - val /= modifier; - else - val -= modifier; - return max(val, minimum); +emulation_error: + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; } static void grow_ple_window(struct kvm_vcpu *vcpu) @@ -6890,7 +7185,9 @@ static void grow_ple_window(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); int old = vmx->ple_window; - vmx->ple_window = __grow_ple_window(old); + vmx->ple_window = __grow_ple_window(old, ple_window, + ple_window_grow, + ple_window_max); if (vmx->ple_window != old) vmx->ple_window_dirty = true; @@ -6903,8 +7200,9 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); int old = 
vmx->ple_window; - vmx->ple_window = __shrink_ple_window(old, - ple_window_shrink, ple_window); + vmx->ple_window = __shrink_ple_window(old, ple_window, + ple_window_shrink, + ple_window); if (vmx->ple_window != old) vmx->ple_window_dirty = true; @@ -6913,21 +7211,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) } /* - * ple_window_actual_max is computed to be one grow_ple_window() below - * ple_window_max. (See __grow_ple_window for the reason.) - * This prevents overflows, because ple_window_max is int. - * ple_window_max effectively rounded down to a multiple of ple_window_grow in - * this process. - * ple_window_max is also prevented from setting vmx->ple_window < ple_window. - */ -static void update_ple_window_actual_max(void) -{ - ple_window_actual_max = - __shrink_ple_window(max(ple_window_max, ple_window), - ple_window_grow, INT_MIN); -} - -/* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ static void wakeup_handler(void) @@ -6946,7 +7229,7 @@ static void wakeup_handler(void) spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } -void vmx_enable_tdp(void) +static void vmx_enable_tdp(void) { kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, @@ -7047,8 +7330,6 @@ static __init int hardware_setup(void) else kvm_disable_tdp(); - update_ple_window_actual_max(); - /* * Only enable PML when hardware supports PML feature, and both EPT * and EPT A/D bit features are enabled -- PML depends on them to work. 
@@ -7080,6 +7361,7 @@ static __init int hardware_setup(void) init_vmcs_shadow_fields(); kvm_set_posted_intr_wakeup_handler(wakeup_handler); + nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv); kvm_mce_cap_supported |= MCG_LMCE_P; @@ -7108,7 +7390,7 @@ static __exit void hardware_unsetup(void) */ static int handle_pause(struct kvm_vcpu *vcpu) { - if (ple_gap) + if (!kvm_pause_in_guest(vcpu->kvm)) grow_ple_window(vcpu); /* @@ -7940,9 +8222,9 @@ static int handle_invept(struct kvm_vcpu *vcpu) u64 eptp, gpa; } operand; - if (!(vmx->nested.nested_vmx_secondary_ctls_high & + if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || - !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) { + !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -7953,7 +8235,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; + types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, @@ -8004,9 +8286,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) u64 gla; } operand; - if (!(vmx->nested.nested_vmx_secondary_ctls_high & + if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || - !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) { + !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } @@ -8017,7 +8299,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); - types = (vmx->nested.nested_vmx_vpid_caps & + types = (vmx->nested.msrs.vpid_caps & VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; if (type >= 32 || !(types & (1 << type))) { @@ 
-8111,11 +8393,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) /* Check for memory type validity */ switch (address & VMX_EPTP_MT_MASK) { case VMX_EPTP_MT_UC: - if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT)) + if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)) return false; break; case VMX_EPTP_MT_WB: - if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT)) + if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)) return false; break; default: @@ -8132,7 +8414,7 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address) /* AD, if set, should be supported */ if (address & VMX_EPTP_AD_ENABLE_BIT) { - if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)) + if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)) return false; } @@ -8776,7 +9058,8 @@ static void dump_vmcs(void) pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", vmcs_read64(GUEST_IA32_DEBUGCTL), vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); - if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) + if (cpu_has_load_perf_global_ctrl && + vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) pr_err("PerfGlobCtl = 0x%016llx\n", vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) @@ -8812,7 +9095,8 @@ static void dump_vmcs(void) pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", vmcs_read64(HOST_IA32_EFER), vmcs_read64(HOST_IA32_PAT)); - if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + if (cpu_has_load_perf_global_ctrl && + vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) pr_err("PerfGlobCtl = 0x%016llx\n", vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); @@ -9164,9 +9448,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) /* We need to handle NMIs before interrupts are enabled */ if (is_nmi(exit_intr_info)) { - kvm_before_handle_nmi(&vmx->vcpu); + kvm_before_interrupt(&vmx->vcpu); asm("int $2"); - kvm_after_handle_nmi(&vmx->vcpu); + kvm_after_interrupt(&vmx->vcpu); } } @@ -9389,7 +9673,7 @@ static void vmx_arm_hv_timer(struct 
kvm_vcpu *vcpu) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long cr3, cr4; + unsigned long cr3, cr4, evmcs_rsp; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -9452,9 +9736,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * being speculatively taken. */ if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); vmx->__launched = vmx->loaded_vmcs->launched; + + evmcs_rsp = static_branch_unlikely(&enable_evmcs) ? + (unsigned long)&current_evmcs->host_rsp : 0; + asm( /* Store host registers */ "push %%" _ASM_DX "; push %%" _ASM_BP ";" @@ -9463,15 +9751,21 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" "je 1f \n\t" "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" + /* Avoid VMWRITE when Enlightened VMCS is in use */ + "test %%" _ASM_SI ", %%" _ASM_SI " \n\t" + "jz 2f \n\t" + "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" + "jmp 1f \n\t" + "2: \n\t" __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" "1: \n\t" /* Reload cr2 if changed */ "mov %c[cr2](%0), %%" _ASM_AX " \n\t" "mov %%cr2, %%" _ASM_DX " \n\t" "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" - "je 2f \n\t" + "je 3f \n\t" "mov %%" _ASM_AX", %%cr2 \n\t" - "2: \n\t" + "3: \n\t" /* Check if vmlaunch of vmresume is needed */ "cmpl $0, %c[launched](%0) \n\t" /* Load guest registers. Don't clobber flags.
*/ @@ -9540,7 +9834,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) ".global vmx_return \n\t" "vmx_return: " _ASM_PTR " 2b \n\t" ".popsection" - : : "c"(vmx), "d"((unsigned long)HOST_RSP), + : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp), [launched]"i"(offsetof(struct vcpu_vmx, __launched)), [fail]"i"(offsetof(struct vcpu_vmx, fail)), [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)), @@ -9565,10 +9859,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) [wordsize]"i"(sizeof(ulong)) : "cc", "memory" #ifdef CONFIG_X86_64 - , "rax", "rbx", "rdi", "rsi" + , "rax", "rbx", "rdi" , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" #else - , "eax", "ebx", "edi", "esi" + , "eax", "ebx", "edi" #endif ); @@ -9587,15 +9881,20 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)) - rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); if (vmx->spec_ctrl) - wrmsrl(MSR_IA32_SPEC_CTRL, 0); + native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); + /* All fields are clean at this point */ + if (static_branch_unlikely(&enable_evmcs)) + current_evmcs->hv_clean_fields |= + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ if (vmx->host_debugctlmsr) update_debugctlmsr(vmx->host_debugctlmsr); @@ -9632,14 +9931,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vmx->host_pkru); } - /* - * the KVM_REQ_EVENT optimization bit is only on for one entry, and if - * we did not inject a still-pending event to L1 now because of - * nested_run_pending, we need to re-enable this bit. 
- */ - if (vmx->nested.nested_run_pending) - kvm_make_request(KVM_REQ_EVENT, vcpu); - vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; @@ -9656,6 +9947,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_vcpu_run); +static struct kvm *vmx_vm_alloc(void) +{ + struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL); + return &kvm_vmx->kvm; +} + +static void vmx_vm_free(struct kvm *kvm) +{ + kfree(to_kvm_vmx(kvm)); +} + static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -9763,14 +10065,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } - if (enable_ept) { + if (enable_ept && !enable_unrestricted_guest) { err = init_rmode_identity_map(kvm); if (err) goto free_vmcs; } if (nested) { - nested_vmx_setup_ctls_msrs(vmx); + nested_vmx_setup_ctls_msrs(&vmx->nested.msrs, + kvm_vcpu_apicv_active(&vmx->vcpu)); vmx->nested.vpid02 = allocate_vpid(); } @@ -9803,6 +10106,13 @@ free_vcpu: return ERR_PTR(err); } +static int vmx_vm_init(struct kvm *kvm) +{ + if (!ple_gap) + kvm->arch.pause_in_guest = true; + return 0; +} + static void __init vmx_check_processor_compat(void *rtn) { struct vmcs_config vmcs_conf; @@ -9810,6 +10120,7 @@ static void __init vmx_check_processor_compat(void *rtn) *(int *)rtn = 0; if (setup_vmcs_config(&vmcs_conf) < 0) *(int *)rtn = -EIO; + nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv); if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) { printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n", smp_processor_id()); @@ -9897,12 +10208,12 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); struct kvm_cpuid_entry2 *entry; - vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff; - vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE; + vmx->nested.msrs.cr0_fixed1 = 0xffffffff; + vmx->nested.msrs.cr4_fixed1 
= X86_CR4_PCE; #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \ if (entry && (entry->_reg & (_cpuid_mask))) \ - vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \ + vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask); \ } while (0) entry = kvm_find_cpuid_entry(vcpu, 0x1, 0); @@ -9999,7 +10310,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); kvm_init_shadow_ept_mmu(vcpu, - to_vmx(vcpu)->nested.nested_vmx_ept_caps & + to_vmx(vcpu)->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT, nested_ept_ad_enabled(vcpu)); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; @@ -10696,6 +11007,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control, vmcs12_exec_ctrl; + if (vmx->nested.dirty_vmcs12) { + prepare_vmcs02_full(vcpu, vmcs12, from_vmentry); + vmx->nested.dirty_vmcs12 = false; + } + /* * First, the fields that are shadowed. This must be kept in sync * with vmx_shadow_fields.h. @@ -10933,9 +11249,14 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ vmx_set_efer(vcpu, vcpu->arch.efer); - if (vmx->nested.dirty_vmcs12) { - prepare_vmcs02_full(vcpu, vmcs12, from_vmentry); - vmx->nested.dirty_vmcs12 = false; + /* + * Guest state is invalid and unrestricted guest is disabled, + * which means L1 attempted VMEntry to L2 with invalid state. + * Fail the VMEntry. + */ + if (vmx->emulation_required) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; + return 1; } /* Shadow page tables on either EPT or shadow page tables. 
*/ @@ -10951,6 +11272,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 0; } +static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) +{ + if (!nested_cpu_has_nmi_exiting(vmcs12) && + nested_cpu_has_virtual_nmis(vmcs12)) + return -EINVAL; + + if (!nested_cpu_has_virtual_nmis(vmcs12) && + nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING)) + return -EINVAL; + + return 0; +} + static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -10978,26 +11312,29 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, - vmx->nested.nested_vmx_procbased_ctls_low, - vmx->nested.nested_vmx_procbased_ctls_high) || + vmx->nested.msrs.procbased_ctls_low, + vmx->nested.msrs.procbased_ctls_high) || (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && !vmx_control_verify(vmcs12->secondary_vm_exec_control, - vmx->nested.nested_vmx_secondary_ctls_low, - vmx->nested.nested_vmx_secondary_ctls_high)) || + vmx->nested.msrs.secondary_ctls_low, + vmx->nested.msrs.secondary_ctls_high)) || !vmx_control_verify(vmcs12->pin_based_vm_exec_control, - vmx->nested.nested_vmx_pinbased_ctls_low, - vmx->nested.nested_vmx_pinbased_ctls_high) || + vmx->nested.msrs.pinbased_ctls_low, + vmx->nested.msrs.pinbased_ctls_high) || !vmx_control_verify(vmcs12->vm_exit_controls, - vmx->nested.nested_vmx_exit_ctls_low, - vmx->nested.nested_vmx_exit_ctls_high) || + vmx->nested.msrs.exit_ctls_low, + vmx->nested.msrs.exit_ctls_high) || !vmx_control_verify(vmcs12->vm_entry_controls, - vmx->nested.nested_vmx_entry_ctls_low, - vmx->nested.nested_vmx_entry_ctls_high)) + vmx->nested.msrs.entry_ctls_low, + vmx->nested.msrs.entry_ctls_high)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + if (nested_vmx_check_nmi_controls(vmcs12)) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if 
(nested_cpu_has_vmfunc(vmcs12)) { if (vmcs12->vm_function_control & - ~vmx->nested.nested_vmx_vmfunc_controls) + ~vmx->nested.msrs.vmfunc_controls) return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if (nested_cpu_has_eptp_switching(vmcs12)) { @@ -11199,7 +11536,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (ret) return ret; - if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) + /* + * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken + * by event injection, halt vcpu. + */ + if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && + !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; @@ -11274,7 +11616,7 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, } else if (vcpu->arch.nmi_injected) { vmcs12->idt_vectoring_info_field = INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR; - } else if (vcpu->arch.interrupt.pending) { + } else if (vcpu->arch.interrupt.injected) { nr = vcpu->arch.interrupt.nr; idt_vectoring = nr | VECTORING_INFO_VALID_MASK; @@ -11922,7 +12264,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu) static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu) { - if (ple_gap) + if (!kvm_pause_in_guest(vcpu->kvm)) shrink_ple_window(vcpu); } @@ -12240,6 +12582,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) vmx->nested.smm.vmxon = vmx->nested.vmxon; vmx->nested.vmxon = false; + vmx_clear_hlt(vcpu); return 0; } @@ -12281,6 +12624,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .cpu_has_accelerated_tpr = report_flexpriority, .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .vm_init = vmx_vm_init, + .vm_alloc = vmx_vm_alloc, + .vm_free = vmx_vm_free, + .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, .vcpu_reset = vmx_vcpu_reset, @@ -12290,6 +12637,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .vcpu_put = vmx_vcpu_put, .update_bp_intercept 
= update_exception_bitmap, + .get_msr_feature = vmx_get_msr_feature, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, @@ -12347,6 +12695,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .deliver_posted_interrupt = vmx_deliver_posted_interrupt, .set_tss_addr = vmx_set_tss_addr, + .set_identity_map_addr = vmx_set_identity_map_addr, .get_tdp_level = get_ept_level, .get_mt_mask = vmx_get_mt_mask, @@ -12405,7 +12754,38 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { static int __init vmx_init(void) { - int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), + int r; + +#if IS_ENABLED(CONFIG_HYPERV) + /* + * Enlightened VMCS usage should be recommended and the host needs + * to support eVMCS v1 or above. We can also disable eVMCS support + * with module parameter. + */ + if (enlightened_vmcs && + ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED && + (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >= + KVM_EVMCS_VERSION) { + int cpu; + + /* Check that we have assist pages on all online CPUs */ + for_each_online_cpu(cpu) { + if (!hv_get_vp_assist_page(cpu)) { + enlightened_vmcs = false; + break; + } + } + + if (enlightened_vmcs) { + pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n"); + static_branch_enable(&enable_evmcs); + } + } else { + enlightened_vmcs = false; + } +#endif + + r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), __alignof__(struct vcpu_vmx), THIS_MODULE); if (r) return r; @@ -12426,6 +12806,29 @@ static void __exit vmx_exit(void) #endif kvm_exit(); + +#if IS_ENABLED(CONFIG_HYPERV) + if (static_branch_unlikely(&enable_evmcs)) { + int cpu; + struct hv_vp_assist_page *vp_ap; + /* + * Reset everything to support using non-enlightened VMCS + * access later (e.g. 
when we reload the module with + * enlightened_vmcs=0) + */ + for_each_online_cpu(cpu) { + vp_ap = hv_get_vp_assist_page(cpu); + + if (!vp_ap) + continue; + + vp_ap->current_nested_vmcs = 0; + vp_ap->enlighten_vmentry = 0; + } + + static_branch_disable(&enable_evmcs); + } +#endif } module_init(vmx_init) diff --git a/arch/x86/kvm/vmx_evmcs.h b/arch/x86/kvm/vmx_evmcs.h new file mode 100644 index 0000000..210a884 --- /dev/null +++ b/arch/x86/kvm/vmx_evmcs.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __KVM_X86_VMX_EVMCS_H +#define __KVM_X86_VMX_EVMCS_H + +#include <asm/hyperv-tlfs.h> + +#define ROL16(val, n) ((u16)(((u16)(val) << (n)) | ((u16)(val) >> (16 - (n))))) +#define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) +#define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ + {EVMCS1_OFFSET(name), clean_field} + +struct evmcs_field { + u16 offset; + u16 clean_field; +}; + +static const struct evmcs_field vmcs_field_to_evmcs_1[] = { + /* 64 bit rw */ + EVMCS1_FIELD(GUEST_RIP, guest_rip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(GUEST_RSP, guest_rsp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), + EVMCS1_FIELD(GUEST_RFLAGS, guest_rflags, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), + EVMCS1_FIELD(HOST_IA32_PAT, host_ia32_pat, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_IA32_EFER, host_ia32_efer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CR0, host_cr0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CR3, host_cr3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CR4, host_cr4, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_IA32_SYSENTER_ESP, host_ia32_sysenter_esp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_RIP, host_rip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + 
EVMCS1_FIELD(IO_BITMAP_A, io_bitmap_a, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP), + EVMCS1_FIELD(IO_BITMAP_B, io_bitmap_b, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP), + EVMCS1_FIELD(MSR_BITMAP, msr_bitmap, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP), + EVMCS1_FIELD(GUEST_ES_BASE, guest_es_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_BASE, guest_cs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_BASE, guest_ss_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_BASE, guest_ds_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_BASE, guest_fs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_BASE, guest_gs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_BASE, guest_ldtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_BASE, guest_tr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GDTR_BASE, guest_gdtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_IDTR_BASE, guest_idtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(TSC_OFFSET, tsc_offset, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + EVMCS1_FIELD(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + EVMCS1_FIELD(VMCS_LINK_POINTER, vmcs_link_pointer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_IA32_PAT, guest_ia32_pat, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_IA32_EFER, guest_ia32_efer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR0, guest_pdptr0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR1, guest_pdptr1, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR2, guest_pdptr2, + 
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PDPTR3, guest_pdptr3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_PENDING_DBG_EXCEPTIONS, guest_pending_dbg_exceptions, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_SYSENTER_ESP, guest_sysenter_esp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_SYSENTER_EIP, guest_sysenter_eip, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(CR0_GUEST_HOST_MASK, cr0_guest_host_mask, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(CR0_READ_SHADOW, cr0_read_shadow, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(CR4_READ_SHADOW, cr4_read_shadow, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_CR0, guest_cr0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_CR3, guest_cr3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_CR4, guest_cr4, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(GUEST_DR7, guest_dr7, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR), + EVMCS1_FIELD(HOST_FS_BASE, host_fs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_GS_BASE, host_gs_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_TR_BASE, host_tr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_GDTR_BASE, host_gdtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_IDTR_BASE, host_idtr_base, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(HOST_RSP, host_rsp, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER), + EVMCS1_FIELD(EPT_POINTER, ept_pointer, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT), + EVMCS1_FIELD(GUEST_BNDCFGS, guest_bndcfgs, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(XSS_EXIT_BITMAP, xss_exit_bitmap, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2), + + /* 64 bit read only */ + 
EVMCS1_FIELD(GUEST_PHYSICAL_ADDRESS, guest_physical_address, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(EXIT_QUALIFICATION, exit_qualification, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + /* + * Not defined in KVM: + * + * EVMCS1_FIELD(0x00006402, exit_io_instruction_ecx, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + * EVMCS1_FIELD(0x00006404, exit_io_instruction_esi, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + * EVMCS1_FIELD(0x00006406, exit_io_instruction_esi, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + * EVMCS1_FIELD(0x00006408, exit_io_instruction_eip, + * HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE); + */ + EVMCS1_FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + + /* + * No mask defined in the spec as Hyper-V doesn't currently support + * these. Future proof by resetting the whole clean field mask on + * access. + */ + EVMCS1_FIELD(VM_EXIT_MSR_STORE_ADDR, vm_exit_msr_store_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_EXIT_MSR_LOAD_ADDR, vm_exit_msr_load_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + + /* 32 bit rw */ + EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC), + EVMCS1_FIELD(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC), + EVMCS1_FIELD(EXCEPTION_BITMAP, exception_bitmap, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN), + 
EVMCS1_FIELD(VM_ENTRY_CONTROLS, vm_entry_controls, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY), + EVMCS1_FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), + EVMCS1_FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, + vm_entry_exception_error_code, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), + EVMCS1_FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT), + EVMCS1_FIELD(HOST_IA32_SYSENTER_CS, host_ia32_sysenter_cs, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), + EVMCS1_FIELD(VM_EXIT_CONTROLS, vm_exit_controls, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), + EVMCS1_FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1), + EVMCS1_FIELD(GUEST_ES_LIMIT, guest_es_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_LIMIT, guest_cs_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_LIMIT, guest_ss_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_LIMIT, guest_ds_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_LIMIT, guest_fs_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_LIMIT, guest_gs_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_LIMIT, guest_ldtr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_LIMIT, guest_tr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GDTR_LIMIT, guest_gdtr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_IDTR_LIMIT, guest_idtr_limit, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_ES_AR_BYTES, guest_es_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_AR_BYTES, guest_cs_ar_bytes, + 
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_AR_BYTES, guest_ss_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_AR_BYTES, guest_ds_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_AR_BYTES, guest_fs_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_AR_BYTES, guest_gs_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_AR_BYTES, guest_ldtr_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_AR_BYTES, guest_tr_ar_bytes, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_ACTIVITY_STATE, guest_activity_state, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + EVMCS1_FIELD(GUEST_SYSENTER_CS, guest_sysenter_cs, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1), + + /* 32 bit read only */ + EVMCS1_FIELD(VM_INSTRUCTION_ERROR, vm_instruction_error, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_REASON, vm_exit_reason, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_INTR_INFO, vm_exit_intr_info, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + EVMCS1_FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE), + + /* No mask defined in the spec (not used) */ + EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MASK, page_fault_error_code_mask, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(PAGE_FAULT_ERROR_CODE_MATCH, page_fault_error_code_match, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(CR3_TARGET_COUNT, cr3_target_count, + 
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_EXIT_MSR_STORE_COUNT, vm_exit_msr_store_count, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_EXIT_MSR_LOAD_COUNT, vm_exit_msr_load_count, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), + + /* 16 bit rw */ + EVMCS1_FIELD(HOST_ES_SELECTOR, host_es_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_CS_SELECTOR, host_cs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_SS_SELECTOR, host_ss_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_DS_SELECTOR, host_ds_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_FS_SELECTOR, host_fs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_GS_SELECTOR, host_gs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(HOST_TR_SELECTOR, host_tr_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1), + EVMCS1_FIELD(GUEST_ES_SELECTOR, guest_es_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_CS_SELECTOR, guest_cs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_SS_SELECTOR, guest_ss_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_DS_SELECTOR, guest_ds_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_FS_SELECTOR, guest_fs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_GS_SELECTOR, guest_gs_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(GUEST_TR_SELECTOR, guest_tr_selector, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2), + EVMCS1_FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id, + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT), +}; + +static __always_inline int 
get_evmcs_offset(unsigned long field, + u16 *clean_field) +{ + unsigned int index = ROL16(field, 6); + const struct evmcs_field *evmcs_field; + + if (unlikely(index >= ARRAY_SIZE(vmcs_field_to_evmcs_1))) { + WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n", + field); + return -ENOENT; + } + + evmcs_field = &vmcs_field_to_evmcs_1[index]; + + if (clean_field) + *clean_field = evmcs_field->clean_field; + + return evmcs_field->offset; +} + +#undef ROL16 + +#endif /* __KVM_X86_VMX_EVMCS_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8a0b54..b2ff74b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -102,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); +static void store_regs(struct kvm_vcpu *vcpu); +static int sync_regs(struct kvm_vcpu *vcpu); struct kvm_x86_ops *kvm_x86_ops __read_mostly; EXPORT_SYMBOL_GPL(kvm_x86_ops); @@ -140,6 +142,13 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); +bool __read_mostly enable_vmware_backdoor = false; +module_param(enable_vmware_backdoor, bool, S_IRUGO); +EXPORT_SYMBOL_GPL(enable_vmware_backdoor); + +static bool __read_mostly force_emulation_prefix = false; +module_param(force_emulation_prefix, bool, S_IRUGO); + #define KVM_NR_SHARED_MSRS 16 struct kvm_shared_msrs_global { @@ -1032,7 +1041,11 @@ static u32 emulated_msrs[] = { HV_X64_MSR_VP_RUNTIME, HV_X64_MSR_SCONTROL, HV_X64_MSR_STIMER0_CONFIG, - HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, + HV_X64_MSR_VP_ASSIST_PAGE, + HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, + HV_X64_MSR_TSC_EMULATION_STATUS, + + MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, MSR_IA32_TSC_ADJUST, @@ -1049,6 +1062,64 
@@ static u32 emulated_msrs[] = { static unsigned num_emulated_msrs; +/* + * List of msr numbers which are used to expose MSR-based features that + * can be used by a hypervisor to validate requested CPU features. + */ +static u32 msr_based_features[] = { + MSR_IA32_VMX_BASIC, + MSR_IA32_VMX_TRUE_PINBASED_CTLS, + MSR_IA32_VMX_PINBASED_CTLS, + MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + MSR_IA32_VMX_PROCBASED_CTLS, + MSR_IA32_VMX_TRUE_EXIT_CTLS, + MSR_IA32_VMX_EXIT_CTLS, + MSR_IA32_VMX_TRUE_ENTRY_CTLS, + MSR_IA32_VMX_ENTRY_CTLS, + MSR_IA32_VMX_MISC, + MSR_IA32_VMX_CR0_FIXED0, + MSR_IA32_VMX_CR0_FIXED1, + MSR_IA32_VMX_CR4_FIXED0, + MSR_IA32_VMX_CR4_FIXED1, + MSR_IA32_VMX_VMCS_ENUM, + MSR_IA32_VMX_PROCBASED_CTLS2, + MSR_IA32_VMX_EPT_VPID_CAP, + MSR_IA32_VMX_VMFUNC, + + MSR_F10H_DECFG, + MSR_IA32_UCODE_REV, +}; + +static unsigned int num_msr_based_features; + +static int kvm_get_msr_feature(struct kvm_msr_entry *msr) +{ + switch (msr->index) { + case MSR_IA32_UCODE_REV: + rdmsrl(msr->index, msr->data); + break; + default: + if (kvm_x86_ops->get_msr_feature(msr)) + return 1; + } + return 0; +} + +static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) +{ + struct kvm_msr_entry msr; + int r; + + msr.index = index; + r = kvm_get_msr_feature(&msr); + if (r) + return r; + + *data = msr.data; + + return 0; +} + bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) @@ -2222,7 +2293,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_AMD64_NB_CFG: - case MSR_IA32_UCODE_REV: case MSR_IA32_UCODE_WRITE: case MSR_VM_HSAVE_PA: case MSR_AMD64_PATCH_LOADER: @@ -2230,6 +2300,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_DC_CFG: break; + case MSR_IA32_UCODE_REV: + if (msr_info->host_initiated) + vcpu->arch.microcode_version = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2390,6 +2464,9 @@ int 
kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + case HV_X64_MSR_TSC_EMULATION_CONTROL: + case HV_X64_MSR_TSC_EMULATION_STATUS: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -2516,6 +2593,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_AMD64_DC_CFG: msr_info->data = 0; break; + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: @@ -2525,7 +2603,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0; break; case MSR_IA32_UCODE_REV: - msr_info->data = 0x100000000ULL; + msr_info->data = vcpu->arch.microcode_version; break; case MSR_MTRRcap: case 0x200 ... 0x2ff: @@ -2619,6 +2697,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... 
HV_X64_MSR_STIMER3_COUNT: + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + case HV_X64_MSR_TSC_EMULATION_CONTROL: + case HV_X64_MSR_TSC_EMULATION_STATUS: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data); break; @@ -2680,13 +2761,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs, int (*do_msr)(struct kvm_vcpu *vcpu, unsigned index, u64 *data)) { - int i, idx; + int i; - idx = srcu_read_lock(&vcpu->kvm->srcu); for (i = 0; i < msrs->nmsrs; ++i) if (do_msr(vcpu, entries[i].index, &entries[i].data)) break; - srcu_read_unlock(&vcpu->kvm->srcu, idx); return i; } @@ -2737,9 +2816,15 @@ out: return r; } +static inline bool kvm_can_mwait_in_guest(void) +{ + return boot_cpu_has(X86_FEATURE_MWAIT) && + !boot_cpu_has_bug(X86_BUG_MONITOR); +} + int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { - int r; + int r = 0; switch (ext) { case KVM_CAP_IRQCHIP: @@ -2769,6 +2854,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_HYPERV_SYNIC2: case KVM_CAP_HYPERV_VP_INDEX: + case KVM_CAP_HYPERV_EVENTFD: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -2785,13 +2871,19 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_GET_MSR_FEATURES: r = 1; break; + case KVM_CAP_SYNC_REGS: + r = KVM_SYNC_X86_VALID_FIELDS; + break; case KVM_CAP_ADJUST_CLOCK: r = KVM_CLOCK_TSC_STABLE; break; - case KVM_CAP_X86_GUEST_MWAIT: - r = kvm_mwait_in_guest(); + case KVM_CAP_X86_DISABLE_EXITS: + r |= KVM_X86_DISABLE_EXITS_HTL | KVM_X86_DISABLE_EXITS_PAUSE; + if(kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, @@ -2832,7 +2924,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_X2APIC_API_VALID_FLAGS; break; default: - r = 0; break; } return 
r; @@ -2899,6 +2990,31 @@ long kvm_arch_dev_ioctl(struct file *filp, goto out; r = 0; break; + case KVM_GET_MSR_FEATURE_INDEX_LIST: { + struct kvm_msr_list __user *user_msr_list = argp; + struct kvm_msr_list msr_list; + unsigned int n; + + r = -EFAULT; + if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list))) + goto out; + n = msr_list.nmsrs; + msr_list.nmsrs = num_msr_based_features; + if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list))) + goto out; + r = -E2BIG; + if (n < msr_list.nmsrs) + goto out; + r = -EFAULT; + if (copy_to_user(user_msr_list->indices, &msr_based_features, + num_msr_based_features * sizeof(u32))) + goto out; + r = 0; + break; + } + case KVM_GET_MSRS: + r = msr_io(NULL, argp, do_get_msr_feature, 1); + break; } default: r = -EINVAL; @@ -3199,7 +3315,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->exception.error_code = vcpu->arch.exception.error_code; events->interrupt.injected = - vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; + vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; events->interrupt.soft = 0; events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu); @@ -3252,7 +3368,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = events->exception.has_error_code; vcpu->arch.exception.error_code = events->exception.error_code; - vcpu->arch.interrupt.pending = events->interrupt.injected; + vcpu->arch.interrupt.injected = events->interrupt.injected; vcpu->arch.interrupt.nr = events->interrupt.nr; vcpu->arch.interrupt.soft = events->interrupt.soft; if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) @@ -3636,12 +3752,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_MSRS: + case KVM_GET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_get_msr, 1); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; - 
case KVM_SET_MSRS: + } + case KVM_SET_MSRS: { + int idx = srcu_read_lock(&vcpu->kvm->srcu); r = msr_io(vcpu, argp, do_set_msr, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; + } case KVM_TPR_ACCESS_REPORTING: { struct kvm_tpr_access_ctl tac; @@ -3845,8 +3967,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) { - kvm->arch.ept_identity_map_addr = ident_addr; - return 0; + return kvm_x86_ops->set_identity_map_addr(kvm, ident_addr); } static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, @@ -4106,6 +4227,20 @@ split_irqchip_unlock: r = 0; break; + case KVM_CAP_X86_DISABLE_EXITS: + r = -EINVAL; + if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) + break; + + if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && + kvm_can_mwait_in_guest()) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) + kvm->arch.pause_in_guest = true; + r = 0; + break; default: r = -EINVAL; break; @@ -4410,6 +4545,15 @@ set_identity_unlock: r = kvm_x86_ops->mem_enc_unreg_region(kvm, ®ion); break; } + case KVM_HYPERV_EVENTFD: { + struct kvm_hyperv_eventfd hvevfd; + + r = -EFAULT; + if (copy_from_user(&hvevfd, argp, sizeof(hvevfd))) + goto out; + r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); + break; + } default: r = -ENOTTY; } @@ -4464,6 +4608,19 @@ static void kvm_init_msr_list(void) j++; } num_emulated_msrs = j; + + for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) { + struct kvm_msr_entry msr; + + msr.index = msr_based_features[i]; + if (kvm_get_msr_feature(&msr)) + continue; + + if (j < i) + msr_based_features[j] = msr_based_features[i]; + j++; + } + num_msr_based_features = j; } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, @@ -4686,6 +4843,30 @@ out: } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +int handle_ud(struct kvm_vcpu *vcpu) 
+{ + int emul_type = EMULTYPE_TRAP_UD; + enum emulation_result er; + char sig[5]; /* ud2; .ascii "kvm" */ + struct x86_exception e; + + if (force_emulation_prefix && + kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, + kvm_get_linear_rip(vcpu), sig, sizeof(sig), &e) == 0 && + memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) { + kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig)); + emul_type = 0; + } + + er = emulate_instruction(vcpu, emul_type); + if (er == EMULATE_USER_EXIT) + return 0; + if (er != EMULATE_DONE) + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} +EXPORT_SYMBOL_GPL(handle_ud); + static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t gpa, bool write) { @@ -5527,27 +5708,27 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); - if (irq == NMI_VECTOR) - vcpu->arch.nmi_pending = 0; - else - vcpu->arch.interrupt.pending = false; - return EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt); -static int handle_emulation_failure(struct kvm_vcpu *vcpu) +static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type) { int r = EMULATE_DONE; ++vcpu->stat.insn_emulation_fail; trace_kvm_emulate_insn_failed(vcpu); + + if (emulation_type & EMULTYPE_NO_UD_ON_FAIL) + return EMULATE_FAIL; + if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; vcpu->run->internal.ndata = 0; r = EMULATE_USER_EXIT; } + kvm_queue_exception(vcpu, UD_VECTOR); return r; @@ -5791,6 +5972,37 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r) return false; } +static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt) +{ + switch (ctxt->opcode_len) { + case 1: + switch (ctxt->b) { + case 0xe4: /* IN */ + case 0xe5: + case 0xec: + case 0xed: + case 0xe6: /* OUT */ + case 0xe7: + case 0xee: + case 0xef: 
+ case 0x6c: /* INS */ + case 0x6d: + case 0x6e: /* OUTS */ + case 0x6f: + return true; + } + break; + case 2: + switch (ctxt->b) { + case 0x33: /* RDPMC */ + return true; + } + break; + } + + return false; +} + int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, @@ -5843,10 +6055,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; - return handle_emulation_failure(vcpu); + return handle_emulation_failure(vcpu, emulation_type); } } + if ((emulation_type & EMULTYPE_VMWARE) && + !is_vmware_backdoor_opcode(ctxt)) + return EMULATE_FAIL; + if (emulation_type & EMULTYPE_SKIP) { kvm_rip_write(vcpu, ctxt->_eip); if (ctxt->eflags & X86_EFLAGS_RF) @@ -5878,7 +6094,7 @@ restart: emulation_type)) return EMULATE_DONE; - return handle_emulation_failure(vcpu); + return handle_emulation_failure(vcpu, emulation_type); } if (ctxt->have_exception) { @@ -5931,7 +6147,8 @@ restart: } EXPORT_SYMBOL_GPL(x86_emulate_instruction); -int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) +static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, + unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, @@ -5940,7 +6157,6 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) vcpu->arch.pio.count = 0; return ret; } -EXPORT_SYMBOL_GPL(kvm_fast_pio_out); static int complete_fast_pio_in(struct kvm_vcpu *vcpu) { @@ -5964,7 +6180,8 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) return 1; } -int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) +static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, + unsigned short port) { unsigned long val; int ret; @@ -5983,7 +6200,21 @@ int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port) return 0; } -EXPORT_SYMBOL_GPL(kvm_fast_pio_in); + +int 
kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) +{ + int ret = kvm_skip_emulated_instruction(vcpu); + + /* + * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered + * KVM_EXIT_DEBUG here. + */ + if (in) + return kvm_fast_pio_in(vcpu, size, port) && ret; + else + return kvm_fast_pio_out(vcpu, size, port) && ret; +} +EXPORT_SYMBOL_GPL(kvm_fast_pio); static int kvmclock_cpu_down_prep(unsigned int cpu) { @@ -6161,7 +6392,8 @@ static void kvm_timer_init(void) kvmclock_cpu_online, kvmclock_cpu_down_prep); } -static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); +DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); +EXPORT_PER_CPU_SYMBOL_GPL(current_vcpu); int kvm_is_in_guest(void) { @@ -6194,18 +6426,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { .get_guest_ip = kvm_get_guest_ip, }; -void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) -{ - __this_cpu_write(current_vcpu, vcpu); -} -EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); - -void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) -{ - __this_cpu_write(current_vcpu, NULL); -} -EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); - static void kvm_set_mmio_spte_mask(void) { u64 mask; @@ -6559,27 +6779,36 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) int r; /* try to reinject previous events if any */ - if (vcpu->arch.exception.injected) { - kvm_x86_ops->queue_exception(vcpu); - return 0; - } + if (vcpu->arch.exception.injected) + kvm_x86_ops->queue_exception(vcpu); /* - * Exceptions must be injected immediately, or the exception - * frame will have the address of the NMI or interrupt handler. + * Do not inject an NMI or interrupt if there is a pending + * exception. Exceptions and interrupts are recognized at + * instruction boundaries, i.e. the start of an instruction. + * Trap-like exceptions, e.g. #DB, have higher priority than + * NMIs and interrupts, i.e. traps are recognized before an + * NMI/interrupt that's pending on the same instruction. 
+ * Fault-like exceptions, e.g. #GP and #PF, are the lowest + * priority, but are only generated (pended) during instruction + * execution, i.e. a pending fault-like exception means the + * fault occurred on the *previous* instruction and must be + * serviced prior to recognizing any new events in order to + * fully complete the previous instruction. */ - if (!vcpu->arch.exception.pending) { - if (vcpu->arch.nmi_injected) { + else if (!vcpu->arch.exception.pending) { + if (vcpu->arch.nmi_injected) kvm_x86_ops->set_nmi(vcpu); - return 0; - } - - if (vcpu->arch.interrupt.pending) { + else if (vcpu->arch.interrupt.injected) kvm_x86_ops->set_irq(vcpu); - return 0; - } } + /* + * Call check_nested_events() even if we reinjected a previous event + * in order for caller to determine if it should require immediate-exit + * from L2 to L1 due to pending L1 events which require exit + * from L2 to L1. + */ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); if (r != 0) @@ -6592,6 +6821,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); + WARN_ON_ONCE(vcpu->arch.exception.injected); vcpu->arch.exception.pending = false; vcpu->arch.exception.injected = true; @@ -6606,7 +6836,14 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) } kvm_x86_ops->queue_exception(vcpu); - } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) { + } + + /* Don't consider new event if we re-injected an event */ + if (kvm_event_needs_reinjection(vcpu)) + return 0; + + if (vcpu->arch.smi_pending && !is_smm(vcpu) && + kvm_x86_ops->smi_allowed(vcpu)) { vcpu->arch.smi_pending = false; ++vcpu->arch.smi_count; enter_smm(vcpu); @@ -6900,8 +7137,6 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm) static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { - u64 eoi_exit_bitmap[4]; - if 
(!kvm_apic_hw_enabled(vcpu->arch.apic)) return; @@ -6914,6 +7149,20 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } + + if (is_guest_mode(vcpu)) + vcpu->arch.load_eoi_exitmap_pending = true; + else + kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); +} + +static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) +{ + u64 eoi_exit_bitmap[4]; + + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) + return; + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, vcpu_to_synic(vcpu)->vec_bitmap, 256); kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); @@ -7028,6 +7277,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu)) vcpu_scan_ioapic(vcpu); + if (kvm_check_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu)) + vcpu_load_eoi_exitmap(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) kvm_vcpu_reload_apic_access_page(vcpu); if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { @@ -7206,7 +7457,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); + kvm_before_interrupt(vcpu); kvm_x86_ops->handle_external_intr(vcpu); + kvm_after_interrupt(vcpu); ++vcpu->stat.exits; @@ -7415,7 +7668,6 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) return 0; } - int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -7441,6 +7693,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } + if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { + r = -EINVAL; + goto out; + } + + if (vcpu->run->kvm_dirty_regs) { + r = sync_regs(vcpu); + if (r != 0) + goto out; + } + /* re-sync apic's tpr */ if (!lapic_in_kernel(vcpu)) { if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) { @@ -7465,6 +7728,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: kvm_put_guest_fpu(vcpu); + if 
(vcpu->run->kvm_valid_regs) + store_regs(vcpu); post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -7472,10 +7737,8 @@ out: return r; } -int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - vcpu_load(vcpu); - if (vcpu->arch.emulate_regs_need_sync_to_vcpu) { /* * We are here if userspace calls get_regs() in the middle of @@ -7508,15 +7771,18 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->rip = kvm_rip_read(vcpu); regs->rflags = kvm_get_rflags(vcpu); +} +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + __get_regs(vcpu, regs); vcpu_put(vcpu); return 0; } -int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { - vcpu_load(vcpu); - vcpu->arch.emulate_regs_need_sync_from_vcpu = true; vcpu->arch.emulate_regs_need_sync_to_vcpu = false; @@ -7545,7 +7811,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.exception.pending = false; kvm_make_request(KVM_REQ_EVENT, vcpu); +} +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + __set_regs(vcpu, regs); vcpu_put(vcpu); return 0; } @@ -7560,13 +7831,10 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) } EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); -int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct desc_ptr dt; - vcpu_load(vcpu); - kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); @@ -7594,10 +7862,16 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, memset(sregs->interrupt_bitmap, 0, sizeof 
sregs->interrupt_bitmap); - if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) + if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft) set_bit(vcpu->arch.interrupt.nr, (unsigned long *)sregs->interrupt_bitmap); +} +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + __get_sregs(vcpu, sregs); vcpu_put(vcpu); return 0; } @@ -7669,7 +7943,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, } EXPORT_SYMBOL_GPL(kvm_task_switch); -int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +static int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) { /* @@ -7692,8 +7966,7 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) return 0; } -int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) +static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; @@ -7701,8 +7974,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct desc_ptr dt; int ret = -EINVAL; - vcpu_load(vcpu); - if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (sregs->cr4 & X86_CR4_OSXSAVE)) goto out; @@ -7781,6 +8052,16 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, ret = 0; out: + return ret; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + int ret; + + vcpu_load(vcpu); + ret = __set_sregs(vcpu, sregs); vcpu_put(vcpu); return ret; } @@ -7907,6 +8188,45 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } +static void store_regs(struct kvm_vcpu *vcpu) +{ + BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES); + + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS) + __get_regs(vcpu, &vcpu->run->s.regs.regs); + + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS) + __get_sregs(vcpu, 
&vcpu->run->s.regs.sregs); + + if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS) + kvm_vcpu_ioctl_x86_get_vcpu_events( + vcpu, &vcpu->run->s.regs.events); +} + +static int sync_regs(struct kvm_vcpu *vcpu) +{ + if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS) + return -EINVAL; + + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) { + __set_regs(vcpu, &vcpu->run->s.regs.regs); + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS; + } + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) { + if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs)) + return -EINVAL; + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS; + } + if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) { + if (kvm_vcpu_ioctl_x86_set_vcpu_events( + vcpu, &vcpu->run->s.regs.events)) + return -EINVAL; + vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS; + } + + return 0; +} + static void fx_init(struct kvm_vcpu *vcpu) { fpstate_init(&vcpu->arch.guest_fpu.state); @@ -8017,6 +8337,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) { + kvm_lapic_reset(vcpu, init_event); + vcpu->arch.hflags = 0; vcpu->arch.smi_pending = 0; @@ -8360,7 +8682,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) raw_spin_lock_init(&kvm->arch.tsc_write_lock); mutex_init(&kvm->arch.apic_map_lock); - mutex_init(&kvm->arch.hyperv.hv_lock); spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); kvm->arch.kvmclock_offset = -ktime_get_boot_ns(); @@ -8369,6 +8690,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); + kvm_hv_init_vm(kvm); kvm_page_track_init(kvm); kvm_mmu_init_vm(kvm); @@ -8460,10 +8782,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) return r; } - if (!size) { - r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); - WARN_ON(r < 0); - } + if (!size) + 
vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); return 0; } @@ -8501,6 +8821,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); kvm_page_track_cleanup(kvm); + kvm_hv_destroy_vm(kvm); } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b91215d..7d35ce6 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,12 +2,48 @@ #ifndef ARCH_X86_KVM_X86_H #define ARCH_X86_KVM_X86_H -#include <asm/processor.h> -#include <asm/mwait.h> #include <linux/kvm_host.h> #include <asm/pvclock.h> #include "kvm_cache_regs.h" +#define KVM_DEFAULT_PLE_GAP 128 +#define KVM_VMX_DEFAULT_PLE_WINDOW 4096 +#define KVM_DEFAULT_PLE_WINDOW_GROW 2 +#define KVM_DEFAULT_PLE_WINDOW_SHRINK 0 +#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX +#define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX +#define KVM_SVM_DEFAULT_PLE_WINDOW 3000 + +static inline unsigned int __grow_ple_window(unsigned int val, + unsigned int base, unsigned int modifier, unsigned int max) +{ + u64 ret = val; + + if (modifier < 1) + return base; + + if (modifier < base) + ret *= modifier; + else + ret += modifier; + + return min(ret, (u64)max); +} + +static inline unsigned int __shrink_ple_window(unsigned int val, + unsigned int base, unsigned int modifier, unsigned int min) +{ + if (modifier < 1) + return base; + + if (modifier < base) + val /= modifier; + else + val -= modifier; + + return max(val, min); +} + #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) @@ -19,19 +55,19 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, bool soft) { - vcpu->arch.interrupt.pending = true; + vcpu->arch.interrupt.injected = true; vcpu->arch.interrupt.soft = soft; vcpu->arch.interrupt.nr = vector; } static inline void 
kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) { - vcpu->arch.interrupt.pending = false; + vcpu->arch.interrupt.injected = false; } static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) { - return vcpu->arch.exception.injected || vcpu->arch.interrupt.pending || + return vcpu->arch.exception.injected || vcpu->arch.interrupt.injected || vcpu->arch.nmi_injected; } @@ -205,8 +241,6 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) return !(kvm->arch.disabled_quirks & quirk); } -void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); -void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); void kvm_set_pending_timer(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); @@ -221,6 +255,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, struct x86_exception *exception); +int handle_ud(struct kvm_vcpu *vcpu); + void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); @@ -242,6 +278,8 @@ extern unsigned int min_timer_period_us; extern unsigned int lapic_timer_advance_ns; +extern bool enable_vmware_backdoor; + extern struct static_key kvm_no_apic_vcpu; static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) @@ -264,10 +302,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) __rem; \ }) -static inline bool kvm_mwait_in_guest(void) +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_PAUSE) + +static inline bool kvm_mwait_in_guest(struct kvm *kvm) +{ + return kvm->arch.mwait_in_guest; +} + +static inline bool kvm_hlt_in_guest(struct kvm *kvm) +{ + return kvm->arch.hlt_in_guest; +} + 
+static inline bool kvm_pause_in_guest(struct kvm *kvm) +{ + return kvm->arch.pause_in_guest; +} + +DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu); + +static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu) +{ + __this_cpu_write(current_vcpu, vcpu); +} + +static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu) { - return boot_cpu_has(X86_FEATURE_MWAIT) && - !boot_cpu_has_bug(X86_BUG_MONITOR); + __this_cpu_write(current_vcpu, NULL); } #endif diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 91e9700..25a972c 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RETPOLINE) += retpoline.o -OBJECT_FILES_NON_STANDARD_retpoline.o :=y obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S index 81b1635..88acd34 100644 --- a/arch/x86/lib/clear_page_64.S +++ b/arch/x86/lib/clear_page_64.S @@ -1,6 +1,4 @@ #include <linux/linkage.h> -#include <asm/cpufeatures.h> -#include <asm/alternative-asm.h> #include <asm/export.h> /* diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c index 693cce0..fee8b9c 100644 --- a/arch/x86/lib/msr-smp.c +++ b/arch/x86/lib/msr-smp.c @@ -2,6 +2,7 @@ #include <linux/export.h> #include <linux/preempt.h> #include <linux/smp.h> +#include <linux/completion.h> #include <asm/msr.h> static void __rdmsr_on_cpu(void *info) @@ -143,13 +144,19 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) } EXPORT_SYMBOL(wrmsr_on_cpus); +struct msr_info_completion { + struct msr_info msr; + struct completion done; +}; + /* These "safe" variants are slower and should be used when the target MSR may not actually exist. 
*/ static void __rdmsr_safe_on_cpu(void *info) { - struct msr_info *rv = info; + struct msr_info_completion *rv = info; - rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); + rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, &rv->msr.reg.h); + complete(&rv->done); } static void __wrmsr_safe_on_cpu(void *info) @@ -161,17 +168,26 @@ static void __wrmsr_safe_on_cpu(void *info) int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { + struct msr_info_completion rv; + call_single_data_t csd = { + .func = __rdmsr_safe_on_cpu, + .info = &rv, + }; int err; - struct msr_info rv; memset(&rv, 0, sizeof(rv)); + init_completion(&rv.done); + rv.msr.msr_no = msr_no; - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); - *l = rv.reg.l; - *h = rv.reg.h; + err = smp_call_function_single_async(cpu, &csd); + if (!err) { + wait_for_completion(&rv.done); + err = rv.msr.err; + } + *l = rv.msr.reg.l; + *h = rv.msr.reg.h; - return err ? err : rv.err; + return err; } EXPORT_SYMBOL(rdmsr_safe_on_cpu); @@ -209,16 +225,13 @@ EXPORT_SYMBOL(wrmsrl_safe_on_cpu); int rdmsrl_safe_on_cpu(unsigned int cpu, u32 msr_no, u64 *q) { + u32 low, high; int err; - struct msr_info rv; - memset(&rv, 0, sizeof(rv)); + err = rdmsr_safe_on_cpu(cpu, msr_no, &low, &high); + *q = (u64)high << 32 | low; - rv.msr_no = msr_no; - err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); - *q = rv.reg.q; - - return err ? err : rv.err; + return err; } EXPORT_SYMBOL(rdmsrl_safe_on_cpu); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 480edc3..c909961 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,7 +7,6 @@ #include <asm/alternative-asm.h> #include <asm/export.h> #include <asm/nospec-branch.h> -#include <asm/bitsperlong.h> .macro THUNK reg .section .text.__x86.indirect_thunk @@ -47,58 +46,3 @@ GENERATE_THUNK(r13) GENERATE_THUNK(r14) GENERATE_THUNK(r15) #endif - -/* - * Fill the CPU return stack buffer. 
- * - * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; lfence; jmp' loop to capture speculative execution. - * - * This is required in various cases for retpoline and IBRS-based - * mitigations for the Spectre variant 2 vulnerability. Sometimes to - * eliminate potentially bogus entries from the RSB, and sometimes - * purely to ensure that it doesn't get empty, which on some CPUs would - * allow predictions from other (unwanted!) sources to be used. - * - * Google experimented with loop-unrolling and this turned out to be - * the optimal version - two calls, each with their own speculation - * trap should their return address end up getting used, in a loop. - */ -.macro STUFF_RSB nr:req sp:req - mov $(\nr / 2), %_ASM_BX - .align 16 -771: - call 772f -773: /* speculation trap */ - pause - lfence - jmp 773b - .align 16 -772: - call 774f -775: /* speculation trap */ - pause - lfence - jmp 775b - .align 16 -774: - dec %_ASM_BX - jnz 771b - add $((BITS_PER_LONG/8) * \nr), \sp -.endm - -#define RSB_FILL_LOOPS 16 /* To avoid underflow */ - -ENTRY(__fill_rsb) - STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP - ret -END(__fill_rsb) -EXPORT_SYMBOL_GPL(__fill_rsb) - -#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ - -ENTRY(__clear_rsb) - STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP - ret -END(__clear_rsb) -EXPORT_SYMBOL_GPL(__clear_rsb) diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 27e9e90..4b101dd 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,12 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 -# Kernel does not boot with instrumentation of tlb.c and mem_encrypt.c -KCOV_INSTRUMENT_tlb.o := n -KCOV_INSTRUMENT_mem_encrypt.o := n +# Kernel does not boot with instrumentation of tlb.c and mem_encrypt*.c +KCOV_INSTRUMENT_tlb.o := n +KCOV_INSTRUMENT_mem_encrypt.o := n +KCOV_INSTRUMENT_mem_encrypt_identity.o := n -KASAN_SANITIZE_mem_encrypt.o := n +KASAN_SANITIZE_mem_encrypt.o := n 
+KASAN_SANITIZE_mem_encrypt_identity.o := n ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_mem_encrypt.o = -pg +CFLAGS_REMOVE_mem_encrypt.o = -pg +CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ @@ -16,6 +19,7 @@ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_physaddr.o := $(nostackp) CFLAGS_setup_nx.o := $(nostackp) +CFLAGS_mem_encrypt_identity.o := $(nostackp) CFLAGS_fault.o := -I$(src)/../include/asm/trace @@ -47,4 +51,5 @@ obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o obj-$(CONFIG_PAGE_TABLE_ISOLATION) += pti.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt.o +obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index b9283cc..476d810 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -163,4 +163,10 @@ void __init setup_cpu_entry_areas(void) for_each_possible_cpu(cpu) setup_cpu_entry_area(cpu); + + /* + * This is the last essential update to swapper_pgdir which needs + * to be synchronized to initial_page_table on 32bit. 
+ */ + sync_initial_page_table(); } diff --git a/arch/x86/mm/debug_pagetables.c b/arch/x86/mm/debug_pagetables.c index 421f266..225fe2f 100644 --- a/arch/x86/mm/debug_pagetables.c +++ b/arch/x86/mm/debug_pagetables.c @@ -1,4 +1,5 @@ #include <linux/debugfs.h> +#include <linux/efi.h> #include <linux/module.h> #include <linux/seq_file.h> #include <asm/pgtable.h> @@ -72,6 +73,30 @@ static const struct file_operations ptdump_curusr_fops = { }; #endif +#if defined(CONFIG_EFI) && defined(CONFIG_X86_64) +static struct dentry *pe_efi; + +static int ptdump_show_efi(struct seq_file *m, void *v) +{ + if (efi_mm.pgd) + ptdump_walk_pgd_level_debugfs(m, efi_mm.pgd, false); + return 0; +} + +static int ptdump_open_efi(struct inode *inode, struct file *filp) +{ + return single_open(filp, ptdump_show_efi, NULL); +} + +static const struct file_operations ptdump_efi_fops = { + .owner = THIS_MODULE, + .open = ptdump_open_efi, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + static struct dentry *dir, *pe_knl, *pe_curknl; static int __init pt_dump_debug_init(void) @@ -96,6 +121,13 @@ static int __init pt_dump_debug_init(void) if (!pe_curusr) goto err; #endif + +#if defined(CONFIG_EFI) && defined(CONFIG_X86_64) + pe_efi = debugfs_create_file("efi", 0400, dir, NULL, &ptdump_efi_fops); + if (!pe_efi) + goto err; +#endif + return 0; err: debugfs_remove_recursive(dir); diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 2a4849e..62a7e9f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -29,6 +29,7 @@ struct pg_state { int level; pgprot_t current_prot; + pgprotval_t effective_prot; unsigned long start_address; unsigned long current_address; const struct addr_marker *marker; @@ -85,11 +86,15 @@ static struct addr_marker address_markers[] = { [VMALLOC_START_NR] = { 0UL, "vmalloc() Area" }, [VMEMMAP_START_NR] = { 0UL, "Vmemmap" }, #ifdef CONFIG_KASAN - [KASAN_SHADOW_START_NR] = { 
KASAN_SHADOW_START, "KASAN shadow" }, - [KASAN_SHADOW_END_NR] = { KASAN_SHADOW_END, "KASAN shadow end" }, + /* + * These fields get initialized with the (dynamic) + * KASAN_SHADOW_{START,END} values in pt_dump_init(). + */ + [KASAN_SHADOW_START_NR] = { 0UL, "KASAN shadow" }, + [KASAN_SHADOW_END_NR] = { 0UL, "KASAN shadow end" }, #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL - [LDT_NR] = { LDT_BASE_ADDR, "LDT remap" }, + [LDT_NR] = { 0UL, "LDT remap" }, #endif [CPU_ENTRY_AREA_NR] = { CPU_ENTRY_AREA_BASE,"CPU entry Area" }, #ifdef CONFIG_X86_ESPFIX64 @@ -231,9 +236,9 @@ static unsigned long normalize_addr(unsigned long u) * print what we collected so far. */ static void note_page(struct seq_file *m, struct pg_state *st, - pgprot_t new_prot, int level) + pgprot_t new_prot, pgprotval_t new_eff, int level) { - pgprotval_t prot, cur; + pgprotval_t prot, cur, eff; static const char units[] = "BKMGTPE"; /* @@ -243,23 +248,24 @@ static void note_page(struct seq_file *m, struct pg_state *st, */ prot = pgprot_val(new_prot); cur = pgprot_val(st->current_prot); + eff = st->effective_prot; if (!st->level) { /* First entry */ st->current_prot = new_prot; + st->effective_prot = new_eff; st->level = level; st->marker = address_markers; st->lines = 0; pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", st->marker->name); - } else if (prot != cur || level != st->level || + } else if (prot != cur || new_eff != eff || level != st->level || st->current_address >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; int width = sizeof(unsigned long) * 2; - pgprotval_t pr = pgprot_val(st->current_prot); - if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) { + if (st->check_wx && (eff & _PAGE_RW) && !(eff & _PAGE_NX)) { WARN_ONCE(1, "x86/mm: Found insecure W+X mapping at address %p/%pS\n", (void *)st->start_address, @@ -313,21 +319,30 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->start_address = st->current_address; st->current_prot = 
new_prot; + st->effective_prot = new_eff; st->level = level; } } -static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, unsigned long P) +static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +{ + return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | + ((prot1 | prot2) & _PAGE_NX); +} + +static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, + pgprotval_t eff_in, unsigned long P) { int i; pte_t *start; - pgprotval_t prot; + pgprotval_t prot, eff; start = (pte_t *)pmd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PTE; i++) { prot = pte_flags(*start); + eff = effective_prot(eff_in, prot); st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); - note_page(m, st, __pgprot(prot), 5); + note_page(m, st, __pgprot(prot), eff, 5); start++; } } @@ -344,12 +359,10 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, void *pt) { if (__pa(pt) == __pa(kasan_zero_pmd) || -#ifdef CONFIG_X86_5LEVEL - __pa(pt) == __pa(kasan_zero_p4d) || -#endif + (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) || __pa(pt) == __pa(kasan_zero_pud)) { pgprotval_t prot = pte_flags(kasan_zero_pte[0]); - note_page(m, st, __pgprot(prot), 5); + note_page(m, st, __pgprot(prot), 0, 5); return true; } return false; @@ -364,42 +377,45 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, #if PTRS_PER_PMD > 1 -static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P) +static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, + pgprotval_t eff_in, unsigned long P) { int i; pmd_t *start, *pmd_start; - pgprotval_t prot; + pgprotval_t prot, eff; pmd_start = start = (pmd_t *)pud_page_vaddr(addr); for (i = 0; i < PTRS_PER_PMD; i++) { st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); if (!pmd_none(*start)) { + prot = pmd_flags(*start); + eff = effective_prot(eff_in, prot); if (pmd_large(*start) || 
!pmd_present(*start)) { - prot = pmd_flags(*start); - note_page(m, st, __pgprot(prot), 4); + note_page(m, st, __pgprot(prot), eff, 4); } else if (!kasan_page_table(m, st, pmd_start)) { - walk_pte_level(m, st, *start, + walk_pte_level(m, st, *start, eff, P + i * PMD_LEVEL_MULT); } } else - note_page(m, st, __pgprot(0), 4); + note_page(m, st, __pgprot(0), 0, 4); start++; } } #else -#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p) +#define walk_pmd_level(m,s,a,e,p) walk_pte_level(m,s,__pmd(pud_val(a)),e,p) #define pud_large(a) pmd_large(__pmd(pud_val(a))) #define pud_none(a) pmd_none(__pmd(pud_val(a))) #endif #if PTRS_PER_PUD > 1 -static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P) +static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, + pgprotval_t eff_in, unsigned long P) { int i; pud_t *start, *pud_start; - pgprotval_t prot; + pgprotval_t prot, eff; pud_t *prev_pud = NULL; pud_start = start = (pud_t *)p4d_page_vaddr(addr); @@ -407,15 +423,16 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, for (i = 0; i < PTRS_PER_PUD; i++) { st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); if (!pud_none(*start)) { + prot = pud_flags(*start); + eff = effective_prot(eff_in, prot); if (pud_large(*start) || !pud_present(*start)) { - prot = pud_flags(*start); - note_page(m, st, __pgprot(prot), 3); + note_page(m, st, __pgprot(prot), eff, 3); } else if (!kasan_page_table(m, st, pud_start)) { - walk_pmd_level(m, st, *start, + walk_pmd_level(m, st, *start, eff, P + i * PUD_LEVEL_MULT); } } else - note_page(m, st, __pgprot(0), 3); + note_page(m, st, __pgprot(0), 0, 3); prev_pud = start; start++; @@ -423,43 +440,43 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, } #else -#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(p4d_val(a)),p) +#define walk_pud_level(m,s,a,e,p) walk_pmd_level(m,s,__pud(p4d_val(a)),e,p) 
#define p4d_large(a) pud_large(__pud(p4d_val(a))) #define p4d_none(a) pud_none(__pud(p4d_val(a))) #endif -#if PTRS_PER_P4D > 1 - -static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) +static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, + pgprotval_t eff_in, unsigned long P) { int i; p4d_t *start, *p4d_start; - pgprotval_t prot; + pgprotval_t prot, eff; + + if (PTRS_PER_P4D == 1) + return walk_pud_level(m, st, __p4d(pgd_val(addr)), eff_in, P); p4d_start = start = (p4d_t *)pgd_page_vaddr(addr); for (i = 0; i < PTRS_PER_P4D; i++) { st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT); if (!p4d_none(*start)) { + prot = p4d_flags(*start); + eff = effective_prot(eff_in, prot); if (p4d_large(*start) || !p4d_present(*start)) { - prot = p4d_flags(*start); - note_page(m, st, __pgprot(prot), 2); + note_page(m, st, __pgprot(prot), eff, 2); } else if (!kasan_page_table(m, st, p4d_start)) { - walk_pud_level(m, st, *start, + walk_pud_level(m, st, *start, eff, P + i * P4D_LEVEL_MULT); } } else - note_page(m, st, __pgprot(0), 2); + note_page(m, st, __pgprot(0), 0, 2); start++; } } -#else -#define walk_p4d_level(m,s,a,p) walk_pud_level(m,s,__p4d(pgd_val(a)),p) -#define pgd_large(a) p4d_large(__p4d(pgd_val(a))) -#define pgd_none(a) p4d_none(__p4d(pgd_val(a))) -#endif +#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) +#define pgd_none(a) (pgtable_l5_enabled ? 
pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) static inline bool is_hypervisor_range(int idx) { @@ -483,7 +500,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, #else pgd_t *start = swapper_pg_dir; #endif - pgprotval_t prot; + pgprotval_t prot, eff; int i; struct pg_state st = {}; @@ -499,15 +516,20 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, for (i = 0; i < PTRS_PER_PGD; i++) { st.current_address = normalize_addr(i * PGD_LEVEL_MULT); if (!pgd_none(*start) && !is_hypervisor_range(i)) { + prot = pgd_flags(*start); +#ifdef CONFIG_X86_PAE + eff = _PAGE_USER | _PAGE_RW; +#else + eff = prot; +#endif if (pgd_large(*start) || !pgd_present(*start)) { - prot = pgd_flags(*start); - note_page(m, &st, __pgprot(prot), 1); + note_page(m, &st, __pgprot(prot), eff, 1); } else { - walk_p4d_level(m, &st, *start, + walk_p4d_level(m, &st, *start, eff, i * PGD_LEVEL_MULT); } } else - note_page(m, &st, __pgprot(0), 1); + note_page(m, &st, __pgprot(0), 0, 1); cond_resched(); start++; @@ -515,7 +537,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, /* Flush out the last page */ st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); - note_page(m, &st, __pgprot(0), 0); + note_page(m, &st, __pgprot(0), 0, 0); if (!checkwx) return; if (st.wx_pages) @@ -570,6 +592,13 @@ static int __init pt_dump_init(void) address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; +#ifdef CONFIG_MODIFY_LDT_SYSCALL + address_markers[LDT_NR].start_address = LDT_BASE_ADDR; +#endif +#ifdef CONFIG_KASAN + address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; + address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; +#endif #endif #ifdef CONFIG_X86_32 address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; diff --git a/arch/x86/mm/fault.c 
b/arch/x86/mm/fault.c index 800de81..73bd8c9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -330,7 +330,7 @@ static noinline int vmalloc_fault(unsigned long address) if (!pmd_k) return -1; - if (pmd_huge(*pmd_k)) + if (pmd_large(*pmd_k)) return 0; pte_k = pte_offset_kernel(pmd_k, address); @@ -417,11 +417,11 @@ void vmalloc_sync_all(void) */ static noinline int vmalloc_fault(unsigned long address) { - pgd_t *pgd, *pgd_ref; - p4d_t *p4d, *p4d_ref; - pud_t *pud, *pud_ref; - pmd_t *pmd, *pmd_ref; - pte_t *pte, *pte_ref; + pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; /* Make sure we are in vmalloc area: */ if (!(address >= VMALLOC_START && address < VMALLOC_END)) @@ -435,73 +435,51 @@ static noinline int vmalloc_fault(unsigned long address) * case just flush: */ pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); - pgd_ref = pgd_offset_k(address); - if (pgd_none(*pgd_ref)) + pgd_k = pgd_offset_k(address); + if (pgd_none(*pgd_k)) return -1; - if (CONFIG_PGTABLE_LEVELS > 4) { + if (pgtable_l5_enabled) { if (pgd_none(*pgd)) { - set_pgd(pgd, *pgd_ref); + set_pgd(pgd, *pgd_k); arch_flush_lazy_mmu_mode(); } else { - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k)); } } /* With 4-level paging, copying happens on the p4d level. 
*/ p4d = p4d_offset(pgd, address); - p4d_ref = p4d_offset(pgd_ref, address); - if (p4d_none(*p4d_ref)) + p4d_k = p4d_offset(pgd_k, address); + if (p4d_none(*p4d_k)) return -1; - if (p4d_none(*p4d) && CONFIG_PGTABLE_LEVELS == 4) { - set_p4d(p4d, *p4d_ref); + if (p4d_none(*p4d) && !pgtable_l5_enabled) { + set_p4d(p4d, *p4d_k); arch_flush_lazy_mmu_mode(); } else { - BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref)); + BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k)); } - /* - * Below here mismatches are bugs because these lower tables - * are shared: - */ BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); pud = pud_offset(p4d, address); - pud_ref = pud_offset(p4d_ref, address); - if (pud_none(*pud_ref)) + if (pud_none(*pud)) return -1; - if (pud_none(*pud) || pud_pfn(*pud) != pud_pfn(*pud_ref)) - BUG(); - - if (pud_huge(*pud)) + if (pud_large(*pud)) return 0; pmd = pmd_offset(pud, address); - pmd_ref = pmd_offset(pud_ref, address); - if (pmd_none(*pmd_ref)) + if (pmd_none(*pmd)) return -1; - if (pmd_none(*pmd) || pmd_pfn(*pmd) != pmd_pfn(*pmd_ref)) - BUG(); - - if (pmd_huge(*pmd)) + if (pmd_large(*pmd)) return 0; - pte_ref = pte_offset_kernel(pmd_ref, address); - if (!pte_present(*pte_ref)) - return -1; - pte = pte_offset_kernel(pmd, address); - - /* - * Don't use pte_page here, because the mappings can point - * outside mem_map, and the NUMA hash lookup cannot handle - * that: - */ - if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) - BUG(); + if (!pte_present(*pte)) + return -1; return 0; } @@ -699,7 +677,6 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(KERN_CONT "paging request"); printk(KERN_CONT " at %px\n", (void *) address); - printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip); dump_pagetable(address); } @@ -1248,10 +1225,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, tsk = current; mm = tsk->mm; - /* - * Detect and handle instructions that would cause a page fault for - * both a tracked kernel page and a userspace page. 
- */ prefetchw(&mm->mmap_sem); if (unlikely(kmmio_fault(regs, address))) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index ab33a32..9aa22be 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -120,7 +120,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, result = ident_p4d_init(info, p4d, addr, next); if (result) return result; - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + if (pgtable_l5_enabled) { set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); } else { /* diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 79cb066..8008db2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -453,6 +453,21 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ +void __init sync_initial_page_table(void) +{ + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* + * sync back low identity map too. It is used for example + * in the 32-bit EFI stub. + */ + clone_pgd_range(initial_page_table, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +} + void __init native_pagetable_init(void) { unsigned long pfn, va; @@ -763,6 +778,7 @@ void __init mem_init(void) free_all_bootmem(); after_bootmem = 1; + x86_init.hyper.init_after_bootmem(); mem_init_print_info(NULL); printk(KERN_INFO "virtual kernel memory layout:\n" diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8b72923..66de40e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -88,12 +88,7 @@ static int __init nonx32_setup(char *str) } __setup("noexec32=", nonx32_setup); -/* - * When memory was added make sure all the processes MM have - * suitable PGD entries in the local PGD level page. 
- */ -#ifdef CONFIG_X86_5LEVEL -void sync_global_pgds(unsigned long start, unsigned long end) +static void sync_global_pgds_l5(unsigned long start, unsigned long end) { unsigned long addr; @@ -129,8 +124,8 @@ void sync_global_pgds(unsigned long start, unsigned long end) spin_unlock(&pgd_lock); } } -#else -void sync_global_pgds(unsigned long start, unsigned long end) + +static void sync_global_pgds_l4(unsigned long start, unsigned long end) { unsigned long addr; @@ -143,7 +138,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) * With folded p4d, pgd_none() is always false, we need to * handle synchonization on p4d level. */ - BUILD_BUG_ON(pgd_none(*pgd_ref)); + MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref)); p4d_ref = p4d_offset(pgd_ref, addr); if (p4d_none(*p4d_ref)) @@ -173,7 +168,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) spin_unlock(&pgd_lock); } } -#endif + +/* + * When memory was added make sure all the processes MM have + * suitable PGD entries in the local PGD level page. 
+ */ +void sync_global_pgds(unsigned long start, unsigned long end) +{ + if (pgtable_l5_enabled) + sync_global_pgds_l5(start, end); + else + sync_global_pgds_l4(start, end); +} /* * NOTE: This function is marked __ref because it calls __init function @@ -632,7 +638,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, unsigned long vaddr = (unsigned long)__va(paddr); int i = p4d_index(vaddr); - if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + if (!pgtable_l5_enabled) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { @@ -712,7 +718,7 @@ kernel_physical_mapping_init(unsigned long paddr_start, page_size_mask); spin_lock(&init_mm.page_table_lock); - if (IS_ENABLED(CONFIG_X86_5LEVEL)) + if (pgtable_l5_enabled) pgd_populate(&init_mm, pgd, p4d); else p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); @@ -800,17 +806,11 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, #define PAGE_INUSE 0xFD -static void __meminit free_pagetable(struct page *page, int order, - struct vmem_altmap *altmap) +static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; - if (altmap) { - vmem_altmap_free(altmap, nr_pages); - return; - } - /* bootmem page has reserved flag */ if (PageReserved(page)) { __ClearPageReserved(page); @@ -826,9 +826,17 @@ static void __meminit free_pagetable(struct page *page, int order, free_pages((unsigned long)page_address(page), order); } -static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd, +static void __meminit free_hugepage_table(struct page *page, struct vmem_altmap *altmap) { + if (altmap) + vmem_altmap_free(altmap, PMD_SIZE / PAGE_SIZE); + else + free_pagetable(page, get_order(PMD_SIZE)); +} + +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ pte_t *pte; int i; @@ -839,14 +847,13 @@ static void __meminit 
free_pte_table(pte_t *pte_start, pmd_t *pmd, } /* free a pte talbe */ - free_pagetable(pmd_page(*pmd), 0, altmap); + free_pagetable(pmd_page(*pmd), 0); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, - struct vmem_altmap *altmap) +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) { pmd_t *pmd; int i; @@ -858,14 +865,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, } /* free a pmd talbe */ - free_pagetable(pud_page(*pud), 0, altmap); + free_pagetable(pud_page(*pud), 0); spin_lock(&init_mm.page_table_lock); pud_clear(pud); spin_unlock(&init_mm.page_table_lock); } -static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, - struct vmem_altmap *altmap) +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) { pud_t *pud; int i; @@ -877,7 +883,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, } /* free a pud talbe */ - free_pagetable(p4d_page(*p4d), 0, altmap); + free_pagetable(p4d_page(*p4d), 0); spin_lock(&init_mm.page_table_lock); p4d_clear(p4d); spin_unlock(&init_mm.page_table_lock); @@ -885,7 +891,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d, static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, - struct vmem_altmap *altmap, bool direct) + bool direct) { unsigned long next, pages = 0; pte_t *pte; @@ -916,7 +922,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, * freed when offlining, or simplely not in use. 
*/ if (!direct) - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -939,7 +945,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, page_addr = page_address(pte_page(*pte)); if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { - free_pagetable(pte_page(*pte), 0, altmap); + free_pagetable(pte_page(*pte), 0); spin_lock(&init_mm.page_table_lock); pte_clear(&init_mm, addr, pte); @@ -974,9 +980,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { if (!direct) - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -989,9 +994,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, page_addr = page_address(pmd_page(*pmd)); if (!memchr_inv(page_addr, PAGE_INUSE, PMD_SIZE)) { - free_pagetable(pmd_page(*pmd), - get_order(PMD_SIZE), - altmap); + free_hugepage_table(pmd_page(*pmd), + altmap); spin_lock(&init_mm.page_table_lock); pmd_clear(pmd); @@ -1003,8 +1007,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, } pte_base = (pte_t *)pmd_page_vaddr(*pmd); - remove_pte_table(pte_base, addr, next, altmap, direct); - free_pte_table(pte_base, pmd, altmap); + remove_pte_table(pte_base, addr, next, direct); + free_pte_table(pte_base, pmd); } /* Call free_pmd_table() in remove_pud_table(). 
*/ @@ -1033,8 +1037,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, IS_ALIGNED(next, PUD_SIZE)) { if (!direct) free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1048,8 +1051,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, if (!memchr_inv(page_addr, PAGE_INUSE, PUD_SIZE)) { free_pagetable(pud_page(*pud), - get_order(PUD_SIZE), - altmap); + get_order(PUD_SIZE)); spin_lock(&init_mm.page_table_lock); pud_clear(pud); @@ -1062,7 +1064,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, pmd_base = pmd_offset(pud, 0); remove_pmd_table(pmd_base, addr, next, direct, altmap); - free_pmd_table(pmd_base, pud, altmap); + free_pmd_table(pmd_base, pud); } if (direct) @@ -1093,8 +1095,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. */ - if (CONFIG_PGTABLE_LEVELS == 5) - free_pud_table(pud_base, p4d, altmap); + if (pgtable_l5_enabled) + free_pud_table(pud_base, p4d); } if (direct) @@ -1183,6 +1185,7 @@ void __init mem_init(void) /* this will put all memory onto the freelists */ free_all_bootmem(); after_bootmem = 1; + x86_init.hyper.init_after_bootmem(); /* * Must be done after boot memory is put on freelist, because here we @@ -1326,14 +1329,39 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pte_pfn(*pte)); } +/* + * Block size is the minimum amount of memory which can be hotplugged or + * hotremoved. It must be power of two and must be equal or larger than + * MIN_MEMORY_BLOCK_SIZE. 
+ */ +#define MAX_BLOCK_SIZE (2UL << 30) + +/* Amount of ram needed to start using large blocks */ +#define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30) + static unsigned long probe_memory_block_size(void) { - unsigned long bz = MIN_MEMORY_BLOCK_SIZE; + unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; + unsigned long bz; - /* if system is UV or has 64GB of RAM or more, use large blocks */ - if (is_uv_system() || ((max_pfn << PAGE_SHIFT) >= (64UL << 30))) - bz = 2UL << 30; /* 2GB */ + /* If this is UV system, always set 2G block size */ + if (is_uv_system()) { + bz = MAX_BLOCK_SIZE; + goto done; + } + /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ + if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { + bz = MIN_MEMORY_BLOCK_SIZE; + goto done; + } + + /* Find the largest allowed block size that aligns to memory end */ + for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) { + if (IS_ALIGNED(boot_mem_end, bz)) + break; + } +done: pr_info("x86/mm: Memory block size: %ldMB\n", bz >> 20); return bz; diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index af6f2f9..d8ff013 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,6 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt + +#ifdef CONFIG_X86_5LEVEL +/* Too early to use cpu_feature_enabled() */ +#define pgtable_l5_enabled __pgtable_l5_enabled +#endif + #include <linux/bootmem.h> #include <linux/kasan.h> #include <linux/kdebug.h> @@ -19,7 +25,7 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; -static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); +static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); static __init void *early_alloc(size_t size, int nid, bool panic) { @@ -176,10 +182,10 @@ static void __init clear_pgds(unsigned long start, * With folded p4d, pgd_clear() is nop, use p4d_clear() * instead. 
*/ - if (CONFIG_PGTABLE_LEVELS < 5) - p4d_clear(p4d_offset(pgd, start)); - else + if (pgtable_l5_enabled) pgd_clear(pgd); + else + p4d_clear(p4d_offset(pgd, start)); } pgd = pgd_offset_k(start); @@ -191,7 +197,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) { unsigned long p4d; - if (!IS_ENABLED(CONFIG_X86_5LEVEL)) + if (!pgtable_l5_enabled) return (p4d_t *)pgd; p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; @@ -272,7 +278,7 @@ void __init kasan_early_init(void) for (i = 0; i < PTRS_PER_PUD; i++) kasan_zero_pud[i] = __pud(pud_val); - for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++) + for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++) kasan_zero_p4d[i] = __p4d(p4d_val); kasan_map_early_shadow(early_top_pgt); @@ -303,7 +309,7 @@ void __init kasan_init(void) * bunch of things like kernel code, modules, EFI mapping, etc. * We need to take extra steps to not overwrite them. */ - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + if (pgtable_l5_enabled) { void *ptr; ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index aedebd2..615cc03 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -34,23 +34,12 @@ #define TB_SHIFT 40 /* - * Virtual address start and end range for randomization. - * * The end address could depend on more configuration options to make the * highest amount of space for randomization available, but that's too hard * to keep straight and caused issues already. 
*/ -static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; -/* Default values */ -unsigned long page_offset_base = __PAGE_OFFSET_BASE; -EXPORT_SYMBOL(page_offset_base); -unsigned long vmalloc_base = __VMALLOC_BASE; -EXPORT_SYMBOL(vmalloc_base); -unsigned long vmemmap_base = __VMEMMAP_BASE; -EXPORT_SYMBOL(vmemmap_base); - /* * Memory regions randomized by KASLR (except modules that use a separate logic * earlier during boot). The list is ordered based on virtual addresses. This @@ -60,8 +49,8 @@ static __initdata struct kaslr_memory_region { unsigned long *base; unsigned long size_tb; } kaslr_regions[] = { - { &page_offset_base, 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT) /* Maximum */ }, - { &vmalloc_base, VMALLOC_SIZE_TB }, + { &page_offset_base, 0 }, + { &vmalloc_base, 0 }, { &vmemmap_base, 1 }, }; @@ -84,11 +73,14 @@ static inline bool kaslr_memory_enabled(void) void __init kernel_randomize_memory(void) { size_t i; - unsigned long vaddr = vaddr_start; + unsigned long vaddr_start, vaddr; unsigned long rand, memory_tb; struct rnd_state rand_state; unsigned long remain_entropy; + vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; + vaddr = vaddr_start; + /* * These BUILD_BUG_ON checks ensure the memory layout is consistent * with the vaddr_start/vaddr_end variables. These checks are very @@ -101,6 +93,9 @@ void __init kernel_randomize_memory(void) if (!kaslr_memory_enabled()) return; + kaslr_regions[0].size_tb = 1 << (__PHYSICAL_MASK_SHIFT - TB_SHIFT); + kaslr_regions[1].size_tb = VMALLOC_SIZE_TB; + /* * Update Physical memory mapping to available and * add padding if needed (especially for memory hotplug support). 
@@ -129,7 +124,7 @@ void __init kernel_randomize_memory(void) */ entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); prandom_bytes_state(&rand_state, &rand, sizeof(rand)); - if (IS_ENABLED(CONFIG_X86_5LEVEL)) + if (pgtable_l5_enabled) entropy = (rand % (entropy + 1)) & P4D_MASK; else entropy = (rand % (entropy + 1)) & PUD_MASK; @@ -141,7 +136,7 @@ void __init kernel_randomize_memory(void) * randomization alignment. */ vaddr += get_padding(&kaslr_regions[i]); - if (IS_ENABLED(CONFIG_X86_5LEVEL)) + if (pgtable_l5_enabled) vaddr = round_up(vaddr + 1, P4D_SIZE); else vaddr = round_up(vaddr + 1, PUD_SIZE); @@ -217,7 +212,7 @@ void __meminit init_trampoline(void) return; } - if (IS_ENABLED(CONFIG_X86_5LEVEL)) + if (pgtable_l5_enabled) init_trampoline_p4d(); else init_trampoline_pud(); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 1a53071..b2de398 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -25,17 +25,12 @@ #include <asm/bootparam.h> #include <asm/set_memory.h> #include <asm/cacheflush.h> -#include <asm/sections.h> #include <asm/processor-flags.h> #include <asm/msr.h> #include <asm/cmdline.h> #include "mm_internal.h" -static char sme_cmdline_arg[] __initdata = "mem_encrypt"; -static char sme_cmdline_on[] __initdata = "on"; -static char sme_cmdline_off[] __initdata = "off"; - /* * Since SME related variables are set early in the boot process they must * reside in the .data section so as not to be zeroed out when the .bss @@ -46,7 +41,7 @@ EXPORT_SYMBOL(sme_me_mask); DEFINE_STATIC_KEY_FALSE(sev_enable_key); EXPORT_SYMBOL_GPL(sev_enable_key); -static bool sev_enabled __section(.data); +bool sev_enabled __section(.data); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); @@ -200,67 +195,6 @@ void __init sme_early_init(void) swiotlb_force = SWIOTLB_FORCE; } -static void *sev_alloc(struct device *dev, size_t size, dma_addr_t 
*dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned long dma_mask; - unsigned int order; - struct page *page; - void *vaddr = NULL; - - dma_mask = dma_alloc_coherent_mask(dev, gfp); - order = get_order(size); - - /* - * Memory will be memset to zero after marking decrypted, so don't - * bother clearing it before. - */ - gfp &= ~__GFP_ZERO; - - page = alloc_pages_node(dev_to_node(dev), gfp, order); - if (page) { - dma_addr_t addr; - - /* - * Since we will be clearing the encryption bit, check the - * mask with it already cleared. - */ - addr = __sme_clr(phys_to_dma(dev, page_to_phys(page))); - if ((addr + size) > dma_mask) { - __free_pages(page, get_order(size)); - } else { - vaddr = page_address(page); - *dma_handle = addr; - } - } - - if (!vaddr) - vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, gfp); - - if (!vaddr) - return NULL; - - /* Clear the SME encryption bit for DMA use if not swiotlb area */ - if (!is_swiotlb_buffer(dma_to_phys(dev, *dma_handle))) { - set_memory_decrypted((unsigned long)vaddr, 1 << order); - memset(vaddr, 0, PAGE_SIZE << order); - *dma_handle = __sme_clr(*dma_handle); - } - - return vaddr; -} - -static void sev_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs) -{ - /* Set the SME encryption bit for re-use if not swiotlb area */ - if (!is_swiotlb_buffer(dma_to_phys(dev, dma_handle))) - set_memory_encrypted((unsigned long)vaddr, - 1 << get_order(size)); - - swiotlb_free_coherent(dev, size, vaddr, dma_handle); -} - static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -413,20 +347,6 @@ bool sev_active(void) } EXPORT_SYMBOL(sev_active); -static const struct dma_map_ops sev_dma_ops = { - .alloc = sev_alloc, - .free = sev_free, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - 
.sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = swiotlb_dma_mapping_error, -}; - /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void) { @@ -437,12 +357,11 @@ void __init mem_encrypt_init(void) swiotlb_update_mem_attributes(); /* - * With SEV, DMA operations cannot use encryption. New DMA ops - * are required in order to mark the DMA areas as decrypted or - * to use bounce buffers. + * With SEV, DMA operations cannot use encryption, we need to use + * SWIOTLB to bounce buffer DMA operation. */ if (sev_active()) - dma_ops = &sev_dma_ops; + dma_ops = &swiotlb_dma_ops; /* * With SEV, we need to unroll the rep string I/O instructions. @@ -455,582 +374,3 @@ void __init mem_encrypt_init(void) : "Secure Memory Encryption (SME)"); } -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) -{ - WARN(PAGE_ALIGN(size) != size, - "size is not page-aligned (%#lx)\n", size); - - /* Make the SWIOTLB buffer area decrypted */ - set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); -} - -struct sme_populate_pgd_data { - void *pgtable_area; - pgd_t *pgd; - - pmdval_t pmd_flags; - pteval_t pte_flags; - unsigned long paddr; - - unsigned long vaddr; - unsigned long vaddr_end; -}; - -static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) -{ - unsigned long pgd_start, pgd_end, pgd_size; - pgd_t *pgd_p; - - pgd_start = ppd->vaddr & PGDIR_MASK; - pgd_end = ppd->vaddr_end & PGDIR_MASK; - - pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); - - pgd_p = ppd->pgd + pgd_index(ppd->vaddr); - - memset(pgd_p, 0, pgd_size); -} - -#define PGD_FLAGS _KERNPG_TABLE_NOENC -#define P4D_FLAGS _KERNPG_TABLE_NOENC -#define PUD_FLAGS _KERNPG_TABLE_NOENC -#define PMD_FLAGS _KERNPG_TABLE_NOENC - -#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) - -#define PMD_FLAGS_DEC PMD_FLAGS_LARGE 
-#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ - (_PAGE_PAT | _PAGE_PWT)) - -#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) - -#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) - -#define PTE_FLAGS_DEC PTE_FLAGS -#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ - (_PAGE_PAT | _PAGE_PWT)) - -#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) - -static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) -{ - pgd_t *pgd_p; - p4d_t *p4d_p; - pud_t *pud_p; - pmd_t *pmd_p; - - pgd_p = ppd->pgd + pgd_index(ppd->vaddr); - if (native_pgd_val(*pgd_p)) { - if (IS_ENABLED(CONFIG_X86_5LEVEL)) - p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); - else - pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); - } else { - pgd_t pgd; - - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p = ppd->pgtable_area; - memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); - ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; - - pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); - } else { - pud_p = ppd->pgtable_area; - memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; - - pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); - } - native_set_pgd(pgd_p, pgd); - } - - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p += p4d_index(ppd->vaddr); - if (native_p4d_val(*p4d_p)) { - pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); - } else { - p4d_t p4d; - - pud_p = ppd->pgtable_area; - memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; - - p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); - native_set_p4d(p4d_p, p4d); - } - } - - pud_p += pud_index(ppd->vaddr); - if (native_pud_val(*pud_p)) { - if (native_pud_val(*pud_p) & _PAGE_PSE) - return NULL; - - pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); - } else { - pud_t pud; - - pmd_p = ppd->pgtable_area; - memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); - 
ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; - - pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); - native_set_pud(pud_p, pud); - } - - return pmd_p; -} - -static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) -{ - pmd_t *pmd_p; - - pmd_p = sme_prepare_pgd(ppd); - if (!pmd_p) - return; - - pmd_p += pmd_index(ppd->vaddr); - if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) - native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); -} - -static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) -{ - pmd_t *pmd_p; - pte_t *pte_p; - - pmd_p = sme_prepare_pgd(ppd); - if (!pmd_p) - return; - - pmd_p += pmd_index(ppd->vaddr); - if (native_pmd_val(*pmd_p)) { - if (native_pmd_val(*pmd_p) & _PAGE_PSE) - return; - - pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK); - } else { - pmd_t pmd; - - pte_p = ppd->pgtable_area; - memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE); - ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE; - - pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS); - native_set_pmd(pmd_p, pmd); - } - - pte_p += pte_index(ppd->vaddr); - if (!native_pte_val(*pte_p)) - native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags)); -} - -static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) -{ - while (ppd->vaddr < ppd->vaddr_end) { - sme_populate_pgd_large(ppd); - - ppd->vaddr += PMD_PAGE_SIZE; - ppd->paddr += PMD_PAGE_SIZE; - } -} - -static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) -{ - while (ppd->vaddr < ppd->vaddr_end) { - sme_populate_pgd(ppd); - - ppd->vaddr += PAGE_SIZE; - ppd->paddr += PAGE_SIZE; - } -} - -static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, - pmdval_t pmd_flags, pteval_t pte_flags) -{ - unsigned long vaddr_end; - - ppd->pmd_flags = pmd_flags; - ppd->pte_flags = pte_flags; - - /* Save original end value since we modify the struct value */ - vaddr_end = ppd->vaddr_end; - - /* If start is 
not 2MB aligned, create PTE entries */ - ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); - __sme_map_range_pte(ppd); - - /* Create PMD entries */ - ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; - __sme_map_range_pmd(ppd); - - /* If end is not 2MB aligned, create PTE entries */ - ppd->vaddr_end = vaddr_end; - __sme_map_range_pte(ppd); -} - -static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) -{ - __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); -} - -static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) -{ - __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); -} - -static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) -{ - __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); -} - -static unsigned long __init sme_pgtable_calc(unsigned long len) -{ - unsigned long p4d_size, pud_size, pmd_size, pte_size; - unsigned long total; - - /* - * Perform a relatively simplistic calculation of the pagetable - * entries that are needed. Those mappings will be covered mostly - * by 2MB PMD entries so we can conservatively calculate the required - * number of P4D, PUD and PMD structures needed to perform the - * mappings. For mappings that are not 2MB aligned, PTE mappings - * would be needed for the start and end portion of the address range - * that fall outside of the 2MB alignment. This results in, at most, - * two extra pages to hold PTE entries for each range that is mapped. - * Incrementing the count for each covers the case where the addresses - * cross entries. 
- */ - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; - p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; - pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1; - pud_size *= sizeof(pud_t) * PTRS_PER_PUD; - } else { - p4d_size = 0; - pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; - pud_size *= sizeof(pud_t) * PTRS_PER_PUD; - } - pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; - pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; - pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE; - - total = p4d_size + pud_size + pmd_size + pte_size; - - /* - * Now calculate the added pagetable structures needed to populate - * the new pagetables. - */ - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; - p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D; - pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE; - pud_size *= sizeof(pud_t) * PTRS_PER_PUD; - } else { - p4d_size = 0; - pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE; - pud_size *= sizeof(pud_t) * PTRS_PER_PUD; - } - pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE; - pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; - - total += p4d_size + pud_size + pmd_size; - - return total; -} - -void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp) -{ - unsigned long workarea_start, workarea_end, workarea_len; - unsigned long execute_start, execute_end, execute_len; - unsigned long kernel_start, kernel_end, kernel_len; - unsigned long initrd_start, initrd_end, initrd_len; - struct sme_populate_pgd_data ppd; - unsigned long pgtable_area_len; - unsigned long decrypted_base; - - if (!sme_active()) - return; - - /* - * Prepare for encrypting the kernel and initrd by building new - * pagetables with the necessary attributes needed to encrypt the - * kernel in place. - * - * One range of virtual addresses will map the memory occupied - * by the kernel and initrd as encrypted. 
- * - * Another range of virtual addresses will map the memory occupied - * by the kernel and initrd as decrypted and write-protected. - * - * The use of write-protect attribute will prevent any of the - * memory from being cached. - */ - - /* Physical addresses gives us the identity mapped virtual addresses */ - kernel_start = __pa_symbol(_text); - kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); - kernel_len = kernel_end - kernel_start; - - initrd_start = 0; - initrd_end = 0; - initrd_len = 0; -#ifdef CONFIG_BLK_DEV_INITRD - initrd_len = (unsigned long)bp->hdr.ramdisk_size | - ((unsigned long)bp->ext_ramdisk_size << 32); - if (initrd_len) { - initrd_start = (unsigned long)bp->hdr.ramdisk_image | - ((unsigned long)bp->ext_ramdisk_image << 32); - initrd_end = PAGE_ALIGN(initrd_start + initrd_len); - initrd_len = initrd_end - initrd_start; - } -#endif - - /* Set the encryption workarea to be immediately after the kernel */ - workarea_start = kernel_end; - - /* - * Calculate required number of workarea bytes needed: - * executable encryption area size: - * stack page (PAGE_SIZE) - * encryption routine page (PAGE_SIZE) - * intermediate copy buffer (PMD_PAGE_SIZE) - * pagetable structures for the encryption of the kernel - * pagetable structures for workarea (in case not currently mapped) - */ - execute_start = workarea_start; - execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; - execute_len = execute_end - execute_start; - - /* - * One PGD for both encrypted and decrypted mappings and a set of - * PUDs and PMDs for each of the encrypted and decrypted mappings. 
- */ - pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; - pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; - if (initrd_len) - pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; - - /* PUDs and PMDs needed in the current pagetables for the workarea */ - pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); - - /* - * The total workarea includes the executable encryption area and - * the pagetable area. The start of the workarea is already 2MB - * aligned, align the end of the workarea on a 2MB boundary so that - * we don't try to create/allocate PTE entries from the workarea - * before it is mapped. - */ - workarea_len = execute_len + pgtable_area_len; - workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); - - /* - * Set the address to the start of where newly created pagetable - * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable - * structures are created when the workarea is added to the current - * pagetables and when the new encrypted and decrypted kernel - * mappings are populated. - */ - ppd.pgtable_area = (void *)execute_end; - - /* - * Make sure the current pagetable structure has entries for - * addressing the workarea. - */ - ppd.pgd = (pgd_t *)native_read_cr3_pa(); - ppd.paddr = workarea_start; - ppd.vaddr = workarea_start; - ppd.vaddr_end = workarea_end; - sme_map_range_decrypted(&ppd); - - /* Flush the TLB - no globals so cr3 is enough */ - native_write_cr3(__native_read_cr3()); - - /* - * A new pagetable structure is being built to allow for the kernel - * and initrd to be encrypted. It starts with an empty PGD that will - * then be populated with new PUDs and PMDs as the encrypted and - * decrypted kernel mappings are created. - */ - ppd.pgd = ppd.pgtable_area; - memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); - ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; - - /* - * A different PGD index/entry must be used to get different - * pagetable entries for the decrypted mapping. 
Choose the next - * PGD index and convert it to a virtual address to be used as - * the base of the mapping. - */ - decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); - if (initrd_len) { - unsigned long check_base; - - check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); - decrypted_base = max(decrypted_base, check_base); - } - decrypted_base <<= PGDIR_SHIFT; - - /* Add encrypted kernel (identity) mappings */ - ppd.paddr = kernel_start; - ppd.vaddr = kernel_start; - ppd.vaddr_end = kernel_end; - sme_map_range_encrypted(&ppd); - - /* Add decrypted, write-protected kernel (non-identity) mappings */ - ppd.paddr = kernel_start; - ppd.vaddr = kernel_start + decrypted_base; - ppd.vaddr_end = kernel_end + decrypted_base; - sme_map_range_decrypted_wp(&ppd); - - if (initrd_len) { - /* Add encrypted initrd (identity) mappings */ - ppd.paddr = initrd_start; - ppd.vaddr = initrd_start; - ppd.vaddr_end = initrd_end; - sme_map_range_encrypted(&ppd); - /* - * Add decrypted, write-protected initrd (non-identity) mappings - */ - ppd.paddr = initrd_start; - ppd.vaddr = initrd_start + decrypted_base; - ppd.vaddr_end = initrd_end + decrypted_base; - sme_map_range_decrypted_wp(&ppd); - } - - /* Add decrypted workarea mappings to both kernel mappings */ - ppd.paddr = workarea_start; - ppd.vaddr = workarea_start; - ppd.vaddr_end = workarea_end; - sme_map_range_decrypted(&ppd); - - ppd.paddr = workarea_start; - ppd.vaddr = workarea_start + decrypted_base; - ppd.vaddr_end = workarea_end + decrypted_base; - sme_map_range_decrypted(&ppd); - - /* Perform the encryption */ - sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, - kernel_len, workarea_start, (unsigned long)ppd.pgd); - - if (initrd_len) - sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, - initrd_len, workarea_start, - (unsigned long)ppd.pgd); - - /* - * At this point we are running encrypted. 
Remove the mappings for - * the decrypted areas - all that is needed for this is to remove - * the PGD entry/entries. - */ - ppd.vaddr = kernel_start + decrypted_base; - ppd.vaddr_end = kernel_end + decrypted_base; - sme_clear_pgd(&ppd); - - if (initrd_len) { - ppd.vaddr = initrd_start + decrypted_base; - ppd.vaddr_end = initrd_end + decrypted_base; - sme_clear_pgd(&ppd); - } - - ppd.vaddr = workarea_start + decrypted_base; - ppd.vaddr_end = workarea_end + decrypted_base; - sme_clear_pgd(&ppd); - - /* Flush the TLB - no globals so cr3 is enough */ - native_write_cr3(__native_read_cr3()); -} - -void __init __nostackprotector sme_enable(struct boot_params *bp) -{ - const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; - unsigned int eax, ebx, ecx, edx; - unsigned long feature_mask; - bool active_by_default; - unsigned long me_mask; - char buffer[16]; - u64 msr; - - /* Check for the SME/SEV support leaf */ - eax = 0x80000000; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (eax < 0x8000001f) - return; - -#define AMD_SME_BIT BIT(0) -#define AMD_SEV_BIT BIT(1) - /* - * Set the feature mask (SME or SEV) based on whether we are - * running under a hypervisor. - */ - eax = 1; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - feature_mask = (ecx & BIT(31)) ? 
AMD_SEV_BIT : AMD_SME_BIT; - - /* - * Check for the SME/SEV feature: - * CPUID Fn8000_001F[EAX] - * - Bit 0 - Secure Memory Encryption support - * - Bit 1 - Secure Encrypted Virtualization support - * CPUID Fn8000_001F[EBX] - * - Bits 5:0 - Pagetable bit position used to indicate encryption - */ - eax = 0x8000001f; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - if (!(eax & feature_mask)) - return; - - me_mask = 1UL << (ebx & 0x3f); - - /* Check if memory encryption is enabled */ - if (feature_mask == AMD_SME_BIT) { - /* For SME, check the SYSCFG MSR */ - msr = __rdmsr(MSR_K8_SYSCFG); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) - return; - } else { - /* For SEV, check the SEV MSR */ - msr = __rdmsr(MSR_AMD64_SEV); - if (!(msr & MSR_AMD64_SEV_ENABLED)) - return; - - /* SEV state cannot be controlled by a command line option */ - sme_me_mask = me_mask; - sev_enabled = true; - return; - } - - /* - * Fixups have not been applied to phys_base yet and we're running - * identity mapped, so we must obtain the address to the SME command - * line argument data using rip-relative addressing. - */ - asm ("lea sme_cmdline_arg(%%rip), %0" - : "=r" (cmdline_arg) - : "p" (sme_cmdline_arg)); - asm ("lea sme_cmdline_on(%%rip), %0" - : "=r" (cmdline_on) - : "p" (sme_cmdline_on)); - asm ("lea sme_cmdline_off(%%rip), %0" - : "=r" (cmdline_off) - : "p" (sme_cmdline_off)); - - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) - active_by_default = true; - else - active_by_default = false; - - cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | - ((u64)bp->ext_cmd_line_ptr << 32)); - - cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); - - if (!strncmp(buffer, cmdline_on, sizeof(buffer))) - sme_me_mask = me_mask; - else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) - sme_me_mask = 0; - else - sme_me_mask = active_by_default ? 
me_mask : 0; -} diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 01f682c..40a6085 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -15,6 +15,7 @@ #include <asm/page.h> #include <asm/processor-flags.h> #include <asm/msr-index.h> +#include <asm/nospec-branch.h> .text .code64 @@ -59,6 +60,7 @@ ENTRY(sme_encrypt_execute) movq %rax, %r8 /* Workarea encryption routine */ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ + ANNOTATE_RETPOLINE_SAFE call *%rax /* Call the encryption routine */ pop %r12 diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c new file mode 100644 index 0000000..1b2197d --- /dev/null +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -0,0 +1,564 @@ +/* + * AMD Memory Encryption Support + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define DISABLE_BRANCH_PROFILING + +/* + * Since we're dealing with identity mappings, physical and virtual + * addresses are the same, so override these defines which are ultimately + * used by the headers in misc.h. + */ +#define __pa(x) ((unsigned long)(x)) +#define __va(x) ((void *)((unsigned long)(x))) + +/* + * Special hack: we have to be careful, because no indirections are + * allowed here, and paravirt_ops is a kind of one. As it will only run in + * baremetal anyway, we just keep it from happening. (This list needs to + * be extended when new paravirt and debugging variants are added.) 
+ */ +#undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_SPINLOCKS + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/mem_encrypt.h> + +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/cmdline.h> + +#include "mm_internal.h" + +#define PGD_FLAGS _KERNPG_TABLE_NOENC +#define P4D_FLAGS _KERNPG_TABLE_NOENC +#define PUD_FLAGS _KERNPG_TABLE_NOENC +#define PMD_FLAGS _KERNPG_TABLE_NOENC + +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) + +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) + +#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) + +#define PTE_FLAGS_DEC PTE_FLAGS +#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) + +struct sme_populate_pgd_data { + void *pgtable_area; + pgd_t *pgd; + + pmdval_t pmd_flags; + pteval_t pte_flags; + unsigned long paddr; + + unsigned long vaddr; + unsigned long vaddr_end; +}; + +static char sme_cmdline_arg[] __initdata = "mem_encrypt"; +static char sme_cmdline_on[] __initdata = "on"; +static char sme_cmdline_off[] __initdata = "off"; + +static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) +{ + unsigned long pgd_start, pgd_end, pgd_size; + pgd_t *pgd_p; + + pgd_start = ppd->vaddr & PGDIR_MASK; + pgd_end = ppd->vaddr_end & PGDIR_MASK; + + pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); + + pgd_p = ppd->pgd + pgd_index(ppd->vaddr); + + memset(pgd_p, 0, pgd_size); +} + +static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + + pgd = ppd->pgd + pgd_index(ppd->vaddr); + if (pgd_none(*pgd)) { + p4d = ppd->pgtable_area; + memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D); + ppd->pgtable_area += sizeof(*p4d) * PTRS_PER_P4D; + set_pgd(pgd, __pgd(PGD_FLAGS | 
__pa(p4d))); + } + + p4d = p4d_offset(pgd, ppd->vaddr); + if (p4d_none(*p4d)) { + pud = ppd->pgtable_area; + memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD); + ppd->pgtable_area += sizeof(*pud) * PTRS_PER_PUD; + set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud))); + } + + pud = pud_offset(p4d, ppd->vaddr); + if (pud_none(*pud)) { + pmd = ppd->pgtable_area; + memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD); + ppd->pgtable_area += sizeof(*pmd) * PTRS_PER_PMD; + set_pud(pud, __pud(PUD_FLAGS | __pa(pmd))); + } + + if (pud_large(*pud)) + return NULL; + + return pud; +} + +static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +{ + pud_t *pud; + pmd_t *pmd; + + pud = sme_prepare_pgd(ppd); + if (!pud) + return; + + pmd = pmd_offset(pud, ppd->vaddr); + if (pmd_large(*pmd)) + return; + + set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags)); +} + +static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pud = sme_prepare_pgd(ppd); + if (!pud) + return; + + pmd = pmd_offset(pud, ppd->vaddr); + if (pmd_none(*pmd)) { + pte = ppd->pgtable_area; + memset(pte, 0, sizeof(pte) * PTRS_PER_PTE); + ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE; + set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte))); + } + + if (pmd_large(*pmd)) + return; + + pte = pte_offset_map(pmd, ppd->vaddr); + if (pte_none(*pte)) + set_pte(pte, __pte(ppd->paddr | ppd->pte_flags)); +} + +static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +{ + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd_large(ppd); + + ppd->vaddr += PMD_PAGE_SIZE; + ppd->paddr += PMD_PAGE_SIZE; + } +} + +static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +{ + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd(ppd); + + ppd->vaddr += PAGE_SIZE; + ppd->paddr += PAGE_SIZE; + } +} + +static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, + pmdval_t pmd_flags, pteval_t pte_flags) +{ + unsigned long vaddr_end; + + 
ppd->pmd_flags = pmd_flags; + ppd->pte_flags = pte_flags; + + /* Save original end value since we modify the struct value */ + vaddr_end = ppd->vaddr_end; + + /* If start is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + __sme_map_range_pte(ppd); + + /* Create PMD entries */ + ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + __sme_map_range_pmd(ppd); + + /* If end is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = vaddr_end; + __sme_map_range_pte(ppd); +} + +static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); +} + +static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); +} + +static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); +} + +static unsigned long __init sme_pgtable_calc(unsigned long len) +{ + unsigned long entries = 0, tables = 0; + + /* + * Perform a relatively simplistic calculation of the pagetable + * entries that are needed. Those mappings will be covered mostly + * by 2MB PMD entries so we can conservatively calculate the required + * number of P4D, PUD and PMD structures needed to perform the + * mappings. For mappings that are not 2MB aligned, PTE mappings + * would be needed for the start and end portion of the address range + * that fall outside of the 2MB alignment. This results in, at most, + * two extra pages to hold PTE entries for each range that is mapped. + * Incrementing the count for each covers the case where the addresses + * cross entries. + */ + + /* PGDIR_SIZE is equal to P4D_SIZE on 4-level machine. 
*/ + if (PTRS_PER_P4D > 1) + entries += (DIV_ROUND_UP(len, PGDIR_SIZE) + 1) * sizeof(p4d_t) * PTRS_PER_P4D; + entries += (DIV_ROUND_UP(len, P4D_SIZE) + 1) * sizeof(pud_t) * PTRS_PER_PUD; + entries += (DIV_ROUND_UP(len, PUD_SIZE) + 1) * sizeof(pmd_t) * PTRS_PER_PMD; + entries += 2 * sizeof(pte_t) * PTRS_PER_PTE; + + /* + * Now calculate the added pagetable structures needed to populate + * the new pagetables. + */ + + if (PTRS_PER_P4D > 1) + tables += DIV_ROUND_UP(entries, PGDIR_SIZE) * sizeof(p4d_t) * PTRS_PER_P4D; + tables += DIV_ROUND_UP(entries, P4D_SIZE) * sizeof(pud_t) * PTRS_PER_PUD; + tables += DIV_ROUND_UP(entries, PUD_SIZE) * sizeof(pmd_t) * PTRS_PER_PMD; + + return entries + tables; +} + +void __init sme_encrypt_kernel(struct boot_params *bp) +{ + unsigned long workarea_start, workarea_end, workarea_len; + unsigned long execute_start, execute_end, execute_len; + unsigned long kernel_start, kernel_end, kernel_len; + unsigned long initrd_start, initrd_end, initrd_len; + struct sme_populate_pgd_data ppd; + unsigned long pgtable_area_len; + unsigned long decrypted_base; + + if (!sme_active()) + return; + + /* + * Prepare for encrypting the kernel and initrd by building new + * pagetables with the necessary attributes needed to encrypt the + * kernel in place. + * + * One range of virtual addresses will map the memory occupied + * by the kernel and initrd as encrypted. + * + * Another range of virtual addresses will map the memory occupied + * by the kernel and initrd as decrypted and write-protected. + * + * The use of write-protect attribute will prevent any of the + * memory from being cached. 
+ */ + + /* Physical addresses gives us the identity mapped virtual addresses */ + kernel_start = __pa_symbol(_text); + kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); + kernel_len = kernel_end - kernel_start; + + initrd_start = 0; + initrd_end = 0; + initrd_len = 0; +#ifdef CONFIG_BLK_DEV_INITRD + initrd_len = (unsigned long)bp->hdr.ramdisk_size | + ((unsigned long)bp->ext_ramdisk_size << 32); + if (initrd_len) { + initrd_start = (unsigned long)bp->hdr.ramdisk_image | + ((unsigned long)bp->ext_ramdisk_image << 32); + initrd_end = PAGE_ALIGN(initrd_start + initrd_len); + initrd_len = initrd_end - initrd_start; + } +#endif + + /* Set the encryption workarea to be immediately after the kernel */ + workarea_start = kernel_end; + + /* + * Calculate required number of workarea bytes needed: + * executable encryption area size: + * stack page (PAGE_SIZE) + * encryption routine page (PAGE_SIZE) + * intermediate copy buffer (PMD_PAGE_SIZE) + * pagetable structures for the encryption of the kernel + * pagetable structures for workarea (in case not currently mapped) + */ + execute_start = workarea_start; + execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE; + execute_len = execute_end - execute_start; + + /* + * One PGD for both encrypted and decrypted mappings and a set of + * PUDs and PMDs for each of the encrypted and decrypted mappings. + */ + pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; + pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; + if (initrd_len) + pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; + + /* PUDs and PMDs needed in the current pagetables for the workarea */ + pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); + + /* + * The total workarea includes the executable encryption area and + * the pagetable area. 
The start of the workarea is already 2MB + * aligned, align the end of the workarea on a 2MB boundary so that + * we don't try to create/allocate PTE entries from the workarea + * before it is mapped. + */ + workarea_len = execute_len + pgtable_area_len; + workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); + + /* + * Set the address to the start of where newly created pagetable + * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable + * structures are created when the workarea is added to the current + * pagetables and when the new encrypted and decrypted kernel + * mappings are populated. + */ + ppd.pgtable_area = (void *)execute_end; + + /* + * Make sure the current pagetable structure has entries for + * addressing the workarea. + */ + ppd.pgd = (pgd_t *)native_read_cr3_pa(); + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); + + /* Flush the TLB - no globals so cr3 is enough */ + native_write_cr3(__native_read_cr3()); + + /* + * A new pagetable structure is being built to allow for the kernel + * and initrd to be encrypted. It starts with an empty PGD that will + * then be populated with new PUDs and PMDs as the encrypted and + * decrypted kernel mappings are created. + */ + ppd.pgd = ppd.pgtable_area; + memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); + ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; + + /* + * A different PGD index/entry must be used to get different + * pagetable entries for the decrypted mapping. Choose the next + * PGD index and convert it to a virtual address to be used as + * the base of the mapping. 
+ */ + decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); + if (initrd_len) { + unsigned long check_base; + + check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); + decrypted_base = max(decrypted_base, check_base); + } + decrypted_base <<= PGDIR_SHIFT; + + /* Add encrypted kernel (identity) mappings */ + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start; + ppd.vaddr_end = kernel_end; + sme_map_range_encrypted(&ppd); + + /* Add decrypted, write-protected kernel (non-identity) mappings */ + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); + + if (initrd_len) { + /* Add encrypted initrd (identity) mappings */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start; + ppd.vaddr_end = initrd_end; + sme_map_range_encrypted(&ppd); + /* + * Add decrypted, write-protected initrd (non-identity) mappings + */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); + } + + /* Add decrypted workarea mappings to both kernel mappings */ + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); + + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_map_range_decrypted(&ppd); + + /* Perform the encryption */ + sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, + kernel_len, workarea_start, (unsigned long)ppd.pgd); + + if (initrd_len) + sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, + initrd_len, workarea_start, + (unsigned long)ppd.pgd); + + /* + * At this point we are running encrypted. Remove the mappings for + * the decrypted areas - all that is needed for this is to remove + * the PGD entry/entries. 
+ */ + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_clear_pgd(&ppd); + + if (initrd_len) { + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_clear_pgd(&ppd); + } + + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_clear_pgd(&ppd); + + /* Flush the TLB - no globals so cr3 is enough */ + native_write_cr3(__native_read_cr3()); +} + +void __init sme_enable(struct boot_params *bp) +{ + const char *cmdline_ptr, *cmdline_arg, *cmdline_on, *cmdline_off; + unsigned int eax, ebx, ecx, edx; + unsigned long feature_mask; + bool active_by_default; + unsigned long me_mask; + char buffer[16]; + u64 msr; + + /* Check for the SME/SEV support leaf */ + eax = 0x80000000; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (eax < 0x8000001f) + return; + +#define AMD_SME_BIT BIT(0) +#define AMD_SEV_BIT BIT(1) + /* + * Set the feature mask (SME or SEV) based on whether we are + * running under a hypervisor. + */ + eax = 1; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + feature_mask = (ecx & BIT(31)) ? 
AMD_SEV_BIT : AMD_SME_BIT; + + /* + * Check for the SME/SEV feature: + * CPUID Fn8000_001F[EAX] + * - Bit 0 - Secure Memory Encryption support + * - Bit 1 - Secure Encrypted Virtualization support + * CPUID Fn8000_001F[EBX] + * - Bits 5:0 - Pagetable bit position used to indicate encryption + */ + eax = 0x8000001f; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + if (!(eax & feature_mask)) + return; + + me_mask = 1UL << (ebx & 0x3f); + + /* Check if memory encryption is enabled */ + if (feature_mask == AMD_SME_BIT) { + /* For SME, check the SYSCFG MSR */ + msr = __rdmsr(MSR_K8_SYSCFG); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + } else { + /* For SEV, check the SEV MSR */ + msr = __rdmsr(MSR_AMD64_SEV); + if (!(msr & MSR_AMD64_SEV_ENABLED)) + return; + + /* SEV state cannot be controlled by a command line option */ + sme_me_mask = me_mask; + sev_enabled = true; + return; + } + + /* + * Fixups have not been applied to phys_base yet and we're running + * identity mapped, so we must obtain the address to the SME command + * line argument data using rip-relative addressing. + */ + asm ("lea sme_cmdline_arg(%%rip), %0" + : "=r" (cmdline_arg) + : "p" (sme_cmdline_arg)); + asm ("lea sme_cmdline_on(%%rip), %0" + : "=r" (cmdline_on) + : "p" (sme_cmdline_on)); + asm ("lea sme_cmdline_off(%%rip), %0" + : "=r" (cmdline_off) + : "p" (sme_cmdline_off)); + + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT)) + active_by_default = true; + else + active_by_default = false; + + cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr | + ((u64)bp->ext_cmd_line_ptr << 32)); + + cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)); + + if (!strncmp(buffer, cmdline_on, sizeof(buffer))) + sme_me_mask = me_mask; + else if (!strncmp(buffer, cmdline_off, sizeof(buffer))) + sme_me_mask = 0; + else + sme_me_mask = active_by_default ? 
me_mask : 0; +} diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 155ecba..48c5912 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -90,9 +90,10 @@ unsigned long arch_mmap_rnd(void) return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits); } -static unsigned long mmap_base(unsigned long rnd, unsigned long task_size) +static unsigned long mmap_base(unsigned long rnd, unsigned long task_size, + struct rlimit *rlim_stack) { - unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; unsigned long gap_min, gap_max; @@ -126,16 +127,17 @@ static unsigned long mmap_legacy_base(unsigned long rnd, * process VM image, sets up which VM layout function to use: */ static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base, - unsigned long random_factor, unsigned long task_size) + unsigned long random_factor, unsigned long task_size, + struct rlimit *rlim_stack) { *legacy_base = mmap_legacy_base(random_factor, task_size); if (mmap_is_legacy()) *base = *legacy_base; else - *base = mmap_base(random_factor, task_size); + *base = mmap_base(random_factor, task_size, rlim_stack); } -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { if (mmap_is_legacy()) mm->get_unmapped_area = arch_get_unmapped_area; @@ -143,7 +145,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) mm->get_unmapped_area = arch_get_unmapped_area_topdown; arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base, - arch_rnd(mmap64_rnd_bits), task_size_64bit(0)); + arch_rnd(mmap64_rnd_bits), task_size_64bit(0), + rlim_stack); #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES /* @@ -153,7 +156,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * mmap_base, the compat syscall uses mmap_compat_base. 
*/ arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base, - arch_rnd(mmap32_rnd_bits), task_size_32bit()); + arch_rnd(mmap32_rnd_bits), task_size_32bit(), + rlim_stack); #endif } diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index aca6295..e8a4a09 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -60,17 +60,6 @@ void memory_present(int nid, unsigned long start, unsigned long end) } printk(KERN_CONT "\n"); } - -unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long nr_pages = end_pfn - start_pfn; - - if (!nr_pages) - return 0; - - return (nr_pages + 1) * sizeof(struct page); -} #endif extern unsigned long highend_pfn, highstart_pfn; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 004abf9..34cda7e 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd) return 0; } + +/** + * pud_free_pmd_page - Clear pud entry and free pmd page. + * @pud: Pointer to a PUD. + * + * Context: The pud range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. + */ +int pud_free_pmd_page(pud_t *pud) +{ + pmd_t *pmd; + int i; + + if (pud_none(*pud)) + return 1; + + pmd = (pmd_t *)pud_page_vaddr(*pud); + + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_free_pte_page(&pmd[i])) + return 0; + + pud_clear(pud); + free_page((unsigned long)pmd); + + return 1; +} + +/** + * pmd_free_pte_page - Clear pmd entry and free pte page. + * @pmd: Pointer to a PMD. + * + * Context: The pmd range has been unmaped and TLB purged. + * Return: 1 if clearing the entry succeeded. 0 otherwise. 
+ */ +int pmd_free_pte_page(pmd_t *pmd) +{ + pte_t *pte; + + if (pmd_none(*pmd)) + return 1; + + pte = (pte_t *)pmd_page_vaddr(*pmd); + pmd_clear(pmd); + free_page((unsigned long)pte); + + return 1; +} #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index ce38f16..631507f 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -332,7 +332,7 @@ static void __init pti_clone_user_shared(void) } /* - * Clone the ESPFIX P4D into the user space visinble page table + * Clone the ESPFIX P4D into the user space visible page table */ static void __init pti_setup_espfix64(void) { diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 7f1a513..e055d1a 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) unsigned long sp = current_stack_pointer; pgd_t *pgd = pgd_offset(mm, sp); - if (CONFIG_PGTABLE_LEVELS > 4) { + if (pgtable_l5_enabled) { if (unlikely(pgd_none(*pgd))) { pgd_t *pgd_ref = pgd_offset_k(sp); @@ -613,7 +613,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, { int cpu; - struct flush_tlb_info info = { + struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = { .mm = mm, }; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4923d92..b7251541 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -11,10 +11,11 @@ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/if_vlan.h> -#include <asm/cacheflush.h> -#include <asm/set_memory.h> #include <linux/bpf.h> +#include <asm/set_memory.h> +#include <asm/nospec-branch.h> + /* * assembly code in arch/x86/net/bpf_jit.S */ @@ -60,7 +61,12 @@ static bool is_imm8(int value) static bool is_simm32(s64 value) { - return value == (s64) (s32) value; + return value == (s64)(s32)value; +} + +static bool is_uimm32(u64 value) +{ + return value == (u64)(u32)value; } /* mov dst, src */ @@ -97,16 +103,6 @@ static int 
bpf_size_to_x86_bytes(int bpf_size) #define X86_JLE 0x7E #define X86_JG 0x7F -static void bpf_flush_icache(void *start, void *end) -{ - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - smp_wmb(); - flush_icache_range((unsigned long)start, (unsigned long)end); - set_fs(old_fs); -} - #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) @@ -211,7 +207,7 @@ struct jit_context { /* emit x64 prologue code for BPF program and check it's size. * bpf_tail_call helper will skip it while jumping into another program */ -static void emit_prologue(u8 **pprog, u32 stack_depth) +static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) { u8 *prog = *pprog; int cnt = 0; @@ -246,18 +242,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth) /* mov qword ptr [rbp+24],r15 */ EMIT4(0x4C, 0x89, 0x7D, 24); - /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls - * we need to reset the counter to 0. It's done in two instructions, - * resetting rax register to 0 (xor on eax gets 0 extended), and - * moving it to the counter location. - */ + if (!ebpf_from_cbpf) { + /* Clear the tail call counter (tail_call_cnt): for eBPF tail + * calls we need to reset the counter to 0. It's done in two + * instructions, resetting rax register to 0, and moving it + * to the counter location. 
+ */ - /* xor eax, eax */ - EMIT2(0x31, 0xc0); - /* mov qword ptr [rbp+32], rax */ - EMIT4(0x48, 0x89, 0x45, 32); + /* xor eax, eax */ + EMIT2(0x31, 0xc0); + /* mov qword ptr [rbp+32], rax */ + EMIT4(0x48, 0x89, 0x45, 32); + + BUILD_BUG_ON(cnt != PROLOGUE_SIZE); + } - BUILD_BUG_ON(cnt != PROLOGUE_SIZE); *pprog = prog; } @@ -290,7 +289,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 43 /* number of bytes to jump */ +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -299,7 +298,7 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 32 +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ @@ -313,7 +312,7 @@ static void emit_bpf_tail_call(u8 **pprog) * goto out; */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ -#define OFFSET3 10 +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; @@ -326,7 +325,7 @@ static void emit_bpf_tail_call(u8 **pprog) * rdi == ctx (1st arg) * rax == prog->bpf_func + prologue_size */ - EMIT2(0xFF, 0xE0); /* jmp rax */ + RETPOLINE_RAX_BPF_JIT(); /* out: */ BUILD_BUG_ON(cnt - label1 != OFFSET1); @@ -355,6 +354,86 @@ static void emit_load_skb_data_hlen(u8 **pprog) *pprog = prog; } +static void emit_mov_imm32(u8 **pprog, bool sign_propagate, + u32 dst_reg, const u32 imm32) +{ + u8 *prog = *pprog; + u8 b1, b2, b3; + int cnt = 0; + + /* optimization: if imm32 is positive, use 'mov %eax, imm32' + * (which zero-extends imm32) to save 2 bytes. 
+ */ + if (sign_propagate && (s32)imm32 < 0) { + /* 'mov %rax, imm32' sign extends imm32 */ + b1 = add_1mod(0x48, dst_reg); + b2 = 0xC7; + b3 = 0xC0; + EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); + goto done; + } + + /* optimization: if imm32 is zero, use 'xor %eax, %eax' + * to save 3 bytes. + */ + if (imm32 == 0) { + if (is_ereg(dst_reg)) + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + b2 = 0x31; /* xor */ + b3 = 0xC0; + EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); + goto done; + } + + /* mov %eax, imm32 */ + if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); +done: + *pprog = prog; +} + +static void emit_mov_imm64(u8 **pprog, u32 dst_reg, + const u32 imm32_hi, const u32 imm32_lo) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { + /* For emitting plain u32, where sign bit must not be + * propagated LLVM tends to load imm64 over mov32 + * directly, so save couple of bytes by just doing + * 'mov %eax, imm32' instead. 
+ */ + emit_mov_imm32(&prog, false, dst_reg, imm32_lo); + } else { + /* movabsq %rax, imm64 */ + EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); + EMIT(imm32_lo, 4); + EMIT(imm32_hi, 4); + } + + *pprog = prog; +} + +static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is64) { + /* mov dst, src */ + EMIT_mov(dst_reg, src_reg); + } else { + /* mov32 dst, src */ + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); + } + + *pprog = prog; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -368,7 +447,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int proglen = 0; u8 *prog = temp; - emit_prologue(&prog, bpf_prog->aux->stack_depth); + emit_prologue(&prog, bpf_prog->aux->stack_depth, + bpf_prog_was_classic(bpf_prog)); if (seen_ld_abs) emit_load_skb_data_hlen(&prog); @@ -377,7 +457,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; - u8 b1 = 0, b2 = 0, b3 = 0; + u8 b2 = 0, b3 = 0; s64 jmp_offset; u8 jmp_cond; bool reload_skb_data; @@ -413,16 +493,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; - /* mov dst, src */ case BPF_ALU64 | BPF_MOV | BPF_X: - EMIT_mov(dst_reg, src_reg); - break; - - /* mov32 dst, src */ case BPF_ALU | BPF_MOV | BPF_X: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); - EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); + emit_mov_reg(&prog, + BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, src_reg); break; /* neg dst */ @@ -485,58 +560,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; case BPF_ALU64 | BPF_MOV | BPF_K: - /* optimization: if imm32 is 
positive, - * use 'mov eax, imm32' (which zero-extends imm32) - * to save 2 bytes - */ - if (imm32 < 0) { - /* 'mov rax, imm32' sign extends imm32 */ - b1 = add_1mod(0x48, dst_reg); - b2 = 0xC7; - b3 = 0xC0; - EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); - break; - } - case BPF_ALU | BPF_MOV | BPF_K: - /* optimization: if imm32 is zero, use 'xor <dst>,<dst>' - * to save 3 bytes. - */ - if (imm32 == 0) { - if (is_ereg(dst_reg)) - EMIT1(add_2mod(0x40, dst_reg, dst_reg)); - b2 = 0x31; /* xor */ - b3 = 0xC0; - EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); - break; - } - - /* mov %eax, imm32 */ - if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); - EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); + emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, imm32); break; case BPF_LD | BPF_IMM | BPF_DW: - /* optimization: if imm64 is zero, use 'xor <dst>,<dst>' - * to save 7 bytes. - */ - if (insn[0].imm == 0 && insn[1].imm == 0) { - b1 = add_2mod(0x48, dst_reg, dst_reg); - b2 = 0x31; /* xor */ - b3 = 0xC0; - EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg)); - - insn++; - i++; - break; - } - - /* movabsq %rax, imm64 */ - EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); - EMIT(insn[0].imm, 4); - EMIT(insn[1].imm, 4); - + emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm); insn++; i++; break; @@ -593,36 +623,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_X: - EMIT1(0x50); /* push rax */ - EMIT1(0x52); /* push rdx */ + { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + + if (dst_reg != BPF_REG_0) + EMIT1(0x50); /* push rax */ + if (dst_reg != BPF_REG_3) + EMIT1(0x52); /* push rdx */ /* mov r11, dst_reg */ EMIT_mov(AUX_REG, dst_reg); if (BPF_SRC(insn->code) == BPF_X) - /* mov rax, src_reg */ - EMIT_mov(BPF_REG_0, src_reg); + emit_mov_reg(&prog, is64, BPF_REG_0, src_reg); else - /* mov rax, imm32 */ - EMIT3_off32(0x48, 0xC7, 
0xC0, imm32); + emit_mov_imm32(&prog, is64, BPF_REG_0, imm32); - if (BPF_CLASS(insn->code) == BPF_ALU64) + if (is64) EMIT1(add_1mod(0x48, AUX_REG)); else if (is_ereg(AUX_REG)) EMIT1(add_1mod(0x40, AUX_REG)); /* mul(q) r11 */ EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); - /* mov r11, rax */ - EMIT_mov(AUX_REG, BPF_REG_0); - - EMIT1(0x5A); /* pop rdx */ - EMIT1(0x58); /* pop rax */ - - /* mov dst_reg, r11 */ - EMIT_mov(dst_reg, AUX_REG); + if (dst_reg != BPF_REG_3) + EMIT1(0x5A); /* pop rdx */ + if (dst_reg != BPF_REG_0) { + /* mov dst_reg, rax */ + EMIT_mov(dst_reg, BPF_REG_0); + EMIT1(0x58); /* pop rax */ + } break; - + } /* shifts */ case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K: @@ -640,7 +672,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_RSH: b3 = 0xE8; break; case BPF_ARSH: b3 = 0xF8; break; } - EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); + + if (imm32 == 1) + EMIT2(0xD1, add_1reg(b3, dst_reg)); + else + EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU | BPF_LSH | BPF_X: @@ -1187,7 +1223,7 @@ skip_init_addrs: * may converge on the last pass. 
In such case do one more * pass to emit the final image */ - for (pass = 0; pass < 10 || image; pass++) { + for (pass = 0; pass < 20 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { image = NULL; @@ -1214,13 +1250,13 @@ skip_init_addrs: } } oldproglen = proglen; + cond_resched(); } if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, proglen, pass + 1, image); if (image) { - bpf_flush_icache(header, image + proglen); if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(header); } else { diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 174c597..a7a7677 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -460,7 +460,7 @@ static int nmi_setup(void) goto fail; for_each_possible_cpu(cpu) { - if (!cpu) + if (!IS_ENABLED(CONFIG_SMP) || !cpu) continue; memcpy(per_cpu(cpu_msrs, cpu).counters, diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 7df49c4..5559dca 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -140,12 +140,10 @@ static const struct dmi_system_id pci_crs_quirks[] __initconst = { void __init pci_acpi_crs_quirks(void) { - int year; + int year = dmi_get_bios_year(); - if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008) { - if (iomem_resource.end <= 0xffffffff) - pci_use_crs = false; - } + if (year >= 0 && year < 2008 && iomem_resource.end <= 0xffffffff) + pci_use_crs = false; dmi_check_system(pci_crs_quirks); diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 2d95033..a51074c 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -195,14 +195,13 @@ static const struct pci_raw_ops pci_direct_conf2 = { static int __init pci_sanity_check(const struct pci_raw_ops *o) { u32 x = 0; - int year, devfn; + int devfn; if (pci_probe & PCI_NO_CHECKS) return 1; /* Assume Type 1 works for newer systems. This handles machines that don't have anything on PCI Bus 0. 
*/ - dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL); - if (year >= 2001) + if (dmi_get_bios_year() >= 2001) return 1; for (devfn = 0; devfn < 0x100; devfn++) { diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 1cb01ab..dfbe6ac 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -4,6 +4,7 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/pci.h> +#include <asm/jailhouse_para.h> #include <asm/pci_x86.h> /* @@ -34,13 +35,14 @@ int __init pci_legacy_init(void) void pcibios_scan_specific_bus(int busn) { + int stride = jailhouse_paravirt() ? 1 : 8; int devfn; u32 l; if (pci_find_bus(0, busn)) return; - for (devfn = 0; devfn < 256; devfn += 8) { + for (devfn = 0; devfn < 256; devfn += stride) { if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) && l != 0x0000 && l != 0xffff) { DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 96684d0..7389db5 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -94,8 +94,8 @@ static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start, return new; } -static struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start, - int end, u64 addr) +struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start, + int end, u64 addr) { struct pci_mmcfg_region *new; @@ -547,19 +547,14 @@ static void __init pci_mmcfg_reject_broken(int early) static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, struct acpi_mcfg_allocation *cfg) { - int year; - if (cfg->address < 0xFFFFFFFF) return 0; if (!strncmp(mcfg->header.oem_id, "SGI", 3)) return 0; - if (mcfg->header.revision >= 1) { - if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && - year >= 2010) - return 0; - } + if ((mcfg->header.revision >= 1) && (dmi_get_bios_year() >= 2010)) + return 0; pr_err(PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " "is above 4GB, ignored\n", 
cfg->pci_segment, diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 75577c1..7a5bafb 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev) return p; } -/** - * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers - * returns virtual address. This is the only "special" function here. - * @dev: PCI device - * @size: Size of the buffer - * @dma_handle: DMA address - * @flags: memory flags - */ -static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, - size_t size, - dma_addr_t *dma_handle, - gfp_t flags, - unsigned long attrs) -{ - void *vaddr; - - vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs); - *dma_handle = p2a(*dma_handle, to_pci_dev(dev)); - return vaddr; -} - -/* We have our own dma_ops: the same as swiotlb but from alloc (above) */ -static const struct dma_map_ops sta2x11_dma_ops = { - .alloc = sta2x11_swiotlb_alloc_coherent, - .free = x86_swiotlb_free_coherent, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = swiotlb_dma_mapping_error, - .dma_supported = x86_dma_supported, -}; - /* At setup time, we use our own ops if the device is a ConneXt one */ static void sta2x11_setup_pdev(struct pci_dev *pdev) { @@ -205,7 +168,8 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev) return; pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); - pdev->dev.dma_ops = &sta2x11_dma_ops; + pdev->dev.dma_ops = &swiotlb_dma_ops; + pdev->dev.archdata.is_sta2x11 = true; /* We must enable all devices as master, for audio DMA to work */ 
pci_set_master(pdev); @@ -225,7 +189,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { struct sta2x11_mapping *map; - if (dev->dma_ops != &sta2x11_dma_ops) { + if (!dev->archdata.is_sta2x11) { if (!dev->dma_mask) return false; return addr + size - 1 <= *dev->dma_mask; @@ -243,13 +207,13 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) } /** - * phys_to_dma - Return the DMA AMBA address used for this STA2x11 device + * __phys_to_dma - Return the DMA AMBA address used for this STA2x11 device * @dev: device for a PCI device * @paddr: Physical address */ -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { - if (dev->dma_ops != &sta2x11_dma_ops) + if (!dev->archdata.is_sta2x11) return paddr; return p2a(paddr, to_pci_dev(dev)); } @@ -259,9 +223,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) * @dev: device for a PCI device * @daddr: STA2x11 AMBA DMA address */ -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { - if (dev->dma_ops != &sta2x11_dma_ops) + if (!dev->archdata.is_sta2x11) return daddr; return a2p(daddr, to_pci_dev(dev)); } diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c index d49d3be..034813d 100644 --- a/arch/x86/platform/atom/punit_atom_debug.c +++ b/arch/x86/platform/atom/punit_atom_debug.c @@ -109,18 +109,7 @@ static int punit_dev_state_show(struct seq_file *seq_file, void *unused) return 0; } - -static int punit_dev_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, punit_dev_state_show, inode->i_private); -} - -static const struct file_operations punit_dev_state_ops = { - .open = punit_dev_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(punit_dev_state); static struct dentry *punit_dbg_file; @@ 
-132,9 +121,9 @@ static int punit_dbgfs_register(struct punit_device *punit_device) if (!punit_dbg_file) return -ENXIO; - dev_state = debugfs_create_file("dev_power_state", S_IFREG | S_IRUGO, + dev_state = debugfs_create_file("dev_power_state", 0444, punit_dbg_file, punit_device, - &punit_dev_state_ops); + &punit_dev_state_fops); if (!dev_state) { pr_err("punit_dev_state register failed\n"); debugfs_remove(punit_dbg_file); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index c310a82..bed7e7f 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -27,12 +27,14 @@ #include <linux/ioport.h> #include <linux/mc146818rtc.h> #include <linux/efi.h> +#include <linux/export.h> #include <linux/uaccess.h> #include <linux/io.h> #include <linux/reboot.h> #include <linux/slab.h> #include <linux/ucs2_string.h> #include <linux/mem_encrypt.h> +#include <linux/sched/task.h> #include <asm/setup.h> #include <asm/page.h> @@ -81,9 +83,8 @@ pgd_t * __init efi_call_phys_prolog(void) int n_pgds, i, j; if (!efi_enabled(EFI_OLD_MEMMAP)) { - save_pgd = (pgd_t *)__read_cr3(); - write_cr3((unsigned long)efi_scratch.efi_pgt); - goto out; + efi_switch_mm(&efi_mm); + return NULL; } early_code_mapping_set_exec(1); @@ -155,8 +156,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) pud_t *pud; if (!efi_enabled(EFI_OLD_MEMMAP)) { - write_cr3((unsigned long)save_pgd); - __flush_tlb_all(); + efi_switch_mm(efi_scratch.prev_mm); return; } @@ -190,7 +190,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) early_code_mapping_set_exec(0); } -static pgd_t *efi_pgd; +EXPORT_SYMBOL_GPL(efi_mm); /* * We need our own copy of the higher levels of the page tables @@ -203,7 +203,7 @@ static pgd_t *efi_pgd; */ int __init efi_alloc_page_tables(void) { - pgd_t *pgd; + pgd_t *pgd, *efi_pgd; p4d_t *p4d; pud_t *pud; gfp_t gfp_mask; @@ -225,12 +225,16 @@ int __init efi_alloc_page_tables(void) pud = pud_alloc(&init_mm, p4d, EFI_VA_END); if (!pud) { - if 
(CONFIG_PGTABLE_LEVELS > 4) + if (pgtable_l5_enabled) free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_page((unsigned long)efi_pgd); + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); return -ENOMEM; } + efi_mm.pgd = efi_pgd; + mm_init_cpumask(&efi_mm); + init_new_context(NULL, &efi_mm); + return 0; } @@ -243,6 +247,7 @@ void efi_sync_low_kernel_mappings(void) pgd_t *pgd_k, *pgd_efi; p4d_t *p4d_k, *p4d_efi; pud_t *pud_k, *pud_efi; + pgd_t *efi_pgd = efi_mm.pgd; if (efi_enabled(EFI_OLD_MEMMAP)) return; @@ -255,8 +260,8 @@ void efi_sync_low_kernel_mappings(void) * only span a single PGD entry and that the entry also maps * other important kernel regions. */ - BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); - BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != + MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); + MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != (EFI_VA_END & PGDIR_MASK)); pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); @@ -336,20 +341,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) unsigned long pfn, text, pf; struct page *page; unsigned npages; - pgd_t *pgd; + pgd_t *pgd = efi_mm.pgd; if (efi_enabled(EFI_OLD_MEMMAP)) return 0; /* - * Since the PGD is encrypted, set the encryption mask so that when - * this value is loaded into cr3 the PGD will be decrypted during - * the pagetable walk. - */ - efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd); - pgd = efi_pgd; - - /* * It can happen that the physical address of new_memmap lands in memory * which is not mapped in the EFI page table. 
Therefore we need to go * and ident-map those pages containing the map before calling @@ -362,8 +359,6 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 1; } - efi_scratch.use_pgd = true; - /* * Certain firmware versions are way too sentimential and still believe * they are exclusive and unquestionable owners of the first physical page, @@ -417,7 +412,7 @@ static void __init __map_region(efi_memory_desc_t *md, u64 va) { unsigned long flags = _PAGE_RW; unsigned long pfn; - pgd_t *pgd = efi_pgd; + pgd_t *pgd = efi_mm.pgd; if (!(md->attribute & EFI_MEMORY_WB)) flags |= _PAGE_PCD; @@ -521,7 +516,7 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len) static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf) { unsigned long pfn; - pgd_t *pgd = efi_pgd; + pgd_t *pgd = efi_mm.pgd; int err1, err2; /* Update the 1:1 mapping */ @@ -618,10 +613,26 @@ void __init efi_dump_pagetable(void) if (efi_enabled(EFI_OLD_MEMMAP)) ptdump_walk_pgd_level(NULL, swapper_pg_dir); else - ptdump_walk_pgd_level(NULL, efi_pgd); + ptdump_walk_pgd_level(NULL, efi_mm.pgd); #endif } +/* + * Makes the calling thread switch to/from efi_mm context. Can be used + * for SetVirtualAddressMap() i.e. current->active_mm == init_mm as well + * as during efi runtime calls i.e current->active_mm == current_mm. + * We are not mm_dropping()/mm_grabbing() any mm, because we are not + * losing/creating any references. 
+ */ +void efi_switch_mm(struct mm_struct *mm) +{ + task_lock(current); + efi_scratch.prev_mm = current->active_mm; + current->active_mm = mm; + switch_mm(efi_scratch.prev_mm, mm, NULL); + task_unlock(current); +} + #ifdef CONFIG_EFI_MIXED extern efi_status_t efi64_thunk(u32, ...); @@ -675,16 +686,13 @@ efi_status_t efi_thunk_set_virtual_address_map( efi_sync_low_kernel_mappings(); local_irq_save(flags); - efi_scratch.prev_cr3 = __read_cr3(); - write_cr3((unsigned long)efi_scratch.efi_pgt); - __flush_tlb_all(); + efi_switch_mm(&efi_mm); func = (u32)(unsigned long)phys_set_virtual_address_map; status = efi64_thunk(func, memory_map_size, descriptor_size, descriptor_version, virtual_map); - write_cr3(efi_scratch.prev_cr3); - __flush_tlb_all(); + efi_switch_mm(efi_scratch.prev_mm); local_irq_restore(flags); return status; diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 189b218..46c58b0 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -33,7 +33,7 @@ ENTRY(efi64_thunk) * Switch to 1:1 mapped 32-bit stack pointer. */ movq %rsp, efi_saved_sp(%rip) - movq efi_scratch+25(%rip), %rsp + movq efi_scratch(%rip), %rsp /* * Calculate the physical address of the kernel text. 
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 5b513cc..36c1f8b 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -75,7 +75,7 @@ struct quark_security_header { u32 rsvd[2]; }; -static efi_char16_t efi_dummy_name[6] = { 'D', 'U', 'M', 'M', 'Y', 0 }; +static const efi_char16_t efi_dummy_name[] = L"DUMMY"; static bool efi_no_storage_paranoia; @@ -105,7 +105,8 @@ early_param("efi_no_storage_paranoia", setup_storage_paranoia); */ void efi_delete_dummy_variable(void) { - efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + efi.set_variable((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, @@ -177,12 +178,13 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size, * that by attempting to use more space than is available. */ unsigned long dummy_size = remaining_size + 1024; - void *dummy = kzalloc(dummy_size, GFP_ATOMIC); + void *dummy = kzalloc(dummy_size, GFP_KERNEL); if (!dummy) return EFI_OUT_OF_RESOURCES; - status = efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + status = efi.set_variable((efi_char16_t *)efi_dummy_name, + &EFI_DUMMY_GUID, EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS | EFI_VARIABLE_RUNTIME_ACCESS, diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 2c67bae..2ebdf31 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -79,7 +79,7 @@ static void intel_mid_power_off(void) static void intel_mid_reboot(void) { - intel_scu_ipc_simple_command(IPCMSG_COLD_BOOT, 0); + intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0); } static unsigned long __init intel_mid_calibrate_tsc(void) @@ -199,6 +199,12 @@ void __init x86_intel_mid_early_setup(void) legacy_pic = &null_legacy_pic; + /* + * Do nothing for now as everything needed done in + * x86_intel_mid_early_setup() below. 
+ */ + x86_init.acpi.reduced_hw_early_init = x86_init_noop; + pm_power_off = intel_mid_power_off; machine_ops.emergency_restart = intel_mid_reboot; diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c index 17d6d22..49828c2 100644 --- a/arch/x86/platform/intel-quark/imr.c +++ b/arch/x86/platform/intel-quark/imr.c @@ -224,25 +224,7 @@ static int imr_dbgfs_state_show(struct seq_file *s, void *unused) mutex_unlock(&idev->lock); return ret; } - -/** - * imr_state_open - debugfs open callback. - * - * @inode: pointer to struct inode. - * @file: pointer to struct file. - * @return: result of single open. - */ -static int imr_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, imr_dbgfs_state_show, inode->i_private); -} - -static const struct file_operations imr_state_ops = { - .open = imr_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(imr_dbgfs_state); /** * imr_debugfs_register - register debugfs hooks. 
@@ -252,8 +234,8 @@ static const struct file_operations imr_state_ops = { */ static int imr_debugfs_register(struct imr_device *idev) { - idev->file = debugfs_create_file("imr_state", S_IFREG | S_IRUGO, NULL, - idev, &imr_state_ops); + idev->file = debugfs_create_file("imr_state", 0444, NULL, idev, + &imr_dbgfs_state_fops); return PTR_ERR_OR_ZERO(idev->file); } diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index db77e087..b36caae 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -2255,8 +2255,6 @@ static int __init uv_bau_init(void) init_uvhub(uvhub, vector, uv_base_pnode); } - alloc_intr_gate(vector, uv_bau_message_intr1); - for_each_possible_blade(uvhub) { if (uv_blade_nr_possible_cpus(uvhub)) { unsigned long val; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 0ef5e520..74a5329 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -50,7 +50,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) { pmd_t *pmd; pud_t *pud; - p4d_t *p4d; + p4d_t *p4d = NULL; /* * The new mapping only has to cover the page containing the image @@ -66,7 +66,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) * tables used by the image kernel. 
*/ - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + if (pgtable_l5_enabled) { p4d = (p4d_t *)get_safe_page(GFP_ATOMIC); if (!p4d) return -ENOMEM; @@ -84,7 +84,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) __pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC)); set_pud(pud + pud_index(restore_jump_address), __pud(__pa(pmd) | _KERNPG_TABLE)); - if (IS_ENABLED(CONFIG_X86_5LEVEL)) { + if (p4d) { set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE)); set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE)); } else { diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 2f15a2a..d70c15d 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -16,7 +16,7 @@ KCOV_INSTRUMENT := n # in turn leaves some undefined symbols like __fentry__ in purgatory and not # sure how to relocate those. Like kexec-tools, use custom flags. -KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large +KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -Os -mcmodel=large KBUILD_CFLAGS += -m$(BITS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index de53bd1..24bb759 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -102,7 +102,7 @@ ENTRY(startup_32) * don't we'll eventually crash trying to execute encrypted * instructions. 
*/ - bt $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags + btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone movl $MSR_K8_SYSCFG, %ecx rdmsr diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 5d73c44..220e978 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -770,9 +770,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, break; case R_X86_64_PC32: + case R_X86_64_PLT32: /* * PC relative relocations don't need to be adjusted unless * referencing a percpu symbol. + * + * NB: R_X86_64_PLT32 can be treated as R_X86_64_PC32. */ if (is_percpu_sym(sym, symname)) add_reloc(&relocs32neg, offset); diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index b7d7340..f31e5d9 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -30,11 +30,7 @@ #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else /* CONFIG_X86_PPRO_FENCE */ #define dma_rmb() barrier() -#endif /* CONFIG_X86_PPRO_FENCE */ #define dma_wmb() barrier() #include <asm-generic/barrier.h> diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index f605825..c1f98f3 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -18,9 +18,6 @@ config XEN_PV bool "Xen PV guest support" default y depends on XEN - # XEN_PV is not ready to work with 5-level paging. - # Changes to hypervisor are also required. - depends on !X86_5LEVEL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU help @@ -79,6 +76,4 @@ config XEN_DEBUG_FS config XEN_PVH bool "Support for running as a PVH guest" depends on XEN && XEN_PVHVM && ACPI - # Pre-built page tables are not ready to handle 5-level paging. 
- depends on !X86_5LEVEL def_bool n diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index de58533..2163888 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -215,7 +215,7 @@ static void __init xen_apic_check(void) } void __init xen_init_apic(void) { - x86_io_apic_ops.read = xen_io_apic_read; + x86_apic_ops.io_apic_read = xen_io_apic_read; /* On PV guests the APIC CPUID bit is disabled so none of the * routines end up executing. */ if (!xen_initial_domain()) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c047f42..3c2c253 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1376,8 +1376,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (!xen_initial_domain()) { add_preferred_console("xenboot", 0, NULL); - add_preferred_console("tty", 0, NULL); - add_preferred_console("hvc", 0, NULL); if (pci_xen) x86_init.pci.arch_init = pci_xen_init; } else { @@ -1410,6 +1408,10 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_boot_params_init_edd(); } + + add_preferred_console("tty", 0, NULL); + add_preferred_console("hvc", 0, NULL); + #ifdef CONFIG_PCI /* PCI BIOS service won't work from a PV guest. */ pci_probe &= ~PCI_PROBE_BIOS; diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index 436c4f0..aa1c6a68 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -6,6 +6,7 @@ #include <asm/io_apic.h> #include <asm/hypervisor.h> #include <asm/e820/api.h> +#include <asm/x86_init.h> #include <asm/xen/interface.h> #include <asm/xen/hypercall.h> @@ -16,15 +17,20 @@ /* * PVH variables. * - * xen_pvh and pvh_bootparams need to live in data segment since they - * are used after startup_{32|64}, which clear .bss, are invoked. + * xen_pvh pvh_bootparams and pvh_start_info need to live in data segment + * since they are used after startup_{32|64}, which clear .bss, are invoked. 
*/ bool xen_pvh __attribute__((section(".data"))) = 0; struct boot_params pvh_bootparams __attribute__((section(".data"))); +struct hvm_start_info pvh_start_info __attribute__((section(".data"))); -struct hvm_start_info pvh_start_info; unsigned int pvh_start_info_sz = sizeof(pvh_start_info); +static u64 pvh_get_root_pointer(void) +{ + return pvh_start_info.rsdp_paddr; +} + static void __init init_pvh_bootparams(void) { struct xen_memory_map memmap; @@ -71,6 +77,8 @@ static void __init init_pvh_bootparams(void) */ pvh_bootparams.hdr.version = 0x212; pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ + + x86_init.acpi.get_root_pointer = pvh_get_root_pointer; } /* diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index aae88fe..486c0a3 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -116,6 +116,8 @@ DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ static phys_addr_t xen_pt_base, xen_pt_size __initdata; +static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready); + /* * Just beyond the highest usermode address. STACK_TOP_MAX has a * redzone above it, so round it up to a PGD boundary. @@ -155,11 +157,18 @@ void make_lowmem_page_readwrite(void *vaddr) } +/* + * During early boot all page table pages are pinned, but we do not have struct + * pages, so return true until struct pages are ready. 
+ */ static bool xen_page_pinned(void *ptr) { - struct page *page = virt_to_page(ptr); + if (static_branch_likely(&xen_struct_pages_ready)) { + struct page *page = virt_to_page(ptr); - return PagePinned(page); + return PagePinned(page); + } + return true; } static void xen_extend_mmu_update(const struct mmu_update *update) @@ -538,6 +547,22 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val) xen_mc_issue(PARAVIRT_LAZY_MMU); } + +#if CONFIG_PGTABLE_LEVELS >= 5 +__visible p4dval_t xen_p4d_val(p4d_t p4d) +{ + return pte_mfn_to_pfn(p4d.p4d); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val); + +__visible p4d_t xen_make_p4d(p4dval_t p4d) +{ + p4d = pte_pfn_to_mfn(p4d); + + return native_make_p4d(p4d); +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d); +#endif /* CONFIG_PGTABLE_LEVELS >= 5 */ #endif /* CONFIG_X86_64 */ static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd, @@ -820,11 +845,6 @@ void xen_mm_pin_all(void) spin_unlock(&pgd_lock); } -/* - * The init_mm pagetable is really pinned as soon as its created, but - * that's before we have page structures to store the bits. So do all - * the book-keeping now. - */ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, enum pt_level level) { @@ -832,8 +852,18 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -static void __init xen_mark_init_mm_pinned(void) +/* + * The init_mm pagetable is really pinned as soon as its created, but + * that's before we have page structures to store the bits. So do all + * the book-keeping now once struct pages for allocated pages are + * initialized. This happens only after free_all_bootmem() is called. 
+ */ +static void __init xen_after_bootmem(void) { + static_branch_enable(&xen_struct_pages_ready); +#ifdef CONFIG_X86_64 + SetPagePinned(virt_to_page(level3_user_vsyscall)); +#endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1607,14 +1637,15 @@ static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot) static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) { - bool pinned = PagePinned(virt_to_page(mm->pgd)); + bool pinned = xen_page_pinned(mm->pgd); trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned); if (pinned) { struct page *page = pfn_to_page(pfn); - SetPagePinned(page); + if (static_branch_likely(&xen_struct_pages_ready)) + SetPagePinned(page); if (!PageHighMem(page)) { xen_mc_batch(); @@ -2348,9 +2379,7 @@ static void __init xen_post_allocator_init(void) #ifdef CONFIG_X86_64 pv_mmu_ops.write_cr3 = &xen_write_cr3; - SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif - xen_mark_init_mm_pinned(); } static void xen_leave_lazy_mmu(void) @@ -2411,6 +2440,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, + +#if CONFIG_PGTABLE_LEVELS >= 5 + .p4d_val = PV_CALLEE_SAVE(xen_p4d_val), + .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), +#endif #endif /* CONFIG_X86_64 */ .activate_mm = xen_activate_mm, @@ -2429,6 +2463,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.paging.pagetable_init = xen_pagetable_init; + x86_init.hyper.init_after_bootmem = xen_after_bootmem; pv_mmu_ops = xen_mmu_ops; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d9f96cc..1d83152 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,12 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/tick.h> +#include <linux/percpu-defs.h> #include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/grant_table.h> #include 
<xen/events.h> +#include <asm/cpufeatures.h> +#include <asm/msr-index.h> #include <asm/xen/hypercall.h> #include <asm/xen/page.h> #include <asm/fixmap.h> @@ -15,6 +18,8 @@ #include "mmu.h" #include "pmu.h" +static DEFINE_PER_CPU(u64, spec_ctrl); + void xen_arch_pre_suspend(void) { xen_save_time_memory_area(); @@ -35,6 +40,9 @@ void xen_arch_post_suspend(int cancelled) static void xen_vcpu_notify_restore(void *data) { + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, this_cpu_read(spec_ctrl)); + /* Boot processor notified via generic timekeeping_resume() */ if (smp_processor_id() == 0) return; @@ -44,7 +52,15 @@ static void xen_vcpu_notify_restore(void *data) static void xen_vcpu_notify_suspend(void *data) { + u64 tmp; + tick_suspend_local(); + + if (xen_pv_domain() && boot_cpu_has(X86_FEATURE_SPEC_CTRL)) { + rdmsrl(MSR_IA32_SPEC_CTRL, tmp); + this_cpu_write(spec_ctrl, tmp); + wrmsrl(MSR_IA32_SPEC_CTRL, 0); + } } void xen_arch_resume(void) |