diff options
author | Jiri Kosina <jkosina@suse.cz> | 2012-10-28 19:28:52 +0100 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2012-10-28 19:29:19 +0100 |
commit | 3bd7bf1f0fe14f591c089ae61bbfa9bd356f178a (patch) | |
tree | 0058693cc9e70b7461dae551f8a19aff2efd13ca /arch/x86 | |
parent | f16f84937d769c893492160b1a8c3672e3992beb (diff) | |
parent | e657e078d3dfa9f96976db7a2b5fd7d7c9f1f1a6 (diff) | |
download | op-kernel-dev-3bd7bf1f0fe14f591c089ae61bbfa9bd356f178a.zip op-kernel-dev-3bd7bf1f0fe14f591c089ae61bbfa9bd356f178a.tar.gz |
Merge branch 'master' into for-next
Sync up with Linus' tree to be able to apply Cesar's patch
against newer version of the code.
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Diffstat (limited to 'arch/x86')
263 files changed, 9404 insertions, 4989 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 50a1d1f..46c3bff 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -7,11 +7,14 @@ config 64BIT Say no to build a 32-bit kernel - formerly known as i386 config X86_32 - def_bool !64BIT + def_bool y + depends on !64BIT select CLKSRC_I8253 + select HAVE_UID16 config X86_64 - def_bool 64BIT + def_bool y + depends on 64BIT select X86_DEV_DMA_OPS ### Arch settings @@ -36,6 +39,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER @@ -43,6 +47,7 @@ config X86 select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_SYSCALL_TRACEPOINTS + select SYSCTL_EXCEPTION_TRACE select HAVE_KVM select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK @@ -60,6 +65,9 @@ config X86 select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP + select HAVE_DEBUG_KMEMLEAK select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 @@ -80,6 +88,7 @@ config X86 select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if X86_64 + select HAVE_ARCH_TRANSPARENT_HUGEPAGE select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP @@ -97,9 +106,16 @@ config X86 select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_RCU_USER_QS if X86_64 + select HAVE_IRQ_TIME_ACCOUNTING + select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE + select MODULES_USE_ELF_REL if X86_32 + select MODULES_USE_ELF_RELA if X86_64 config INSTRUCTION_DECODER - def_bool (KPROBES || PERF_EVENTS || UPROBES) + def_bool y + depends on KPROBES || PERF_EVENTS || UPROBES config OUTPUT_FORMAT string @@ -127,13 +143,15 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG) + def_bool y + depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG config NEED_SG_DMA_LENGTH def_bool y config GENERIC_ISA_DMA - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config GENERIC_BUG def_bool y @@ -150,13 +168,16 @@ config GENERIC_GPIO bool config ARCH_MAY_HAVE_PC_FDC - def_bool ISA_DMA_API + def_bool y + depends on ISA_DMA_API config RWSEM_GENERIC_SPINLOCK - def_bool !X86_XADD + def_bool y + depends on !X86_XADD config RWSEM_XCHGADD_ALGORITHM - def_bool X86_XADD + def_bool y + depends on X86_XADD config GENERIC_CALIBRATE_DELAY def_bool y @@ -573,23 +594,18 @@ config PARAVIRT_TIME_ACCOUNTING source "arch/x86/xen/Kconfig" -config KVM_CLOCK - bool "KVM paravirtualized clock" - select PARAVIRT - select PARAVIRT_CLOCK - ---help--- - Turning on this option will allow you to run a paravirtualized clock - when running over the KVM hypervisor. Instead of relying on a PIT - (or probably other) emulation by the underlying device model, the host - provides the guest with timing infrastructure such as time of day, and - system time - config KVM_GUEST - bool "KVM Guest support" + bool "KVM Guest support (including kvmclock)" + select PARAVIRT select PARAVIRT + select PARAVIRT_CLOCK + default y if PARAVIRT_GUEST ---help--- This option enables various optimizations for running under the KVM - hypervisor. + hypervisor. It includes a paravirtualized clock, so that instead + of relying on a PIT (or probably other) emulation by the + underlying device model, the host provides the guest with + timing infrastructure such as time of day, and system time source "arch/x86/lguest/Kconfig" @@ -752,7 +768,8 @@ config SWIOTLB If unsure, say Y. config IOMMU_HELPER - def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) + def_bool y + depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" @@ -796,17 +813,6 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. -config IRQ_TIME_ACCOUNTING - bool "Fine granularity task level IRQ time accounting" - default n - ---help--- - Select this option to enable fine granularity task irq time - accounting. This is done by reading a timestamp on each - transitions between softirq and hardirq state, so there can be a - small performance impact. - - If in doubt, say N here. - source "kernel/Kconfig.preempt" config X86_UP_APIC @@ -871,6 +877,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS config X86_MCE bool "Machine Check / overheating reporting" + default y ---help--- Machine Check support allows the processor to notify the kernel if it detects a problem (e.g. overheating, data corruption). @@ -982,25 +989,25 @@ config X86_REBOOTFIXUPS Say N otherwise. config MICROCODE - tristate "/dev/cpu/microcode - microcode support" + tristate "CPU microcode loading support" select FW_LOADER ---help--- + If you say Y here, you will be able to update the microcode on certain Intel and AMD processors. The Intel support is for the - IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, - Pentium 4, Xeon etc. The AMD support is for family 0x10 and - 0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra. - You will obviously need the actual microcode binary data itself - which is not shipped with the Linux kernel. + IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, + Xeon etc. The AMD support is for families 0x10 and later. You will + obviously need the actual microcode binary data itself which is not + shipped with the Linux kernel. This option selects the general module only, you need to select at least one vendor specific module as well. - To compile this driver as a module, choose M here: the - module will be called microcode. + To compile this driver as a module, choose M here: the module + will be called microcode. config MICROCODE_INTEL - bool "Intel microcode patch loading support" + bool "Intel microcode loading support" depends on MICROCODE default MICROCODE select FW_LOADER @@ -1013,7 +1020,7 @@ config MICROCODE_INTEL <http://www.urbanmyth.org/microcode/>. config MICROCODE_AMD - bool "AMD microcode patch loading support" + bool "AMD microcode loading support" depends on MICROCODE select FW_LOADER ---help--- @@ -1159,10 +1166,12 @@ config X86_PAE consumes more pagetable space per process. config ARCH_PHYS_ADDR_T_64BIT - def_bool X86_64 || X86_PAE + def_bool y + depends on X86_64 || X86_PAE config ARCH_DMA_ADDR_T_64BIT - def_bool X86_64 || HIGHMEM64G + def_bool y + depends on X86_64 || HIGHMEM64G config DIRECT_GBPAGES bool "Enable 1GB pages for kernel pagetables" if EXPERT @@ -1285,8 +1294,8 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool X86_64 - depends on MEMORY_HOTPLUG + def_bool y + depends on X86_64 && MEMORY_HOTPLUG config ARCH_PROC_KCORE_TEXT def_bool y @@ -1487,6 +1496,17 @@ config ARCH_RANDOM If supported, this is a high bandwidth, cryptographically secure hardware random number generator. +config X86_SMAP + def_bool y + prompt "Supervisor Mode Access Prevention" if EXPERT + ---help--- + Supervisor Mode Access Prevention (SMAP) is a security + feature in newer Intel processors. There is a small + performance cost if this enabled and turned on; there is + also a small increase in the kernel size if this is enabled. + + If unsure, say Y. + config EFI bool "EFI runtime service support" depends on ACPI @@ -1975,7 +1995,6 @@ config PCI_MMCONFIG config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT - default n depends on PCI && EXPERIMENTAL help Read the PCI windows out of the CNB20LE host bridge. This allows @@ -2157,6 +2176,7 @@ config IA32_EMULATION bool "IA32 Emulation" depends on X86_64 select COMPAT_BINFMT_ELF + select HAVE_UID16 ---help--- Include code to run legacy 32-bit programs under a 64-bit kernel. You should likely turn this on, unless you're @@ -2186,18 +2206,18 @@ config COMPAT depends on IA32_EMULATION || X86_X32 select ARCH_WANT_OLD_COMPAT_IPC +if COMPAT config COMPAT_FOR_U64_ALIGNMENT - def_bool COMPAT - depends on X86_64 + def_bool y config SYSVIPC_COMPAT def_bool y - depends on COMPAT && SYSVIPC + depends on SYSVIPC config KEYS_COMPAT - bool - depends on COMPAT && KEYS - default y + def_bool y + depends on KEYS +endif endmenu diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 706e12e..f3b86d0 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT default X86_L1_CACHE_SHIFT config X86_CMPXCHG - def_bool X86_64 || (X86_32 && !M386) + def_bool y + depends on X86_64 || (X86_32 && !M386) config X86_L1_CACHE_SHIFT int @@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT config X86_XADD def_bool y - depends on X86_64 || !M386 + depends on !M386 config X86_PPRO_FENCE bool "PentiumPro memory ordering errata workaround" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 474ca35..05afcca 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -92,7 +92,7 @@ endif ifdef CONFIG_X86_X32 x32_ld_ok := $(call try-run,\ /bin/echo -e '1: .quad 1b' | \ - $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \ + $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \ $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \ $(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n) ifeq ($(x32_ld_ok),y) diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index f7535be..ccce0ed 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -37,7 +37,8 @@ setup-y += video-bios.o targets += $(setup-y) hostprogs-y := mkcpustr tools/build -HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \ +HOST_EXTRACFLAGS += -I$(srctree)/tools/include \ + -include include/generated/autoconf.h \ -D__EXPORTED_HEADERS__ $(obj)/cpu.o: $(obj)/cpustr.h @@ -52,7 +53,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE # How to compile the 16-bit code. Note we always compile for -march=i386, # that way we can complain to the user if the CPU is insufficient. -KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ +KBUILD_CFLAGS := $(USERINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \ -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes \ -march=i386 -mregparm=3 \ diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index e398bb5..8a84501 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ $(obj)/piggy.o +$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone +$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone + ifeq ($(CONFIG_EFI_STUB), y) VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o endif diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index b3e0227..c760e07 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, nr_gops = size / sizeof(void *); for (i = 0; i < nr_gops; i++) { struct efi_graphics_output_mode_info *info; - efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; - void *pciio; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy; void *h = gop_handle[i]; status = efi_call_phys3(sys_table->boottime->handle_protocol, @@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, if (status != EFI_SUCCESS) continue; - efi_call_phys3(sys_table->boottime->handle_protocol, - h, &pciio_proto, &pciio); + status = efi_call_phys3(sys_table->boottime->handle_protocol, + h, &conout_proto, &dummy); + + if (status == EFI_SUCCESS) + conout_found = true; status = efi_call_phys4(gop->query_mode, gop, gop->mode->mode, &size, &info); - if (status == EFI_SUCCESS && (!first_gop || pciio)) { + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* - * Apple provide GOPs that are not backed by - * real hardware (they're used to handle - * multiple displays). The workaround is to - * search for a GOP implementing the PCIIO - * protocol, and if one isn't found, to just - * fallback to the first GOP. + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. */ width = info->horizontal_resolution; height = info->vertical_resolution; @@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, pixels_per_scan_line = info->pixels_per_scan_line; /* - * Once we've found a GOP supporting PCIIO, + * Once we've found a GOP supporting ConOut, * don't bother looking any further. */ - if (pciio) + if (conout_found) break; first_gop = gop; @@ -328,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->lfb_width = width; si->lfb_height = height; si->lfb_base = fb_base; - si->lfb_size = fb_size; si->pages = 1; if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { @@ -376,6 +378,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->rsvd_pos = 0; } + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; + free_handle: efi_call_phys1(sys_table->boottime->free_pool, gop_handle); return status; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 3b6e156..e5b0a8f 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -14,6 +14,10 @@ #define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT) #define EFI_READ_CHUNK_SIZE (1024 * 1024) +#define EFI_CONSOLE_OUT_DEVICE_GUID \ + EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \ + 0x3f, 0xc1, 0x4d) + #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0 #define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1 #define PIXEL_BIT_MASK 2 diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index b4e15dd..2a01744 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -32,10 +32,6 @@ SYSSEG = 0x1000 /* historical load address >> 4 */ #define SVGA_MODE ASK_VGA #endif -#ifndef RAMDISK -#define RAMDISK 0 -#endif - #ifndef ROOT_RDONLY #define ROOT_RDONLY 1 #endif diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c index 919257f..4579eff 100644 --- a/arch/x86/boot/mkcpustr.c +++ b/arch/x86/boot/mkcpustr.c @@ -15,6 +15,8 @@ #include <stdio.h> +#include "../include/asm/required-features.h" +#include "../include/asm/cpufeature.h" #include "../kernel/cpu/capflags.c" int main(void) diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 119db67..5598547 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_X86_GENERIC=y CONFIG_HPET_TIMER=y @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -231,8 +229,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -243,11 +239,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_RTC_CLASS=y CONFIG_DMADEVICES=y CONFIG_EEEPC_LAPTOP=y CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -299,13 +293,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -316,4 +308,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_CRYPTO_AES_586=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 76eb290..671524d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y @@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y CONFIG_SUN_PARTITION=y CONFIG_KARMA_PARTITION=y CONFIG_EFI_PARTITION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_SMP=y CONFIG_CALGARY_IOMMU=y CONFIG_NR_CPUS=64 @@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y @@ -227,8 +225,6 @@ CONFIG_SND_HRTIMER=y CONFIG_SND_HDA_INTEL=y CONFIG_SND_HDA_HWDEP=y CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_NTRIG=y @@ -239,11 +235,11 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_DEBUG=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_TT_NEWSCHED is not set @@ -262,10 +258,9 @@ CONFIG_AMD_IOMMU_STATS=y CONFIG_INTEL_IOMMU=y # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set CONFIG_EFI_VARS=y -CONFIG_EXT3_FS=y -# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -280,7 +275,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -298,13 +292,11 @@ CONFIG_DEBUG_KERNEL=y CONFIG_SCHEDSTATS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_STACK_USAGE=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_RODATA_TEST is not set -CONFIG_DEBUG_NX_TEST=m CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_KEYS_DEBUG_PROC_KEYS=y @@ -314,4 +306,3 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRC_T10DIF=y diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e908e5d..5bacb4a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -12,6 +12,8 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o +obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o +obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o @@ -32,6 +34,8 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o +cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o +cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 59b37de..aafe8ce 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -40,7 +40,6 @@ static struct crypto_alg aes_alg = { .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 34fdcff..1b9c22b 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -28,6 +28,9 @@ #include <crypto/aes.h> #include <crypto/cryptd.h> #include <crypto/ctr.h> +#include <crypto/b128ops.h> +#include <crypto/lrw.h> +#include <crypto/xts.h> #include <asm/cpu_device_id.h> #include <asm/i387.h> #include <asm/crypto/aes.h> @@ -41,18 +44,10 @@ #define HAS_CTR #endif -#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) -#define HAS_LRW -#endif - #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) #define HAS_PCBC #endif -#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) -#define HAS_XTS -#endif - /* This data is stored at the end of the crypto_tfm struct. * It's a type of per "session" data storage location. * This needs to be 16 byte aligned. @@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data { #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) #define RFC4106_HASH_SUBKEY_SIZE 16 +struct aesni_lrw_ctx { + struct lrw_table_ctx lrw_table; + u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; +}; + +struct aesni_xts_ctx { + u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; + u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1]; +}; + asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, @@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) #endif #endif -#ifdef HAS_LRW -static int ablk_lrw_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))"); -} -#endif - #ifdef HAS_PCBC static int ablk_pcbc_init(struct crypto_tfm *tfm) { @@ -412,12 +410,165 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm) } #endif -#ifdef HAS_XTS -static int ablk_xts_init(struct crypto_tfm *tfm) +static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) { - return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))"); + aesni_ecb_enc(ctx, blks, blks, nbytes); +} + +static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes) +{ + aesni_ecb_dec(ctx, blks, blks, nbytes); +} + +static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key, + keylen - AES_BLOCK_SIZE); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE); +} + +static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm) +{ + struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), + .crypt_fn = lrw_xts_encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = lrw_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = aes_ctx(ctx->raw_aes_ctx), + .crypt_fn = lrw_xts_decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = lrw_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2, + keylen / 2); +} + + +static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in) +{ + aesni_enc(ctx, out, in); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), + .tweak_fn = aesni_xts_tweak, + .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), + .crypt_fn = lrw_xts_encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = xts_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[8]; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = aes_ctx(ctx->raw_tweak_ctx), + .tweak_fn = aesni_xts_tweak, + .crypt_ctx = aes_ctx(ctx->raw_crypt_ctx), + .crypt_fn = lrw_xts_decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + kernel_fpu_begin(); + ret = xts_crypt(desc, dst, src, nbytes, &req); + kernel_fpu_end(); + + return ret; } -#endif #ifdef CONFIG_X86_64 static int rfc4106_init(struct crypto_tfm *tfm) @@ -1035,10 +1186,10 @@ static struct crypto_alg aesni_algs[] = { { }, #endif #endif -#ifdef HAS_LRW +#ifdef HAS_PCBC }, { - .cra_name = "lrw(aes)", - .cra_driver_name = "lrw-aes-aesni", + .cra_name = "pcbc(aes)", + .cra_driver_name = "pcbc-aes-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, @@ -1046,12 +1197,12 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_lrw_init, + .cra_init = ablk_pcbc_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, @@ -1059,10 +1210,50 @@ static struct crypto_alg aesni_algs[] = { { }, }, #endif -#ifdef HAS_PCBC }, { - .cra_name = "pcbc(aes)", - .cra_driver_name = "pcbc-aes-aesni", + .cra_name = "__lrw-aes-aesni", + .cra_driver_name = "__driver-lrw-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesni_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_exit = lrw_aesni_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = lrw_aesni_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-aes-aesni", + .cra_driver_name = "__driver-xts-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aesni_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = xts_aesni_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "lrw(aes)", + .cra_driver_name = "lrw-aes-aesni", .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, @@ -1070,20 +1261,18 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_pcbc_init, + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, + .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, .setkey = ablk_set_key, .encrypt = ablk_encrypt, .decrypt = ablk_decrypt, }, }, -#endif -#ifdef HAS_XTS }, { .cra_name = "xts(aes)", .cra_driver_name = "xts-aes-aesni", @@ -1094,7 +1283,7 @@ static struct crypto_alg aesni_algs[] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_init = ablk_xts_init, + .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { .ablkcipher = { @@ -1106,7 +1295,6 @@ static struct crypto_alg aesni_algs[] = { { .decrypt = ablk_decrypt, }, }, -#endif } }; @@ -1118,7 +1306,7 @@ MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); static int __init aesni_init(void) { - int err, i; + int err; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1127,9 +1315,6 @@ static int __init aesni_init(void) if (err) return err; - for (i = 0; i < ARRAY_SIZE(aesni_algs); i++) - INIT_LIST_HEAD(&aesni_algs[i].cra_list); - return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); } diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 7967474..50ec333 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -367,7 +367,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_ctxsize = sizeof(struct bf_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[0].cra_list), .cra_u = { .cipher = { .cia_min_keysize = BF_MIN_KEY_SIZE, @@ -387,7 +386,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, @@ -407,7 +405,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, @@ -428,7 +425,6 @@ static struct crypto_alg bf_algs[4] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(bf_algs[3].cra_list), .cra_u = { .blkcipher = { .min_keysize = BF_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index eeb2b3b..42ffd2b 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -92,715 +92,715 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* camellia sboxes */ const u64 camellia_sp10011110[256] = { - 0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00, - 0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700, - 0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400, - 0x8500008585858500, 0x5700005757575700, 0x3500003535353500, - 0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00, - 0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00, - 0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500, - 0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100, - 0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00, - 0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500, - 0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600, - 0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00, - 0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00, - 0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000, - 0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00, - 0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00, - 0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900, - 0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700, - 0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900, - 0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600, - 0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00, - 0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00, - 0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00, - 0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00, - 0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00, - 0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100, - 0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00, - 0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200, - 0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600, - 0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700, - 0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100, - 0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700, - 0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00, - 0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00, - 0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200, - 0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600, - 0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200, - 0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400, - 0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300, - 0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100, - 0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800, - 0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00, - 0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00, - 0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00, - 0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200, - 0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00, - 0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000, - 0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200, - 0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000, - 0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700, - 0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00, - 0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00, - 0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00, - 0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00, - 0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00, - 0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300, - 0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600, - 0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00, - 0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200, - 0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600, - 0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600, - 0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600, - 0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300, - 0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00, - 0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700, - 0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00, - 0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900, - 0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600, - 0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400, - 0x5900005959595900, 0x7800007878787800, 0x9800009898989800, - 0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700, - 0x4600004646464600, 0x7100007171717100, 0xba0000babababa00, - 0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00, - 0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200, - 0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200, - 0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500, - 0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00, - 0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900, - 0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00, - 0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400, - 0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300, - 0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900, - 0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500, - 0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400, - 0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000, - 0x9e00009e9e9e9e00, + 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, + 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, + 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, + 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, + 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, + 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, + 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, + 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, + 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, + 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, + 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, + 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, + 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, + 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, + 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, + 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, + 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, + 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, + 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, + 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, + 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, + 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, + 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, + 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, + 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, + 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, + 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, + 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, + 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, + 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL, + 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, + 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, + 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, + 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, + 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, + 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, + 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, + 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, + 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, + 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, + 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, + 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, + 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, + 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, + 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, + 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, + 0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, + 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, + 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, + 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, + 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, + 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, + 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, + 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, + 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, + 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, + 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, + 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, + 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, + 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, + 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, + 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, + 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, + 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, + 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, + 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, + 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, + 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, + 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, + 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, + 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, + 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, + 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, + 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, + 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, + 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, + 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, + 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, + 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, + 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL, + 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, + 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, + 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, + 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, + 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, + 0x9e00009e9e9e9e00ULL, }; const u64 camellia_sp22000222[256] = { - 0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858, - 0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e, - 0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9, - 0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a, - 0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d, - 0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf, - 0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a, - 0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242, - 0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e, - 0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca, - 0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d, - 0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f, - 0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e, - 0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060, - 0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc, - 0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434, - 0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272, - 0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e, - 0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3, - 0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad, - 0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8, - 0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a, - 0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7, - 0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a, - 0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656, - 0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363, - 0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf, - 0x9898000000989898, 0x9797000000979797, 0x8585000000858585, - 0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec, - 0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f, - 0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3, - 0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf, - 0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474, - 0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636, - 0x2222000000222222, 0x3838000000383838, 0x6464000000646464, - 0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c, - 0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5, - 0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888, - 0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787, - 0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323, - 0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1, - 0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9, - 0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555, - 0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa, - 0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4, - 0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6, - 0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1, - 0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5, - 0x2020000000202020, 0x8989000000898989, 0x0000000000000000, - 0x9090000000909090, 0x4747000000474747, 0xefef000000efefef, - 0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515, - 0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5, - 0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb, - 0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8, - 0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b, - 0x9494000000949494, 0x2121000000212121, 0x6666000000666666, - 0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed, - 0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe, - 0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4, - 0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c, - 0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d, - 0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d, - 0xdede000000dedede, 0x9696000000969696, 0x2626000000262626, - 0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c, - 0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f, - 0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc, - 0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252, - 0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d, - 0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969, - 0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131, - 0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf, - 0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575, - 0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757, - 0x8484000000848484, 0x1111000000111111, 0x4545000000454545, - 0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4, - 0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa, - 0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959, - 0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292, - 0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878, - 0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949, - 0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7, - 0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393, - 0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a, - 0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9, - 0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101, - 0x3d3d0000003d3d3d, + 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, + 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, + 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, + 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, + 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, + 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, + 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, + 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, + 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, + 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, + 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, + 0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, + 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, + 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, + 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, + 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, + 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, + 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, + 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, + 0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, + 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, + 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, + 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, + 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, + 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, + 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, + 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, + 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, + 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, + 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, + 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, + 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, + 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, + 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, + 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, + 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, + 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, + 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, + 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, + 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, + 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, + 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, + 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, + 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, + 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, + 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, + 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, + 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, + 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, + 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, + 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, + 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, + 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, + 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, + 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, + 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, + 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, + 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, + 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, + 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, + 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, + 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, + 0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL, + 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, + 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, + 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, + 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, + 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, + 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, + 0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, + 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, + 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, + 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, + 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, + 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, + 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, + 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, + 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, + 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, + 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, + 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, + 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, + 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, + 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, + 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, + 0x3d3d0000003d3d3dULL, }; const u64 camellia_sp03303033[256] = { - 0x0038380038003838, 0x0041410041004141, 0x0016160016001616, - 0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393, - 0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272, - 0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a, - 0x0075750075007575, 0x0006060006000606, 0x0057570057005757, - 0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7, - 0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2, - 0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090, - 0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7, - 0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2, - 0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343, - 0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7, - 0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f, - 0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818, - 0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f, - 0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d, - 0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c, - 0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3, - 0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec, - 0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b, - 0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636, - 0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686, - 0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd, - 0x0066660066006666, 0x0058580058005858, 0x0096960096009696, - 0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595, - 0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8, - 0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef, - 0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161, - 0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b, - 0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb, - 0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8, - 0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb, - 0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d, - 0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d, - 0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919, - 0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b, - 0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979, - 0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222, - 0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1, - 0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8, - 0x0012120012001212, 0x0004040004000404, 0x0074740074007474, - 0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e, - 0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555, - 0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe, - 0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131, - 0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad, - 0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070, - 0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969, - 0x0008080008000808, 0x0062620062006262, 0x0000000000000000, - 0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb, - 0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545, - 0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d, - 0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee, - 0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e, - 0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6, - 0x0025250025002525, 0x0048480048004848, 0x0099990099009999, - 0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b, - 0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf, - 0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929, - 0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313, - 0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363, - 0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b, - 0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989, - 0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717, - 0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3, - 0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737, - 0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494, - 0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b, - 0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a, - 0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c, - 0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3, - 0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d, - 0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5, - 0x0021210021002121, 0x0044440044004444, 0x0051510051005151, - 0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939, - 0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa, - 0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656, - 0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4, - 0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e, - 0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252, - 0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9, - 0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4, - 0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a, - 0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a, - 0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040, - 0x004f4f004f004f4f, + 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, + 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, + 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, + 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, + 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, + 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, + 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, + 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, + 0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, + 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, + 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, + 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, + 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, + 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, + 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, + 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, + 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, + 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, + 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, + 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, + 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, + 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, + 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, + 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, + 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, + 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, + 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, + 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, + 0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, + 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, + 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, + 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, + 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, + 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, + 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, + 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, + 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, + 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, + 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, + 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, + 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, + 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, + 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, + 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, + 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, + 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, + 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, + 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, + 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, + 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, + 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, + 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, + 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, + 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, + 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, + 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, + 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, + 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, + 0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL, + 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, + 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, + 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, + 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, + 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, + 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, + 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, + 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, + 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, + 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, + 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, + 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, + 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, + 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, + 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, + 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, + 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, + 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, + 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, + 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, + 0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, + 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, + 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, + 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, + 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, + 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, + 0x004f4f004f004f4fULL, }; const u64 camellia_sp00444404[256] = { - 0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3, - 0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057, - 0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023, - 0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5, - 0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d, - 0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af, - 0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e, - 0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b, - 0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5, - 0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a, - 0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b, - 0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0, - 0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0, - 0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb, - 0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d, - 0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004, - 0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de, - 0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c, - 0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe, - 0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a, - 0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060, - 0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0, - 0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054, - 0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064, - 0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3, - 0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6, - 0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087, - 0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090, - 0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d, - 0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8, - 0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081, - 0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063, - 0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f, - 0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9, - 0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078, - 0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071, - 0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088, - 0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9, - 0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036, - 0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1, - 0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb, - 0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad, - 0x0000777777770077, 0x0000808080800080, 0x0000828282820082, - 0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5, - 0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c, - 0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093, - 0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e, - 0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd, - 0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb, - 0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f, - 0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1, - 0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d, - 0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056, - 0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066, - 0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012, - 0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099, - 0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e, - 0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031, - 0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058, - 0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c, - 0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018, - 0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2, - 0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008, - 0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050, - 0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089, - 0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095, - 0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4, - 0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db, - 0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f, - 0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002, - 0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067, - 0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2, - 0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037, - 0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b, - 0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079, - 0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e, - 0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd, - 0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a, - 0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025, - 0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa, - 0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee, - 0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068, - 0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028, - 0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1, - 0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7, - 0x00009e9e9e9e009e, + 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, + 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, + 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, + 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, + 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, + 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, + 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, + 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, + 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, + 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, + 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, + 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, + 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, + 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, + 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, + 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, + 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, + 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, + 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, + 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, + 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, + 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, + 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, + 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, + 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, + 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, + 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, + 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, + 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, + 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, + 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, + 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, + 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, + 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, + 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, + 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, + 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, + 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, + 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, + 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, + 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, + 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, + 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, + 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, + 0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL, + 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, + 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, + 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, + 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, + 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, + 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, + 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, + 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, + 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, + 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, + 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, + 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, + 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, + 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, + 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, + 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, + 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, + 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, + 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, + 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, + 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, + 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, + 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, + 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, + 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, + 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, + 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, + 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, + 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, + 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, + 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, + 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, + 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, + 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, + 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, + 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, + 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, + 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, + 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, + 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, + 0x00009e9e9e9e009eULL, }; const u64 camellia_sp02220222[256] = { - 0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858, - 0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e, - 0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9, - 0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a, - 0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d, - 0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf, - 0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a, - 0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242, - 0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e, - 0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca, - 0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d, - 0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f, - 0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e, - 0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060, - 0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc, - 0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434, - 0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272, - 0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e, - 0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3, - 0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad, - 0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8, - 0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a, - 0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7, - 0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a, - 0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656, - 0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363, - 0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf, - 0x0098989800989898, 0x0097979700979797, 0x0085858500858585, - 0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec, - 0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f, - 0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3, - 0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf, - 0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474, - 0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636, - 0x0022222200222222, 0x0038383800383838, 0x0064646400646464, - 0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c, - 0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5, - 0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888, - 0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787, - 0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323, - 0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1, - 0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9, - 0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555, - 0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa, - 0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4, - 0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6, - 0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1, - 0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5, - 0x0020202000202020, 0x0089898900898989, 0x0000000000000000, - 0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef, - 0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515, - 0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5, - 0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb, - 0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8, - 0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b, - 0x0094949400949494, 0x0021212100212121, 0x0066666600666666, - 0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed, - 0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe, - 0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4, - 0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c, - 0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d, - 0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d, - 0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626, - 0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c, - 0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f, - 0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc, - 0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252, - 0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d, - 0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969, - 0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131, - 0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf, - 0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575, - 0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757, - 0x0084848400848484, 0x0011111100111111, 0x0045454500454545, - 0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4, - 0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa, - 0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959, - 0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292, - 0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878, - 0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949, - 0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7, - 0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393, - 0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a, - 0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9, - 0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101, - 0x003d3d3d003d3d3d, + 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, + 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, + 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, + 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, + 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, + 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, + 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, + 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, + 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, + 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, + 0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, + 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, + 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, + 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, + 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, + 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, + 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, + 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, + 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, + 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, + 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, + 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, + 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, + 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, + 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, + 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, + 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, + 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, + 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, + 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, + 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, + 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, + 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, + 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, + 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, + 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, + 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, + 0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, + 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, + 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, + 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, + 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, + 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, + 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, + 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, + 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, + 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, + 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, + 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, + 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, + 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, + 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, + 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, + 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, + 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, + 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, + 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, + 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, + 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, + 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, + 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, + 0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, + 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, + 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, + 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, + 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, + 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, + 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, + 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, + 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, + 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, + 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, + 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, + 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, + 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, + 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, + 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, + 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, + 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, + 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, + 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, + 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, + 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, + 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, + 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, + 0x003d3d3d003d3d3dULL, }; const u64 camellia_sp30333033[256] = { - 0x3800383838003838, 0x4100414141004141, 0x1600161616001616, - 0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393, - 0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272, - 0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a, - 0x7500757575007575, 0x0600060606000606, 0x5700575757005757, - 0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7, - 0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2, - 0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090, - 0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7, - 0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2, - 0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343, - 0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7, - 0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f, - 0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818, - 0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f, - 0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d, - 0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c, - 0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3, - 0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec, - 0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b, - 0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636, - 0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686, - 0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd, - 0x6600666666006666, 0x5800585858005858, 0x9600969696009696, - 0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595, - 0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8, - 0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef, - 0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161, - 0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b, - 0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb, - 0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8, - 0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb, - 0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d, - 0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d, - 0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919, - 0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b, - 0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979, - 0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222, - 0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1, - 0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8, - 0x1200121212001212, 0x0400040404000404, 0x7400747474007474, - 0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e, - 0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555, - 0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe, - 0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131, - 0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad, - 0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070, - 0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969, - 0x0800080808000808, 0x6200626262006262, 0x0000000000000000, - 0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb, - 0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545, - 0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d, - 0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee, - 0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e, - 0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6, - 0x2500252525002525, 0x4800484848004848, 0x9900999999009999, - 0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b, - 0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf, - 0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929, - 0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313, - 0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363, - 0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b, - 0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989, - 0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717, - 0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3, - 0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737, - 0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494, - 0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b, - 0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a, - 0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c, - 0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3, - 0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d, - 0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5, - 0x2100212121002121, 0x4400444444004444, 0x5100515151005151, - 0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939, - 0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa, - 0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656, - 0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4, - 0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e, - 0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252, - 0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9, - 0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4, - 0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a, - 0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a, - 0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040, - 0x4f004f4f4f004f4f, + 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, + 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, + 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, + 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, + 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, + 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, + 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, + 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, + 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, + 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, + 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, + 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, + 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, + 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, + 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, + 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, + 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, + 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, + 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, + 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, + 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, + 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, + 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, + 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, + 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, + 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, + 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL, + 0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, + 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, + 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, + 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, + 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, + 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, + 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, + 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, + 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, + 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, + 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, + 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, + 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, + 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, + 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, + 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, + 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, + 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, + 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, + 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, + 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, + 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, + 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, + 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, + 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, + 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, + 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, + 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, + 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, + 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, + 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, + 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, + 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, + 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, + 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, + 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, + 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, + 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, + 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, + 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, + 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, + 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, + 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, + 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, + 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, + 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, + 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, + 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, + 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, + 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, + 0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, + 0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, + 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, + 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, + 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, + 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, + 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, + 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, + 0x4f004f4f4f004f4fULL, }; const u64 camellia_sp44044404[256] = { - 0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3, - 0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057, - 0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023, - 0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5, - 0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d, - 0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af, - 0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e, - 0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b, - 0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5, - 0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a, - 0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b, - 0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0, - 0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0, - 0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb, - 0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d, - 0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004, - 0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de, - 0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c, - 0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe, - 0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a, - 0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060, - 0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0, - 0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054, - 0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064, - 0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3, - 0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6, - 0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087, - 0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090, - 0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d, - 0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8, - 0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081, - 0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063, - 0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f, - 0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9, - 0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078, - 0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071, - 0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088, - 0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9, - 0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036, - 0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1, - 0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb, - 0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad, - 0x7777007777770077, 0x8080008080800080, 0x8282008282820082, - 0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5, - 0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c, - 0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093, - 0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e, - 0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd, - 0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb, - 0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f, - 0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1, - 0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d, - 0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056, - 0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066, - 0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012, - 0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099, - 0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e, - 0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031, - 0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058, - 0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c, - 0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018, - 0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2, - 0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008, - 0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050, - 0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089, - 0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095, - 0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4, - 0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db, - 0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f, - 0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002, - 0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067, - 0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2, - 0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037, - 0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b, - 0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079, - 0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e, - 0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd, - 0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a, - 0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025, - 0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa, - 0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee, - 0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068, - 0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028, - 0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1, - 0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7, - 0x9e9e009e9e9e009e, + 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, + 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, + 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, + 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, + 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, + 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, + 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, + 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, + 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, + 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, + 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, + 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, + 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, + 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, + 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, + 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, + 0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, + 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, + 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, + 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, + 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, + 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, + 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, + 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, + 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, + 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, + 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, + 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, + 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, + 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, + 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, + 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, + 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, + 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, + 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, + 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, + 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, + 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, + 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, + 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, + 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, + 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, + 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, + 0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, + 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, + 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, + 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, + 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, + 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, + 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, + 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, + 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, + 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, + 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, + 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, + 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, + 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, + 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, + 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, + 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, + 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, + 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, + 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, + 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, + 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, + 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, + 0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, + 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, + 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, + 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, + 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, + 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, + 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, + 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, + 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, + 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, + 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, + 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, + 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, + 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, + 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, + 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, + 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, + 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, + 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, + 0x9e9e009e9e9e009eULL, }; const u64 camellia_sp11101110[256] = { - 0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00, - 0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700, - 0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400, - 0x8585850085858500, 0x5757570057575700, 0x3535350035353500, - 0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00, - 0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00, - 0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500, - 0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100, - 0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00, - 0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500, - 0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600, - 0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00, - 0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00, - 0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000, - 0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00, - 0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00, - 0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900, - 0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700, - 0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900, - 0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600, - 0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00, - 0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00, - 0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00, - 0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00, - 0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00, - 0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100, - 0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00, - 0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200, - 0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600, - 0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700, - 0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100, - 0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700, - 0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00, - 0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00, - 0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200, - 0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600, - 0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200, - 0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400, - 0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300, - 0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100, - 0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800, - 0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00, - 0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00, - 0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00, - 0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200, - 0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00, - 0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000, - 0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200, - 0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000, - 0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700, - 0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00, - 0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00, - 0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00, - 0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00, - 0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00, - 0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300, - 0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600, - 0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00, - 0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200, - 0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600, - 0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600, - 0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600, - 0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300, - 0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00, - 0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700, - 0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00, - 0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900, - 0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600, - 0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400, - 0x5959590059595900, 0x7878780078787800, 0x9898980098989800, - 0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700, - 0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00, - 0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00, - 0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200, - 0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200, - 0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500, - 0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00, - 0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900, - 0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00, - 0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400, - 0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300, - 0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900, - 0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500, - 0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400, - 0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000, - 0x9e9e9e009e9e9e00, + 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, + 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, + 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, + 0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL, + 0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL, + 0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL, + 0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL, + 0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL, + 0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL, + 0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL, + 0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL, + 0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL, + 0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL, + 0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL, + 0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL, + 0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL, + 0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL, + 0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL, + 0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL, + 0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL, + 0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL, + 0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL, + 0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL, + 0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL, + 0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL, + 0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL, + 0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL, + 0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL, + 0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL, + 0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL, + 0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL, + 0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL, + 0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL, + 0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL, + 0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL, + 0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL, + 0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL, + 0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL, + 0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL, + 0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL, + 0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL, + 0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL, + 0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL, + 0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL, + 0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL, + 0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL, + 0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL, + 0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL, + 0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL, + 0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL, + 0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL, + 0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL, + 0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL, + 0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL, + 0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL, + 0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL, + 0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL, + 0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL, + 0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL, + 0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL, + 0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL, + 0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL, + 0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL, + 0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL, + 0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL, + 0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL, + 0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL, + 0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL, + 0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL, + 0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL, + 0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL, + 0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL, + 0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL, + 0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL, + 0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL, + 0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL, + 0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL, + 0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL, + 0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL, + 0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL, + 0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL, + 0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL, + 0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL, + 0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL, + 0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL, + 0x9e9e9e009e9e9e00ULL, }; /* key constants */ @@ -1601,7 +1601,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_ctxsize = sizeof(struct camellia_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[0].cra_list), .cra_u = { .cipher = { .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1621,7 +1620,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1641,7 +1639,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1662,7 +1659,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[3].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE, @@ -1683,7 +1679,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[4].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -1707,7 +1702,6 @@ static struct crypto_alg camellia_algs[6] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(camellia_algs[5].cra_list), .cra_u = { .blkcipher = { .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2, diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S new file mode 100644 index 0000000..a41a3aa --- /dev/null +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -0,0 +1,376 @@ +/* + * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> + * + * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "cast5-avx-x86_64-asm_64.S" + +.extern cast5_s1 +.extern cast5_s2 +.extern cast5_s3 +.extern cast5_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (16*4) +#define rr ((16*4)+16) + +/* s-boxes */ +#define s1 cast5_s1 +#define s2 cast5_s2 +#define s3 cast5_s3 +#define s4 cast5_s4 + +/********************************************************************** + 16-way AVX cast5 + **********************************************************************/ +#define CTX %rdi + +#define RL1 %xmm0 +#define RR1 %xmm1 +#define RL2 %xmm2 +#define RR2 %xmm3 +#define RL3 %xmm4 +#define RR3 %xmm5 +#define RL4 %xmm6 +#define RR4 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 + +#define R32 %xmm13 +#define R1ST %xmm14 + +#define RTMP %xmm15 + +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ + \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) + +#define subround(a1, b1, a2, b2, f) \ + F ## f ## _2(a1, b1, a2, b2); + +#define round(l, r, n, f) \ + vbroadcastss (km+(4*n))(CTX), RKM; \ + vpand R1ST, RKR, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + vpsrldq $1, RKR, RKR; \ + subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \ + subround(l ## 3, r ## 3, l ## 4, r ## 4, f); + +#define enc_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; + +#define dec_preload_rkr() \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor kr(CTX), RKR, RKR; \ + vpshufb .Lbswap128_mask, RKR, RKR; + +#define transpose_2x4(x0, x1, t0, t1) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t1; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; + +#define inpack_blocks(in, x0, x1, t0, t1, rmask) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + \ + transpose_2x4(x0, x1, t0, t1) + +#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \ + transpose_2x4(x0, x1, t0, t1) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \ + transpose_2x4(x0, x1, t0, t1) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); + +.data + +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.L16_mask: + .byte 16, 16, 16, 16 +.L32_mask: + .byte 32, 0, 0, 0 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text + +.align 16 +.global __cast5_enc_blk_16way +.type __cast5_enc_blk_16way,@function; + +__cast5_enc_blk_16way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbp; + pushq %rbx; + pushq %rcx; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + enc_preload_rkr(); + + leaq 1*(2*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM); + inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM); + + movq %rsi, %r11; + + round(RL, RR, 0, 1); + round(RR, RL, 1, 2); + round(RL, RR, 2, 3); + round(RR, RL, 3, 1); + round(RL, RR, 4, 2); + round(RR, RL, 5, 3); + round(RL, RR, 6, 1); + round(RR, RL, 7, 2); + round(RL, RR, 8, 3); + round(RR, RL, 9, 1); + round(RL, RR, 10, 2); + round(RR, RL, 11, 3); + + movzbl rr(CTX), %eax; + testl %eax, %eax; + jnz __skip_enc; + + round(RL, RR, 12, 1); + round(RR, RL, 13, 2); + round(RL, RR, 14, 3); + round(RR, RL, 15, 1); + +__skip_enc: + popq %rcx; + popq %rbx; + popq %rbp; + + vmovdqa .Lbswap_mask, RKM; + leaq 1*(2*4*4)(%r11), %rax; + + testb %cl, %cl; + jnz __enc_xor16; + + outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM); + outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM); + + ret; + +__enc_xor16: + outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM); + outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%r11), %rax; + outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%r11), %rax; + outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM); + + ret; + +.align 16 +.global cast5_dec_blk_16way +.type cast5_dec_blk_16way,@function; + +cast5_dec_blk_16way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pushq %rbp; + pushq %rbx; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + dec_preload_rkr(); + + leaq 1*(2*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM); + inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%rdx), %rax; + inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM); + + movq %rsi, %r11; + + movzbl rr(CTX), %eax; + testl %eax, %eax; + jnz __skip_dec; + + round(RL, RR, 15, 1); + round(RR, RL, 14, 3); + round(RL, RR, 13, 2); + round(RR, RL, 12, 1); + +__dec_tail: + round(RL, RR, 11, 3); + round(RR, RL, 10, 2); + round(RL, RR, 9, 1); + round(RR, RL, 8, 3); + round(RL, RR, 7, 2); + round(RR, RL, 6, 1); + round(RL, RR, 5, 3); + round(RR, RL, 4, 2); + round(RL, RR, 3, 1); + round(RR, RL, 2, 3); + round(RL, RR, 1, 2); + round(RR, RL, 0, 1); + + vmovdqa .Lbswap_mask, RKM; + popq %rbx; + popq %rbp; + + leaq 1*(2*4*4)(%r11), %rax; + outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM); + outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM); + leaq 2*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM); + leaq 3*(2*4*4)(%r11), %rax; + outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM); + + ret; + +__skip_dec: + vpsrldq $4, RKR, RKR; + jmp __dec_tail; diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c new file mode 100644 index 0000000..e0ea14f --- /dev/null +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -0,0 +1,530 @@ +/* + * Glue Code for the AVX assembler implemention of the Cast5 Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/algapi.h> +#include <crypto/cast5.h> +#include <crypto/cryptd.h> +#include <crypto/ctr.h> +#include <asm/xcr.h> +#include <asm/xsave.h> +#include <asm/crypto/ablk_helper.h> +#include <asm/crypto/glue_helper.h> + +#define CAST5_PARALLEL_BLOCKS 16 + +asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast5_enc_blk_16way(ctx, dst, src, false); +} + +static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast5_enc_blk_16way(ctx, dst, src, true); +} + +static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst, + const u8 *src) +{ + cast5_dec_blk_16way(ctx, dst, src); +} + + +static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); +} + +static inline void cast5_fpu_end(bool fpu_enabled) +{ + return glue_fpu_end(fpu_enabled); +} + +static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, + bool enc) +{ + bool fpu_enabled = false; + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes; + int err; + + err = blkcipher_walk_virt(desc, walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk->nbytes)) { + u8 *wsrc = walk->src.virt.addr; + u8 *wdst = walk->dst.virt.addr; + + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + if (enc) + cast5_enc_blk_xway(ctx, wdst, wsrc); + else + cast5_dec_blk_xway(ctx, wdst, wsrc); + + wsrc += bsize * CAST5_PARALLEL_BLOCKS; + wdst += bsize * CAST5_PARALLEL_BLOCKS; + nbytes -= bsize * CAST5_PARALLEL_BLOCKS; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (enc) + __cast5_encrypt(ctx, wdst, wsrc); + else + __cast5_decrypt(ctx, wdst, wsrc); + + wsrc += bsize; + wdst += bsize; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + err = blkcipher_walk_done(desc, walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + return err; +} + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, true); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + + blkcipher_walk_init(&walk, dst, src, nbytes); + return ecb_crypt(desc, &walk, false); +} + +static unsigned int __cbc_encrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 *iv = (u64 *)walk->iv; + + do { + *dst = *src ^ *iv; + __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst); + iv = dst; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + + *(u64 *)walk->iv = *iv; + return nbytes; +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + while ((nbytes = walk.nbytes)) { + nbytes = __cbc_encrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + return err; +} + +static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ivs[CAST5_PARALLEL_BLOCKS - 1]; + u64 last_iv; + int i; + + /* Start of the last block. */ + src += nbytes / bsize - 1; + dst += nbytes / bsize - 1; + + last_iv = *src; + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1); + src -= CAST5_PARALLEL_BLOCKS - 1; + dst -= CAST5_PARALLEL_BLOCKS - 1; + + for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) + ivs[i] = src[i]; + + cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++) + *(dst + (i + 1)) ^= *(ivs + i); + + nbytes -= bsize; + if (nbytes < bsize) + goto done; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + for (;;) { + __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src); + + nbytes -= bsize; + if (nbytes < bsize) + break; + + *dst ^= *(src - 1); + src -= 1; + dst -= 1; + } + +done: + *dst ^= *(u64 *)walk->iv; + *(u64 *)walk->iv = last_iv; + + return nbytes; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes)) { + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + nbytes = __cbc_decrypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + return err; +} + +static void ctr_crypt_final(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + u8 *ctrblk = walk->iv; + u8 keystream[CAST5_BLOCK_SIZE]; + u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + unsigned int nbytes = walk->nbytes; + + __cast5_encrypt(ctx, keystream, ctrblk); + crypto_xor(keystream, src, nbytes); + memcpy(dst, keystream, nbytes); + + crypto_inc(ctrblk, CAST5_BLOCK_SIZE); +} + +static unsigned int __ctr_crypt(struct blkcipher_desc *desc, + struct blkcipher_walk *walk) +{ + struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + const unsigned int bsize = CAST5_BLOCK_SIZE; + unsigned int nbytes = walk->nbytes; + u64 *src = (u64 *)walk->src.virt.addr; + u64 *dst = (u64 *)walk->dst.virt.addr; + u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv); + __be64 ctrblocks[CAST5_PARALLEL_BLOCKS]; + int i; + + /* Process multi-block batch */ + if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { + do { + /* create ctrblks for parallel encrypt */ + for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + ctrblocks[i] = cpu_to_be64(ctrblk++); + } + + cast5_enc_blk_xway_xor(ctx, (u8 *)dst, + (u8 *)ctrblocks); + + src += CAST5_PARALLEL_BLOCKS; + dst += CAST5_PARALLEL_BLOCKS; + nbytes -= bsize * CAST5_PARALLEL_BLOCKS; + } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); + + if (nbytes < bsize) + goto done; + } + + /* Handle leftovers */ + do { + if (dst != src) + *dst = *src; + + ctrblocks[0] = cpu_to_be64(ctrblk++); + + __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks); + *dst ^= ctrblocks[0]; + + src += 1; + dst += 1; + nbytes -= bsize; + } while (nbytes >= bsize); + +done: + *(__be64 *)walk->iv = cpu_to_be64(ctrblk); + return nbytes; +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + bool fpu_enabled = false; + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE); + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + + while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { + fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + nbytes = __ctr_crypt(desc, &walk); + err = blkcipher_walk_done(desc, &walk, nbytes); + } + + cast5_fpu_end(fpu_enabled); + + if (walk.nbytes) { + ctr_crypt_final(desc, &walk); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + + +static struct crypto_alg cast5_algs[6] = { { + .cra_name = "__ecb-cast5-avx", + .cra_driver_name = "__driver-ecb-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-cast5-avx", + .cra_driver_name = "__driver-cbc-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = cast5_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-cast5-avx", + .cra_driver_name = "__driver-ctr-cast5-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cast5_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = cast5_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "ecb(cast5)", + .cra_driver_name = "ecb-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(cast5)", + .cra_driver_name = "cbc-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST5_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(cast5)", + .cra_driver_name = "ctr-cast5-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST5_MIN_KEY_SIZE, + .max_keysize = CAST5_MAX_KEY_SIZE, + .ivsize = CAST5_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +} }; + +static int __init cast5_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + pr_info("AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); +} + +static void __exit cast5_exit(void) +{ + crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs)); +} + +module_init(cast5_init); +module_exit(cast5_exit); + +MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("cast5"); diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S new file mode 100644 index 0000000..218d283 --- /dev/null +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -0,0 +1,383 @@ +/* + * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64) + * + * Copyright (C) 2012 Johannes Goetzfried + * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> + * + * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +.file "cast6-avx-x86_64-asm_64.S" + +.extern cast6_s1 +.extern cast6_s2 +.extern cast6_s3 +.extern cast6_s4 + +/* structure of crypto context */ +#define km 0 +#define kr (12*4*4) + +/* s-boxes */ +#define s1 cast6_s1 +#define s2 cast6_s2 +#define s3 cast6_s3 +#define s4 cast6_s4 + +/********************************************************************** + 8-way AVX cast6 + **********************************************************************/ +#define CTX %rdi + +#define RA1 %xmm0 +#define RB1 %xmm1 +#define RC1 %xmm2 +#define RD1 %xmm3 + +#define RA2 %xmm4 +#define RB2 %xmm5 +#define RC2 %xmm6 +#define RD2 %xmm7 + +#define RX %xmm8 + +#define RKM %xmm9 +#define RKR %xmm10 +#define RKRF %xmm11 +#define RKRR %xmm12 +#define R32 %xmm13 +#define R1ST %xmm14 + +#define RTMP %xmm15 + +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi + +#define RGI1 %rdx +#define RGI1bl %dl +#define RGI1bh %dh +#define RGI2 %rcx +#define RGI2bl %cl +#define RGI2bh %ch + +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + +#define RFS1 %r8 +#define RFS1d %r8d +#define RFS2 %r9 +#define RFS2d %r9d +#define RFS3 %r10 +#define RFS3d %r10d + + +#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + shrq $16, src; \ + movl s1(, RID1, 4), dst ## d; \ + op1 s2(, RID2, 4), dst ## d; \ + movzbl src ## bh, RID1d; \ + movzbl src ## bl, RID2d; \ + interleave_op(il_reg); \ + op2 s3(, RID1, 4), dst ## d; \ + op3 s4(, RID2, 4), dst ## d; + +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define F_head(a, x, gi1, gi2, op0) \ + op0 a, RKM, x; \ + vpslld RKRF, x, RTMP; \ + vpsrld RKRR, x, x; \ + vpor RTMP, x, x; \ + \ + vmovq x, gi1; \ + vpextrq $1, x, gi2; + +#define F_tail(a, x, gi1, gi2, op1, op2, op3) \ + lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \ + lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \ + \ + lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none); \ + shlq $32, RFS2; \ + orq RFS1, RFS2; \ + lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none); \ + shlq $32, RFS1; \ + orq RFS1, RFS3; \ + \ + vmovq RFS2, x; \ + vpinsrq $1, RFS3, x, x; + +#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \ + F_head(b1, RX, RGI1, RGI2, op0); \ + F_head(b2, RX, RGI3, RGI4, op0); \ + \ + F_tail(b1, RX, RGI1, RGI2, op1, op2, op3); \ + F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3); \ + \ + vpxor a1, RX, a1; \ + vpxor a2, RTMP, a2; + +#define F1_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl) +#define F2_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl) +#define F3_2(a1, b1, a2, b2) \ + F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl) + +#define qop(in, out, f) \ + F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2); + +#define get_round_keys(nn) \ + vbroadcastss (km+(4*(nn)))(CTX), RKM; \ + vpand R1ST, RKR, RKRF; \ + vpsubq RKRF, R32, RKRR; \ + vpsrldq $1, RKR, RKR; + +#define Q(n) \ + get_round_keys(4*n+0); \ + qop(RD, RC, 1); \ + \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ + \ + get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ + \ + get_round_keys(4*n+3); \ + qop(RA, RD, 1); + +#define QBAR(n) \ + get_round_keys(4*n+3); \ + qop(RA, RD, 1); \ + \ + get_round_keys(4*n+2); \ + qop(RB, RA, 3); \ + \ + get_round_keys(4*n+1); \ + qop(RC, RB, 2); \ + \ + get_round_keys(4*n+0); \ + qop(RD, RC, 1); + +#define shuffle(mask) \ + vpshufb mask, RKR, RKR; + +#define preload_rkr(n, do_mask, mask) \ + vbroadcastss .L16_mask, RKR; \ + /* add 16-bit rotation to key rotations (mod 32) */ \ + vpxor (kr+n*16)(CTX), RKR, RKR; \ + do_mask(mask); + +#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + vpunpckldq x1, x0, t0; \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x3; \ + \ + vpunpcklqdq t1, t0, x0; \ + vpunpckhqdq t1, t0, x1; \ + vpunpcklqdq x3, t2, x2; \ + vpunpckhqdq x3, t2, x3; + +#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \ + vmovdqu (0*4*4)(in), x0; \ + vmovdqu (1*4*4)(in), x1; \ + vmovdqu (2*4*4)(in), x2; \ + vmovdqu (3*4*4)(in), x3; \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ + \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) + +#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ + vmovdqu x0, (0*4*4)(out); \ + vmovdqu x1, (1*4*4)(out); \ + vmovdqu x2, (2*4*4)(out); \ + vmovdqu x3, (3*4*4)(out); + +#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \ + transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ + \ + vpshufb rmask, x0, x0; \ + vpshufb rmask, x1, x1; \ + vpshufb rmask, x2, x2; \ + vpshufb rmask, x3, x3; \ + vpxor (0*4*4)(out), x0, x0; \ + vmovdqu x0, (0*4*4)(out); \ + vpxor (1*4*4)(out), x1, x1; \ + vmovdqu x1, (1*4*4)(out); \ + vpxor (2*4*4)(out), x2, x2; \ + vmovdqu x2, (2*4*4)(out); \ + vpxor (3*4*4)(out), x3, x3; \ + vmovdqu x3, (3*4*4)(out); + +.data + +.align 16 +.Lbswap_mask: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_enc_Q_Q_QBAR_QBAR: + .byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_enc_QBAR_QBAR_QBAR_QBAR: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lrkr_dec_Q_Q_Q_Q: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 +.Lrkr_dec_Q_Q_QBAR_QBAR: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 +.Lrkr_dec_QBAR_QBAR_QBAR_QBAR: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.L16_mask: + .byte 16, 16, 16, 16 +.L32_mask: + .byte 32, 0, 0, 0 +.Lfirst_mask: + .byte 0x1f, 0, 0, 0 + +.text + +.align 16 +.global __cast6_enc_blk_8way +.type __cast6_enc_blk_8way,@function; + +__cast6_enc_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + * %rcx: bool, if true: xor output + */ + + pushq %rbp; + pushq %rbx; + pushq %rcx; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + movq %rsi, %r11; + + preload_rkr(0, dummy, none); + Q(0); + Q(1); + Q(2); + Q(3); + preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR); + Q(4); + Q(5); + QBAR(6); + QBAR(7); + preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR); + QBAR(8); + QBAR(9); + QBAR(10); + QBAR(11); + + popq %rcx; + popq %rbx; + popq %rbp; + + vmovdqa .Lbswap_mask, RKM; + leaq (4*4*4)(%r11), %rax; + + testb %cl, %cl; + jnz __enc_xor8; + + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + ret; + +__enc_xor8: + outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + ret; + +.align 16 +.global cast6_dec_blk_8way +.type cast6_dec_blk_8way,@function; + +cast6_dec_blk_8way: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + + pushq %rbp; + pushq %rbx; + + vmovdqa .Lbswap_mask, RKM; + vmovd .Lfirst_mask, R1ST; + vmovd .L32_mask, R32; + + leaq (4*4*4)(%rdx), %rax; + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + movq %rsi, %r11; + + preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q); + Q(11); + Q(10); + Q(9); + Q(8); + preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR); + Q(7); + Q(6); + QBAR(5); + QBAR(4); + preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR); + QBAR(3); + QBAR(2); + QBAR(1); + QBAR(0); + + popq %rbx; + popq %rbp; + + vmovdqa .Lbswap_mask, RKM; + leaq (4*4*4)(%r11), %rax; + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); + + ret; diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c new file mode 100644 index 0000000..15e5f85 --- /dev/null +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -0,0 +1,648 @@ +/* + * Glue Code for the AVX assembler implemention of the Cast6 Cipher + * + * Copyright (C) 2012 Johannes Goetzfried + * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + */ + +#include <linux/module.h> +#include <linux/hardirq.h> +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <crypto/algapi.h> +#include <crypto/cast6.h> +#include <crypto/cryptd.h> +#include <crypto/b128ops.h> +#include <crypto/ctr.h> +#include <crypto/lrw.h> +#include <crypto/xts.h> +#include <asm/xcr.h> +#include <asm/xsave.h> +#include <asm/crypto/ablk_helper.h> +#include <asm/crypto/glue_helper.h> + +#define CAST6_PARALLEL_BLOCKS 8 + +asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst, + const u8 *src, bool xor); +asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst, + const u8 *src); + +static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast6_enc_blk_8way(ctx, dst, src, false); +} + +static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + __cast6_enc_blk_8way(ctx, dst, src, true); +} + +static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst, + const u8 *src) +{ + cast6_dec_blk_8way(ctx, dst, src); +} + + +static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src) +{ + u128 ivs[CAST6_PARALLEL_BLOCKS - 1]; + unsigned int j; + + for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++) + ivs[j] = src[j]; + + cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src); + + for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++) + u128_xor(dst + (j + 1), dst + (j + 1), ivs + j); +} + +static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) +{ + be128 ctrblk; + + u128_to_be128(&ctrblk, iv); + u128_inc(iv); + + __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); + u128_xor(dst, src, (u128 *)&ctrblk); +} + +static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src, + u128 *iv) +{ + be128 ctrblks[CAST6_PARALLEL_BLOCKS]; + unsigned int i; + + for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) { + if (dst != src) + dst[i] = src[i]; + + u128_to_be128(&ctrblks[i], iv); + u128_inc(iv); + } + + cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks); +} + +static const struct common_glue_ctx cast6_enc = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) } + } } +}; + +static const struct common_glue_ctx cast6_ctr = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) } + } } +}; + +static const struct common_glue_ctx cast6_dec = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) } + }, { + .num_blocks = 1, + .fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) } + } } +}; + +static const struct common_glue_ctx cast6_dec_cbc = { + .num_funcs = 2, + .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, + + .funcs = { { + .num_blocks = CAST6_PARALLEL_BLOCKS, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) } + }, { + .num_blocks = 1, + .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) } + } } +}; + +static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes); +} + +static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes); +} + +static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc, + dst, src, nbytes); +} + +static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src, + nbytes); +} + +static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes); +} + +static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes) +{ + return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS, + NULL, fpu_enabled, nbytes); +} + +static inline void cast6_fpu_end(bool fpu_enabled) +{ + glue_fpu_end(fpu_enabled); +} + +struct crypt_priv { + struct cast6_ctx *ctx; + bool fpu_enabled; +}; + +static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAST6_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { + cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __cast6_encrypt(ctx->ctx, srcdst, srcdst); +} + +static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) +{ + const unsigned int bsize = CAST6_BLOCK_SIZE; + struct crypt_priv *ctx = priv; + int i; + + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); + + if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { + cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst); + return; + } + + for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) + __cast6_decrypt(ctx->ctx, srcdst, srcdst); +} + +struct cast6_lrw_ctx { + struct lrw_table_ctx lrw_table; + struct cast6_ctx cast6_ctx; +}; + +static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + int err; + + err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE, + &tfm->crt_flags); + if (err) + return err; + + return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE); +} + +static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->cast6_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->cast6_ctx, + .fpu_enabled = false, + }; + struct lrw_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .table_ctx = &ctx->lrw_table, + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = lrw_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static void lrw_exit_tfm(struct crypto_tfm *tfm) +{ + struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm); + + lrw_free_table(&ctx->lrw_table); +} + +struct cast6_xts_ctx { + struct cast6_ctx tweak_ctx; + struct cast6_ctx crypt_ctx; +}; + +static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int err; + + /* key consists of keys of equal size concatenated, therefore + * the length must be even + */ + if (keylen % 2) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* first half of xts-key is for crypt */ + err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, + flags); +} + +static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = encrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); + be128 buf[CAST6_PARALLEL_BLOCKS]; + struct crypt_priv crypt_ctx = { + .ctx = &ctx->crypt_ctx, + .fpu_enabled = false, + }; + struct xts_crypt_req req = { + .tbuf = buf, + .tbuflen = sizeof(buf), + + .tweak_ctx = &ctx->tweak_ctx, + .tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt), + .crypt_ctx = &crypt_ctx, + .crypt_fn = decrypt_callback, + }; + int ret; + + desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + ret = xts_crypt(desc, dst, src, nbytes, &req); + cast6_fpu_end(crypt_ctx.fpu_enabled); + + return ret; +} + +static struct crypto_alg cast6_algs[10] = { { + .cra_name = "__ecb-cast6-avx", + .cra_driver_name = "__driver-ecb-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}, { + .cra_name = "__cbc-cast6-avx", + .cra_driver_name = "__driver-cbc-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = cast6_setkey, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}, { + .cra_name = "__ctr-cast6-avx", + .cra_driver_name = "__driver-ctr-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cast6_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = cast6_setkey, + .encrypt = ctr_crypt, + .decrypt = ctr_crypt, + }, + }, +}, { + .cra_name = "__lrw-cast6-avx", + .cra_driver_name = "__driver-lrw-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_lrw_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_exit = lrw_exit_tfm, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE + + CAST6_BLOCK_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE + + CAST6_BLOCK_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = lrw_cast6_setkey, + .encrypt = lrw_encrypt, + .decrypt = lrw_decrypt, + }, + }, +}, { + .cra_name = "__xts-cast6-avx", + .cra_driver_name = "__driver-xts-cast6-avx", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct cast6_xts_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_u = { + .blkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE * 2, + .max_keysize = CAST6_MAX_KEY_SIZE * 2, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = xts_cast6_setkey, + .encrypt = xts_encrypt, + .decrypt = xts_decrypt, + }, + }, +}, { + .cra_name = "ecb(cast6)", + .cra_driver_name = "ecb-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "cbc(cast6)", + .cra_driver_name = "cbc-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = __ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "ctr(cast6)", + .cra_driver_name = "ctr-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_encrypt, + .geniv = "chainiv", + }, + }, +}, { + .cra_name = "lrw(cast6)", + .cra_driver_name = "lrw-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE + + CAST6_BLOCK_SIZE, + .max_keysize = CAST6_MAX_KEY_SIZE + + CAST6_BLOCK_SIZE, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}, { + .cra_name = "xts(cast6)", + .cra_driver_name = "xts-cast6-avx", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, + .cra_blocksize = CAST6_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_helper_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = ablk_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = CAST6_MIN_KEY_SIZE * 2, + .max_keysize = CAST6_MAX_KEY_SIZE * 2, + .ivsize = CAST6_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +} }; + +static int __init cast6_init(void) +{ + u64 xcr0; + + if (!cpu_has_avx || !cpu_has_osxsave) { + pr_info("AVX instructions are not detected.\n"); + return -ENODEV; + } + + xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { + pr_info("AVX detected but unusable.\n"); + return -ENODEV; + } + + return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); +} + +static void __exit cast6_exit(void) +{ + crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs)); +} + +module_init(cast6_init); +module_exit(cast6_exit); + +MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("cast6"); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index b4bf0a6..6759dd1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -150,7 +150,6 @@ static struct shash_alg ghash_alg = { .cra_blocksize = GHASH_BLOCK_SIZE, .cra_ctxsize = sizeof(struct ghash_ctx), .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_alg.base.cra_list), }, }; @@ -288,7 +287,6 @@ static struct ahash_alg ghash_async_alg = { .cra_blocksize = GHASH_BLOCK_SIZE, .cra_type = &crypto_ahash_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list), .cra_init = ghash_async_init_tfm, .cra_exit = ghash_async_exit_tfm, }, diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 4854f0f..30b3927 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -110,7 +110,7 @@ static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn, nbytes -= bsize; } while (nbytes >= bsize); - u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv); + *(u128 *)walk->iv = *iv; return nbytes; } diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index bccb76d..a3a3c02 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -97,7 +97,6 @@ static struct crypto_alg alg = { .cra_ctxsize = sizeof(struct salsa20_ctx), .cra_alignmask = 3, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), .cra_u = { .blkcipher = { .setkey = setkey, diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index b36bdac..3f543a0 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -390,7 +390,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -410,7 +409,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -430,7 +428,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -451,7 +448,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -475,7 +471,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE * 2, @@ -496,7 +491,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -518,7 +512,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -541,7 +534,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -565,7 +557,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -590,7 +581,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c index d679c86..9107a99 100644 --- a/arch/x86/crypto/serpent_sse2_glue.c +++ b/arch/x86/crypto/serpent_sse2_glue.c @@ -393,7 +393,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -413,7 +412,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -433,7 +431,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE, @@ -454,7 +451,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[3].cra_list), .cra_exit = lrw_exit_tfm, .cra_u = { .blkcipher = { @@ -478,7 +474,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = SERPENT_MIN_KEY_SIZE * 2, @@ -499,7 +494,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -521,7 +515,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -544,7 +537,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -568,7 +560,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -593,7 +584,6 @@ static struct crypto_alg serpent_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(serpent_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 35f4557..1585abb 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -4,6 +4,8 @@ * Copyright (C) 2012 Johannes Goetzfried * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> * + * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -47,16 +49,22 @@ #define RC2 %xmm6 #define RD2 %xmm7 -#define RX %xmm8 -#define RY %xmm9 +#define RX0 %xmm8 +#define RY0 %xmm9 + +#define RX1 %xmm10 +#define RY1 %xmm11 -#define RK1 %xmm10 -#define RK2 %xmm11 +#define RK1 %xmm12 +#define RK2 %xmm13 -#define RID1 %rax -#define RID1b %al -#define RID2 %rbx -#define RID2b %bl +#define RT %xmm14 +#define RR %xmm15 + +#define RID1 %rbp +#define RID1d %ebp +#define RID2 %rsi +#define RID2d %esi #define RGI1 %rdx #define RGI1bl %dl @@ -65,6 +73,13 @@ #define RGI2bl %cl #define RGI2bh %ch +#define RGI3 %rax +#define RGI3bl %al +#define RGI3bh %ah +#define RGI4 %rbx +#define RGI4bl %bl +#define RGI4bh %bh + #define RGS1 %r8 #define RGS1d %r8d #define RGS2 %r9 @@ -73,89 +88,123 @@ #define RGS3d %r10d -#define lookup_32bit(t0, t1, t2, t3, src, dst) \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ - movl t0(CTX, RID1, 4), dst ## d; \ - xorl t1(CTX, RID2, 4), dst ## d; \ +#define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \ + movzbl src ## bl, RID1d; \ + movzbl src ## bh, RID2d; \ shrq $16, src; \ - movb src ## bl, RID1b; \ - movb src ## bh, RID2b; \ + movl t0(CTX, RID1, 4), dst ## d; \ + movl t1(CTX, RID2, 4), RID2d; \ + movzbl src ## bl, RID1d; \ + xorl RID2d, dst ## d; \ + movzbl src ## bh, RID2d; \ + interleave_op(il_reg); \ xorl t2(CTX, RID1, 4), dst ## d; \ xorl t3(CTX, RID2, 4), dst ## d; -#define G(a, x, t0, t1, t2, t3) \ - vmovq a, RGI1; \ - vpsrldq $8, a, x; \ - vmovq x, RGI2; \ +#define dummy(d) /* do nothing */ + +#define shr_next(reg) \ + shrq $16, reg; + +#define G(gi1, gi2, x, t0, t1, t2, t3) \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1); \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2); \ + \ + lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none); \ + shlq $32, RGS2; \ + orq RGS1, RGS2; \ + lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none); \ + shlq $32, RGS1; \ + orq RGS1, RGS3; + +#define round_head_2(a, b, x1, y1, x2, y2) \ + vmovq b ## 1, RGI3; \ + vpextrq $1, b ## 1, RGI4; \ \ - lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \ - shrq $16, RGI1; \ - lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \ - shlq $32, RGS2; \ - orq RGS1, RGS2; \ + G(RGI1, RGI2, x1, s0, s1, s2, s3); \ + vmovq a ## 2, RGI1; \ + vpextrq $1, a ## 2, RGI2; \ + vmovq RGS2, x1; \ + vpinsrq $1, RGS3, x1, x1; \ \ - lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \ - shrq $16, RGI2; \ - lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \ - shlq $32, RGS3; \ - orq RGS1, RGS3; \ + G(RGI3, RGI4, y1, s1, s2, s3, s0); \ + vmovq b ## 2, RGI3; \ + vpextrq $1, b ## 2, RGI4; \ + vmovq RGS2, y1; \ + vpinsrq $1, RGS3, y1, y1; \ \ - vmovq RGS2, x; \ - vpinsrq $1, RGS3, x, x; + G(RGI1, RGI2, x2, s0, s1, s2, s3); \ + vmovq RGS2, x2; \ + vpinsrq $1, RGS3, x2, x2; \ + \ + G(RGI3, RGI4, y2, s1, s2, s3, s0); \ + vmovq RGS2, y2; \ + vpinsrq $1, RGS3, y2, y2; -#define encround(a, b, c, d, x, y) \ - G(a, x, s0, s1, s2, s3); \ - G(b, y, s1, s2, s3, s0); \ +#define encround_tail(a, b, c, d, x, y, prerotate) \ vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(b); \ + vpxor RT, c, c; \ vpaddd y, x, y; \ - vpaddd x, RK1, x; \ vpaddd y, RK2, y; \ - vpxor x, c, c; \ - vpsrld $1, c, x; \ + vpsrld $1, c, RT; \ vpslld $(32 - 1), c, c; \ - vpor c, x, c; \ - vpslld $1, d, x; \ - vpsrld $(32 - 1), d, d; \ - vpor d, x, d; \ - vpxor d, y, d; - -#define decround(a, b, c, d, x, y) \ - G(a, x, s0, s1, s2, s3); \ - G(b, y, s1, s2, s3, s0); \ + vpor c, RT, c; \ + vpxor d, y, d; \ + +#define decround_tail(a, b, c, d, x, y, prerotate) \ vpaddd x, y, x; \ + vpaddd x, RK1, RT;\ + prerotate(a); \ + vpxor RT, c, c; \ vpaddd y, x, y; \ vpaddd y, RK2, y; \ vpxor d, y, d; \ vpsrld $1, d, y; \ vpslld $(32 - 1), d, d; \ vpor d, y, d; \ - vpslld $1, c, y; \ - vpsrld $(32 - 1), c, c; \ - vpor c, y, c; \ - vpaddd x, RK1, x; \ - vpxor x, c, c; - -#define encrypt_round(n, a, b, c, d) \ - vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ - vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ - encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ - encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); - -#define decrypt_round(n, a, b, c, d) \ - vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ - vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ - decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \ - decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY); + +#define rotate_1l(x) \ + vpslld $1, x, RR; \ + vpsrld $(32 - 1), x, x; \ + vpor x, RR, x; + +#define preload_rgi(c) \ + vmovq c, RGI1; \ + vpextrq $1, c, RGI2; + +#define encrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); + +#define decrypt_round(n, a, b, c, d, preload, prerotate) \ + vbroadcastss (k+4*(2*(n)))(CTX), RK1; \ + vbroadcastss (k+4*(2*(n)+1))(CTX), RK2; \ + round_head_2(a, b, RX0, RY0, RX1, RY1); \ + decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \ + preload(c ## 1); \ + decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate); #define encrypt_cycle(n) \ - encrypt_round((2*n), RA, RB, RC, RD); \ - encrypt_round(((2*n) + 1), RC, RD, RA, RB); + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); + +#define encrypt_cycle_last(n) \ + encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \ + encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy); #define decrypt_cycle(n) \ - decrypt_round(((2*n) + 1), RC, RD, RA, RB); \ - decrypt_round((2*n), RA, RB, RC, RD); + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); +#define decrypt_cycle_last(n) \ + decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \ + decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy); #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ vpunpckldq x1, x0, t0; \ @@ -216,17 +265,20 @@ __twofish_enc_blk_8way: * %rcx: bool, if true: xor output */ + pushq %rbp; pushq %rbx; pushq %rcx; vmovdqu w(CTX), RK1; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); - inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + preload_rgi(RA1); + rotate_1l(RD1); + inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); + rotate_1l(RD2); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; encrypt_cycle(0); encrypt_cycle(1); @@ -235,26 +287,27 @@ __twofish_enc_blk_8way: encrypt_cycle(4); encrypt_cycle(5); encrypt_cycle(6); - encrypt_cycle(7); + encrypt_cycle_last(7); vmovdqu (w+4*4)(CTX), RK1; popq %rcx; popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; + leaq (4*4*4)(%r11), %rax; testb %cl, %cl; jnz __enc_xor8; - outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; __enc_xor8: - outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; @@ -269,16 +322,19 @@ twofish_dec_blk_8way: * %rdx: src */ + pushq %rbp; pushq %rbx; vmovdqu (w+4*4)(CTX), RK1; leaq (4*4*4)(%rdx), %rax; - inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2); - inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2); + inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); + preload_rgi(RC1); + rotate_1l(RA1); + inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); + rotate_1l(RA2); - xorq RID1, RID1; - xorq RID2, RID2; + movq %rsi, %r11; decrypt_cycle(7); decrypt_cycle(6); @@ -287,14 +343,15 @@ twofish_dec_blk_8way: decrypt_cycle(3); decrypt_cycle(2); decrypt_cycle(1); - decrypt_cycle(0); + decrypt_cycle_last(0); vmovdqu (w)(CTX), RK1; popq %rbx; + popq %rbp; - leaq (4*4*4)(%rsi), %rax; - outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2); - outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2); + leaq (4*4*4)(%r11), %rax; + outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); + outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); ret; diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 782b67d..e7708b5 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -378,7 +378,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -398,7 +397,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -418,7 +416,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -439,7 +436,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[3].cra_list), .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { @@ -463,7 +459,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE * 2, @@ -484,7 +479,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[5].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -506,7 +500,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[6].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -529,7 +522,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[7].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -553,7 +545,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[8].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { @@ -578,7 +569,6 @@ static struct crypto_alg twofish_algs[10] = { { .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(twofish_algs[9].cra_list), .cra_init = ablk_init, .cra_exit = ablk_exit, .cra_u = { diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 359ae08..0a52023 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -70,7 +70,6 @@ static struct crypto_alg alg = { .cra_ctxsize = sizeof(struct twofish_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = TF_MIN_KEY_SIZE, diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 15f9347..aa3eb35 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -342,7 +342,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -362,7 +361,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -383,7 +381,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE, @@ -404,7 +401,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), .cra_exit = lrw_twofish_exit_tfm, .cra_u = { .blkcipher = { @@ -426,7 +422,6 @@ static struct crypto_alg tf_algs[5] = { { .cra_alignmask = 0, .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list), .cra_u = { .blkcipher = { .min_keysize = TF_MIN_KEY_SIZE * 2, diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 673ac9b..efc6a95 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -32,6 +32,7 @@ #include <asm/sigframe.h> #include <asm/sighandling.h> #include <asm/sys_ia32.h> +#include <asm/smap.h> #define FIX_EFLAGS __FIX_EFLAGS @@ -162,7 +163,8 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, } seg = get_fs(); set_fs(KERNEL_DS); - ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL), + (stack_t __force __user *) &uoss, regs->sp); set_fs(seg); if (ret >= 0 && uoss_ptr) { if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) @@ -250,11 +252,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, get_user_ex(tmp, &sc->fpstate); buf = compat_ptr(tmp); - err |= restore_i387_xstate_ia32(buf); get_user_ex(*pax, &sc->ax); } get_user_catch(err); + err |= restore_xstate_sig(buf, 1); + return err; } @@ -361,7 +364,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, */ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, - void **fpstate) + void __user **fpstate) { unsigned long sp; @@ -381,9 +384,12 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, sp = (unsigned long) ka->sa.sa_restorer; if (used_math()) { - sp = sp - sig_xstate_ia32_size; - *fpstate = (struct _fpstate_ia32 *) sp; - if (save_i387_xstate_ia32(*fpstate) < 0) + unsigned long fx_aligned, math_size; + + sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); + *fpstate = (struct _fpstate_ia32 __user *) sp; + if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, + math_size) < 0) return (void __user *) -1L; } @@ -448,7 +454,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, * These are actually not used anymore, but left because some * gdb versions depend on them as a marker. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); if (err) @@ -502,7 +508,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sig, &frame->sig); put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); - err |= copy_siginfo_to_user32(&frame->info, info); /* Create the ucontext. */ if (cpu_has_xsave) @@ -514,9 +519,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -529,9 +531,14 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * Not actually used anymore, but left because some gdb * versions need it. */ - put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); + put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); + err |= copy_siginfo_to_user32(&frame->info, info); + err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20e5f7b..076745f 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -14,6 +14,7 @@ #include <asm/segment.h> #include <asm/irqflags.h> #include <asm/asm.h> +#include <asm/smap.h> #include <linux/linkage.h> #include <linux/err.h> @@ -146,8 +147,10 @@ ENTRY(ia32_sysenter_target) SAVE_ARGS 0,1,0 /* no need to do an access_ok check here because rbp has been 32bit zero extended */ + ASM_STAC 1: movl (%rbp),%ebp _ASM_EXTABLE(1b,ia32_badarg) + ASM_CLAC orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -301,8 +304,10 @@ ENTRY(ia32_cstar_target) /* no need to do an access_ok check here because r8 has been 32bit zero extended */ /* hardware stack frame is complete now */ + ASM_STAC 1: movl (%r8),%r9d _ASM_EXTABLE(1b,ia32_badarg) + ASM_CLAC orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) CFI_REMEMBER_STATE @@ -365,6 +370,7 @@ cstar_tracesys: END(ia32_cstar_target) ia32_badarg: + ASM_CLAC movq $-EFAULT,%rax jmp ia32_sysret CFI_ENDPROC @@ -459,7 +465,7 @@ GLOBAL(\label) PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx - PTREGSCALL stub32_execve, sys32_execve, %rcx + PTREGSCALL stub32_execve, compat_sys_execve, %rcx PTREGSCALL stub32_fork, sys_fork, %rdi PTREGSCALL stub32_clone, sys32_clone, %rdx PTREGSCALL stub32_vfork, sys_vfork, %rdi diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4540bec..86d68d1 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -287,7 +287,7 @@ asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, return ret; } -asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, +asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, int options) { return compat_sys_wait4(pid, stat_addr, options, NULL); @@ -385,21 +385,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return ret; } -asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv, - compat_uptr_t __user *envp, struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(name); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - return error; - error = compat_do_execve(filename, argv, envp, regs); - putname(filename); - return error; -} - asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs *regs) { diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index f9c0d3b..66e5f0e 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -26,3 +26,5 @@ header-y += vsyscall.h genhdr-y += unistd_32.h genhdr-y += unistd_64.h genhdr-y += unistd_x32.h + +generic-y += clkdev.h diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index 952bd01..372231c 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,3 +1,6 @@ +#ifndef _ASM_X86_ALTERNATIVE_ASM_H +#define _ASM_X86_ALTERNATIVE_ASM_H + #ifdef __ASSEMBLY__ #include <asm/asm.h> @@ -5,10 +8,10 @@ #ifdef CONFIG_SMP .macro LOCK_PREFIX 672: lock - .section .smp_locks,"a" + .pushsection .smp_locks,"a" .balign 4 .long 672b - . - .previous + .popsection .endm #else .macro LOCK_PREFIX @@ -24,3 +27,5 @@ .endm #endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_ALTERNATIVE_ASM_H */ diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 7078068..58ed6d9 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -29,10 +29,10 @@ #ifdef CONFIG_SMP #define LOCK_PREFIX_HERE \ - ".section .smp_locks,\"a\"\n" \ - ".balign 4\n" \ - ".long 671f - .\n" /* offset */ \ - ".previous\n" \ + ".pushsection .smp_locks,\"a\"\n" \ + ".balign 4\n" \ + ".long 671f - .\n" /* offset */ \ + ".popsection\n" \ "671:" #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " @@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); -extern void alternatives_smp_switch(int smp); +extern void alternatives_enable_smp(void); extern int alternatives_text_reserved(void *start, void *end); extern bool skip_smp_alternatives; #else @@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} -static inline void alternatives_smp_switch(int smp) {} +static inline void alternatives_enable_smp(void) {} static inline int alternatives_text_reserved(void *start, void *end) { return 0; @@ -99,30 +99,30 @@ static inline int alternatives_text_reserved(void *start, void *end) /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ OLDINSTR(oldinstr) \ - ".section .altinstructions,\"a\"\n" \ + ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature, 1) \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ + ".popsection\n" \ + ".pushsection .discard,\"aw\",@progbits\n" \ DISCARD_ENTRY(1) \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \ - ".previous" + ".popsection" #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\ OLDINSTR(oldinstr) \ - ".section .altinstructions,\"a\"\n" \ + ".pushsection .altinstructions,\"a\"\n" \ ALTINSTR_ENTRY(feature1, 1) \ ALTINSTR_ENTRY(feature2, 2) \ - ".previous\n" \ - ".section .discard,\"aw\",@progbits\n" \ + ".popsection\n" \ + ".pushsection .discard,\"aw\",@progbits\n" \ DISCARD_ENTRY(1) \ DISCARD_ENTRY(2) \ - ".previous\n" \ - ".section .altinstr_replacement, \"ax\"\n" \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ - ".previous" + ".popsection" /* * This must be included *after* the definition of ALTERNATIVE due to diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index f342612..3388034 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -409,7 +409,7 @@ extern struct apic *apic; * to enforce the order with in them. */ #define apic_driver(sym) \ - static struct apic *__apicdrivers_##sym __used \ + static const struct apic *__apicdrivers_##sym __used \ __aligned(sizeof(struct apic *)) \ __section(.apicdrivers) = { &sym } diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 58cb6d4..b6c3b82 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -240,30 +240,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return c; } - -/* - * atomic_dec_if_positive - decrement by 1 if old value positive - * @v: pointer of type atomic_t - * - * The function returns the old value of *v minus 1, even if - * the atomic variable, v, was not decremented. - */ -static inline int atomic_dec_if_positive(atomic_t *v) -{ - int c, old, dec; - c = atomic_read(v); - for (;;) { - dec = c - 1; - if (unlikely(dec < 0)) - break; - old = atomic_cmpxchg((v), c, dec); - if (likely(old == c)) - break; - c = old; - } - return dec; -} - /** * atomic_inc_short - increment of a short integer * @v: pointer to type int @@ -309,9 +285,9 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2) #define smp_mb__after_atomic_inc() barrier() #ifdef CONFIG_X86_32 -# include "atomic64_32.h" +# include <asm/atomic64_32.h> #else -# include "atomic64_64.h" +# include <asm/atomic64_64.h> #endif #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 72f5009..6dfd019 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -355,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr); */ static inline unsigned long __ffs(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "rm" (word)); return word; @@ -369,7 +369,7 @@ static inline unsigned long __ffs(unsigned long word) */ static inline unsigned long ffz(unsigned long word) { - asm("bsf %1,%0" + asm("rep; bsf %1,%0" : "=r" (word) : "r" (~word)); return word; @@ -417,10 +417,9 @@ static inline int ffs(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsfl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsfl %1,%0\n\t" "cmovzl %2,%0" @@ -459,10 +458,9 @@ static inline int fls(int x) * We cannot do this on 32 bits because at the very least some * 486 CPUs did not behave this way. */ - long tmp = -1; asm("bsrl %1,%0" : "=r" (r) - : "rm" (x), "0" (tmp)); + : "rm" (x), "0" (-1)); #elif defined(CONFIG_X86_CMOV) asm("bsrl %1,%0\n\t" "cmovzl %2,%0" @@ -490,13 +488,13 @@ static inline int fls(int x) #ifdef CONFIG_X86_64 static __always_inline int fls64(__u64 x) { - long bitpos = -1; + int bitpos = -1; /* * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the * dest reg is undefined if x==0, but their CPU architect says its * value is written to set it to the same as before. */ - asm("bsrq %1,%0" + asm("bsrq %1,%q0" : "+r" (bitpos) : "rm" (x)); return bitpos + 1; diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index a9e3a74..0fa6750 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -46,41 +46,39 @@ For 32-bit we have the following conventions - kernel is built with */ -#include "dwarf2.h" +#include <asm/dwarf2.h> /* - * 64-bit system call stack frame layout defines and helpers, for - * assembly code (note that the seemingly unnecessary parentheses - * are to prevent cpp from inserting spaces in expressions that get - * passed to macros): + * 64-bit system call stack frame layout defines and helpers, + * for assembly code: */ -#define R15 (0) -#define R14 (8) -#define R13 (16) -#define R12 (24) -#define RBP (32) -#define RBX (40) +#define R15 0 +#define R14 8 +#define R13 16 +#define R12 24 +#define RBP 32 +#define RBX 40 /* arguments: interrupts/non tracing syscalls only save up to here: */ -#define R11 (48) -#define R10 (56) -#define R9 (64) -#define R8 (72) -#define RAX (80) -#define RCX (88) -#define RDX (96) -#define RSI (104) -#define RDI (112) -#define ORIG_RAX (120) /* + error_code */ +#define R11 48 +#define R10 56 +#define R9 64 +#define R8 72 +#define RAX 80 +#define RCX 88 +#define RDX 96 +#define RSI 104 +#define RDI 112 +#define ORIG_RAX 120 /* + error_code */ /* end of arguments */ /* cpu exception frame or undefined in case of fast syscall: */ -#define RIP (128) -#define CS (136) -#define EFLAGS (144) -#define RSP (152) -#define SS (160) +#define RIP 128 +#define CS 136 +#define EFLAGS 144 +#define RSP 152 +#define SS 160 #define ARGOFFSET R11 #define SWFRAME ORIG_RAX diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h index 848850f..5f5bb0f 100644 --- a/arch/x86/include/asm/checksum.h +++ b/arch/x86/include/asm/checksum.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "checksum_32.h" +# include <asm/checksum_32.h> #else -# include "checksum_64.h" +# include <asm/checksum_64.h> #endif diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 99480e5..8d871ea 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -138,9 +138,9 @@ extern void __add_wrong_size(void) __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_32 -# include "cmpxchg_32.h" +# include <asm/cmpxchg_32.h> #else -# include "cmpxchg_64.h" +# include <asm/cmpxchg_64.h> #endif #ifdef __HAVE_ARCH_CMPXCHG diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index fedf32b..59c6c40 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -41,6 +41,7 @@ typedef s64 __attribute__((aligned(4))) compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; typedef u64 __attribute__((aligned(4))) compat_u64; +typedef u32 compat_uptr_t; struct compat_timespec { compat_time_t tv_sec; @@ -124,6 +125,78 @@ typedef u32 compat_old_sigset_t; /* at least 32 bits */ typedef u32 compat_sigset_word; +typedef union compat_sigval { + compat_int_t sival_int; + compat_uptr_t sival_ptr; +} compat_sigval_t; + +typedef struct compat_siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[128/sizeof(int) - 3]; + + /* kill() */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + compat_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + compat_sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + int _overrun_incr; /* amount to add to overrun */ + } _timer; + + /* POSIX.1b signals */ + struct { + unsigned int _pid; /* sender's pid */ + unsigned int _uid; /* sender's uid */ + compat_sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_clock_t _utime; + compat_clock_t _stime; + } _sigchld; + + /* SIGCHLD (x32 version) */ + struct { + unsigned int _pid; /* which child */ + unsigned int _uid; /* sender's uid */ + int _status; /* exit code */ + compat_s64 _utime; + compat_s64 _stime; + } _sigchld_x32; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + unsigned int _addr; /* faulting insn/memory ref. */ + } _sigfault; + + /* SIGPOLL */ + struct { + int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + + struct { + unsigned int _call_addr; /* calling insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; + } _sifields; +} compat_siginfo_t; + #define COMPAT_OFF_T_MAX 0x7fffffff #define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL @@ -209,7 +282,6 @@ typedef struct user_regs_struct32 compat_elf_gregset_t; * as pointers because the syscall entry code will have * appropriately converted them already. */ -typedef u32 compat_uptr_t; static inline void __user *compat_ptr(compat_uptr_t uptr) { diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 6b7ee5f..8c297aa 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -4,7 +4,9 @@ #ifndef _ASM_X86_CPUFEATURE_H #define _ASM_X86_CPUFEATURE_H +#ifndef _ASM_X86_REQUIRED_FEATURES_H #include <asm/required-features.h> +#endif #define NCAPINTS 10 /* N 32-bit words worth of info */ @@ -97,6 +99,7 @@ #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ +#define X86_FEATURE_EAGER_FPU (3*32+29) /* "eagerfpu" Non lazy FPU restore */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ @@ -209,6 +212,7 @@ #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -299,12 +303,14 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) +#define cpu_has_xsaveopt boot_cpu_has(X86_FEATURE_XSAVEOPT) #define cpu_has_osxsave boot_cpu_has(X86_FEATURE_OSXSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) +#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c9dcc18..6e8fdf5 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -35,7 +35,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ efi_call_virt(f, a1, a2, a3, a4, a5, a6) -#define efi_ioremap(addr, size, type) ioremap_cache(addr, size) +#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size) #else /* !CONFIG_X86_32 */ @@ -89,7 +89,7 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, - u32 type); + u32 type, u64 attribute); #endif /* CONFIG_X86_32 */ @@ -98,6 +98,8 @@ extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern int efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); +extern void efi_unmap_memmap(void); +extern void efi_memory_uc(u64 addr, unsigned long size); #ifndef CONFIG_EFI /* diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 75f4c6d..831dbb9 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -12,6 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/regset.h> +#include <linux/compat.h> #include <linux/slab.h> #include <asm/asm.h> #include <asm/cpufeature.h> @@ -20,43 +21,76 @@ #include <asm/user.h> #include <asm/uaccess.h> #include <asm/xsave.h> +#include <asm/smap.h> -extern unsigned int sig_xstate_size; +#ifdef CONFIG_X86_64 +# include <asm/sigcontext32.h> +# include <asm/user32.h> +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern unsigned int mxcsr_feature_mask; extern void fpu_init(void); +extern void eager_fpu_init(void); DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, xstateregs_set; - /* * xstateregs_active == fpregs_active. Please refer to the comment * at the definition of fpregs_active. */ #define xstateregs_active fpregs_active -extern struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -extern unsigned int sig_xstate_ia32_size; -extern struct _fpx_sw_bytes fx_sw_reserved_ia32; -struct _fpstate_ia32; -struct _xstate_ia32; -extern int save_i387_xstate_ia32(void __user *buf); -extern int restore_i387_xstate_ia32(void __user *buf); -#endif - #ifdef CONFIG_MATH_EMULATION +# define HAVE_HWFP (boot_cpu_data.hard_math) extern void finit_soft_fpu(struct i387_soft_struct *soft); #else +# define HAVE_HWFP 1 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + #define X87_FSW_ES (1 << 7) /* Exception Summary */ +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has(X86_FEATURE_EAGER_FPU); +} + static __always_inline __pure bool use_xsaveopt(void) { return static_cpu_has(X86_FEATURE_XSAVEOPT); @@ -72,6 +106,13 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has(X86_FEATURE_FXSR); } +static inline void fx_finit(struct i387_fxsave_struct *fx) +{ + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} + extern void __sanitize_i387_state(struct task_struct *); static inline void sanitize_i387_state(struct task_struct *tsk) @@ -81,131 +122,121 @@ static inline void sanitize_i387_state(struct task_struct *tsk) __sanitize_i387_state(tsk); } -#ifdef CONFIG_X86_64 -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - int err; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxrstorq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "m" (*fx), "0" (0)); -#else - asm volatile("1: rex64/fxrstor (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err) - : [fx] "R" (fx), "m" (*fx), "0" (0)); -#endif - return err; +#define user_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile(ASM_STAC "\n" \ + "1:" #insn "\n\t" \ + "2: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define check_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int fsave_user(struct i387_fsave_struct __user *fx) +{ + return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { - int err; + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - /* - * Clear the bytes not touched by the fxsave and reserved - * for the SW usage. - */ - err = __clear_user(&fx->sw_reserved, - sizeof(struct _fpx_sw_bytes)); - if (unlikely(err)) - return -EFAULT; - - /* See comment in fxsave() below. */ -#ifdef CONFIG_AS_FXSAVEQ - asm volatile("1: fxsaveq %[fx]\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), [fx] "=m" (*fx) - : "0" (0)); -#else - asm volatile("1: rex64/fxsave (%[fx])\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3: movl $-1,%[err]\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : [err] "=r" (err), "=m" (*fx) - : [fx] "R" (fx), "0" (0)); -#endif - if (unlikely(err) && - __clear_user(fx, sizeof(struct i387_fxsave_struct))) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ - return err; + /* See comment in fpu_fxsave() below. */ + return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline void fpu_fxsave(struct fpu *fpu) +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - /* Using "rex64; fxsave %0" is broken because, if the memory operand - uses any extended registers for addressing, a second REX prefix - will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -#ifdef CONFIG_AS_FXSAVEQ - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#else - /* Using, as a workaround, the properly prefixed form below isn't - accepted by any binutils version so far released, complaining that - the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). - asm volatile("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); - This, however, we can work around by forcing the compiler to select - an addressing mode that doesn't require extended registers. */ - asm volatile("rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); -#endif + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); } -#else /* CONFIG_X86_32 */ +static inline int fxrstor_user(struct i387_fxsave_struct __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); -/* perform fxrstor iff the processor has extended states, otherwise frstor */ -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) + /* See comment in fpu_fxsave() below. */ + return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); +} + +static inline int frstor_checking(struct i387_fsave_struct *fx) { - /* - * The "nop" is needed to make the instructions the same - * length. - */ - alternative_input( - "nop ; frstor %1", - "fxrstor %1", - X86_FEATURE_FXSR, - "m" (*fx)); + return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} - return 0; +static inline int frstor_user(struct i387_fsave_struct __user *fx) +{ + return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline void fpu_fxsave(struct fpu *fpu) { - asm volatile("fxsave %[fx]" - : [fx] "=m" (fpu->state->fxsave)); + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %0" : "=m" (fpu->state->fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. + * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); + } } -#endif /* CONFIG_X86_64 */ - /* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact. @@ -248,17 +279,14 @@ static inline int __save_init_fpu(struct task_struct *tsk) return fpu_save_init(&tsk->thread.fpu); } -static inline int fpu_fxrstor_checking(struct fpu *fpu) -{ - return fxrstor_checking(&fpu->state->fxsave); -} - static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) - return fpu_xrstor_checking(fpu); + return fpu_xrstor_checking(&fpu->state->xsave); + else if (use_fxsr()) + return fxrstor_checking(&fpu->state->fxsave); else - return fpu_fxrstor_checking(fpu); + return frstor_checking(&fpu->state->fsave); } static inline int restore_fpu_checking(struct task_struct *tsk) @@ -310,15 +338,52 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) static inline void __thread_fpu_end(struct task_struct *tsk) { __thread_clear_has_fpu(tsk); - stts(); + if (!use_eager_fpu()) + stts(); } static inline void __thread_fpu_begin(struct task_struct *tsk) { - clts(); + if (!use_eager_fpu()) + clts(); __thread_set_has_fpu(tsk); } +static inline void __drop_fpu(struct task_struct *tsk) +{ + if (__thread_has_fpu(tsk)) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(tsk); + } +} + +static inline void drop_fpu(struct task_struct *tsk) +{ + /* + * Forget coprocessor state.. + */ + preempt_disable(); + tsk->fpu_counter = 0; + __drop_fpu(tsk); + clear_used_math(); + preempt_enable(); +} + +static inline void drop_init_fpu(struct task_struct *tsk) +{ + if (!use_eager_fpu()) + drop_fpu(tsk); + else { + if (use_xsave()) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); + } +} + /* * FPU state switching for scheduling. * @@ -352,7 +417,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta { fpu_switch_t fpu; - fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. + */ + fpu.preload = tsk_used_math(new) && (use_eager_fpu() || + new->fpu_counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) cpu = ~0; @@ -364,14 +434,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); - } else + } else if (!use_eager_fpu()) stts(); } else { old->fpu_counter = 0; old->thread.fpu.last_cpu = ~0; if (fpu.preload) { new->fpu_counter++; - if (fpu_lazy_restore(new, cpu)) + if (!use_eager_fpu() && fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -391,44 +461,40 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { if (fpu.preload) { if (unlikely(restore_fpu_checking(new))) - __thread_fpu_end(new); + drop_init_fpu(new); } } /* * Signal frame handlers... */ -extern int save_i387_xstate(void __user *buf); -extern int restore_i387_xstate(void __user *buf); +extern int save_xstate_sig(void __user *buf, void __user *fx, int size); +extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); -static inline void __clear_fpu(struct task_struct *tsk) +static inline int xstate_sigframe_size(void) { - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); + return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +static inline int restore_xstate_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); } + + return __restore_xstate_sig(buf, buf_fx, size); } /* - * The actual user_fpu_begin/end() functions - * need to be preemption-safe. + * Need to be preemption-safe. * - * NOTE! user_fpu_end() must be used only after you - * have saved the FP state, and user_fpu_begin() must - * be used only immediately before restoring it. - * These functions do not do any save/restore on - * their own. + * NOTE! user_fpu_begin() must be used only immediately before restoring + * it. This function does not do any save/restore on their own. */ -static inline void user_fpu_end(void) -{ - preempt_disable(); - __thread_fpu_end(current); - preempt_enable(); -} - static inline void user_fpu_begin(void) { preempt_disable(); @@ -437,25 +503,32 @@ static inline void user_fpu_begin(void) preempt_enable(); } +static inline void __save_fpu(struct task_struct *tsk) +{ + if (use_xsave()) + xsave_state(&tsk->thread.fpu.state->xsave, -1); + else + fpu_fxsave(&tsk->thread.fpu); +} + /* * These disable preemption on their own and are safe */ static inline void save_init_fpu(struct task_struct *tsk) { WARN_ON_ONCE(!__thread_has_fpu(tsk)); + + if (use_eager_fpu()) { + __save_fpu(tsk); + return; + } + preempt_disable(); __save_init_fpu(tsk); __thread_fpu_end(tsk); preempt_enable(); } -static inline void clear_fpu(struct task_struct *tsk) -{ - preempt_disable(); - __clear_fpu(tsk); - preempt_enable(); -} - /* * i387 state interaction */ @@ -510,11 +583,34 @@ static inline void fpu_free(struct fpu *fpu) } } -static inline void fpu_copy(struct fpu *dst, struct fpu *src) +static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) { - memcpy(dst->state, src->state, xstate_size); + if (use_eager_fpu()) { + memset(&dst->thread.fpu.state->xsave, 0, xstate_size); + __save_fpu(dst); + } else { + struct fpu *dfpu = &dst->thread.fpu; + struct fpu *sfpu = &src->thread.fpu; + + unlazy_fpu(src); + memcpy(dfpu->state, sfpu->state, xstate_size); + } } -extern void fpu_finit(struct fpu *fpu); +static inline unsigned long +alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, + unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + return sp; +} #endif diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc..9a25b52 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,38 +3,54 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 71ecbcb..f373046 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -9,10 +9,13 @@ #include <asm/asm.h> #include <asm/errno.h> #include <asm/processor.h> +#include <asm/smap.h> #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ - asm volatile("1:\t" insn "\n" \ - "2:\t.section .fixup,\"ax\"\n" \ + asm volatile("\t" ASM_STAC "\n" \ + "1:\t" insn "\n" \ + "2:\t" ASM_CLAC "\n" \ + "\t.section .fixup,\"ax\"\n" \ "3:\tmov\t%3, %1\n" \ "\tjmp\t2b\n" \ "\t.previous\n" \ @@ -21,12 +24,14 @@ : "i" (-EFAULT), "0" (oparg), "1" (0)) #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ - asm volatile("1:\tmovl %2, %0\n" \ + asm volatile("\t" ASM_STAC "\n" \ + "1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t1b\n" \ - "3:\t.section .fixup,\"ax\"\n" \ + "3:\t" ASM_CLAC "\n" \ + "\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ "\tjmp\t3b\n" \ "\t.previous\n" \ @@ -122,8 +127,10 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" - "2:\t.section .fixup, \"ax\"\n" + asm volatile("\t" ASM_STAC "\n" + "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" + "2:\t" ASM_CLAC "\n" + "\t.section .fixup, \"ax\"\n" "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index d3895db..81f04ce 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -18,6 +18,10 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; + /* + * irq_tlb_count is double-counted in irq_call_count, so it must be + * subtracted from irq_call_count when displaying irq_call_count + */ unsigned int irq_tlb_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 439a9ac..bdd35db 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -90,4 +90,8 @@ static inline void arch_release_hugepage(struct page *page) { } +static inline void arch_clear_hugepage_flags(struct page *page) +{ +} + #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 257d9cc..ed8089d6 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -19,12 +19,37 @@ struct pt_regs; struct user_i387_struct; extern int init_fpu(struct task_struct *child); +extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); extern bool irq_fpu_usable(void); -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. + */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); + +static inline void kernel_fpu_begin(void) +{ + WARN_ON_ONCE(!irq_fpu_usable()); + preempt_disable(); + __kernel_fpu_begin(); +} + +static inline void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} /* * Some instructions like VIA's padlock instructions generate a spurious diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index b04cbdb..e623277 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -86,73 +86,6 @@ struct stat64 { unsigned long long st_ino; } __attribute__((packed)); -typedef struct compat_siginfo { - int si_signo; - int si_errno; - int si_code; - - union { - int _pad[((128 / sizeof(int)) - 3)]; - - /* kill() */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - } _kill; - - /* POSIX.1b timers */ - struct { - compat_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ - compat_sigval_t _sigval; /* same as below */ - int _sys_private; /* not to be passed to user */ - int _overrun_incr; /* amount to add to overrun */ - } _timer; - - /* POSIX.1b signals */ - struct { - unsigned int _pid; /* sender's pid */ - unsigned int _uid; /* sender's uid */ - compat_sigval_t _sigval; - } _rt; - - /* SIGCHLD */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_clock_t _utime; - compat_clock_t _stime; - } _sigchld; - - /* SIGCHLD (x32 version) */ - struct { - unsigned int _pid; /* which child */ - unsigned int _uid; /* sender's uid */ - int _status; /* exit code */ - compat_s64 _utime; - compat_s64 _stime; - } _sigchld_x32; - - /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ - struct { - unsigned int _addr; /* faulting insn/memory ref. */ - } _sigfault; - - /* SIGPOLL */ - struct { - int _band; /* POLL_IN, POLL_OUT, POLL_MSG */ - int _fd; - } _sigpoll; - - struct { - unsigned int _call_addr; /* calling insn */ - int _syscall; /* triggering system call number */ - unsigned int _arch; /* AUDIT_ARCH_* of syscall */ - } _sigsys; - } _sifields; -} compat_siginfo_t; - #define IA32_STACK_TOP IA32_PAGE_OFFSET #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/iommu_table.h b/arch/x86/include/asm/iommu_table.h index f229b13..f42a047 100644 --- a/arch/x86/include/asm/iommu_table.h +++ b/arch/x86/include/asm/iommu_table.h @@ -48,7 +48,7 @@ struct iommu_table_entry { #define __IOMMU_INIT(_detect, _depend, _early_init, _late_init, _finish)\ - static const struct iommu_table_entry const \ + static const struct iommu_table_entry \ __iommu_entry_##_detect __used \ __attribute__ ((unused, __section__(".iommu_table"), \ aligned((sizeof(void *))))) \ @@ -63,10 +63,10 @@ struct iommu_table_entry { * to stop detecting the other IOMMUs after yours has been detected. */ #define IOMMU_INIT_POST(_detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 0) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 0) #define IOMMU_INIT_POST_FINISH(detect) \ - __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, 0, 0, 1) + __IOMMU_INIT(_detect, pci_swiotlb_detect_4gb, NULL, NULL, 1) /* * A more sophisticated version of IOMMU_INIT. This variant requires: diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 5478825..d3ddd17 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index 246617e..a65ec29 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -9,6 +9,22 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define DE_VECTOR 0 +#define DB_VECTOR 1 +#define BP_VECTOR 3 +#define OF_VECTOR 4 +#define BR_VECTOR 5 +#define UD_VECTOR 6 +#define NM_VECTOR 7 +#define DF_VECTOR 8 +#define TS_VECTOR 10 +#define NP_VECTOR 11 +#define SS_VECTOR 12 +#define GP_VECTOR 13 +#define PF_VECTOR 14 +#define MF_VECTOR 16 +#define MC_VECTOR 18 + /* Select x86 specific features in <linux/kvm.h> */ #define __KVM_HAVE_PIT #define __KVM_HAVE_IOAPIC @@ -25,6 +41,7 @@ #define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_XSAVE #define __KVM_HAVE_XCRS +#define __KVM_HAVE_READONLY_MEM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index c764f43..15f960c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -86,6 +86,19 @@ struct x86_instruction_info { struct x86_emulate_ops { /* + * read_gpr: read a general purpose register (rax - r15) + * + * @reg: gpr number. + */ + ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); + /* + * write_gpr: write a general purpose register (rax - r15) + * + * @reg: gpr number. + * @val: value to write. + */ + void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); + /* * read_std: Read bytes of standard (non-emulated/special) memory. * Used for descriptor reading. * @addr: [IN ] Linear address from which to read. @@ -200,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t; /* Type, address-of, and value of an instruction's operand. */ struct operand { - enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; + enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type; unsigned int bytes; + unsigned int count; union { unsigned long orig_val; u64 orig_val64; @@ -221,6 +235,7 @@ struct operand { char valptr[sizeof(unsigned long) + 2]; sse128_t vec_val; u64 mm_val; + void *data; }; }; @@ -236,14 +251,23 @@ struct read_cache { unsigned long end; }; +/* Execution mode, passed to the emulator. */ +enum x86emul_mode { + X86EMUL_MODE_REAL, /* Real mode. */ + X86EMUL_MODE_VM86, /* Virtual 8086 mode. */ + X86EMUL_MODE_PROT16, /* 16-bit protected mode. */ + X86EMUL_MODE_PROT32, /* 32-bit protected mode. */ + X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */ +}; + struct x86_emulate_ctxt { - struct x86_emulate_ops *ops; + const struct x86_emulate_ops *ops; /* Register state before/after emulation. */ unsigned long eflags; unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ - int mode; + enum x86emul_mode mode; /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; @@ -281,8 +305,10 @@ struct x86_emulate_ctxt { bool rip_relative; unsigned long _eip; struct operand memop; + u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */ + u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */ /* Fields above regs are cleared together. */ - unsigned long regs[NR_VCPU_REGS]; + unsigned long _regs[NR_VCPU_REGS]; struct operand *memopp; struct fetch_cache fetch; struct read_cache io_read; @@ -293,17 +319,6 @@ struct x86_emulate_ctxt { #define REPE_PREFIX 0xf3 #define REPNE_PREFIX 0xf2 -/* Execution mode, passed to the emulator. */ -#define X86EMUL_MODE_REAL 0 /* Real mode. */ -#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */ -#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */ -#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */ -#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ - -/* any protected mode */ -#define X86EMUL_MODE_PROT (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \ - X86EMUL_MODE_PROT64) - /* CPUID vendors */ #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 @@ -394,4 +409,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); +void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); +void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); + #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09155d6..b2e11f4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -75,22 +75,6 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define DE_VECTOR 0 -#define DB_VECTOR 1 -#define BP_VECTOR 3 -#define OF_VECTOR 4 -#define BR_VECTOR 5 -#define UD_VECTOR 6 -#define NM_VECTOR 7 -#define DF_VECTOR 8 -#define TS_VECTOR 10 -#define NP_VECTOR 11 -#define SS_VECTOR 12 -#define GP_VECTOR 13 -#define PF_VECTOR 14 -#define MF_VECTOR 16 -#define MC_VECTOR 18 - #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 @@ -287,10 +271,24 @@ struct kvm_mmu { union kvm_mmu_page_role base_role; bool direct_map; + /* + * Bitmap; bit set = permission fault + * Byte index: page fault error code [4:1] + * Bit index: pte permissions in ACC_* format + */ + u8 permissions[16]; + u64 *pae_root; u64 *lm_root; u64 rsvd_bits_mask[2][4]; + /* + * Bitmap: bit set = last pte in walk + * index[0:1]: level (zero-based) + * index[2]: pte.ps + */ + u8 last_pte_bitmap; + bool nx; u64 pdptrs[4]; /* pae */ @@ -414,12 +412,15 @@ struct kvm_vcpu_arch { struct x86_emulate_ctxt emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; + int (*complete_userspace_io)(struct kvm_vcpu *vcpu); gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; unsigned int time_offset; struct page *time_page; + /* set guest stopped flag in pvclock flags field */ + bool pvclock_set_guest_stopped_request; struct { u64 msr_val; @@ -454,6 +455,7 @@ struct kvm_vcpu_arch { unsigned long dr6; unsigned long dr7; unsigned long eff_db[KVM_NR_DB_REGS]; + unsigned long guest_debug_dr7; u64 mcg_cap; u64 mcg_status; @@ -500,14 +502,24 @@ struct kvm_vcpu_arch { }; struct kvm_lpage_info { - unsigned long rmap_pde; int write_count; }; struct kvm_arch_memory_slot { + unsigned long *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; +struct kvm_apic_map { + struct rcu_head rcu; + u8 ldr_bits; + /* fields bellow are used to decode ldr values in different modes */ + u32 cid_shift, cid_mask, lid_mask; + struct kvm_lapic *phys_map[256]; + /* first index is cluster id second is cpu id in a cluster */ + struct kvm_lapic *logical_map[16][16]; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -525,6 +537,8 @@ struct kvm_arch { struct kvm_ioapic *vioapic; struct kvm_pit *vpit; int vapics_in_nmi_mode; + struct mutex apic_map_lock; + struct kvm_apic_map *apic_map; unsigned int tss_addr; struct page *apic_access_page; @@ -618,8 +632,7 @@ struct kvm_x86_ops { void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*set_guest_debug)(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *dbg); + void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); @@ -957,6 +970,7 @@ extern bool kvm_rebooting; #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_age_hva(struct kvm *kvm, unsigned long hva); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2f7712e..eb3e9d8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -102,21 +102,21 @@ struct kvm_vcpu_pv_apf_data { extern void kvmclock_init(void); extern int kvm_register_clock(char *txt); -#ifdef CONFIG_KVM_CLOCK +#ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else static inline bool kvm_check_and_clear_guest_paused(void) { return false; } -#endif /* CONFIG_KVMCLOCK */ +#endif /* CONFIG_KVM_GUEST */ /* This instruction is vmcall. On non-VT architectures, it will generate a * trap that we will then rewrite to the appropriate instruction. */ #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" -/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun +/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the * instructions are guaranteed to be supported. * diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a3ac52b..54d73b1 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -116,19 +116,9 @@ struct mce_log { /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 - -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) /* MCE_AMD */ -#define K8_MCE_THRESHOLD_BANK_0 (MCE_THRESHOLD_BASE + 0 * 9) -#define K8_MCE_THRESHOLD_BANK_1 (MCE_THRESHOLD_BASE + 1 * 9) -#define K8_MCE_THRESHOLD_BANK_2 (MCE_THRESHOLD_BASE + 2 * 9) -#define K8_MCE_THRESHOLD_BANK_3 (MCE_THRESHOLD_BASE + 3 * 9) -#define K8_MCE_THRESHOLD_BANK_4 (MCE_THRESHOLD_BASE + 4 * 9) -#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) -#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) - +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) #ifdef __KERNEL__ - extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -171,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device); #ifdef CONFIG_X86_MCE_INTEL extern int mce_cmci_disabled; extern int mce_ignore_ce; +extern int mce_bios_cmci_threshold; void mce_intel_feature_init(struct cpuinfo_x86 *c); void cmci_clear(void); void cmci_reenable(void); diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 4ebe157..43d921b 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -15,8 +15,8 @@ struct microcode_ops { enum ucode_state (*request_microcode_user) (int cpu, const void __user *buf, size_t size); - enum ucode_state (*request_microcode_fw) (int cpu, - struct device *device); + enum ucode_state (*request_microcode_fw) (int cpu, struct device *, + bool refresh_fw); void (*microcode_fini_cpu) (int cpu); @@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void) #ifdef CONFIG_MICROCODE_AMD extern struct microcode_ops * __init init_amd_microcode(void); extern void __exit exit_amd_microcode(void); - -static inline void get_ucode_data(void *to, const u8 *from, size_t n) -{ - memcpy(to, from, n); -} - #else static inline struct microcode_ops * __init init_amd_microcode(void) { diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h index 64217ea..d497bc4 100644 --- a/arch/x86/include/asm/mmzone.h +++ b/arch/x86/include/asm/mmzone.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "mmzone_32.h" +# include <asm/mmzone_32.h> #else -# include "mmzone_64.h" +# include <asm/mmzone_64.h> #endif diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 957ec87..7f0edce 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -121,6 +121,11 @@ #define MSR_P6_EVNTSEL0 0x00000186 #define MSR_P6_EVNTSEL1 0x00000187 +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + /* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. */ @@ -248,6 +253,9 @@ #define MSR_IA32_PERF_STATUS 0x00000198 #define MSR_IA32_PERF_CTL 0x00000199 +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_IA32_MPERF 0x000000e7 #define MSR_IA32_APERF 0x000000e8 diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h index a731b9c..7d3a482 100644 --- a/arch/x86/include/asm/mutex.h +++ b/arch/x86/include/asm/mutex.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "mutex_32.h" +# include <asm/mutex_32.h> #else -# include "mutex_64.h" +# include <asm/mutex_64.h> #endif diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index bfacd2c..49119fc 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -53,9 +53,9 @@ static inline int numa_cpu_node(int cpu) #endif /* CONFIG_NUMA */ #ifdef CONFIG_X86_32 -# include "numa_32.h" +# include <asm/numa_32.h> #else -# include "numa_64.h" +# include <asm/numa_64.h> #endif #ifdef CONFIG_NUMA diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index df75d07..6e41b93 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -141,7 +141,7 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq); #endif /* __KERNEL__ */ #ifdef CONFIG_X86_64 -#include "pci_64.h" +#include <asm/pci_64.h> #endif /* implement the pci_ DMA API in terms of the generic device dma_ one */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43b..4fabcdf 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + #endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 0000000..3f2207b --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 49afb3f..a1f780d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) static inline int pmd_large(pmd_t pte) { - return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == - (_PAGE_PSE | _PAGE_PRESENT); + return pmd_flags(pte) & _PAGE_PSE; } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -384,9 +383,9 @@ pte_t *populate_extra_pte(unsigned long vaddr); #endif /* __ASSEMBLY__ */ #ifdef CONFIG_X86_32 -# include "pgtable_32.h" +# include <asm/pgtable_32.h> #else -# include "pgtable_64.h" +# include <asm/pgtable_64.h> #endif #ifndef __ASSEMBLY__ @@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte) static inline int pmd_present(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_PRESENT; + /* + * Checking for _PAGE_PSE is needed too because + * split_huge_page will temporarily clear the present bit (but + * the _PAGE_PSE flag will remain set at all times while the + * _PAGE_PRESENT bit is clear). + */ + return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE); } static inline int pmd_none(pmd_t pmd) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 0c92113..8faa215 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -71,6 +71,7 @@ do { \ * tables contain all the necessary information. */ #define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 8251be0..47356f9 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -143,6 +143,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_unmap(pte) ((void)(pte))/* NOP */ #define update_mmu_cache(vma, address, ptep) do { } while (0) +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 013286a1..ec8a1fc 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,9 +174,9 @@ #endif #ifdef CONFIG_X86_32 -# include "pgtable_32_types.h" +# include <asm/pgtable_32_types.h> #else -# include "pgtable_64_types.h" +# include <asm/pgtable_64_types.h> #endif #ifndef __ASSEMBLY__ @@ -303,11 +303,9 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 -extern void native_pagetable_setup_start(pgd_t *base); -extern void native_pagetable_setup_done(pgd_t *base); +extern void native_pagetable_init(void); #else -#define native_pagetable_setup_start x86_init_pgd_noop -#define native_pagetable_setup_done x86_init_pgd_noop +#define native_pagetable_init paging_init #endif struct seq_file; diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index 7ef7c30..bad3665 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -1,15 +1,15 @@ #ifdef __KERNEL__ # ifdef CONFIG_X86_32 -# include "posix_types_32.h" +# include <asm/posix_types_32.h> # else -# include "posix_types_64.h" +# include <asm/posix_types_64.h> # endif #else # ifdef __i386__ -# include "posix_types_32.h" +# include <asm/posix_types_32.h> # elif defined(__ILP32__) -# include "posix_types_x32.h" +# include <asm/posix_types_x32.h> # else -# include "posix_types_64.h" +# include <asm/posix_types_64.h> # endif #endif diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index aea1d1d..680cf09 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h @@ -65,6 +65,7 @@ #define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ #define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ #define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ +#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */ /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d048cad..ad1fc85 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); DECLARE_PER_CPU(unsigned int, irq_count); -extern unsigned long kernel_eflags; extern asmlinkage void ignore_sysret(void); #else /* X86_64 */ #ifdef CONFIG_CC_STACKPROTECTOR @@ -589,11 +588,6 @@ typedef struct { } mm_segment_t; -/* - * create a kernel thread without removing it from tasklists - */ -extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); - /* Free all resources held by a thread. */ extern void release_thread(struct task_struct *); @@ -759,6 +753,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +extern void set_task_blockstep(struct task_struct *task, bool on); + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h new file mode 100644 index 0000000..d1ac07a --- /dev/null +++ b/arch/x86/include/asm/rcu.h @@ -0,0 +1,32 @@ +#ifndef _ASM_X86_RCU_H +#define _ASM_X86_RCU_H + +#ifndef __ASSEMBLY__ + +#include <linux/rcupdate.h> +#include <asm/ptrace.h> + +static inline void exception_enter(struct pt_regs *regs) +{ + rcu_user_exit(); +} + +static inline void exception_exit(struct pt_regs *regs) +{ +#ifdef CONFIG_RCU_USER_QS + if (user_mode(regs)) + rcu_user_enter(); +#endif +} + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_RCU_USER_QS +# define SCHEDULE_USER call schedule_user +#else +# define SCHEDULE_USER call schedule +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h index c62e58a..0f3d7f0 100644 --- a/arch/x86/include/asm/seccomp.h +++ b/arch/x86/include/asm/seccomp.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "seccomp_32.h" +# include <asm/seccomp_32.h> #else -# include "seccomp_64.h" +# include <asm/seccomp_64.h> #endif diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 598457c..323973f 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -31,6 +31,10 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +#ifndef CONFIG_COMPAT +typedef sigset_t compat_sigset_t; +#endif + #else /* Here we must cater to libcs that poke about in kernel headers. */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h new file mode 100644 index 0000000..8d3120f --- /dev/null +++ b/arch/x86/include/asm/smap.h @@ -0,0 +1,91 @@ +/* + * Supervisor Mode Access Prevention support + * + * Copyright (C) 2012 Intel Corporation + * Author: H. Peter Anvin <hpa@linux.intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#ifndef _ASM_X86_SMAP_H +#define _ASM_X86_SMAP_H + +#include <linux/stringify.h> +#include <asm/nops.h> +#include <asm/cpufeature.h> + +/* "Raw" instruction opcodes */ +#define __ASM_CLAC .byte 0x0f,0x01,0xca +#define __ASM_STAC .byte 0x0f,0x01,0xcb + +#ifdef __ASSEMBLY__ + +#include <asm/alternative-asm.h> + +#ifdef CONFIG_X86_SMAP + +#define ASM_CLAC \ + 661: ASM_NOP3 ; \ + .pushsection .altinstr_replacement, "ax" ; \ + 662: __ASM_CLAC ; \ + .popsection ; \ + .pushsection .altinstructions, "a" ; \ + altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ + .popsection + +#define ASM_STAC \ + 661: ASM_NOP3 ; \ + .pushsection .altinstr_replacement, "ax" ; \ + 662: __ASM_STAC ; \ + .popsection ; \ + .pushsection .altinstructions, "a" ; \ + altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \ + .popsection + +#else /* CONFIG_X86_SMAP */ + +#define ASM_CLAC +#define ASM_STAC + +#endif /* CONFIG_X86_SMAP */ + +#else /* __ASSEMBLY__ */ + +#include <asm/alternative.h> + +#ifdef CONFIG_X86_SMAP + +static __always_inline void clac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP); +} + +static __always_inline void stac(void) +{ + /* Note: a barrier is implicit in alternative() */ + alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP); +} + +/* These macros can be used in asm() statements */ +#define ASM_CLAC \ + ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP) +#define ASM_STAC \ + ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP) + +#else /* CONFIG_X86_SMAP */ + +static inline void clac(void) { } +static inline void stac(void) { } + +#define ASM_CLAC +#define ASM_STAC + +#endif /* CONFIG_X86_SMAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_SMAP_H */ diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h index 6dfd6d9..09224d7 100644 --- a/arch/x86/include/asm/string.h +++ b/arch/x86/include/asm/string.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "string_32.h" +# include <asm/string_32.h> #else -# include "string_64.h" +# include <asm/string_64.h> #endif diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h index 9bd521f..2fab6c2 100644 --- a/arch/x86/include/asm/suspend.h +++ b/arch/x86/include/asm/suspend.h @@ -1,5 +1,5 @@ #ifdef CONFIG_X86_32 -# include "suspend_32.h" +# include <asm/suspend_32.h> #else -# include "suspend_64.h" +# include <asm/suspend_64.h> #endif diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f2b83bc..cdf5674 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -1,6 +1,135 @@ #ifndef __SVM_H #define __SVM_H +#define SVM_EXIT_READ_CR0 0x000 +#define SVM_EXIT_READ_CR3 0x003 +#define SVM_EXIT_READ_CR4 0x004 +#define SVM_EXIT_READ_CR8 0x008 +#define SVM_EXIT_WRITE_CR0 0x010 +#define SVM_EXIT_WRITE_CR3 0x013 +#define SVM_EXIT_WRITE_CR4 0x014 +#define SVM_EXIT_WRITE_CR8 0x018 +#define SVM_EXIT_READ_DR0 0x020 +#define SVM_EXIT_READ_DR1 0x021 +#define SVM_EXIT_READ_DR2 0x022 +#define SVM_EXIT_READ_DR3 0x023 +#define SVM_EXIT_READ_DR4 0x024 +#define SVM_EXIT_READ_DR5 0x025 +#define SVM_EXIT_READ_DR6 0x026 +#define SVM_EXIT_READ_DR7 0x027 +#define SVM_EXIT_WRITE_DR0 0x030 +#define SVM_EXIT_WRITE_DR1 0x031 +#define SVM_EXIT_WRITE_DR2 0x032 +#define SVM_EXIT_WRITE_DR3 0x033 +#define SVM_EXIT_WRITE_DR4 0x034 +#define SVM_EXIT_WRITE_DR5 0x035 +#define SVM_EXIT_WRITE_DR6 0x036 +#define SVM_EXIT_WRITE_DR7 0x037 +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_INTR 0x060 +#define SVM_EXIT_NMI 0x061 +#define SVM_EXIT_SMI 0x062 +#define SVM_EXIT_INIT 0x063 +#define SVM_EXIT_VINTR 0x064 +#define SVM_EXIT_CR0_SEL_WRITE 0x065 +#define SVM_EXIT_IDTR_READ 0x066 +#define SVM_EXIT_GDTR_READ 0x067 +#define SVM_EXIT_LDTR_READ 0x068 +#define SVM_EXIT_TR_READ 0x069 +#define SVM_EXIT_IDTR_WRITE 0x06a +#define SVM_EXIT_GDTR_WRITE 0x06b +#define SVM_EXIT_LDTR_WRITE 0x06c +#define SVM_EXIT_TR_WRITE 0x06d +#define SVM_EXIT_RDTSC 0x06e +#define SVM_EXIT_RDPMC 0x06f +#define SVM_EXIT_PUSHF 0x070 +#define SVM_EXIT_POPF 0x071 +#define SVM_EXIT_CPUID 0x072 +#define SVM_EXIT_RSM 0x073 +#define SVM_EXIT_IRET 0x074 +#define SVM_EXIT_SWINT 0x075 +#define SVM_EXIT_INVD 0x076 +#define SVM_EXIT_PAUSE 0x077 +#define SVM_EXIT_HLT 0x078 +#define SVM_EXIT_INVLPG 0x079 +#define SVM_EXIT_INVLPGA 0x07a +#define SVM_EXIT_IOIO 0x07b +#define SVM_EXIT_MSR 0x07c +#define SVM_EXIT_TASK_SWITCH 0x07d +#define SVM_EXIT_FERR_FREEZE 0x07e +#define SVM_EXIT_SHUTDOWN 0x07f +#define SVM_EXIT_VMRUN 0x080 +#define SVM_EXIT_VMMCALL 0x081 +#define SVM_EXIT_VMLOAD 0x082 +#define SVM_EXIT_VMSAVE 0x083 +#define SVM_EXIT_STGI 0x084 +#define SVM_EXIT_CLGI 0x085 +#define SVM_EXIT_SKINIT 0x086 +#define SVM_EXIT_RDTSCP 0x087 +#define SVM_EXIT_ICEBP 0x088 +#define SVM_EXIT_WBINVD 0x089 +#define SVM_EXIT_MONITOR 0x08a +#define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d +#define SVM_EXIT_NPF 0x400 + +#define SVM_EXIT_ERR -1 + +#define SVM_EXIT_REASONS \ + { SVM_EXIT_READ_CR0, "read_cr0" }, \ + { SVM_EXIT_READ_CR3, "read_cr3" }, \ + { SVM_EXIT_READ_CR4, "read_cr4" }, \ + { SVM_EXIT_READ_CR8, "read_cr8" }, \ + { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ + { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ + { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ + { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ + { SVM_EXIT_READ_DR0, "read_dr0" }, \ + { SVM_EXIT_READ_DR1, "read_dr1" }, \ + { SVM_EXIT_READ_DR2, "read_dr2" }, \ + { SVM_EXIT_READ_DR3, "read_dr3" }, \ + { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ + { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ + { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ + { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ + { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ + { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ + { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ + { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ + { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ + { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ + { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ + { SVM_EXIT_INTR, "interrupt" }, \ + { SVM_EXIT_NMI, "nmi" }, \ + { SVM_EXIT_SMI, "smi" }, \ + { SVM_EXIT_INIT, "init" }, \ + { SVM_EXIT_VINTR, "vintr" }, \ + { SVM_EXIT_CPUID, "cpuid" }, \ + { SVM_EXIT_INVD, "invd" }, \ + { SVM_EXIT_HLT, "hlt" }, \ + { SVM_EXIT_INVLPG, "invlpg" }, \ + { SVM_EXIT_INVLPGA, "invlpga" }, \ + { SVM_EXIT_IOIO, "io" }, \ + { SVM_EXIT_MSR, "msr" }, \ + { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ + { SVM_EXIT_SHUTDOWN, "shutdown" }, \ + { SVM_EXIT_VMRUN, "vmrun" }, \ + { SVM_EXIT_VMMCALL, "hypercall" }, \ + { SVM_EXIT_VMLOAD, "vmload" }, \ + { SVM_EXIT_VMSAVE, "vmsave" }, \ + { SVM_EXIT_STGI, "stgi" }, \ + { SVM_EXIT_CLGI, "clgi" }, \ + { SVM_EXIT_SKINIT, "skinit" }, \ + { SVM_EXIT_WBINVD, "wbinvd" }, \ + { SVM_EXIT_MONITOR, "monitor" }, \ + { SVM_EXIT_MWAIT, "mwait" }, \ + { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_NPF, "npf" } + +#ifdef __KERNEL__ + enum { INTERCEPT_INTR, INTERCEPT_NMI, @@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFO_REG_MASK 0x0F -#define SVM_EXIT_READ_CR0 0x000 -#define SVM_EXIT_READ_CR3 0x003 -#define SVM_EXIT_READ_CR4 0x004 -#define SVM_EXIT_READ_CR8 0x008 -#define SVM_EXIT_WRITE_CR0 0x010 -#define SVM_EXIT_WRITE_CR3 0x013 -#define SVM_EXIT_WRITE_CR4 0x014 -#define SVM_EXIT_WRITE_CR8 0x018 -#define SVM_EXIT_READ_DR0 0x020 -#define SVM_EXIT_READ_DR1 0x021 -#define SVM_EXIT_READ_DR2 0x022 -#define SVM_EXIT_READ_DR3 0x023 -#define SVM_EXIT_READ_DR4 0x024 -#define SVM_EXIT_READ_DR5 0x025 -#define SVM_EXIT_READ_DR6 0x026 -#define SVM_EXIT_READ_DR7 0x027 -#define SVM_EXIT_WRITE_DR0 0x030 -#define SVM_EXIT_WRITE_DR1 0x031 -#define SVM_EXIT_WRITE_DR2 0x032 -#define SVM_EXIT_WRITE_DR3 0x033 -#define SVM_EXIT_WRITE_DR4 0x034 -#define SVM_EXIT_WRITE_DR5 0x035 -#define SVM_EXIT_WRITE_DR6 0x036 -#define SVM_EXIT_WRITE_DR7 0x037 -#define SVM_EXIT_EXCP_BASE 0x040 -#define SVM_EXIT_INTR 0x060 -#define SVM_EXIT_NMI 0x061 -#define SVM_EXIT_SMI 0x062 -#define SVM_EXIT_INIT 0x063 -#define SVM_EXIT_VINTR 0x064 -#define SVM_EXIT_CR0_SEL_WRITE 0x065 -#define SVM_EXIT_IDTR_READ 0x066 -#define SVM_EXIT_GDTR_READ 0x067 -#define SVM_EXIT_LDTR_READ 0x068 -#define SVM_EXIT_TR_READ 0x069 -#define SVM_EXIT_IDTR_WRITE 0x06a -#define SVM_EXIT_GDTR_WRITE 0x06b -#define SVM_EXIT_LDTR_WRITE 0x06c -#define SVM_EXIT_TR_WRITE 0x06d -#define SVM_EXIT_RDTSC 0x06e -#define SVM_EXIT_RDPMC 0x06f -#define SVM_EXIT_PUSHF 0x070 -#define SVM_EXIT_POPF 0x071 -#define SVM_EXIT_CPUID 0x072 -#define SVM_EXIT_RSM 0x073 -#define SVM_EXIT_IRET 0x074 -#define SVM_EXIT_SWINT 0x075 -#define SVM_EXIT_INVD 0x076 -#define SVM_EXIT_PAUSE 0x077 -#define SVM_EXIT_HLT 0x078 -#define SVM_EXIT_INVLPG 0x079 -#define SVM_EXIT_INVLPGA 0x07a -#define SVM_EXIT_IOIO 0x07b -#define SVM_EXIT_MSR 0x07c -#define SVM_EXIT_TASK_SWITCH 0x07d -#define SVM_EXIT_FERR_FREEZE 0x07e -#define SVM_EXIT_SHUTDOWN 0x07f -#define SVM_EXIT_VMRUN 0x080 -#define SVM_EXIT_VMMCALL 0x081 -#define SVM_EXIT_VMLOAD 0x082 -#define SVM_EXIT_VMSAVE 0x083 -#define SVM_EXIT_STGI 0x084 -#define SVM_EXIT_CLGI 0x085 -#define SVM_EXIT_SKINIT 0x086 -#define SVM_EXIT_RDTSCP 0x087 -#define SVM_EXIT_ICEBP 0x088 -#define SVM_EXIT_WBINVD 0x089 -#define SVM_EXIT_MONITOR 0x08a -#define SVM_EXIT_MWAIT 0x08b -#define SVM_EXIT_MWAIT_COND 0x08c -#define SVM_EXIT_XSETBV 0x08d -#define SVM_EXIT_NPF 0x400 - -#define SVM_EXIT_ERR -1 - #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda" @@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb { #endif +#endif diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3fda9db4..a9a8cf3 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -40,7 +40,7 @@ asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); asmlinkage long sys32_alarm(unsigned int); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, @@ -54,8 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *, - compat_uptr_t __user *, struct pt_regs *); asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); long sys32_lseek(unsigned int, int, unsigned int); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index f1d8b44..2be0b88 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -25,7 +25,7 @@ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); long sys_execve(const char __user *, const char __user *const __user *, - const char __user *const __user *, struct pt_regs *); + const char __user *const __user *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 89f794f..2d946e6 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,7 +79,6 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_IRET 5 /* force IRET */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -89,6 +88,7 @@ struct thread_info { #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_FORK 18 /* ret_from_fork */ +#define TIF_NOHZ 19 /* in adaptive nohz mode */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_DEBUG 21 /* uses debug registers */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ @@ -104,7 +104,6 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_IRET (1 << TIF_IRET) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -114,6 +113,7 @@ struct thread_info { #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_DEBUG (1 << TIF_DEBUG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) @@ -126,12 +126,13 @@ struct thread_info { /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ - _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) + _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* work to do in syscall_trace_leave() */ #define _TIF_WORK_SYSCALL_EXIT \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ - _TIF_SYSCALL_TRACEPOINT) + _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -141,7 +142,8 @@ struct thread_info { /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK \ - ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT) + ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \ + _TIF_NOHZ) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index e1f3a17..7ccf8d1 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -9,6 +9,7 @@ #include <linux/string.h> #include <asm/asm.h> #include <asm/page.h> +#include <asm/smap.h> #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -192,9 +193,10 @@ extern int __get_user_bad(void); #ifdef CONFIG_X86_32 #define __put_user_asm_u64(x, addr, err, errret) \ - asm volatile("1: movl %%eax,0(%2)\n" \ + asm volatile(ASM_STAC "\n" \ + "1: movl %%eax,0(%2)\n" \ "2: movl %%edx,4(%2)\n" \ - "3:\n" \ + "3: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "4: movl %3,%0\n" \ " jmp 3b\n" \ @@ -205,9 +207,10 @@ extern int __get_user_bad(void); : "A" (x), "r" (addr), "i" (errret), "0" (err)) #define __put_user_asm_ex_u64(x, addr) \ - asm volatile("1: movl %%eax,0(%1)\n" \ + asm volatile(ASM_STAC "\n" \ + "1: movl %%eax,0(%1)\n" \ "2: movl %%edx,4(%1)\n" \ - "3:\n" \ + "3: " ASM_CLAC "\n" \ _ASM_EXTABLE_EX(1b, 2b) \ _ASM_EXTABLE_EX(2b, 3b) \ : : "A" (x), "r" (addr)) @@ -379,8 +382,9 @@ do { \ } while (0) #define __get_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %2,%"rtype"1\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %2,%"rtype"1\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " xor"itype" %"rtype"1,%"rtype"1\n" \ @@ -443,8 +447,9 @@ struct __large_struct { unsigned long buf[100]; }; * aliasing issues. */ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errret) \ - asm volatile("1: mov"itype" %"rtype"1,%2\n" \ - "2:\n" \ + asm volatile(ASM_STAC "\n" \ + "1: mov"itype" %"rtype"1,%2\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: mov %3,%0\n" \ " jmp 2b\n" \ @@ -463,13 +468,13 @@ struct __large_struct { unsigned long buf[100]; }; * uaccess_try and catch */ #define uaccess_try do { \ - int prev_err = current_thread_info()->uaccess_err; \ current_thread_info()->uaccess_err = 0; \ + stac(); \ barrier(); #define uaccess_catch(err) \ + clac(); \ (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0); \ - current_thread_info()->uaccess_err = prev_err; \ } while (0) /** @@ -569,6 +574,9 @@ strncpy_from_user(char *dst, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); extern __must_check long strnlen_user(const char __user *str, long n); +unsigned long __must_check clear_user(void __user *mem, unsigned long len); +unsigned long __must_check __clear_user(void __user *mem, unsigned long len); + /* * movsl can be slow when source and dest are not both 8-byte aligned */ @@ -581,9 +589,9 @@ extern struct movsl_mask { #define ARCH_HAS_NOCACHE_UACCESS 1 #ifdef CONFIG_X86_32 -# include "uaccess_32.h" +# include <asm/uaccess_32.h> #else -# include "uaccess_64.h" +# include <asm/uaccess_64.h> #endif #endif /* _ASM_X86_UACCESS_H */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 576e39b..7f760a9 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,7 +213,4 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -unsigned long __must_check clear_user(void __user *mem, unsigned long len); -unsigned long __must_check __clear_user(void __user *mem, unsigned long len); - #endif /* _ASM_X86_UACCESS_32_H */ diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index d8def8b..142810c 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -217,9 +217,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check unsigned long clear_user(void __user *mem, unsigned long len); -__must_check unsigned long __clear_user(void __user *mem, unsigned long len); - static __must_check __always_inline int __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) { diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 0d9776e..16f3fc6 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -50,6 +50,7 @@ # define __ARCH_WANT_SYS_TIME # define __ARCH_WANT_SYS_UTIME # define __ARCH_WANT_SYS_WAITPID +# define __ARCH_WANT_SYS_EXECVE /* * "Conditional" syscalls diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h index f3971bb..8ff8be7 100644 --- a/arch/x86/include/asm/uprobes.h +++ b/arch/x86/include/asm/uprobes.h @@ -42,10 +42,11 @@ struct arch_uprobe { }; struct arch_uprobe_task { - unsigned long saved_trap_nr; #ifdef CONFIG_X86_64 unsigned long saved_scratch_register; #endif + unsigned int saved_trap_nr; + unsigned int saved_tf; }; extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr); diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index 24532c7..ccab4af 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h @@ -2,9 +2,9 @@ #define _ASM_X86_USER_H #ifdef CONFIG_X86_32 -# include "user_32.h" +# include <asm/user_32.h> #else -# include "user_64.h" +# include <asm/user_64.h> #endif #include <asm/types.h> diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index bb05228..fddb53d 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -11,7 +11,8 @@ extern const char VDSO32_PRELINK[]; #define VDSO32_SYMBOL(base, name) \ ({ \ extern const char VDSO32_##name[]; \ - (void *)(VDSO32_##name - VDSO32_PRELINK + (unsigned long)(base)); \ + (void __user *)(VDSO32_##name - VDSO32_PRELINK + \ + (unsigned long)(base)); \ }) #endif diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 8b38be2..46e24d3 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -17,8 +17,8 @@ struct vsyscall_gtod_data { /* open coded 'struct timespec' */ time_t wall_time_sec; - u32 wall_time_nsec; - u32 monotonic_time_nsec; + u64 wall_time_snsec; + u64 monotonic_time_snsec; time_t monotonic_time_sec; struct timezone sys_tz; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 74fcb96..36ec21c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -25,6 +25,88 @@ * */ +#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 + +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 + +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_INVPCID 58 + +#define VMX_EXIT_REASONS \ + { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ + { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ + { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ + { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ + { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ + { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ + { EXIT_REASON_CPUID, "CPUID" }, \ + { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVLPG, "INVLPG" }, \ + { EXIT_REASON_RDPMC, "RDPMC" }, \ + { EXIT_REASON_RDTSC, "RDTSC" }, \ + { EXIT_REASON_VMCALL, "VMCALL" }, \ + { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ + { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ + { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ + { EXIT_REASON_VMPTRST, "VMPTRST" }, \ + { EXIT_REASON_VMREAD, "VMREAD" }, \ + { EXIT_REASON_VMRESUME, "VMRESUME" }, \ + { EXIT_REASON_VMWRITE, "VMWRITE" }, \ + { EXIT_REASON_VMOFF, "VMOFF" }, \ + { EXIT_REASON_VMON, "VMON" }, \ + { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ + { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ + { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ + { EXIT_REASON_MSR_READ, "MSR_READ" }, \ + { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ + { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ + { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ + { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ + { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ + { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ + { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ + { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ + { EXIT_REASON_WBINVD, "WBINVD" } + +#ifdef __KERNEL__ + #include <linux/types.h> /* @@ -241,49 +323,6 @@ enum vmcs_field { HOST_RIP = 0x00006c16, }; -#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 - -#define EXIT_REASON_EXCEPTION_NMI 0 -#define EXIT_REASON_EXTERNAL_INTERRUPT 1 -#define EXIT_REASON_TRIPLE_FAULT 2 - -#define EXIT_REASON_PENDING_INTERRUPT 7 -#define EXIT_REASON_NMI_WINDOW 8 -#define EXIT_REASON_TASK_SWITCH 9 -#define EXIT_REASON_CPUID 10 -#define EXIT_REASON_HLT 12 -#define EXIT_REASON_INVD 13 -#define EXIT_REASON_INVLPG 14 -#define EXIT_REASON_RDPMC 15 -#define EXIT_REASON_RDTSC 16 -#define EXIT_REASON_VMCALL 18 -#define EXIT_REASON_VMCLEAR 19 -#define EXIT_REASON_VMLAUNCH 20 -#define EXIT_REASON_VMPTRLD 21 -#define EXIT_REASON_VMPTRST 22 -#define EXIT_REASON_VMREAD 23 -#define EXIT_REASON_VMRESUME 24 -#define EXIT_REASON_VMWRITE 25 -#define EXIT_REASON_VMOFF 26 -#define EXIT_REASON_VMON 27 -#define EXIT_REASON_CR_ACCESS 28 -#define EXIT_REASON_DR_ACCESS 29 -#define EXIT_REASON_IO_INSTRUCTION 30 -#define EXIT_REASON_MSR_READ 31 -#define EXIT_REASON_MSR_WRITE 32 -#define EXIT_REASON_INVALID_STATE 33 -#define EXIT_REASON_MWAIT_INSTRUCTION 36 -#define EXIT_REASON_MONITOR_INSTRUCTION 39 -#define EXIT_REASON_PAUSE_INSTRUCTION 40 -#define EXIT_REASON_MCE_DURING_VMENTRY 41 -#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 -#define EXIT_REASON_APIC_ACCESS 44 -#define EXIT_REASON_EPT_VIOLATION 48 -#define EXIT_REASON_EPT_MISCONFIG 49 -#define EXIT_REASON_WBINVD 54 -#define EXIT_REASON_XSETBV 55 -#define EXIT_REASON_INVPCID 58 - /* * Interruption-information format */ @@ -488,3 +527,5 @@ enum vm_instruction_error_number { }; #endif + +#endif diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 38155f6..5769349 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -81,12 +81,13 @@ struct x86_init_mapping { /** * struct x86_init_paging - platform specific paging functions - * @pagetable_setup_start: platform specific pre paging_init() call - * @pagetable_setup_done: platform specific post paging_init() call + * @pagetable_init: platform specific paging initialization call to setup + * the kernel pagetables and prepare accessors functions. + * Callback must call paging_init(). Called once after the + * direct mapping for phys memory is available. */ struct x86_init_paging { - void (*pagetable_setup_start)(pgd_t *base); - void (*pagetable_setup_done)(pgd_t *base); + void (*pagetable_init)(void); }; /** diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index cbf0c9d..54d52ff 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -47,16 +47,22 @@ #endif #ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the public interface + * with Xen so that on ARM we can have one ABI that works for 32 and 64 + * bit guests. */ +typedef unsigned long xen_pfn_t; +#define PRI_xen_pfn "lx" +typedef unsigned long xen_ulong_t; +#define PRI_xen_ulong "lx" /* Guest handles for primitive C types. */ __DEFINE_GUEST_HANDLE(uchar, unsigned char); __DEFINE_GUEST_HANDLE(uint, unsigned int); -__DEFINE_GUEST_HANDLE(ulong, unsigned long); DEFINE_GUEST_HANDLE(char); DEFINE_GUEST_HANDLE(int); -DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); DEFINE_GUEST_HANDLE(uint32_t); +DEFINE_GUEST_HANDLE(xen_pfn_t); #endif #ifndef HYPERVISOR_VIRT_START @@ -116,11 +122,13 @@ struct arch_shared_info { #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_X86_32 -#include "interface_32.h" +#include <asm/xen/interface_32.h> #else -#include "interface_64.h" +#include <asm/xen/interface_64.h> #endif +#include <asm/pvclock-abi.h> + #ifndef __ASSEMBLY__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h index 1be1ab7..ee52fca 100644 --- a/arch/x86/include/asm/xen/swiotlb-xen.h +++ b/arch/x86/include/asm/xen/swiotlb-xen.h @@ -5,10 +5,12 @@ extern int xen_swiotlb; extern int __init pci_xen_swiotlb_detect(void); extern void __init pci_xen_swiotlb_init(void); +extern int pci_xen_swiotlb_init_late(void); #else #define xen_swiotlb (0) static inline int __init pci_xen_swiotlb_detect(void) { return 0; } static inline void __init pci_xen_swiotlb_init(void) { } +static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; } #endif #endif /* _ASM_X86_SWIOTLB_XEN_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 7fcf6f3..f8fde90 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -3,8 +3,8 @@ # include <asm-generic/xor.h> #else #ifdef CONFIG_X86_32 -# include "xor_32.h" +# include <asm/xor_32.h> #else -# include "xor_64.h" +# include <asm/xor_64.h> #endif #endif diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 4545708..f79cb7e 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = { * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile( \ - "movups %%xmm0,(%0) ;\n\t" \ - "movups %%xmm1,0x10(%0) ;\n\t" \ - "movups %%xmm2,0x20(%0) ;\n\t" \ - "movups %%xmm3,0x30(%0) ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%0),%%xmm0 ;\n\t" \ - "movups 0x10(%0),%%xmm1 ;\n\t" \ - "movups 0x20(%0),%%xmm2 ;\n\t" \ - "movups 0x30(%0),%%xmm3 ;\n\t" \ - : \ - : "r" (xmm_save) \ - : "memory"); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0) - -#define ALIGN16 __attribute__((aligned(16))) - #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" #define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" @@ -587,10 +555,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned long lines = bytes >> 8; - char xmm_save[16*4] ALIGN16; - int cr0; - XMMS_SAVE; + kernel_fpu_begin(); /* Make sure GCC forgets anything it knows about p4 or p5, such that it won't pass to the asm volatile below a @@ -850,7 +810,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, like assuming they have some legal value. */ asm("" : "=r" (p4), "=r" (p5)); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_pIII_sse = { @@ -862,7 +822,7 @@ static struct xor_block_template xor_block_pIII_sse = { }; /* Also try the AVX routines */ -#include "xor_avx.h" +#include <asm/xor_avx.h> /* Also try the generic routines. */ #include <asm-generic/xor.h> diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index b9b2323..87ac522 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -34,41 +34,7 @@ * no advantages to be gotten from x86-64 here anyways. */ -typedef struct { - unsigned long a, b; -} __attribute__((aligned(16))) xmm_store_t; - -/* Doesn't use gcc to save the XMM registers, because there is no easy way to - tell it to do a clts before the register saving. */ -#define XMMS_SAVE \ -do { \ - preempt_disable(); \ - asm volatile( \ - "movq %%cr0,%0 ;\n\t" \ - "clts ;\n\t" \ - "movups %%xmm0,(%1) ;\n\t" \ - "movups %%xmm1,0x10(%1) ;\n\t" \ - "movups %%xmm2,0x20(%1) ;\n\t" \ - "movups %%xmm3,0x30(%1) ;\n\t" \ - : "=&r" (cr0) \ - : "r" (xmm_save) \ - : "memory"); \ -} while (0) - -#define XMMS_RESTORE \ -do { \ - asm volatile( \ - "sfence ;\n\t" \ - "movups (%1),%%xmm0 ;\n\t" \ - "movups 0x10(%1),%%xmm1 ;\n\t" \ - "movups 0x20(%1),%%xmm2 ;\n\t" \ - "movups 0x30(%1),%%xmm3 ;\n\t" \ - "movq %0,%%cr0 ;\n\t" \ - : \ - : "r" (cr0), "r" (xmm_save) \ - : "memory"); \ - preempt_enable(); \ -} while (0) +#include <asm/i387.h> #define OFFS(x) "16*("#x")" #define PF_OFFS(x) "256+16*("#x")" @@ -91,10 +57,8 @@ static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) { unsigned int lines = bytes >> 8; - unsigned long cr0; - xmm_store_t xmm_save[4]; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -135,7 +99,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -143,11 +107,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK #define BLOCK(i) \ @@ -194,7 +155,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -202,10 +163,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -261,7 +220,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory" ); - XMMS_RESTORE; + kernel_fpu_end(); } static void @@ -269,10 +228,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4, unsigned long *p5) { unsigned int lines = bytes >> 8; - xmm_store_t xmm_save[4]; - unsigned long cr0; - XMMS_SAVE; + kernel_fpu_begin(); asm volatile( #undef BLOCK @@ -336,7 +293,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, : [inc] "r" (256UL) : "memory"); - XMMS_RESTORE; + kernel_fpu_end(); } static struct xor_block_template xor_block_sse = { @@ -349,7 +306,7 @@ static struct xor_block_template xor_block_sse = { /* Also try the AVX routines */ -#include "xor_avx.h" +#include <asm/xor_avx.h> #undef XOR_TRY_TEMPLATES #define XOR_TRY_TEMPLATES \ diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 2510d35..7ea79c5 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -20,32 +20,6 @@ #include <linux/compiler.h> #include <asm/i387.h> -#define ALIGN32 __aligned(32) - -#define YMM_SAVED_REGS 4 - -#define YMMS_SAVE \ -do { \ - preempt_disable(); \ - cr0 = read_cr0(); \ - clts(); \ - asm volatile("vmovaps %%ymm0, %0" : "=m" (ymm_save[0]) : : "memory"); \ - asm volatile("vmovaps %%ymm1, %0" : "=m" (ymm_save[32]) : : "memory"); \ - asm volatile("vmovaps %%ymm2, %0" : "=m" (ymm_save[64]) : : "memory"); \ - asm volatile("vmovaps %%ymm3, %0" : "=m" (ymm_save[96]) : : "memory"); \ -} while (0); - -#define YMMS_RESTORE \ -do { \ - asm volatile("sfence" : : : "memory"); \ - asm volatile("vmovaps %0, %%ymm3" : : "m" (ymm_save[96])); \ - asm volatile("vmovaps %0, %%ymm2" : : "m" (ymm_save[64])); \ - asm volatile("vmovaps %0, %%ymm1" : : "m" (ymm_save[32])); \ - asm volatile("vmovaps %0, %%ymm0" : : "m" (ymm_save[0])); \ - write_cr0(cr0); \ - preempt_enable(); \ -} while (0); - #define BLOCK4(i) \ BLOCK(32 * i, 0) \ BLOCK(32 * (i + 1), 1) \ @@ -60,10 +34,9 @@ do { \ static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -82,16 +55,15 @@ do { \ p1 = (unsigned long *)((uintptr_t)p1 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -113,16 +85,15 @@ do { \ p2 = (unsigned long *)((uintptr_t)p2 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -147,16 +118,15 @@ do { \ p3 = (unsigned long *)((uintptr_t)p3 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { - unsigned long cr0, lines = bytes >> 9; - char ymm_save[32 * YMM_SAVED_REGS] ALIGN32; + unsigned long lines = bytes >> 9; - YMMS_SAVE + kernel_fpu_begin(); while (lines--) { #undef BLOCK @@ -184,7 +154,7 @@ do { \ p4 = (unsigned long *)((uintptr_t)p4 + 512); } - YMMS_RESTORE + kernel_fpu_end(); } static struct xor_block_template xor_block_avx = { diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 8a1b6f9..0415cda 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -34,17 +34,14 @@ extern unsigned int xstate_size; extern u64 pcntxt_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; +extern struct xsave_struct *init_xstate_buf; extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); extern int init_fpu(struct task_struct *child); -extern int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *sw); -static inline int fpu_xrstor_checking(struct fpu *fpu) +static inline int fpu_xrstor_checking(struct xsave_struct *fx) { - struct xsave_struct *fx = &fpu->state->xsave; int err; asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" @@ -69,13 +66,13 @@ static inline int xsave_user(struct xsave_struct __user *buf) * Clear the xsave header first, so that reserved fields are * initialized to zero. */ - err = __clear_user(&buf->xsave_hdr, - sizeof(struct xsave_hdr_struct)); + err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); if (unlikely(err)) return -EFAULT; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" - "2:\n" + __asm__ __volatile__(ASM_STAC "\n" + "1: .byte " REX_PREFIX "0x0f,0xae,0x27\n" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" @@ -84,9 +81,6 @@ static inline int xsave_user(struct xsave_struct __user *buf) : [err] "=r" (err) : "D" (buf), "a" (-1), "d" (-1), "0" (0) : "memory"); - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - /* No need to clear here because the caller clears USED_MATH */ return err; } @@ -97,8 +91,9 @@ static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) u32 lmask = mask; u32 hmask = mask >> 32; - __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" - "2:\n" + __asm__ __volatile__(ASM_STAC "\n" + "1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n" + "2: " ASM_CLAC "\n" ".section .fixup,\"ax\"\n" "3: movl $-1,%[err]\n" " jmp 2b\n" diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild new file mode 100644 index 0000000..83b6e9a --- /dev/null +++ b/arch/x86/include/uapi/asm/Kbuild @@ -0,0 +1,6 @@ +# UAPI Header export list +include include/uapi/asm-generic/Kbuild.asm + +genhdr-y += unistd_32.h +genhdr-y += unistd_64.h +genhdr-y += unistd_x32.h diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e56..91ce48f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o -obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o +obj-$(CONFIG_X86_32) += i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-y += syscall_$(BITS).o obj-$(CONFIG_X86_64) += vsyscall_64.o @@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o -obj-$(CONFIG_KVM_GUEST) += kvm.o -obj-$(CONFIG_KVM_CLOCK) += kvmclock.o +obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o @@ -100,6 +99,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e5..e651f7a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) acpi_register_lapic(physid, ACPI_MADT_ENABLED); /* - * If mp_register_lapic successfully generates a new logical cpu + * If acpi_register_lapic successfully generates a new logical cpu * number, then the following will get us exactly what was mapped */ cpumask_andnot(new_map, cpu_present_mask, tmp_map); diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 1b8e5a0..11676cf 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -43,17 +43,22 @@ int acpi_suspend_lowlevel(void) header->video_mode = saved_video_mode; + header->pmode_behavior = 0; + #ifndef CONFIG_64BIT store_gdt((struct desc_ptr *)&header->pmode_gdt); - if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, - &header->pmode_efer_high)) - header->pmode_efer_low = header->pmode_efer_high = 0; + if (!rdmsr_safe(MSR_EFER, + &header->pmode_efer_low, + &header->pmode_efer_high)) + header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER); #endif /* !CONFIG_64BIT */ header->pmode_cr0 = read_cr0(); - header->pmode_cr4 = read_cr4_safe(); - header->pmode_behavior = 0; + if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { + header->pmode_cr4 = read_cr4(); + header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); + } if (!rdmsr_safe(MSR_IA32_MISC_ENABLE, &header->pmode_misc_en_low, &header->pmode_misc_en_high)) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ced4534..ef5ccca 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -23,19 +23,6 @@ #define MAX_PATCH_LEN (255-1) -#ifdef CONFIG_HOTPLUG_CPU -static int smp_alt_once; - -static int __init bootonly(char *str) -{ - smp_alt_once = 1; - return 1; -} -__setup("smp-alt-boot", bootonly); -#else -#define smp_alt_once 1 -#endif - static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) @@ -317,7 +304,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end, /* turn DS segment override prefix into lock prefix */ if (*ptr == 0x3e) text_poke(ptr, ((unsigned char []){0xf0}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, { const s32 *poff; - if (noreplace_smp) - return; - mutex_lock(&text_mutex); for (poff = start; poff < end; poff++) { u8 *ptr = (u8 *)poff + *poff; @@ -338,7 +322,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end, /* turn lock prefix into DS segment override prefix */ if (*ptr == 0xf0) text_poke(ptr, ((unsigned char []){0x3E}), 1); - }; + } mutex_unlock(&text_mutex); } @@ -359,7 +343,7 @@ struct smp_alt_module { }; static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); -static int smp_mode = 1; /* protected by smp_alt */ +static bool uniproc_patched = false; /* protected by smp_alt */ void __init_or_module alternatives_smp_module_add(struct module *mod, char *name, @@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, { struct smp_alt_module *smp; - if (noreplace_smp) - return; + mutex_lock(&smp_alt); + if (!uniproc_patched) + goto unlock; - if (smp_alt_once) { - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(locks, locks_end, - text, text_end); - return; - } + if (num_possible_cpus() == 1) + /* Don't bother remembering, we'll never have to undo it. */ + goto smp_unlock; smp = kzalloc(sizeof(*smp), GFP_KERNEL); if (NULL == smp) - return; /* we'll run the (safe but slow) SMP code then ... */ + /* we'll run the (safe but slow) SMP code then ... */ + goto unlock; smp->mod = mod; smp->name = name; @@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod, __func__, smp->locks, smp->locks_end, smp->text, smp->text_end, smp->name); - mutex_lock(&smp_alt); list_add_tail(&smp->next, &smp_alt_modules); - if (boot_cpu_has(X86_FEATURE_UP)) - alternatives_smp_unlock(smp->locks, smp->locks_end, - smp->text, smp->text_end); +smp_unlock: + alternatives_smp_unlock(locks, locks_end, text, text_end); +unlock: mutex_unlock(&smp_alt); } @@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; - if (smp_alt_once || noreplace_smp) - return; - mutex_lock(&smp_alt); list_for_each_entry(item, &smp_alt_modules, next) { if (mod != item->mod) continue; list_del(&item->next); - mutex_unlock(&smp_alt); - DPRINTK("%s: %s\n", __func__, item->name); kfree(item); - return; + break; } mutex_unlock(&smp_alt); } -bool skip_smp_alternatives; -void alternatives_smp_switch(int smp) +void alternatives_enable_smp(void) { struct smp_alt_module *mod; @@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp) pr_info("lockdep: fixing up alternatives\n"); #endif - if (noreplace_smp || smp_alt_once || skip_smp_alternatives) - return; - BUG_ON(!smp && (num_online_cpus() > 1)); + /* Why bother if there are no other CPUs? */ + BUG_ON(num_possible_cpus() == 1); mutex_lock(&smp_alt); - /* - * Avoid unnecessary switches because it forces JIT based VMs to - * throw away all cached translations, which can be quite costly. - */ - if (smp == smp_mode) { - /* nothing */ - } else if (smp) { + if (uniproc_patched) { pr_info("switching to SMP code\n"); + BUG_ON(num_online_cpus() != 1); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP); list_for_each_entry(mod, &smp_alt_modules, next) alternatives_smp_lock(mod->locks, mod->locks_end, mod->text, mod->text_end); - } else { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - list_for_each_entry(mod, &smp_alt_modules, next) - alternatives_smp_unlock(mod->locks, mod->locks_end, - mod->text, mod->text_end); + uniproc_patched = false; } - smp_mode = smp; mutex_unlock(&smp_alt); } @@ -540,40 +503,22 @@ void __init alternative_instructions(void) apply_alternatives(__alt_instructions, __alt_instructions_end); - /* switch to patch-once-at-boottime-only mode and free the - * tables in case we know the number of CPUs will never ever - * change */ -#ifdef CONFIG_HOTPLUG_CPU - if (num_possible_cpus() < 2) - smp_alt_once = 1; -#endif - #ifdef CONFIG_SMP - if (smp_alt_once) { - if (1 == num_possible_cpus()) { - pr_info("switching to UP code\n"); - set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP); - set_cpu_cap(&cpu_data(0), X86_FEATURE_UP); - - alternatives_smp_unlock(__smp_locks, __smp_locks_end, - _text, _etext); - } - } else { + /* Patch to UP if other cpus not imminent. */ + if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { + uniproc_patched = true; alternatives_smp_module_add(NULL, "core kernel", __smp_locks, __smp_locks_end, _text, _etext); - - /* Only switch to UP mode if we don't immediately boot others */ - if (num_present_cpus() == 1 || setup_max_cpus <= 1) - alternatives_smp_switch(0); } -#endif - apply_paravirt(__parainstructions, __parainstructions_end); - if (smp_alt_once) + if (!uniproc_patched || num_possible_cpus() == 1) free_init_pages("SMP alternatives", (unsigned long)__smp_locks, (unsigned long)__smp_locks_end); +#endif + + apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 24deb30..b17416e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs) apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); i++; v1 >>= 1; - }; + } apic_printk(APIC_DEBUG, KERN_CONT "\n"); diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index bc552cf..a65829a 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -30,7 +30,7 @@ static int numachip_system __read_mostly; -static struct apic apic_numachip __read_mostly; +static const struct apic apic_numachip __read_mostly; static unsigned int get_apic_id(unsigned long x) { @@ -199,7 +199,7 @@ static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -static struct apic apic_numachip __refconst = { +static const struct apic apic_numachip __refconst = { .name = "NumaConnect system", .probe = numachip_probe, diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c265593..1817fa9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2257,6 +2257,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; cfg = irq_cfg(irq); + if (!cfg) + continue; + raw_spin_lock(&desc->lock); /* diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 68de2dc..2861082 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -69,4 +69,7 @@ void common(void) { OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); OFFSET(BP_pref_address, boot_params, hdr.pref_address); OFFSET(BP_code32_start, boot_params, hdr.code32_start); + + BLANK(); + DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); } diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index d30a6a9..a0e067d 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o +obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9d92e19..f7e98a2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -737,6 +737,72 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, } #endif +static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) +{ + if (!cpu_has_invlpg) + return; + + tlb_flushall_shift = 5; + + if (c->x86 <= 0x11) + tlb_flushall_shift = 4; +} + +static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c) +{ + u32 ebx, eax, ecx, edx; + u16 mask = 0xfff; + + if (c->x86 < 0xf) + return; + + if (c->extended_cpuid_level < 0x80000006) + return; + + cpuid(0x80000006, &eax, &ebx, &ecx, &edx); + + tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask; + tlb_lli_4k[ENTRIES] = ebx & mask; + + /* + * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB + * characteristics from the CPUID function 0x80000005 instead. + */ + if (c->x86 == 0xf) { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + mask = 0xff; + } + + /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!((eax >> 16) & mask)) { + u32 a, b, c, d; + + cpuid(0x80000005, &a, &b, &c, &d); + tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; + } else { + tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; + } + + /* a 4M entry uses two 2M entries */ + tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; + + /* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ + if (!(eax & mask)) { + /* Erratum 658 */ + if (c->x86 == 0x15 && c->x86_model <= 0x1f) { + tlb_lli_2m[ENTRIES] = 1024; + } else { + cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + tlb_lli_2m[ENTRIES] = eax & 0xff; + } + } else + tlb_lli_2m[ENTRIES] = eax & mask; + + tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1; + + cpu_set_tlb_flushall_shift(c); +} + static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_vendor = "AMD", .c_ident = { "AuthenticAMD" }, @@ -756,6 +822,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_size_cache = amd_size_cache, #endif .c_early_init = early_init_amd, + .c_detect_tlb = cpu_detect_tlb_amd, .c_bsp_init = bsp_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c97bb7b..d0e910d 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -165,10 +165,15 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); #endif check_config(); - check_fpu(); check_hlt(); check_popad(); init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); + + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. + */ + check_fpu(); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a5fbc3c..7505f7b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -259,23 +259,36 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } #endif -static int disable_smep __cpuinitdata; static __init int setup_disable_smep(char *arg) { - disable_smep = 1; + setup_clear_cpu_cap(X86_FEATURE_SMEP); return 1; } __setup("nosmep", setup_disable_smep); -static __cpuinit void setup_smep(struct cpuinfo_x86 *c) +static __always_inline void setup_smep(struct cpuinfo_x86 *c) { - if (cpu_has(c, X86_FEATURE_SMEP)) { - if (unlikely(disable_smep)) { - setup_clear_cpu_cap(X86_FEATURE_SMEP); - clear_in_cr4(X86_CR4_SMEP); - } else - set_in_cr4(X86_CR4_SMEP); - } + if (cpu_has(c, X86_FEATURE_SMEP)) + set_in_cr4(X86_CR4_SMEP); +} + +static __init int setup_disable_smap(char *arg) +{ + setup_clear_cpu_cap(X86_FEATURE_SMAP); + return 1; +} +__setup("nosmap", setup_disable_smap); + +static __always_inline void setup_smap(struct cpuinfo_x86 *c) +{ + unsigned long eflags; + + /* This should have been cleared long ago */ + raw_local_save_flags(eflags); + BUG_ON(eflags & X86_EFLAGS_AC); + + if (cpu_has(c, X86_FEATURE_SMAP)) + set_in_cr4(X86_CR4_SMAP); } /* @@ -476,7 +489,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c) printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ - "tlb_flushall_shift is 0x%x\n", + "tlb_flushall_shift: %d\n", tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], @@ -712,8 +725,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) c->cpu_index = 0; filter_cpuid_features(c, false); - setup_smep(c); - if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); } @@ -798,8 +809,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) c->phys_proc_id = c->initial_apicid; } - setup_smep(c); - get_model_name(c); /* Default name */ detect_nopl(c); @@ -864,6 +873,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); + /* Set up SMEP/SMAP */ + setup_smep(c); + setup_smap(c); + /* * The vendor-specific functions might have changed features. * Now we do "generic changes." @@ -942,8 +955,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - if (boot_cpu_data.cpuid_level >= 2) - cpu_detect_tlb(&boot_cpu_data); + cpu_detect_tlb(&boot_cpu_data); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) @@ -1023,14 +1035,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT "%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + printk(KERN_CONT "%s", strim(c->x86_model_id)); else printk(KERN_CONT "%d86", c->x86); + printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model); + if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT " stepping %02x\n", c->x86_mask); + printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask); else - printk(KERN_CONT "\n"); + printk(KERN_CONT ")\n"); print_cpu_msr(c); } @@ -1113,11 +1127,10 @@ void syscall_init(void) /* Flags to clear on syscall */ wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); + X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| + X86_EFLAGS_IOPL|X86_EFLAGS_AC); } -unsigned long kernel_eflags; - /* * Copies of the original ist values from the tss are only accessed during * debugging, no special alignment required. @@ -1297,9 +1310,6 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); - - raw_local_save_flags(kernel_eflags); if (is_uv_system()) uv_cpu_init(); @@ -1352,6 +1362,5 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); - xsave_init(); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0a4ce29..198e019 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c) int i, j, n; unsigned int regs[4]; unsigned char *desc = (unsigned char *)regs; + + if (c->cpuid_level < 2) + return; + /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 9a7c90d..93c5451 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -991,7 +991,7 @@ static struct attribute ** __cpuinit amd_l3_attrs(void) if (attrs) return attrs; - n = sizeof (default_attrs) / sizeof (struct attribute *); + n = ARRAY_SIZE(default_attrs); if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) n += 2; diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index fc4beb3..ddc72f8 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) } static cpumask_var_t mce_inject_cpumask; +static DEFINE_MUTEX(mce_inject_mutex); static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs) { @@ -194,7 +195,11 @@ static void raise_mce(struct mce *m) put_online_cpus(); } else #endif + { + preempt_disable(); raise_local(); + preempt_enable(); + } } /* Error injection interface */ @@ -225,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, * so do it a jiffie or two later everywhere. */ schedule_timeout(2); + + mutex_lock(&mce_inject_mutex); raise_mce(&m); + mutex_unlock(&mce_inject_mutex); return usize; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index ed44c8a..6a05c1d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -28,6 +28,18 @@ extern int mce_ser; extern struct mce_bank *mce_banks; +#ifdef CONFIG_X86_MCE_INTEL +unsigned long mce_intel_adjust_timer(unsigned long interval); +void mce_intel_cmci_poll(void); +void mce_intel_hcpu_update(unsigned long cpu); +#else +# define mce_intel_adjust_timer mce_adjust_timer_default +static inline void mce_intel_cmci_poll(void) { } +static inline void mce_intel_hcpu_update(unsigned long cpu) { } +#endif + +void mce_timer_kick(unsigned long interval); + #ifdef CONFIG_ACPI_APEI int apei_write_mce(struct mce *m); ssize_t apei_read_mce(struct mce *m, u64 *record_id); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 292d025..46cbf86 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly; int mce_cmci_disabled __read_mostly; int mce_ignore_ce __read_mostly; int mce_ser __read_mostly; +int mce_bios_cmci_threshold __read_mostly; struct mce_bank *mce_banks __read_mostly; @@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); +static unsigned long mce_adjust_timer_default(unsigned long interval) +{ + return interval; +} + +static unsigned long (*mce_adjust_timer)(unsigned long interval) = + mce_adjust_timer_default; + static void mce_timer_fn(unsigned long data) { struct timer_list *t = &__get_cpu_var(mce_timer); @@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data) if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); + mce_intel_cmci_poll(); } /* @@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data) * polling interval, otherwise increase the polling interval. */ iv = __this_cpu_read(mce_next_interval); - if (mce_notify_irq()) + if (mce_notify_irq()) { iv = max(iv / 2, (unsigned long) HZ/100); - else + } else { iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); + iv = mce_adjust_timer(iv); + } __this_cpu_write(mce_next_interval, iv); + /* Might have become 0 after CMCI storm subsided */ + if (iv) { + t->expires = jiffies + iv; + add_timer_on(t, smp_processor_id()); + } +} - t->expires = jiffies + iv; - add_timer_on(t, smp_processor_id()); +/* + * Ensure that the timer is firing in @interval from now. + */ +void mce_timer_kick(unsigned long interval) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned long when = jiffies + interval; + unsigned long iv = __this_cpu_read(mce_next_interval); + + if (timer_pending(t)) { + if (time_before(when, t->expires)) + mod_timer_pinned(t, when); + } else { + t->expires = round_jiffies(when); + add_timer_on(t, smp_processor_id()); + } + if (interval < iv) + __this_cpu_write(mce_next_interval, interval); } /* Must not be called in IRQ context where del_timer_sync() can deadlock */ @@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: mce_intel_feature_init(c); + mce_adjust_timer = mce_intel_adjust_timer; break; case X86_VENDOR_AMD: mce_amd_feature_init(c); @@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } -static void __mcheck_cpu_init_timer(void) +static void mce_start_timer(unsigned int cpu, struct timer_list *t) { - struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long iv = check_interval * HZ; + unsigned long iv = mce_adjust_timer(check_interval * HZ); - setup_timer(t, mce_timer_fn, smp_processor_id()); + __this_cpu_write(mce_next_interval, iv); - if (mce_ignore_ce) + if (mce_ignore_ce || !iv) return; - __this_cpu_write(mce_next_interval, iv); - if (!iv) - return; t->expires = round_jiffies(jiffies + iv); add_timer_on(t, smp_processor_id()); } +static void __mcheck_cpu_init_timer(void) +{ + struct timer_list *t = &__get_cpu_var(mce_timer); + unsigned int cpu = smp_processor_id(); + + setup_timer(t, mce_timer_fn, cpu); + mce_start_timer(cpu, t); +} + /* Handle unconfigured int18 (should never happen) */ static void unexpected_machine_check(struct pt_regs *regs, long error_code) { @@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = { * check, or 0 to not wait * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. * mce=nobootlog Don't log MCEs from before booting. + * mce=bios_cmci_threshold Don't program the CMCI threshold */ static int __init mcheck_enable(char *str) { @@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str) mce_ignore_ce = 1; else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) mce_bootlog = (str[0] == 'b'); + else if (!strcmp(str, "bios_cmci_threshold")) + mce_bios_cmci_threshold = 1; else if (isdigit(str[0])) { get_option(&str, &tolerant); if (*str == ',') { @@ -2294,38 +2337,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) unsigned int cpu = (unsigned long)hcpu; struct timer_list *t = &per_cpu(mce_timer, cpu); - switch (action) { + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: mce_device_create(cpu); if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); break; case CPU_DEAD: - case CPU_DEAD_FROZEN: if (threshold_cpu_callback) threshold_cpu_callback(action, cpu); mce_device_remove(cpu); + mce_intel_hcpu_update(cpu); break; case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - del_timer_sync(t); smp_call_function_single(cpu, mce_disable_cpu, &action, 1); + del_timer_sync(t); break; case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: - if (!mce_ignore_ce && check_interval) { - t->expires = round_jiffies(jiffies + - per_cpu(mce_next_interval, cpu)); - add_timer_on(t, cpu); - } smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); + mce_start_timer(cpu, t); break; - case CPU_POST_DEAD: + } + + if (action == CPU_POST_DEAD) { /* intentionally ignoring frozen here */ cmci_rediscover(cpu); - break; } + return NOTIFY_OK; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index c4e916d..698b6ec 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -576,12 +576,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) int err = 0; if (shared_bank[bank]) { - nb = node_to_amd_nb(amd_get_nb_id(cpu)); - WARN_ON(!nb); /* threshold descriptor already initialized on this node? */ - if (nb->bank4) { + if (nb && nb->bank4) { /* yes, use it */ b = nb->bank4; err = kobject_add(b->kobj, &dev->kobj, name); @@ -615,8 +613,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) atomic_set(&b->cpus, 1); /* nb is already initialized, see above */ - WARN_ON(nb->bank4); - nb->bank4 = b; + if (nb) { + WARN_ON(nb->bank4); + nb->bank4 = b; + } } err = allocate_threshold_blocks(cpu, bank, 0, diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 38e49bc9..5f88abf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -15,6 +15,8 @@ #include <asm/msr.h> #include <asm/mce.h> +#include "mce-internal.h" + /* * Support for Intel Correct Machine Check Interrupts. This allows * the CPU to raise an interrupt when a corrected machine check happened. @@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); */ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); -#define CMCI_THRESHOLD 1 +#define CMCI_THRESHOLD 1 +#define CMCI_POLL_INTERVAL (30 * HZ) +#define CMCI_STORM_INTERVAL (1 * HZ) +#define CMCI_STORM_THRESHOLD 15 + +static DEFINE_PER_CPU(unsigned long, cmci_time_stamp); +static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt); +static DEFINE_PER_CPU(unsigned int, cmci_storm_state); + +enum { + CMCI_STORM_NONE, + CMCI_STORM_ACTIVE, + CMCI_STORM_SUBSIDED, +}; + +static atomic_t cmci_storm_on_cpus; static int cmci_supported(int *banks) { @@ -53,6 +70,93 @@ static int cmci_supported(int *banks) return !!(cap & MCG_CMCI_P); } +void mce_intel_cmci_poll(void) +{ + if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE) + return; + machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); +} + +void mce_intel_hcpu_update(unsigned long cpu) +{ + if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE) + atomic_dec(&cmci_storm_on_cpus); + + per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; +} + +unsigned long mce_intel_adjust_timer(unsigned long interval) +{ + int r; + + if (interval < CMCI_POLL_INTERVAL) + return interval; + + switch (__this_cpu_read(cmci_storm_state)) { + case CMCI_STORM_ACTIVE: + /* + * We switch back to interrupt mode once the poll timer has + * silenced itself. That means no events recorded and the + * timer interval is back to our poll interval. + */ + __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED); + r = atomic_sub_return(1, &cmci_storm_on_cpus); + if (r == 0) + pr_notice("CMCI storm subsided: switching to interrupt mode\n"); + /* FALLTHROUGH */ + + case CMCI_STORM_SUBSIDED: + /* + * We wait for all cpus to go back to SUBSIDED + * state. When that happens we switch back to + * interrupt mode. + */ + if (!atomic_read(&cmci_storm_on_cpus)) { + __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); + cmci_reenable(); + cmci_recheck(); + } + return CMCI_POLL_INTERVAL; + default: + /* + * We have shiny weather. Let the poll do whatever it + * thinks. + */ + return interval; + } +} + +static bool cmci_storm_detect(void) +{ + unsigned int cnt = __this_cpu_read(cmci_storm_cnt); + unsigned long ts = __this_cpu_read(cmci_time_stamp); + unsigned long now = jiffies; + int r; + + if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE) + return true; + + if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) { + cnt++; + } else { + cnt = 1; + __this_cpu_write(cmci_time_stamp, now); + } + __this_cpu_write(cmci_storm_cnt, cnt); + + if (cnt <= CMCI_STORM_THRESHOLD) + return false; + + cmci_clear(); + __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); + r = atomic_add_return(1, &cmci_storm_on_cpus); + mce_timer_kick(CMCI_POLL_INTERVAL); + + if (r == 1) + pr_notice("CMCI storm detected: switching to poll mode\n"); + return true; +} + /* * The interrupt handler. This is called on every event. * Just call the poller directly to log any events. @@ -61,33 +165,28 @@ static int cmci_supported(int *banks) */ static void intel_threshold_interrupt(void) { + if (cmci_storm_detect()) + return; machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); mce_notify_irq(); } -static void print_update(char *type, int *hdr, int num) -{ - if (*hdr == 0) - printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); - *hdr = 1; - printk(KERN_CONT " %s:%d", type, num); -} - /* * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks * on this CPU. Use the algorithm recommended in the SDM to discover shared * banks. */ -static void cmci_discover(int banks, int boot) +static void cmci_discover(int banks) { unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); unsigned long flags; - int hdr = 0; int i; + int bios_wrong_thresh = 0; raw_spin_lock_irqsave(&cmci_discover_lock, flags); for (i = 0; i < banks; i++) { u64 val; + int bios_zero_thresh = 0; if (test_bit(i, owned)) continue; @@ -96,29 +195,52 @@ static void cmci_discover(int banks, int boot) /* Already owned by someone else? */ if (val & MCI_CTL2_CMCI_EN) { - if (test_and_clear_bit(i, owned) && !boot) - print_update("SHD", &hdr, i); + clear_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); continue; } - val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; - val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD; + if (!mce_bios_cmci_threshold) { + val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK; + val |= CMCI_THRESHOLD; + } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) { + /* + * If bios_cmci_threshold boot option was specified + * but the threshold is zero, we'll try to initialize + * it to 1. + */ + bios_zero_thresh = 1; + val |= CMCI_THRESHOLD; + } + + val |= MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(i), val); rdmsrl(MSR_IA32_MCx_CTL2(i), val); /* Did the enable bit stick? -- the bank supports CMCI */ if (val & MCI_CTL2_CMCI_EN) { - if (!test_and_set_bit(i, owned) && !boot) - print_update("CMCI", &hdr, i); + set_bit(i, owned); __clear_bit(i, __get_cpu_var(mce_poll_banks)); + /* + * We are able to set thresholds for some banks that + * had a threshold of 0. This means the BIOS has not + * set the thresholds properly or does not work with + * this boot option. Note down now and report later. + */ + if (mce_bios_cmci_threshold && bios_zero_thresh && + (val & MCI_CTL2_CMCI_THRESHOLD_MASK)) + bios_wrong_thresh = 1; } else { WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); } } raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); - if (hdr) - printk(KERN_CONT "\n"); + if (mce_bios_cmci_threshold && bios_wrong_thresh) { + pr_info_once( + "bios_cmci_threshold: Some banks do not have valid thresholds set\n"); + pr_info_once( + "bios_cmci_threshold: Make sure your BIOS supports this boot option\n"); + } } /* @@ -156,7 +278,7 @@ void cmci_clear(void) continue; /* Disable CMCI */ rdmsrl(MSR_IA32_MCx_CTL2(i), val); - val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK); + val &= ~MCI_CTL2_CMCI_EN; wrmsrl(MSR_IA32_MCx_CTL2(i), val); __clear_bit(i, __get_cpu_var(mce_banks_owned)); } @@ -186,7 +308,7 @@ void cmci_rediscover(int dying) continue; /* Recheck banks in case CPUs don't all have the same */ if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } set_cpus_allowed_ptr(current, old); @@ -200,7 +322,7 @@ void cmci_reenable(void) { int banks; if (cmci_supported(&banks)) - cmci_discover(banks, 0); + cmci_discover(banks); } static void intel_init_cmci(void) @@ -211,7 +333,7 @@ static void intel_init_cmci(void) return; mce_threshold_vector = intel_threshold_interrupt; - cmci_discover(banks, 1); + cmci_discover(banks); /* * For CPU #0 this runs with still disabled APIC, but that's * ok because only the vector is set up. We still do another diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl index c7b3fe2..091972e 100644 --- a/arch/x86/kernel/cpu/mkcapflags.pl +++ b/arch/x86/kernel/cpu/mkcapflags.pl @@ -8,7 +8,10 @@ open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n"; open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n"; -print OUT "#include <asm/cpufeature.h>\n\n"; +print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n"; +print OUT "#include <asm/cpufeature.h>\n"; +print OUT "#endif\n"; +print OUT "\n"; print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n"; %features = (); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 915b876..4a3374e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -208,12 +208,14 @@ static bool check_hw_exists(void) } /* - * Now write a value and read it back to see if it matches, - * this is needed to detect certain hardware emulators (qemu/kvm) - * that don't trap on the MSR access and always return 0s. + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. */ - val = 0xabcdUL; reg = x86_pmu_event_addr(0); + if (rdmsrl_safe(reg, &val)) + goto msr_fail; + val ^= 0xffffUL; ret = wrmsrl_safe(reg, val); ret |= rdmsrl_safe(reg, &val_new); if (ret || val != val_new) @@ -338,6 +340,9 @@ int x86_setup_perfctr(struct perf_event *event) /* BTS is currently only allowed for user-mode. */ if (!attr->exclude_kernel) return -EOPNOTSUPP; + + if (!attr->exclude_guest) + return -EOPNOTSUPP; } hwc->config |= config; @@ -380,6 +385,9 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip) { int precise = 0; + if (!event->attr.exclude_guest) + return -EOPNOTSUPP; + /* Support for constant skid */ if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) { precise++; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8b6defe..271d257 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -626,6 +626,8 @@ int p4_pmu_init(void); int p6_pmu_init(void); +int knc_pmu_init(void); + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index eebd5ff..6336bcb 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -41,17 +41,22 @@ struct cpu_perf_ibs { }; struct perf_ibs { - struct pmu pmu; - unsigned int msr; - u64 config_mask; - u64 cnt_mask; - u64 enable_mask; - u64 valid_mask; - u64 max_period; - unsigned long offset_mask[1]; - int offset_max; - struct cpu_perf_ibs __percpu *pcpu; - u64 (*get_count)(u64 config); + struct pmu pmu; + unsigned int msr; + u64 config_mask; + u64 cnt_mask; + u64 enable_mask; + u64 valid_mask; + u64 max_period; + unsigned long offset_mask[1]; + int offset_max; + struct cpu_perf_ibs __percpu *pcpu; + + struct attribute **format_attrs; + struct attribute_group format_group; + const struct attribute_group *attr_groups[2]; + + u64 (*get_count)(u64 config); }; struct perf_ibs_data { @@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags) static void perf_ibs_read(struct perf_event *event) { } +PMU_FORMAT_ATTR(rand_en, "config:57"); +PMU_FORMAT_ATTR(cnt_ctl, "config:19"); + +static struct attribute *ibs_fetch_format_attrs[] = { + &format_attr_rand_en.attr, + NULL, +}; + +static struct attribute *ibs_op_format_attrs[] = { + NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */ + NULL, +}; + static struct perf_ibs perf_ibs_fetch = { .pmu = { .task_ctx_nr = perf_invalid_context, @@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = { .max_period = IBS_FETCH_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK }, .offset_max = MSR_AMD64_IBSFETCH_REG_COUNT, + .format_attrs = ibs_fetch_format_attrs, .get_count = get_ibs_fetch_count, }; @@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = { .max_period = IBS_OP_MAX_CNT << 4, .offset_mask = { MSR_AMD64_IBSOP_REG_MASK }, .offset_max = MSR_AMD64_IBSOP_REG_COUNT, + .format_attrs = ibs_op_format_attrs, .get_count = get_ibs_op_count, }; @@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) perf_ibs->pcpu = pcpu; + /* register attributes */ + if (perf_ibs->format_attrs[0]) { + memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group)); + perf_ibs->format_group.name = "format"; + perf_ibs->format_group.attrs = perf_ibs->format_attrs; + + memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups)); + perf_ibs->attr_groups[0] = &perf_ibs->format_group; + perf_ibs->pmu.attr_groups = perf_ibs->attr_groups; + } + ret = perf_pmu_register(&perf_ibs->pmu, name, -1); if (ret) { perf_ibs->pcpu = NULL; @@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) static __init int perf_event_ibs_init(void) { + struct attribute **attr = ibs_op_format_attrs; + if (!ibs_caps) return -ENODEV; /* ibs not supported by the cpu */ perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch"); - if (ibs_caps & IBS_CAPS_OPCNT) + + if (ibs_caps & IBS_CAPS_OPCNT) { perf_ibs_op.config_mask |= IBS_OP_CNT_CTL; + *attr++ = &format_attr_cnt_ctl.attr; + } perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); + register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 6bca492..324bb52 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void) switch (boot_cpu_data.x86) { case 0x6: return p6_pmu_init(); + case 0xb: + return knc_pmu_init(); case 0xf: return p4_pmu_init(); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 38e4894..3cf3d97 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -118,22 +118,24 @@ static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; int box_ctl = uncore_pci_box_ctl(box); - u32 config; + u32 config = 0; - pci_read_config_dword(pdev, box_ctl, &config); - config |= SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config |= SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } } static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; int box_ctl = uncore_pci_box_ctl(box); - u32 config; + u32 config = 0; - pci_read_config_dword(pdev, box_ctl, &config); - config &= ~SNBEP_PMON_BOX_CTL_FRZ; - pci_write_config_dword(pdev, box_ctl, config); + if (!pci_read_config_dword(pdev, box_ctl, &config)) { + config &= ~SNBEP_PMON_BOX_CTL_FRZ; + pci_write_config_dword(pdev, box_ctl, config); + } } static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event) @@ -156,7 +158,7 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe { struct pci_dev *pdev = box->pci_dev; struct hw_perf_event *hwc = &event->hw; - u64 count; + u64 count = 0; pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count); pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1); @@ -603,11 +605,12 @@ static struct pci_driver snbep_uncore_pci_driver = { /* * build pci bus to socket mapping */ -static void snbep_pci2phy_map_init(void) +static int snbep_pci2phy_map_init(void) { struct pci_dev *ubox_dev = NULL; int i, bus, nodeid; - u32 config; + int err = 0; + u32 config = 0; while (1) { /* find the UBOX device */ @@ -618,10 +621,14 @@ static void snbep_pci2phy_map_init(void) break; bus = ubox_dev->bus->number; /* get the Node ID of the local register */ - pci_read_config_dword(ubox_dev, 0x40, &config); + err = pci_read_config_dword(ubox_dev, 0x40, &config); + if (err) + break; nodeid = config; /* get the Node ID mapping */ - pci_read_config_dword(ubox_dev, 0x54, &config); + err = pci_read_config_dword(ubox_dev, 0x54, &config); + if (err) + break; /* * every three bits in the Node ID mapping register maps * to a particular node. @@ -633,7 +640,11 @@ static void snbep_pci2phy_map_init(void) } } }; - return; + + if (ubox_dev) + pci_dev_put(ubox_dev); + + return err ? pcibios_err_to_errno(err) : 0; } /* end of Sandy Bridge-EP uncore support */ @@ -1547,7 +1558,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - int port; /* adjust the main event selector and extra register index */ if (reg1->idx % 2) { @@ -1559,7 +1569,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event) } /* adjust extra register config */ - port = reg1->idx / 6 + box->pmu->pmu_idx * 4; switch (reg1->idx % 6) { case 2: /* shift the 8~15 bits to the 0~7 bits */ @@ -1950,7 +1959,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp static struct intel_uncore_box * uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - static struct intel_uncore_box *box; + struct intel_uncore_box *box; box = *per_cpu_ptr(pmu->box, cpu); if (box) @@ -2347,6 +2356,27 @@ int uncore_pmu_event_init(struct perf_event *event) return ret; } +static ssize_t uncore_get_attr_cpumask(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask); + + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} + +static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL); + +static struct attribute *uncore_pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group uncore_pmu_attr_group = { + .attrs = uncore_pmu_attrs, +}; + static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; @@ -2384,8 +2414,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) free_percpu(type->pmus[i].box); kfree(type->pmus); type->pmus = NULL; - kfree(type->attr_groups[1]); - type->attr_groups[1] = NULL; + kfree(type->events_group); + type->events_group = NULL; } static void __init uncore_types_exit(struct intel_uncore_type **types) @@ -2437,9 +2467,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type) for (j = 0; j < i; j++) attrs[j] = &type->event_descs[j].attr.attr; - type->attr_groups[1] = events_group; + type->events_group = events_group; } + type->pmu_group = &uncore_pmu_attr_group; type->pmus = pmus; return 0; fail: @@ -2556,9 +2587,11 @@ static int __init uncore_pci_init(void) switch (boot_cpu_data.x86_model) { case 45: /* Sandy Bridge-EP */ + ret = snbep_pci2phy_map_init(); + if (ret) + return ret; pci_uncores = snbep_pci_uncores; uncore_pci_driver = &snbep_uncore_pci_driver; - snbep_pci2phy_map_init(); break; default: return 0; @@ -2904,6 +2937,9 @@ static int __init intel_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return -ENODEV; + if (cpu_has_hypervisor) + return -ENODEV; + ret = uncore_pci_init(); if (ret) goto fail; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 5b81c18..e68a455 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -369,10 +369,12 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; - const struct attribute_group *attr_groups[3]; + const struct attribute_group *attr_groups[4]; }; -#define format_group attr_groups[0] +#define pmu_group attr_groups[0] +#define format_group attr_groups[1] +#define events_group attr_groups[2] struct intel_uncore_ops { void (*init_box)(struct intel_uncore_box *); diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c new file mode 100644 index 0000000..4b7731b --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_knc.c @@ -0,0 +1,319 @@ +/* Driver for Intel Xeon Phi "Knights Corner" PMU */ + +#include <linux/perf_event.h> +#include <linux/types.h> + +#include <asm/hardirq.h> + +#include "perf_event.h" + +static const u64 knc_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x002a, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0029, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b, +}; + +static __initconst u64 knc_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + /* On Xeon Phi event "0" is a valid DATA_READ */ + /* (L1 Data Cache Reads) Instruction. */ + /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */ + /* bit will always be set in x86_pmu_hw_config(). */ + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */ + [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */ + [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT, + /* DATA_READ */ + /* see note on L1 OP_READ */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */ + [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */ + [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */ + [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + + +static u64 knc_pmu_event_map(int hw_event) +{ + return knc_perfmon_event_map[hw_event]; +} + +static struct event_constraint knc_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */ + INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */ + INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */ + INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */ + INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */ + INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */ + INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */ + INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */ + INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */ + INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */ + INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */ + INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */ + INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */ + INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */ + INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */ + INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */ + INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */ + INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */ + EVENT_CONSTRAINT_END +}; + +#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d +#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e +#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f + +#define KNC_ENABLE_COUNTER0 0x00000001 +#define KNC_ENABLE_COUNTER1 0x00000002 + +static void knc_pmu_disable_all(void) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static void knc_pmu_enable_all(int added) +{ + u64 val; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); + val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1); + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val); +} + +static inline void +knc_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +static void knc_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 val; + + val = hwc->config; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; + + (void)wrmsrl_safe(hwc->config_base + hwc->idx, val); +} + +static inline u64 knc_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void knc_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack); +} + +static int knc_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int handled = 0; + int bit, loops; + u64 status; + + cpuc = &__get_cpu_var(cpu_hw_events); + + knc_pmu_disable_all(); + + status = knc_pmu_get_status(); + if (!status) { + knc_pmu_enable_all(0); + return handled; + } + + loops = 0; +again: + knc_pmu_ack_status(status); + if (++loops > 100) { + WARN_ONCE(1, "perf: irq loop stuck!\n"); + perf_event_print_debug(); + goto done; + } + + inc_irq_stat(apic_perf_irqs); + + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + handled++; + + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + /* + * Repeat if there is more work to be done: + */ + status = knc_pmu_get_status(); + if (status) + goto again; + +done: + knc_pmu_enable_all(0); + + return handled; +} + + +PMU_FORMAT_ATTR(event, "config:0-7" ); +PMU_FORMAT_ATTR(umask, "config:8-15" ); +PMU_FORMAT_ATTR(edge, "config:18" ); +PMU_FORMAT_ATTR(inv, "config:23" ); +PMU_FORMAT_ATTR(cmask, "config:24-31" ); + +static struct attribute *intel_knc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static __initconst struct x86_pmu knc_pmu = { + .name = "knc", + .handle_irq = knc_pmu_handle_irq, + .disable_all = knc_pmu_disable_all, + .enable_all = knc_pmu_enable_all, + .enable = knc_pmu_enable_event, + .disable = knc_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_KNC_EVNTSEL0, + .perfctr = MSR_KNC_PERFCTR0, + .event_map = knc_pmu_event_map, + .max_events = ARRAY_SIZE(knc_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 39) - 1, + .version = 0, + .num_counters = 2, + .cntval_bits = 40, + .cntval_mask = (1ULL << 40) - 1, + .get_event_constraints = x86_get_event_constraints, + .event_constraints = knc_event_constraints, + .format_attrs = intel_knc_formats_attr, +}; + +__init int knc_pmu_init(void) +{ + x86_pmu = knc_pmu; + + memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; +} diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index e4dd0f7..7d0270b 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -8,13 +8,106 @@ */ static const u64 p6_perfmon_event_map[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */ + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */ + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */ + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */ + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */ + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */ + +}; + +static __initconst u64 p6_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */ + [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */ + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, }; static u64 p6_pmu_event_map(int hw_event) @@ -34,7 +127,7 @@ static struct event_constraint p6_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ @@ -64,25 +157,25 @@ static void p6_pmu_enable_all(int added) static inline void p6_pmu_disable_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; u64 val = P6_NOP_EVENT; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)wrmsrl_safe(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; u64 val; val = hwc->config; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL_ENABLE; + + /* + * p6 only has a global event enable, set on PerfEvtSel0 + * We "disable" events by programming P6_NOP_EVENT + * and we rely on p6_pmu_enable_all() being called + * to actually enable the events. + */ (void)wrmsrl_safe(hwc->config_base, val); } @@ -158,5 +251,9 @@ __init int p6_pmu_init(void) x86_pmu = p6_pmu; + memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + return 0; } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 966512b..2e8caf0 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) switch (boot_cpu_data.x86) { case 6: return msr - MSR_P6_PERFCTR0; + case 11: + return msr - MSR_KNC_PERFCTR0; case 15: return msr - MSR_P4_BPU_PERFCTR0; } @@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) switch (boot_cpu_data.x86) { case 6: return msr - MSR_P6_EVNTSEL0; + case 11: + return msr - MSR_KNC_EVNTSEL0; case 15: return msr - MSR_P4_BSU_ESCR0; } diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 8022c66..fbd8955 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) static void *c_start(struct seq_file *m, loff_t *pos) { - if (*pos == 0) /* just in case, cpu 0 is not the first */ - *pos = cpumask_first(cpu_online_mask); - else - *pos = cpumask_next(*pos - 1, cpu_online_mask); + *pos = cpumask_next(*pos - 1, cpu_online_mask); if ((*pos) < nr_cpu_ids) return &cpu_data(*pos); return NULL; diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 39472dd..60c7891 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -199,12 +199,14 @@ static int __init cpuid_init(void) goto out_chrdev; } cpuid_class->devnode = cpuid_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = cpuid_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -214,6 +216,7 @@ out_class: for_each_online_cpu(i) { cpuid_device_destroy(i); } + put_online_cpus(); class_destroy(cpuid_class); out_chrdev: __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); @@ -225,11 +228,13 @@ static void __exit cpuid_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) cpuid_device_destroy(cpu); class_destroy(cpuid_class); __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); + put_online_cpus(); } module_init(cpuid_init); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 3ae2ced..b158152 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = { .xlate = ioapic_xlate, }; +static void dt_add_ioapic_domain(unsigned int ioapic_num, + struct device_node *np) +{ + struct irq_domain *id; + struct mp_ioapic_gsi *gsi_cfg; + int ret; + int num; + + gsi_cfg = mp_ioapic_gsi_routing(ioapic_num); + num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1; + + id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops, + (void *)ioapic_num); + BUG_ON(!id); + if (gsi_cfg->gsi_base == 0) { + /* + * The first NR_IRQS_LEGACY irq descs are allocated in + * early_irq_init() and need just a mapping. The + * remaining irqs need both. All of them are preallocated + * and assigned so we can keep the 1:1 mapping which the ioapic + * is having. + */ + ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY); + if (ret) + pr_err("Error mapping legacy IRQs: %d\n", ret); + + if (num > NR_IRQS_LEGACY) { + ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY, + NR_IRQS_LEGACY, num - NR_IRQS_LEGACY); + if (ret) + pr_err("Error creating mapping for the " + "remaining IRQs: %d\n", ret); + } + irq_set_default_host(id); + } else { + ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num); + if (ret) + pr_err("Error creating IRQ mapping: %d\n", ret); + } +} + static void __init ioapic_add_ofnode(struct device_node *np) { struct resource r; @@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np) for (i = 0; i < nr_ioapics; i++) { if (r.start == mpc_ioapic_addr(i)) { - struct irq_domain *id; - struct mp_ioapic_gsi *gsi_cfg; - - gsi_cfg = mp_ioapic_gsi_routing(i); - - id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0, - &ioapic_irq_domain_ops, - (void*)i); - BUG_ON(!id); + dt_add_ioapic_domain(i, np); return; } } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ed858e9..df06ade 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1077,6 +1077,9 @@ void __init memblock_x86_fill(void) memblock_add(ei->addr, ei->size); } + /* throw away partial pages */ + memblock_trim_memory(PAGE_SIZE); + memblock_dump_all(); } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f288..88b725a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -57,6 +57,7 @@ #include <asm/cpufeature.h> #include <asm/alternative-asm.h> #include <asm/asm.h> +#include <asm/smap.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ #include <linux/elf-em.h> @@ -298,6 +299,21 @@ ENTRY(ret_from_fork) CFI_ENDPROC END(ret_from_fork) +ENTRY(ret_from_kernel_thread) + CFI_STARTPROC + pushl_cfi %eax + call schedule_tail + GET_THREAD_INFO(%ebp) + popl_cfi %eax + pushl_cfi $0x0202 # Reset kernel eflags + popfl_cfi + movl PT_EBP(%esp),%eax + call *PT_EBX(%esp) + movl $0,PT_EAX(%esp) + jmp syscall_exit + CFI_ENDPROC +ENDPROC(ret_from_kernel_thread) + /* * Interrupt exit functions should be protected against kprobes */ @@ -322,8 +338,7 @@ ret_from_intr: andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax #else /* - * We can be coming here from a syscall done in the kernel space, - * e.g. a failed kernel_execve(). + * We can be coming here from child spawned by kernel_thread(). */ movl PT_CS(%esp), %eax andl $SEGMENT_RPL_MASK, %eax @@ -407,7 +422,9 @@ sysenter_past_esp: */ cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault + ASM_STAC 1: movl (%ebp),%ebp + ASM_CLAC movl %ebp,PT_EBP(%esp) _ASM_EXTABLE(1b,syscall_fault) @@ -488,6 +505,7 @@ ENDPROC(ia32_sysenter_target) # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway + ASM_CLAC pushl_cfi %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) @@ -612,6 +630,10 @@ work_notifysig: # deal with pending signals and movl %esp, %eax jne work_notifysig_v86 # returning to kernel-space or # vm86-space +1: +#else + movl %esp, %eax +#endif TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) movb PT_CS(%esp), %bl @@ -622,24 +644,15 @@ work_notifysig: # deal with pending signals and call do_notify_resume jmp resume_userspace +#ifdef CONFIG_VM86 ALIGN work_notifysig_v86: pushl_cfi %ecx # save ti_flags for do_notify_resume call save_v86_state # %eax contains pt_regs pointer popl_cfi %ecx movl %eax, %esp -#else - movl %esp, %eax + jmp 1b #endif - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - movb PT_CS(%esp), %bl - andb $SEGMENT_RPL_MASK, %bl - cmpb $USER_RPL, %bl - jb resume_kernel - xorl %edx, %edx - call do_notify_resume - jmp resume_userspace END(work_pending) # perform syscall exit tracing @@ -670,6 +683,7 @@ END(syscall_exit_work) RING0_INT_FRAME # can't unwind into user space anyway syscall_fault: + ASM_CLAC GET_THREAD_INFO(%ebp) movl $-EFAULT,PT_EAX(%esp) jmp resume_userspace @@ -727,7 +741,6 @@ ENDPROC(ptregs_##name) PTREGSCALL1(iopl) PTREGSCALL0(fork) PTREGSCALL0(vfork) -PTREGSCALL3(execve) PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) @@ -825,6 +838,7 @@ END(interrupt) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + ASM_CLAC addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ SAVE_ALL TRACE_IRQS_OFF @@ -841,6 +855,7 @@ ENDPROC(common_interrupt) #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ + ASM_CLAC; \ pushl_cfi $~(nr); \ SAVE_ALL; \ TRACE_IRQS_OFF \ @@ -857,6 +872,7 @@ ENDPROC(name) ENTRY(coprocessor_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_coprocessor_error jmp error_code @@ -865,6 +881,7 @@ END(coprocessor_error) ENTRY(simd_coprocessor_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 #ifdef CONFIG_X86_INVD_BUG /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ @@ -886,6 +903,7 @@ END(simd_coprocessor_error) ENTRY(device_not_available) RING0_INT_FRAME + ASM_CLAC pushl_cfi $-1 # mark this as an int pushl_cfi $do_device_not_available jmp error_code @@ -906,6 +924,7 @@ END(native_irq_enable_sysexit) ENTRY(overflow) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_overflow jmp error_code @@ -914,6 +933,7 @@ END(overflow) ENTRY(bounds) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_bounds jmp error_code @@ -922,6 +942,7 @@ END(bounds) ENTRY(invalid_op) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_invalid_op jmp error_code @@ -930,6 +951,7 @@ END(invalid_op) ENTRY(coprocessor_segment_overrun) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_coprocessor_segment_overrun jmp error_code @@ -938,6 +960,7 @@ END(coprocessor_segment_overrun) ENTRY(invalid_TSS) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_invalid_TSS jmp error_code CFI_ENDPROC @@ -945,6 +968,7 @@ END(invalid_TSS) ENTRY(segment_not_present) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_segment_not_present jmp error_code CFI_ENDPROC @@ -952,6 +976,7 @@ END(segment_not_present) ENTRY(stack_segment) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_stack_segment jmp error_code CFI_ENDPROC @@ -959,6 +984,7 @@ END(stack_segment) ENTRY(alignment_check) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_alignment_check jmp error_code CFI_ENDPROC @@ -966,6 +992,7 @@ END(alignment_check) ENTRY(divide_error) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 # no error code pushl_cfi $do_divide_error jmp error_code @@ -975,6 +1002,7 @@ END(divide_error) #ifdef CONFIG_X86_MCE ENTRY(machine_check) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi machine_check_vector jmp error_code @@ -984,6 +1012,7 @@ END(machine_check) ENTRY(spurious_interrupt_bug) RING0_INT_FRAME + ASM_CLAC pushl_cfi $0 pushl_cfi $do_spurious_interrupt_bug jmp error_code @@ -994,16 +1023,6 @@ END(spurious_interrupt_bug) */ .popsection -ENTRY(kernel_thread_helper) - pushl $0 # fake return address for unwinder - CFI_STARTPROC - movl %edi,%eax - call *%esi - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -ENDPROC(kernel_thread_helper) - #ifdef CONFIG_XEN /* Xen doesn't set %esp to be precisely what the normal sysenter entrypoint expects, so fix it up before using the normal path. */ @@ -1016,7 +1035,7 @@ ENTRY(xen_sysenter_target) ENTRY(xen_hypervisor_callback) CFI_STARTPROC - pushl_cfi $0 + pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL TRACE_IRQS_OFF @@ -1058,14 +1077,16 @@ ENTRY(xen_failsafe_callback) 2: mov 8(%esp),%es 3: mov 12(%esp),%fs 4: mov 16(%esp),%gs + /* EAX == 0 => Category 1 (Bad segment) + EAX != 0 => Category 2 (Bad IRET) */ testl %eax,%eax popl_cfi %eax lea 16(%esp),%esp CFI_ADJUST_CFA_OFFSET -16 jz 5f addl $16,%esp - jmp iret_exc # EAX != 0 => Category 2 (Bad IRET) -5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment) + jmp iret_exc +5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp ret_from_exception CFI_ENDPROC @@ -1109,17 +1130,21 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1131,6 +1156,71 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, ®s->sp is used (see ptrace.h). + * Unfortunately, that means eflags must be at the same location + * as the current return ip is. We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -1171,9 +1261,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx @@ -1207,6 +1294,7 @@ return_to_handler: ENTRY(page_fault) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_page_fault ALIGN error_code: @@ -1279,6 +1367,7 @@ END(page_fault) ENTRY(debug) RING0_INT_FRAME + ASM_CLAC cmpl $ia32_sysenter_target,(%esp) jne debug_stack_correct FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn @@ -1303,6 +1392,7 @@ END(debug) */ ENTRY(nmi) RING0_INT_FRAME + ASM_CLAC pushl_cfi %eax movl %ss, %eax cmpw $__ESPFIX_SS, %ax @@ -1373,6 +1463,7 @@ END(nmi) ENTRY(int3) RING0_INT_FRAME + ASM_CLAC pushl_cfi $-1 # mark this as an int SAVE_ALL TRACE_IRQS_OFF @@ -1393,6 +1484,7 @@ END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) RING0_EC_FRAME + ASM_CLAC pushl_cfi $do_async_page_fault jmp error_code CFI_ENDPROC diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8..b51b2c7 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -56,6 +56,8 @@ #include <asm/ftrace.h> #include <asm/percpu.h> #include <asm/asm.h> +#include <asm/rcu.h> +#include <asm/smap.h> #include <linux/err.h> /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ @@ -68,25 +70,51 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) + +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else + movq 8(%rbp), %rsi +#endif +.endm ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -97,8 +125,78 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq %r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* Handlers can change the RIP */ + movq RIP(%rsp), %rax + movq %rax, SS+8(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -119,8 +217,12 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -128,20 +230,22 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return @@ -342,15 +446,15 @@ ENDPROC(native_usergs_sysret64) .macro SAVE_ARGS_IRQ cld /* start from rbp in pt_regs and jump over */ - movq_cfi rdi, RDI-RBP - movq_cfi rsi, RSI-RBP - movq_cfi rdx, RDX-RBP - movq_cfi rcx, RCX-RBP - movq_cfi rax, RAX-RBP - movq_cfi r8, R8-RBP - movq_cfi r9, R9-RBP - movq_cfi r10, R10-RBP - movq_cfi r11, R11-RBP + movq_cfi rdi, (RDI-RBP) + movq_cfi rsi, (RSI-RBP) + movq_cfi rdx, (RDX-RBP) + movq_cfi rcx, (RCX-RBP) + movq_cfi rax, (RAX-RBP) + movq_cfi r8, (R8-RBP) + movq_cfi r9, (R9-RBP) + movq_cfi r10, (R10-RBP) + movq_cfi r11, (R11-RBP) /* Save rbp so that we can unwind from get_irq_regs() */ movq_cfi rbp, 0 @@ -384,7 +488,7 @@ ENDPROC(native_usergs_sysret64) .endm ENTRY(save_rest) - PARTIAL_FRAME 1 REST_SKIP+8 + PARTIAL_FRAME 1 (REST_SKIP+8) movq 5*8+16(%rsp), %r11 /* save return address */ movq_cfi rbx, RBX+16 movq_cfi rbp, RBP+16 @@ -440,7 +544,7 @@ ENTRY(ret_from_fork) LOCK ; btr $TIF_FORK,TI_flags(%r8) - pushq_cfi kernel_eflags(%rip) + pushq_cfi $0x0002 popfq_cfi # reset kernel eflags call schedule_tail # rdi: 'prev' task parameter @@ -450,7 +554,7 @@ ENTRY(ret_from_fork) RESTORE_REST testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? - jz retint_restore_args + jz 1f testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET jnz int_ret_from_sys_call @@ -458,6 +562,14 @@ ENTRY(ret_from_fork) RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET jmp ret_from_sys_call # go to the SYSRET fastpath +1: + subq $REST_SKIP, %rsp # leave space for volatiles + CFI_ADJUST_CFA_OFFSET REST_SKIP + movq %rbp, %rdi + call *%rbx + movl $0, RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call CFI_ENDPROC END(ret_from_fork) @@ -465,7 +577,8 @@ END(ret_from_fork) * System call entry. Up to 6 arguments in registers are supported. * * SYSCALL does not save anything on the stack and does not change the - * stack pointer. + * stack pointer. However, it does mask the flags register for us, so + * CLD and CLAC are not needed. */ /* @@ -565,7 +678,7 @@ sysret_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi jmp sysret_check @@ -678,7 +791,7 @@ int_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF @@ -757,7 +870,6 @@ ENTRY(stub_execve) PARTIAL_FRAME 0 SAVE_REST FIXUP_TOP_OF_STACK %r11 - movq %rsp, %rcx call sys_execve RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) @@ -807,8 +919,7 @@ ENTRY(stub_x32_execve) PARTIAL_FRAME 0 SAVE_REST FIXUP_TOP_OF_STACK %r11 - movq %rsp, %rcx - call sys32_execve + call compat_sys_execve RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) RESTORE_REST @@ -884,6 +995,7 @@ END(interrupt) */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + ASM_CLAC XCPT_FRAME addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ @@ -974,7 +1086,7 @@ retint_careful: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi - call schedule + SCHEDULE_USER popq_cfi %rdi GET_THREAD_INFO(%rcx) DISABLE_INTERRUPTS(CLBR_NONE) @@ -1023,6 +1135,7 @@ END(common_interrupt) */ .macro apicinterrupt num sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME pushq_cfi $~(\num) .Lcommon_\sym: @@ -1077,6 +1190,7 @@ apicinterrupt IRQ_WORK_VECTOR \ */ .macro zeroentry sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1094,6 +1208,7 @@ END(\sym) .macro paranoidzeroentry sym do_sym ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1112,6 +1227,7 @@ END(\sym) #define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8) .macro paranoidzeroentry_ist sym do_sym ist ENTRY(\sym) + ASM_CLAC INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1131,6 +1247,7 @@ END(\sym) .macro errorentry sym do_sym ENTRY(\sym) + ASM_CLAC XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $ORIG_RAX-R15, %rsp @@ -1149,6 +1266,7 @@ END(\sym) /* error code is on the stack already */ .macro paranoiderrorentry sym do_sym ENTRY(\sym) + ASM_CLAC XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $ORIG_RAX-R15, %rsp @@ -1206,52 +1324,6 @@ bad_gs: jmp 2b .previous -ENTRY(kernel_thread_helper) - pushq $0 # fake return address - CFI_STARTPROC - /* - * Here we are in the child and the registers are set as they were - * at kernel_thread() invocation in the parent. - */ - call *%rsi - # exit - mov %eax, %edi - call do_exit - ud2 # padding for call trace - CFI_ENDPROC -END(kernel_thread_helper) - -/* - * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. - * - * C extern interface: - * extern long execve(const char *name, char **argv, char **envp) - * - * asm input arguments: - * rdi: name, rsi: argv, rdx: envp - * - * We want to fallback into: - * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) - * - * do_sys_execve asm fallback arguments: - * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack - */ -ENTRY(kernel_execve) - CFI_STARTPROC - FAKE_STACK_FRAME $0 - SAVE_ALL - movq %rsp,%rcx - call sys_execve - movq %rax, RAX(%rsp) - RESTORE_REST - testq %rax,%rax - je int_ret_from_sys_call - RESTORE_ARGS - UNFAKE_STACK_FRAME - ret - CFI_ENDPROC -END(kernel_execve) - /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC @@ -1363,7 +1435,7 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - pushq_cfi $0 + pushq_cfi $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL jmp error_exit CFI_ENDPROC @@ -1449,7 +1521,7 @@ paranoid_userspace: paranoid_schedule: TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_ANY) - call schedule + SCHEDULE_USER DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF jmp paranoid_userspace diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4..1d41402 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,21 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. + */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. + */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. */ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d42ab17..957a47a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -287,27 +287,28 @@ ENTRY(startup_32_smp) leal -__PAGE_OFFSET(%ecx),%esp default_entry: - /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. * * NOTE! If we are on a 486 we may have no cr4 at all! - * So we do not try to touch it unless we really have - * some bits in it to set. This won't work if the BSP - * implements cr4 but this AP does not -- very unlikely - * but be warned! The same applies to the pse feature - * if not equally supported. --macro - * - * NOTE! We have to correct for the fact that we're - * not yet offset PAGE_OFFSET.. + * Specifically, cr4 exists if and only if CPUID exists, + * which in turn exists if and only if EFLAGS.ID exists. */ -#define cr4_bits pa(mmu_cr4_features) - movl cr4_bits,%edx - andl %edx,%edx - jz 6f - movl %cr4,%eax # Turn on paging options (PSE,PAE,..) - orl %edx,%eax + movl $X86_EFLAGS_ID,%ecx + pushl %ecx + popfl + pushfl + popl %eax + pushl $0 + popfl + pushfl + popl %edx + xorl %edx,%eax + testl %ecx,%eax + jz 6f # No ID flag = no CPUID = no CR4 + + movl pa(mmu_cr4_features),%eax movl %eax,%cr4 testb $X86_CR4_PAE, %al # check if PAE is enabled diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f250431..675a050 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -19,24 +19,17 @@ #include <asm/fpu-internal.h> #include <asm/user.h> -#ifdef CONFIG_X86_64 -# include <asm/sigcontext32.h> -# include <asm/user32.h> -#else -# define save_i387_xstate_ia32 save_i387_xstate -# define restore_i387_xstate_ia32 restore_i387_xstate -# define _fpstate_ia32 _fpstate -# define _xstate_ia32 _xstate -# define sig_xstate_ia32_size sig_xstate_size -# define fx_sw_reserved_ia32 fx_sw_reserved -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -#endif - /* * Were we in an interrupt that interrupted kernel mode? * - * We can do a kernel_fpu_begin/end() pair *ONLY* if that + * For now, with eagerfpu we will return interrupted kernel FPU + * state as not-idle. TBD: Ideally we can change the return value + * to something like __thread_has_fpu(current). But we need to + * be careful of doing __thread_clear_has_fpu() before saving + * the FPU etc for supporting nested uses etc. For now, take + * the simple route! + * + * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that * pair does nothing at all: the thread must not have fpu (so * that we don't try to save the FPU state), and TS must * be set (so that the clts/stts pair does nothing that is @@ -44,6 +37,9 @@ */ static inline bool interrupted_kernel_fpu_idle(void) { + if (use_eager_fpu()) + return 0; + return !__thread_has_fpu(current) && (read_cr0() & X86_CR0_TS); } @@ -77,29 +73,29 @@ bool irq_fpu_usable(void) } EXPORT_SYMBOL(irq_fpu_usable); -void kernel_fpu_begin(void) +void __kernel_fpu_begin(void) { struct task_struct *me = current; - WARN_ON_ONCE(!irq_fpu_usable()); - preempt_disable(); if (__thread_has_fpu(me)) { __save_init_fpu(me); __thread_clear_has_fpu(me); - /* We do 'stts()' in kernel_fpu_end() */ - } else { + /* We do 'stts()' in __kernel_fpu_end() */ + } else if (!use_eager_fpu()) { this_cpu_write(fpu_owner_task, NULL); clts(); } } -EXPORT_SYMBOL(kernel_fpu_begin); +EXPORT_SYMBOL(__kernel_fpu_begin); -void kernel_fpu_end(void) +void __kernel_fpu_end(void) { - stts(); - preempt_enable(); + if (use_eager_fpu()) + math_state_restore(); + else + stts(); } -EXPORT_SYMBOL(kernel_fpu_end); +EXPORT_SYMBOL(__kernel_fpu_end); void unlazy_fpu(struct task_struct *tsk) { @@ -113,23 +109,15 @@ void unlazy_fpu(struct task_struct *tsk) } EXPORT_SYMBOL(unlazy_fpu); -#ifdef CONFIG_MATH_EMULATION -# define HAVE_HWFP (boot_cpu_data.hard_math) -#else -# define HAVE_HWFP 1 -#endif - -static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32); static struct i387_fxsave_struct fx_scratch __cpuinitdata; static void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; - clts(); if (cpu_has_fxsr) { memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); asm volatile("fxsave %0" : : "m" (fx_scratch)); @@ -138,7 +126,6 @@ static void __cpuinit mxcsr_feature_mask_init(void) mask = 0x0000ffbf; } mxcsr_feature_mask &= mask; - stts(); } static void __cpuinit init_thread_xstate(void) @@ -192,9 +179,8 @@ void __cpuinit fpu_init(void) init_thread_xstate(); mxcsr_feature_mask_init(); - /* clean state in init */ - current_thread_info()->status = 0; - clear_used_math(); + xsave_init(); + eager_fpu_init(); } void fpu_finit(struct fpu *fpu) @@ -205,12 +191,7 @@ void fpu_finit(struct fpu *fpu) } if (cpu_has_fxsr) { - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - - memset(fx, 0, xstate_size); - fx->cwd = 0x37f; - if (cpu_has_xmm) - fx->mxcsr = MXCSR_DEFAULT; + fx_finit(&fpu->state->fxsave); } else { struct i387_fsave_struct *fp = &fpu->state->fsave; memset(fp, 0, xstate_size); @@ -454,7 +435,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) * FXSR floating point environment conversions. */ -static void +void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -491,8 +472,8 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } -static void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) { struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; @@ -589,223 +570,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, } /* - * Signal frame handlers. - */ - -static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; - - fp->status = fp->swd; - if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) - return -1; - return 1; -} - -static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - struct user_i387_ia32_struct env; - int err = 0; - - convert_from_fxsr(&env, tsk); - if (__copy_to_user(buf, &env, sizeof(env))) - return -1; - - err |= __put_user(fx->swd, &buf->status); - err |= __put_user(X86_FXSR_MAGIC, &buf->magic); - if (err) - return -1; - - if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size)) - return -1; - return 1; -} - -static int save_i387_xsave(void __user *buf) -{ - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fx = buf; - int err = 0; - - - sanitize_i387_state(tsk); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. - * This will enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware applications can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - if (save_i387_fxsave(fx) < 0) - return -1; - - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32, - sizeof(struct _fpx_sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_ia32_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return -1; - - return 1; -} - -int save_i387_xstate_ia32(void __user *buf) -{ - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - struct task_struct *tsk = current; - - if (!used_math()) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size)) - return -EACCES; - /* - * This will cause a "finit" to be triggered by the next - * attempted FPU operation by the 'current' process. - */ - clear_used_math(); - - if (!HAVE_HWFP) { - return fpregs_soft_get(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) ? -1 : 1; - } - - unlazy_fpu(tsk); - - if (cpu_has_xsave) - return save_i387_xsave(fp); - if (cpu_has_fxsr) - return save_i387_fxsave(fp); - else - return save_i387_fsave(fp); -} - -static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) -{ - struct task_struct *tsk = current; - - return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, - sizeof(struct i387_fsave_struct)); -} - -static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, - unsigned int size) -{ - struct task_struct *tsk = current; - struct user_i387_ia32_struct env; - int err; - - err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], - size); - /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - if (err || __copy_from_user(&env, buf, sizeof(env))) - return 1; - convert_to_fxsr(tsk, &env); - - return 0; -} - -static int restore_i387_xsave(void __user *buf) -{ - struct _fpx_sw_bytes fx_sw_user; - struct _fpstate_ia32 __user *fx_user = - ((struct _fpstate_ia32 __user *) buf); - struct i387_fxsave_struct __user *fx = - (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; - struct xsave_hdr_struct *xsave_hdr = - ¤t->thread.fpu.state->xsave.xsave_hdr; - u64 mask; - int err; - - if (check_for_xstate(fx, buf, &fx_sw_user)) - goto fx_only; - - mask = fx_sw_user.xstate_bv; - - err = restore_i387_fxsave(buf, fx_sw_user.xstate_size); - - xsave_hdr->xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; - - /* - * Init the state that is not present in the memory layout - * and enabled by the OS. - */ - mask = ~(pcntxt_mask & ~mask); - xsave_hdr->xstate_bv &= mask; - - return err; -fx_only: - /* - * Couldn't find the extended state information in the memory - * layout. Restore the FP/SSE and init the other extended state - * enabled by the OS. - */ - xsave_hdr->xstate_bv = XSTATE_FPSSE; - return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct)); -} - -int restore_i387_xstate_ia32(void __user *buf) -{ - int err; - struct task_struct *tsk = current; - struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf; - - if (HAVE_HWFP) - clear_fpu(tsk); - - if (!buf) { - if (used_math()) { - clear_fpu(tsk); - clear_used_math(); - } - - return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size)) - return -EACCES; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (HAVE_HWFP) { - if (cpu_has_xsave) - err = restore_i387_xsave(buf); - else if (cpu_has_fxsr) - err = restore_i387_fxsave(fp, sizeof(struct - i387_fxsave_struct)); - else - err = restore_i387_fsave(fp); - } else { - err = fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, fp) != 0; - } - set_used_math(); - - return err; -} - -/* * FPU state for core dumps. * This is only used for a.out dumps now. * It is declared generically using elf_fpregset_t (which is diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 36d1853..9a5c460 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -263,7 +263,7 @@ static void i8259A_shutdown(void) * out of. */ outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ - outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-1 */ + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ } static struct syscore_ops i8259_syscore_ops = { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d44f782..e4595f1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count - + irq_stats(j)->irq_tlb_count); seq_printf(p, " Function call interrupts\n"); seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) @@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; - sum += irq_stats(cpu)->irq_tlb_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 3f61904..836f832 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; +#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; +#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751e..57916c0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,6 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } +#ifdef KPROBES_CAN_USE_FTRACE +static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); +} +#endif + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. @@ -599,6 +616,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { +#ifdef KPROBES_CAN_USE_FTRACE + if (kprobe_ftrace(p)) { + skip_singlestep(p, regs, kcb); + return 1; + } +#endif setup_singlestep(p, regs, kcb, 0); return 1; } @@ -1052,6 +1075,50 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c1d61ee..4180a87 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -247,7 +247,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) break; case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ + rcu_irq_enter(); + exit_idle(); kvm_async_pf_task_wait((u32)read_cr2()); + rcu_irq_exit(); break; case KVM_PV_REASON_PAGE_READY: rcu_irq_enter(); @@ -354,6 +357,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); + kvm_disable_steal_time(); } static int kvm_pv_reboot_notify(struct notifier_block *nb, @@ -396,9 +400,7 @@ void kvm_disable_steal_time(void) #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { -#ifdef CONFIG_KVM_CLOCK WARN_ON(kvm_register_clock("primary cpu clock")); -#endif kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 82746f9..7720ff5 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -75,20 +75,113 @@ struct microcode_amd { static struct equiv_cpu_entry *equiv_cpu_table; -/* page-sized ucode patch buffer */ -void *patch; +struct ucode_patch { + struct list_head plist; + void *data; + u32 patch_id; + u16 equiv_cpu; +}; + +static LIST_HEAD(pcache); + +static u16 find_equiv_id(unsigned int cpu) +{ + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + int i = 0; + + if (!equiv_cpu_table) + return 0; + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) + return equiv_cpu_table[i].equiv_cpu; + + i++; + } + return 0; +} + +static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) +{ + int i = 0; + + BUG_ON(!equiv_cpu_table); + + while (equiv_cpu_table[i].equiv_cpu != 0) { + if (equiv_cpu == equiv_cpu_table[i].equiv_cpu) + return equiv_cpu_table[i].installed_cpu; + i++; + } + return 0; +} + +/* + * a small, trivial cache of per-family ucode patches + */ +static struct ucode_patch *cache_find_patch(u16 equiv_cpu) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) + if (p->equiv_cpu == equiv_cpu) + return p; + return NULL; +} + +static void update_cache(struct ucode_patch *new_patch) +{ + struct ucode_patch *p; + + list_for_each_entry(p, &pcache, plist) { + if (p->equiv_cpu == new_patch->equiv_cpu) { + if (p->patch_id >= new_patch->patch_id) + /* we already have the latest patch */ + return; + + list_replace(&p->plist, &new_patch->plist); + kfree(p->data); + kfree(p); + return; + } + } + /* no patch found, add it */ + list_add_tail(&new_patch->plist, &pcache); +} + +static void free_cache(void) +{ + struct ucode_patch *p, *tmp; + + list_for_each_entry_safe(p, tmp, &pcache, plist) { + __list_del(p->plist.prev, p->plist.next); + kfree(p->data); + kfree(p); + } +} + +static struct ucode_patch *find_patch(unsigned int cpu) +{ + u16 equiv_id; + + equiv_id = find_equiv_id(cpu); + if (!equiv_id) + return NULL; + + return cache_find_patch(equiv_id); +} static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); + csig->sig = cpuid_eax(0x00000001); csig->rev = c->microcode; pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); return 0; } -static unsigned int verify_ucode_size(int cpu, u32 patch_size, +static unsigned int verify_patch_size(int cpu, u32 patch_size, unsigned int size) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -118,95 +211,37 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size, return patch_size; } -static u16 find_equiv_id(void) +static int apply_microcode_amd(int cpu) { - unsigned int current_cpu_id, i = 0; - - BUG_ON(equiv_cpu_table == NULL); - - current_cpu_id = cpuid_eax(0x00000001); - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (current_cpu_id == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } - return 0; -} + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_amd *mc_amd; + struct ucode_cpu_info *uci; + struct ucode_patch *p; + u32 rev, dummy; -/* - * we signal a good patch is found by returning its size > 0 - */ -static int get_matching_microcode(int cpu, const u8 *ucode_ptr, - unsigned int leftover_size, int rev, - unsigned int *current_size) -{ - struct microcode_header_amd *mc_hdr; - unsigned int actual_size, patch_size; - u16 equiv_cpu_id; + BUG_ON(raw_smp_processor_id() != cpu); - /* size of the current patch we're staring at */ - patch_size = *(u32 *)(ucode_ptr + 4); - *current_size = patch_size + SECTION_HDR_SIZE; + uci = ucode_cpu_info + cpu; - equiv_cpu_id = find_equiv_id(); - if (!equiv_cpu_id) + p = find_patch(cpu); + if (!p) return 0; - /* - * let's look at the patch header itself now - */ - mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE); + mc_amd = p->data; + uci->mc = p->data; - if (mc_hdr->processor_rev_id != equiv_cpu_id) - return 0; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* ucode might be chipset specific -- currently we don't support this */ - if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { - pr_err("CPU%d: chipset specific code not yet supported\n", - cpu); + /* need to apply patch? */ + if (rev >= mc_amd->hdr.patch_id) { + c->microcode = rev; return 0; } - if (mc_hdr->patch_id <= rev) - return 0; - - /* - * now that the header looks sane, verify its size - */ - actual_size = verify_ucode_size(cpu, patch_size, leftover_size); - if (!actual_size) - return 0; - - /* clear the patch buffer */ - memset(patch, 0, PAGE_SIZE); - - /* all looks ok, get the binary patch */ - get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size); - - return actual_size; -} - -static int apply_microcode_amd(int cpu) -{ - u32 rev, dummy; - int cpu_num = raw_smp_processor_id(); - struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; - struct microcode_amd *mc_amd = uci->mc; - struct cpuinfo_x86 *c = &cpu_data(cpu); - - /* We should bind the task to the CPU */ - BUG_ON(cpu_num != cpu); - - if (mc_amd == NULL) - return 0; - wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - /* get patch id after patching */ - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* check current patch id and patch's id for match */ + /* verify patch application was successful */ + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev != mc_amd->hdr.patch_id) { pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); @@ -238,7 +273,7 @@ static int install_equiv_cpu_table(const u8 *buf) return -ENOMEM; } - get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); + memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size); /* add header length */ return size + CONTAINER_HDR_SZ; @@ -250,61 +285,113 @@ static void free_equiv_cpu_table(void) equiv_cpu_table = NULL; } -static enum ucode_state -generic_load_microcode(int cpu, const u8 *data, size_t size) +static void cleanup(void) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - struct microcode_header_amd *mc_hdr = NULL; - unsigned int mc_size, leftover, current_size = 0; + free_equiv_cpu_table(); + free_cache(); +} + +/* + * We return the current size even if some of the checks failed so that + * we can skip over the next patch. If we return a negative value, we + * signal a grave error like a memory allocation has failed and the + * driver cannot continue functioning normally. In such cases, we tear + * down everything we've used up so far and exit. + */ +static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + struct microcode_header_amd *mc_hdr; + struct ucode_patch *patch; + unsigned int patch_size, crnt_size, ret; + u32 proc_fam; + u16 proc_id; + + patch_size = *(u32 *)(fw + 4); + crnt_size = patch_size + SECTION_HDR_SIZE; + mc_hdr = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE); + proc_id = mc_hdr->processor_rev_id; + + proc_fam = find_cpu_family_by_equiv_cpu(proc_id); + if (!proc_fam) { + pr_err("No patch family for equiv ID: 0x%04x\n", proc_id); + return crnt_size; + } + + /* check if patch is for the current family */ + proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff); + if (proc_fam != c->x86) + return crnt_size; + + if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { + pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n", + mc_hdr->patch_id); + return crnt_size; + } + + ret = verify_patch_size(cpu, patch_size, leftover); + if (!ret) { + pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id); + return crnt_size; + } + + patch = kzalloc(sizeof(*patch), GFP_KERNEL); + if (!patch) { + pr_err("Patch allocation failure.\n"); + return -EINVAL; + } + + patch->data = kzalloc(patch_size, GFP_KERNEL); + if (!patch->data) { + pr_err("Patch data allocation failure.\n"); + kfree(patch); + return -EINVAL; + } + + /* All looks ok, copy patch... */ + memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size); + INIT_LIST_HEAD(&patch->plist); + patch->patch_id = mc_hdr->patch_id; + patch->equiv_cpu = proc_id; + + /* ... and add to cache. */ + update_cache(patch); + + return crnt_size; +} + +static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) +{ + enum ucode_state ret = UCODE_ERROR; + unsigned int leftover; + u8 *fw = (u8 *)data; + int crnt_size = 0; int offset; - const u8 *ucode_ptr = data; - void *new_mc = NULL; - unsigned int new_rev = uci->cpu_sig.rev; - enum ucode_state state = UCODE_ERROR; - offset = install_equiv_cpu_table(ucode_ptr); + offset = install_equiv_cpu_table(data); if (offset < 0) { pr_err("failed to create equivalent cpu table\n"); - goto out; + return ret; } - ucode_ptr += offset; + fw += offset; leftover = size - offset; - if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) { + if (*(u32 *)fw != UCODE_UCODE_TYPE) { pr_err("invalid type field in container file section header\n"); - goto free_table; + free_equiv_cpu_table(); + return ret; } while (leftover) { - mc_size = get_matching_microcode(cpu, ucode_ptr, leftover, - new_rev, ¤t_size); - if (mc_size) { - mc_hdr = patch; - new_mc = patch; - new_rev = mc_hdr->patch_id; - goto out_ok; - } - - ucode_ptr += current_size; - leftover -= current_size; - } + crnt_size = verify_and_add_patch(cpu, fw, leftover); + if (crnt_size < 0) + return ret; - if (!new_mc) { - state = UCODE_NFOUND; - goto free_table; + fw += crnt_size; + leftover -= crnt_size; } -out_ok: - uci->mc = new_mc; - state = UCODE_OK; - pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", - cpu, uci->cpu_sig.rev, new_rev); - -free_table: - free_equiv_cpu_table(); - -out: - return state; + return UCODE_OK; } /* @@ -315,7 +402,7 @@ out: * * This legacy file is always smaller than 2K in size. * - * Starting at family 15h they are in family specific firmware files: + * Beginning with family 15h, they are in family-specific firmware files: * * amd-ucode/microcode_amd_fam15h.bin * amd-ucode/microcode_amd_fam16h.bin @@ -323,12 +410,17 @@ out: * * These might be larger than 2K. */ -static enum ucode_state request_microcode_amd(int cpu, struct device *device) +static enum ucode_state request_microcode_amd(int cpu, struct device *device, + bool refresh_fw) { char fw_name[36] = "amd-ucode/microcode_amd.bin"; - const struct firmware *fw; - enum ucode_state ret = UCODE_NFOUND; struct cpuinfo_x86 *c = &cpu_data(cpu); + enum ucode_state ret = UCODE_NFOUND; + const struct firmware *fw; + + /* reload ucode container only on the boot cpu */ + if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index) + return UCODE_OK; if (c->x86 >= 0x15) snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); @@ -344,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device) goto fw_release; } - ret = generic_load_microcode(cpu, fw->data, fw->size); + /* free old equiv table */ + free_equiv_cpu_table(); + + ret = load_microcode_amd(cpu, fw->data, fw->size); + if (ret != UCODE_OK) + cleanup(); -fw_release: + fw_release: release_firmware(fw); -out: + out: return ret; } @@ -383,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void) return NULL; } - patch = (void *)get_zeroed_page(GFP_KERNEL); - if (!patch) - return NULL; - return µcode_amd_ops; } void __exit exit_amd_microcode(void) { - free_page((unsigned long)patch); + cleanup(); } diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 9e5bcf1..3a04b22 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -279,19 +279,18 @@ static struct platform_device *microcode_pdev; static int reload_for_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + enum ucode_state ustate; int err = 0; - if (uci->valid) { - enum ucode_state ustate; - - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); - if (ustate == UCODE_OK) - apply_microcode_on_target(cpu); - else - if (ustate == UCODE_ERROR) - err = -EINVAL; - } + if (!uci->valid) + return err; + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, true); + if (ustate == UCODE_OK) + apply_microcode_on_target(cpu); + else + if (ustate == UCODE_ERROR) + err = -EINVAL; return err; } @@ -373,18 +372,15 @@ static void microcode_fini_cpu(int cpu) static enum ucode_state microcode_resume_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - - if (!uci->mc) - return UCODE_NFOUND; - pr_debug("CPU%d updated upon resume\n", cpu); - apply_microcode_on_target(cpu); + + if (apply_microcode_on_target(cpu)) + return UCODE_ERROR; return UCODE_OK; } -static enum ucode_state microcode_init_cpu(int cpu) +static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) { enum ucode_state ustate; @@ -395,7 +391,8 @@ static enum ucode_state microcode_init_cpu(int cpu) if (system_state != SYSTEM_RUNNING) return UCODE_NFOUND; - ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); + ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev, + refresh_fw); if (ustate == UCODE_OK) { pr_debug("CPU%d updated upon init\n", cpu); @@ -408,14 +405,11 @@ static enum ucode_state microcode_init_cpu(int cpu) static enum ucode_state microcode_update_cpu(int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - enum ucode_state ustate; if (uci->valid) - ustate = microcode_resume_cpu(cpu); - else - ustate = microcode_init_cpu(cpu); + return microcode_resume_cpu(cpu); - return ustate; + return microcode_init_cpu(cpu, false); } static int mc_device_add(struct device *dev, struct subsys_interface *sif) @@ -431,7 +425,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) + if (microcode_init_cpu(cpu, true) == UCODE_ERROR) return -EINVAL; return err; @@ -480,34 +474,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) struct device *dev; dev = get_cpu_device(cpu); - switch (action) { + + switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: - case CPU_ONLINE_FROZEN: microcode_update_cpu(cpu); - case CPU_DOWN_FAILED: - case CPU_DOWN_FAILED_FROZEN: pr_debug("CPU%d added\n", cpu); + /* + * "break" is missing on purpose here because we want to fall + * through in order to create the sysfs group. + */ + + case CPU_DOWN_FAILED: if (sysfs_create_group(&dev->kobj, &mc_attr_group)) pr_err("Failed to create group for CPU%d\n", cpu); break; + case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&dev->kobj, &mc_attr_group); pr_debug("CPU%d removed\n", cpu); break; /* + * case CPU_DEAD: + * * When a CPU goes offline, don't free up or invalidate the copy of * the microcode in kernel memory, so that we can reuse it when the * CPU comes back online without unnecessarily requesting the userspace * for it again. */ - case CPU_UP_CANCELED_FROZEN: - /* The CPU refused to come up during a system resume */ - microcode_fini_cpu(cpu); - break; } + + /* The CPU refused to come up during a system resume */ + if (action == CPU_UP_CANCELED_FROZEN) + microcode_fini_cpu(cpu); + return NOTIFY_OK; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0327e2b..3544aed 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } -static enum ucode_state request_microcode_fw(int cpu, struct device *device) +static enum ucode_state request_microcode_fw(int cpu, struct device *device, + bool refresh_fw) { char name[30]; struct cpuinfo_x86 *c = &cpu_data(cpu); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index eb11369..a7c5661 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -257,12 +257,14 @@ static int __init msr_init(void) goto out_chrdev; } msr_class->devnode = msr_devnode; + get_online_cpus(); for_each_online_cpu(i) { err = msr_device_create(i); if (err != 0) goto out_class; } register_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); err = 0; goto out; @@ -271,6 +273,7 @@ out_class: i = 0; for_each_online_cpu(i) msr_device_destroy(i); + put_online_cpus(); class_destroy(msr_class); out_chrdev: __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); @@ -281,11 +284,13 @@ out: static void __exit msr_exit(void) { int cpu = 0; + get_online_cpus(); for_each_online_cpu(cpu) msr_device_destroy(cpu); class_destroy(msr_class); __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); + put_online_cpus(); } module_init(msr_init); diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 0000000..e309cc5 --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,105 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <linux/stddef.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 0bc72e2..d5f15c3 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -150,7 +150,7 @@ static struct resource *find_oprom(struct pci_dev *pdev) return oprom; } -void *pci_map_biosrom(struct pci_dev *pdev) +void __iomem *pci_map_biosrom(struct pci_dev *pdev) { struct resource *oprom = find_oprom(pdev); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ef6a845..b644e1c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -66,15 +66,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { int ret; - unlazy_fpu(src); - *dst = *src; if (fpu_allocated(&src->thread.fpu)) { memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); ret = fpu_alloc(&dst->thread.fpu); if (ret) return ret; - fpu_copy(&dst->thread.fpu, &src->thread.fpu); + fpu_copy(dst, src); } return 0; } @@ -97,16 +95,6 @@ void arch_task_cache_init(void) SLAB_PANIC | SLAB_NOTRACK, NULL); } -static inline void drop_fpu(struct task_struct *tsk) -{ - /* - * Forget coprocessor state.. - */ - tsk->fpu_counter = 0; - clear_fpu(tsk); - clear_used_math(); -} - /* * Free current thread data structures etc.. */ @@ -163,7 +151,13 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - drop_fpu(tsk); + drop_init_fpu(tsk); + /* + * Free the FPU state for non xsave platforms. They get reallocated + * lazily at the first use. + */ + if (!use_eager_fpu()) + free_thread_xstate(tsk); } static void hard_disable_TSC(void) @@ -299,71 +293,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp, } /* - * This gets run with %si containing the - * function to call, and %di containing - * the "args". - */ -extern void kernel_thread_helper(void); - -/* - * Create a kernel thread - */ -int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.si = (unsigned long) fn; - regs.di = (unsigned long) arg; - -#ifdef CONFIG_X86_32 - regs.ds = __USER_DS; - regs.es = __USER_DS; - regs.fs = __KERNEL_PERCPU; - regs.gs = __KERNEL_STACK_CANARY; -#else - regs.ss = __KERNEL_DS; -#endif - - regs.orig_ax = -1; - regs.ip = (unsigned long) kernel_thread_helper; - regs.cs = __KERNEL_CS | get_kernel_rpl(); - regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; - - /* Ok, create the new process.. */ - return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - -/* - * sys_execve() executes a new program. - */ -long sys_execve(const char __user *name, - const char __user *const __user *argv, - const char __user *const __user *envp, struct pt_regs *regs) -{ - long error; - char *filename; - - filename = getname(name); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - return error; - error = do_execve(filename, argv, envp, regs); - -#ifdef CONFIG_X86_32 - if (error == 0) { - /* Make sure we don't return using sysenter.. */ - set_thread_flag(TIF_IRET); - } -#endif - - putname(filename); - return error; -} - -/* * Idle related variables and functions */ unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 516fa18..44e0bff 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -57,6 +57,7 @@ #include <asm/switch_to.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); +asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread"); /* * Return saved PC of a blocked thread. @@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task) } int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long unused, + unsigned long arg, struct task_struct *p, struct pt_regs *regs) { - struct pt_regs *childregs; + struct pt_regs *childregs = task_pt_regs(p); struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + p->thread.sp = (unsigned long) childregs; + p->thread.sp0 = (unsigned long) (childregs+1); + + if (unlikely(!regs)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); + p->thread.ip = (unsigned long) ret_from_kernel_thread; + task_user_gs(p) = __KERNEL_STACK_CANARY; + childregs->ds = __USER_DS; + childregs->es = __USER_DS; + childregs->fs = __KERNEL_PERCPU; + childregs->bx = sp; /* function */ + childregs->bp = arg; + childregs->orig_ax = -1; + childregs->cs = __KERNEL_CS | get_kernel_rpl(); + childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; + p->fpu_counter = 0; + p->thread.io_bitmap_ptr = NULL; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + return 0; + } *childregs = *regs; childregs->ax = 0; childregs->sp = sp; - p->thread.sp = (unsigned long) childregs; - p->thread.sp0 = (unsigned long) (childregs+1); - p->thread.ip = (unsigned long) ret_from_fork; - task_user_gs(p) = get_user_gs(regs); p->fpu_counter = 0; @@ -190,10 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; + regs->flags = X86_EFLAGS_IF; /* - * Free the old FP and other extended state + * force it to the iret return path by making it look as if there was + * some work pending. */ - free_thread_xstate(current); + set_thread_flag(TIF_NOTIFY_RESUME); } EXPORT_SYMBOL_GPL(start_thread); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0a980c9..16c6365 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls) } int copy_thread(unsigned long clone_flags, unsigned long sp, - unsigned long unused, + unsigned long arg, struct task_struct *p, struct pt_regs *regs) { int err; struct pt_regs *childregs; struct task_struct *me = current; - childregs = ((struct pt_regs *) - (THREAD_SIZE + task_stack_page(p))) - 1; - *childregs = *regs; - - childregs->ax = 0; - if (user_mode(regs)) - childregs->sp = sp; - else - childregs->sp = (unsigned long)childregs; - + p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; + childregs = task_pt_regs(p); p->thread.sp = (unsigned long) childregs; - p->thread.sp0 = (unsigned long) (childregs+1); p->thread.usersp = me->thread.usersp; - set_tsk_thread_flag(p, TIF_FORK); - p->fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; @@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); + + if (unlikely(!regs)) { + /* kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); + childregs->sp = (unsigned long)childregs; + childregs->ss = __KERNEL_DS; + childregs->bx = sp; /* function */ + childregs->bp = arg; + childregs->orig_ax = -1; + childregs->cs = __KERNEL_CS | get_kernel_rpl(); + childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; + return 0; + } + *childregs = *regs; + + childregs->ax = 0; + childregs->sp = sp; err = -ENOMEM; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); @@ -232,10 +239,6 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip, regs->cs = _cs; regs->ss = _ss; regs->flags = X86_EFLAGS_IF; - /* - * Free the old FP and other extended state - */ - free_thread_xstate(current); } void diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c4c6a5c..b00b33a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -21,6 +21,7 @@ #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/rcupdate.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -1332,9 +1333,6 @@ static const struct user_regset_view user_x86_64_view = { #define genregs32_get genregs_get #define genregs32_set genregs_set -#define user_i387_ia32_struct user_i387_struct -#define user32_fxsr_struct user_fxsr_struct - #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -1463,6 +1461,8 @@ long syscall_trace_enter(struct pt_regs *regs) { long ret = 0; + rcu_user_exit(); + /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. @@ -1526,4 +1526,6 @@ void syscall_trace_leave(struct pt_regs *regs) !test_thread_flag(TIF_SYSCALL_EMU); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, step); + + rcu_user_enter(); } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 52190a9..4e8ba39 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -358,14 +358,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"), }, }, - { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */ - .callback = set_bios_reboot, - .ident = "CompuLab SBC-FITPC2", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"), - DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), - }, - }, { /* Handle problems with rebooting on ASUS P4S800 */ .callback = set_bios_reboot, .ident = "ASUS P4S800", diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index af6db6e..4929c1b 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -225,7 +225,7 @@ static struct platform_device rtc_device = { static __init int add_rtc_cmos(void) { #ifdef CONFIG_PNP - static const char *ids[] __initconst = + static const char * const const ids[] __initconst = { "PNP0b00", "PNP0b01", "PNP0b02", }; struct pnp_dev *dev; struct pnp_id *id; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80..ca45696 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -68,6 +68,7 @@ #include <linux/percpu.h> #include <linux/crash_dump.h> #include <linux/tboot.h> +#include <linux/jiffies.h> #include <video/edid.h> @@ -919,8 +920,22 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { - max_pfn_mapped = init_memory_mapping(1UL<<32, - max_pfn<<PAGE_SHIFT); + int i; + unsigned long start, end; + unsigned long start_pfn, end_pfn; + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, + NULL) { + + end = PFN_PHYS(end_pfn); + if (end <= (1UL<<32)) + continue; + + start = PFN_PHYS(start_pfn); + max_pfn_mapped = init_memory_mapping( + max((1UL<<32), start), end); + } + /* can we preseve max_low_pfn ?*/ max_low_pfn = max_pfn; } @@ -957,13 +972,11 @@ void __init setup_arch(char **cmdline_p) initmem_init(); memblock_find_dma_reserve(); -#ifdef CONFIG_KVM_CLOCK +#ifdef CONFIG_KVM_GUEST kvmclock_init(); #endif - x86_init.paging.pagetable_setup_start(swapper_pg_dir); - paging_init(); - x86_init.paging.pagetable_setup_done(swapper_pg_dir); + x86_init.paging.pagetable_init(); if (boot_cpu_data.cpuid_level >= 0) { /* A CPU has %cr4 if and only if it has CPUID */ @@ -1034,6 +1047,20 @@ void __init setup_arch(char **cmdline_p) mcheck_init(); arch_init_ideal_nops(); + + register_refined_jiffies(CLOCK_TICK_RATE); + +#ifdef CONFIG_EFI + /* Once setup is done above, disable efi_enabled on mismatched + * firmware/kernel archtectures since there is no support for + * runtime services. + */ + if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + efi_enabled = 0; + } +#endif } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b280908..70b27ee 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -114,11 +114,12 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, regs->orig_ax = -1; /* disable syscall checks */ get_user_ex(buf, &sc->fpstate); - err |= restore_i387_xstate(buf); get_user_ex(*pax, &sc->ax); } get_user_catch(err); + err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); + return err; } @@ -206,35 +207,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { /* Default to using normal stack */ + unsigned long math_size = 0; unsigned long sp = regs->sp; + unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); -#ifdef CONFIG_X86_64 /* redzone */ - sp -= 128; -#endif /* CONFIG_X86_64 */ + if (config_enabled(CONFIG_X86_64)) + sp -= 128; if (!onsigstack) { /* This is the X/Open sanctioned signal stack switching. */ if (ka->sa.sa_flags & SA_ONSTACK) { if (current->sas_ss_size) sp = current->sas_ss_sp + current->sas_ss_size; - } else { -#ifdef CONFIG_X86_32 - /* This is the legacy signal stack switching. */ - if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) + } else if (config_enabled(CONFIG_X86_32) && + (regs->ss & 0xffff) != __USER_DS && + !(ka->sa.sa_flags & SA_RESTORER) && + ka->sa.sa_restorer) { + /* This is the legacy signal stack switching. */ sp = (unsigned long) ka->sa.sa_restorer; -#endif /* CONFIG_X86_32 */ } } if (used_math()) { - sp -= sig_xstate_size; -#ifdef CONFIG_X86_64 - sp = round_down(sp, 64); -#endif /* CONFIG_X86_64 */ + sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), + &buf_fx, &math_size); *fpstate = (void __user *)sp; } @@ -247,8 +245,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, if (onsigstack && !likely(on_sig_stack(sp))) return (void __user *)-1L; - /* save i387 state */ - if (used_math() && save_i387_xstate(*fpstate) < 0) + /* save i387 and extended state */ + if (used_math() && + save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; return (void __user *)sp; @@ -357,7 +356,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sig, &frame->sig); put_user_ex(&frame->info, &frame->pinfo); put_user_ex(&frame->uc, &frame->puc); - err |= copy_siginfo_to_user(&frame->info, info); /* Create the ucontext. */ if (cpu_has_xsave) @@ -369,9 +367,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. */ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -388,6 +383,11 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, */ put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); } put_user_catch(err); + + err |= copy_siginfo_to_user(&frame->info, info); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) return -EFAULT; @@ -436,8 +436,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(sas_ss_flags(regs->sp), &frame->uc.uc_stack.ss_flags); put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); /* Set up to return from userspace. If provided, use a stub already in userspace. */ @@ -450,6 +448,9 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } } put_user_catch(err); + err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) return -EFAULT; @@ -474,6 +475,75 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } #endif /* CONFIG_X86_32 */ +static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, + siginfo_t *info, compat_sigset_t *set, + struct pt_regs *regs) +{ +#ifdef CONFIG_X86_X32_ABI + struct rt_sigframe_x32 __user *frame; + void __user *restorer; + int err = 0; + void __user *fpstate = NULL; + + frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + + if (ka->sa.sa_flags & SA_SIGINFO) { + if (copy_siginfo_to_user32(&frame->info, info)) + return -EFAULT; + } + + put_user_try { + /* Create the ucontext. */ + if (cpu_has_xsave) + put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); + else + put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(0, &frame->uc.uc_link); + put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); + put_user_ex(sas_ss_flags(regs->sp), + &frame->uc.uc_stack.ss_flags); + put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); + put_user_ex(0, &frame->uc.uc__pad0); + + if (ka->sa.sa_flags & SA_RESTORER) { + restorer = ka->sa.sa_restorer; + } else { + /* could use a vstub here */ + restorer = NULL; + err |= -EFAULT; + } + put_user_ex(restorer, &frame->pretcode); + } put_user_catch(err); + + err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, + regs, set->sig[0]); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + + if (err) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->sp = (unsigned long) frame; + regs->ip = (unsigned long) ka->sa.sa_handler; + + /* We use the x32 calling convention here... */ + regs->di = sig; + regs->si = (unsigned long) &frame->info; + regs->dx = (unsigned long) &frame->uc; + + loadsegment(ds, __USER_DS); + loadsegment(es, __USER_DS); + + regs->cs = __USER_CS; + regs->ss = __USER_DS; +#endif /* CONFIG_X86_X32_ABI */ + + return 0; +} + #ifdef CONFIG_X86_32 /* * Atomically swap in the new signal mask, and wait for a signal. @@ -612,55 +682,22 @@ static int signr_convert(int sig) return sig; } -#ifdef CONFIG_X86_32 - -#define is_ia32 1 -#define ia32_setup_frame __setup_frame -#define ia32_setup_rt_frame __setup_rt_frame - -#else /* !CONFIG_X86_32 */ - -#ifdef CONFIG_IA32_EMULATION -#define is_ia32 test_thread_flag(TIF_IA32) -#else /* !CONFIG_IA32_EMULATION */ -#define is_ia32 0 -#endif /* CONFIG_IA32_EMULATION */ - -#ifdef CONFIG_X86_X32_ABI -#define is_x32 test_thread_flag(TIF_X32) - -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs); -#else /* !CONFIG_X86_X32_ABI */ -#define is_x32 0 -#endif /* CONFIG_X86_X32_ABI */ - -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs *regs); - -#endif /* CONFIG_X86_32 */ - static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, struct pt_regs *regs) { int usig = signr_convert(sig); sigset_t *set = sigmask_to_save(); + compat_sigset_t *cset = (compat_sigset_t *) set; /* Set up the stack frame */ - if (is_ia32) { + if (is_ia32_frame()) { if (ka->sa.sa_flags & SA_SIGINFO) - return ia32_setup_rt_frame(usig, ka, info, set, regs); + return ia32_setup_rt_frame(usig, ka, info, cset, regs); else - return ia32_setup_frame(usig, ka, set, regs); -#ifdef CONFIG_X86_X32_ABI - } else if (is_x32) { - return x32_setup_rt_frame(usig, ka, info, - (compat_sigset_t *)set, regs); -#endif + return ia32_setup_frame(usig, ka, cset, regs); + } else if (is_x32_frame()) { + return x32_setup_rt_frame(usig, ka, info, cset, regs); } else { return __setup_rt_frame(sig, ka, info, set, regs); } @@ -779,16 +816,16 @@ static void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { + rcu_user_exit(); + #ifdef CONFIG_X86_MCE /* notify userspace of pending MCEs */ if (thread_info_flags & _TIF_MCE_NOTIFY) mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ - if (thread_info_flags & _TIF_UPROBE) { - clear_thread_flag(TIF_UPROBE); + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); - } /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) @@ -801,9 +838,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) fire_user_return_notifiers(); -#ifdef CONFIG_X86_32 - clear_thread_flag(TIF_IRET); -#endif /* CONFIG_X86_32 */ + rcu_user_enter(); } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) @@ -824,72 +859,6 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) } #ifdef CONFIG_X86_X32_ABI -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, - struct pt_regs *regs) -{ - struct rt_sigframe_x32 __user *frame; - void __user *restorer; - int err = 0; - void __user *fpstate = NULL; - - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); - - if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - return -EFAULT; - - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, info)) - return -EFAULT; - } - - put_user_try { - /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); - put_user_ex(0, &frame->uc.uc_link); - put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); - put_user_ex(sas_ss_flags(regs->sp), - &frame->uc.uc_stack.ss_flags); - put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - put_user_ex(0, &frame->uc.uc__pad0); - err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, - regs, set->sig[0]); - err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - - if (ka->sa.sa_flags & SA_RESTORER) { - restorer = ka->sa.sa_restorer; - } else { - /* could use a vstub here */ - restorer = NULL; - err |= -EFAULT; - } - put_user_ex(restorer, &frame->pretcode); - } put_user_catch(err); - - if (err) - return -EFAULT; - - /* Set up registers for signal handler */ - regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; - - /* We use the x32 calling convention here... */ - regs->di = sig; - regs->si = (unsigned long) &frame->info; - regs->dx = (unsigned long) &frame->uc; - - loadsegment(ds, __USER_DS); - loadsegment(es, __USER_DS); - - regs->cs = __USER_CS; - regs->ss = __USER_DS; - - return 0; -} - asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) { struct rt_sigframe_x32 __user *frame; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c5a8c3..c80a33b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) unsigned long boot_error = 0; int timeout; - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); idle->thread.sp = (unsigned long) (((struct pt_regs *) (THREAD_SIZE + task_stack_page(idle))) - 1); @@ -1053,20 +1054,6 @@ out: preempt_enable(); } -void arch_disable_nonboot_cpus_begin(void) -{ - /* - * Avoid the smp alternatives switch during the disable_nonboot_cpus(). - * In the suspend path, we will be back in the SMP mode shortly anyways. - */ - skip_smp_alternatives = true; -} - -void arch_disable_nonboot_cpus_end(void) -{ - skip_smp_alternatives = false; -} - void arch_enable_nonboot_cpus_begin(void) { set_mtrr_aps_delayed_init(); @@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu) if (per_cpu(cpu_state, cpu) == CPU_DEAD) { if (system_state == SYSTEM_RUNNING) pr_info("CPU %u is now offline\n", cpu); - - if (1 == num_online_cpus()) - alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c346d11..cd3b243 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -157,6 +157,33 @@ static int enable_single_step(struct task_struct *child) return 1; } +void set_task_blockstep(struct task_struct *task, bool on) +{ + unsigned long debugctl; + + /* + * Ensure irq/preemption can't change debugctl in between. + * Note also that both TIF_BLOCKSTEP and debugctl should + * be changed atomically wrt preemption. + * FIXME: this means that set/clear TIF_BLOCKSTEP is simply + * wrong if task != current, SIGKILL can wakeup the stopped + * tracee and set/clear can play with the running task, this + * can confuse the next __switch_to_xtra(). + */ + local_irq_disable(); + debugctl = get_debugctlmsr(); + if (on) { + debugctl |= DEBUGCTLMSR_BTF; + set_tsk_thread_flag(task, TIF_BLOCKSTEP); + } else { + debugctl &= ~DEBUGCTLMSR_BTF; + clear_tsk_thread_flag(task, TIF_BLOCKSTEP); + } + if (task == current) + update_debugctlmsr(debugctl); + local_irq_enable(); +} + /* * Enable single or block step. */ @@ -169,19 +196,10 @@ static void enable_step(struct task_struct *child, bool block) * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - set_tsk_thread_flag(child, TIF_BLOCKSTEP); - } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (enable_single_step(child) && block) + set_task_blockstep(child, true); + else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) @@ -199,13 +217,8 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - clear_tsk_thread_flag(child, TIF_BLOCKSTEP); - } + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) + set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c deleted file mode 100644 index 0b0cb5f..0000000 --- a/arch/x86/kernel/sys_i386_32.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * This file contains various random system calls that - * have a non-standard calling sequence on the Linux/i386 - * platform. - */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/smp.h> -#include <linux/sem.h> -#include <linux/msg.h> -#include <linux/shm.h> -#include <linux/stat.h> -#include <linux/syscalls.h> -#include <linux/mman.h> -#include <linux/file.h> -#include <linux/utsname.h> -#include <linux/ipc.h> - -#include <linux/uaccess.h> -#include <linux/unistd.h> - -#include <asm/syscalls.h> - -/* - * Do a system call from kernel instead of calling sys_execve so we - * end up with proper pt_regs. - */ -int kernel_execve(const char *filename, - const char *const argv[], - const char *const envp[]) -{ - long __res; - asm volatile ("int $0x80" - : "=a" (__res) - : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory"); - return __res; -} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b481341..8276dc6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -55,6 +55,7 @@ #include <asm/i387.h> #include <asm/fpu-internal.h> #include <asm/mce.h> +#include <asm/rcu.h> #include <asm/mach_traps.h> @@ -107,30 +108,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) dec_preempt_count(); } -static void __kprobes -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) +static int __kprobes +do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, + struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; - #ifdef CONFIG_X86_32 if (regs->flags & X86_VM_MASK) { /* - * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. + * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86. * On nmi (interrupt 2), do_trap should not be called. */ - if (trapnr < X86_TRAP_UD) - goto vm86_trap; - goto trap_signal; + if (trapnr < X86_TRAP_UD) { + if (!handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, trapnr)) + return 0; + } + return -1; } #endif + if (!user_mode(regs)) { + if (!fixup_exception(regs)) { + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = trapnr; + die(str, regs, error_code); + } + return 0; + } - if (!user_mode(regs)) - goto kernel_trap; + return -1; +} -#ifdef CONFIG_X86_32 -trap_signal: -#endif +static void __kprobes +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, siginfo_t *info) +{ + struct task_struct *tsk = current; + + + if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) + return; /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not @@ -158,33 +174,20 @@ trap_signal: force_sig_info(signr, info, tsk); else force_sig(signr, tsk); - return; - -kernel_trap: - if (!fixup_exception(regs)) { - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = trapnr; - die(str, regs, error_code); - } - return; - -#ifdef CONFIG_X86_32 -vm86_trap: - if (handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, trapnr)) - goto trap_signal; - return; -#endif } #define DO_ERROR(trapnr, signr, str, name) \ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ + exception_exit(regs); \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -195,11 +198,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ - == NOTIFY_STOP) \ + exception_enter(regs); \ + if (notify_die(DIE_TRAP, str, regs, error_code, \ + trapnr, signr) == NOTIFY_STOP) { \ + exception_exit(regs); \ return; \ + } \ conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ + exception_exit(regs); \ } DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, @@ -222,12 +229,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, /* Runs on IST stack */ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) { + exception_enter(regs); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, - X86_TRAP_SS, SIGBUS) == NOTIFY_STOP) - return; - preempt_conditional_sti(regs); - do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { + preempt_conditional_sti(regs); + do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); + } + exception_exit(regs); } dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) @@ -235,6 +244,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) static const char str[] = "double fault"; struct task_struct *tsk = current; + exception_enter(regs); /* Return not checked because double check cannot be ignored */ notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); @@ -255,16 +265,29 @@ do_general_protection(struct pt_regs *regs, long error_code) { struct task_struct *tsk; + exception_enter(regs); conditional_sti(regs); #ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto gp_in_vm86; + if (regs->flags & X86_VM_MASK) { + local_irq_enable(); + handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); + goto exit; + } #endif tsk = current; - if (!user_mode(regs)) - goto gp_in_kernel; + if (!user_mode(regs)) { + if (fixup_exception(regs)) + goto exit; + + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_GP; + if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) + die("general protection fault", regs, error_code); + goto exit; + } tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -279,25 +302,8 @@ do_general_protection(struct pt_regs *regs, long error_code) } force_sig(SIGSEGV, tsk); - return; - -#ifdef CONFIG_X86_32 -gp_in_vm86: - local_irq_enable(); - handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); - return; -#endif - -gp_in_kernel: - if (fixup_exception(regs)) - return; - - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, - X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) - return; - die("general protection fault", regs, error_code); +exit: + exception_exit(regs); } /* May run on IST stack. */ @@ -312,15 +318,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co ftrace_int3_handler(regs)) return; #endif + exception_enter(regs); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -331,6 +338,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); preempt_conditional_cli(regs); debug_stack_usage_dec(); +exit: + exception_exit(regs); } #ifdef CONFIG_X86_64 @@ -391,6 +400,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; + exception_enter(regs); + get_debugreg(dr6, 6); /* Filter out all the reserved bits which are preset to 1 */ @@ -406,7 +417,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - return; + goto exit; /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); @@ -421,7 +432,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, SIGTRAP) == NOTIFY_STOP) - return; + goto exit; /* * Let others (NMI) know that the debug stack is in use @@ -437,7 +448,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) X86_TRAP_DB); preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; + goto exit; } /* @@ -458,7 +469,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); debug_stack_usage_dec(); - return; +exit: + exception_exit(regs); } /* @@ -555,14 +567,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 ignore_fpu_irq = 1; #endif - + exception_enter(regs); math_error(regs, error_code, X86_TRAP_MF); + exception_exit(regs); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { + exception_enter(regs); math_error(regs, error_code, X86_TRAP_XF); + exception_exit(regs); } dotraplinkage void @@ -613,11 +628,12 @@ void math_state_restore(void) } __thread_fpu_begin(tsk); + /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ if (unlikely(restore_fpu_checking(tsk))) { - __thread_fpu_end(tsk); + drop_init_fpu(tsk); force_sig(SIGSEGV, tsk); return; } @@ -629,6 +645,9 @@ EXPORT_SYMBOL_GPL(math_state_restore); dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { + exception_enter(regs); + BUG_ON(use_eager_fpu()); + #ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; @@ -637,6 +656,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); + exception_exit(regs); return; } #endif @@ -644,12 +664,15 @@ do_device_not_available(struct pt_regs *regs, long error_code) #ifdef CONFIG_X86_32 conditional_sti(regs); #endif + exception_exit(regs); } #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { siginfo_t info; + + exception_enter(regs); local_irq_enable(); info.si_signo = SIGILL; @@ -657,10 +680,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) info.si_code = ILL_BADSTK; info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, - X86_TRAP_IRET, SIGILL) == NOTIFY_STOP) - return; - do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { + do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, + &info); + } + exception_exit(regs); } #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 36fd420..aafa555 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -41,6 +41,9 @@ /* Adjust the return address of a call insn */ #define UPROBE_FIX_CALL 0x2 +/* Instruction will modify TF, don't change it */ +#define UPROBE_FIX_SETF 0x4 + #define UPROBE_FIX_RIP_AX 0x8000 #define UPROBE_FIX_RIP_CX 0x4000 @@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn) insn_get_opcode(insn); /* should be a nop */ switch (OPCODE1(insn)) { + case 0x9d: + /* popf */ + auprobe->fixups |= UPROBE_FIX_SETF; + break; case 0xc3: /* ret/lret */ case 0xcb: case 0xc2: @@ -644,32 +651,63 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) /* * Skip these instructions as per the currently known x86 ISA. - * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 } + * rep=0x66*; nop=0x90 */ -bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) { int i; for (i = 0; i < MAX_UINSN_BYTES; i++) { - if ((auprobe->insn[i] == 0x66)) + if (auprobe->insn[i] == 0x66) continue; if (auprobe->insn[i] == 0x90) return true; - if (i == (MAX_UINSN_BYTES - 1)) - break; + break; + } + return false; +} - if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f)) - return true; +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + bool ret = __skip_sstep(auprobe, regs); + if (ret && (regs->flags & X86_EFLAGS_TF)) + send_sig(SIGTRAP, current, 0); + return ret; +} - if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19)) - return true; +void arch_uprobe_enable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + struct pt_regs *regs = task_pt_regs(task); - if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0)) - return true; + autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF); - break; + regs->flags |= X86_EFLAGS_TF; + if (test_tsk_thread_flag(task, TIF_BLOCKSTEP)) + set_task_blockstep(task, false); +} + +void arch_uprobe_disable_step(struct arch_uprobe *auprobe) +{ + struct task_struct *task = current; + struct arch_uprobe_task *autask = &task->utask->autask; + bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED); + struct pt_regs *regs = task_pt_regs(task); + /* + * The state of TIF_BLOCKSTEP was not saved so we can get an extra + * SIGTRAP if we do not clear TF. We need to examine the opcode to + * make it right. + */ + if (unlikely(trapped)) { + if (!autask->saved_tf) + regs->flags &= ~X86_EFLAGS_TF; + } else { + if (autask->saved_tf) + send_sig(SIGTRAP, task, 0); + else if (!(auprobe->fixups & UPROBE_FIX_SETF)) + regs->flags &= ~X86_EFLAGS_TF; } - return false; } diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 54abcc0..5c9687b 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) if ((trapno == 3) || (trapno == 1)) { KVM86->regs32->ax = VM86_TRAP + (trapno << 8); /* setting this flag forces the code in entry_32.S to - call save_v86_state() and change the stack pointer - to KVM86->regs32 */ - set_thread_flag(TIF_IRET); + the path where we call save_v86_state() and change + the stack pointer to KVM86->regs32 */ + set_thread_flag(TIF_NOTIFY_RESUME); return 0; } do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8d141b3..3a3e8c9 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -28,7 +28,7 @@ #include <linux/jiffies.h> #include <linux/sysctl.h> #include <linux/topology.h> -#include <linux/clocksource.h> +#include <linux/timekeeper_internal.h> #include <linux/getcpu.h> #include <linux/cpu.h> #include <linux/smp.h> @@ -82,32 +82,41 @@ void update_vsyscall_tz(void) vsyscall_gtod_data.sys_tz = sys_tz; } -void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, - struct clocksource *clock, u32 mult) +void update_vsyscall(struct timekeeper *tk) { - struct timespec monotonic; + struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; - write_seqcount_begin(&vsyscall_gtod_data.seq); + write_seqcount_begin(&vdata->seq); /* copy vsyscall data */ - vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; - vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; - vsyscall_gtod_data.clock.mask = clock->mask; - vsyscall_gtod_data.clock.mult = mult; - vsyscall_gtod_data.clock.shift = clock->shift; - - vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->clock->cycle_last; + vdata->clock.mask = tk->clock->mask; + vdata->clock.mult = tk->mult; + vdata->clock.shift = tk->shift; + + vdata->wall_time_sec = tk->xtime_sec; + vdata->wall_time_snsec = tk->xtime_nsec; + + vdata->monotonic_time_sec = tk->xtime_sec + + tk->wall_to_monotonic.tv_sec; + vdata->monotonic_time_snsec = tk->xtime_nsec + + (tk->wall_to_monotonic.tv_nsec + << tk->shift); + while (vdata->monotonic_time_snsec >= + (((u64)NSEC_PER_SEC) << tk->shift)) { + vdata->monotonic_time_snsec -= + ((u64)NSEC_PER_SEC) << tk->shift; + vdata->monotonic_time_sec++; + } - monotonic = timespec_add(*wall_time, *wtm); - vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec; - vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec; + vdata->wall_time_coarse.tv_sec = tk->xtime_sec; + vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); - vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); - vsyscall_gtod_data.monotonic_time_coarse = - timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm); + vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, + tk->wall_to_monotonic); - write_seqcount_end(&vsyscall_gtod_data.seq); + write_seqcount_end(&vdata->seq); } static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f..1330dd1 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 9f3167e..7a3d075 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -26,7 +26,6 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } -void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } void iommu_shutdown_noop(void) { } @@ -68,8 +67,7 @@ struct x86_init_ops x86_init __initdata = { }, .paging = { - .pagetable_setup_start = native_pagetable_setup_start, - .pagetable_setup_done = native_pagetable_setup_done, + .pagetable_init = native_pagetable_init, }, .timers = { diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 3d3e207..ada87a3 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -10,9 +10,7 @@ #include <linux/compat.h> #include <asm/i387.h> #include <asm/fpu-internal.h> -#ifdef CONFIG_IA32_EMULATION -#include <asm/sigcontext32.h> -#endif +#include <asm/sigframe.h> #include <asm/xcr.h> /* @@ -23,13 +21,9 @@ u64 pcntxt_mask; /* * Represents init state for the supported extended state. */ -static struct xsave_struct *init_xstate_buf; - -struct _fpx_sw_bytes fx_sw_reserved; -#ifdef CONFIG_IA32_EMULATION -struct _fpx_sw_bytes fx_sw_reserved_ia32; -#endif +struct xsave_struct *init_xstate_buf; +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; /* @@ -44,9 +38,9 @@ static unsigned int *xstate_offsets, *xstate_sizes, xstate_features; */ void __sanitize_i387_state(struct task_struct *tsk) { - u64 xstate_bv; - int feature_bit = 0x2; struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; + int feature_bit = 0x2; + u64 xstate_bv; if (!fx) return; @@ -104,213 +98,326 @@ void __sanitize_i387_state(struct task_struct *tsk) * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ -int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw_user) +static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct i387_fxsave_struct) + sizeof(struct xsave_hdr_struct); unsigned int magic2; - int err; - err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0], - sizeof(struct _fpx_sw_bytes)); - if (err) - return -EFAULT; + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; - /* - * First Magic check failed. - */ - if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1) - return -EINVAL; + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; /* - * Check for error scenarios. - */ - if (fx_sw_user->xstate_size < min_xstate_size || - fx_sw_user->xstate_size > xstate_size || - fx_sw_user->xstate_size > fx_sw_user->extended_size) - return -EINVAL; - - err = __get_user(magic2, (__u32 *) (((void *)fpstate) + - fx_sw_user->extended_size - - FP_XSTATE_MAGIC2_SIZE)); - if (err) - return err; - /* * Check for the presence of second magic word at the end of memory * layout. This detects the case where the user just copied the legacy * fpstate layout with out copying the extended state information * in the memory layout. */ - if (magic2 != FP_XSTATE_MAGIC2) - return -EFAULT; + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; return 0; } -#ifdef CONFIG_X86_64 /* * Signal frame handlers. */ - -int save_i387_xstate(void __user *buf) +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) { - struct task_struct *tsk = current; - int err = 0; - - if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size)) - return -EACCES; + if (use_fxsr()) { + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; - BUG_ON(sig_xstate_size < xstate_size); + convert_from_fxsr(&env, tsk); - if ((unsigned long)buf % 64) - pr_err("%s: bad fpstate %p\n", __func__, buf); - - if (!used_math()) - return 0; - - if (user_has_fpu()) { - if (use_xsave()) - err = xsave_user(buf); - else - err = fxsave_user(buf); - - if (err) - return err; - user_fpu_end(); + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; } else { - sanitize_i387_state(tsk); - if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, - xstate_size)) + struct i387_fsave_struct __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) return -1; } - clear_used_math(); /* trigger finit */ + return 0; +} - if (use_xsave()) { - struct _fpstate __user *fx = buf; - struct _xstate __user *x = buf; - u64 xstate_bv; +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +{ + struct xsave_struct __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xstate_bv; + int err; - err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 __user *) (buf + sig_xstate_size - - FP_XSTATE_MAGIC2_SIZE)); + if (!use_xsave()) + return err; - /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers and - * set the FP/SSE bits. - */ - err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv); + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xstate_bv |= XSTATE_FPSSE; + /* + * Read the xstate_bv which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv); + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xstate_bv in the xsave header. + * + * xsave aware apps can change the xstate_bv in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xstate_bv |= XSTATE_FPSSE; - if (err) - return err; - } + err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - return 1; + return err; +} + +static inline int save_user_xstate(struct xsave_struct __user *buf) +{ + int err; + + if (use_xsave()) + err = xsave_user(buf); + else if (use_fxsr()) + err = fxsave_user((struct i387_fxsave_struct __user *) buf); + else + err = fsave_user((struct i387_fsave_struct __user *) buf); + + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + return err; } /* - * Restore the extended state if present. Otherwise, restore the FP/SSE - * state. + * Save the fpu, extended register state to the user signal frame. + * + * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save + * state is copied. + * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. + * + * buf == buf_fx for 64-bit frames and 32-bit fsave frame. + * buf != buf_fx for 32-bit frames with fxstate. + * + * If the fpu, extended register state is live, save the state directly + * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, + * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * + * If this is a 32-bit frame with fxstate, put a fsave header before + * the aligned state at 'buf_fx'. + * + * For [f]xsave state, update the SW reserved fields in the [f]xsave frame + * indicating the absence/presence of the extended state to the user. */ -static int restore_user_xstate(void __user *buf) +int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) { - struct _fpx_sw_bytes fx_sw_user; - u64 mask; - int err; + struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; + struct task_struct *tsk = current; + int ia32_fxstate = (buf != buf_fx); - if (((unsigned long)buf % 64) || - check_for_xstate(buf, buf, &fx_sw_user)) - goto fx_only; + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); - mask = fx_sw_user.xstate_bv; + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EACCES; - /* - * restore the state passed by the user. - */ - err = xrestore_user(buf, mask); - if (err) - return err; + if (!HAVE_HWFP) + return fpregs_soft_get(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), NULL, + (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - /* - * init the state skipped by the user. - */ - mask = pcntxt_mask & ~mask; - if (unlikely(mask)) - xrstor_state(init_xstate_buf, mask); + if (user_has_fpu()) { + /* Save the live register state to the user directly. */ + if (save_user_xstate(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + fpu_fxsave(&tsk->thread.fpu); + } else { + sanitize_i387_state(tsk); + if (__copy_to_user(buf_fx, xsave, xstate_size)) + return -1; + } + + /* Save the fsave header for the 32-bit frames. */ + if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + return -1; + + if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + return -1; + + drop_init_fpu(tsk); /* trigger finit */ return 0; +} -fx_only: - /* - * couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE); - return fxrstor_checking((__force struct i387_fxsave_struct *)buf); +static inline void +sanitize_restored_xstate(struct task_struct *tsk, + struct user_i387_ia32_struct *ia32_env, + u64 xstate_bv, int fx_only) +{ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; + + if (use_xsave()) { + /* These bits must be zero. */ + xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; + + /* + * Init the state that is not present in the memory + * layout and not enabled by the OS. + */ + if (fx_only) + xsave_hdr->xstate_bv = XSTATE_FPSSE; + else + xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); + } + + if (use_fxsr()) { + /* + * mscsr reserved bits must be masked to zero for security + * reasons. + */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + + convert_to_fxsr(tsk, ia32_env); + } } /* - * This restores directly out of user space. Exceptions are handled. + * Restore the extended state if present. Otherwise, restore the FP/SSE state. */ -int restore_i387_xstate(void __user *buf) +static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) { + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; + xrstor_state(init_xstate_buf, init_bv); + return fxrstor_user(buf); + } else { + u64 init_bv = pcntxt_mask & ~xbv; + if (unlikely(init_bv)) + xrstor_state(init_xstate_buf, init_bv); + return xrestore_user(buf, xbv); + } + } else if (use_fxsr()) { + return fxrstor_user(buf); + } else + return frstor_user(buf); +} + +int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) +{ + int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; - int err = 0; + int state_size = xstate_size; + u64 xstate_bv = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - if (used_math()) - goto clear; + drop_init_fpu(tsk); return 0; - } else - if (!access_ok(VERIFY_READ, buf, sig_xstate_size)) - return -EACCES; + } - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + if (!used_math() && init_fpu(tsk)) + return -1; + + if (!HAVE_HWFP) { + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; } - user_fpu_begin(); - if (use_xsave()) - err = restore_user_xstate(buf); - else - err = fxrstor_checking((__force struct i387_fxsave_struct *) - buf); - if (unlikely(err)) { + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct i387_fxsave_struct); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xstate_bv = fx_sw_user.xstate_bv; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct user_i387_ia32_struct env; + int err = 0; + + /* + * Drop the current fpu which clears used_math(). This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. + * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * set_used_math() is again set. + */ + drop_fpu(tsk); + + if (__copy_from_user(xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + set_used_math(); + } + + if (use_eager_fpu()) + math_state_restore(); + + return err; + } else { /* - * Encountered an error while doing the restore from the - * user buffer, clear the fpu state. + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). */ -clear: - clear_fpu(tsk); - clear_used_math(); + user_fpu_begin(); + if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { + drop_init_fpu(tsk); + return -1; + } } - return err; + + return 0; } -#endif /* * Prepare the SW reserved portion of the fxsave memory layout, indicating @@ -321,31 +428,22 @@ clear: */ static void prepare_fx_sw_frame(void) { - int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) + - FP_XSTATE_MAGIC2_SIZE; + int fsave_header_size = sizeof(struct i387_fsave_struct); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - sig_xstate_size = sizeof(struct _fpstate) + size_extended; - -#ifdef CONFIG_IA32_EMULATION - sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended; -#endif - - memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved)); + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = sig_xstate_size; + fx_sw_reserved.extended_size = size; fx_sw_reserved.xstate_bv = pcntxt_mask; fx_sw_reserved.xstate_size = xstate_size; -#ifdef CONFIG_IA32_EMULATION - memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved, - sizeof(struct _fpx_sw_bytes)); - fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size; -#endif -} -#ifdef CONFIG_X86_64 -unsigned int sig_xstate_size = sizeof(struct _fpstate); -#endif + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} /* * Enable the extended processor state save/restore feature @@ -384,19 +482,21 @@ static void __init setup_xstate_features(void) /* * setup the xstate image representing the init state */ -static void __init setup_xstate_init(void) +static void __init setup_init_fpu_buf(void) { - setup_xstate_features(); - /* * Setup init_xstate_buf to represent the init state of * all the features managed by the xsave */ init_xstate_buf = alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); - init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT; + fx_finit(&init_xstate_buf->i387); + + if (!cpu_has_xsave) + return; + + setup_xstate_features(); - clts(); /* * Init all the features state with header_bv being 0x0 */ @@ -406,9 +506,21 @@ static void __init setup_xstate_init(void) * of any feature which is not represented by all zero's. */ xsave_state(init_xstate_buf, -1); - stts(); } +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + eagerfpu = ENABLE; + else if (!strcmp(s, "off")) + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + /* * Enable and initialize the xsave feature. */ @@ -445,8 +557,11 @@ static void __init xstate_enable_boot_cpu(void) update_regset_xstate_info(xstate_size, pcntxt_mask); prepare_fx_sw_frame(); + setup_init_fpu_buf(); - setup_xstate_init(); + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", pcntxt_mask, xstate_size); @@ -471,3 +586,43 @@ void __cpuinit xsave_init(void) next_func = xstate_enable; this_func(); } + +static inline void __init eager_fpu_init_bp(void) +{ + current->thread.fpu.state = + alloc_bootmem_align(xstate_size, __alignof__(struct xsave_struct)); + if (!init_xstate_buf) + setup_init_fpu_buf(); +} + +void __cpuinit eager_fpu_init(void) +{ + static __refdata void (*boot_func)(void) = eager_fpu_init_bp; + + clear_used_math(); + current_thread_info()->status = 0; + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + + if (!cpu_has_eager_fpu) { + stts(); + return; + } + + if (boot_func) { + boot_func(); + boot_func = NULL; + } + + /* + * This is same as math_state_restore(). But use_xsave() is + * not yet patched to use math_state_restore(). + */ + init_fpu(current); + __thread_fpu_begin(current); + if (cpu_has_xsave) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); +} diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a28f338..586f000 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -20,6 +20,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + depends on HIGH_RES_TIMERS # for device assignment: depends on PCI # for TASKSTATS/TASK_DELAY_ACCT: @@ -37,6 +38,7 @@ config KVM select TASK_DELAY_ACCT select PERF_EVENTS select HAVE_KVM_MSI + select HAVE_KVM_CPU_RELAX_INTERCEPT ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4f579e8..04d3040 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API) += $(addprefix ../../../virt/kvm/, iommu.o) kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o timer.o cpuid.o pmu.o + i8254.o cpuid.o pmu.o kvm-intel-y += vmx.o kvm-amd-y += svm.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0595f13..ec79e77 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } case 7: { entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - /* Mask ebx against host capbability word 9 */ + /* Mask ebx against host capability word 9 */ if (index == 0) { entry->ebx &= kvm_supported_word9_x86_features; cpuid_mask(&entry->ebx, 9); @@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; } case KVM_CPUID_SIGNATURE: { - char signature[12] = "KVMKVMKVM\0\0"; - u32 *sigptr = (u32 *)signature; + static const char signature[12] = "KVMKVMKVM\0\0"; + const u32 *sigptr = (const u32 *)signature; entry->eax = KVM_CPUID_FEATURES; entry->ebx = sigptr[0]; entry->ecx = sigptr[1]; @@ -484,10 +484,10 @@ struct kvm_cpuid_param { u32 func; u32 idx; bool has_leaf_count; - bool (*qualifier)(struct kvm_cpuid_param *param); + bool (*qualifier)(const struct kvm_cpuid_param *param); }; -static bool is_centaur_cpu(struct kvm_cpuid_param *param) +static bool is_centaur_cpu(const struct kvm_cpuid_param *param) { return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } @@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; u32 func; - static struct kvm_cpuid_param param[] = { + static const struct kvm_cpuid_param param[] = { { .func = 0, .has_leaf_count = true }, { .func = 0x80000000, .has_leaf_count = true }, { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true }, @@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, r = 0; for (i = 0; i < ARRAY_SIZE(param); i++) { - struct kvm_cpuid_param *ent = ¶m[i]; + const struct kvm_cpuid_param *ent = ¶m[i]; if (ent->qualifier && !ent->qualifier(ent)) continue; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3b57a2..39171cb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -161,9 +161,9 @@ struct opcode { u64 intercept : 8; union { int (*execute)(struct x86_emulate_ctxt *ctxt); - struct opcode *group; - struct group_dual *gdual; - struct gprefix *gprefix; + const struct opcode *group; + const struct group_dual *gdual; + const struct gprefix *gprefix; } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -202,6 +202,42 @@ struct gprefix { #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a #define EFLG_RESERVED_ONE_MASK 2 +static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + if (!(ctxt->regs_valid & (1 << nr))) { + ctxt->regs_valid |= 1 << nr; + ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); + } + return ctxt->_regs[nr]; +} + +static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + ctxt->regs_valid |= 1 << nr; + ctxt->regs_dirty |= 1 << nr; + return &ctxt->_regs[nr]; +} + +static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + reg_read(ctxt, nr); + return reg_write(ctxt, nr); +} + +static void writeback_registers(struct x86_emulate_ctxt *ctxt) +{ + unsigned reg; + + for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16) + ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]); +} + +static void invalidate_registers(struct x86_emulate_ctxt *ctxt) +{ + ctxt->regs_dirty = 0; + ctxt->regs_valid = 0; +} + /* * Instruction emulation: * Most instructions are emulated directly via a fragment of inline assembly @@ -374,8 +410,8 @@ struct gprefix { #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ do { \ unsigned long _tmp; \ - ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX]; \ - ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX]; \ + ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \ + ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \ \ __asm__ __volatile__ ( \ _PRE_EFLAGS("0", "5", "1") \ @@ -494,7 +530,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) { - masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc); + masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc); } static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) @@ -632,8 +668,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, la = seg_base(ctxt, addr.seg) + addr.ea; switch (ctxt->mode) { - case X86EMUL_MODE_REAL: - break; case X86EMUL_MODE_PROT64: if (((signed long)la << 16) >> 16 != la) return emulate_gp(ctxt, 0); @@ -655,7 +689,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim) goto bad; } else { - /* exapand-down segment */ + /* expand-down segment */ if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim) goto bad; lim = desc.d ? 0xffffffff : 0xffff; @@ -663,7 +697,10 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt); - rpl = sel & 3; + if (ctxt->mode == X86EMUL_MODE_REAL) + rpl = 0; + else + rpl = sel & 3; cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ @@ -688,9 +725,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, return X86EMUL_CONTINUE; bad: if (addr.seg == VCPU_SREG_SS) - return emulate_ss(ctxt, addr.seg); + return emulate_ss(ctxt, sel); else - return emulate_gp(ctxt, addr.seg); + return emulate_gp(ctxt, sel); } static int linearize(struct x86_emulate_ctxt *ctxt, @@ -786,14 +823,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, * pointer into the block that addresses the relevant register. * @highbyte_regs specifies whether to decode AH,CH,DH,BH. */ -static void *decode_register(u8 modrm_reg, unsigned long *regs, +static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, int highbyte_regs) { void *p; - p = ®s[modrm_reg]; if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) - p = (unsigned char *)®s[modrm_reg & 3] + 1; + p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; + else + p = reg_rmw(ctxt, modrm_reg); return p; } @@ -871,23 +909,23 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg) { ctxt->ops->get_fpu(ctxt); switch (reg) { - case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break; - case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break; - case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break; - case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break; - case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break; - case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break; - case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break; - case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break; + case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break; + case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break; + case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break; + case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break; + case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break; + case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break; + case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break; + case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break; #ifdef CONFIG_X86_64 - case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break; - case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break; - case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break; - case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break; - case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break; - case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break; - case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break; - case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break; + case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break; + case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break; + case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break; + case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break; + case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break; + case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break; + case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break; + case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break; #endif default: BUG(); } @@ -899,23 +937,23 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, { ctxt->ops->get_fpu(ctxt); switch (reg) { - case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break; - case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break; - case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break; - case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break; - case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break; - case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break; - case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break; - case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break; + case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break; + case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break; + case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break; + case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break; + case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break; + case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break; + case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break; + case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break; #ifdef CONFIG_X86_64 - case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break; - case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break; - case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break; - case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break; - case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break; - case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break; - case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break; - case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break; + case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break; + case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break; + case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break; + case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break; + case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break; + case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break; + case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break; + case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break; #endif default: BUG(); } @@ -982,10 +1020,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, op->type = OP_REG; if (ctxt->d & ByteOp) { - op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); + op->addr.reg = decode_register(ctxt, reg, highbyte_regs); op->bytes = 1; } else { - op->addr.reg = decode_register(reg, ctxt->regs, 0); + op->addr.reg = decode_register(ctxt, reg, 0); op->bytes = ctxt->op_bytes; } fetch_register_operand(op); @@ -1020,8 +1058,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (ctxt->modrm_mod == 3) { op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; - op->addr.reg = decode_register(ctxt->modrm_rm, - ctxt->regs, ctxt->d & ByteOp); + op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp); if (ctxt->d & Sse) { op->type = OP_XMM; op->bytes = 16; @@ -1042,10 +1079,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, op->type = OP_MEM; if (ctxt->ad_bytes == 2) { - unsigned bx = ctxt->regs[VCPU_REGS_RBX]; - unsigned bp = ctxt->regs[VCPU_REGS_RBP]; - unsigned si = ctxt->regs[VCPU_REGS_RSI]; - unsigned di = ctxt->regs[VCPU_REGS_RDI]; + unsigned bx = reg_read(ctxt, VCPU_REGS_RBX); + unsigned bp = reg_read(ctxt, VCPU_REGS_RBP); + unsigned si = reg_read(ctxt, VCPU_REGS_RSI); + unsigned di = reg_read(ctxt, VCPU_REGS_RDI); /* 16-bit ModR/M decode. */ switch (ctxt->modrm_mod) { @@ -1102,17 +1139,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) modrm_ea += insn_fetch(s32, ctxt); else { - modrm_ea += ctxt->regs[base_reg]; + modrm_ea += reg_read(ctxt, base_reg); adjust_modrm_seg(ctxt, base_reg); } if (index_reg != 4) - modrm_ea += ctxt->regs[index_reg] << scale; + modrm_ea += reg_read(ctxt, index_reg) << scale; } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { if (ctxt->mode == X86EMUL_MODE_PROT64) ctxt->rip_relative = 1; } else { base_reg = ctxt->modrm_rm; - modrm_ea += ctxt->regs[base_reg]; + modrm_ea += reg_read(ctxt, base_reg); adjust_modrm_seg(ctxt, base_reg); } switch (ctxt->modrm_mod) { @@ -1179,24 +1216,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, int rc; struct read_cache *mc = &ctxt->mem_read; - while (size) { - int n = min(size, 8u); - size -= n; - if (mc->pos < mc->end) - goto read_cached; + if (mc->pos < mc->end) + goto read_cached; - rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n, - &ctxt->exception); - if (rc != X86EMUL_CONTINUE) - return rc; - mc->end += n; + WARN_ON((mc->end + size) >= sizeof(mc->data)); - read_cached: - memcpy(dest, mc->data + mc->pos, n); - mc->pos += n; - dest += n; - addr += n; - } + rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size, + &ctxt->exception); + if (rc != X86EMUL_CONTINUE) + return rc; + + mc->end += size; + +read_cached: + memcpy(dest, mc->data + mc->pos, size); + mc->pos += size; return X86EMUL_CONTINUE; } @@ -1253,10 +1287,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (rc->pos == rc->end) { /* refill pio read ahead */ unsigned int in_page, n; unsigned int count = ctxt->rep_prefix ? - address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1; + address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; in_page = (ctxt->eflags & EFLG_DF) ? - offset_in_page(ctxt->regs[VCPU_REGS_RDI]) : - PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]); + offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : + PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, count); if (n == 0) @@ -1267,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, rc->end = n * size; } - memcpy(dest, rc->data + rc->pos, size); - rc->pos += size; + if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) { + ctxt->dst.data = rc->data + rc->pos; + ctxt->dst.type = OP_MEM_STR; + ctxt->dst.count = (rc->end - rc->pos) / size; + rc->pos = rc->end; + } else { + memcpy(dest, rc->data + rc->pos, size); + rc->pos += size; + } return 1; } @@ -1291,7 +1332,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, u16 selector, struct desc_ptr *dt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; if (selector & 1 << 2) { struct desc_struct desc; @@ -1355,19 +1396,15 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ ulong desc_addr; int ret; + u16 dummy; memset(&seg_desc, 0, sizeof seg_desc); if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) || ctxt->mode == X86EMUL_MODE_REAL) { /* set real mode segment descriptor */ + ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg); set_desc_base(&seg_desc, selector << 4); - set_desc_limit(&seg_desc, 0xffff); - seg_desc.type = 3; - seg_desc.p = 1; - seg_desc.s = 1; - if (ctxt->mode == X86EMUL_MODE_VM86) - seg_desc.dpl = 3; goto load; } @@ -1396,7 +1433,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, err_code = selector & 0xfffc; err_vec = GP_VECTOR; - /* can't load system descriptor into segment selecor */ + /* can't load system descriptor into segment selector */ if (seg <= VCPU_SREG_GS && !seg_desc.s) goto exception; @@ -1516,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; break; + case OP_MEM_STR: + rc = segmented_write(ctxt, + ctxt->dst.addr.mem, + ctxt->dst.data, + ctxt->dst.bytes * ctxt->dst.count); + if (rc != X86EMUL_CONTINUE) + return rc; + break; case OP_XMM: write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm); break; @@ -1536,7 +1581,7 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) struct segmented_address addr; rsp_increment(ctxt, -bytes); - addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); + addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; return segmented_write(ctxt, addr, data, bytes); @@ -1555,7 +1600,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, int rc; struct segmented_address addr; - addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); + addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) @@ -1623,26 +1668,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt) int rc; unsigned frame_size = ctxt->src.val; unsigned nesting_level = ctxt->src2.val & 31; + ulong rbp; if (nesting_level) return X86EMUL_UNHANDLEABLE; - rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt)); + rbp = reg_read(ctxt, VCPU_REGS_RBP); + rc = push(ctxt, &rbp, stack_size(ctxt)); if (rc != X86EMUL_CONTINUE) return rc; - assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP], + assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP), stack_mask(ctxt)); - assign_masked(&ctxt->regs[VCPU_REGS_RSP], - ctxt->regs[VCPU_REGS_RSP] - frame_size, + assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), + reg_read(ctxt, VCPU_REGS_RSP) - frame_size, stack_mask(ctxt)); return X86EMUL_CONTINUE; } static int em_leave(struct x86_emulate_ctxt *ctxt) { - assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP], + assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP), stack_mask(ctxt)); - return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes); + return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes); } static int em_push_sreg(struct x86_emulate_ctxt *ctxt) @@ -1670,13 +1717,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) static int em_pusha(struct x86_emulate_ctxt *ctxt) { - unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP]; + unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP); int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RAX; while (reg <= VCPU_REGS_RDI) { (reg == VCPU_REGS_RSP) ? - (ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]); + (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg)); rc = em_push(ctxt); if (rc != X86EMUL_CONTINUE) @@ -1705,7 +1752,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) --reg; } - rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes); + rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) break; --reg; @@ -1713,9 +1760,9 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) return rc; } -int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) +static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; int rc; struct desc_ptr dt; gva_t cs_addr; @@ -1762,11 +1809,22 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) return rc; } +int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) +{ + int rc; + + invalidate_registers(ctxt); + rc = __emulate_int_real(ctxt, irq); + if (rc == X86EMUL_CONTINUE) + writeback_registers(ctxt); + return rc; +} + static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq) { switch(ctxt->mode) { case X86EMUL_MODE_REAL: - return emulate_int_real(ctxt, irq); + return __emulate_int_real(ctxt, irq); case X86EMUL_MODE_VM86: case X86EMUL_MODE_PROT16: case X86EMUL_MODE_PROT32: @@ -1973,14 +2031,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) { u64 old = ctxt->dst.orig_val64; - if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) || - ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) { - ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0); - ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32); + if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || + ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { + *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); + *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); ctxt->eflags &= ~EFLG_ZF; } else { - ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) | - (u32) ctxt->regs[VCPU_REGS_RBX]; + ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | + (u32) reg_read(ctxt, VCPU_REGS_RBX); ctxt->eflags |= EFLG_ZF; } @@ -2016,7 +2074,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ ctxt->src.orig_val = ctxt->src.val; - ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; + ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); emulate_2op_SrcV(ctxt, "cmp"); if (ctxt->eflags & EFLG_ZF) { @@ -2025,7 +2083,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) } else { /* Failure: write the value we saw to EAX. */ ctxt->dst.type = OP_REG; - ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; + ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); } return X86EMUL_CONTINUE; } @@ -2050,12 +2108,6 @@ static void setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, struct desc_struct *cs, struct desc_struct *ss) { - u16 selector; - - memset(cs, 0, sizeof(struct desc_struct)); - ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); - memset(ss, 0, sizeof(struct desc_struct)); - cs->l = 0; /* will be adjusted later */ set_desc_base(cs, 0); /* flat segment */ cs->g = 1; /* 4kb granularity */ @@ -2065,6 +2117,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, cs->dpl = 0; /* will be adjusted later */ cs->p = 1; cs->d = 1; + cs->avl = 0; set_desc_base(ss, 0); /* flat segment */ set_desc_limit(ss, 0xfffff); /* 4GB limit */ @@ -2074,6 +2127,8 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, ss->d = 1; /* 32bit stack segment */ ss->dpl = 0; ss->p = 1; + ss->l = 0; + ss->avl = 0; } static bool vendor_intel(struct x86_emulate_ctxt *ctxt) @@ -2089,7 +2144,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt) static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; u32 eax, ebx, ecx, edx; /* @@ -2133,7 +2188,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt) static int em_syscall(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; @@ -2165,10 +2220,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip; + *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip; if (efer & EFER_LMA) { #ifdef CONFIG_X86_64 - ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; + *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF; ops->get_msr(ctxt, ctxt->mode == X86EMUL_MODE_PROT64 ? @@ -2191,7 +2246,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) static int em_sysenter(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; @@ -2228,6 +2283,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if (msr_data == 0x0) return emulate_gp(ctxt, 0); break; + default: + break; } ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF); @@ -2247,14 +2304,14 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) ctxt->_eip = msr_data; ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); - ctxt->regs[VCPU_REGS_RSP] = msr_data; + *reg_write(ctxt, VCPU_REGS_RSP) = msr_data; return X86EMUL_CONTINUE; } static int em_sysexit(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; int usermode; @@ -2297,8 +2354,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); - ctxt->_eip = ctxt->regs[VCPU_REGS_RDX]; - ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX]; + ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); + *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); return X86EMUL_CONTINUE; } @@ -2317,7 +2374,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, u16 port, u16 len) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct tr_seg; u32 base3; int r; @@ -2367,14 +2424,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, { tss->ip = ctxt->_eip; tss->flag = ctxt->eflags; - tss->ax = ctxt->regs[VCPU_REGS_RAX]; - tss->cx = ctxt->regs[VCPU_REGS_RCX]; - tss->dx = ctxt->regs[VCPU_REGS_RDX]; - tss->bx = ctxt->regs[VCPU_REGS_RBX]; - tss->sp = ctxt->regs[VCPU_REGS_RSP]; - tss->bp = ctxt->regs[VCPU_REGS_RBP]; - tss->si = ctxt->regs[VCPU_REGS_RSI]; - tss->di = ctxt->regs[VCPU_REGS_RDI]; + tss->ax = reg_read(ctxt, VCPU_REGS_RAX); + tss->cx = reg_read(ctxt, VCPU_REGS_RCX); + tss->dx = reg_read(ctxt, VCPU_REGS_RDX); + tss->bx = reg_read(ctxt, VCPU_REGS_RBX); + tss->sp = reg_read(ctxt, VCPU_REGS_RSP); + tss->bp = reg_read(ctxt, VCPU_REGS_RBP); + tss->si = reg_read(ctxt, VCPU_REGS_RSI); + tss->di = reg_read(ctxt, VCPU_REGS_RDI); tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); @@ -2390,14 +2447,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, ctxt->_eip = tss->ip; ctxt->eflags = tss->flag | 2; - ctxt->regs[VCPU_REGS_RAX] = tss->ax; - ctxt->regs[VCPU_REGS_RCX] = tss->cx; - ctxt->regs[VCPU_REGS_RDX] = tss->dx; - ctxt->regs[VCPU_REGS_RBX] = tss->bx; - ctxt->regs[VCPU_REGS_RSP] = tss->sp; - ctxt->regs[VCPU_REGS_RBP] = tss->bp; - ctxt->regs[VCPU_REGS_RSI] = tss->si; - ctxt->regs[VCPU_REGS_RDI] = tss->di; + *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax; + *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx; + *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx; + *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx; + *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp; + *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp; + *reg_write(ctxt, VCPU_REGS_RSI) = tss->si; + *reg_write(ctxt, VCPU_REGS_RDI) = tss->di; /* * SDM says that segment selectors are loaded before segment @@ -2410,7 +2467,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS); /* - * Now load segment descriptors. If fault happenes at this stage + * Now load segment descriptors. If fault happens at this stage * it is handled in a context of new task */ ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); @@ -2436,7 +2493,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_16 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); @@ -2482,14 +2539,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, tss->cr3 = ctxt->ops->get_cr(ctxt, 3); tss->eip = ctxt->_eip; tss->eflags = ctxt->eflags; - tss->eax = ctxt->regs[VCPU_REGS_RAX]; - tss->ecx = ctxt->regs[VCPU_REGS_RCX]; - tss->edx = ctxt->regs[VCPU_REGS_RDX]; - tss->ebx = ctxt->regs[VCPU_REGS_RBX]; - tss->esp = ctxt->regs[VCPU_REGS_RSP]; - tss->ebp = ctxt->regs[VCPU_REGS_RBP]; - tss->esi = ctxt->regs[VCPU_REGS_RSI]; - tss->edi = ctxt->regs[VCPU_REGS_RDI]; + tss->eax = reg_read(ctxt, VCPU_REGS_RAX); + tss->ecx = reg_read(ctxt, VCPU_REGS_RCX); + tss->edx = reg_read(ctxt, VCPU_REGS_RDX); + tss->ebx = reg_read(ctxt, VCPU_REGS_RBX); + tss->esp = reg_read(ctxt, VCPU_REGS_RSP); + tss->ebp = reg_read(ctxt, VCPU_REGS_RBP); + tss->esi = reg_read(ctxt, VCPU_REGS_RSI); + tss->edi = reg_read(ctxt, VCPU_REGS_RDI); tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); @@ -2511,14 +2568,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, ctxt->eflags = tss->eflags | 2; /* General purpose registers */ - ctxt->regs[VCPU_REGS_RAX] = tss->eax; - ctxt->regs[VCPU_REGS_RCX] = tss->ecx; - ctxt->regs[VCPU_REGS_RDX] = tss->edx; - ctxt->regs[VCPU_REGS_RBX] = tss->ebx; - ctxt->regs[VCPU_REGS_RSP] = tss->esp; - ctxt->regs[VCPU_REGS_RBP] = tss->ebp; - ctxt->regs[VCPU_REGS_RSI] = tss->esi; - ctxt->regs[VCPU_REGS_RDI] = tss->edi; + *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax; + *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx; + *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx; + *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx; + *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp; + *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp; + *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi; + *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi; /* * SDM says that segment selectors are loaded before segment @@ -2583,7 +2640,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_32 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); @@ -2627,7 +2684,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct curr_tss_desc, next_tss_desc; int ret; u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); @@ -2652,7 +2709,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, * * 1. jmp/call/int to task gate: Check against DPL of the task gate * 2. Exception/IRQ/iret: No check is performed - * 3. jmp/call to TSS: Check agains DPL of the TSS + * 3. jmp/call to TSS: Check against DPL of the TSS */ if (reason == TASK_SWITCH_GATE) { if (idt_index != -1) { @@ -2693,7 +2750,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; /* set back link to prev task only if NT bit is set in eflags - note that old_tss_sel is not used afetr this point */ + note that old_tss_sel is not used after this point */ if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) old_tss_sel = 0xffff; @@ -2733,26 +2790,28 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, { int rc; + invalidate_registers(ctxt); ctxt->_eip = ctxt->eip; ctxt->dst.type = OP_NONE; rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason, has_error_code, error_code); - if (rc == X86EMUL_CONTINUE) + if (rc == X86EMUL_CONTINUE) { ctxt->eip = ctxt->_eip; + writeback_registers(ctxt); + } return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; } -static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, - int reg, struct operand *op) +static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, + struct operand *op) { - int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; + int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; - register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes); - op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]); - op->addr.mem.seg = seg; + register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); + op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); } static int em_das(struct x86_emulate_ctxt *ctxt) @@ -2927,7 +2986,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.type = OP_REG; ctxt->dst.bytes = ctxt->src.bytes; - ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; + ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1); return X86EMUL_CONTINUE; @@ -2938,8 +2997,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) u64 tsc = 0; ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); - ctxt->regs[VCPU_REGS_RAX] = (u32)tsc; - ctxt->regs[VCPU_REGS_RDX] = tsc >> 32; + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc; + *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32; return X86EMUL_CONTINUE; } @@ -2947,10 +3006,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) { u64 pmc; - if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc)) + if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc)) return emulate_gp(ctxt, 0); - ctxt->regs[VCPU_REGS_RAX] = (u32)pmc; - ctxt->regs[VCPU_REGS_RDX] = pmc >> 32; + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc; + *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32; return X86EMUL_CONTINUE; } @@ -2992,9 +3051,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt) { u64 msr_data; - msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] - | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); - if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) + msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX) + | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32); + if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -3004,11 +3063,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt) { u64 msr_data; - if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) + if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data)) return emulate_gp(ctxt, 0); - ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; - ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; + *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; return X86EMUL_CONTINUE; } @@ -3188,8 +3247,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) static int em_loop(struct x86_emulate_ctxt *ctxt) { - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); - if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) && + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); + if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) jmp_rel(ctxt, ctxt->src.val); @@ -3198,7 +3257,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt) static int em_jcxz(struct x86_emulate_ctxt *ctxt) { - if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) jmp_rel(ctxt, ctxt->src.val); return X86EMUL_CONTINUE; @@ -3286,20 +3345,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) { u32 eax, ebx, ecx, edx; - eax = ctxt->regs[VCPU_REGS_RAX]; - ecx = ctxt->regs[VCPU_REGS_RCX]; + eax = reg_read(ctxt, VCPU_REGS_RAX); + ecx = reg_read(ctxt, VCPU_REGS_RCX); ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); - ctxt->regs[VCPU_REGS_RAX] = eax; - ctxt->regs[VCPU_REGS_RBX] = ebx; - ctxt->regs[VCPU_REGS_RCX] = ecx; - ctxt->regs[VCPU_REGS_RDX] = edx; + *reg_write(ctxt, VCPU_REGS_RAX) = eax; + *reg_write(ctxt, VCPU_REGS_RBX) = ebx; + *reg_write(ctxt, VCPU_REGS_RCX) = ecx; + *reg_write(ctxt, VCPU_REGS_RDX) = edx; return X86EMUL_CONTINUE; } static int em_lahf(struct x86_emulate_ctxt *ctxt) { - ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL; - ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8; + *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; + *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8; return X86EMUL_CONTINUE; } @@ -3456,7 +3515,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) static int check_svme_pa(struct x86_emulate_ctxt *ctxt) { - u64 rax = ctxt->regs[VCPU_REGS_RAX]; + u64 rax = reg_read(ctxt, VCPU_REGS_RAX); /* Valid physical address? */ if (rax & 0xffff000000000000ULL) @@ -3478,7 +3537,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) static int check_rdpmc(struct x86_emulate_ctxt *ctxt) { u64 cr4 = ctxt->ops->get_cr(ctxt, 4); - u64 rcx = ctxt->regs[VCPU_REGS_RCX]; + u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || (rcx > 3)) @@ -3531,13 +3590,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) -static struct opcode group7_rm1[] = { +static const struct opcode group7_rm1[] = { DI(SrcNone | Priv, monitor), DI(SrcNone | Priv, mwait), N, N, N, N, N, N, }; -static struct opcode group7_rm3[] = { +static const struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall), DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), @@ -3548,13 +3607,13 @@ static struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, invlpga, check_svme), }; -static struct opcode group7_rm7[] = { +static const struct opcode group7_rm7[] = { N, DIP(SrcNone, rdtscp, check_rdtsc), N, N, N, N, N, N, }; -static struct opcode group1[] = { +static const struct opcode group1[] = { I(Lock, em_add), I(Lock | PageTable, em_or), I(Lock, em_adc), @@ -3565,11 +3624,11 @@ static struct opcode group1[] = { I(0, em_cmp), }; -static struct opcode group1A[] = { +static const struct opcode group1A[] = { I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, }; -static struct opcode group3[] = { +static const struct opcode group3[] = { I(DstMem | SrcImm, em_test), I(DstMem | SrcImm, em_test), I(DstMem | SrcNone | Lock, em_not), @@ -3580,13 +3639,13 @@ static struct opcode group3[] = { I(SrcMem, em_idiv_ex), }; -static struct opcode group4[] = { +static const struct opcode group4[] = { I(ByteOp | DstMem | SrcNone | Lock, em_grp45), I(ByteOp | DstMem | SrcNone | Lock, em_grp45), N, N, N, N, N, N, }; -static struct opcode group5[] = { +static const struct opcode group5[] = { I(DstMem | SrcNone | Lock, em_grp45), I(DstMem | SrcNone | Lock, em_grp45), I(SrcMem | Stack, em_grp45), @@ -3596,7 +3655,7 @@ static struct opcode group5[] = { I(SrcMem | Stack, em_grp45), N, }; -static struct opcode group6[] = { +static const struct opcode group6[] = { DI(Prot, sldt), DI(Prot, str), II(Prot | Priv | SrcMem16, em_lldt, lldt), @@ -3604,7 +3663,7 @@ static struct opcode group6[] = { N, N, N, N, }; -static struct group_dual group7 = { { +static const struct group_dual group7 = { { II(Mov | DstMem | Priv, em_sgdt, sgdt), II(Mov | DstMem | Priv, em_sidt, sidt), II(SrcMem | Priv, em_lgdt, lgdt), @@ -3621,7 +3680,7 @@ static struct group_dual group7 = { { EXT(0, group7_rm7), } }; -static struct opcode group8[] = { +static const struct opcode group8[] = { N, N, N, N, I(DstMem | SrcImmByte, em_bt), I(DstMem | SrcImmByte | Lock | PageTable, em_bts), @@ -3629,26 +3688,26 @@ static struct opcode group8[] = { I(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; -static struct group_dual group9 = { { +static const struct group_dual group9 = { { N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { N, N, N, N, N, N, N, N, } }; -static struct opcode group11[] = { +static const struct opcode group11[] = { I(DstMem | SrcImm | Mov | PageTable, em_mov), X7(D(Undefined)), }; -static struct gprefix pfx_0f_6f_0f_7f = { +static const struct gprefix pfx_0f_6f_0f_7f = { I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov), }; -static struct gprefix pfx_vmovntpx = { +static const struct gprefix pfx_vmovntpx = { I(0, em_mov), N, N, N, }; -static struct opcode opcode_table[256] = { +static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ I6ALU(Lock, em_add), I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), @@ -3689,7 +3748,7 @@ static struct opcode opcode_table[256] = { I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op), I(SrcImmByte | Mov | Stack, em_push), I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op), - I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */ + I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */ I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */ /* 0x70 - 0x7F */ X16(D(SrcImmByte)), @@ -3765,7 +3824,7 @@ static struct opcode opcode_table[256] = { D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5), }; -static struct opcode twobyte_table[256] = { +static const struct opcode twobyte_table[256] = { /* 0x00 - 0x0F */ G(0, group6), GD(0, &group7), N, N, N, I(ImplicitOps | VendorSpecific, em_syscall), @@ -3936,7 +3995,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, case OpAcc: op->type = OP_REG; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; - op->addr.reg = &ctxt->regs[VCPU_REGS_RAX]; + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); fetch_register_operand(op); op->orig_val = op->val; break; @@ -3944,19 +4003,20 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); + register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); op->addr.mem.seg = VCPU_SREG_ES; op->val = 0; + op->count = 1; break; case OpDX: op->type = OP_REG; op->bytes = 2; - op->addr.reg = &ctxt->regs[VCPU_REGS_RDX]; + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); fetch_register_operand(op); break; case OpCL: op->bytes = 1; - op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff; + op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff; break; case OpImmByte: rc = decode_imm(ctxt, op, 1, true); @@ -3987,9 +4047,10 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->type = OP_MEM; op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; op->addr.mem.ea = - register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); + register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); op->addr.mem.seg = seg_override(ctxt); op->val = 0; + op->count = 1; break; case OpImmFAddr: op->type = OP_IMM; @@ -4293,9 +4354,10 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); } + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { - struct x86_emulate_ops *ops = ctxt->ops; + const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; @@ -4356,7 +4418,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) } /* Instruction can only be executed in protected mode */ - if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) { + if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) { rc = emulate_ud(ctxt); goto done; } @@ -4377,7 +4439,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) if (ctxt->rep_prefix && (ctxt->d & String)) { /* All REP prefixes have the same first termination condition */ - if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) { + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { ctxt->eip = ctxt->_eip; goto done; } @@ -4450,7 +4512,7 @@ special_insn: ctxt->dst.val = ctxt->src.addr.mem.ea; break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ - if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) + if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) break; rc = em_xchg(ctxt); break; @@ -4478,7 +4540,7 @@ special_insn: rc = em_grp2(ctxt); break; case 0xd2 ... 0xd3: /* Grp2 */ - ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; + ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); rc = em_grp2(ctxt); break; case 0xe9: /* jmp rel */ @@ -4524,23 +4586,27 @@ writeback: ctxt->dst.type = saved_dst_type; if ((ctxt->d & SrcMask) == SrcSI) - string_addr_inc(ctxt, seg_override(ctxt), - VCPU_REGS_RSI, &ctxt->src); + string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src); if ((ctxt->d & DstMask) == DstDI) - string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI, - &ctxt->dst); + string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst); if (ctxt->rep_prefix && (ctxt->d & String)) { + unsigned int count; struct read_cache *r = &ctxt->io_read; - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); + if ((ctxt->d & SrcMask) == SrcSI) + count = ctxt->src.count; + else + count = ctxt->dst.count; + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), + -count); if (!string_insn_completed(ctxt)) { /* * Re-enter guest when pio read ahead buffer is empty * or, if it is not used, after each 1024 iteration. */ - if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) && + if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) && (r->end == 0 || r->end != r->pos)) { /* * Reset read cache. Usually happens before @@ -4548,6 +4614,7 @@ writeback: * we have to do it here. */ ctxt->mem_read.end = 0; + writeback_registers(ctxt); return EMULATION_RESTART; } goto done; /* skip rip writeback */ @@ -4562,6 +4629,9 @@ done: if (rc == X86EMUL_INTERCEPTED) return EMULATION_INTERCEPTED; + if (rc == X86EMUL_CONTINUE) + writeback_registers(ctxt); + return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK; twobyte_insn: @@ -4634,3 +4704,13 @@ twobyte_insn: cannot_emulate: return EMULATION_FAILED; } + +void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt) +{ + invalidate_registers(ctxt); +} + +void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt) +{ + writeback_registers(ctxt); +} diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index adba28f..11300d2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm) ktime_t remaining; struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; - if (!ps->pit_timer.period) + if (!ps->period) return 0; /* @@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm) * itself with the initial count and continues counting * from there. */ - remaining = hrtimer_get_remaining(&ps->pit_timer.timer); - elapsed = ps->pit_timer.period - ktime_to_ns(remaining); - elapsed = mod_64(elapsed, ps->pit_timer.period); + remaining = hrtimer_get_remaining(&ps->timer); + elapsed = ps->period - ktime_to_ns(remaining); + elapsed = mod_64(elapsed, ps->period); return elapsed; } @@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) int value; spin_lock(&ps->inject_lock); - value = atomic_dec_return(&ps->pit_timer.pending); + value = atomic_dec_return(&ps->pending); if (value < 0) /* spurious acks can be generated if, for example, the * PIC is being reset. Handle it gracefully here */ - atomic_inc(&ps->pit_timer.pending); + atomic_inc(&ps->pending); else if (value > 0) /* in this case, we had multiple outstanding pit interrupts * that we needed to inject. Reinject @@ -261,28 +261,17 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) if (!kvm_vcpu_is_bsp(vcpu) || !pit) return; - timer = &pit->pit_state.pit_timer.timer; + timer = &pit->pit_state.timer; if (hrtimer_cancel(timer)) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } static void destroy_pit_timer(struct kvm_pit *pit) { - hrtimer_cancel(&pit->pit_state.pit_timer.timer); + hrtimer_cancel(&pit->pit_state.timer); flush_kthread_work(&pit->expired); } -static bool kpit_is_periodic(struct kvm_timer *ktimer) -{ - struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state, - pit_timer); - return ps->is_periodic; -} - -static struct kvm_timer_ops kpit_ops = { - .is_periodic = kpit_is_periodic, -}; - static void pit_do_work(struct kthread_work *work) { struct kvm_pit *pit = container_of(work, struct kvm_pit, expired); @@ -322,16 +311,16 @@ static void pit_do_work(struct kthread_work *work) static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) { - struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); - struct kvm_pit *pt = ktimer->kvm->arch.vpit; + struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer); + struct kvm_pit *pt = ps->kvm->arch.vpit; - if (ktimer->reinject || !atomic_read(&ktimer->pending)) { - atomic_inc(&ktimer->pending); + if (ps->reinject || !atomic_read(&ps->pending)) { + atomic_inc(&ps->pending); queue_kthread_work(&pt->worker, &pt->expired); } - if (ktimer->t_ops->is_periodic(ktimer)) { - hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); + if (ps->is_periodic) { + hrtimer_add_expires_ns(&ps->timer, ps->period); return HRTIMER_RESTART; } else return HRTIMER_NORESTART; @@ -340,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) { struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; - struct kvm_timer *pt = &ps->pit_timer; s64 interval; if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY) @@ -351,19 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) pr_debug("create pit timer, interval is %llu nsec\n", interval); /* TODO The new value only affected after the retriggered */ - hrtimer_cancel(&pt->timer); + hrtimer_cancel(&ps->timer); flush_kthread_work(&ps->pit->expired); - pt->period = interval; + ps->period = interval; ps->is_periodic = is_period; - pt->timer.function = pit_timer_fn; - pt->t_ops = &kpit_ops; - pt->kvm = ps->pit->kvm; + ps->timer.function = pit_timer_fn; + ps->kvm = ps->pit->kvm; - atomic_set(&pt->pending, 0); + atomic_set(&ps->pending, 0); ps->irq_ack = 1; - hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval), + hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); } @@ -639,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit) } mutex_unlock(&pit->pit_state.lock); - atomic_set(&pit->pit_state.pit_timer.pending, 0); + atomic_set(&pit->pit_state.pending, 0); pit->pit_state.irq_ack = 1; } @@ -648,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier); if (!mask) { - atomic_set(&pit->pit_state.pit_timer.pending, 0); + atomic_set(&pit->pit_state.pending, 0); pit->pit_state.irq_ack = 1; } } @@ -706,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) pit_state = &pit->pit_state; pit_state->pit = pit; - hrtimer_init(&pit_state->pit_timer.timer, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); pit_state->irq_ack_notifier.gsi = 0; pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq; kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier); - pit_state->pit_timer.reinject = true; + pit_state->reinject = true; mutex_unlock(&pit->pit_state.lock); kvm_pit_reset(pit); @@ -761,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm) kvm_unregister_irq_ack_notifier(kvm, &kvm->arch.vpit->pit_state.irq_ack_notifier); mutex_lock(&kvm->arch.vpit->pit_state.lock); - timer = &kvm->arch.vpit->pit_state.pit_timer.timer; + timer = &kvm->arch.vpit->pit_state.timer; hrtimer_cancel(timer); flush_kthread_work(&kvm->arch.vpit->expired); kthread_stop(kvm->arch.vpit->worker_task); diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index fdf4042..dd1b16b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -24,8 +24,12 @@ struct kvm_kpit_channel_state { struct kvm_kpit_state { struct kvm_kpit_channel_state channels[3]; u32 flags; - struct kvm_timer pit_timer; bool is_periodic; + s64 period; /* unit: ns */ + struct hrtimer timer; + atomic_t pending; /* accumulated triggered timers */ + bool reinject; + struct kvm *kvm; u32 speaker_data_on; struct mutex lock; struct kvm_pit *pit; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 9fc9aa7..848206d 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -190,17 +190,17 @@ void kvm_pic_update_irq(struct kvm_pic *s) int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level) { - int ret = -1; + int ret, irq_level; + + BUG_ON(irq < 0 || irq >= PIC_NUM_PINS); pic_lock(s); - if (irq >= 0 && irq < PIC_NUM_PINS) { - int irq_level = __kvm_irq_line_state(&s->irq_states[irq], - irq_source_id, level); - ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); - pic_update_irq(s); - trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, - s->pics[irq >> 3].imr, ret == 0); - } + irq_level = __kvm_irq_line_state(&s->irq_states[irq], + irq_source_id, level); + ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level); + pic_update_irq(s); + trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, + s->pics[irq >> 3].imr, ret == 0); pic_unlock(s); return ret; @@ -275,23 +275,20 @@ void kvm_pic_reset(struct kvm_kpic_state *s) { int irq, i; struct kvm_vcpu *vcpu; - u8 irr = s->irr, isr = s->imr; + u8 edge_irr = s->irr & ~s->elcr; bool found = false; s->last_irr = 0; - s->irr = 0; + s->irr &= s->elcr; s->imr = 0; - s->isr = 0; s->priority_add = 0; - s->irq_base = 0; - s->read_reg_select = 0; - s->poll = 0; s->special_mask = 0; - s->init_state = 0; - s->auto_eoi = 0; - s->rotate_on_auto_eoi = 0; - s->special_fully_nested_mode = 0; - s->init4 = 0; + s->read_reg_select = 0; + if (!s->init4) { + s->special_fully_nested_mode = 0; + s->auto_eoi = 0; + } + s->init_state = 1; kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) if (kvm_apic_accept_pic_intr(vcpu)) { @@ -304,7 +301,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s) return; for (irq = 0; irq < PIC_NUM_PINS/2; irq++) - if (irr & (1 << irq) || isr & (1 << irq)) + if (edge_irr & (1 << irq)) pic_clear_isr(s, irq); } @@ -316,40 +313,13 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val) addr &= 1; if (addr == 0) { if (val & 0x10) { - u8 edge_irr = s->irr & ~s->elcr; - int i; - bool found = false; - struct kvm_vcpu *vcpu; - s->init4 = val & 1; - s->last_irr = 0; - s->irr &= s->elcr; - s->imr = 0; - s->priority_add = 0; - s->special_mask = 0; - s->read_reg_select = 0; - if (!s->init4) { - s->special_fully_nested_mode = 0; - s->auto_eoi = 0; - } - s->init_state = 1; if (val & 0x02) pr_pic_unimpl("single mode not supported"); if (val & 0x08) pr_pic_unimpl( - "level sensitive irq not supported"); - - kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm) - if (kvm_apic_accept_pic_intr(vcpu)) { - found = true; - break; - } - - - if (found) - for (irq = 0; irq < PIC_NUM_PINS/2; irq++) - if (edge_irr & (1 << irq)) - pic_clear_isr(s, irq); + "level sensitive irq not supported"); + kvm_pic_reset(s); } else if (val & 0x08) { if (val & 0x04) s->poll = 1; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2086f2b..2d03568 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -70,7 +70,7 @@ struct kvm_pic { struct kvm_io_device dev_slave; struct kvm_io_device dev_eclr; void (*ack_notifier)(void *opaque, int irq); - unsigned long irq_states[16]; + unsigned long irq_states[PIC_NUM_PINS]; }; struct kvm_pic *kvm_create_pic(struct kvm *kvm); diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h deleted file mode 100644 index 497dbaa..0000000 --- a/arch/x86/kvm/kvm_timer.h +++ /dev/null @@ -1,18 +0,0 @@ - -struct kvm_timer { - struct hrtimer timer; - s64 period; /* unit: ns */ - u32 timer_mode_mask; - u64 tscdeadline; - atomic_t pending; /* accumulated triggered timers */ - bool reinject; - struct kvm_timer_ops *t_ops; - struct kvm *kvm; - struct kvm_vcpu *vcpu; -}; - -struct kvm_timer_ops { - bool (*is_periodic)(struct kvm_timer *); -}; - -enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ce87878..43e9fad 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -34,6 +34,7 @@ #include <asm/current.h> #include <asm/apicdef.h> #include <linux/atomic.h> +#include <linux/jump_label.h> #include "kvm_cache_regs.h" #include "irq.h" #include "trace.h" @@ -65,6 +66,7 @@ #define APIC_DEST_NOSHORT 0x0 #define APIC_DEST_MASK 0x800 #define MAX_APIC_VECTOR 256 +#define APIC_VECTORS_PER_REG 32 #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) @@ -72,11 +74,6 @@ static unsigned int min_timer_period_us = 500; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); -static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off) -{ - return *((u32 *) (apic->regs + reg_off)); -} - static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) { *((u32 *) (apic->regs + reg_off)) = val; @@ -117,19 +114,23 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap) return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); } -static inline int apic_hw_enabled(struct kvm_lapic *apic) -{ - return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; -} +struct static_key_deferred apic_hw_disabled __read_mostly; +struct static_key_deferred apic_sw_disabled __read_mostly; -static inline int apic_sw_enabled(struct kvm_lapic *apic) +static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val) { - return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; + if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) { + if (val & APIC_SPIV_APIC_ENABLED) + static_key_slow_dec_deferred(&apic_sw_disabled); + else + static_key_slow_inc(&apic_sw_disabled.key); + } + apic_set_reg(apic, APIC_SPIV, val); } static inline int apic_enabled(struct kvm_lapic *apic) { - return apic_sw_enabled(apic) && apic_hw_enabled(apic); + return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic); } #define LVT_MASK \ @@ -139,36 +140,135 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) +static inline int apic_x2apic_mode(struct kvm_lapic *apic) +{ + return apic->vcpu->arch.apic_base & X2APIC_ENABLE; +} + static inline int kvm_apic_id(struct kvm_lapic *apic) { - return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff; + return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; +} + +static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) +{ + u16 cid; + ldr >>= 32 - map->ldr_bits; + cid = (ldr >> map->cid_shift) & map->cid_mask; + + BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); + + return cid; +} + +static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) +{ + ldr >>= (32 - map->ldr_bits); + return ldr & map->lid_mask; +} + +static void recalculate_apic_map(struct kvm *kvm) +{ + struct kvm_apic_map *new, *old = NULL; + struct kvm_vcpu *vcpu; + int i; + + new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL); + + mutex_lock(&kvm->arch.apic_map_lock); + + if (!new) + goto out; + + new->ldr_bits = 8; + /* flat mode is default */ + new->cid_shift = 8; + new->cid_mask = 0; + new->lid_mask = 0xff; + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvm_lapic *apic = vcpu->arch.apic; + u16 cid, lid; + u32 ldr; + + if (!kvm_apic_present(vcpu)) + continue; + + /* + * All APICs have to be configured in the same mode by an OS. + * We take advatage of this while building logical id loockup + * table. After reset APICs are in xapic/flat mode, so if we + * find apic with different setting we assume this is the mode + * OS wants all apics to be in; build lookup table accordingly. + */ + if (apic_x2apic_mode(apic)) { + new->ldr_bits = 32; + new->cid_shift = 16; + new->cid_mask = new->lid_mask = 0xffff; + } else if (kvm_apic_sw_enabled(apic) && + !new->cid_mask /* flat mode */ && + kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { + new->cid_shift = 4; + new->cid_mask = 0xf; + new->lid_mask = 0xf; + } + + new->phys_map[kvm_apic_id(apic)] = apic; + + ldr = kvm_apic_get_reg(apic, APIC_LDR); + cid = apic_cluster_id(new, ldr); + lid = apic_logical_id(new, ldr); + + if (lid) + new->logical_map[cid][ffs(lid) - 1] = apic; + } +out: + old = rcu_dereference_protected(kvm->arch.apic_map, + lockdep_is_held(&kvm->arch.apic_map_lock)); + rcu_assign_pointer(kvm->arch.apic_map, new); + mutex_unlock(&kvm->arch.apic_map_lock); + + if (old) + kfree_rcu(old, rcu); +} + +static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) +{ + apic_set_reg(apic, APIC_ID, id << 24); + recalculate_apic_map(apic->vcpu->kvm); +} + +static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) +{ + apic_set_reg(apic, APIC_LDR, id); + recalculate_apic_map(apic->vcpu->kvm); } static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type) { - return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); + return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED); } static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) { - return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; + return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; } static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) { - return ((apic_get_reg(apic, APIC_LVTT) & + return ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); } static inline int apic_lvtt_period(struct kvm_lapic *apic) { - return ((apic_get_reg(apic, APIC_LVTT) & + return ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); } static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) { - return ((apic_get_reg(apic, APIC_LVTT) & + return ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_TSCDEADLINE); } @@ -184,7 +284,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *feat; u32 v = APIC_VERSION; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!kvm_vcpu_has_lapic(vcpu)) return; feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); @@ -193,12 +293,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_LVR, v); } -static inline int apic_x2apic_mode(struct kvm_lapic *apic) -{ - return apic->vcpu->arch.apic_base & X2APIC_ENABLE; -} - -static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { +static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = { LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ LVT_MASK | APIC_MODE_MASK, /* LVTPC */ @@ -208,25 +303,30 @@ static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { static int find_highest_vector(void *bitmap) { - u32 *word = bitmap; - int word_offset = MAX_APIC_VECTOR >> 5; + int vec; + u32 *reg; - while ((word_offset != 0) && (word[(--word_offset) << 2] == 0)) - continue; + for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; + vec >= 0; vec -= APIC_VECTORS_PER_REG) { + reg = bitmap + REG_POS(vec); + if (*reg) + return fls(*reg) - 1 + vec; + } - if (likely(!word_offset && !word[0])) - return -1; - else - return fls(word[word_offset << 2]) - 1 + (word_offset << 5); + return -1; } static u8 count_vectors(void *bitmap) { - u32 *word = bitmap; - int word_offset; + int vec; + u32 *reg; u8 count = 0; - for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset) - count += hweight32(word[word_offset << 2]); + + for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) { + reg = bitmap + REG_POS(vec); + count += hweight32(*reg); + } + return count; } @@ -285,7 +385,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; int highest_irr; /* This may race with setting of irr in __apic_accept_irq() and @@ -293,9 +392,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) * will cause vmexit immediately and the value will be recalculated * on the next vmentry. */ - if (!apic) + if (!kvm_vcpu_has_lapic(vcpu)) return 0; - highest_irr = apic_find_highest_irr(apic); + highest_irr = apic_find_highest_irr(vcpu->arch.apic); return highest_irr; } @@ -378,8 +477,8 @@ static void apic_update_ppr(struct kvm_lapic *apic) u32 tpr, isrv, ppr, old_ppr; int isr; - old_ppr = apic_get_reg(apic, APIC_PROCPRI); - tpr = apic_get_reg(apic, APIC_TASKPRI); + old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI); + tpr = kvm_apic_get_reg(apic, APIC_TASKPRI); isr = apic_find_highest_isr(apic); isrv = (isr != -1) ? isr : 0; @@ -415,13 +514,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) u32 logical_id; if (apic_x2apic_mode(apic)) { - logical_id = apic_get_reg(apic, APIC_LDR); + logical_id = kvm_apic_get_reg(apic, APIC_LDR); return logical_id & mda; } - logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR)); + logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR)); - switch (apic_get_reg(apic, APIC_DFR)) { + switch (kvm_apic_get_reg(apic, APIC_DFR)) { case APIC_DFR_FLAT: if (logical_id & mda) result = 1; @@ -433,7 +532,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) break; default: apic_debug("Bad DFR vcpu %d: %08x\n", - apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR)); + apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR)); break; } @@ -478,6 +577,72 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, return result; } +bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r) +{ + struct kvm_apic_map *map; + unsigned long bitmap = 1; + struct kvm_lapic **dst; + int i; + bool ret = false; + + *r = -1; + + if (irq->shorthand == APIC_DEST_SELF) { + *r = kvm_apic_set_irq(src->vcpu, irq); + return true; + } + + if (irq->shorthand) + return false; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + if (!map) + goto out; + + if (irq->dest_mode == 0) { /* physical mode */ + if (irq->delivery_mode == APIC_DM_LOWEST || + irq->dest_id == 0xff) + goto out; + dst = &map->phys_map[irq->dest_id & 0xff]; + } else { + u32 mda = irq->dest_id << (32 - map->ldr_bits); + + dst = map->logical_map[apic_cluster_id(map, mda)]; + + bitmap = apic_logical_id(map, mda); + + if (irq->delivery_mode == APIC_DM_LOWEST) { + int l = -1; + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + if (l < 0) + l = i; + else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0) + l = i; + } + + bitmap = (l >= 0) ? 1 << l : 0; + } + } + + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + if (*r < 0) + *r = 0; + *r += kvm_apic_set_irq(dst[i]->vcpu, irq); + } + + ret = true; +out: + rcu_read_unlock(); + return ret; +} + /* * Add a pending IRQ into lapic. * Return 1 if successfully added and 0 if discarded. @@ -591,7 +756,7 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); - if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && + if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { int trigger_mode; if (apic_test_vector(vector, apic->regs + APIC_TMR)) @@ -606,8 +771,8 @@ static int apic_set_eoi(struct kvm_lapic *apic) static void apic_send_ipi(struct kvm_lapic *apic) { - u32 icr_low = apic_get_reg(apic, APIC_ICR); - u32 icr_high = apic_get_reg(apic, APIC_ICR2); + u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); + u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2); struct kvm_lapic_irq irq; irq.vector = icr_low & APIC_VECTOR_MASK; @@ -642,7 +807,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) ASSERT(apic != NULL); /* if initial count is 0, current count should also be 0 */ - if (apic_get_reg(apic, APIC_TMICT) == 0) + if (kvm_apic_get_reg(apic, APIC_TMICT) == 0) return 0; remaining = hrtimer_get_remaining(&apic->lapic_timer.timer); @@ -696,13 +861,15 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) val = apic_get_tmcct(apic); break; - + case APIC_PROCPRI: + apic_update_ppr(apic); + val = kvm_apic_get_reg(apic, offset); + break; case APIC_TASKPRI: report_tpr_access(apic, false); /* fall thru */ default: - apic_update_ppr(apic); - val = apic_get_reg(apic, offset); + val = kvm_apic_get_reg(apic, offset); break; } @@ -719,7 +886,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, { unsigned char alignment = offset & 0xf; u32 result; - /* this bitmask has a bit cleared for each reserver register */ + /* this bitmask has a bit cleared for each reserved register */ static const u64 rmask = 0x43ff01ffffffe70cULL; if ((alignment + len) > 4) { @@ -754,7 +921,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) { - return apic_hw_enabled(apic) && + return kvm_apic_hw_enabled(apic) && addr >= apic->base_address && addr < apic->base_address + LAPIC_MMIO_LENGTH; } @@ -777,7 +944,7 @@ static void update_divide_count(struct kvm_lapic *apic) { u32 tmp1, tmp2, tdcr; - tdcr = apic_get_reg(apic, APIC_TDCR); + tdcr = kvm_apic_get_reg(apic, APIC_TDCR); tmp1 = tdcr & 0xf; tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; apic->divide_count = 0x1 << (tmp2 & 0x7); @@ -792,9 +959,9 @@ static void start_apic_timer(struct kvm_lapic *apic) atomic_set(&apic->lapic_timer.pending, 0); if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { - /* lapic timer in oneshot or peroidic mode */ + /* lapic timer in oneshot or periodic mode */ now = apic->lapic_timer.timer.base->get_time(); - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) + apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT) * APIC_BUS_CYCLE_NS * apic->divide_count; if (!apic->lapic_timer.period) @@ -826,7 +993,7 @@ static void start_apic_timer(struct kvm_lapic *apic) "timer initial count 0x%x, period %lldns, " "expire @ 0x%016" PRIx64 ".\n", __func__, APIC_BUS_CYCLE_NS, ktime_to_ns(now), - apic_get_reg(apic, APIC_TMICT), + kvm_apic_get_reg(apic, APIC_TMICT), apic->lapic_timer.period, ktime_to_ns(ktime_add_ns(now, apic->lapic_timer.period))); @@ -858,7 +1025,7 @@ static void start_apic_timer(struct kvm_lapic *apic) static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) { - int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); + int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0)); if (apic_lvt_nmi_mode(lvt0_val)) { if (!nmi_wd_enabled) { @@ -879,7 +1046,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) switch (reg) { case APIC_ID: /* Local APIC ID */ if (!apic_x2apic_mode(apic)) - apic_set_reg(apic, APIC_ID, val); + kvm_apic_set_id(apic, val >> 24); else ret = 1; break; @@ -895,29 +1062,30 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LDR: if (!apic_x2apic_mode(apic)) - apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK); + kvm_apic_set_ldr(apic, val & APIC_LDR_MASK); else ret = 1; break; case APIC_DFR: - if (!apic_x2apic_mode(apic)) + if (!apic_x2apic_mode(apic)) { apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF); - else + recalculate_apic_map(apic->vcpu->kvm); + } else ret = 1; break; case APIC_SPIV: { u32 mask = 0x3ff; - if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) + if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI) mask |= APIC_SPIV_DIRECTED_EOI; - apic_set_reg(apic, APIC_SPIV, val & mask); + apic_set_spiv(apic, val & mask); if (!(val & APIC_SPIV_APIC_ENABLED)) { int i; u32 lvt_val; for (i = 0; i < APIC_LVT_NUM; i++) { - lvt_val = apic_get_reg(apic, + lvt_val = kvm_apic_get_reg(apic, APIC_LVTT + 0x10 * i); apic_set_reg(apic, APIC_LVTT + 0x10 * i, lvt_val | APIC_LVT_MASKED); @@ -946,7 +1114,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVT1: case APIC_LVTERR: /* TODO: Check vector */ - if (!apic_sw_enabled(apic)) + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; @@ -955,12 +1123,12 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; case APIC_LVTT: - if ((apic_get_reg(apic, APIC_LVTT) & + if ((kvm_apic_get_reg(apic, APIC_LVTT) & apic->lapic_timer.timer_mode_mask) != (val & apic->lapic_timer.timer_mode_mask)) hrtimer_cancel(&apic->lapic_timer.timer); - if (!apic_sw_enabled(apic)) + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); apic_set_reg(apic, APIC_LVTT, val); @@ -1039,24 +1207,30 @@ static int apic_mmio_write(struct kvm_io_device *this, void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; - - if (apic) + if (kvm_vcpu_has_lapic(vcpu)) apic_reg_write(vcpu->arch.apic, APIC_EOI, 0); } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); void kvm_free_lapic(struct kvm_vcpu *vcpu) { + struct kvm_lapic *apic = vcpu->arch.apic; + if (!vcpu->arch.apic) return; - hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); + hrtimer_cancel(&apic->lapic_timer.timer); + + if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)) + static_key_slow_dec_deferred(&apic_hw_disabled); + + if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED)) + static_key_slow_dec_deferred(&apic_sw_disabled); - if (vcpu->arch.apic->regs) - free_page((unsigned long)vcpu->arch.apic->regs); + if (apic->regs) + free_page((unsigned long)apic->regs); - kfree(vcpu->arch.apic); + kfree(apic); } /* @@ -1068,10 +1242,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) - return 0; - if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + apic_lvtt_period(apic)) return 0; return apic->lapic_timer.tscdeadline; @@ -1080,10 +1253,9 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) - return; - if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) || + apic_lvtt_period(apic)) return; hrtimer_cancel(&apic->lapic_timer.timer); @@ -1095,20 +1267,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic) + if (!kvm_vcpu_has_lapic(vcpu)) return; + apic_set_tpr(apic, ((cr8 & 0x0f) << 4) - | (apic_get_reg(apic, APIC_TASKPRI) & 4)); + | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4)); } u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; u64 tpr; - if (!apic) + if (!kvm_vcpu_has_lapic(vcpu)) return 0; - tpr = (u64) apic_get_reg(apic, APIC_TASKPRI); + + tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI); return (tpr & 0xf0) >> 4; } @@ -1123,14 +1296,23 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) return; } + /* update jump label if enable bit changes */ + if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) { + if (value & MSR_IA32_APICBASE_ENABLE) + static_key_slow_dec_deferred(&apic_hw_disabled); + else + static_key_slow_inc(&apic_hw_disabled.key); + recalculate_apic_map(vcpu->kvm); + } + if (!kvm_vcpu_is_bsp(apic->vcpu)) value &= ~MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base = value; if (apic_x2apic_mode(apic)) { u32 id = kvm_apic_id(apic); - u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf)); - apic_set_reg(apic, APIC_LDR, ldr); + u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + kvm_apic_set_ldr(apic, ldr); } apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; @@ -1155,7 +1337,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) /* Stop the timer in case it's a reset to an active apic */ hrtimer_cancel(&apic->lapic_timer.timer); - apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); + kvm_apic_set_id(apic, vcpu->vcpu_id); kvm_apic_set_version(apic->vcpu); for (i = 0; i < APIC_LVT_NUM; i++) @@ -1164,9 +1346,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT)); apic_set_reg(apic, APIC_DFR, 0xffffffffU); - apic_set_reg(apic, APIC_SPIV, 0xff); + apic_set_spiv(apic, 0xff); apic_set_reg(apic, APIC_TASKPRI, 0); - apic_set_reg(apic, APIC_LDR, 0); + kvm_apic_set_ldr(apic, 0); apic_set_reg(apic, APIC_ESR, 0); apic_set_reg(apic, APIC_ICR, 0); apic_set_reg(apic, APIC_ICR2, 0); @@ -1183,7 +1365,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); if (kvm_vcpu_is_bsp(vcpu)) - vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + kvm_lapic_set_base(vcpu, + vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP); vcpu->arch.pv_eoi.msr_val = 0; apic_update_ppr(apic); @@ -1196,45 +1379,34 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) vcpu->arch.apic_base, apic->base_address); } -bool kvm_apic_present(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); -} - -int kvm_lapic_enabled(struct kvm_vcpu *vcpu) -{ - return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); -} - /* *---------------------------------------------------------------------- * timer interface *---------------------------------------------------------------------- */ -static bool lapic_is_periodic(struct kvm_timer *ktimer) +static bool lapic_is_periodic(struct kvm_lapic *apic) { - struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, - lapic_timer); return apic_lvtt_period(apic); } int apic_has_pending_timer(struct kvm_vcpu *vcpu) { - struct kvm_lapic *lapic = vcpu->arch.apic; + struct kvm_lapic *apic = vcpu->arch.apic; - if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) - return atomic_read(&lapic->lapic_timer.pending); + if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) && + apic_lvt_enabled(apic, APIC_LVTT)) + return atomic_read(&apic->lapic_timer.pending); return 0; } int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) { - u32 reg = apic_get_reg(apic, lvt_type); + u32 reg = kvm_apic_get_reg(apic, lvt_type); int vector, mode, trig_mode; - if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { + if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { vector = reg & APIC_VECTOR_MASK; mode = reg & APIC_MODE_MASK; trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; @@ -1251,15 +1423,40 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) kvm_apic_local_deliver(apic, APIC_LVT0); } -static struct kvm_timer_ops lapic_timer_ops = { - .is_periodic = lapic_is_periodic, -}; - static const struct kvm_io_device_ops apic_mmio_ops = { .read = apic_mmio_read, .write = apic_mmio_write, }; +static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) +{ + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); + struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); + struct kvm_vcpu *vcpu = apic->vcpu; + wait_queue_head_t *q = &vcpu->wq; + + /* + * There is a race window between reading and incrementing, but we do + * not care about potentially losing timer events in the !reinject + * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked + * in vcpu_enter_guest. + */ + if (!atomic_read(&ktimer->pending)) { + atomic_inc(&ktimer->pending); + /* FIXME: this code should not know anything about vcpus */ + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + } + + if (waitqueue_active(q)) + wake_up_interruptible(q); + + if (lapic_is_periodic(apic)) { + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); + return HRTIMER_RESTART; + } else + return HRTIMER_NORESTART; +} + int kvm_create_lapic(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic; @@ -1283,14 +1480,17 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - apic->lapic_timer.timer.function = kvm_timer_fn; - apic->lapic_timer.t_ops = &lapic_timer_ops; - apic->lapic_timer.kvm = vcpu->kvm; - apic->lapic_timer.vcpu = vcpu; + apic->lapic_timer.timer.function = apic_timer_fn; - apic->base_address = APIC_DEFAULT_PHYS_BASE; - vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; + /* + * APIC is created enabled. This will prevent kvm_lapic_set_base from + * thinking that APIC satet has changed. + */ + vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE; + kvm_lapic_set_base(vcpu, + APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE); + static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */ kvm_lapic_reset(vcpu); kvm_iodevice_init(&apic->dev, &apic_mmio_ops); @@ -1306,23 +1506,23 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; int highest_irr; - if (!apic || !apic_enabled(apic)) + if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic)) return -1; apic_update_ppr(apic); highest_irr = apic_find_highest_irr(apic); if ((highest_irr == -1) || - ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) + ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI))) return -1; return highest_irr; } int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) { - u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0); + u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0); int r = 0; - if (!apic_hw_enabled(vcpu->arch.apic)) + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) r = 1; if ((lvt0 & APIC_LVT_MASKED) == 0 && GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT) @@ -1334,7 +1534,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && atomic_read(&apic->lapic_timer.pending) > 0) { + if (!kvm_vcpu_has_lapic(vcpu)) + return; + + if (atomic_read(&apic->lapic_timer.pending) > 0) { if (kvm_apic_local_deliver(apic, APIC_LVTT)) atomic_dec(&apic->lapic_timer.pending); } @@ -1354,12 +1557,17 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) return vector; } -void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) +void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s) { struct kvm_lapic *apic = vcpu->arch.apic; - apic->base_address = vcpu->arch.apic_base & - MSR_IA32_APICBASE_BASE; + kvm_lapic_set_base(vcpu, vcpu->arch.apic_base); + /* set SPIV separately to get count of SW disabled APICs right */ + apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV))); + memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); + /* call kvm_apic_set_id() to put apic into apic_map */ + kvm_apic_set_id(apic, kvm_apic_id(apic)); kvm_apic_set_version(vcpu); apic_update_ppr(apic); @@ -1374,13 +1582,12 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; struct hrtimer *timer; - if (!apic) + if (!kvm_vcpu_has_lapic(vcpu)) return; - timer = &apic->lapic_timer.timer; + timer = &vcpu->arch.apic->lapic_timer.timer; if (hrtimer_cancel(timer)) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } @@ -1478,7 +1685,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff; + tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff; max_irr = apic_find_highest_irr(apic); if (max_irr < 0) max_irr = 0; @@ -1537,7 +1744,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!kvm_vcpu_has_lapic(vcpu)) return 1; /* if this is ICR write vector before command */ @@ -1551,7 +1758,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) struct kvm_lapic *apic = vcpu->arch.apic; u32 low, high = 0; - if (!irqchip_in_kernel(vcpu->kvm)) + if (!kvm_vcpu_has_lapic(vcpu)) return 1; if (apic_reg_read(apic, reg, 4, &low)) @@ -1576,3 +1783,10 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, addr); } + +void kvm_lapic_init(void) +{ + /* do not patch jump label more than once per second */ + jump_label_rate_limit(&apic_hw_disabled, HZ); + jump_label_rate_limit(&apic_sw_disabled, HZ); +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4af5405..e5ebf9f 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -2,10 +2,17 @@ #define __KVM_X86_LAPIC_H #include "iodev.h" -#include "kvm_timer.h" #include <linux/kvm_host.h> +struct kvm_timer { + struct hrtimer timer; + s64 period; /* unit: ns */ + u32 timer_mode_mask; + u64 tscdeadline; + atomic_t pending; /* accumulated triggered timers */ +}; + struct kvm_lapic { unsigned long base_address; struct kvm_io_device dev; @@ -45,11 +52,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); +bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, int *r); + u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); -void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); -int kvm_lapic_enabled(struct kvm_vcpu *vcpu); -bool kvm_apic_present(struct kvm_vcpu *vcpu); +void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, + struct kvm_lapic_state *s); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); @@ -71,4 +80,48 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) } int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); +void kvm_lapic_init(void); + +static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off) +{ + return *((u32 *) (apic->regs + reg_off)); +} + +extern struct static_key kvm_no_apic_vcpu; + +static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu) +{ + if (static_key_false(&kvm_no_apic_vcpu)) + return vcpu->arch.apic; + return true; +} + +extern struct static_key_deferred apic_hw_disabled; + +static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic) +{ + if (static_key_false(&apic_hw_disabled.key)) + return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE; + return MSR_IA32_APICBASE_ENABLE; +} + +extern struct static_key_deferred apic_sw_disabled; + +static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic) +{ + if (static_key_false(&apic_sw_disabled.key)) + return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED; + return APIC_SPIV_APIC_ENABLED; +} + +static inline bool kvm_apic_present(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic); +} + +static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) +{ + return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); +} + #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7fbd0d2..6f85fe0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep) return 0; pfn = spte_to_pfn(old_spte); + + /* + * KVM does not hold the refcount of the page used by + * kvm mmu, before reclaiming the page, we should + * unmap it from mmu first. + */ + WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn))); + if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) kvm_set_pfn_accessed(pfn); if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask)) @@ -960,13 +968,10 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, struct kvm_memory_slot *slot) { - struct kvm_lpage_info *linfo; - - if (likely(level == PT_PAGE_TABLE_LEVEL)) - return &slot->rmap[gfn - slot->base_gfn]; + unsigned long idx; - linfo = lpage_info_slot(gfn, slot, level); - return &linfo->rmap_pde; + idx = gfn_to_index(gfn, slot->base_gfn, level); + return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; } /* @@ -1173,7 +1178,8 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, unsigned long *rmapp; while (mask) { - rmapp = &slot->rmap[gfn_offset + __ffs(mask)]; + rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); /* clear the first set bit */ @@ -1200,7 +1206,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) } static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1218,7 +1224,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1259,43 +1265,67 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, return 0; } -static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, - unsigned long data, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, - unsigned long data)) +static int kvm_handle_hva_range(struct kvm *kvm, + unsigned long start, + unsigned long end, + unsigned long data, + int (*handler)(struct kvm *kvm, + unsigned long *rmapp, + struct kvm_memory_slot *slot, + unsigned long data)) { int j; - int ret; - int retval = 0; + int ret = 0; struct kvm_memslots *slots; struct kvm_memory_slot *memslot; slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { - unsigned long start = memslot->userspace_addr; - unsigned long end; + unsigned long hva_start, hva_end; + gfn_t gfn_start, gfn_end; - end = start + (memslot->npages << PAGE_SHIFT); - if (hva >= start && hva < end) { - gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; - gfn_t gfn = memslot->base_gfn + gfn_offset; + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = + * {gfn_start, gfn_start+1, ..., gfn_end-1}. + */ + gfn_start = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - ret = handler(kvm, &memslot->rmap[gfn_offset], data); + for (j = PT_PAGE_TABLE_LEVEL; + j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) { + unsigned long idx, idx_end; + unsigned long *rmapp; - for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) { - struct kvm_lpage_info *linfo; + /* + * {idx(page_j) | page_j intersects with + * [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}. + */ + idx = gfn_to_index(gfn_start, memslot->base_gfn, j); + idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j); - linfo = lpage_info_slot(gfn, memslot, - PT_DIRECTORY_LEVEL + j); - ret |= handler(kvm, &linfo->rmap_pde, data); - } - trace_kvm_age_page(hva, memslot, ret); - retval |= ret; + rmapp = __gfn_to_rmap(gfn_start, j, memslot); + + for (; idx <= idx_end; ++idx) + ret |= handler(kvm, rmapp++, memslot, data); } } - return retval; + return ret; +} + +static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, + unsigned long data, + int (*handler)(struct kvm *kvm, unsigned long *rmapp, + struct kvm_memory_slot *slot, + unsigned long data)) +{ + return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -1303,13 +1333,18 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp); } +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); +} + void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); } static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator uninitialized_var(iter); @@ -1323,8 +1358,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, * This has some overhead, but not as much as the cost of swapping * out actively used pages or breaking up actively used hugepages. */ - if (!shadow_accessed_mask) - return kvm_unmap_rmapp(kvm, rmapp, data); + if (!shadow_accessed_mask) { + young = kvm_unmap_rmapp(kvm, rmapp, slot, data); + goto out; + } for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { @@ -1336,12 +1373,14 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, (unsigned long *)sptep); } } - +out: + /* @data has hva passed to kvm_age_hva(). */ + trace_kvm_age_page(data, slot, young); return young; } static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, - unsigned long data) + struct kvm_memory_slot *slot, unsigned long data) { u64 *sptep; struct rmap_iterator iter; @@ -1379,13 +1418,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - kvm_unmap_rmapp(vcpu->kvm, rmapp, 0); + kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0); kvm_flush_remote_tlbs(vcpu->kvm); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp); + return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) @@ -2457,6 +2496,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, rmap_recycle(vcpu, sptep, gfn); } } + kvm_release_pfn_clean(pfn); } @@ -2469,17 +2509,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) { struct kvm_memory_slot *slot; - unsigned long hva; slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); - if (!slot) { - get_page(fault_page); - return page_to_pfn(fault_page); - } - - hva = gfn_to_hva_memslot(slot, gfn); + if (!slot) + return KVM_PFN_ERR_FAULT; - return hva_to_pfn_atomic(vcpu->kvm, hva); + return gfn_to_pfn_memslot_atomic(slot, gfn); } static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, @@ -2580,11 +2615,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - if (!sp) { - pgprintk("nonpaging_map: ENOMEM\n"); - kvm_release_pfn_clean(pfn); - return -ENOMEM; - } mmu_spte_set(iterator.sptep, __pa(sp->spt) @@ -2611,8 +2641,16 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) { - kvm_release_pfn_clean(pfn); - if (is_hwpoison_pfn(pfn)) { + /* + * Do not cache the mmio info caused by writing the readonly gfn + * into the spte otherwise read access on readonly gfn also can + * caused mmio page fault and treat it as mmio access. + * Return 1 to tell kvm to emulate it. + */ + if (pfn == KVM_PFN_ERR_RO_FAULT) + return 1; + + if (pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); return 0; } @@ -3236,8 +3274,6 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, if (!async) return false; /* *pfn has correct page already */ - put_page(pfn_to_page(*pfn)); - if (!prefault && can_do_async_pf(vcpu)) { trace_kvm_try_async_get_page(gva, gfn); if (kvm_find_async_pf_gfn(vcpu, gfn)) { @@ -3371,6 +3407,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0; } +static inline void protect_clean_gpte(unsigned *access, unsigned gpte) +{ + unsigned mask; + + BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); + + mask = (unsigned)~ACC_WRITE_MASK; + /* Allow write access to dirty gptes */ + mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK; + *access &= mask; +} + static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, int *nr_present) { @@ -3388,6 +3436,25 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access, return false; } +static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte) +{ + unsigned access; + + access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; + access &= ~(gpte >> PT64_NX_SHIFT); + + return access; +} + +static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte) +{ + unsigned index; + + index = level - 1; + index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2); + return mmu->last_pte_bitmap & (1 << index); +} + #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE @@ -3457,6 +3524,56 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, } } +static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +{ + unsigned bit, byte, pfec; + u8 map; + bool fault, x, w, u, wf, uf, ff, smep; + + smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) { + pfec = byte << 1; + map = 0; + wf = pfec & PFERR_WRITE_MASK; + uf = pfec & PFERR_USER_MASK; + ff = pfec & PFERR_FETCH_MASK; + for (bit = 0; bit < 8; ++bit) { + x = bit & ACC_EXEC_MASK; + w = bit & ACC_WRITE_MASK; + u = bit & ACC_USER_MASK; + + /* Not really needed: !nx will cause pte.nx to fault */ + x |= !mmu->nx; + /* Allow supervisor writes if !cr0.wp */ + w |= !is_write_protection(vcpu) && !uf; + /* Disallow supervisor fetches of user code if cr4.smep */ + x &= !(smep && u && !uf); + + fault = (ff && !x) || (uf && !u) || (wf && !w); + map |= fault << bit; + } + mmu->permissions[byte] = map; + } +} + +static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) +{ + u8 map; + unsigned level, root_level = mmu->root_level; + const unsigned ps_set_index = 1 << 2; /* bit 2 of index: ps */ + + if (root_level == PT32E_ROOT_LEVEL) + --root_level; + /* PT_PAGE_TABLE_LEVEL always terminates */ + map = 1 | (1 << ps_set_index); + for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) { + if (level <= PT_PDPE_LEVEL + && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu))) + map |= 1 << (ps_set_index | (level - 1)); + } + mmu->last_pte_bitmap = map; +} + static int paging64_init_context_common(struct kvm_vcpu *vcpu, struct kvm_mmu *context, int level) @@ -3465,6 +3582,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, context->root_level = level; reset_rsvds_bits_mask(vcpu, context); + update_permission_bitmask(vcpu, context); + update_last_pte_bitmap(vcpu, context); ASSERT(is_pae(vcpu)); context->new_cr3 = paging_new_cr3; @@ -3493,6 +3612,8 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, context->root_level = PT32_ROOT_LEVEL; reset_rsvds_bits_mask(vcpu, context); + update_permission_bitmask(vcpu, context); + update_last_pte_bitmap(vcpu, context); context->new_cr3 = paging_new_cr3; context->page_fault = paging32_page_fault; @@ -3553,6 +3674,9 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->gva_to_gpa = paging32_gva_to_gpa; } + update_permission_bitmask(vcpu, context); + update_last_pte_bitmap(vcpu, context); + return 0; } @@ -3628,6 +3752,9 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->gva_to_gpa = paging32_gva_to_gpa_nested; } + update_permission_bitmask(vcpu, g_context); + update_last_pte_bitmap(vcpu, g_context); + return 0; } diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index e374db9..6987108 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -18,8 +18,10 @@ #define PT_PCD_MASK (1ULL << 4) #define PT_ACCESSED_SHIFT 5 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) -#define PT_DIRTY_MASK (1ULL << 6) -#define PT_PAGE_SIZE_MASK (1ULL << 7) +#define PT_DIRTY_SHIFT 6 +#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT) +#define PT_PAGE_SIZE_SHIFT 7 +#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT) #define PT_PAT_MASK (1ULL << 7) #define PT_GLOBAL_MASK (1ULL << 8) #define PT64_NX_SHIFT 63 @@ -88,17 +90,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu) return kvm_read_cr0_bits(vcpu, X86_CR0_WP); } -static inline bool check_write_user_access(struct kvm_vcpu *vcpu, - bool write_fault, bool user_fault, - unsigned long pte) +/* + * Will a fault with a given page-fault error code (pfec) cause a permission + * fault with the given access (in ACC_* format)? + */ +static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access, + unsigned pfec) { - if (unlikely(write_fault && !is_writable_pte(pte) - && (user_fault || is_write_protection(vcpu)))) - return false; - - if (unlikely(user_fault && !(pte & PT_USER_MASK))) - return false; - - return true; + return (mmu->permissions[pfec >> 1] >> pte_access) & 1; } + #endif diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 7d7d0b9..daff69e 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -116,10 +116,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn); - if (is_error_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + if (is_error_pfn(pfn)) return; - } hpa = pfn << PAGE_SHIFT; if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) @@ -190,7 +188,6 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { - struct kvm_memory_slot *slot; unsigned long *rmapp; u64 *sptep; struct rmap_iterator iter; @@ -198,8 +195,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) if (sp->role.direct || sp->unsync || sp->role.invalid) return; - slot = gfn_to_memslot(kvm, sp->gfn); - rmapp = &slot->rmap[sp->gfn - slot->base_gfn]; + rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL); for (sptep = rmap_get_first(*rmapp, &iter); sptep; sptep = rmap_get_next(&iter)) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index bb7cf01..714e2c0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -63,10 +63,12 @@ */ struct guest_walker { int level; + unsigned max_level; gfn_t table_gfn[PT_MAX_FULL_LEVELS]; pt_element_t ptes[PT_MAX_FULL_LEVELS]; pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; + pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; gfn_t gfn; @@ -101,38 +103,41 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, return (ret != orig_pte); } -static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte, - bool last) +static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, + struct kvm_mmu *mmu, + struct guest_walker *walker, + int write_fault) { - unsigned access; - - access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; - if (last && !is_dirty_gpte(gpte)) - access &= ~ACC_WRITE_MASK; - -#if PTTYPE == 64 - if (vcpu->arch.mmu.nx) - access &= ~(gpte >> PT64_NX_SHIFT); -#endif - return access; -} - -static bool FNAME(is_last_gpte)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - pt_element_t gpte) -{ - if (walker->level == PT_PAGE_TABLE_LEVEL) - return true; - - if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) && - (PTTYPE == 64 || is_pse(vcpu))) - return true; + unsigned level, index; + pt_element_t pte, orig_pte; + pt_element_t __user *ptep_user; + gfn_t table_gfn; + int ret; + + for (level = walker->max_level; level >= walker->level; --level) { + pte = orig_pte = walker->ptes[level - 1]; + table_gfn = walker->table_gfn[level - 1]; + ptep_user = walker->ptep_user[level - 1]; + index = offset_in_page(ptep_user) / sizeof(pt_element_t); + if (!(pte & PT_ACCESSED_MASK)) { + trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte)); + pte |= PT_ACCESSED_MASK; + } + if (level == walker->level && write_fault && !is_dirty_gpte(pte)) { + trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); + pte |= PT_DIRTY_MASK; + } + if (pte == orig_pte) + continue; - if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) && - (mmu->root_level == PT64_ROOT_LEVEL)) - return true; + ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte); + if (ret) + return ret; - return false; + mark_page_dirty(vcpu->kvm, table_gfn); + walker->ptes[level] = pte; + } + return 0; } /* @@ -142,21 +147,22 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gva_t addr, u32 access) { + int ret; pt_element_t pte; pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; - unsigned index, pt_access, uninitialized_var(pte_access); + unsigned index, pt_access, pte_access, accessed_dirty, shift; gpa_t pte_gpa; - bool eperm, last_gpte; int offset; const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; const int fetch_fault = access & PFERR_FETCH_MASK; u16 errcode = 0; + gpa_t real_gpa; + gfn_t gfn; trace_kvm_mmu_pagetable_walk(addr, access); retry_walk: - eperm = false; walker->level = mmu->root_level; pte = mmu->get_cr3(vcpu); @@ -169,15 +175,21 @@ retry_walk: --walker->level; } #endif + walker->max_level = walker->level; ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); - pt_access = ACC_ALL; + accessed_dirty = PT_ACCESSED_MASK; + pt_access = pte_access = ACC_ALL; + ++walker->level; - for (;;) { + do { gfn_t real_gfn; unsigned long host_addr; + pt_access &= pte_access; + --walker->level; + index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); @@ -199,6 +211,7 @@ retry_walk: ptep_user = (pt_element_t __user *)((void *)host_addr + offset); if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte)))) goto error; + walker->ptep_user[walker->level - 1] = ptep_user; trace_kvm_mmu_paging_element(pte, walker->level); @@ -211,92 +224,48 @@ retry_walk: goto error; } - if (!check_write_user_access(vcpu, write_fault, user_fault, - pte)) - eperm = true; - -#if PTTYPE == 64 - if (unlikely(fetch_fault && (pte & PT64_NX_MASK))) - eperm = true; -#endif - - last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte); - if (last_gpte) { - pte_access = pt_access & - FNAME(gpte_access)(vcpu, pte, true); - /* check if the kernel is fetching from user page */ - if (unlikely(pte_access & PT_USER_MASK) && - kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) - if (fetch_fault && !user_fault) - eperm = true; - } - - if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) { - int ret; - trace_kvm_mmu_set_accessed_bit(table_gfn, index, - sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, - pte, pte|PT_ACCESSED_MASK); - if (unlikely(ret < 0)) - goto error; - else if (ret) - goto retry_walk; - - mark_page_dirty(vcpu->kvm, table_gfn); - pte |= PT_ACCESSED_MASK; - } + accessed_dirty &= pte; + pte_access = pt_access & gpte_access(vcpu, pte); walker->ptes[walker->level - 1] = pte; + } while (!is_last_gpte(mmu, walker->level, pte)); - if (last_gpte) { - int lvl = walker->level; - gpa_t real_gpa; - gfn_t gfn; - u32 ac; - - gfn = gpte_to_gfn_lvl(pte, lvl); - gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; - - if (PTTYPE == 32 && - walker->level == PT_DIRECTORY_LEVEL && - is_cpuid_PSE36()) - gfn += pse36_gfn_delta(pte); - - ac = write_fault | fetch_fault | user_fault; + if (unlikely(permission_fault(mmu, pte_access, access))) { + errcode |= PFERR_PRESENT_MASK; + goto error; + } - real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), - ac); - if (real_gpa == UNMAPPED_GVA) - return 0; + gfn = gpte_to_gfn_lvl(pte, walker->level); + gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; - walker->gfn = real_gpa >> PAGE_SHIFT; + if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) + gfn += pse36_gfn_delta(pte); - break; - } + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); + if (real_gpa == UNMAPPED_GVA) + return 0; - pt_access &= FNAME(gpte_access)(vcpu, pte, false); - --walker->level; - } + walker->gfn = real_gpa >> PAGE_SHIFT; - if (unlikely(eperm)) { - errcode |= PFERR_PRESENT_MASK; - goto error; - } + if (!write_fault) + protect_clean_gpte(&pte_access, pte); - if (write_fault && unlikely(!is_dirty_gpte(pte))) { - int ret; + /* + * On a write fault, fold the dirty bit into accessed_dirty by shifting it one + * place right. + * + * On a read fault, do nothing. + */ + shift = write_fault >> ilog2(PFERR_WRITE_MASK); + shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; + accessed_dirty &= pte >> shift; - trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte)); - ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, - pte, pte|PT_DIRTY_MASK); + if (unlikely(!accessed_dirty)) { + ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); if (unlikely(ret < 0)) goto error; else if (ret) goto retry_walk; - - mark_page_dirty(vcpu->kvm, table_gfn); - pte |= PT_DIRTY_MASK; - walker->ptes[walker->level - 1] = pte; } walker->pt_access = pt_access; @@ -368,12 +337,11 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return; pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); - pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true); + pte_access = sp->role.access & gpte_access(vcpu, gpte); + protect_clean_gpte(&pte_access, gpte); pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); - if (mmu_invalid_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + if (mmu_invalid_pfn(pfn)) return; - } /* * we call mmu_set_spte() with host_writable = true because that @@ -443,15 +411,13 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) continue; - pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, - true); + pte_access = sp->role.access & gpte_access(vcpu, gpte); + protect_clean_gpte(&pte_access, gpte); gfn = gpte_to_gfn(gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, pte_access & ACC_WRITE_MASK); - if (mmu_invalid_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + if (mmu_invalid_pfn(pfn)) break; - } mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, NULL, PT_PAGE_TABLE_LEVEL, gfn, @@ -798,7 +764,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; - pte_access &= FNAME(gpte_access)(vcpu, gpte, true); + pte_access &= gpte_access(vcpu, gpte); + protect_clean_gpte(&pte_access, gpte); if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present)) continue; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 9b7ec11..cfc258a 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -1,5 +1,5 @@ /* - * Kernel-based Virtual Machine -- Performane Monitoring Unit support + * Kernel-based Virtual Machine -- Performance Monitoring Unit support * * Copyright 2011 Red Hat, Inc. and/or its affiliates. * diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index baead95..d017df3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -163,7 +163,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio); #define MSR_INVALID 0xffffffffU -static struct svm_direct_access_msrs { +static const struct svm_direct_access_msrs { u32 index; /* Index of the MSR */ bool always; /* True if intercept is always on */ } direct_access_msrs[] = { @@ -400,7 +400,7 @@ struct svm_init_data { int r; }; -static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; +static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) #define MSRS_RANGE_SIZE 2048 @@ -1146,7 +1146,6 @@ static void init_vmcb(struct vcpu_svm *svm) svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; - save->dr7 = 0x400; kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; @@ -1643,7 +1642,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, mark_dirty(svm->vmcb, VMCB_SEG); } -static void update_db_intercept(struct kvm_vcpu *vcpu) +static void update_db_bp_intercept(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1663,20 +1662,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu) vcpu->guest_debug = 0; } -static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; - else - svm->vmcb->save.dr7 = vcpu->arch.dr7; - - mark_dirty(svm->vmcb, VMCB_DR); - - update_db_intercept(vcpu); -} - static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) { if (sd->next_asid > sd->max_asid) { @@ -1748,7 +1733,7 @@ static int db_interception(struct vcpu_svm *svm) if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) svm->vmcb->save.rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(&svm->vcpu); + update_db_bp_intercept(&svm->vcpu); } if (svm->vcpu.guest_debug & @@ -2063,7 +2048,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm) if (svm->nested.intercept & 1ULL) { /* * The #vmexit can't be emulated here directly because this - * code path runs with irqs and preemtion disabled. A + * code path runs with irqs and preemption disabled. A * #vmexit emulation might sleep. Only signal request for * the #vmexit here. */ @@ -2105,7 +2090,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) return kmap(page); error: - kvm_release_page_clean(page); kvm_inject_gp(&svm->vcpu, 0); return NULL; @@ -2409,7 +2393,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { /* * This function merges the msr permission bitmaps of kvm and the - * nested vmcb. It is omptimized in that it only merges the parts where + * nested vmcb. It is optimized in that it only merges the parts where * the kvm msr permission bitmap may contain zero bits */ int i; @@ -3268,7 +3252,7 @@ static int pause_interception(struct vcpu_svm *svm) return 1; } -static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { +static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR0] = cr_interception, [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, @@ -3660,7 +3644,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) */ svm->nmi_singlestep = true; svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(vcpu); + update_db_bp_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -3783,12 +3767,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } -#ifdef CONFIG_X86_64 -#define R "r" -#else -#define R "e" -#endif - static void svm_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3815,13 +3793,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) local_irq_enable(); asm volatile ( - "push %%"R"bp; \n\t" - "mov %c[rbx](%[svm]), %%"R"bx \n\t" - "mov %c[rcx](%[svm]), %%"R"cx \n\t" - "mov %c[rdx](%[svm]), %%"R"dx \n\t" - "mov %c[rsi](%[svm]), %%"R"si \n\t" - "mov %c[rdi](%[svm]), %%"R"di \n\t" - "mov %c[rbp](%[svm]), %%"R"bp \n\t" + "push %%" _ASM_BP "; \n\t" + "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" + "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" + "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" + "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" + "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" + "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" #ifdef CONFIG_X86_64 "mov %c[r8](%[svm]), %%r8 \n\t" "mov %c[r9](%[svm]), %%r9 \n\t" @@ -3834,20 +3812,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif /* Enter guest mode */ - "push %%"R"ax \n\t" - "mov %c[vmcb](%[svm]), %%"R"ax \n\t" + "push %%" _ASM_AX " \n\t" + "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" __ex(SVM_VMLOAD) "\n\t" __ex(SVM_VMRUN) "\n\t" __ex(SVM_VMSAVE) "\n\t" - "pop %%"R"ax \n\t" + "pop %%" _ASM_AX " \n\t" /* Save guest registers, load host registers */ - "mov %%"R"bx, %c[rbx](%[svm]) \n\t" - "mov %%"R"cx, %c[rcx](%[svm]) \n\t" - "mov %%"R"dx, %c[rdx](%[svm]) \n\t" - "mov %%"R"si, %c[rsi](%[svm]) \n\t" - "mov %%"R"di, %c[rdi](%[svm]) \n\t" - "mov %%"R"bp, %c[rbp](%[svm]) \n\t" + "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" + "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" + "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" + "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" + "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" + "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" #ifdef CONFIG_X86_64 "mov %%r8, %c[r8](%[svm]) \n\t" "mov %%r9, %c[r9](%[svm]) \n\t" @@ -3858,7 +3836,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" #endif - "pop %%"R"bp" + "pop %%" _ASM_BP : : [svm]"a"(svm), [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), @@ -3879,9 +3857,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15])) #endif : "cc", "memory" - , R"bx", R"cx", R"dx", R"si", R"di" #ifdef CONFIG_X86_64 + , "rbx", "rcx", "rdx", "rsi", "rdi" , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15" +#else + , "ebx", "ecx", "edx", "esi", "edi" #endif ); @@ -3941,8 +3921,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) mark_all_clean(svm->vmcb); } -#undef R - static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4069,7 +4047,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) #define POST_MEM(exit) { .exit_code = (exit), \ .stage = X86_ICPT_POST_MEMACCESS, } -static struct __x86_intercept { +static const struct __x86_intercept { u32 exit_code; enum x86_intercept_stage stage; } x86_intercept_map[] = { @@ -4260,7 +4238,7 @@ static struct kvm_x86_ops svm_x86_ops = { .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .set_guest_debug = svm_guest_debug, + .update_db_bp_intercept = update_db_bp_intercept, .get_msr = svm_get_msr, .set_msr = svm_set_msr, .get_segment_base = svm_get_segment_base, diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c deleted file mode 100644 index 6b85cc6..0000000 --- a/arch/x86/kvm/timer.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run virtual - * machines without emulation or binary translation. - * - * timer support - * - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include <linux/kvm_host.h> -#include <linux/kvm.h> -#include <linux/hrtimer.h> -#include <linux/atomic.h> -#include "kvm_timer.h" - -enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) -{ - struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); - struct kvm_vcpu *vcpu = ktimer->vcpu; - wait_queue_head_t *q = &vcpu->wq; - - /* - * There is a race window between reading and incrementing, but we do - * not care about potentially losing timer events in the !reinject - * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked - * in vcpu_enter_guest. - */ - if (ktimer->reinject || !atomic_read(&ktimer->pending)) { - atomic_inc(&ktimer->pending); - /* FIXME: this code should not know anything about vcpus */ - kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); - } - - if (waitqueue_active(q)) - wake_up_interruptible(q); - - if (ktimer->t_ops->is_periodic(ktimer)) { - hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); - return HRTIMER_RESTART; - } else - return HRTIMER_NORESTART; -} diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index a71faf7..bca63f0 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 -#define VMX_EXIT_REASONS \ - { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ - { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \ - { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \ - { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \ - { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \ - { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ - { EXIT_REASON_CPUID, "CPUID" }, \ - { EXIT_REASON_HLT, "HLT" }, \ - { EXIT_REASON_INVLPG, "INVLPG" }, \ - { EXIT_REASON_RDPMC, "RDPMC" }, \ - { EXIT_REASON_RDTSC, "RDTSC" }, \ - { EXIT_REASON_VMCALL, "VMCALL" }, \ - { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \ - { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \ - { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \ - { EXIT_REASON_VMPTRST, "VMPTRST" }, \ - { EXIT_REASON_VMREAD, "VMREAD" }, \ - { EXIT_REASON_VMRESUME, "VMRESUME" }, \ - { EXIT_REASON_VMWRITE, "VMWRITE" }, \ - { EXIT_REASON_VMOFF, "VMOFF" }, \ - { EXIT_REASON_VMON, "VMON" }, \ - { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \ - { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \ - { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ - { EXIT_REASON_MSR_READ, "MSR_READ" }, \ - { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ - { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ - { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ - { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \ - { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ - { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ - { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ - { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ - { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } - -#define SVM_EXIT_REASONS \ - { SVM_EXIT_READ_CR0, "read_cr0" }, \ - { SVM_EXIT_READ_CR3, "read_cr3" }, \ - { SVM_EXIT_READ_CR4, "read_cr4" }, \ - { SVM_EXIT_READ_CR8, "read_cr8" }, \ - { SVM_EXIT_WRITE_CR0, "write_cr0" }, \ - { SVM_EXIT_WRITE_CR3, "write_cr3" }, \ - { SVM_EXIT_WRITE_CR4, "write_cr4" }, \ - { SVM_EXIT_WRITE_CR8, "write_cr8" }, \ - { SVM_EXIT_READ_DR0, "read_dr0" }, \ - { SVM_EXIT_READ_DR1, "read_dr1" }, \ - { SVM_EXIT_READ_DR2, "read_dr2" }, \ - { SVM_EXIT_READ_DR3, "read_dr3" }, \ - { SVM_EXIT_WRITE_DR0, "write_dr0" }, \ - { SVM_EXIT_WRITE_DR1, "write_dr1" }, \ - { SVM_EXIT_WRITE_DR2, "write_dr2" }, \ - { SVM_EXIT_WRITE_DR3, "write_dr3" }, \ - { SVM_EXIT_WRITE_DR5, "write_dr5" }, \ - { SVM_EXIT_WRITE_DR7, "write_dr7" }, \ - { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \ - { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \ - { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ - { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ - { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ - { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ - { SVM_EXIT_INTR, "interrupt" }, \ - { SVM_EXIT_NMI, "nmi" }, \ - { SVM_EXIT_SMI, "smi" }, \ - { SVM_EXIT_INIT, "init" }, \ - { SVM_EXIT_VINTR, "vintr" }, \ - { SVM_EXIT_CPUID, "cpuid" }, \ - { SVM_EXIT_INVD, "invd" }, \ - { SVM_EXIT_HLT, "hlt" }, \ - { SVM_EXIT_INVLPG, "invlpg" }, \ - { SVM_EXIT_INVLPGA, "invlpga" }, \ - { SVM_EXIT_IOIO, "io" }, \ - { SVM_EXIT_MSR, "msr" }, \ - { SVM_EXIT_TASK_SWITCH, "task_switch" }, \ - { SVM_EXIT_SHUTDOWN, "shutdown" }, \ - { SVM_EXIT_VMRUN, "vmrun" }, \ - { SVM_EXIT_VMMCALL, "hypercall" }, \ - { SVM_EXIT_VMLOAD, "vmload" }, \ - { SVM_EXIT_VMSAVE, "vmsave" }, \ - { SVM_EXIT_STGI, "stgi" }, \ - { SVM_EXIT_CLGI, "clgi" }, \ - { SVM_EXIT_SKINIT, "skinit" }, \ - { SVM_EXIT_WBINVD, "wbinvd" }, \ - { SVM_EXIT_MONITOR, "monitor" }, \ - { SVM_EXIT_MWAIT, "mwait" }, \ - { SVM_EXIT_XSETBV, "xsetbv" }, \ - { SVM_EXIT_NPF, "npf" } - /* * Tracepoint for kvm guest exit: */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1eb202..ad6b1dd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO); static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; module_param(ple_window, int, S_IRUGO); +extern const ulong vmx_return; + #define NR_AUTOLOAD_MSRS 8 #define VMCS02_POOL_SIZE 1 @@ -405,16 +407,16 @@ struct vcpu_vmx { struct { int vm86_active; ulong save_rflags; + struct kvm_segment segs[8]; + } rmode; + struct { + u32 bitmask; /* 4 bits per segment (1 bit per field) */ struct kvm_save_segment { u16 selector; unsigned long base; u32 limit; u32 ar; - } tr, es, ds, fs, gs; - } rmode; - struct { - u32 bitmask; /* 4 bits per segment (1 bit per field) */ - struct kvm_save_segment seg[8]; + } seg[8]; } segment_cache; int vpid; bool emulation_required; @@ -450,7 +452,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) #define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \ [number##_HIGH] = VMCS12_OFFSET(name)+4 -static unsigned short vmcs_field_to_offset_table[] = { +static const unsigned short vmcs_field_to_offset_table[] = { FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), FIELD(GUEST_ES_SELECTOR, guest_es_selector), FIELD(GUEST_CS_SELECTOR, guest_cs_selector), @@ -596,10 +598,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu) static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr) { struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT); - if (is_error_page(page)) { - kvm_release_page_clean(page); + if (is_error_page(page)) return NULL; - } + return page; } @@ -667,7 +668,7 @@ static struct vmx_capability { .ar_bytes = GUEST_##seg##_AR_BYTES, \ } -static struct kvm_vmx_segment_field { +static const struct kvm_vmx_segment_field { unsigned selector; unsigned base; unsigned limit; @@ -1343,7 +1344,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) guest_efer = vmx->vcpu.arch.efer; /* - * NX is emulated; LMA and LME handled by hardware; SCE meaninless + * NX is emulated; LMA and LME handled by hardware; SCE meaningless * outside long mode */ ignore_bits = EFER_NX | EFER_SCE; @@ -1493,8 +1494,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) #ifdef CONFIG_X86_64 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif - if (user_has_fpu()) - clts(); + /* + * If the FPU is not active (through the host task or + * the guest vcpu), then restore the cr0.TS bit. + */ + if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + stts(); load_gdt(&__get_cpu_var(host_gdt)); } @@ -1991,7 +1996,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) #endif CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING | CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING | - CPU_BASED_RDPMC_EXITING | + CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; /* * We can allow some features even when not supported by the @@ -2287,16 +2292,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) -{ - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]); - else - vmcs_writel(GUEST_DR7, vcpu->arch.dr7); - - update_exception_bitmap(vcpu); -} - static __init int cpu_has_kvm_support(void) { return cpu_has_vmx(); @@ -2694,20 +2689,17 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } -static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save) +static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + struct kvm_segment tmp = *save; - if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) { - vmcs_write16(sf->selector, save->selector); - vmcs_writel(sf->base, save->base); - vmcs_write32(sf->limit, save->limit); - vmcs_write32(sf->ar_bytes, save->ar); - } else { - u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK) - << AR_DPL_SHIFT; - vmcs_write32(sf->ar_bytes, 0x93 | dpl); + if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { + tmp.base = vmcs_readl(sf->base); + tmp.selector = vmcs_read16(sf->selector); + tmp.s = 1; } + vmx_set_segment(vcpu, &tmp, seg); } static void enter_pmode(struct kvm_vcpu *vcpu) @@ -2720,10 +2712,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); - vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); - vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); - vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); - vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); + vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); flags = vmcs_readl(GUEST_RFLAGS); flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; @@ -2738,10 +2727,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu) if (emulate_invalid_guest_state) return; - fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es); - fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds); - fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs); - fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs); + fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); + fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); + fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); + fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); vmx_segment_cache_clear(vmx); @@ -2769,14 +2758,10 @@ static gva_t rmode_tss_base(struct kvm *kvm) return kvm->arch.tss_addr; } -static void fix_rmode_seg(int seg, struct kvm_save_segment *save) +static void fix_rmode_seg(int seg, struct kvm_segment *save) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - save->selector = vmcs_read16(sf->selector); - save->base = vmcs_readl(sf->base); - save->limit = vmcs_read32(sf->limit); - save->ar = vmcs_read32(sf->ar_bytes); vmcs_write16(sf->selector, save->base >> 4); vmcs_write32(sf->base, save->base & 0xffff0); vmcs_write32(sf->limit, 0xffff); @@ -2796,9 +2781,16 @@ static void enter_rmode(struct kvm_vcpu *vcpu) if (enable_unrestricted_guest) return; + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; + /* * Very old userspace does not call KVM_SET_TSS_ADDR before entering * vcpu. Call it here with phys address pointing 16M below 4G. @@ -2813,14 +2805,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); - vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); - vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); - - vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT); vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1); - - vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES); vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); flags = vmcs_readl(GUEST_RFLAGS); @@ -3113,35 +3099,24 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_save_segment *save; u32 ar; if (vmx->rmode.vm86_active && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS - || seg == VCPU_SREG_GS) - && !emulate_invalid_guest_state) { - switch (seg) { - case VCPU_SREG_TR: save = &vmx->rmode.tr; break; - case VCPU_SREG_ES: save = &vmx->rmode.es; break; - case VCPU_SREG_DS: save = &vmx->rmode.ds; break; - case VCPU_SREG_FS: save = &vmx->rmode.fs; break; - case VCPU_SREG_GS: save = &vmx->rmode.gs; break; - default: BUG(); - } - var->selector = save->selector; - var->base = save->base; - var->limit = save->limit; - ar = save->ar; + || seg == VCPU_SREG_GS)) { + *var = vmx->rmode.segs[seg]; if (seg == VCPU_SREG_TR || var->selector == vmx_read_guest_seg_selector(vmx, seg)) - goto use_saved_rmode_seg; + return; + var->base = vmx_read_guest_seg_base(vmx, seg); + var->selector = vmx_read_guest_seg_selector(vmx, seg); + return; } var->base = vmx_read_guest_seg_base(vmx, seg); var->limit = vmx_read_guest_seg_limit(vmx, seg); var->selector = vmx_read_guest_seg_selector(vmx, seg); ar = vmx_read_guest_seg_ar(vmx, seg); -use_saved_rmode_seg: if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) ar = 0; var->type = ar & 15; @@ -3223,23 +3198,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg) { struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; vmx_segment_cache_clear(vmx); if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { vmcs_write16(sf->selector, var->selector); - vmx->rmode.tr.selector = var->selector; - vmx->rmode.tr.base = var->base; - vmx->rmode.tr.limit = var->limit; - vmx->rmode.tr.ar = vmx_segment_access_rights(var); + vmx->rmode.segs[VCPU_SREG_TR] = *var; return; } vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); if (vmx->rmode.vm86_active && var->s) { + vmx->rmode.segs[seg] = *var; /* * Hack real-mode segments into vm86 compatibility. */ @@ -3254,7 +3227,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, * qemu binaries. * IA32 arch specifies that at the time of processor reset the * "Accessed" bit in the AR field of segment registers is 1. And qemu - * is setting it to 0 in the usedland code. This causes invalid guest + * is setting it to 0 in the userland code. This causes invalid guest * state vmexit when "unrestricted guest" mode is turned on. * Fix for this setup issue in cpu_reset is being pushed in the qemu * tree. Newer qemu binaries with that qemu fix would not need this @@ -3284,16 +3257,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_readl(GUEST_CS_BASE) >> 4); break; case VCPU_SREG_ES: - fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es); - break; case VCPU_SREG_DS: - fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds); - break; case VCPU_SREG_GS: - fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs); - break; case VCPU_SREG_FS: - fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs); + fix_rmode_seg(seg, &vmx->rmode.segs[seg]); break; case VCPU_SREG_SS: vmcs_write16(GUEST_SS_SELECTOR, @@ -3347,9 +3314,9 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) if (var.base != (var.selector << 4)) return false; - if (var.limit != 0xffff) + if (var.limit < 0xffff) return false; - if (ar != 0xf3) + if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) return false; return true; @@ -3601,7 +3568,7 @@ out: static void seg_setup(int seg) { - struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; unsigned int ar; vmcs_write16(sf->selector, 0); @@ -3743,7 +3710,7 @@ static void vmx_set_constant_host_state(void) unsigned long tmpl; struct desc_ptr dt; - vmcs_writel(HOST_CR0, read_cr0() | X86_CR0_TS); /* 22.2.3 */ + vmcs_writel(HOST_CR0, read_cr0() & ~X86_CR0_TS); /* 22.2.3 */ vmcs_writel(HOST_CR4, read_cr4()); /* 22.2.3, 22.2.5 */ vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ @@ -3766,8 +3733,7 @@ static void vmx_set_constant_host_state(void) native_store_idt(&dt); vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ - asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl)); - vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */ + vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */ rdmsr(MSR_IA32_SYSENTER_CS, low32, high32); vmcs_write32(HOST_IA32_SYSENTER_CS, low32); @@ -4001,8 +3967,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) kvm_rip_write(vcpu, 0); kvm_register_write(vcpu, VCPU_REGS_RSP, 0); - vmcs_writel(GUEST_DR7, 0x400); - vmcs_writel(GUEST_GDTR_BASE, 0); vmcs_write32(GUEST_GDTR_LIMIT, 0xffff); @@ -4452,7 +4416,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall) hypercall[2] = 0xc1; } -/* called to set cr0 as approriate for a mov-to-cr0 exit. */ +/* called to set cr0 as appropriate for a mov-to-cr0 exit. */ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val) { if (to_vmx(vcpu)->nested.vmxon && @@ -4543,7 +4507,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SET_TPR; return 0; } - }; + } break; case 2: /* clts */ handle_clts(vcpu); @@ -5697,7 +5661,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) * may resume. Otherwise they set the kvm_run parameter to indicate what needs * to be done to userspace and return 0. */ -static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { +static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_EXCEPTION_NMI] = handle_exception, [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, @@ -6225,17 +6189,10 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) msrs[i].host); } -#ifdef CONFIG_X86_64 -#define R "r" -#define Q "q" -#else -#define R "e" -#define Q "l" -#endif - static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long debugctlmsr; if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -6275,34 +6232,35 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_set_interrupt_shadow(vcpu, 0); atomic_switch_perf_msrs(vmx); + debugctlmsr = get_debugctlmsr(); vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ - "push %%"R"dx; push %%"R"bp;" - "push %%"R"cx \n\t" /* placeholder for guest rcx */ - "push %%"R"cx \n\t" - "cmp %%"R"sp, %c[host_rsp](%0) \n\t" + "push %%" _ASM_DX "; push %%" _ASM_BP ";" + "push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */ + "push %%" _ASM_CX " \n\t" + "cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t" "je 1f \n\t" - "mov %%"R"sp, %c[host_rsp](%0) \n\t" + "mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t" __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" "1: \n\t" /* Reload cr2 if changed */ - "mov %c[cr2](%0), %%"R"ax \n\t" - "mov %%cr2, %%"R"dx \n\t" - "cmp %%"R"ax, %%"R"dx \n\t" + "mov %c[cr2](%0), %%" _ASM_AX " \n\t" + "mov %%cr2, %%" _ASM_DX " \n\t" + "cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t" "je 2f \n\t" - "mov %%"R"ax, %%cr2 \n\t" + "mov %%" _ASM_AX", %%cr2 \n\t" "2: \n\t" /* Check if vmlaunch of vmresume is needed */ "cmpl $0, %c[launched](%0) \n\t" /* Load guest registers. Don't clobber flags. */ - "mov %c[rax](%0), %%"R"ax \n\t" - "mov %c[rbx](%0), %%"R"bx \n\t" - "mov %c[rdx](%0), %%"R"dx \n\t" - "mov %c[rsi](%0), %%"R"si \n\t" - "mov %c[rdi](%0), %%"R"di \n\t" - "mov %c[rbp](%0), %%"R"bp \n\t" + "mov %c[rax](%0), %%" _ASM_AX " \n\t" + "mov %c[rbx](%0), %%" _ASM_BX " \n\t" + "mov %c[rdx](%0), %%" _ASM_DX " \n\t" + "mov %c[rsi](%0), %%" _ASM_SI " \n\t" + "mov %c[rdi](%0), %%" _ASM_DI " \n\t" + "mov %c[rbp](%0), %%" _ASM_BP " \n\t" #ifdef CONFIG_X86_64 "mov %c[r8](%0), %%r8 \n\t" "mov %c[r9](%0), %%r9 \n\t" @@ -6313,24 +6271,24 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %c[r14](%0), %%r14 \n\t" "mov %c[r15](%0), %%r15 \n\t" #endif - "mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */ + "mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */ /* Enter guest mode */ - "jne .Llaunched \n\t" + "jne 1f \n\t" __ex(ASM_VMX_VMLAUNCH) "\n\t" - "jmp .Lkvm_vmx_return \n\t" - ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" - ".Lkvm_vmx_return: " + "jmp 2f \n\t" + "1: " __ex(ASM_VMX_VMRESUME) "\n\t" + "2: " /* Save guest registers, load host registers, keep flags */ - "mov %0, %c[wordsize](%%"R"sp) \n\t" + "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" "pop %0 \n\t" - "mov %%"R"ax, %c[rax](%0) \n\t" - "mov %%"R"bx, %c[rbx](%0) \n\t" - "pop"Q" %c[rcx](%0) \n\t" - "mov %%"R"dx, %c[rdx](%0) \n\t" - "mov %%"R"si, %c[rsi](%0) \n\t" - "mov %%"R"di, %c[rdi](%0) \n\t" - "mov %%"R"bp, %c[rbp](%0) \n\t" + "mov %%" _ASM_AX ", %c[rax](%0) \n\t" + "mov %%" _ASM_BX ", %c[rbx](%0) \n\t" + __ASM_SIZE(pop) " %c[rcx](%0) \n\t" + "mov %%" _ASM_DX ", %c[rdx](%0) \n\t" + "mov %%" _ASM_SI ", %c[rsi](%0) \n\t" + "mov %%" _ASM_DI ", %c[rdi](%0) \n\t" + "mov %%" _ASM_BP ", %c[rbp](%0) \n\t" #ifdef CONFIG_X86_64 "mov %%r8, %c[r8](%0) \n\t" "mov %%r9, %c[r9](%0) \n\t" @@ -6341,11 +6299,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" #endif - "mov %%cr2, %%"R"ax \n\t" - "mov %%"R"ax, %c[cr2](%0) \n\t" + "mov %%cr2, %%" _ASM_AX " \n\t" + "mov %%" _ASM_AX ", %c[cr2](%0) \n\t" - "pop %%"R"bp; pop %%"R"dx \n\t" + "pop %%" _ASM_BP "; pop %%" _ASM_DX " \n\t" "setbe %c[fail](%0) \n\t" + ".pushsection .rodata \n\t" + ".global vmx_return \n\t" + "vmx_return: " _ASM_PTR " 2b \n\t" + ".popsection" : : "c"(vmx), "d"((unsigned long)HOST_RSP), [launched]"i"(offsetof(struct vcpu_vmx, __launched)), [fail]"i"(offsetof(struct vcpu_vmx, fail)), @@ -6370,12 +6332,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), [wordsize]"i"(sizeof(ulong)) : "cc", "memory" - , R"ax", R"bx", R"di", R"si" #ifdef CONFIG_X86_64 + , "rax", "rbx", "rdi", "rsi" , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" +#else + , "eax", "ebx", "edi", "esi" #endif ); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ + if (debugctlmsr) + update_debugctlmsr(debugctlmsr); + #ifndef CONFIG_X86_64 /* * The sysexit path does not restore ds/es, so we must set them to @@ -6420,9 +6388,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_complete_interrupts(vmx); } -#undef R -#undef Q - static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7277,7 +7242,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .vcpu_load = vmx_vcpu_load, .vcpu_put = vmx_vcpu_put, - .set_guest_debug = set_guest_debug, + .update_db_bp_intercept = update_exception_bitmap, .get_msr = vmx_get_msr, .set_msr = vmx_set_msr, .get_segment_base = vmx_get_segment_base, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2966c84..1eefebe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,20 +246,14 @@ static void drop_user_return_notifiers(void *ignore) u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) { - if (irqchip_in_kernel(vcpu->kvm)) - return vcpu->arch.apic_base; - else - return vcpu->arch.apic_base; + return vcpu->arch.apic_base; } EXPORT_SYMBOL_GPL(kvm_get_apic_base); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) { /* TODO: reserve bits check */ - if (irqchip_in_kernel(vcpu->kvm)) - kvm_lapic_set_base(vcpu, data); - else - vcpu->arch.apic_base = data; + kvm_lapic_set_base(vcpu, data); } EXPORT_SYMBOL_GPL(kvm_set_apic_base); @@ -698,6 +692,18 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static void kvm_update_dr7(struct kvm_vcpu *vcpu) +{ + unsigned long dr7; + + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + dr7 = vcpu->arch.guest_debug_dr7; + else + dr7 = vcpu->arch.dr7; + kvm_x86_ops->set_dr7(vcpu, dr7); + vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK); +} + static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { switch (dr) { @@ -723,10 +729,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) if (val & 0xffffffff00000000ULL) return -1; /* #GP */ vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { - kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); - vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); - } + kvm_update_dr7(vcpu); break; } @@ -823,7 +826,7 @@ static u32 msrs_to_save[] = { static unsigned num_msrs_to_save; -static u32 emulated_msrs[] = { +static const u32 emulated_msrs[] = { MSR_IA32_TSCDEADLINE, MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, @@ -1097,7 +1100,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) * For each generation, we track the original measured * nanosecond time, offset, and write, so if TSCs are in * sync, we can match exact offset, and if not, we can match - * exact software computaion in compute_guest_tsc() + * exact software computation in compute_guest_tsc() * * These values are tracked in kvm->arch.cur_xxx variables. */ @@ -1140,6 +1143,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long this_tsc_khz; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp; + u8 pvclock_flags; /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -1221,7 +1225,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; vcpu->last_kernel_ns = kernel_ns; vcpu->last_guest_tsc = tsc_timestamp; - vcpu->hv_clock.flags = 0; + + pvclock_flags = 0; + if (vcpu->pvclock_set_guest_stopped_request) { + pvclock_flags |= PVCLOCK_GUEST_STOPPED; + vcpu->pvclock_set_guest_stopped_request = false; + } + + vcpu->hv_clock.flags = pvclock_flags; /* * The interface expects us to write an even number signaling that the @@ -1504,7 +1515,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; - /* Bits 2:5 are resrved, Should be zero */ + /* Bits 2:5 are reserved, Should be zero */ if (data & 0x3c) return 1; @@ -1639,10 +1650,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); - if (is_error_page(vcpu->arch.time_page)) { - kvm_release_page_clean(vcpu->arch.time_page); + if (is_error_page(vcpu->arch.time_page)) vcpu->arch.time_page = NULL; - } + break; } case MSR_KVM_ASYNC_PF_EN: @@ -1727,7 +1737,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) * Ignore all writes to this no longer documented MSR. * Writes are only relevant for old K7 processors, * all pre-dating SVM, but a recommended workaround from - * AMD for these chips. It is possible to speicify the + * AMD for these chips. It is possible to specify the * affected processor models on the command line, hence * the need to ignore the workaround. */ @@ -2177,6 +2187,8 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: + case KVM_CAP_READONLY_MEM: + case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -2358,8 +2370,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s); - kvm_apic_post_state_restore(vcpu); + kvm_apic_post_state_restore(vcpu, s); update_cr8_intercept(vcpu); return 0; @@ -2368,7 +2379,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - if (irq->irq < 0 || irq->irq >= 256) + if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS) return -EINVAL; if (irqchip_in_kernel(vcpu->kvm)) return -ENXIO; @@ -2635,11 +2646,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, */ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) { - struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock; if (!vcpu->arch.time_page) return -EINVAL; - src->flags |= PVCLOCK_GUEST_STOPPED; - mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT); + vcpu->arch.pvclock_set_guest_stopped_request = true; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); return 0; } @@ -3090,7 +3099,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, if (!kvm->arch.vpit) return -ENXIO; mutex_lock(&kvm->arch.vpit->pit_state.lock); - kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject; + kvm->arch.vpit->pit_state.reinject = control->pit_reinject; mutex_unlock(&kvm->arch.vpit->pit_state.lock); return 0; } @@ -3173,6 +3182,16 @@ out: return r; } +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event) +{ + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level); + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3279,29 +3298,6 @@ long kvm_arch_vm_ioctl(struct file *filp, create_pit_unlock: mutex_unlock(&kvm->slots_lock); break; - case KVM_IRQ_LINE_STATUS: - case KVM_IRQ_LINE: { - struct kvm_irq_level irq_event; - - r = -EFAULT; - if (copy_from_user(&irq_event, argp, sizeof irq_event)) - goto out; - r = -ENXIO; - if (irqchip_in_kernel(kvm)) { - __s32 status; - status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, - irq_event.irq, irq_event.level); - if (ioctl == KVM_IRQ_LINE_STATUS) { - r = -EFAULT; - irq_event.status = status; - if (copy_to_user(argp, &irq_event, - sizeof irq_event)) - goto out; - } - r = 0; - } - break; - } case KVM_GET_IRQCHIP: { /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ struct kvm_irqchip *chip; @@ -3689,20 +3685,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t *gpa, struct x86_exception *exception, bool write) { - u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; + u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0) + | (write ? PFERR_WRITE_MASK : 0); - if (vcpu_match_mmio_gva(vcpu, gva) && - check_write_user_access(vcpu, write, access, - vcpu->arch.access)) { + if (vcpu_match_mmio_gva(vcpu, gva) + && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) { *gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT | (gva & (PAGE_SIZE - 1)); trace_vcpu_match_mmio(gva, *gpa, write, false); return 1; } - if (write) - access |= PFERR_WRITE_MASK; - *gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception); if (*gpa == UNMAPPED_GVA) @@ -3790,14 +3783,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, return X86EMUL_CONTINUE; } -static struct read_write_emulator_ops read_emultor = { +static const struct read_write_emulator_ops read_emultor = { .read_write_prepare = read_prepare, .read_write_emulate = read_emulate, .read_write_mmio = vcpu_mmio_read, .read_write_exit_mmio = read_exit_mmio, }; -static struct read_write_emulator_ops write_emultor = { +static const struct read_write_emulator_ops write_emultor = { .read_write_emulate = write_emulate, .read_write_mmio = write_mmio, .read_write_exit_mmio = write_exit_mmio, @@ -3808,7 +3801,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, unsigned int bytes, struct x86_exception *exception, struct kvm_vcpu *vcpu, - struct read_write_emulator_ops *ops) + const struct read_write_emulator_ops *ops) { gpa_t gpa; int handled, ret; @@ -3857,7 +3850,7 @@ mmio: int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, void *val, unsigned int bytes, struct x86_exception *exception, - struct read_write_emulator_ops *ops) + const struct read_write_emulator_ops *ops) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); gpa_t gpa; @@ -3962,10 +3955,8 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, goto emul_write; page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - if (is_error_page(page)) { - kvm_release_page_clean(page); + if (is_error_page(page)) goto emul_write; - } kaddr = kmap_atomic(page); kaddr += offset_in_page(gpa); @@ -4332,7 +4323,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); } -static struct x86_emulate_ops emulate_ops = { +static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) +{ + return kvm_register_read(emul_to_vcpu(ctxt), reg); +} + +static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val) +{ + kvm_register_write(emul_to_vcpu(ctxt), reg, val); +} + +static const struct x86_emulate_ops emulate_ops = { + .read_gpr = emulator_read_gpr, + .write_gpr = emulator_write_gpr, .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, .fetch = kvm_fetch_guest_virt, @@ -4367,14 +4370,6 @@ static struct x86_emulate_ops emulate_ops = { .get_cpuid = emulator_get_cpuid, }; -static void cache_all_regs(struct kvm_vcpu *vcpu) -{ - kvm_register_read(vcpu, VCPU_REGS_RAX); - kvm_register_read(vcpu, VCPU_REGS_RSP); - kvm_register_read(vcpu, VCPU_REGS_RIP); - vcpu->arch.regs_dirty = ~0; -} - static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) { u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); @@ -4401,12 +4396,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) kvm_queue_exception(vcpu, ctxt->exception.vector); } -static void init_decode_cache(struct x86_emulate_ctxt *ctxt, - const unsigned long *regs) +static void init_decode_cache(struct x86_emulate_ctxt *ctxt) { memset(&ctxt->twobyte, 0, - (void *)&ctxt->regs - (void *)&ctxt->twobyte); - memcpy(ctxt->regs, regs, sizeof(ctxt->regs)); + (void *)&ctxt->_regs - (void *)&ctxt->twobyte); ctxt->fetch.start = 0; ctxt->fetch.end = 0; @@ -4421,14 +4414,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; int cs_db, cs_l; - /* - * TODO: fix emulate.c to use guest_read/write_register - * instead of direct ->regs accesses, can save hundred cycles - * on Intel for instructions that don't read/change RSP, for - * for example. - */ - cache_all_regs(vcpu); - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); ctxt->eflags = kvm_get_rflags(vcpu); @@ -4440,7 +4425,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) X86EMUL_MODE_PROT16; ctxt->guest_mode = is_guest_mode(vcpu); - init_decode_cache(ctxt, vcpu->arch.regs); + init_decode_cache(ctxt); vcpu->arch.emulate_regs_need_sync_from_vcpu = false; } @@ -4460,7 +4445,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) return EMULATE_FAIL; ctxt->eip = ctxt->_eip; - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); @@ -4493,13 +4477,14 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) { gpa_t gpa; + pfn_t pfn; if (tdp_enabled) return false; /* * if emulation was due to access to shadowed page table - * and it failed try to unshadow page and re-entetr the + * and it failed try to unshadow page and re-enter the * guest to let CPU execute the instruction. */ if (kvm_mmu_unprotect_page_virt(vcpu, gva)) @@ -4510,8 +4495,17 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) if (gpa == UNMAPPED_GVA) return true; /* let cpu generate fault */ - if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT))) + /* + * Do not retry the unhandleable instruction if it faults on the + * readonly host memory, otherwise it will goto a infinite loop: + * retry instruction -> write #PF -> emulation fail -> retry + * instruction -> ... + */ + pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); + if (!is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); return true; + } return false; } @@ -4560,6 +4554,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, return true; } +static int complete_emulated_mmio(struct kvm_vcpu *vcpu); +static int complete_emulated_pio(struct kvm_vcpu *vcpu); + int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, int emulation_type, @@ -4608,7 +4605,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, changes registers values during IO operation */ if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { vcpu->arch.emulate_regs_need_sync_from_vcpu = false; - memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs); + emulator_invalidate_register_cache(ctxt); } restart: @@ -4630,13 +4627,16 @@ restart: } else if (vcpu->arch.pio.count) { if (!vcpu->arch.pio.in) vcpu->arch.pio.count = 0; - else + else { writeback = false; + vcpu->arch.complete_userspace_io = complete_emulated_pio; + } r = EMULATE_DO_MMIO; } else if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) writeback = false; r = EMULATE_DO_MMIO; + vcpu->arch.complete_userspace_io = complete_emulated_mmio; } else if (r == EMULATION_RESTART) goto restart; else @@ -4646,7 +4646,6 @@ restart: toggle_interruptibility(vcpu, ctxt->interruptibility); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; kvm_rip_write(vcpu, ctxt->eip); } else @@ -4929,6 +4928,7 @@ int kvm_arch_init(void *opaque) if (cpu_has_xsave) host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + kvm_lapic_init(); return 0; out: @@ -5499,6 +5499,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) return r; } +static inline int complete_emulated_io(struct kvm_vcpu *vcpu) +{ + int r; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + if (r != EMULATE_DONE) + return 0; + return 1; +} + +static int complete_emulated_pio(struct kvm_vcpu *vcpu) +{ + BUG_ON(!vcpu->arch.pio.count); + + return complete_emulated_io(vcpu); +} + /* * Implements the following, as a state machine: * @@ -5515,47 +5533,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) * copy data * exit */ -static int complete_mmio(struct kvm_vcpu *vcpu) +static int complete_emulated_mmio(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; struct kvm_mmio_fragment *frag; - int r; - if (!(vcpu->arch.pio.count || vcpu->mmio_needed)) - return 1; + BUG_ON(!vcpu->mmio_needed); - if (vcpu->mmio_needed) { - /* Complete previous fragment */ - frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; - if (!vcpu->mmio_is_write) - memcpy(frag->data, run->mmio.data, frag->len); - if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { - vcpu->mmio_needed = 0; - if (vcpu->mmio_is_write) - return 1; - vcpu->mmio_read_completed = 1; - goto done; - } - /* Initiate next fragment */ - ++frag; - run->exit_reason = KVM_EXIT_MMIO; - run->mmio.phys_addr = frag->gpa; + /* Complete previous fragment */ + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; + if (!vcpu->mmio_is_write) + memcpy(frag->data, run->mmio.data, frag->len); + if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { + vcpu->mmio_needed = 0; if (vcpu->mmio_is_write) - memcpy(run->mmio.data, frag->data, frag->len); - run->mmio.len = frag->len; - run->mmio.is_write = vcpu->mmio_is_write; - return 0; - - } -done: - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - if (r != EMULATE_DONE) - return 0; - return 1; + return 1; + vcpu->mmio_read_completed = 1; + return complete_emulated_io(vcpu); + } + /* Initiate next fragment */ + ++frag; + run->exit_reason = KVM_EXIT_MMIO; + run->mmio.phys_addr = frag->gpa; + if (vcpu->mmio_is_write) + memcpy(run->mmio.data, frag->data, frag->len); + run->mmio.len = frag->len; + run->mmio.is_write = vcpu->mmio_is_write; + vcpu->arch.complete_userspace_io = complete_emulated_mmio; + return 0; } + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -5582,9 +5590,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - r = complete_mmio(vcpu); - if (r <= 0) - goto out; + if (unlikely(vcpu->arch.complete_userspace_io)) { + int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io; + vcpu->arch.complete_userspace_io = NULL; + r = cui(vcpu); + if (r <= 0) + goto out; + } else + WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); r = __vcpu_run(vcpu); @@ -5602,12 +5615,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) /* * We are here if userspace calls get_regs() in the middle of * instruction emulation. Registers state needs to be copied - * back from emulation context to vcpu. Usrapace shouldn't do + * back from emulation context to vcpu. Userspace shouldn't do * that usually, but some bad designed PV devices (vmware * backdoor interface) need this to work */ - struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); + emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; } regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); @@ -5747,7 +5759,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, if (ret) return EMULATE_FAIL; - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); kvm_rip_write(vcpu, ctxt->eip); kvm_set_rflags(vcpu, ctxt->eflags); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -5799,7 +5810,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - max_bits = (sizeof sregs->interrupt_bitmap) << 3; + max_bits = KVM_NR_INTERRUPTS; pending_vec = find_first_bit( (const unsigned long *)sregs->interrupt_bitmap, max_bits); if (pending_vec < max_bits) { @@ -5859,13 +5870,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) { for (i = 0; i < KVM_NR_DB_REGS; ++i) vcpu->arch.eff_db[i] = dbg->arch.debugreg[i]; - vcpu->arch.switch_db_regs = - (dbg->arch.debugreg[7] & DR7_BP_EN_MASK); + vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7]; } else { for (i = 0; i < KVM_NR_DB_REGS; i++) vcpu->arch.eff_db[i] = vcpu->arch.db[i]; - vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } + kvm_update_dr7(vcpu); if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + @@ -5877,7 +5887,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, */ kvm_set_rflags(vcpu, rflags); - kvm_x86_ops->set_guest_debug(vcpu, dbg); + kvm_x86_ops->update_db_bp_intercept(vcpu); r = 0; @@ -5979,7 +5989,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) */ kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; - unlazy_fpu(current); + __kernel_fpu_begin(); fpu_restore_checking(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } @@ -5993,6 +6003,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) vcpu->guest_fpu_loaded = 0; fpu_save_init(&vcpu->arch.guest_fpu); + __kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); trace_kvm_fpu(0); @@ -6022,7 +6033,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int r; vcpu->arch.mtrr_state.have_fixed = 1; - vcpu_load(vcpu); + r = vcpu_load(vcpu); + if (r) + return r; r = kvm_arch_vcpu_reset(vcpu); if (r == 0) r = kvm_mmu_setup(vcpu); @@ -6033,9 +6046,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { + int r; vcpu->arch.apf.msr_val = 0; - vcpu_load(vcpu); + r = vcpu_load(vcpu); + BUG_ON(r); kvm_mmu_unload(vcpu); vcpu_put(vcpu); @@ -6049,10 +6064,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.nmi_pending = 0; vcpu->arch.nmi_injected = false; - vcpu->arch.switch_db_regs = 0; memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = DR6_FIXED_1; vcpu->arch.dr7 = DR7_FIXED_1; + kvm_update_dr7(vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.apf.msr_val = 0; @@ -6131,7 +6146,7 @@ int kvm_arch_hardware_enable(void *garbage) * as we reset last_host_tsc on all VCPUs to stop this from being * called multiple times (one for each physical CPU bringup). * - * Platforms with unnreliable TSCs don't have to deal with this, they + * Platforms with unreliable TSCs don't have to deal with this, they * will be compensated by the logic in vcpu_load, which sets the TSC to * catchup mode. This will catchup all VCPUs to real time, but cannot * guarantee that they stay in perfect synchronization. @@ -6184,6 +6199,8 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); } +struct static_key kvm_no_apic_vcpu __read_mostly; + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { struct page *page; @@ -6216,7 +6233,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) r = kvm_create_lapic(vcpu); if (r < 0) goto fail_mmu_destroy; - } + } else + static_key_slow_inc(&kvm_no_apic_vcpu); vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, GFP_KERNEL); @@ -6256,6 +6274,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvm_mmu_destroy(vcpu); srcu_read_unlock(&vcpu->kvm->srcu, idx); free_page((unsigned long)vcpu->arch.pio_data); + if (!irqchip_in_kernel(vcpu->kvm)) + static_key_slow_dec(&kvm_no_apic_vcpu); } int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) @@ -6268,15 +6288,21 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + /* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */ + set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, + &kvm->arch.irq_sources_bitmap); raw_spin_lock_init(&kvm->arch.tsc_write_lock); + mutex_init(&kvm->arch.apic_map_lock); return 0; } static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { - vcpu_load(vcpu); + int r; + r = vcpu_load(vcpu); + BUG_ON(r); kvm_mmu_unload(vcpu); vcpu_put(vcpu); } @@ -6320,6 +6346,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) put_page(kvm->arch.apic_access_page); if (kvm->arch.ept_identity_pagetable) put_page(kvm->arch.ept_identity_pagetable); + kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } void kvm_arch_free_memslot(struct kvm_memory_slot *free, @@ -6327,10 +6354,18 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, { int i; - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { - kvm_kvfree(free->arch.lpage_info[i]); - free->arch.lpage_info[i] = NULL; + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { + if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { + kvm_kvfree(free->arch.rmap[i]); + free->arch.rmap[i] = NULL; + } + if (i == 0) + continue; + + if (!dont || free->arch.lpage_info[i - 1] != + dont->arch.lpage_info[i - 1]) { + kvm_kvfree(free->arch.lpage_info[i - 1]); + free->arch.lpage_info[i - 1] = NULL; } } } @@ -6339,23 +6374,30 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) { int i; - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { unsigned long ugfn; int lpages; - int level = i + 2; + int level = i + 1; lpages = gfn_to_index(slot->base_gfn + npages - 1, slot->base_gfn, level) + 1; - slot->arch.lpage_info[i] = - kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); - if (!slot->arch.lpage_info[i]) + slot->arch.rmap[i] = + kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i])); + if (!slot->arch.rmap[i]) + goto out_free; + if (i == 0) + continue; + + slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages * + sizeof(*slot->arch.lpage_info[i - 1])); + if (!slot->arch.lpage_info[i - 1]) goto out_free; if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->arch.lpage_info[i][0].write_count = 1; + slot->arch.lpage_info[i - 1][0].write_count = 1; if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->arch.lpage_info[i][lpages - 1].write_count = 1; + slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1; ugfn = slot->userspace_addr >> PAGE_SHIFT; /* * If the gfn and userspace address are not aligned wrt each @@ -6367,16 +6409,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) unsigned long j; for (j = 0; j < lpages; ++j) - slot->arch.lpage_info[i][j].write_count = 1; + slot->arch.lpage_info[i - 1][j].write_count = 1; } } return 0; out_free: - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - kvm_kvfree(slot->arch.lpage_info[i]); - slot->arch.lpage_info[i] = NULL; + for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { + kvm_kvfree(slot->arch.rmap[i]); + slot->arch.rmap[i] = NULL; + if (i == 0) + continue; + + kvm_kvfree(slot->arch.lpage_info[i - 1]); + slot->arch.lpage_info[i - 1] = NULL; } return -ENOMEM; } @@ -6395,10 +6442,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, - *x86 needs to hanlde !user_alloc case. + *x86 needs to handle !user_alloc case. */ if (!user_alloc) { - if (npages && !old.rmap) { + if (npages && !old.npages) { unsigned long userspace_addr; userspace_addr = vm_mmap(NULL, 0, @@ -6426,7 +6473,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; - if (!user_alloc && !old.user_alloc && old.rmap && !npages) { + if (!user_alloc && !old.user_alloc && old.npages && !npages) { int ret; ret = vm_munmap(old.userspace_addr, @@ -6445,14 +6492,28 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); kvm_mmu_slot_remove_write_access(kvm, mem->slot); spin_unlock(&kvm->mmu_lock); + /* + * If memory slot is created, or moved, we need to clear all + * mmio sptes. + */ + if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) { + kvm_mmu_zap_all(kvm); + kvm_reload_remote_mmus(kvm); + } } -void kvm_arch_flush_shadow(struct kvm *kvm) +void kvm_arch_flush_shadow_all(struct kvm *kvm) { kvm_mmu_zap_all(kvm); kvm_reload_remote_mmus(kvm); } +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + kvm_arch_flush_shadow_all(kvm); +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3d1134d..2b5219c 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -124,4 +124,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, extern u64 host_xcr0; +extern struct static_key kvm_no_apic_vcpu; #endif diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 6e121a2..7872a33 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -4,7 +4,6 @@ config LGUEST_GUEST depends on X86_32 select VIRTUALIZATION select VIRTIO - select VIRTIO_RING select VIRTIO_CONSOLE help Lguest is a tiny in-kernel hypervisor. Selecting this will diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 5b2995f..a30ca15 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -17,6 +17,7 @@ #include <asm/cpufeature.h> #include <asm/alternative-asm.h> #include <asm/asm.h> +#include <asm/smap.h> /* * By placing feature2 after feature1 in altinstructions section, we logically @@ -130,6 +131,7 @@ ENDPROC(bad_from_user) */ ENTRY(copy_user_generic_unrolled) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -177,6 +179,7 @@ ENTRY(copy_user_generic_unrolled) decl %ecx jnz 21b 23: xor %eax,%eax + ASM_CLAC ret .section .fixup,"ax" @@ -232,6 +235,7 @@ ENDPROC(copy_user_generic_unrolled) */ ENTRY(copy_user_generic_string) CFI_STARTPROC + ASM_STAC andl %edx,%edx jz 4f cmpl $8,%edx @@ -246,6 +250,7 @@ ENTRY(copy_user_generic_string) 3: rep movsb 4: xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" @@ -273,12 +278,14 @@ ENDPROC(copy_user_generic_string) */ ENTRY(copy_user_enhanced_fast_string) CFI_STARTPROC + ASM_STAC andl %edx,%edx jz 2f movl %edx,%ecx 1: rep movsb 2: xorl %eax,%eax + ASM_CLAC ret .section .fixup,"ax" diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index cacddc7..6a4f43c 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S @@ -15,6 +15,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/asm.h> +#include <asm/smap.h> .macro ALIGN_DESTINATION #ifdef FIX_ALIGNMENT @@ -48,6 +49,7 @@ */ ENTRY(__copy_user_nocache) CFI_STARTPROC + ASM_STAC cmpl $8,%edx jb 20f /* less then 8 bytes, go to byte copy loop */ ALIGN_DESTINATION @@ -95,6 +97,7 @@ ENTRY(__copy_user_nocache) decl %ecx jnz 21b 23: xorl %eax,%eax + ASM_CLAC sfence ret diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index b33b1fb..156b9c8 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -33,6 +33,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/asm.h> +#include <asm/smap.h> .text ENTRY(__get_user_1) @@ -40,8 +41,10 @@ ENTRY(__get_user_1) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 1: movzb (%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_1) @@ -53,8 +56,10 @@ ENTRY(__get_user_2) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 2: movzwl -1(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_2) @@ -66,8 +71,10 @@ ENTRY(__get_user_4) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 3: mov -3(%_ASM_AX),%edx xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_4) @@ -80,8 +87,10 @@ ENTRY(__get_user_8) GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user + ASM_STAC 4: movq -7(%_ASM_AX),%_ASM_DX xor %eax,%eax + ASM_CLAC ret CFI_ENDPROC ENDPROC(__get_user_8) @@ -91,6 +100,7 @@ bad_get_user: CFI_STARTPROC xor %edx,%edx mov $(-EFAULT),%_ASM_AX + ASM_CLAC ret CFI_ENDPROC END(bad_get_user) diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index b1e6c4b..54fcffe 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -18,7 +18,11 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#ifdef __KERNEL__ #include <linux/string.h> +#else +#include <string.h> +#endif #include <asm/inat.h> #include <asm/insn.h> diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index 7f951c8..fc6ba17 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -15,6 +15,7 @@ #include <asm/thread_info.h> #include <asm/errno.h> #include <asm/asm.h> +#include <asm/smap.h> /* @@ -31,7 +32,8 @@ #define ENTER CFI_STARTPROC ; \ GET_THREAD_INFO(%_ASM_BX) -#define EXIT ret ; \ +#define EXIT ASM_CLAC ; \ + ret ; \ CFI_ENDPROC .text @@ -39,6 +41,7 @@ ENTRY(__put_user_1) ENTER cmp TI_addr_limit(%_ASM_BX),%_ASM_CX jae bad_put_user + ASM_STAC 1: movb %al,(%_ASM_CX) xor %eax,%eax EXIT @@ -50,6 +53,7 @@ ENTRY(__put_user_2) sub $1,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 2: movw %ax,(%_ASM_CX) xor %eax,%eax EXIT @@ -61,6 +65,7 @@ ENTRY(__put_user_4) sub $3,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 3: movl %eax,(%_ASM_CX) xor %eax,%eax EXIT @@ -72,6 +77,7 @@ ENTRY(__put_user_8) sub $7,%_ASM_BX cmp %_ASM_BX,%_ASM_CX jae bad_put_user + ASM_STAC 4: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 5: movl %edx,4(%_ASM_CX) diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1781b2f..98f6d6b6 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -42,10 +42,11 @@ do { \ int __d0; \ might_fault(); \ __asm__ __volatile__( \ + ASM_STAC "\n" \ "0: rep; stosl\n" \ " movl %2,%0\n" \ "1: rep; stosb\n" \ - "2:\n" \ + "2: " ASM_CLAC "\n" \ ".section .fixup,\"ax\"\n" \ "3: lea 0(%2,%0,4),%0\n" \ " jmp 2b\n" \ @@ -626,10 +627,12 @@ survive: return n; } #endif + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_to_user_ll); @@ -637,10 +640,12 @@ EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user_zeroing(to, from, n); else n = __copy_user_zeroing_intel(to, from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll); @@ -648,11 +653,13 @@ EXPORT_SYMBOL(__copy_from_user_ll); unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, unsigned long n) { + stac(); if (movsl_is_ok(to, from, n)) __copy_user(to, from, n); else n = __copy_user_intel((void __user *)to, (const void *)from, n); + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nozero); @@ -660,6 +667,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero); unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, unsigned long n) { + stac(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_zeroing_intel_nocache(to, from, n); @@ -668,6 +676,7 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, #else __copy_user_zeroing(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache); @@ -675,6 +684,7 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache); unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, unsigned long n) { + stac(); #ifdef CONFIG_X86_INTEL_USERCOPY if (n > 64 && cpu_has_xmm2) n = __copy_user_intel_nocache(to, from, n); @@ -683,6 +693,7 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr #else __copy_user(to, from, n); #endif + clac(); return n; } EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index e5b130b..05928aa 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -18,6 +18,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) might_fault(); /* no memory constraint because it doesn't change any memory gcc knows about */ + stac(); asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" @@ -40,6 +41,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) : [size8] "=&c"(size), [dst] "=&D" (__d0) : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), [zero] "r" (0UL), [eight] "r" (8UL)); + clac(); return size; } EXPORT_SYMBOL(__clear_user); @@ -82,5 +84,6 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) if (__put_user_nocheck(c, to++, sizeof(char))) break; + clac(); return len; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 76dcd9d..8e13ecb 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -18,6 +18,7 @@ #include <asm/pgalloc.h> /* pgd_*(), ... */ #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ #include <asm/fixmap.h> /* VSYSCALL_START */ +#include <asm/rcu.h> /* exception_enter(), ... */ /* * Page fault error code bits: @@ -995,13 +996,24 @@ static int fault_in_kernel_space(unsigned long address) return address >= TASK_SIZE_MAX; } +static inline bool smap_violation(int error_code, struct pt_regs *regs) +{ + if (error_code & PF_USER) + return false; + + if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) + return false; + + return true; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. */ -dotraplinkage void __kprobes -do_page_fault(struct pt_regs *regs, unsigned long error_code) +static void __kprobes +__do_page_fault(struct pt_regs *regs, unsigned long error_code) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -1088,6 +1100,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); + if (static_cpu_has(X86_FEATURE_SMAP)) { + if (unlikely(smap_violation(error_code, regs))) { + bad_area_nosemaphore(regs, error_code, address); + return; + } + } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* @@ -1201,6 +1220,7 @@ good_area: /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; goto retry; } } @@ -1209,3 +1229,11 @@ good_area: up_read(&mm->mmap_sem); } + +dotraplinkage void __kprobes +do_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + exception_enter(regs); + __do_page_fault(regs, error_code); + exception_exit(regs); +} diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index b91e485..937bff5 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) struct address_space *mapping = vma->vm_file->f_mapping; pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - struct prio_tree_iter iter; struct vm_area_struct *svma; unsigned long saddr; pte_t *spte = NULL; @@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return (pte_t *)pmd_alloc(mm, pud, addr); mutex_lock(&mapping->i_mmap_mutex); - vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) { + vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) { if (svma == vma) continue; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ab1f6a9..d7aea41 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -35,40 +35,44 @@ struct map_range { unsigned page_size_mask; }; -static void __init find_early_table_space(struct map_range *mr, unsigned long end, - int use_pse, int use_gbpages) +/* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. + */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) { - unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; phys_addr_t base; - puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - - if (use_gbpages) { - unsigned long extra; - - extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); - pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else - pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - if (use_pse) { - unsigned long extra; + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; + } - extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); #ifdef CONFIG_X86_32 - extra += PMD_SIZE; + extra += PMD_SIZE; #endif - /* The first 2/4M doesn't use large pages. */ - if (mr->start < PMD_SIZE) - extra += mr->end - mr->start; - - ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else - ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); #ifdef CONFIG_X86_32 @@ -86,7 +90,7 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", - end - 1, pgt_buf_start << PAGE_SHIFT, + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, (pgt_buf_top << PAGE_SHIFT) - 1); } @@ -267,7 +271,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(&mr[0], end, use_pse, use_gbpages); + find_early_table_space(mr, nr_range); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 575d86f..11a5800 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base) } #endif /* CONFIG_HIGHMEM */ -void __init native_pagetable_setup_start(pgd_t *base) +void __init native_pagetable_init(void) { unsigned long pfn, va; - pgd_t *pgd; + pgd_t *pgd, *base = swapper_pg_dir; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -475,10 +475,7 @@ void __init native_pagetable_setup_start(pgd_t *base) pte_clear(NULL, va, pte); } paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT); -} - -void __init native_pagetable_setup_done(pgd_t *base) -{ + paging_init(); } /* @@ -493,7 +490,7 @@ void __init native_pagetable_setup_done(pgd_t *base) * If we're booting paravirtualized under a hypervisor, then there are * more options: we may already be running PAE, and the pagetable may * or may not be based in swapper_pg_dir. In any case, - * paravirt_pagetable_setup_start() will set up swapper_pg_dir + * paravirt_pagetable_init() will set up swapper_pg_dir * appropriately for the rest of the initialization to work. * * In general, pagetable_init() assumes that the pagetable may already @@ -712,7 +709,7 @@ static void __init test_wp_bit(void) "Checking if this processor honours the WP bit even in supervisor mode..."); /* Any page-aligned address will do, the test is non-destructive */ - __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_READONLY); + __set_fixmap(FIX_WP_TEST, __pa(&swapper_pg_dir), PAGE_KERNEL_RO); boot_cpu_data.wp_works_ok = do_test_wp_bit(); clear_fixmap(FIX_WP_TEST); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2b6b4a3..3baff25 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -386,7 +386,8 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * these mappings are more intelligent. */ if (pte_val(*pte)) { - pages++; + if (!after_bootmem) + pages++; continue; } @@ -451,6 +452,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, * attributes. */ if (page_size_mask & (1 << PG_LEVEL_2M)) { + if (!after_bootmem) + pages++; last_map_addr = next; continue; } @@ -526,6 +529,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, * attributes. */ if (page_size_mask & (1 << PG_LEVEL_1G)) { + if (!after_bootmem) + pages++; last_map_addr = next; continue; } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 3d68ef6..0eb572e 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -664,20 +664,20 @@ static void free_pfn_range(u64 paddr, unsigned long size) } /* - * track_pfn_vma_copy is called when vma that is covering the pfnmap gets + * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). * * If the vma has a linear pfn mapping for the entire range, we get the prot * from pte and reserve the entire vma range with single reserve_pfn_range call. */ -int track_pfn_vma_copy(struct vm_area_struct *vma) +int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; - if (is_linear_pfn_mapping(vma)) { + if (vma->vm_flags & VM_PAT) { /* * reserve the whole chunk covered by vma. We need the * starting address and protection from pte. @@ -694,31 +694,59 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) } /* - * track_pfn_vma_new is called when a _new_ pfn mapping is being established - * for physical range indicated by pfn and size. - * * prot is passed in as a parameter for the new mapping. If the vma has a * linear pfn mapping for the entire range reserve the entire vma range with * single reserve_pfn_range call. */ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, - unsigned long pfn, unsigned long size) +int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot, + unsigned long pfn, unsigned long addr, unsigned long size) { + resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT; unsigned long flags; - resource_size_t paddr; - unsigned long vma_size = vma->vm_end - vma->vm_start; - if (is_linear_pfn_mapping(vma)) { - /* reserve the whole chunk starting from vm_pgoff */ - paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot, 0); + /* reserve the whole chunk starting from paddr */ + if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) { + int ret; + + ret = reserve_pfn_range(paddr, size, prot, 0); + if (!ret) + vma->vm_flags |= VM_PAT; + return ret; } if (!pat_enabled) return 0; - /* for vm_insert_pfn and friends, we set prot based on lookup */ - flags = lookup_memtype(pfn << PAGE_SHIFT); + /* + * For anything smaller than the vma size we set prot based on the + * lookup. + */ + flags = lookup_memtype(paddr); + + /* Check memtype for the remaining pages */ + while (size > PAGE_SIZE) { + size -= PAGE_SIZE; + paddr += PAGE_SIZE; + if (flags != lookup_memtype(paddr)) + return -EINVAL; + } + + *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | + flags); + + return 0; +} + +int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, + unsigned long pfn) +{ + unsigned long flags; + + if (!pat_enabled) + return 0; + + /* Set prot based on lookup */ + flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT); *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | flags); @@ -726,22 +754,31 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, } /* - * untrack_pfn_vma is called while unmapping a pfnmap for a region. + * untrack_pfn is called while unmapping a pfnmap for a region. * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case size can be zero). + * can be for the entire vma (in which case pfn, size are zero). */ -void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size) +void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size) { resource_size_t paddr; - unsigned long vma_size = vma->vm_end - vma->vm_start; + unsigned long prot; - if (is_linear_pfn_mapping(vma)) { - /* free the whole chunk starting from vm_pgoff */ - paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; - free_pfn_range(paddr, vma_size); + if (!(vma->vm_flags & VM_PAT)) return; + + /* free the chunk starting from pfn or the whole chunk */ + paddr = (resource_size_t)pfn << PAGE_SHIFT; + if (!paddr && !size) { + if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { + WARN_ON_ONCE(1); + return; + } + + size = vma->vm_end - vma->vm_start; } + free_pfn_range(paddr, size); + vma->vm_flags &= ~VM_PAT; } pgprot_t pgprot_writecombine(pgprot_t prot) diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 8acaddd..415f6c4 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -12,7 +12,7 @@ #include <linux/debugfs.h> #include <linux/kernel.h> #include <linux/module.h> -#include <linux/rbtree.h> +#include <linux/rbtree_augmented.h> #include <linux/sched.h> #include <linux/gfp.h> @@ -54,29 +54,24 @@ static u64 get_subtree_max_end(struct rb_node *node) return ret; } -/* Update 'subtree_max_end' for a node, based on node and its children */ -static void memtype_rb_augment_cb(struct rb_node *node, void *__unused) +static u64 compute_subtree_max_end(struct memtype *data) { - struct memtype *data; - u64 max_end, child_max_end; - - if (!node) - return; + u64 max_end = data->end, child_max_end; - data = container_of(node, struct memtype, rb); - max_end = data->end; - - child_max_end = get_subtree_max_end(node->rb_right); + child_max_end = get_subtree_max_end(data->rb.rb_right); if (child_max_end > max_end) max_end = child_max_end; - child_max_end = get_subtree_max_end(node->rb_left); + child_max_end = get_subtree_max_end(data->rb.rb_left); if (child_max_end > max_end) max_end = child_max_end; - data->subtree_max_end = max_end; + return max_end; } +RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb, + u64, subtree_max_end, compute_subtree_max_end) + /* Find the first (lowest start addr) overlapping range from rb tree */ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, u64 start, u64 end) @@ -179,15 +174,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) struct memtype *data = container_of(*node, struct memtype, rb); parent = *node; + if (data->subtree_max_end < newdata->end) + data->subtree_max_end = newdata->end; if (newdata->start <= data->start) node = &((*node)->rb_left); else if (newdata->start > data->start) node = &((*node)->rb_right); } + newdata->subtree_max_end = newdata->end; rb_link_node(&newdata->rb, parent, node); - rb_insert_color(&newdata->rb, root); - rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL); + rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb); } int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) @@ -209,16 +206,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) struct memtype *rbt_memtype_erase(u64 start, u64 end) { - struct rb_node *deepest; struct memtype *data; data = memtype_rb_exact_match(&memtype_rbroot, start, end); if (!data) goto out; - deepest = rb_augment_erase_begin(&data->rb); - rb_erase(&data->rb, &memtype_rbroot); - rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL); + rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb); out: return data; } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 613cd83..0777f04 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -98,6 +98,8 @@ static void flush_tlb_func(void *info) { struct flush_tlb_info *f = info; + inc_irq_stat(irq_tlb_count); + if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) return; @@ -320,7 +322,7 @@ static ssize_t tlbflush_write_file(struct file *file, if (kstrtos8(buf, 0, &shift)) return -EINVAL; - if (shift > 64) + if (shift < -1 || shift >= BITS_PER_LONG) return -EINVAL; tlb_flushall_shift = shift; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 33643a8..520d2bd 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -280,6 +280,31 @@ void bpf_jit_compile(struct sk_filter *fp) } EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */ break; + case BPF_S_ALU_MOD_X: /* A %= X; */ + seen |= SEEN_XREG; + EMIT2(0x85, 0xdb); /* test %ebx,%ebx */ + if (pc_ret0 > 0) { + /* addrs[pc_ret0 - 1] is start address of target + * (addrs[i] - 6) is the address following this jmp + * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long) + */ + EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] - + (addrs[i] - 6)); + } else { + EMIT_COND_JMP(X86_JNE, 2 + 5); + CLEAR_A(); + EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */ + } + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT2(0xf7, 0xf3); /* div %ebx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; + case BPF_S_ALU_MOD_K: /* A %= K; */ + EMIT2(0x31, 0xd2); /* xor %edx,%edx */ + EMIT1(0xb9);EMIT(K, 4); /* mov imm32,%ecx */ + EMIT2(0xf7, 0xf1); /* div %ecx */ + EMIT2(0x89, 0xd0); /* mov %edx,%eax */ + break; case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */ EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */ EMIT(K, 4); @@ -310,9 +335,18 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT1_off32(0x0d, K); /* or imm32,%eax */ break; case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */ + case BPF_S_ALU_XOR_X: seen |= SEEN_XREG; EMIT2(0x31, 0xd8); /* xor %ebx,%eax */ break; + case BPF_S_ALU_XOR_K: /* A ^= K; */ + if (K == 0) + break; + if (is_imm8(K)) + EMIT3(0x83, 0xf0, K); /* xor imm8,%eax */ + else + EMIT1_off32(0x35, K); /* xor imm32,%eax */ + break; case BPF_S_ALU_LSH_X: /* A <<= X; */ seen |= SEEN_XREG; EMIT4(0x89, 0xd9, 0xd3, 0xe0); /* mov %ebx,%ecx; shl %cl,%eax */ diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 26b8a85..48768df 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, val |= counter_config->extra; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; - val |= (event & 0x0F00) << 24; + val |= (u64)(event & 0x0F00) << 24; return val; } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 505acdd..192397c 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -305,7 +305,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->flags = flags; res->start = start; res->end = end; - res->child = NULL; if (!pci_use_crs) { dev_printk(KERN_DEBUG, &info->bridge->dev, @@ -434,7 +433,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, size = sizeof(*info->res) * info->res_num; info->res_num = 0; - info->res = kmalloc(size, GFP_KERNEL); + info->res = kzalloc(size, GFP_KERNEL); if (!info->res) return; diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 937bcec..704b9ec 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) while (i >= sizeof(struct acpi_mcfg_allocation)) { entries++; i -= sizeof(struct acpi_mcfg_allocation); - }; + } if (entries == 0) { pr_err(PREFIX "MMCONFIG has no entries\n"); return -ENODEV; diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 6f2f8ee..3e6d2a6 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -62,11 +62,6 @@ out: return irq; } -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - int __init pci_visws_init(void) { pcibios_enable_irq = &pci_visws_enable_irq; diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 73b8be0..6db1cc4 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o +obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c new file mode 100644 index 0000000..f6a0c1b --- /dev/null +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -0,0 +1,76 @@ +/* + * Copyright 2012 Intel Corporation + * Author: Josh Triplett <josh@joshtriplett.org> + * + * Based on the bgrt driver: + * Copyright 2012 Red Hat, Inc <mjg@redhat.com> + * Author: Matthew Garrett + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> +#include <linux/acpi.h> +#include <linux/efi.h> +#include <linux/efi-bgrt.h> + +struct acpi_table_bgrt *bgrt_tab; +void *bgrt_image; +size_t bgrt_image_size; + +struct bmp_header { + u16 id; + u32 size; +} __packed; + +void efi_bgrt_init(void) +{ + acpi_status status; + void __iomem *image; + bool ioremapped = false; + struct bmp_header bmp_header; + + if (acpi_disabled) + return; + + status = acpi_get_table("BGRT", 0, + (struct acpi_table_header **)&bgrt_tab); + if (ACPI_FAILURE(status)) + return; + + if (bgrt_tab->version != 1) + return; + if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) + return; + + image = efi_lookup_mapped_addr(bgrt_tab->image_address); + if (!image) { + image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); + ioremapped = true; + if (!image) + return; + } + + memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); + if (ioremapped) + iounmap(image); + bgrt_image_size = bmp_header.size; + + bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); + if (!bgrt_image) + return; + + if (ioremapped) { + image = ioremap(bgrt_tab->image_address, bmp_header.size); + if (!image) { + kfree(bgrt_image); + bgrt_image = NULL; + return; + } + } + + memcpy_fromio(bgrt_image, image, bgrt_image_size); + if (ioremapped) + iounmap(image); +} diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 92660eda..ad443914 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -31,6 +31,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/efi.h> +#include <linux/efi-bgrt.h> #include <linux/export.h> #include <linux/bootmem.h> #include <linux/memblock.h> @@ -69,11 +70,15 @@ EXPORT_SYMBOL(efi); struct efi_memory_map memmap; bool efi_64bit; -static bool efi_native; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; +static inline bool efi_is_native(void) +{ + return IS_ENABLED(CONFIG_X86_64) == efi_64bit; +} + static int __init setup_noefi(char *arg) { efi_enabled = 0; @@ -419,10 +424,21 @@ void __init efi_reserve_boot_services(void) } } -static void __init efi_free_boot_services(void) +void __init efi_unmap_memmap(void) +{ + if (memmap.map) { + early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); + memmap.map = NULL; + } +} + +void __init efi_free_boot_services(void) { void *p; + if (!efi_is_native()) + return; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; unsigned long long start = md->phys_addr; @@ -438,6 +454,8 @@ static void __init efi_free_boot_services(void) free_bootmem_late(start, size); } + + efi_unmap_memmap(); } static int __init efi_systab_init(void *phys) @@ -670,12 +688,10 @@ void __init efi_init(void) return; } efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; - efi_native = !efi_64bit; #else efi_phys.systab = (efi_system_table_t *) (boot_params.efi_info.efi_systab | ((__u64)boot_params.efi_info.efi_systab_hi<<32)); - efi_native = efi_64bit; #endif if (efi_systab_init(efi_phys.systab)) { @@ -709,7 +725,7 @@ void __init efi_init(void) * that doesn't match the kernel 32/64-bit mode. */ - if (!efi_native) + if (!efi_is_native()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else if (efi_runtime_init()) { efi_enabled = 0; @@ -721,7 +737,7 @@ void __init efi_init(void) return; } #ifdef CONFIG_X86_32 - if (efi_native) { + if (efi_is_native()) { x86_platform.get_wallclock = efi_get_time; x86_platform.set_wallclock = efi_set_rtc_mmss; } @@ -732,6 +748,11 @@ void __init efi_init(void) #endif } +void __init efi_late_init(void) +{ + efi_bgrt_init(); +} + void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -764,6 +785,44 @@ static void __init runtime_code_page_mkexec(void) } /* + * We can't ioremap data in EFI boot services RAM, because we've already mapped + * it as RAM. So, look it up in the existing EFI memory map instead. Only + * callable after efi_enter_virtual_mode and before efi_free_boot_services. + */ +void __iomem *efi_lookup_mapped_addr(u64 phys_addr) +{ + void *p; + if (WARN_ON(!memmap.map)) + return NULL; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + u64 size = md->num_pages << EFI_PAGE_SHIFT; + u64 end = md->phys_addr + size; + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + if (!md->virt_addr) + continue; + if (phys_addr >= md->phys_addr && phys_addr < end) { + phys_addr += md->virt_addr - md->phys_addr; + return (__force void __iomem *)(unsigned long)phys_addr; + } + } + return NULL; +} + +void efi_memory_uc(u64 addr, unsigned long size) +{ + unsigned long page_shift = 1UL << EFI_PAGE_SHIFT; + u64 npages; + + npages = round_up(size, page_shift) / page_shift; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); +} + +/* * This function will switch the EFI runtime services to virtual mode. * Essentially, look through the EFI memmap and map every region that * has the runtime attribute bit set in its memory descriptor and update @@ -776,7 +835,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md, *prev_md = NULL; efi_status_t status; unsigned long size; - u64 end, systab, addr, npages, end_pfn; + u64 end, systab, end_pfn; void *p, *va, *new_memmap = NULL; int count = 0; @@ -787,8 +846,10 @@ void __init efi_enter_virtual_mode(void) * non-native EFI */ - if (!efi_native) - goto out; + if (!efi_is_native()) { + efi_unmap_memmap(); + return; + } /* Merge contiguous regions of the same type and attribute */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -830,10 +891,14 @@ void __init efi_enter_virtual_mode(void) end_pfn = PFN_UP(end); if (end_pfn <= max_low_pfn_mapped || (end_pfn > (1UL << (32 - PAGE_SHIFT)) - && end_pfn <= max_pfn_mapped)) + && end_pfn <= max_pfn_mapped)) { va = __va(md->phys_addr); - else - va = efi_ioremap(md->phys_addr, size, md->type); + + if (!(md->attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)va, size); + } else + va = efi_ioremap(md->phys_addr, size, + md->type, md->attribute); md->virt_addr = (u64) (unsigned long) va; @@ -843,13 +908,6 @@ void __init efi_enter_virtual_mode(void) continue; } - if (!(md->attribute & EFI_MEMORY_WB)) { - addr = md->virt_addr; - npages = md->num_pages; - memrange_efi_to_native(&addr, &npages); - set_memory_uc(addr, npages); - } - systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; @@ -878,18 +936,12 @@ void __init efi_enter_virtual_mode(void) } /* - * Thankfully, it does seem that no runtime services other than - * SetVirtualAddressMap() will touch boot services code, so we can - * get rid of it all at this point - */ - efi_free_boot_services(); - - /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. * * Call EFI services through wrapper functions. */ + efi.runtime_version = efi_systab.fw_revision; efi.get_time = virt_efi_get_time; efi.set_time = virt_efi_set_time; efi.get_wakeup_time = virt_efi_get_wakeup_time; @@ -906,9 +958,6 @@ void __init efi_enter_virtual_mode(void) if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); -out: - early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); - memmap.map = NULL; kfree(new_memmap); } diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ac3aa54..95fd505 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -82,7 +82,7 @@ void __init efi_call_phys_epilog(void) } void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, - u32 type) + u32 type, u64 attribute) { unsigned long last_map_pfn; @@ -92,8 +92,11 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) { unsigned long top = last_map_pfn << PAGE_SHIFT; - efi_ioremap(top, size - (top - phys_addr), type); + efi_ioremap(top, size - (top - phys_addr), type, attribute); } + if (!(attribute & EFI_MEMORY_WB)) + efi_memory_uc((u64)(unsigned long)__va(phys_addr), size); + return (void __iomem *)__va(phys_addr); } diff --git a/arch/x86/realmode/rm/wakeup.h b/arch/x86/realmode/rm/wakeup.h index 9317e00..7dd86a4 100644 --- a/arch/x86/realmode/rm/wakeup.h +++ b/arch/x86/realmode/rm/wakeup.h @@ -36,5 +36,7 @@ extern struct wakeup_header wakeup_header; /* Wakeup behavior bits */ #define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE 0 +#define WAKEUP_BEHAVIOR_RESTORE_CR4 1 +#define WAKEUP_BEHAVIOR_RESTORE_EFER 2 #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S index 8905166..9e7e147 100644 --- a/arch/x86/realmode/rm/wakeup_asm.S +++ b/arch/x86/realmode/rm/wakeup_asm.S @@ -75,7 +75,7 @@ ENTRY(wakeup_start) lidtl wakeup_idt /* Clear the EFLAGS */ - pushl $0 + pushl $0 popfl /* Check header signature... */ @@ -93,8 +93,8 @@ ENTRY(wakeup_start) /* Restore MISC_ENABLE before entering protected mode, in case BIOS decided to clear XD_DISABLE during S3. */ - movl pmode_behavior, %eax - btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax + movl pmode_behavior, %edi + btl $WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %edi jnc 1f movl pmode_misc_en, %eax @@ -110,15 +110,15 @@ ENTRY(wakeup_start) movl pmode_cr3, %eax movl %eax, %cr3 - movl pmode_cr4, %ecx - jecxz 1f - movl %ecx, %cr4 + btl $WAKEUP_BEHAVIOR_RESTORE_CR4, %edi + jnc 1f + movl pmode_cr4, %eax + movl %eax, %cr4 1: + btl $WAKEUP_BEHAVIOR_RESTORE_EFER, %edi + jnc 1f movl pmode_efer, %eax movl pmode_efer + 4, %edx - movl %eax, %ecx - orl %edx, %ecx - jz 1f movl $MSR_EFER, %ecx wrmsr 1: diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile index 3236aeb..f325af2 100644 --- a/arch/x86/syscalls/Makefile +++ b/arch/x86/syscalls/Makefile @@ -1,7 +1,9 @@ out := $(obj)/../include/generated/asm +uapi := $(obj)/../include/generated/uapi/asm # Create output directory if not already present -_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \ + $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') syscall32 := $(srctree)/$(src)/syscall_32.tbl syscall64 := $(srctree)/$(src)/syscall_64.tbl @@ -18,7 +20,7 @@ quiet_cmd_systbl = SYSTBL $@ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@ syshdr_abi_unistd_32 := i386 -$(out)/unistd_32.h: $(syscall32) $(syshdr) +$(uapi)/unistd_32.h: $(syscall32) $(syshdr) $(call if_changed,syshdr) syshdr_abi_unistd_32_ia32 := i386 @@ -28,11 +30,11 @@ $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) syshdr_abi_unistd_x32 := common,x32 syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT -$(out)/unistd_x32.h: $(syscall64) $(syshdr) +$(uapi)/unistd_x32.h: $(syscall64) $(syshdr) $(call if_changed,syshdr) syshdr_abi_unistd_64 := common,64 -$(out)/unistd_64.h: $(syscall64) $(syshdr) +$(uapi)/unistd_64.h: $(syscall64) $(syshdr) $(call if_changed,syshdr) syshdr_abi_unistd_64_x32 := x32 @@ -45,11 +47,12 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl) $(out)/syscalls_64.h: $(syscall64) $(systbl) $(call if_changed,systbl) -syshdr-y += unistd_32.h unistd_64.h unistd_x32.h +uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h -targets += $(syshdr-y) +targets += $(uapisyshdr-y) $(syshdr-y) -all: $(addprefix $(out)/,$(targets)) +all: $(addprefix $(uapi)/,$(uapisyshdr-y)) +all: $(addprefix $(out)/,$(syshdr-y)) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 7a35a6e..a47103f 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -17,7 +17,7 @@ 8 i386 creat sys_creat 9 i386 link sys_link 10 i386 unlink sys_unlink -11 i386 execve ptregs_execve stub32_execve +11 i386 execve sys_execve stub32_execve 12 i386 chdir sys_chdir 13 i386 time sys_time compat_sys_time 14 i386 mknod sys_mknod diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 733057b..bae601f 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -28,7 +28,7 @@ posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity hostprogs-y += test_get_len insn_sanity # -I needed for generated C source and C source which in the kernel tree. -HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/ HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index aeaff8b..0761175 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -13,6 +13,8 @@ endmenu config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT + select GENERIC_KERNEL_THREAD + select GENERIC_KERNEL_EXECVE config 64BIT bool "64-bit kernel" if SUBARCH = "x86" @@ -22,9 +24,11 @@ config X86_32 def_bool !64BIT select HAVE_AOUT select ARCH_WANT_IPC_PARSE_VERSION + select MODULES_USE_ELF_REL config X86_64 def_bool 64BIT + select MODULES_USE_ELF_RELA config RWSEM_XCHGADD_ALGORITHM def_bool X86_XADD && 64BIT diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h index b6efe23..4b181b7 100644 --- a/arch/x86/um/asm/checksum.h +++ b/arch/x86/um/asm/checksum.h @@ -1,6 +1,150 @@ #ifndef __UM_CHECKSUM_H #define __UM_CHECKSUM_H +#include <linux/string.h> +#include <linux/in6.h> + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * Note: when you get a NULL pointer exception here this means someone + * passed in an incorrect kernel address to one of these functions. + * + * If you use these functions directly please don't forget the + * access_ok(). + */ + +static __inline__ +__wsum csum_partial_copy_nocheck(const void *src, void *dst, + int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +static __inline__ +__wsum csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + if (copy_from_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return csum_partial(dst, len, sum); +} + +/** + * csum_fold - Fold and invert a 32bit checksum. + * sum: 32bit unfolded sum + * + * Fold a 32bit running checksum to 16bit and invert it. This is usually + * the last step before putting a checksum into a packet. + * Make sure not to mix with 64bit checksums. + */ +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + " addl %1,%0\n" + " adcl $0xffff,%0" + : "=r" (sum) + : "r" ((__force u32)sum << 16), + "0" ((__force u32)sum & 0xffff0000) + ); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/** + * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. + * @saddr: source address + * @daddr: destination address + * @len: length of packet + * @proto: ip protocol of packet + * @sum: initial sum to be added in (32bit unfolded) + * + * Returns the pseudo header checksum the input data. Result is + * 32bit unfolded. + */ +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + asm(" addl %1, %0\n" + " adcl %2, %0\n" + " adcl %3, %0\n" + " adcl $0, %0\n" + : "=r" (sum) + : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/** + * ip_fast_csum - Compute the IPv4 header checksum efficiently. + * iph: ipv4 header + * ihl: length of header / 4 + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + asm( " movl (%1), %0\n" + " subl $4, %2\n" + " jbe 2f\n" + " addl 4(%1), %0\n" + " adcl 8(%1), %0\n" + " adcl 12(%1), %0\n" + "1: adcl 16(%1), %0\n" + " lea 4(%1), %1\n" + " decl %2\n" + " jne 1b\n" + " adcl $0, %0\n" + " movl %0, %2\n" + " shrl $16, %0\n" + " addw %w2, %w0\n" + " adcl $0, %0\n" + " notl %0\n" + "2:" + /* Since the input registers which are loaded with iph and ipl + are modified, we must also specify them as outputs, or gcc + will assume they contain their original values. */ + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "memory"); + return (__force __sum16)sum; +} + #ifdef CONFIG_X86_32 # include "checksum_32.h" #else diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h index caab742..ab77b6f 100644 --- a/arch/x86/um/asm/checksum_32.h +++ b/arch/x86/um/asm/checksum_32.h @@ -5,145 +5,6 @@ #ifndef __UM_SYSDEP_CHECKSUM_H #define __UM_SYSDEP_CHECKSUM_H -#include "linux/in6.h" -#include "linux/string.h" - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * returns a 32-bit number suitable for feeding into itself - * or csum_tcpudp_magic - * - * this function must be called with even lengths, except - * for the last fragment, which may be odd - * - * it's best to have buff aligned on a 32-bit boundary - */ -__wsum csum_partial(const void *buff, int len, __wsum sum); - -/* - * Note: when you get a NULL pointer exception here this means someone - * passed in an incorrect kernel address to one of these functions. - * - * If you use these functions directly please don't forget the - * access_ok(). - */ - -static __inline__ -__wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum) -{ - memcpy(dst, src, len); - return csum_partial(dst, len, sum); -} - -/* - * the same as csum_partial, but copies from src while it - * checksums, and handles user-space pointer exceptions correctly, when needed. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -static __inline__ -__wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) -{ - if (copy_from_user(dst, src, len)) { - *err_ptr = -EFAULT; - return (__force __wsum)-1; - } - - return csum_partial(dst, len, sum); -} - -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. - * - * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by - * Arnt Gulbrandsen. - */ -static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) -{ - unsigned int sum; - - __asm__ __volatile__( - "movl (%1), %0 ;\n" - "subl $4, %2 ;\n" - "jbe 2f ;\n" - "addl 4(%1), %0 ;\n" - "adcl 8(%1), %0 ;\n" - "adcl 12(%1), %0 ;\n" -"1: adcl 16(%1), %0 ;\n" - "lea 4(%1), %1 ;\n" - "decl %2 ;\n" - "jne 1b ;\n" - "adcl $0, %0 ;\n" - "movl %0, %2 ;\n" - "shrl $16, %0 ;\n" - "addw %w2, %w0 ;\n" - "adcl $0, %0 ;\n" - "notl %0 ;\n" -"2: ;\n" - /* Since the input registers which are loaded with iph and ipl - are modified, we must also specify them as outputs, or gcc - will assume they contain their original values. */ - : "=r" (sum), "=r" (iph), "=r" (ihl) - : "1" (iph), "2" (ihl) - : "memory"); - return (__force __sum16)sum; -} - -/* - * Fold a partial checksum - */ - -static inline __sum16 csum_fold(__wsum sum) -{ - __asm__( - "addl %1, %0 ;\n" - "adcl $0xffff, %0 ;\n" - : "=r" (sum) - : "r" ((__force u32)sum << 16), - "0" ((__force u32)sum & 0xffff0000) - ); - return (__force __sum16)(~(__force u32)sum >> 16); -} - -static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) -{ - __asm__( - "addl %1, %0 ;\n" - "adcl %2, %0 ;\n" - "adcl %3, %0 ;\n" - "adcl $0, %0 ;\n" - : "=r" (sum) - : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum)); - return sum; -} - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -} - -/* - * this routine is used for miscellaneous IP-like checksums, mainly - * in icmp.c - */ - static inline __sum16 ip_compute_csum(const void *buff, int len) { return csum_fold (csum_partial(buff, len, 0)); @@ -198,4 +59,3 @@ static __inline__ __wsum csum_and_copy_to_user(const void *src, } #endif - diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h index a5be903..7b6cd19 100644 --- a/arch/x86/um/asm/checksum_64.h +++ b/arch/x86/um/asm/checksum_64.h @@ -5,131 +5,6 @@ #ifndef __UM_SYSDEP_CHECKSUM_H #define __UM_SYSDEP_CHECKSUM_H -#include "linux/string.h" -#include "linux/in6.h" -#include "asm/uaccess.h" - -extern __wsum csum_partial(const void *buff, int len, __wsum sum); - -/* - * Note: when you get a NULL pointer exception here this means someone - * passed in an incorrect kernel address to one of these functions. - * - * If you use these functions directly please don't forget the - * access_ok(). - */ - -static __inline__ -__wsum csum_partial_copy_nocheck(const void *src, void *dst, - int len, __wsum sum) -{ - memcpy(dst, src, len); - return(csum_partial(dst, len, sum)); -} - -static __inline__ -__wsum csum_partial_copy_from_user(const void __user *src, - void *dst, int len, __wsum sum, - int *err_ptr) -{ - if (copy_from_user(dst, src, len)) { - *err_ptr = -EFAULT; - return (__force __wsum)-1; - } - return csum_partial(dst, len, sum); -} - -/** - * csum_fold - Fold and invert a 32bit checksum. - * sum: 32bit unfolded sum - * - * Fold a 32bit running checksum to 16bit and invert it. This is usually - * the last step before putting a checksum into a packet. - * Make sure not to mix with 64bit checksums. - */ -static inline __sum16 csum_fold(__wsum sum) -{ - __asm__( - " addl %1,%0\n" - " adcl $0xffff,%0" - : "=r" (sum) - : "r" ((__force u32)sum << 16), - "0" ((__force u32)sum & 0xffff0000) - ); - return (__force __sum16)(~(__force u32)sum >> 16); -} - -/** - * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum. - * @saddr: source address - * @daddr: destination address - * @len: length of packet - * @proto: ip protocol of packet - * @sum: initial sum to be added in (32bit unfolded) - * - * Returns the pseudo header checksum the input data. Result is - * 32bit unfolded. - */ -static inline __wsum -csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, - unsigned short proto, __wsum sum) -{ - asm(" addl %1, %0\n" - " adcl %2, %0\n" - " adcl %3, %0\n" - " adcl $0, %0\n" - : "=r" (sum) - : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum)); - return sum; -} - -/* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -} - -/** - * ip_fast_csum - Compute the IPv4 header checksum efficiently. - * iph: ipv4 header - * ihl: length of header / 4 - */ -static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) -{ - unsigned int sum; - - asm( " movl (%1), %0\n" - " subl $4, %2\n" - " jbe 2f\n" - " addl 4(%1), %0\n" - " adcl 8(%1), %0\n" - " adcl 12(%1), %0\n" - "1: adcl 16(%1), %0\n" - " lea 4(%1), %1\n" - " decl %2\n" - " jne 1b\n" - " adcl $0, %0\n" - " movl %0, %2\n" - " shrl $16, %0\n" - " addw %w2, %w0\n" - " adcl $0, %0\n" - " notl %0\n" - "2:" - /* Since the input registers which are loaded with iph and ipl - are modified, we must also specify them as outputs, or gcc - will assume they contain their original values. */ - : "=r" (sum), "=r" (iph), "=r" (ihl) - : "1" (iph), "2" (ihl) - : "memory"); - return (__force __sum16)sum; -} - static inline unsigned add32_with_carry(unsigned a, unsigned b) { asm("addl %2,%0\n\t" diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h index 0e07adc..0feee2f 100644 --- a/arch/x86/um/asm/elf.h +++ b/arch/x86/um/asm/elf.h @@ -6,7 +6,7 @@ #define __UM_ELF_X86_H #include <asm/user.h> -#include "skas.h" +#include <skas.h> #ifdef CONFIG_X86_32 diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h index e72cd0d..75513325 100644 --- a/arch/x86/um/asm/ptrace.h +++ b/arch/x86/um/asm/ptrace.h @@ -1,11 +1,13 @@ #ifndef __UM_X86_PTRACE_H #define __UM_X86_PTRACE_H -#ifdef CONFIG_X86_32 -# include "ptrace_32.h" -#else -# include "ptrace_64.h" +#include <linux/compiler.h> +#ifndef CONFIG_X86_32 +#define __FRAME_OFFSETS /* Needed to get the R* macros */ #endif +#include <asm/ptrace-generic.h> + +#define user_mode(r) UPT_IS_USER(&(r)->regs) #define PT_REGS_AX(r) UPT_AX(&(r)->regs) #define PT_REGS_BX(r) UPT_BX(&(r)->regs) @@ -36,4 +38,52 @@ static inline long regs_return_value(struct pt_regs *regs) { return PT_REGS_AX(regs); } + +/* + * Forward declaration to avoid including sysdep/tls.h, which causes a + * circular include, and compilation failures. + */ +struct user_desc; + +#ifdef CONFIG_X86_32 + +#define HOST_AUDIT_ARCH AUDIT_ARCH_I386 + +extern int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +extern int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc); + +#else + +#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 + +#define PT_REGS_R8(r) UPT_R8(&(r)->regs) +#define PT_REGS_R9(r) UPT_R9(&(r)->regs) +#define PT_REGS_R10(r) UPT_R10(&(r)->regs) +#define PT_REGS_R11(r) UPT_R11(&(r)->regs) +#define PT_REGS_R12(r) UPT_R12(&(r)->regs) +#define PT_REGS_R13(r) UPT_R13(&(r)->regs) +#define PT_REGS_R14(r) UPT_R14(&(r)->regs) +#define PT_REGS_R15(r) UPT_R15(&(r)->regs) + +#include <asm/errno.h> + +static inline int ptrace_get_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +static inline int ptrace_set_thread_area(struct task_struct *child, int idx, + struct user_desc __user *user_desc) +{ + return -ENOSYS; +} + +extern long arch_prctl(struct task_struct *task, int code, + unsigned long __user *addr); + +#endif #endif /* __UM_X86_PTRACE_H */ diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h deleted file mode 100644 index 2cf2253..0000000 --- a/arch/x86/um/asm/ptrace_32.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#ifndef __UM_PTRACE_I386_H -#define __UM_PTRACE_I386_H - -#define HOST_AUDIT_ARCH AUDIT_ARCH_I386 - -#include "linux/compiler.h" -#include "asm/ptrace-generic.h" - -#define user_mode(r) UPT_IS_USER(&(r)->regs) - -/* - * Forward declaration to avoid including sysdep/tls.h, which causes a - * circular include, and compilation failures. - */ -struct user_desc; - -extern int ptrace_get_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc); - -extern int ptrace_set_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc); - -#endif diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h deleted file mode 100644 index ea7bff3..0000000 --- a/arch/x86/um/asm/ptrace_64.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2003 PathScale, Inc. - * - * Licensed under the GPL - */ - -#ifndef __UM_PTRACE_X86_64_H -#define __UM_PTRACE_X86_64_H - -#include "linux/compiler.h" -#include "asm/errno.h" - -#define __FRAME_OFFSETS /* Needed to get the R* macros */ -#include "asm/ptrace-generic.h" - -#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64 - -#define PT_REGS_R8(r) UPT_R8(&(r)->regs) -#define PT_REGS_R9(r) UPT_R9(&(r)->regs) -#define PT_REGS_R10(r) UPT_R10(&(r)->regs) -#define PT_REGS_R11(r) UPT_R11(&(r)->regs) -#define PT_REGS_R12(r) UPT_R12(&(r)->regs) -#define PT_REGS_R13(r) UPT_R13(&(r)->regs) -#define PT_REGS_R14(r) UPT_R14(&(r)->regs) -#define PT_REGS_R15(r) UPT_R15(&(r)->regs) - -/* XXX */ -#define user_mode(r) UPT_IS_USER(&(r)->regs) - -struct user_desc; - -static inline int ptrace_get_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc) -{ - return -ENOSYS; -} - -static inline int ptrace_set_thread_area(struct task_struct *child, int idx, - struct user_desc __user *user_desc) -{ - return -ENOSYS; -} - -extern long arch_prctl(struct task_struct *task, int code, - unsigned long __user *addr); -#endif diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c index 17d88cf..33daff4 100644 --- a/arch/x86/um/bugs_32.c +++ b/arch/x86/um/bugs_32.c @@ -4,9 +4,9 @@ */ #include <signal.h> -#include "kern_util.h" -#include "longjmp.h" -#include "sysdep/ptrace.h" +#include <kern_util.h> +#include <longjmp.h> +#include <sysdep/ptrace.h> #include <generated/asm-offsets.h> /* Set during early boot */ diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c index 44e02ba..8cc8256 100644 --- a/arch/x86/um/bugs_64.c +++ b/arch/x86/um/bugs_64.c @@ -4,7 +4,7 @@ * Licensed under the GPL */ -#include "sysdep/ptrace.h" +#include <sysdep/ptrace.h> void arch_check_bugs(void) { diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c index d670f68..8784ab3 100644 --- a/arch/x86/um/fault.c +++ b/arch/x86/um/fault.c @@ -3,7 +3,7 @@ * Licensed under the GPL */ -#include "sysdep/ptrace.h" +#include <sysdep/ptrace.h> /* These two are from asm-um/uaccess.h and linux/module.h, check them. */ struct exception_table_entry diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c index 26b0e39..8e08176 100644 --- a/arch/x86/um/ldt.c +++ b/arch/x86/um/ldt.c @@ -7,11 +7,11 @@ #include <linux/sched.h> #include <linux/slab.h> #include <asm/unistd.h> -#include "os.h" -#include "proc_mm.h" -#include "skas.h" -#include "skas_ptrace.h" -#include "sysdep/tls.h" +#include <os.h> +#include <proc_mm.h> +#include <skas.h> +#include <skas_ptrace.h> +#include <sysdep/tls.h> extern int modify_ldt(int func, void *ptr, unsigned long bytecount); diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c index 5465187..c6492e7 100644 --- a/arch/x86/um/mem_64.c +++ b/arch/x86/um/mem_64.c @@ -1,6 +1,6 @@ -#include "linux/mm.h" -#include "asm/page.h" -#include "asm/mman.h" +#include <linux/mm.h> +#include <asm/page.h> +#include <asm/mman.h> const char *arch_vma_name(struct vm_area_struct *vma) { diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index 0cdbb86..41bfe84 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -9,8 +9,8 @@ #ifdef __i386__ #include <sys/user.h> #endif -#include "longjmp.h" -#include "sysdep/ptrace_user.h" +#include <longjmp.h> +#include <sysdep/ptrace_user.h> int save_fp_registers(int pid, unsigned long *fp_regs) { diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c index efb16c5..8502ad3 100644 --- a/arch/x86/um/os-Linux/task_size.c +++ b/arch/x86/um/os-Linux/task_size.c @@ -2,7 +2,7 @@ #include <stdlib.h> #include <signal.h> #include <sys/mman.h> -#include "longjmp.h" +#include <longjmp.h> #ifdef __i386__ diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c index 82276b6..9d94b3b 100644 --- a/arch/x86/um/os-Linux/tls.c +++ b/arch/x86/um/os-Linux/tls.c @@ -5,7 +5,7 @@ #include <sys/syscall.h> #include <unistd.h> -#include "sysdep/tls.h" +#include <sysdep/tls.h> #ifndef PTRACE_GET_THREAD_AREA #define PTRACE_GET_THREAD_AREA 25 diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 3b949daa..ce3dd4f 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -3,10 +3,10 @@ * Licensed under the GPL */ -#include "linux/mm.h" -#include "linux/sched.h" -#include "asm/uaccess.h" -#include "skas.h" +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <skas.h> extern int arch_switch_tls(struct task_struct *to); diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c index 3960ca1..617885b 100644 --- a/arch/x86/um/ptrace_user.c +++ b/arch/x86/um/ptrace_user.c @@ -4,7 +4,7 @@ */ #include <errno.h> -#include "ptrace_user.h" +#include <ptrace_user.h> int ptrace_getregs(long pid, unsigned long *regs_out) { diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h index 6ce2d76..eb93569 100644 --- a/arch/x86/um/shared/sysdep/ptrace.h +++ b/arch/x86/um/shared/sysdep/ptrace.h @@ -2,7 +2,7 @@ #define __SYSDEP_X86_PTRACE_H #include <generated/user_constants.h> -#include "sysdep/faultinfo.h" +#include <sysdep/faultinfo.h> #define MAX_REG_OFFSET (UM_FRAME_SIZE) #define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long)) diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h index bd161e3..3f55e5b 100644 --- a/arch/x86/um/shared/sysdep/stub.h +++ b/arch/x86/um/shared/sysdep/stub.h @@ -1,8 +1,8 @@ #include <asm/unistd.h> #include <sys/mman.h> #include <signal.h> -#include "as-layout.h" -#include "stub-data.h" +#include <as-layout.h> +#include <stub-data.h> #ifdef __i386__ #include "stub_32.h" diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h index 05cb796..8436079 100644 --- a/arch/x86/um/shared/sysdep/syscalls_32.h +++ b/arch/x86/um/shared/sysdep/syscalls_32.h @@ -3,8 +3,8 @@ * Licensed under the GPL */ -#include "asm/unistd.h" -#include "sysdep/ptrace.h" +#include <asm/unistd.h> +#include <sysdep/ptrace.h> typedef long syscall_handler_t(struct pt_regs); diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index ba7363e..bdaa08c 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -11,8 +11,8 @@ #include <asm/unistd.h> #include <asm/uaccess.h> #include <asm/ucontext.h> -#include "frame_kern.h" -#include "skas.h" +#include <frame_kern.h> +#include <skas.h> #ifdef CONFIG_X86_32 diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S index 54a36ec..b972649 100644 --- a/arch/x86/um/stub_32.S +++ b/arch/x86/um/stub_32.S @@ -1,4 +1,4 @@ -#include "as-layout.h" +#include <as-layout.h> .globl syscall_stub .section .__syscall_stub, "ax" diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S index 20e4a96..7160b20 100644 --- a/arch/x86/um/stub_64.S +++ b/arch/x86/um/stub_64.S @@ -1,4 +1,4 @@ -#include "as-layout.h" +#include <as-layout.h> .globl syscall_stub .section .__syscall_stub, "ax" diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index b7450bd..1518d28 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -3,9 +3,9 @@ * Licensed under the GPL */ -#include "sysdep/stub.h" -#include "sysdep/faultinfo.h" -#include "sysdep/mcontext.h" +#include <sysdep/stub.h> +#include <sysdep/faultinfo.h> +#include <sysdep/mcontext.h> void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index b5408ce..232e605 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -25,7 +25,6 @@ #define old_mmap sys_old_mmap #define ptregs_fork sys_fork -#define ptregs_execve sys_execve #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old #define ptregs_clone i386_clone diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c index 2d5cc51..c9bee5b 100644 --- a/arch/x86/um/sysrq_32.c +++ b/arch/x86/um/sysrq_32.c @@ -3,12 +3,12 @@ * Licensed under the GPL */ -#include "linux/kernel.h" -#include "linux/smp.h" -#include "linux/sched.h" -#include "linux/kallsyms.h" -#include "asm/ptrace.h" -#include "sysrq.h" +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/kallsyms.h> +#include <asm/ptrace.h> +#include <asm/sysrq.h> /* This is declared by <linux/sched.h> */ void show_regs(struct pt_regs *regs) diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index 08258f1..a0e7fb1 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -10,7 +10,7 @@ #include <linux/utsname.h> #include <asm/current.h> #include <asm/ptrace.h> -#include "sysrq.h" +#include <asm/sysrq.h> void __show_regs(struct pt_regs *regs) { diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c index baba84f..5f5feff 100644 --- a/arch/x86/um/tls_32.c +++ b/arch/x86/um/tls_32.c @@ -3,12 +3,12 @@ * Licensed under the GPL */ -#include "linux/percpu.h" -#include "linux/sched.h" -#include "asm/uaccess.h" -#include "os.h" -#include "skas.h" -#include "sysdep/tls.h" +#include <linux/percpu.h> +#include <linux/sched.h> +#include <asm/uaccess.h> +#include <os.h> +#include <skas.h> +#include <sysdep/tls.h> /* * If needed we can detect when it's uninitialized. diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c index f7ba462..d22363c 100644 --- a/arch/x86/um/tls_64.c +++ b/arch/x86/um/tls_64.c @@ -1,4 +1,4 @@ -#include "linux/sched.h" +#include <linux/sched.h> void clear_flushed_tls(struct task_struct *task) { diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 885eff4..4df6c37 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -80,7 +80,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) } -notrace static inline long vgetns(void) +notrace static inline u64 vgetsns(void) { long v; cycles_t cycles; @@ -91,21 +91,24 @@ notrace static inline long vgetns(void) else return 0; v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; - return (v * gtod->clock.mult) >> gtod->clock.shift; + return v * gtod->clock.mult; } /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ notrace static int __always_inline do_realtime(struct timespec *ts) { - unsigned long seq, ns; + unsigned long seq; + u64 ns; int mode; + ts->tv_nsec = 0; do { seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->wall_time_sec; - ts->tv_nsec = gtod->wall_time_nsec; - ns = vgetns(); + ns = gtod->wall_time_snsec; + ns += vgetsns(); + ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); timespec_add_ns(ts, ns); @@ -114,15 +117,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts) notrace static int do_monotonic(struct timespec *ts) { - unsigned long seq, ns; + unsigned long seq; + u64 ns; int mode; + ts->tv_nsec = 0; do { seq = read_seqcount_begin(>od->seq); mode = gtod->clock.vclock_mode; ts->tv_sec = gtod->monotonic_time_sec; - ts->tv_nsec = gtod->monotonic_time_nsec; - ns = vgetns(); + ns = gtod->monotonic_time_snsec; + ns += vgetsns(); + ns >>= gtod->clock.shift; } while (unlikely(read_seqcount_retry(>od->seq, seq))); timespec_add_ns(ts, ns); diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index ec57bd3..7005ced 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -6,8 +6,9 @@ #include <xen/xen.h> #include <xen/interface/physdev.h> +#include "xen-ops.h" -unsigned int xen_io_apic_read(unsigned apic, unsigned reg) +static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { struct physdev_apic apic_op; int ret; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 1fbe75a..586d838 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,7 @@ #include <linux/memblock.h> #include <xen/xen.h> +#include <xen/events.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> #include <xen/interface/physdev.h> @@ -984,7 +985,16 @@ static void xen_write_cr4(unsigned long cr4) native_write_cr4(cr4); } - +#ifdef CONFIG_X86_64 +static inline unsigned long xen_read_cr8(void) +{ + return 0; +} +static inline void xen_write_cr8(unsigned long val) +{ + BUG_ON(val); +} +#endif static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; @@ -1153,6 +1163,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_cr4_safe = native_read_cr4_safe, .write_cr4 = xen_write_cr4, +#ifdef CONFIG_X86_64 + .read_cr8 = xen_read_cr8, + .write_cr8 = xen_write_cr8, +#endif + .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, @@ -1161,6 +1176,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, + .read_tscp = native_read_tscp, + .iret = xen_iret, .irq_enable_sysexit = xen_sysexit, #ifdef CONFIG_X86_64 @@ -1288,7 +1305,6 @@ asmlinkage void __init xen_start_kernel(void) { struct physdev_set_iopl set_iopl; int rc; - pgd_t *pgd; if (!xen_start_info) return; @@ -1380,8 +1396,6 @@ asmlinkage void __init xen_start_kernel(void) acpi_numa = -1; #endif - pgd = (pgd_t *)xen_start_info->pt_base; - /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; @@ -1390,7 +1404,7 @@ asmlinkage void __init xen_start_kernel(void) early_boot_irqs_disabled = true; xen_raw_console_write("mapping kernel into physical memory\n"); - pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); + xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages); /* Allocate and initialize top and mid mfn levels for p2m structure */ xen_build_mfn_list_list(); @@ -1441,11 +1455,19 @@ asmlinkage void __init xen_start_kernel(void) const struct dom0_vga_console_info *info = (void *)((char *)xen_start_info + xen_start_info->console.dom0.info_off); + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .interface_version = XENPF_INTERFACE_VERSION, + .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS, + }; xen_init_vga(info, xen_start_info->console.dom0.info_size); xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; + if (HYPERVISOR_dom0_op(&op) == 0) + boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; + xen_init_apic(); /* Make sure ACS will be enabled */ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 1573376..01a4dc0 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -5,6 +5,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> #include <xen/interface/vcpu.h> +#include <xen/events.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5141d80..6226c99 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -47,6 +47,7 @@ #include <linux/gfp.h> #include <linux/memblock.h> #include <linux/seq_file.h> +#include <linux/crash_dump.h> #include <trace/events/xen.h> @@ -84,6 +85,7 @@ */ DEFINE_SPINLOCK(xen_reservation_lock); +#ifdef CONFIG_X86_32 /* * Identity map, in addition to plain kernel map. This needs to be * large enough to allocate page table pages to allocate the rest. @@ -91,7 +93,7 @@ DEFINE_SPINLOCK(xen_reservation_lock); */ #define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4) static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES); - +#endif #ifdef CONFIG_X86_64 /* l3 pud for userspace vsyscall mapping */ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; @@ -1174,9 +1176,7 @@ static void xen_exit_mmap(struct mm_struct *mm) spin_unlock(&mm->page_table_lock); } -static void __init xen_pagetable_setup_start(pgd_t *base) -{ -} +static void xen_post_allocator_init(void); static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) { @@ -1192,14 +1192,87 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) } } -static void xen_post_allocator_init(void); +#ifdef CONFIG_X86_64 +static void __init xen_cleanhighmap(unsigned long vaddr, + unsigned long vaddr_end) +{ + unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; + pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr); -static void __init xen_pagetable_setup_done(pgd_t *base) + /* NOTE: The loop is more greedy than the cleanup_highmap variant. + * We include the PMD passed in on _both_ boundaries. */ + for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE)); + pmd++, vaddr += PMD_SIZE) { + if (pmd_none(*pmd)) + continue; + if (vaddr < (unsigned long) _text || vaddr > kernel_end) + set_pmd(pmd, __pmd(0)); + } + /* In case we did something silly, we should crash in this function + * instead of somewhere later and be confusing. */ + xen_mc_flush(); +} +#endif +static void __init xen_pagetable_init(void) { +#ifdef CONFIG_X86_64 + unsigned long size; + unsigned long addr; +#endif + paging_init(); xen_setup_shared_info(); +#ifdef CONFIG_X86_64 + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + unsigned long new_mfn_list; + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + + /* On 32-bit, we get zero so this never gets executed. */ + new_mfn_list = xen_revector_p2m_tree(); + if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { + /* using __ka address and sticking INVALID_P2M_ENTRY! */ + memset((void *)xen_start_info->mfn_list, 0xff, size); + + /* We should be in __ka space. */ + BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); + addr = xen_start_info->mfn_list; + /* We roundup to the PMD, which means that if anybody at this stage is + * using the __ka address of xen_start_info or xen_start_info->shared_info + * they are in going to crash. Fortunatly we have already revectored + * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ + size = roundup(size, PMD_SIZE); + xen_cleanhighmap(addr, addr + size); + + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + memblock_free(__pa(xen_start_info->mfn_list), size); + /* And revector! Bye bye old array */ + xen_start_info->mfn_list = new_mfn_list; + } else + goto skip; + } + /* At this stage, cleanup_highmap has already cleaned __ka space + * from _brk_limit way up to the max_pfn_mapped (which is the end of + * the ramdisk). We continue on, erasing PMD entries that point to page + * tables - do note that they are accessible at this stage via __va. + * For good measure we also round up to the PMD - which means that if + * anybody is using __ka address to the initial boot-stack - and try + * to use it - they are going to crash. The xen_start_info has been + * taken care of already in xen_setup_kernel_pagetable. */ + addr = xen_start_info->pt_base; + size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); + + xen_cleanhighmap(addr, addr + size); + xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); +#ifdef DEBUG + /* This is superflous and is not neccessary, but you know what + * lets do it. The MODULES_VADDR -> MODULES_END should be clear of + * anything at this stage. */ + xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); +#endif +skip: +#endif xen_post_allocator_init(); } - static void xen_write_cr2(unsigned long cr2) { this_cpu_read(xen_vcpu)->arch.cr2 = cr2; @@ -1655,7 +1728,7 @@ static void set_page_prot(void *addr, pgprot_t prot) if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) BUG(); } - +#ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { unsigned pmdidx, pteidx; @@ -1706,7 +1779,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) set_page_prot(pmd, PAGE_KERNEL_RO); } - +#endif void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; @@ -1734,7 +1807,20 @@ static void convert_pfn_mfn(void *v) for (i = 0; i < PTRS_PER_PTE; i++) pte[i] = xen_make_pte(pte[i].pte); } - +static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, + unsigned long addr) +{ + if (*pt_base == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_base)++; + } + if (*pt_end == PFN_DOWN(__pa(addr))) { + set_page_prot((void *)addr, PAGE_KERNEL); + clear_page((void *)addr); + (*pt_end)--; + } +} /* * Set up the initial kernel pagetable. * @@ -1746,11 +1832,13 @@ static void convert_pfn_mfn(void *v) * of the physical mapping once some sort of allocator has been set * up. */ -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pud_t *l3; pmd_t *l2; + unsigned long addr[3]; + unsigned long pt_base, pt_end; + unsigned i; /* max_pfn_mapped is the last pfn mapped in the initial memory * mappings. Considering that on Xen after the kernel mappings we @@ -1758,32 +1846,53 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, * set max_pfn_mapped to the last real pfn mapped. */ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); + pt_base = PFN_DOWN(__pa(xen_start_info->pt_base)); + pt_end = pt_base + xen_start_info->nr_pt_frames; + /* Zap identity mapping */ init_level4_pgt[0] = __pgd(0); /* Pre-constructed entries are in pfn, so convert to mfn */ + /* L4[272] -> level3_ident_pgt + * L4[511] -> level3_kernel_pgt */ convert_pfn_mfn(init_level4_pgt); + + /* L3_i[0] -> level2_ident_pgt */ convert_pfn_mfn(level3_ident_pgt); + /* L3_k[510] -> level2_kernel_pgt + * L3_i[511] -> level2_fixmap_pgt */ convert_pfn_mfn(level3_kernel_pgt); + /* We get [511][511] and have Xen's version of level2_kernel_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); - memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - + addr[0] = (unsigned long)pgd; + addr[1] = (unsigned long)l3; + addr[2] = (unsigned long)l2; + /* Graft it onto L4[272][0]. Note that we creating an aliasing problem: + * Both L4[272][0] and L4[511][511] have entries that point to the same + * L2 (PMD) tables. Meaning that if you modify it in __va space + * it will be also modified in the __ka space! (But if you just + * modify the PMD table to point to other PTE's or none, then you + * are OK - which is what cleanup_highmap does) */ + copy_page(level2_ident_pgt, l2); + /* Graft it onto L4[511][511] */ + copy_page(level2_kernel_pgt, l2); + + /* Get [511][510] and graft that in level2_fixmap_pgt */ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); - memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); - - /* Set up identity map */ - xen_map_identity_early(level2_ident_pgt, max_pfn); + copy_page(level2_fixmap_pgt, l2); + /* Note that we don't do anything with level1_fixmap_pgt which + * we don't need. */ /* Make pagetable pieces RO */ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); + set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); @@ -1794,22 +1903,28 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, /* Unpin Xen-provided one */ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); - /* Switch over */ - pgd = init_level4_pgt; - /* * At this stage there can be no user pgd, and no page * structure to attach it to, so make sure we just set kernel * pgd. */ xen_mc_batch(); - __xen_write_cr3(true, __pa(pgd)); + __xen_write_cr3(true, __pa(init_level4_pgt)); xen_mc_issue(PARAVIRT_LAZY_CPU); - memblock_reserve(__pa(xen_start_info->pt_base), - xen_start_info->nr_pt_frames * PAGE_SIZE); + /* We can't that easily rip out L3 and L2, as the Xen pagetables are + * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for + * the initial domain. For guests using the toolstack, they are in: + * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only + * rip out the [L4] (pgd), but for guests we shave off three pages. + */ + for (i = 0; i < ARRAY_SIZE(addr); i++) + check_pt_base(&pt_base, &pt_end, addr[i]); - return pgd; + /* Our (by three pages) smaller Xen pagetable that we are using */ + memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE); + /* Revector the xen_start_info */ + xen_start_info = (struct start_info *)__va(__pa(xen_start_info)); } #else /* !CONFIG_X86_64 */ static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD); @@ -1834,8 +1949,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) */ swapper_kernel_pmd = extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); - memcpy(swapper_kernel_pmd, initial_kernel_pmd, - sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(swapper_kernel_pmd, initial_kernel_pmd); swapper_pg_dir[KERNEL_PGD_BOUNDARY] = __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT); set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO); @@ -1852,8 +1966,7 @@ static void __init xen_write_cr3_init(unsigned long cr3) pv_mmu_ops.write_cr3 = &xen_write_cr3; } -pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, - unsigned long max_pfn) +void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { pmd_t *kernel_pmd; @@ -1865,11 +1978,11 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, 512*1024); kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); - memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); + copy_page(initial_kernel_pmd, kernel_pmd); xen_map_identity_early(initial_kernel_pmd, max_pfn); - memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD); + copy_page(initial_page_table, pgd); initial_page_table[KERNEL_PGD_BOUNDARY] = __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT); @@ -1885,8 +1998,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, memblock_reserve(__pa(xen_start_info->pt_base), xen_start_info->nr_pt_frames * PAGE_SIZE); - - return initial_page_table; } #endif /* CONFIG_X86_64 */ @@ -2068,8 +2179,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; - x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start; - x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; + x86_init.paging.pagetable_init = xen_pagetable_init; pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); @@ -2272,6 +2382,43 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order) EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); #ifdef CONFIG_XEN_PVHVM +#ifdef CONFIG_PROC_VMCORE +/* + * This function is used in two contexts: + * - the kdump kernel has to check whether a pfn of the crashed kernel + * was a ballooned page. vmcore is using this function to decide + * whether to access a pfn of the crashed kernel. + * - the kexec kernel has to check whether a pfn was ballooned by the + * previous kernel. If the pfn is ballooned, handle it properly. + * Returns 0 if the pfn is not backed by a RAM page, the caller may + * handle the pfn special in this case. + */ +static int xen_oldmem_pfn_is_ram(unsigned long pfn) +{ + struct xen_hvm_get_mem_type a = { + .domid = DOMID_SELF, + .pfn = pfn, + }; + int ram; + + if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) + return -ENXIO; + + switch (a.mem_type) { + case HVMMEM_mmio_dm: + ram = 0; + break; + case HVMMEM_ram_rw: + case HVMMEM_ram_ro: + default: + ram = 1; + break; + } + + return ram; +} +#endif + static void xen_hvm_exit_mmap(struct mm_struct *mm) { struct xen_hvm_pagetable_dying a; @@ -2302,6 +2449,9 @@ void __init xen_hvm_init_mmu_ops(void) { if (is_pagetable_dying_supported()) pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap; +#ifdef CONFIG_PROC_VMCORE + register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram); +#endif } #endif @@ -2337,10 +2487,12 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long range; int err = 0; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -EINVAL; + prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP); - BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) == - (VM_PFNMAP | VM_RESERVED | VM_IO))); + BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); rmd.mfn = mfn; rmd.prot = prot; @@ -2355,8 +2507,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = -EFAULT; - if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0) + err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); + if (err < 0) goto out; nr -= batch; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 72213da..95fb2aa 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -22,7 +22,7 @@ * * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to - * 512 and 1024 entries respectively. + * 512 and 1024 entries respectively. * * In short, these structures contain the Machine Frame Number (MFN) of the PFN. * @@ -139,11 +139,11 @@ * / | ~0, ~0, .... | * | \---------------/ * | - * p2m_missing p2m_missing - * /------------------\ /------------\ - * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | - * | [p2m_mid_missing]+---->| ..., ~0 | - * \------------------/ \------------/ + * p2m_mid_missing p2m_missing + * /-----------------\ /------------\ + * | [p2m_missing] +---->| ~0, ~0, ~0 | + * | [p2m_missing] +---->| ..., ~0 | + * \-----------------/ \------------/ * * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ @@ -396,7 +396,85 @@ void __init xen_build_dynamic_phys_to_machine(void) m2p_override_init(); } +#ifdef CONFIG_X86_64 +#include <linux/bootmem.h> +unsigned long __init xen_revector_p2m_tree(void) +{ + unsigned long va_start; + unsigned long va_end; + unsigned long pfn; + unsigned long pfn_free = 0; + unsigned long *mfn_list = NULL; + unsigned long size; + + va_start = xen_start_info->mfn_list; + /*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long), + * so make sure it is rounded up to that */ + size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); + va_end = va_start + size; + + /* If we were revectored already, don't do it again. */ + if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET) + return 0; + + mfn_list = alloc_bootmem_align(size, PAGE_SIZE); + if (!mfn_list) { + pr_warn("Could not allocate space for a new P2M tree!\n"); + return xen_start_info->mfn_list; + } + /* Fill it out with INVALID_P2M_ENTRY value */ + memset(mfn_list, 0xFF, size); + + for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx; + unsigned long *mid_p; + + if (!p2m_top[topidx]) + continue; + + if (p2m_top[topidx] == p2m_mid_missing) + continue; + + mididx = p2m_mid_index(pfn); + mid_p = p2m_top[topidx][mididx]; + if (!mid_p) + continue; + if ((mid_p == p2m_missing) || (mid_p == p2m_identity)) + continue; + + if ((unsigned long)mid_p == INVALID_P2M_ENTRY) + continue; + + /* The old va. Rebase it on mfn_list */ + if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) { + unsigned long *new; + + if (pfn_free > (size / sizeof(unsigned long))) { + WARN(1, "Only allocated for %ld pages, but we want %ld!\n", + size / sizeof(unsigned long), pfn_free); + return 0; + } + new = &mfn_list[pfn_free]; + + copy_page(new, mid_p); + p2m_top[topidx][mididx] = &mfn_list[pfn_free]; + p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]); + + pfn_free += P2M_PER_PAGE; + } + /* This should be the leafs allocated for identity from _brk. */ + } + return (unsigned long)mfn_list; + +} +#else +unsigned long __init xen_revector_p2m_tree(void) +{ + return 0; +} +#endif unsigned long get_phys_to_machine(unsigned long pfn) { unsigned topidx, mididx, idx; @@ -430,7 +508,7 @@ static void free_p2m_page(void *p) free_page((unsigned long)p); } -/* +/* * Fully allocate the p2m structure for a given pfn. We need to check * that both the top and mid levels are allocated, and make sure the * parallel mfn tree is kept in sync. We may race with other cpus, so diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 967633a..9695704 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -8,6 +8,14 @@ #include <xen/xen.h> #include <asm/iommu_table.h> + +#include <asm/xen/swiotlb-xen.h> +#ifdef CONFIG_X86_64 +#include <asm/iommu.h> +#include <asm/dma.h> +#endif +#include <linux/export.h> + int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { @@ -34,34 +42,64 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { int __init pci_xen_swiotlb_detect(void) { + if (!xen_pv_domain()) + return 0; + /* If running as PV guest, either iommu=soft, or swiotlb=force will * activate this IOMMU. If running as PV privileged, activate it * irregardless. */ - if ((xen_initial_domain() || swiotlb || swiotlb_force) && - (xen_pv_domain())) + if ((xen_initial_domain() || swiotlb || swiotlb_force)) xen_swiotlb = 1; /* If we are running under Xen, we MUST disable the native SWIOTLB. * Don't worry about swiotlb_force flag activating the native, as * the 'swiotlb' flag is the only one turning it on. */ - if (xen_pv_domain()) - swiotlb = 0; + swiotlb = 0; +#ifdef CONFIG_X86_64 + /* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0 + * (so no iommu=X command line over-writes). + * Considering that PV guests do not want the *native SWIOTLB* but + * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here. + */ + if (max_pfn > MAX_DMA32_PFN) + no_iommu = 1; +#endif return xen_swiotlb; } void __init pci_xen_swiotlb_init(void) { if (xen_swiotlb) { - xen_swiotlb_init(1); + xen_swiotlb_init(1, true /* early */); dma_ops = &xen_swiotlb_dma_ops; /* Make sure ACS will be enabled */ pci_request_acs(); } } + +int pci_xen_swiotlb_init_late(void) +{ + int rc; + + if (xen_swiotlb) + return 0; + + rc = xen_swiotlb_init(1, false /* late */); + if (rc) + return rc; + + dma_ops = &xen_swiotlb_dma_ops; + /* Make sure ACS will be enabled */ + pci_request_acs(); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late); + IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, - 0, + NULL, pci_xen_swiotlb_init, - 0); + NULL); diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index ffcf261..0a78524 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -24,6 +24,7 @@ #include <linux/module.h> #include <xen/platform_pci.h> +#include "xen-ops.h" #define XEN_PLATFORM_ERR_MAGIC -1 #define XEN_PLATFORM_ERR_PROTOCOL -2 diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e2d62d6..8971a26 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -432,6 +432,24 @@ char * __init xen_memory_setup(void) * - mfn_list * - xen_start_info * See comment above "struct start_info" in <xen/interface/xen.h> + * We tried to make the the memblock_reserve more selective so + * that it would be clear what region is reserved. Sadly we ran + * in the problem wherein on a 64-bit hypervisor with a 32-bit + * initial domain, the pt_base has the cr3 value which is not + * neccessarily where the pagetable starts! As Jan put it: " + * Actually, the adjustment turns out to be correct: The page + * tables for a 32-on-64 dom0 get allocated in the order "first L1", + * "first L2", "first L3", so the offset to the page table base is + * indeed 2. When reading xen/include/public/xen.h's comment + * very strictly, this is not a violation (since there nothing is said + * that the first thing in the page table space is pointed to by + * pt_base; I admit that this seems to be implied though, namely + * do I think that it is implied that the page table space is the + * range [pt_base, pt_base + nt_pt_frames), whereas that + * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames), + * which - without a priori knowledge - the kernel would have + * difficulty to figure out)." - so lets just fall back to the + * easy way and reserve the whole region. */ memblock_reserve(__pa(xen_start_info->mfn_list), xen_start_info->pt_base - xen_start_info->mfn_list); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f58dca7..353c50f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle) return rc; if (num_online_cpus() == 1) - alternatives_smp_switch(1); + /* Just in case we booted with a single CPU. */ + alternatives_enable_smp(); rc = xen_smp_intr_init(cpu); if (rc) @@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu) unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); - - if (num_online_cpus() == 1) - alternatives_smp_switch(0); } static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */ diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 1cd7f4d..6722e37 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) info->u.text_mode_3.font_height; break; + case XEN_VGATYPE_EFI_LFB: case XEN_VGATYPE_VESA_LFB: if (size < offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps)) @@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) screen_info->blue_pos = info->u.vesa_lfb.blue_pos; screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + + if (info->video_type == XEN_VGATYPE_EFI_LFB) { + screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; + break; + } + if (size >= offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps) + sizeof(info->u.vesa_lfb.gbl_caps)) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index aaa7291..7faed58 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -28,9 +28,61 @@ ENTRY(startup_xen) __FINIT .pushsection .text - .align PAGE_SIZE + .balign PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE +#define NEXT_HYPERCALL(x) \ + ENTRY(xen_hypercall_##x) \ + .skip 32 + +NEXT_HYPERCALL(set_trap_table) +NEXT_HYPERCALL(mmu_update) +NEXT_HYPERCALL(set_gdt) +NEXT_HYPERCALL(stack_switch) +NEXT_HYPERCALL(set_callbacks) +NEXT_HYPERCALL(fpu_taskswitch) +NEXT_HYPERCALL(sched_op_compat) +NEXT_HYPERCALL(platform_op) +NEXT_HYPERCALL(set_debugreg) +NEXT_HYPERCALL(get_debugreg) +NEXT_HYPERCALL(update_descriptor) +NEXT_HYPERCALL(ni) +NEXT_HYPERCALL(memory_op) +NEXT_HYPERCALL(multicall) +NEXT_HYPERCALL(update_va_mapping) +NEXT_HYPERCALL(set_timer_op) +NEXT_HYPERCALL(event_channel_op_compat) +NEXT_HYPERCALL(xen_version) +NEXT_HYPERCALL(console_io) +NEXT_HYPERCALL(physdev_op_compat) +NEXT_HYPERCALL(grant_table_op) +NEXT_HYPERCALL(vm_assist) +NEXT_HYPERCALL(update_va_mapping_otherdomain) +NEXT_HYPERCALL(iret) +NEXT_HYPERCALL(vcpu_op) +NEXT_HYPERCALL(set_segment_base) +NEXT_HYPERCALL(mmuext_op) +NEXT_HYPERCALL(xsm_op) +NEXT_HYPERCALL(nmi_op) +NEXT_HYPERCALL(sched_op) +NEXT_HYPERCALL(callback_op) +NEXT_HYPERCALL(xenoprof_op) +NEXT_HYPERCALL(event_channel_op) +NEXT_HYPERCALL(physdev_op) +NEXT_HYPERCALL(hvm_op) +NEXT_HYPERCALL(sysctl) +NEXT_HYPERCALL(domctl) +NEXT_HYPERCALL(kexec_op) +NEXT_HYPERCALL(tmem_op) /* 38 */ +ENTRY(xen_hypercall_rsvr) + .skip 320 +NEXT_HYPERCALL(mca) /* 48 */ +NEXT_HYPERCALL(arch_1) +NEXT_HYPERCALL(arch_2) +NEXT_HYPERCALL(arch_3) +NEXT_HYPERCALL(arch_4) +NEXT_HYPERCALL(arch_5) +NEXT_HYPERCALL(arch_6) + .balign PAGE_SIZE .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 202d4c1..a95b417 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); -pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); +void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_reserve_top(void); extern unsigned long xen_max_p2m_pfn; @@ -35,7 +35,6 @@ void xen_set_pat(u64); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); -void __init xen_init_IRQ(void); void xen_enable_sysenter(void); void xen_enable_syscall(void); void xen_vcpu_restore(void); @@ -45,6 +44,7 @@ void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); +unsigned long __init xen_revector_p2m_tree(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); |