Diffstat (limited to 'arch/x86')
74 files changed, 2124 insertions, 447 deletions
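A recurring theme in the diff below is the new static_cpu_has_safe()/X86_FEATURE_ALWAYS machinery, which, unlike static_cpu_has(), may be called before alternatives have been applied (it falls back to boot_cpu_has() until then). A minimal usage sketch follows; it mirrors the __thread_fpu_begin() hunk in fpu-internal.h further down, and the helper name here is illustrative only, not part of this series:

    /*
     * Sketch only: check a CPU feature in code that can run before
     * alternatives patching. After patching, the test becomes a
     * static jump; before patching, it reads the boot CPU's flags.
     */
    static inline void example_fpu_begin(struct task_struct *tsk)
    {
            if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU))
                    clts();         /* lazy FPU switching still needs TS cleared */
            __thread_set_has_fpu(tsk);
    }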
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe120da..d98b665 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -336,6 +336,7 @@ config X86_EXTENDED_PLATFORM
 
 	  If you enable this option then you'll be able to select support
 	  for the following (non-PC) 32 bit x86 platforms:
+		Goldfish (Android emulator)
 		AMD Elan
 		NUMAQ (IBM/Sequent)
 		RDC R-321x SoC
@@ -410,6 +411,7 @@ config X86_UV
 config X86_GOLDFISH
 	bool "Goldfish (Virtual Platform)"
 	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
 	---help---
 	 Enable support for the Goldfish virtual platform used primarily
 	 for Android development. Unless you are building for the Android
@@ -1058,8 +1060,16 @@ config MICROCODE_INTEL_LIB
 	depends on MICROCODE_INTEL
 
 config MICROCODE_INTEL_EARLY
+	def_bool n
+
+config MICROCODE_AMD_EARLY
+	def_bool n
+
+config MICROCODE_EARLY
 	bool "Early load microcode"
-	depends on MICROCODE_INTEL && BLK_DEV_INITRD
+	depends on MICROCODE=y && BLK_DEV_INITRD
+	select MICROCODE_INTEL_EARLY if MICROCODE_INTEL
+	select MICROCODE_AMD_EARLY if MICROCODE_AMD
 	default y
 	help
 	  This option provides functionality to read additional microcode data
@@ -1067,10 +1077,6 @@ config MICROCODE_INTEL_EARLY
 	  microcode to CPU's as early as possible. No functional change if no
 	  microcode data is glued to the initrd, therefore it's safe to say Y.
 
-config MICROCODE_EARLY
-	def_bool y
-	depends on MICROCODE_INTEL_EARLY
-
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	---help---
@@ -1725,7 +1731,7 @@ config PHYSICAL_ALIGN
 
 config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
-	depends on SMP && HOTPLUG
+	depends on SMP
 	---help---
 	  Say Y here to allow turning CPUs off and on. CPUs can be
 	  controlled through /sys/devices/system/cpu.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c198b7e..c963881 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -122,7 +122,6 @@ config DEBUG_NX_TEST
 config DOUBLEFAULT
 	default y
 	bool "Enable doublefault exception handler" if EXPERT
-	depends on X86_32
 	---help---
 	  This option allows trapping of rare doublefault exceptions that
 	  would otherwise cause a system to silently reboot. Disabling this
@@ -304,4 +303,14 @@ config DEBUG_NMI_SELFTEST
 
 	  If unsure, say N.
 
+config X86_DEBUG_STATIC_CPU_HAS
+	bool "Debug alternatives"
+	depends on DEBUG_KERNEL
+	---help---
+	  This option causes additional code to be generated which
+	  fails if static_cpu_has() is used before alternatives have
+	  run.
+
+	  If unsure, say N.
+
 endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5c47726..07639c6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,6 +220,12 @@ archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
 	$(Q)$(MAKE) $(clean)=arch/x86/tools
 
+PHONY += kvmconfig
+kvmconfig:
+	$(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config arch/x86/configs/kvm_guest.config
+	$(Q)yes "" | $(MAKE) oldconfig
+
 define archhelp
 	echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
 	echo ' install - Install kernel using'
@@ -233,4 +239,5 @@ define archhelp
 	echo ' bzdisk/fdimage*/isoimage also accept:'
 	echo ' FDARGS="..." 
arguments for the booted kernel' echo ' FDINITRD=file initrd for the booted kernel' + echo ' kvmconfig - Enable additional options for guest kernel support' endef diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c205035..d606463 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -992,18 +992,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params, efi_memory_desc_t *mem_map; efi_status_t status; __u32 desc_version; + bool called_exit = false; u8 nr_entries; int i; size = sizeof(*mem_map) * 32; again: - size += sizeof(*mem_map); + size += sizeof(*mem_map) * 2; _size = size; status = low_alloc(size, 1, (unsigned long *)&mem_map); if (status != EFI_SUCCESS) return status; +get_map: status = efi_call_phys5(sys_table->boottime->get_memory_map, &size, mem_map, &key, &desc_size, &desc_version); if (status == EFI_BUFFER_TOO_SMALL) { @@ -1029,8 +1031,20 @@ again: /* Might as well exit boot services now */ status = efi_call_phys2(sys_table->boottime->exit_boot_services, handle, key); - if (status != EFI_SUCCESS) - goto free_mem_map; + if (status != EFI_SUCCESS) { + /* + * ExitBootServices() will fail if any of the event + * handlers change the memory map. In which case, we + * must be prepared to retry, but only once so that + * we're guaranteed to exit on repeated failures instead + * of spinning forever. + */ + if (called_exit) + goto free_mem_map; + + called_exit = true; + goto get_map; + } /* Historic? */ boot_params->alt_mem_k = 32 * 1024; diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 16f24e6..06e71c2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -27,8 +27,6 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/segment.h> -#include <asm/pgtable_types.h> -#include <asm/page_types.h> #include <asm/boot.h> #include <asm/msr.h> #include <asm/processor-flags.h> diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 94c5446..c941d6a 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -243,6 +243,7 @@ static void parse_zoffset(char *fname) c = fread(buf, 1, sizeof(buf) - 1, file); if (ferror(file)) die("read-error on `zoffset.h'"); + fclose(file); buf[c] = 0; p = (char *)buf; diff --git a/arch/x86/configs/kvm_guest.config b/arch/x86/configs/kvm_guest.config new file mode 100644 index 0000000..f9affcc --- /dev/null +++ b/arch/x86/configs/kvm_guest.config @@ -0,0 +1,28 @@ +CONFIG_NET=y +CONFIG_NET_CORE=y +CONFIG_NETDEVICES=y +CONFIG_BLOCK=y +CONFIG_BLK_DEV=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_INET=y +CONFIG_TTY=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_BINFMT_ELF=y +CONFIG_PCI=y +CONFIG_PCI_MSI=y +CONFIG_DEBUG_KERNEL=y +CONFIG_VIRTUALIZATION=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_VIRTIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_9P_FS=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index cf1a471..bccfca6 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -34,8 +34,6 @@ #include <asm/sys_ia32.h> #include <asm/smap.h> -#define FIX_EFLAGS __FIX_EFLAGS - int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { int err = 0; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 
e99ac27..47538a6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -92,7 +92,7 @@ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */ #define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ - /* 21 available, was AMD_C1E */ +#define X86_FEATURE_ALWAYS (3*32+21) /* "" Always-present feature */ #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ @@ -356,15 +356,36 @@ extern const char * const x86_power_flags[32]; #endif /* CONFIG_X86_64 */ #if __GNUC__ >= 4 +extern void warn_pre_alternatives(void); +extern bool __static_cpu_has_safe(u16 bit); + /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These are only valid after alternatives have run, but will statically * patch the target code for additional performance. - * */ static __always_inline __pure bool __static_cpu_has(u16 bit) { #if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 + +#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS + /* + * Catch too early usage of this before alternatives + * have run. + */ + asm goto("1: jmp %l[t_warn]\n" + "2:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" + " .long 0\n" /* no replacement */ + " .word %P0\n" /* 1: do replace */ + " .byte 2b - 1b\n" /* source len */ + " .byte 0\n" /* replacement len */ + ".previous\n" + /* skipping size check since replacement size = 0 */ + : : "i" (X86_FEATURE_ALWAYS) : : t_warn); +#endif + asm goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" @@ -379,7 +400,13 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) return true; t_no: return false; -#else + +#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS + t_warn: + warn_pre_alternatives(); + return false; +#endif +#else /* GCC_VERSION >= 40500 */ u8 flag; /* Open-coded due to __stringify() in ALTERNATIVE() */ asm volatile("1: movb $0,%0\n" @@ -411,11 +438,94 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) __static_cpu_has(bit) : \ boot_cpu_has(bit) \ ) + +static __always_inline __pure bool _static_cpu_has_safe(u16 bit) +{ +#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 +/* + * We need to spell the jumps to the compiler because, depending on the offset, + * the replacement jump can be bigger than the original jump, and this we cannot + * have. Thus, we force the jump to the widest, 4-byte, signed relative + * offset even though the last would often fit in less bytes. 
+ */ + asm goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n" + "2:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 3f - .\n" /* repl offset */ + " .word %P1\n" /* always replace */ + " .byte 2b - 1b\n" /* src len */ + " .byte 4f - 3f\n" /* repl len */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "3: .byte 0xe9\n .long %l[t_no] - 2b\n" + "4:\n" + ".previous\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 0\n" /* no replacement */ + " .word %P0\n" /* feature bit */ + " .byte 2b - 1b\n" /* src len */ + " .byte 0\n" /* repl len */ + ".previous\n" + : : "i" (bit), "i" (X86_FEATURE_ALWAYS) + : : t_dynamic, t_no); + return true; + t_no: + return false; + t_dynamic: + return __static_cpu_has_safe(bit); +#else /* GCC_VERSION >= 40500 */ + u8 flag; + /* Open-coded due to __stringify() in ALTERNATIVE() */ + asm volatile("1: movb $2,%0\n" + "2:\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 3f - .\n" /* repl offset */ + " .word %P2\n" /* always replace */ + " .byte 2b - 1b\n" /* source len */ + " .byte 4f - 3f\n" /* replacement len */ + ".previous\n" + ".section .discard,\"aw\",@progbits\n" + " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "3: movb $0,%0\n" + "4:\n" + ".previous\n" + ".section .altinstructions,\"a\"\n" + " .long 1b - .\n" /* src offset */ + " .long 5f - .\n" /* repl offset */ + " .word %P1\n" /* feature bit */ + " .byte 4b - 3b\n" /* src len */ + " .byte 6f - 5f\n" /* repl len */ + ".previous\n" + ".section .discard,\"aw\",@progbits\n" + " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */ + ".previous\n" + ".section .altinstr_replacement,\"ax\"\n" + "5: movb $1,%0\n" + "6:\n" + ".previous\n" + : "=qm" (flag) + : "i" (bit), "i" (X86_FEATURE_ALWAYS)); + return (flag == 2 ? __static_cpu_has_safe(bit) : flag); +#endif +} + +#define static_cpu_has_safe(bit) \ +( \ + __builtin_constant_p(boot_cpu_has(bit)) ? \ + boot_cpu_has(bit) : \ + _static_cpu_has_safe(bit) \ +) #else /* * gcc 3.x is too stupid to do the static test; fall back to dynamic. 
*/ -#define static_cpu_has(bit) boot_cpu_has(bit) +#define static_cpu_has(bit) boot_cpu_has(bit) +#define static_cpu_has_safe(bit) boot_cpu_has(bit) #endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 60c89f3..0062a01 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -52,40 +52,40 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6); #define efi_call_phys0(f) \ - efi_call0((void *)(f)) + efi_call0((f)) #define efi_call_phys1(f, a1) \ - efi_call1((void *)(f), (u64)(a1)) + efi_call1((f), (u64)(a1)) #define efi_call_phys2(f, a1, a2) \ - efi_call2((void *)(f), (u64)(a1), (u64)(a2)) + efi_call2((f), (u64)(a1), (u64)(a2)) #define efi_call_phys3(f, a1, a2, a3) \ - efi_call3((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3)) + efi_call3((f), (u64)(a1), (u64)(a2), (u64)(a3)) #define efi_call_phys4(f, a1, a2, a3, a4) \ - efi_call4((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + efi_call4((f), (u64)(a1), (u64)(a2), (u64)(a3), \ (u64)(a4)) #define efi_call_phys5(f, a1, a2, a3, a4, a5) \ - efi_call5((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + efi_call5((f), (u64)(a1), (u64)(a2), (u64)(a3), \ (u64)(a4), (u64)(a5)) #define efi_call_phys6(f, a1, a2, a3, a4, a5, a6) \ - efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3), \ + efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ (u64)(a4), (u64)(a5), (u64)(a6)) #define efi_call_virt0(f) \ - efi_call0((void *)(efi.systab->runtime->f)) + efi_call0((efi.systab->runtime->f)) #define efi_call_virt1(f, a1) \ - efi_call1((void *)(efi.systab->runtime->f), (u64)(a1)) + efi_call1((efi.systab->runtime->f), (u64)(a1)) #define efi_call_virt2(f, a1, a2) \ - efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2)) + efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) #define efi_call_virt3(f, a1, a2, a3) \ - efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3)) #define efi_call_virt4(f, a1, a2, a3, a4) \ - efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4)) #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ - efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4), (u64)(a5)) #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ - efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ + efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 0dc7d9e..e846225 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -81,11 +81,11 @@ enum fixed_addresses { + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, VVAR_PAGE, VSYSCALL_HPET, -#endif #ifdef CONFIG_PARAVIRT_CLOCK PVCLOCK_FIXMAP_BEGIN, PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1, #endif +#endif FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index e25cc33..4d0bda7 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -62,10 +62,8 @@ extern user_regset_set_fn fpregs_set, 
xfpregs_set, fpregs_soft_set, #define xstateregs_active fpregs_active #ifdef CONFIG_MATH_EMULATION -# define HAVE_HWFP (boot_cpu_data.hard_math) extern void finit_soft_fpu(struct i387_soft_struct *soft); #else -# define HAVE_HWFP 1 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif @@ -345,7 +343,7 @@ static inline void __thread_fpu_end(struct task_struct *tsk) static inline void __thread_fpu_begin(struct task_struct *tsk) { - if (!use_eager_fpu()) + if (!static_cpu_has_safe(X86_FEATURE_EAGER_FPU)) clts(); __thread_set_has_fpu(tsk); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3741c65..af9c552 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -59,7 +59,7 @@ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ - | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_RDWRGSFS \ + | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h new file mode 100644 index 0000000..c6b043f --- /dev/null +++ b/arch/x86/include/asm/microcode_amd.h @@ -0,0 +1,78 @@ +#ifndef _ASM_X86_MICROCODE_AMD_H +#define _ASM_X86_MICROCODE_AMD_H + +#include <asm/microcode.h> + +#define UCODE_MAGIC 0x00414d44 +#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 +#define UCODE_UCODE_TYPE 0x00000001 + +#define SECTION_HDR_SIZE 8 +#define CONTAINER_HDR_SZ 12 + +struct equiv_cpu_entry { + u32 installed_cpu; + u32 fixed_errata_mask; + u32 fixed_errata_compare; + u16 equiv_cpu; + u16 res; +} __attribute__((packed)); + +struct microcode_header_amd { + u32 data_code; + u32 patch_id; + u16 mc_patch_data_id; + u8 mc_patch_data_len; + u8 init_flag; + u32 mc_patch_data_checksum; + u32 nb_dev_id; + u32 sb_dev_id; + u16 processor_rev_id; + u8 nb_rev_id; + u8 sb_rev_id; + u8 bios_api_rev; + u8 reserved1[3]; + u32 match_reg[8]; +} __attribute__((packed)); + +struct microcode_amd { + struct microcode_header_amd hdr; + unsigned int mpb[0]; +}; + +static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, + unsigned int sig) +{ + int i = 0; + + if (!equiv_cpu_table) + return 0; + + while (equiv_cpu_table[i].installed_cpu != 0) { + if (sig == equiv_cpu_table[i].installed_cpu) + return equiv_cpu_table[i].equiv_cpu; + + i++; + } + return 0; +} + +extern int __apply_microcode_amd(struct microcode_amd *mc_amd); +extern int apply_microcode_amd(int cpu); +extern enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size); + +#ifdef CONFIG_MICROCODE_AMD_EARLY +#ifdef CONFIG_X86_32 +#define MPB_MAX_SIZE PAGE_SIZE +extern u8 amd_bsp_mpb[MPB_MAX_SIZE]; +#endif +extern void __init load_ucode_amd_bsp(void); +extern void __cpuinit load_ucode_amd_ap(void); +extern int __init save_microcode_in_initrd_amd(void); +#else +static inline void __init load_ucode_amd_bsp(void) {} +static inline void __cpuinit load_ucode_amd_ap(void) {} +static inline int __init save_microcode_in_initrd_amd(void) { return -EINVAL; } +#endif + +#endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h index 5356f92..87a0853 100644 --- a/arch/x86/include/asm/microcode_intel.h +++ b/arch/x86/include/asm/microcode_intel.h @@ -67,10 +67,12 @@ update_match_revision(struct microcode_header_intel 
*mc_header, int rev); extern void __init load_ucode_intel_bsp(void); extern void __cpuinit load_ucode_intel_ap(void); extern void show_ucode_info_early(void); +extern int __init save_microcode_in_initrd_intel(void); #else static inline __init void load_ucode_intel_bsp(void) {} static inline __cpuinit void load_ucode_intel_ap(void) {} static inline void show_ucode_info_early(void) {} +static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } #endif #if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 03f90c8..0208c3c 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -42,17 +42,14 @@ do { \ * __mutex_fastpath_lock_retval - try to take the lock by moving the count * from 1 to a 0 value * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 * - * Change the count from 1 to a value lower than 1, and call <fail_fn> if it - * wasn't 1 originally. This function returns 0 if the fastpath succeeds, - * or anything the slow path function returns + * Change the count from 1 to a value lower than 1. This function returns 0 + * if the fastpath succeeds, or -1 otherwise. */ -static inline int __mutex_fastpath_lock_retval(atomic_t *count, - int (*fail_fn)(atomic_t *)) +static inline int __mutex_fastpath_lock_retval(atomic_t *count) { if (unlikely(atomic_dec_return(count) < 0)) - return fail_fn(count); + return -1; else return 0; } diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 68a87b0..2c543ff 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -37,17 +37,14 @@ do { \ * __mutex_fastpath_lock_retval - try to take the lock by moving the count * from 1 to a 0 value * @count: pointer of type atomic_t - * @fail_fn: function to call if the original value was not 1 * - * Change the count from 1 to a value lower than 1, and call <fail_fn> if - * it wasn't 1 originally. This function returns 0 if the fastpath succeeds, - * or anything the slow path function returns + * Change the count from 1 to a value lower than 1. This function returns 0 + * if the fastpath succeeds, or -1 otherwise. 
*/ -static inline int __mutex_fastpath_lock_retval(atomic_t *count, - int (*fail_fn)(atomic_t *)) +static inline int __mutex_fastpath_lock_retval(atomic_t *count) { if (unlikely(atomic_dec_return(count) < 0)) - return fail_fn(count); + return -1; else return 0; } diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 57cb634..8249df4 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,6 +29,9 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define HSW_IN_TX (1ULL << 32) +#define HSW_IN_TX_CHECKPOINTED (1ULL << 33) + #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1e67223..5b0818b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -506,9 +506,6 @@ static inline unsigned long pages_to_mb(unsigned long npg) return npg >> (20 - PAGE_SHIFT); } -#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ - remap_pfn_range(vma, vaddr, pfn, size, prot) - #if PAGETABLE_LEVELS > 2 static inline int pud_none(pud_t pud) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 22224b3..29937c4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -89,9 +89,9 @@ struct cpuinfo_x86 { char wp_works_ok; /* It doesn't on 386's */ /* Problems on some 486Dx4's and old 386's: */ - char hard_math; char rfu; char pad0; + char pad1; #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ int x86_tlbsize; @@ -164,6 +164,7 @@ extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); +extern void __cpuinit fpu_detect(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_boot_cpu(void); @@ -981,5 +982,5 @@ bool xen_set_default_idle(void); #endif void stop_this_cpu(void *dummy); - +void df_debug(struct pt_regs *regs, long error_code); #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index beff97f..7a95816 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -7,10 +7,10 @@ #include <asm/processor-flags.h> -#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ - X86_EFLAGS_CF) + X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 41fc93a..2f4d924 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -16,7 +16,7 @@ static inline void native_clts(void) * all loads stores around it, which can hurt performance. 
Solution is to * use a variable and mimic reads and writes to it to enforce serialization */ -static unsigned long __force_order; +extern unsigned long __force_order; static inline unsigned long native_read_cr0(void) { diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 50a7fc0..cf51200 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void) static inline void __flush_tlb_one(unsigned long addr) { - __flush_tlb_single(addr); + __flush_tlb_single(addr); } #define TLB_FLUSH_ALL -1UL diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 142810c..4f7923d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -235,7 +235,7 @@ extern long __copy_user_nocache(void *dst, const void __user *src, static inline int __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) { - might_sleep(); + might_fault(); return __copy_user_nocache(dst, src, size, 1); } diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 2af848d..bb04650 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -170,6 +170,9 @@ #define MSR_KNC_EVNTSEL0 0x00000028 #define MSR_KNC_EVNTSEL1 0x00000029 +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + /* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. */ diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index 54991a7..180a0c3 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -2,75 +2,129 @@ #define _UAPI_ASM_X86_PROCESSOR_FLAGS_H /* Various flags defined: can be included from assembler. 
*/ +#include <linux/const.h> + /* * EFLAGS bits */ -#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ -#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */ -#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ -#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ -#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ -#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ -#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ -#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */ -#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */ -#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */ -#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */ -#define X86_EFLAGS_NT 0x00004000 /* Nested Task */ -#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */ -#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */ -#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */ -#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */ -#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ -#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ +#define X86_EFLAGS_CF_BIT 0 /* Carry Flag */ +#define X86_EFLAGS_CF _BITUL(X86_EFLAGS_CF_BIT) +#define X86_EFLAGS_FIXED_BIT 1 /* Bit 1 - always on */ +#define X86_EFLAGS_FIXED _BITUL(X86_EFLAGS_FIXED_BIT) +#define X86_EFLAGS_PF_BIT 2 /* Parity Flag */ +#define X86_EFLAGS_PF _BITUL(X86_EFLAGS_PF_BIT) +#define X86_EFLAGS_AF_BIT 4 /* Auxiliary carry Flag */ +#define X86_EFLAGS_AF _BITUL(X86_EFLAGS_AF_BIT) +#define X86_EFLAGS_ZF_BIT 6 /* Zero Flag */ +#define X86_EFLAGS_ZF _BITUL(X86_EFLAGS_ZF_BIT) +#define X86_EFLAGS_SF_BIT 7 /* Sign Flag */ +#define X86_EFLAGS_SF _BITUL(X86_EFLAGS_SF_BIT) +#define X86_EFLAGS_TF_BIT 8 /* Trap Flag */ +#define X86_EFLAGS_TF _BITUL(X86_EFLAGS_TF_BIT) +#define X86_EFLAGS_IF_BIT 9 /* Interrupt Flag */ +#define X86_EFLAGS_IF _BITUL(X86_EFLAGS_IF_BIT) +#define X86_EFLAGS_DF_BIT 10 /* Direction Flag */ +#define X86_EFLAGS_DF _BITUL(X86_EFLAGS_DF_BIT) +#define X86_EFLAGS_OF_BIT 11 /* Overflow Flag */ +#define X86_EFLAGS_OF _BITUL(X86_EFLAGS_OF_BIT) +#define X86_EFLAGS_IOPL_BIT 12 /* I/O Privilege Level (2 bits) */ +#define X86_EFLAGS_IOPL (_AC(3,UL) << X86_EFLAGS_IOPL_BIT) +#define X86_EFLAGS_NT_BIT 14 /* Nested Task */ +#define X86_EFLAGS_NT _BITUL(X86_EFLAGS_NT_BIT) +#define X86_EFLAGS_RF_BIT 16 /* Resume Flag */ +#define X86_EFLAGS_RF _BITUL(X86_EFLAGS_RF_BIT) +#define X86_EFLAGS_VM_BIT 17 /* Virtual Mode */ +#define X86_EFLAGS_VM _BITUL(X86_EFLAGS_VM_BIT) +#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */ +#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT) +#define X86_EFLAGS_AC_BIT 18 /* Alignment Check/Access Control */ +#define X86_EFLAGS_AC _BITUL(X86_EFLAGS_AC_BIT) +#define X86_EFLAGS_VIF_BIT 19 /* Virtual Interrupt Flag */ +#define X86_EFLAGS_VIF _BITUL(X86_EFLAGS_VIF_BIT) +#define X86_EFLAGS_VIP_BIT 20 /* Virtual Interrupt Pending */ +#define X86_EFLAGS_VIP _BITUL(X86_EFLAGS_VIP_BIT) +#define X86_EFLAGS_ID_BIT 21 /* CPUID detection */ +#define X86_EFLAGS_ID _BITUL(X86_EFLAGS_ID_BIT) /* * Basic CPU control in CR0 */ -#define X86_CR0_PE 0x00000001 /* Protection Enable */ -#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */ -#define X86_CR0_EM 0x00000004 /* Emulation */ -#define X86_CR0_TS 0x00000008 /* Task Switched */ -#define X86_CR0_ET 0x00000010 /* Extension Type */ -#define X86_CR0_NE 0x00000020 /* Numeric Error */ -#define X86_CR0_WP 0x00010000 /* Write Protect */ -#define X86_CR0_AM 0x00040000 /* Alignment Mask */ -#define X86_CR0_NW 0x20000000 /* Not Write-through */ -#define X86_CR0_CD 0x40000000 /* Cache 
Disable */ -#define X86_CR0_PG 0x80000000 /* Paging */ +#define X86_CR0_PE_BIT 0 /* Protection Enable */ +#define X86_CR0_PE _BITUL(X86_CR0_PE_BIT) +#define X86_CR0_MP_BIT 1 /* Monitor Coprocessor */ +#define X86_CR0_MP _BITUL(X86_CR0_MP_BIT) +#define X86_CR0_EM_BIT 2 /* Emulation */ +#define X86_CR0_EM _BITUL(X86_CR0_EM_BIT) +#define X86_CR0_TS_BIT 3 /* Task Switched */ +#define X86_CR0_TS _BITUL(X86_CR0_TS_BIT) +#define X86_CR0_ET_BIT 4 /* Extension Type */ +#define X86_CR0_ET _BITUL(X86_CR0_ET_BIT) +#define X86_CR0_NE_BIT 5 /* Numeric Error */ +#define X86_CR0_NE _BITUL(X86_CR0_NE_BIT) +#define X86_CR0_WP_BIT 16 /* Write Protect */ +#define X86_CR0_WP _BITUL(X86_CR0_WP_BIT) +#define X86_CR0_AM_BIT 18 /* Alignment Mask */ +#define X86_CR0_AM _BITUL(X86_CR0_AM_BIT) +#define X86_CR0_NW_BIT 29 /* Not Write-through */ +#define X86_CR0_NW _BITUL(X86_CR0_NW_BIT) +#define X86_CR0_CD_BIT 30 /* Cache Disable */ +#define X86_CR0_CD _BITUL(X86_CR0_CD_BIT) +#define X86_CR0_PG_BIT 31 /* Paging */ +#define X86_CR0_PG _BITUL(X86_CR0_PG_BIT) /* * Paging options in CR3 */ -#define X86_CR3_PWT 0x00000008 /* Page Write Through */ -#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */ -#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */ +#define X86_CR3_PWT_BIT 3 /* Page Write Through */ +#define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT) +#define X86_CR3_PCD_BIT 4 /* Page Cache Disable */ +#define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT) +#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */ /* * Intel CPU features in CR4 */ -#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */ -#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */ -#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */ -#define X86_CR4_DE 0x00000008 /* enable debugging extensions */ -#define X86_CR4_PSE 0x00000010 /* enable page size extensions */ -#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */ -#define X86_CR4_MCE 0x00000040 /* Machine check enable */ -#define X86_CR4_PGE 0x00000080 /* enable global pages */ -#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */ -#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ -#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ -#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ -#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */ -#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */ -#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */ -#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */ -#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */ +#define X86_CR4_VME_BIT 0 /* enable vm86 extensions */ +#define X86_CR4_VME _BITUL(X86_CR4_VME_BIT) +#define X86_CR4_PVI_BIT 1 /* virtual interrupts flag enable */ +#define X86_CR4_PVI _BITUL(X86_CR4_PVI_BIT) +#define X86_CR4_TSD_BIT 2 /* disable time stamp at ipl 3 */ +#define X86_CR4_TSD _BITUL(X86_CR4_TSD_BIT) +#define X86_CR4_DE_BIT 3 /* enable debugging extensions */ +#define X86_CR4_DE _BITUL(X86_CR4_DE_BIT) +#define X86_CR4_PSE_BIT 4 /* enable page size extensions */ +#define X86_CR4_PSE _BITUL(X86_CR4_PSE_BIT) +#define X86_CR4_PAE_BIT 5 /* enable physical address extensions */ +#define X86_CR4_PAE _BITUL(X86_CR4_PAE_BIT) +#define X86_CR4_MCE_BIT 6 /* Machine check enable */ +#define X86_CR4_MCE _BITUL(X86_CR4_MCE_BIT) +#define X86_CR4_PGE_BIT 7 /* enable global pages */ +#define X86_CR4_PGE _BITUL(X86_CR4_PGE_BIT) +#define X86_CR4_PCE_BIT 8 /* enable performance counters at ipl 3 
*/ +#define X86_CR4_PCE _BITUL(X86_CR4_PCE_BIT) +#define X86_CR4_OSFXSR_BIT 9 /* enable fast FPU save and restore */ +#define X86_CR4_OSFXSR _BITUL(X86_CR4_OSFXSR_BIT) +#define X86_CR4_OSXMMEXCPT_BIT 10 /* enable unmasked SSE exceptions */ +#define X86_CR4_OSXMMEXCPT _BITUL(X86_CR4_OSXMMEXCPT_BIT) +#define X86_CR4_VMXE_BIT 13 /* enable VMX virtualization */ +#define X86_CR4_VMXE _BITUL(X86_CR4_VMXE_BIT) +#define X86_CR4_SMXE_BIT 14 /* enable safer mode (TXT) */ +#define X86_CR4_SMXE _BITUL(X86_CR4_SMXE_BIT) +#define X86_CR4_FSGSBASE_BIT 16 /* enable RDWRFSGS support */ +#define X86_CR4_FSGSBASE _BITUL(X86_CR4_FSGSBASE_BIT) +#define X86_CR4_PCIDE_BIT 17 /* enable PCID support */ +#define X86_CR4_PCIDE _BITUL(X86_CR4_PCIDE_BIT) +#define X86_CR4_OSXSAVE_BIT 18 /* enable xsave and xrestore */ +#define X86_CR4_OSXSAVE _BITUL(X86_CR4_OSXSAVE_BIT) +#define X86_CR4_SMEP_BIT 20 /* enable SMEP support */ +#define X86_CR4_SMEP _BITUL(X86_CR4_SMEP_BIT) +#define X86_CR4_SMAP_BIT 21 /* enable SMAP support */ +#define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) /* * x86-64 Task Priority Register, CR8 */ -#define X86_CR8_TPR 0x0000000F /* task priority register */ +#define X86_CR8_TPR _AC(0x0000000f,UL) /* task priority register */ /* * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7bd3bd3..612ed34 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -67,7 +67,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o +obj-$(CONFIG_DOUBLEFAULT) += doublefault.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o @@ -93,6 +93,7 @@ obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o +obj-$(CONFIG_MICROCODE_AMD_EARLY) += microcode_amd_early.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 904611b..1600b1c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2302,7 +2302,7 @@ static void lapic_resume(void) apic_write(APIC_SPIV, apic_pm_state.apic_spiv); apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +#if defined(CONFIG_X86_MCE_INTEL) if (maxlvt >= 5) apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); #endif diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 0ef4bba..d67c4be 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -28,7 +28,6 @@ void foo(void) OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor); OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model); OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask); - OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math); OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level); OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability); OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b0684e4..47b56a7 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -31,11 +31,15 @@ 
obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o +endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o endif + obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4112be9..0344534 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -17,15 +17,6 @@ #include <asm/paravirt.h> #include <asm/alternative.h> -static int __init no_387(char *s) -{ - boot_cpu_data.hard_math = 0; - write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0()); - return 1; -} - -__setup("no387", no_387); - static double __initdata x = 4195835.0; static double __initdata y = 3145727.0; @@ -44,15 +35,6 @@ static void __init check_fpu(void) { s32 fdiv_bug; - if (!boot_cpu_data.hard_math) { -#ifndef CONFIG_MATH_EMULATION - pr_emerg("No coprocessor found and no math emulation present\n"); - pr_emerg("Giving up\n"); - for (;;) ; -#endif - return; - } - kernel_fpu_begin(); /* @@ -107,5 +89,6 @@ void __init check_bugs(void) * kernel_fpu_begin/end() in check_fpu() relies on the patched * alternative instructions. */ - check_fpu(); + if (cpu_has_fpu) + check_fpu(); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22018f7..a4a07c0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -711,10 +711,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) return; cpu_detect(c); - get_cpu_vendor(c); - get_cpu_cap(c); + fpu_detect(c); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -724,6 +723,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); + + setup_force_cpu_cap(X86_FEATURE_ALWAYS); } void __init early_cpu_init(void) @@ -1363,3 +1364,17 @@ void __cpuinit cpu_init(void) fpu_init(); } #endif + +#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS +void warn_pre_alternatives(void) +{ + WARN(1, "You're using static_cpu_has before alternatives have run!\n"); +} +EXPORT_SYMBOL_GPL(warn_pre_alternatives); +#endif + +inline bool __static_cpu_has_safe(u16 bit) +{ + return boot_cpu_has(bit); +} +EXPORT_SYMBOL_GPL(__static_cpu_has_safe); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index d048d5c..7582f47 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -333,7 +333,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) switch (dir0_lsn) { case 0xd: /* either a 486SLC or DLC w/o DEVID */ dir0_msn = 0; - p = Cx486_name[(c->hard_math) ? 1 : 0]; + p = Cx486_name[(cpu_has_fpu ? 
1 : 0)]; break; case 0xe: /* a 486S A step */ diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 7c6f7d5..8dc72dd 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -618,36 +618,34 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < num_cache_leaves; i++) { - struct _cpuid4_info_regs this_leaf; + struct _cpuid4_info_regs this_leaf = {}; int retval; retval = cpuid4_cache_lookup_regs(i, &this_leaf); - if (retval >= 0) { - switch (this_leaf.eax.split.level) { - case 1: - if (this_leaf.eax.split.type == - CACHE_TYPE_DATA) - new_l1d = this_leaf.size/1024; - else if (this_leaf.eax.split.type == - CACHE_TYPE_INST) - new_l1i = this_leaf.size/1024; - break; - case 2: - new_l2 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l2_id = c->apicid & ~((1 << index_msb) - 1); - break; - case 3: - new_l3 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; - index_msb = get_count_order( - num_threads_sharing); - l3_id = c->apicid & ~((1 << index_msb) - 1); - break; - default: - break; - } + if (retval < 0) + continue; + + switch (this_leaf.eax.split.level) { + case 1: + if (this_leaf.eax.split.type == CACHE_TYPE_DATA) + new_l1d = this_leaf.size/1024; + else if (this_leaf.eax.split.type == CACHE_TYPE_INST) + new_l1i = this_leaf.size/1024; + break; + case 2: + new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid & ~((1 << index_msb) - 1); + break; + case 3: + new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid & ~((1 << index_msb) - 1); + break; + default: + break; } } } diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 68a3343..9e451b0 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -167,7 +167,7 @@ static void post_set(void) setCx86(CX86_CCR3, ccr3); /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); + write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ if (cpu_has_pge) diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index fa72a39..d4cdfa6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -510,8 +510,9 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) { - unsigned int mask_lo, mask_hi, base_lo, base_hi; - unsigned int tmp, hi; + u32 mask_lo, mask_hi, base_lo, base_hi; + unsigned int hi; + u64 tmp, mask; /* * get_mtrr doesn't need to update mtrr_state, also it could be called @@ -532,18 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask: */ - tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; - mask_lo = size_or_mask | tmp; + tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask = size_or_mask | tmp; /* Expand tmp with high bits to all 1s: */ - hi = fls(tmp); + hi = fls64(tmp); if (hi > 0) { - tmp |= ~((1<<(hi - 1)) - 
1); + tmp |= ~((1ULL<<(hi - 1)) - 1); - if (tmp != mask_lo) { + if (tmp != mask) { printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - mask_lo = tmp; + mask = tmp; } } @@ -551,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, * This works correctly if size is a power of two, i.e. a * contiguous range: */ - *size = -mask_lo; - *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *size = -mask; + *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; *type = base_lo & 0xff; out_put_cpu: @@ -701,7 +702,7 @@ static void post_set(void) __releases(set_atomicity_lock) mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); /* Enable caches */ - write_cr0(read_cr0() & 0xbfffffff); + write_cr0(read_cr0() & ~X86_CR0_CD); /* Restore value of CR4 */ if (cpu_has_pge) diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 726bf96..ca22b73 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -305,7 +305,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, return -EINVAL; } - if (base & size_or_mask || size & size_or_mask) { + if ((base | (base + size - 1)) >> + (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { pr_warning("mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -583,6 +584,7 @@ static struct syscore_ops mtrr_syscore_ops = { int __initdata changed_by_mtrr_cleanup; +#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1)) /** * mtrr_bp_init - initialize mtrrs on the boot CPU * @@ -600,7 +602,7 @@ void __init mtrr_bp_init(void) if (cpu_has_mtrr) { mtrr_if = &generic_mtrr_ops; - size_or_mask = 0xff000000; /* 36 bits */ + size_or_mask = SIZE_OR_MASK_BITS(36); size_and_mask = 0x00f00000; phys_addr = 36; @@ -619,7 +621,7 @@ void __init mtrr_bp_init(void) boot_cpu_data.x86_mask == 0x4)) phys_addr = 36; - size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); + size_or_mask = SIZE_OR_MASK_BITS(phys_addr); size_and_mask = ~size_or_mask & 0xfffff00000ULL; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && boot_cpu_data.x86 == 6) { @@ -627,7 +629,7 @@ void __init mtrr_bp_init(void) * VIA C* family have Intel style MTRRs, * but don't support PAE */ - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; phys_addr = 32; } @@ -637,21 +639,21 @@ void __init mtrr_bp_init(void) if (cpu_has_k6_mtrr) { /* Pre-Athlon (K6) AMD CPU MTRRs */ mtrr_if = mtrr_ops[X86_VENDOR_AMD]; - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; case X86_VENDOR_CENTAUR: if (cpu_has_centaur_mcr) { mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR]; - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; case X86_VENDOR_CYRIX: if (cpu_has_cyrix_arr) { mtrr_if = mtrr_ops[X86_VENDOR_CYRIX]; - size_or_mask = 0xfff00000; /* 32 bits */ + size_or_mask = SIZE_OR_MASK_BITS(32); size_and_mask = 0; } break; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1025f3c..9e581c5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event) * check that PEBS LBR correction does not conflict with * whatever the user is asking with attr->branch_sample_type */ - if (event->attr.precise_ip > 1) { + if (event->attr.precise_ip > 1 && + 
x86_pmu.intel_cap.pebs_format < 2) { u64 *br_type = &event->attr.branch_sample_type; if (has_branch_stack(event)) { @@ -568,7 +569,7 @@ struct sched_state { struct perf_sched { int max_weight; int max_events; - struct event_constraint **constraints; + struct perf_event **events; struct sched_state state; int saved_states; struct sched_state saved[SCHED_STATES_MAX]; @@ -577,7 +578,7 @@ struct perf_sched { /* * Initialize interator that runs through all events and counters. */ -static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c, +static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, int num, int wmin, int wmax) { int idx; @@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint ** memset(sched, 0, sizeof(*sched)); sched->max_events = num; sched->max_weight = wmax; - sched->constraints = c; + sched->events = events; for (idx = 0; idx < num; idx++) { - if (c[idx]->weight == wmin) + if (events[idx]->hw.constraint->weight == wmin) break; } @@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched) if (sched->state.event >= sched->max_events) return false; - c = sched->constraints[sched->state.event]; - + c = sched->events[sched->state.event]->hw.constraint; /* Prefer fixed purpose counters */ if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) { idx = INTEL_PMC_IDX_FIXED; @@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched) if (sched->state.weight > sched->max_weight) return false; } - c = sched->constraints[sched->state.event]; + c = sched->events[sched->state.event]->hw.constraint; } while (c->weight != sched->state.weight); sched->state.counter = 0; /* start with first counter */ @@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched) /* * Assign a counter for each event. */ -int perf_assign_events(struct event_constraint **constraints, int n, +int perf_assign_events(struct perf_event **events, int n, int wmin, int wmax, int *assign) { struct perf_sched sched; - perf_sched_init(&sched, constraints, n, wmin, wmax); + perf_sched_init(&sched, events, n, wmin, wmax); do { if (!perf_sched_find_counter(&sched)) @@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n, int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { - struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; + struct event_constraint *c; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct perf_event *e; int i, wmin, wmax, num = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { + hwc = &cpuc->event_list[i]->hw; c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); - constraints[i] = c; + hwc->constraint = c; + wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); } @@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ for (i = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; - c = constraints[i]; + c = hwc->constraint; /* never assigned */ if (hwc->idx == -1) @@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* slow path */ if (i != n) - num = perf_assign_events(constraints, n, wmin, wmax, assign); + num = perf_assign_events(cpuc->event_list, n, wmin, + wmax, assign); /* + * Mark the event as committed, so we do not put_constraint() + * in case new events are added and fail scheduling. 
+ */ + if (!num && assign) { + for (i = 0; i < n; i++) { + e = cpuc->event_list[i]; + e->hw.flags |= PERF_X86_EVENT_COMMITTED; + } + } + /* * scheduling failed or is just a simulation, * free resources if necessary */ if (!assign || num) { for (i = 0; i < n; i++) { + e = cpuc->event_list[i]; + /* + * do not put_constraint() on comitted events, + * because they are good to go + */ + if ((e->hw.flags & PERF_X86_EVENT_COMMITTED)) + continue; + if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); + x86_pmu.put_event_constraints(cpuc, e); } } return num ? -EINVAL : 0; @@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags) int i; /* + * event is descheduled + */ + event->hw.flags &= ~PERF_X86_EVENT_COMMITTED; + + /* * If we're called during a txn, we don't need to do anything. * The events never got scheduled and ->cancel_txn will truncate * the event_list. @@ -1249,10 +1276,20 @@ void perf_events_lapic_init(void) static int __kprobes perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { + int ret; + u64 start_clock; + u64 finish_clock; + if (!atomic_read(&active_events)) return NMI_DONE; - return x86_pmu.handle_irq(regs); + start_clock = local_clock(); + ret = x86_pmu.handle_irq(regs); + finish_clock = local_clock(); + + perf_sample_event_took(finish_clock - start_clock); + + return ret; } struct event_constraint emptyconstraint; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index ba9aadf..97e557b 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -63,10 +63,12 @@ struct event_constraint { int flags; }; /* - * struct event_constraint flags + * struct hw_perf_event.flags flags */ #define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ #define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ +#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */ +#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -227,11 +229,14 @@ struct cpu_hw_events { * - inv * - edge * - cnt-mask + * - in_tx + * - in_tx_checkpointed * The other filters are supported by fixed counters. * The any-thread option is supported starting with v3. */ +#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED) #define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) + EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS) /* * Constraint on the Event code + UMask @@ -247,6 +252,11 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) +/* DataLA version of store sampling without extra enable bit. */ +#define INTEL_PST_HSW_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -301,6 +311,11 @@ union perf_capabilities { u64 pebs_arch_reg:1; u64 pebs_format:4; u64 smm_freeze:1; + /* + * PMU supports separate counter range for writing + * values > 32bit. 
+ */ + u64 full_width_write:1; }; u64 capabilities; }; @@ -375,6 +390,7 @@ struct x86_pmu { struct event_constraint *event_constraints; struct x86_pmu_quirk *quirks; int perfctr_second_write; + bool late_ack; /* * sysfs attrs @@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, void x86_pmu_enable_all(int added); -int perf_assign_events(struct event_constraint **constraints, int n, +int perf_assign_events(struct perf_event **events, int n, int wmin, int wmax, int *assign); int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign); @@ -633,6 +649,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[]; extern struct event_constraint intel_ivb_pebs_event_constraints[]; +extern struct event_constraint intel_hsw_pebs_event_constraints[]; + struct event_constraint *intel_pebs_constraints(struct perf_event *event); void intel_pmu_pebs_enable(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 7e28d94..4cbe032 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -648,48 +648,48 @@ static __initconst const struct x86_pmu amd_pmu = { .cpu_dead = amd_pmu_cpu_dead, }; -static int setup_event_constraints(void) +static int __init amd_core_pmu_init(void) { - if (boot_cpu_data.x86 == 0x15) + if (!cpu_has_perfctr_core) + return 0; + + switch (boot_cpu_data.x86) { + case 0x15: + pr_cont("Fam15h "); x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; - return 0; -} + break; -static int setup_perfctr_core(void) -{ - if (!cpu_has_perfctr_core) { - WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h, - KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!"); + default: + pr_err("core perfctr but no constraints; unknown hardware!\n"); return -ENODEV; } - WARN(x86_pmu.get_event_constraints == amd_get_event_constraints, - KERN_ERR "hw perf events core counters need constraints handler!"); - /* * If core performance counter extensions exists, we must use * MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also - * x86_pmu_addr_offset(). + * amd_pmu_addr_offset(). */ x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.perfctr = MSR_F15H_PERF_CTR; x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; - printk(KERN_INFO "perf: AMD core performance counters detected\n"); - + pr_cont("core perfctr, "); return 0; } __init int amd_pmu_init(void) { + int ret; + /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) return -ENODEV; x86_pmu = amd_pmu; - setup_event_constraints(); - setup_perfctr_core(); + ret = amd_core_pmu_init(); + if (ret) + return ret; /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c new file mode 100644 index 0000000..0db655e --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c @@ -0,0 +1,504 @@ +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Steven Kinney <Steven.Kinney@amd.com> + * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com> + * + * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/perf_event.h> +#include <linux/module.h> +#include <linux/cpumask.h> +#include <linux/slab.h> + +#include "perf_event.h" +#include "perf_event_amd_iommu.h" + +#define COUNTER_SHIFT 16 + +#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8)) +#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg)) + +/* iommu pmu config masks */ +#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL)) +#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL) +#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL) +#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL) +#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL) +#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL) +#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL) + +static struct perf_amd_iommu __perf_iommu; + +struct perf_amd_iommu { + struct pmu pmu; + u8 max_banks; + u8 max_counters; + u64 cntr_assign_mask; + raw_spinlock_t lock; + const struct attribute_group *attr_groups[4]; +}; + +#define format_group attr_groups[0] +#define cpumask_group attr_groups[1] +#define events_group attr_groups[2] +#define null_group attr_groups[3] + +/*--------------------------------------------- + * sysfs format attributes + *---------------------------------------------*/ +PMU_FORMAT_ATTR(csource, "config:0-7"); +PMU_FORMAT_ATTR(devid, "config:8-23"); +PMU_FORMAT_ATTR(pasid, "config:24-39"); +PMU_FORMAT_ATTR(domid, "config:40-55"); +PMU_FORMAT_ATTR(devid_mask, "config1:0-15"); +PMU_FORMAT_ATTR(pasid_mask, "config1:16-31"); +PMU_FORMAT_ATTR(domid_mask, "config1:32-47"); + +static struct attribute *iommu_format_attrs[] = { + &format_attr_csource.attr, + &format_attr_devid.attr, + &format_attr_pasid.attr, + &format_attr_domid.attr, + &format_attr_devid_mask.attr, + &format_attr_pasid_mask.attr, + &format_attr_domid_mask.attr, + NULL, +}; + +static struct attribute_group amd_iommu_format_group = { + .name = "format", + .attrs = iommu_format_attrs, +}; + +/*--------------------------------------------- + * sysfs events attributes + *---------------------------------------------*/ +struct amd_iommu_event_desc { + struct kobj_attribute attr; + const char *event; +}; + +static ssize_t _iommu_event_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct amd_iommu_event_desc *event = + container_of(attr, struct amd_iommu_event_desc, attr); + return sprintf(buf, "%s\n", event->event); +} + +#define AMD_IOMMU_EVENT_DESC(_name, _event) \ +{ \ + .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \ + .event = _event, \ +} + +static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = { + AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"), + AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"), + AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"), + AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"), + AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"), + AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"), + AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"), + AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"), + AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"), + AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"), + AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"), + AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"), + 
AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"), + AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"), + AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"), + AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"), + { /* end: all zeroes */ }, +}; + +/*--------------------------------------------- + * sysfs cpumask attributes + *---------------------------------------------*/ +static cpumask_t iommu_cpumask; + +static ssize_t _iommu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask); + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} +static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL); + +static struct attribute *iommu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group amd_iommu_cpumask_group = { + .attrs = iommu_cpumask_attrs, +}; + +/*---------------------------------------------*/ + +static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu) +{ + unsigned long flags; + int shift, bank, cntr, retval; + int max_banks = perf_iommu->max_banks; + int max_cntrs = perf_iommu->max_counters; + + raw_spin_lock_irqsave(&perf_iommu->lock, flags); + + for (bank = 0, shift = 0; bank < max_banks; bank++) { + for (cntr = 0; cntr < max_cntrs; cntr++) { + shift = bank + (bank*3) + cntr; + if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) { + continue; + } else { + perf_iommu->cntr_assign_mask |= (1ULL<<shift); + retval = ((u16)((u16)bank<<8) | (u8)(cntr)); + goto out; + } + } + } + retval = -ENOSPC; +out: + raw_spin_unlock_irqrestore(&perf_iommu->lock, flags); + return retval; +} + +static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu, + u8 bank, u8 cntr) +{ + unsigned long flags; + int max_banks, max_cntrs; + int shift = 0; + + max_banks = perf_iommu->max_banks; + max_cntrs = perf_iommu->max_counters; + + if ((bank > max_banks) || (cntr > max_cntrs)) + return -EINVAL; + + shift = bank + cntr + (bank*3); + + raw_spin_lock_irqsave(&perf_iommu->lock, flags); + perf_iommu->cntr_assign_mask &= ~(1ULL<<shift); + raw_spin_unlock_irqrestore(&perf_iommu->lock, flags); + + return 0; +} + +static int perf_iommu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_amd_iommu *perf_iommu; + u64 config, config1; + + /* test the event attr type check for PMU enumeration */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* + * IOMMU counters are shared across all cores. + * Therefore, it does not support per-process mode. + * Also, it does not support event sampling mode. 
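The bank/counter bookkeeping above is compact enough to be easy to misread: get_next_avail_iommu_bnk_cntr() keeps one bit per (bank, counter) pair at bit position bank*4 + cntr (written as bank + bank*3 + cntr), and the winner is packed as (bank << 8) | cntr, the layout that _GET_BANK()/_GET_CNTR() later unpack from hw.extra_reg.reg. A stand-alone sketch of the same arithmetic, with made-up bank/counter limits:

#include <stdio.h>
#include <stdint.h>

static uint64_t assign_mask;    /* cf. perf_iommu->cntr_assign_mask */

static int alloc_bnk_cntr(int max_banks, int max_cntrs)
{
    for (int bank = 0; bank < max_banks; bank++) {
        for (int cntr = 0; cntr < max_cntrs; cntr++) {
            int shift = bank * 4 + cntr;    /* bank + bank*3 + cntr */

            if (assign_mask & (1ull << shift))
                continue;
            assign_mask |= 1ull << shift;
            return (bank << 8) | cntr;      /* what _GET_BANK/_GET_CNTR decode */
        }
    }
    return -1;                              /* -ENOSPC in the driver */
}

static void free_bnk_cntr(int packed)
{
    int bank = (packed >> 8) & 0xff;
    int cntr = packed & 0xff;

    assign_mask &= ~(1ull << (bank * 4 + cntr));
}

int main(void)
{
    int a = alloc_bnk_cntr(2, 4);
    int b = alloc_bnk_cntr(2, 4);

    printf("first:  bank=%d cntr=%d\n", a >> 8, a & 0xff);
    printf("second: bank=%d cntr=%d\n", b >> 8, b & 0xff);
    free_bnk_cntr(a);
    printf("mask after free: %#llx\n", (unsigned long long)assign_mask);
    return 0;
}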
+ */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + /* IOMMU counters do not have usr/os/guest/host bits */ + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + perf_iommu = &__perf_iommu; + + if (event->pmu != &perf_iommu->pmu) + return -ENOENT; + + if (perf_iommu) { + config = event->attr.config; + config1 = event->attr.config1; + } else { + return -EINVAL; + } + + /* integrate with iommu base devid (0000), assume one iommu */ + perf_iommu->max_banks = + amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID); + perf_iommu->max_counters = + amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID); + if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0)) + return -EINVAL; + + /* update the hw_perf_event struct with the iommu config data */ + hwc->config = config; + hwc->extra_reg.config = config1; + + return 0; +} + +static void perf_iommu_enable_event(struct perf_event *ev) +{ + u8 csource = _GET_CSOURCE(ev); + u16 devid = _GET_DEVID(ev); + u64 reg = 0ULL; + + reg = csource; + amd_iommu_pc_get_set_reg_val(devid, + _GET_BANK(ev), _GET_CNTR(ev) , + IOMMU_PC_COUNTER_SRC_REG, ®, true); + + reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32); + if (reg) + reg |= (1UL << 31); + amd_iommu_pc_get_set_reg_val(devid, + _GET_BANK(ev), _GET_CNTR(ev) , + IOMMU_PC_DEVID_MATCH_REG, ®, true); + + reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32); + if (reg) + reg |= (1UL << 31); + amd_iommu_pc_get_set_reg_val(devid, + _GET_BANK(ev), _GET_CNTR(ev) , + IOMMU_PC_PASID_MATCH_REG, ®, true); + + reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32); + if (reg) + reg |= (1UL << 31); + amd_iommu_pc_get_set_reg_val(devid, + _GET_BANK(ev), _GET_CNTR(ev) , + IOMMU_PC_DOMID_MATCH_REG, ®, true); +} + +static void perf_iommu_disable_event(struct perf_event *event) +{ + u64 reg = 0ULL; + + amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), + _GET_BANK(event), _GET_CNTR(event), + IOMMU_PC_COUNTER_SRC_REG, ®, true); +} + +static void perf_iommu_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + pr_debug("perf: amd_iommu:perf_iommu_start\n"); + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + if (flags & PERF_EF_RELOAD) { + u64 prev_raw_count = local64_read(&hwc->prev_count); + amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), + _GET_BANK(event), _GET_CNTR(event), + IOMMU_PC_COUNTER_REG, &prev_raw_count, true); + } + + perf_iommu_enable_event(event); + perf_event_update_userpage(event); + +} + +static void perf_iommu_read(struct perf_event *event) +{ + u64 count = 0ULL; + u64 prev_raw_count = 0ULL; + u64 delta = 0ULL; + struct hw_perf_event *hwc = &event->hw; + pr_debug("perf: amd_iommu:perf_iommu_read\n"); + + amd_iommu_pc_get_set_reg_val(_GET_DEVID(event), + _GET_BANK(event), _GET_CNTR(event), + IOMMU_PC_COUNTER_REG, &count, false); + + /* IOMMU pc counter register is only 48 bits */ + count &= 0xFFFFFFFFFFFFULL; + + prev_raw_count = local64_read(&hwc->prev_count); + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + count) != prev_raw_count) + return; + + /* Handling 48-bit counter overflowing */ + delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT); + delta >>= COUNTER_SHIFT; + local64_add(delta, &event->count); + +} + +static void perf_iommu_stop(struct perf_event *event, int flags) +{ + 
struct hw_perf_event *hwc = &event->hw; + u64 config; + + pr_debug("perf: amd_iommu:perf_iommu_stop\n"); + + if (hwc->state & PERF_HES_UPTODATE) + return; + + perf_iommu_disable_event(event); + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + if (hwc->state & PERF_HES_UPTODATE) + return; + + config = hwc->config; + perf_iommu_read(event); + hwc->state |= PERF_HES_UPTODATE; +} + +static int perf_iommu_add(struct perf_event *event, int flags) +{ + int retval; + struct perf_amd_iommu *perf_iommu = + container_of(event->pmu, struct perf_amd_iommu, pmu); + + pr_debug("perf: amd_iommu:perf_iommu_add\n"); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* request an iommu bank/counter */ + retval = get_next_avail_iommu_bnk_cntr(perf_iommu); + if (retval != -ENOSPC) + event->hw.extra_reg.reg = (u16)retval; + else + return retval; + + if (flags & PERF_EF_START) + perf_iommu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void perf_iommu_del(struct perf_event *event, int flags) +{ + struct perf_amd_iommu *perf_iommu = + container_of(event->pmu, struct perf_amd_iommu, pmu); + + pr_debug("perf: amd_iommu:perf_iommu_del\n"); + perf_iommu_stop(event, PERF_EF_UPDATE); + + /* clear the assigned iommu bank/counter */ + clear_avail_iommu_bnk_cntr(perf_iommu, + _GET_BANK(event), + _GET_CNTR(event)); + + perf_event_update_userpage(event); +} + +static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu) +{ + struct attribute **attrs; + struct attribute_group *attr_group; + int i = 0, j; + + while (amd_iommu_v2_event_descs[i].attr.attr.name) + i++; + + attr_group = kzalloc(sizeof(struct attribute *) + * (i + 1) + sizeof(*attr_group), GFP_KERNEL); + if (!attr_group) + return -ENOMEM; + + attrs = (struct attribute **)(attr_group + 1); + for (j = 0; j < i; j++) + attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr; + + attr_group->name = "events"; + attr_group->attrs = attrs; + perf_iommu->events_group = attr_group; + + return 0; +} + +static __init void amd_iommu_pc_exit(void) +{ + if (__perf_iommu.events_group != NULL) { + kfree(__perf_iommu.events_group); + __perf_iommu.events_group = NULL; + } +} + +static __init int _init_perf_amd_iommu( + struct perf_amd_iommu *perf_iommu, char *name) +{ + int ret; + + raw_spin_lock_init(&perf_iommu->lock); + + /* Init format attributes */ + perf_iommu->format_group = &amd_iommu_format_group; + + /* Init cpumask attributes to only core 0 */ + cpumask_set_cpu(0, &iommu_cpumask); + perf_iommu->cpumask_group = &amd_iommu_cpumask_group; + + /* Init events attributes */ + if (_init_events_attrs(perf_iommu) != 0) + pr_err("perf: amd_iommu: Only support raw events.\n"); + + /* Init null attributes */ + perf_iommu->null_group = NULL; + perf_iommu->pmu.attr_groups = perf_iommu->attr_groups; + + ret = perf_pmu_register(&perf_iommu->pmu, name, -1); + if (ret) { + pr_err("perf: amd_iommu: Failed to initialized.\n"); + amd_iommu_pc_exit(); + } else { + pr_info("perf: amd_iommu: Detected. 
(%d banks, %d counters/bank)\n", + amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID), + amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID)); + } + + return ret; +} + +static struct perf_amd_iommu __perf_iommu = { + .pmu = { + .event_init = perf_iommu_event_init, + .add = perf_iommu_add, + .del = perf_iommu_del, + .start = perf_iommu_start, + .stop = perf_iommu_stop, + .read = perf_iommu_read, + }, + .max_banks = 0x00, + .max_counters = 0x00, + .cntr_assign_mask = 0ULL, + .format_group = NULL, + .cpumask_group = NULL, + .events_group = NULL, + .null_group = NULL, +}; + +static __init int amd_iommu_pc_init(void) +{ + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_supported()) { + pr_err("perf: amd_iommu PMU not installed. No support!\n"); + return -ENODEV; + } + + _init_perf_amd_iommu(&__perf_iommu, "amd_iommu"); + + return 0; +} + +device_initcall(amd_iommu_pc_init); diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h new file mode 100644 index 0000000..845d173 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Steven Kinney <Steven.Kinney@amd.com> + * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _PERF_EVENT_AMD_IOMMU_H_ +#define _PERF_EVENT_AMD_IOMMU_H_ + +/* iommu pc mmio region register indexes */ +#define IOMMU_PC_COUNTER_REG 0x00 +#define IOMMU_PC_COUNTER_SRC_REG 0x08 +#define IOMMU_PC_PASID_MATCH_REG 0x10 +#define IOMMU_PC_DOMID_MATCH_REG 0x18 +#define IOMMU_PC_DEVID_MATCH_REG 0x20 +#define IOMMU_PC_COUNTER_REPORT_REG 0x28 + +/* maximun specified bank/counters */ +#define PC_MAX_SPEC_BNKS 64 +#define PC_MAX_SPEC_CNTRS 16 + +/* iommu pc reg masks*/ +#define IOMMU_BASE_DEVID 0x0000 + +/* amd_iommu_init.c external support functions */ +extern bool amd_iommu_pc_supported(void); + +extern u8 amd_iommu_pc_get_max_banks(u16 devid); + +extern u8 amd_iommu_pc_get_max_counters(u16 devid); + +extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); + +#endif /*_PERF_EVENT_AMD_IOMMU_H_*/ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a9e2207..fbc9210 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/export.h> +#include <asm/cpufeature.h> #include <asm/hardirq.h> #include <asm/apic.h> @@ -190,6 +191,22 @@ struct attribute *snb_events_attrs[] = { NULL, }; +static struct event_constraint intel_hsw_event_constraints[] = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_EVENT_CONSTRAINT(0x08a3, 0x4), + /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ + INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4), + /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ + INTEL_EVENT_CONSTRAINT(0x04a3, 0xf), + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int 
hw_event) { return intel_perfmon_event_map[hw_event]; @@ -872,7 +889,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) return true; /* implicit branch sampling to correct PEBS skid */ - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) + if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 && + x86_pmu.intel_cap.pebs_format < 2) return true; return false; @@ -1167,15 +1185,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); /* - * Some chipsets need to unmask the LVTPC in a particular spot - * inside the nmi handler. As a result, the unmasking was pushed - * into all the nmi handlers. - * - * This handler doesn't seem to have any issues with the unmasking - * so it was left at the top. + * No known reason to not always do late ACK, + * but just in case do it opt-in. */ - apic_write(APIC_LVTPC, APIC_DM_NMI); - + if (!x86_pmu.late_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); status = intel_pmu_get_status(); @@ -1188,8 +1202,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) again: intel_pmu_ack_status(status); if (++loops > 100) { - WARN_ONCE(1, "perfevents: irq loop stuck!\n"); - perf_event_print_debug(); + static bool warned = false; + if (!warned) { + WARN(1, "perfevents: irq loop stuck!\n"); + perf_event_print_debug(); + warned = true; + } intel_pmu_reset(); goto done; } @@ -1235,6 +1253,13 @@ again: done: intel_pmu_enable_all(0); + /* + * Only unmask the NMI after the overflow counters + * have been reset. This avoids spurious NMIs on + * Haswell CPUs. + */ + if (x86_pmu.late_ack) + apic_write(APIC_LVTPC, APIC_DM_NMI); return handled; } @@ -1425,7 +1450,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { if ((event->hw.config & c->cmask) == c->code) { - /* hw.flags zeroed at initialization */ event->hw.flags |= c->flags; return c; } @@ -1473,7 +1497,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, static void intel_put_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - event->hw.flags = 0; intel_put_shared_regs_event_constraints(cpuc, event); } @@ -1646,6 +1669,47 @@ static void core_pmu_enable_all(int added) } } +static int hsw_hw_config(struct perf_event *event) +{ + int ret = intel_pmu_hw_config(event); + + if (ret) + return ret; + if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE)) + return 0; + event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED); + + /* + * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with + * PEBS or in ANY thread mode. Since the results are non-sensical forbid + * this combination. 
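hsw_hw_config() above copies the HSW_IN_TX and HSW_IN_TX_CHECKPOINTED bits straight out of attr.config when the CPU reports RTM or HLE; the format attributes added a little further down expose them to user space as in_tx (config:32) and in_tx_cp (config:33). The sketch below shows how such a raw config value is composed; the event/umask numbers are placeholders, not a recommendation.

#include <stdio.h>
#include <stdint.h>

#define FMT_EVENT(e)    ((uint64_t)(e) & 0xff)          /* config:0-7  */
#define FMT_UMASK(u)    (((uint64_t)(u) & 0xff) << 8)    /* config:8-15 */
#define FMT_IN_TX       (1ull << 32)                     /* config:32   */
#define FMT_IN_TX_CP    (1ull << 33)                     /* config:33   */

int main(void)
{
    /* hypothetical event 0xc2, umask 0x01, counted only inside a transaction */
    uint64_t config = FMT_EVENT(0xc2) | FMT_UMASK(0x01) | FMT_IN_TX;

    printf("attr.config = %#llx\n", (unsigned long long)config);
    return 0;
}

Per the check above, combining these bits with ANY-thread counting or a nonzero precise_ip is rejected with -EOPNOTSUPP.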
+ */ + if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) && + ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) || + event->attr.precise_ip > 0)) + return -EOPNOTSUPP; + + return 0; +} + +static struct event_constraint counter2_constraint = + EVENT_CONSTRAINT(0, 0x4, 0); + +static struct event_constraint * +hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct event_constraint *c = intel_get_event_constraints(cpuc, event); + + /* Handle special quirk on in_tx_checkpointed only in counter 2 */ + if (event->hw.config & HSW_IN_TX_CHECKPOINTED) { + if (c->idxmsk64 & (1U << 2)) + return &counter2_constraint; + return &emptyconstraint; + } + + return c; +} + PMU_FORMAT_ATTR(event, "config:0-7" ); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); @@ -1653,6 +1717,8 @@ PMU_FORMAT_ATTR(pc, "config:19" ); PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */ PMU_FORMAT_ATTR(inv, "config:23" ); PMU_FORMAT_ATTR(cmask, "config:24-31" ); +PMU_FORMAT_ATTR(in_tx, "config:32"); +PMU_FORMAT_ATTR(in_tx_cp, "config:33"); static struct attribute *intel_arch_formats_attr[] = { &format_attr_event.attr, @@ -1807,6 +1873,8 @@ static struct attribute *intel_arch3_formats_attr[] = { &format_attr_any.attr, &format_attr_inv.attr, &format_attr_cmask.attr, + &format_attr_in_tx.attr, + &format_attr_in_tx_cp.attr, &format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */ &format_attr_ldlat.attr, /* PEBS load latency */ @@ -1966,6 +2034,15 @@ static __init void intel_nehalem_quirk(void) } } +EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3"); +EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82") + +static struct attribute *hsw_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_hsw), + NULL +}; + __init int intel_pmu_init(void) { union cpuid10_edx edx; @@ -2189,6 +2266,30 @@ __init int intel_pmu_init(void) break; + case 60: /* Haswell Client */ + case 70: + case 71: + case 63: + x86_pmu.late_ack = true; + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + + intel_pmu_lbr_init_snb(); + + x86_pmu.event_constraints = intel_hsw_event_constraints; + x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints; + x86_pmu.extra_regs = intel_snb_extra_regs; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; + /* all extra regs are per-cpu when HT is on */ + x86_pmu.er_flags |= ERF_HAS_RSP_1; + x86_pmu.er_flags |= ERF_NO_HT_SHARING; + + x86_pmu.hw_config = hsw_hw_config; + x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.cpu_events = hsw_events_attrs; + pr_cont("Haswell events, "); + break; + default: switch (x86_pmu.version) { case 1: @@ -2227,7 +2328,7 @@ __init int intel_pmu_init(void) * counter, so do not extend mask to generic counters */ for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != X86_RAW_EVENT_MASK + if (c->cmask != FIXED_EVENT_FLAGS || c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) { continue; } @@ -2237,5 +2338,12 @@ __init int intel_pmu_init(void) } } + /* Support full width counters using alternative MSR range */ + if (x86_pmu.intel_cap.full_width_write) { + x86_pmu.max_period = x86_pmu.cntval_mask; + x86_pmu.perfctr = MSR_IA32_PMC0; + pr_cont("full-width counters, "); + } + return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 60250f6..3065c57 100644 --- 
a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status) return val; } +static u64 precise_store_data_hsw(u64 status) +{ + union perf_mem_data_src dse; + + dse.val = 0; + dse.mem_op = PERF_MEM_OP_STORE; + dse.mem_lvl = PERF_MEM_LVL_NA; + if (status & 1) + dse.mem_lvl = PERF_MEM_LVL_L1; + /* Nothing else supported. Sorry. */ + return dse.val; +} + static u64 load_latency_data(u64 status) { union intel_x86_pebs_dse dse; @@ -165,6 +178,22 @@ struct pebs_record_nhm { u64 status, dla, dse, lat; }; +/* + * Same as pebs_record_nhm, with two additional fields. + */ +struct pebs_record_hsw { + struct pebs_record_nhm nhm; + /* + * Real IP of the event. In the Intel documentation this + * is called eventingrip. + */ + u64 real_ip; + /* + * TSX tuning information field: abort cycles and abort flags. + */ + u64 tsx_tuning; +}; + void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_hsw_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ + INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ + INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */ + INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */ + /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), + /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), + INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), + INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ + INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ + INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */ + /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ + INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf), + /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ + INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf), + /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ + INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf), + /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */ + INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */ + INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; @@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; cpuc->pebs_enabled &= ~(1ULL << hwc->idx); + + if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT) + cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); + else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST) + cpuc->pebs_enabled &= ~(1ULL << 63); + if (cpuc->enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, 
cpuc->pebs_enabled); @@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct pebs_record_nhm *pebs = __pebs; + struct pebs_record_hsw *pebs_hsw = __pebs; struct perf_sample_data data; struct pt_regs regs; u64 sample_type; @@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, return; fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; - fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST; + fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST | + PERF_X86_EVENT_PEBS_ST_HSW); perf_sample_data_init(&data, 0, event->hw.last_period); @@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * if PEBS-LL or PreciseStore */ if (fll || fst) { - if (sample_type & PERF_SAMPLE_ADDR) - data.addr = pebs->dla; - /* * Use latency for weight (only avail with PEBS-LL) */ @@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) { if (fll) data.data_src.val = load_latency_data(pebs->dse); + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + data.data_src.val = + precise_store_data_hsw(pebs->dse); else data.data_src.val = precise_store_data(pebs->dse); } @@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.bp = pebs->bp; regs.sp = pebs->sp; - if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) + if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { + regs.ip = pebs_hsw->real_ip; + regs.flags |= PERF_EFLAGS_EXACT; + } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) regs.flags |= PERF_EFLAGS_EXACT; else regs.flags &= ~PERF_EFLAGS_EXACT; + if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && + x86_pmu.intel_cap.pebs_format >= 1) + data.addr = pebs->dla; + if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; @@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) __intel_pmu_pebs_event(event, iregs, at); } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, + void *top) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; - struct pebs_record_nhm *at, *top; struct perf_event *event = NULL; u64 status = 0; - int bit, n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + int bit; ds->pebs_index = ds->pebs_buffer_base; - n = top - at; - if (n <= 0) - return; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. 
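The drain rework started above and continued just below lets one helper walk the DS buffer in x86_pmu.pebs_record_size steps, so the Haswell record (which embeds the Nehalem layout and appends real_ip and tsx_tuning) reuses the Nehalem processing. A minimal stand-alone model of that walk, with invented field values:

#include <stdio.h>
#include <stdint.h>

struct rec_base {                   /* cf. pebs_record_nhm */
    uint64_t status;
    uint64_t ip;
};

struct rec_ext {                    /* cf. pebs_record_hsw */
    struct rec_base base;
    uint64_t real_ip;               /* "eventingrip" in the patch's comment */
};

static void drain(const void *buf, const void *end, size_t record_size)
{
    for (const char *at = buf; at < (const char *)end; at += record_size) {
        const struct rec_base *p = (const struct rec_base *)at;

        printf("status=%#llx ip=%#llx\n",
               (unsigned long long)p->status, (unsigned long long)p->ip);
    }
}

int main(void)
{
    struct rec_ext buf[2] = {
        { { 0x1, 0x400100 }, 0x400104 },
        { { 0x2, 0x400200 }, 0x400204 },
    };

    /* the same loop handles both layouts; only record_size changes */
    drain(buf, buf + 2, sizeof(struct rec_ext));
    return 0;
}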
- */ - WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n); + for (; at < top; at += x86_pmu.pebs_record_size) { + struct pebs_record_nhm *p = at; - for ( ; at < top; at++) { - for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { + for_each_set_bit(bit, (unsigned long *)&p->status, + x86_pmu.max_pebs_events) { event = cpuc->events[bit]; if (!test_bit(bit, cpuc->active_mask)) continue; @@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) } } +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct pebs_record_nhm *at, *top; + int n; + + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + n = top - at; + if (n <= 0) + return; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ONCE(n > x86_pmu.max_pebs_events, + "Unexpected number of pebs records %d\n", n); + + return __intel_pmu_drain_pebs_nhm(iregs, at, top); +} + +static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct pebs_record_hsw *at, *top; + int n; + + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index; + + n = top - at; + if (n <= 0) + return; + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ONCE(n > x86_pmu.max_pebs_events, + "Unexpected number of pebs records %d\n", n); + + return __intel_pmu_drain_pebs_nhm(iregs, at, top); +} + /* * BTS, PEBS probe and setup */ @@ -888,6 +1010,12 @@ void intel_ds_init(void) x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; + case 2: + pr_cont("PEBS fmt2%c, ", pebs_type); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; + break; + default: printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index d978353..d5be06a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -12,6 +12,16 @@ enum { LBR_FORMAT_LIP = 0x01, LBR_FORMAT_EIP = 0x02, LBR_FORMAT_EIP_FLAGS = 0x03, + LBR_FORMAT_EIP_FLAGS2 = 0x04, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2, +}; + +static enum { + LBR_EIP_FLAGS = 1, + LBR_TSX = 2, +} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { + [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS, + [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX, }; /* @@ -56,6 +66,8 @@ enum { LBR_FAR) #define LBR_FROM_FLAG_MISPRED (1ULL << 63) +#define LBR_FROM_FLAG_IN_TX (1ULL << 62) +#define LBR_FROM_FLAG_ABORT (1ULL << 61) #define for_each_branch_sample_type(x) \ for ((x) = PERF_SAMPLE_BRANCH_USER; \ @@ -81,9 +93,13 @@ enum { X86_BR_JMP = 1 << 9, /* jump */ X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ X86_BR_IND_CALL = 1 << 11,/* indirect calls */ + X86_BR_ABORT = 1 << 12,/* transaction abort */ + X86_BR_IN_TX = 1 << 13,/* in transaction */ + X86_BR_NO_TX = 1 << 14,/* not in transaction */ }; #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) +#define X86_BR_ANYTX 
(X86_BR_NO_TX | X86_BR_IN_TX) #define X86_BR_ANY \ (X86_BR_CALL |\ @@ -95,6 +111,7 @@ enum { X86_BR_JCC |\ X86_BR_JMP |\ X86_BR_IRQ |\ + X86_BR_ABORT |\ X86_BR_IND_CALL) #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) @@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; - u64 from, to, mis = 0, pred = 0; + u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; + int skip = 0; + int lbr_flags = lbr_desc[lbr_format]; rdmsrl(x86_pmu.lbr_from + lbr_idx, from); rdmsrl(x86_pmu.lbr_to + lbr_idx, to); - if (lbr_format == LBR_FORMAT_EIP_FLAGS) { + if (lbr_flags & LBR_EIP_FLAGS) { mis = !!(from & LBR_FROM_FLAG_MISPRED); pred = !mis; - from = (u64)((((s64)from) << 1) >> 1); + skip = 1; + } + if (lbr_flags & LBR_TSX) { + in_tx = !!(from & LBR_FROM_FLAG_IN_TX); + abort = !!(from & LBR_FROM_FLAG_ABORT); + skip = 3; } + from = (u64)((((s64)from) << skip) >> skip); cpuc->lbr_entries[i].from = from; cpuc->lbr_entries[i].to = to; cpuc->lbr_entries[i].mispred = mis; cpuc->lbr_entries[i].predicted = pred; + cpuc->lbr_entries[i].in_tx = in_tx; + cpuc->lbr_entries[i].abort = abort; cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; @@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void) * - in case there is no HW filter * - in case the HW filter has errata or limitations */ -static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) { u64 br_type = event->attr.branch_sample_type; int mask = 0; @@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_USER) mask |= X86_BR_USER; - if (br_type & PERF_SAMPLE_BRANCH_KERNEL) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) mask |= X86_BR_KERNEL; - } /* we ignore BRANCH_HV here */ @@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) mask |= X86_BR_IND_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX) + mask |= X86_BR_ABORT; + + if (br_type & PERF_SAMPLE_BRANCH_IN_TX) + mask |= X86_BR_IN_TX; + + if (br_type & PERF_SAMPLE_BRANCH_NO_TX) + mask |= X86_BR_NO_TX; + /* * stash actual user request into reg, it may * be used by fixup code for some CPU */ event->hw.branch_reg.reg = mask; - - return 0; } /* @@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) /* * setup SW LBR filter */ - ret = intel_pmu_setup_sw_lbr_filter(event); - if (ret) - return ret; + intel_pmu_setup_sw_lbr_filter(event); /* * setup HW LBR filter, if any @@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event) * decoded (e.g., text page not present), then X86_BR_NONE is * returned. 
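The read path above packs three flags into the top of the LBR "from" value (mispredict in bit 63 and, with the new format, in_tx in bit 62 and abort in bit 61) and then recovers the branch address with a signed shift-left/shift-right, which also re-extends kernel addresses. A stand-alone decode of a made-up value; the left shift is done on the unsigned type here so the plain C program avoids signed-overflow pitfalls:

#include <stdio.h>
#include <stdint.h>

#define FLAG_MISPRED    (1ull << 63)    /* LBR_FROM_FLAG_MISPRED */
#define FLAG_IN_TX      (1ull << 62)    /* LBR_FROM_FLAG_IN_TX   */
#define FLAG_ABORT      (1ull << 61)    /* LBR_FROM_FLAG_ABORT   */

int main(void)
{
    /* pretend the MSR held a mispredicted, in-transaction kernel branch */
    uint64_t from = FLAG_MISPRED | FLAG_IN_TX | 0x1fffffff81000000ull;
    int skip = 3;                       /* LBR_FORMAT_EIP_FLAGS2: strip 3 bits */

    int mispred = !!(from & FLAG_MISPRED);
    int in_tx   = !!(from & FLAG_IN_TX);
    int aborted = !!(from & FLAG_ABORT);
    /* prints addr=0xffffffff81000000: flags stripped, sign re-extended */
    uint64_t addr = (uint64_t)((int64_t)(from << skip) >> skip);

    printf("mispred=%d in_tx=%d abort=%d addr=%#llx\n",
           mispred, in_tx, aborted, (unsigned long long)addr);
    return 0;
}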
*/ -static int branch_type(unsigned long from, unsigned long to) +static int branch_type(unsigned long from, unsigned long to, int abort) { struct insn insn; void *addr; @@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to) if (from == 0 || to == 0) return X86_BR_NONE; + if (abort) + return X86_BR_ABORT | to_plm; + if (from_plm == X86_BR_USER) { /* * can happen if measuring at the user level only @@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) from = cpuc->lbr_entries[i].from; to = cpuc->lbr_entries[i].to; - type = branch_type(from, to); + type = branch_type(from, to, cpuc->lbr_entries[i].abort); + if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) { + if (cpuc->lbr_entries[i].in_tx) + type |= X86_BR_IN_TX; + else + type |= X86_BR_NO_TX; + } /* if type does not correspond, then discard */ if (type == X86_BR_NONE || (br_sel & type) != type) { diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 52441a2..9dd9975 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -536,7 +536,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve if (!uncore_box_is_fake(box)) reg1->alloc |= alloc; - return 0; + return NULL; fail: for (; i >= 0; i--) { if (alloc & (0x1 << i)) @@ -644,7 +644,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event) (!uncore_box_is_fake(box) && reg1->alloc)) return NULL; again: - mask = 0xff << (idx * 8); + mask = 0xffULL << (idx * 8); raw_spin_lock_irqsave(&er->lock, flags); if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) || !((config1 ^ er->config) & mask)) { @@ -1923,7 +1923,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif { struct hw_perf_event *hwc = &event->hw; struct hw_perf_event_extra *reg1 = &hwc->extra_reg; - int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); + u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8); u64 config = reg1->config; /* get the non-shared control bits and shift them */ @@ -2723,15 +2723,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n) { unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)]; - struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX]; + struct event_constraint *c; int i, wmin, wmax, ret = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX); for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { + hwc = &box->event_list[i]->hw; c = uncore_get_event_constraint(box, box->event_list[i]); - constraints[i] = c; + hwc->constraint = c; wmin = min(wmin, c->weight); wmax = max(wmax, c->weight); } @@ -2739,7 +2740,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int /* fastpath, try to reuse previous register */ for (i = 0; i < n; i++) { hwc = &box->event_list[i]->hw; - c = constraints[i]; + c = hwc->constraint; /* never assigned */ if (hwc->idx == -1) @@ -2759,7 +2760,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int } /* slow path */ if (i != n) - ret = perf_assign_events(constraints, n, wmin, wmax, assign); + ret = perf_assign_events(box->event_list, n, + wmin, wmax, assign); if (!assign || ret) { for (i = 0; i < n; i++) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index f952891..47b3d00 100644 
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -337,10 +337,10 @@ NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK) #define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23)) -#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n))) +#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n))) #define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24)) -#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n))) +#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n))) /* * use the 9~13 bits to select event If the 7th bit is not set, diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 37a198b..aee6317 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -37,8 +37,8 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no", static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no", static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no", - c->hard_math ? "yes" : "no", - c->hard_math ? "yes" : "no", + static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no", + static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no", c->cpuid_level, c->wp_works_ok ? "yes" : "no"); } diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault.c index 155a13f..5d3fe8d 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault.c @@ -9,6 +9,8 @@ #include <asm/processor.h> #include <asm/desc.h> +#ifdef CONFIG_X86_32 + #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) @@ -67,3 +69,16 @@ struct tss_struct doublefault_tss __cacheline_aligned = { .__cr3 = __pa_nodebug(swapper_pg_dir), } }; + +/* dummy for do_double_fault() call */ +void df_debug(struct pt_regs *regs, long error_code) {} + +#else /* !CONFIG_X86_32 */ + +void df_debug(struct pt_regs *regs, long error_code) +{ + pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code); + show_regs(regs); + panic("Machine halted."); +} +#endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7272089..5fe1fb2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -365,7 +365,7 @@ ENDPROC(native_usergs_sysret64) /*CFI_REL_OFFSET ss,0*/ pushq_cfi %rax /* rsp */ CFI_REL_OFFSET rsp,0 - pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ + pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */ /*CFI_REL_OFFSET rflags,0*/ pushq_cfi $__KERNEL_CS /* cs */ /*CFI_REL_OFFSET cs,0*/ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 73afd11..e65ddc6 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -444,7 +444,6 @@ is486: orl %ecx,%eax movl %eax,%cr0 - call check_x87 lgdt early_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f @@ -467,26 +466,6 @@ is486: pushl $0 # fake return address for unwinder jmp *(initial_code) -/* - * We depend on ET to be correct. This checks for 287/387. 
- */ -check_x87: - movb $0,X86_HARD_MATH - clts - fninit - fstsw %ax - cmpb $0,%al - je 1f - movl %cr0,%eax /* no coprocessor: have to set bits */ - xorl $4,%eax /* set EM */ - movl %eax,%cr0 - ret - ALIGN -1: movb $1,X86_HARD_MATH - .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */ - ret - - #include "verify_cpu.S" /* diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index cb33909..b627746 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -131,7 +131,7 @@ static void __cpuinit init_thread_xstate(void) * xsave_init(). */ - if (!HAVE_HWFP) { + if (!cpu_has_fpu) { /* * Disable xsave as we do not support it if i387 * emulation is enabled. @@ -158,6 +158,14 @@ void __cpuinit fpu_init(void) unsigned long cr0; unsigned long cr4_mask = 0; +#ifndef CONFIG_MATH_EMULATION + if (!cpu_has_fpu) { + pr_emerg("No FPU found and no math emulation present\n"); + pr_emerg("Giving up\n"); + for (;;) + asm volatile("hlt"); + } +#endif if (cpu_has_fxsr) cr4_mask |= X86_CR4_OSFXSR; if (cpu_has_xmm) @@ -167,7 +175,7 @@ void __cpuinit fpu_init(void) cr0 = read_cr0(); cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!HAVE_HWFP) + if (!cpu_has_fpu) cr0 |= X86_CR0_EM; write_cr0(cr0); @@ -185,7 +193,7 @@ void __cpuinit fpu_init(void) void fpu_finit(struct fpu *fpu) { - if (!HAVE_HWFP) { + if (!cpu_has_fpu) { finit_soft_fpu(&fpu->state->soft); return; } @@ -214,7 +222,7 @@ int init_fpu(struct task_struct *tsk) int ret; if (tsk_used_math(tsk)) { - if (HAVE_HWFP && tsk == current) + if (cpu_has_fpu && tsk == current) unlazy_fpu(tsk); tsk->thread.fpu.last_cpu = ~0; return 0; @@ -511,14 +519,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - if (!HAVE_HWFP) + if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - if (!cpu_has_fxsr) { + if (!cpu_has_fxsr) return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fpu.state->fsave, 0, -1); - } sanitize_i387_state(target); @@ -545,13 +552,13 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, sanitize_i387_state(target); - if (!HAVE_HWFP) + if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - if (!cpu_has_fxsr) { + if (!cpu_has_fxsr) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, -1); - } + &target->thread.fpu.state->fsave, 0, + -1); if (pos > 0 || count < sizeof(env)) convert_from_fxsr(&env, target); @@ -592,3 +599,33 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) EXPORT_SYMBOL(dump_fpu); #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ + +static int __init no_387(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FPU); + return 1; +} + +__setup("no387", no_387); + +void __cpuinit fpu_detect(struct cpuinfo_x86 *c) +{ + unsigned long cr0; + u16 fsw, fcw; + + fsw = fcw = 0xffff; + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS | X86_CR0_EM); + write_cr0(cr0); + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + + /* The final cr0 value is set in fpu_init() */ +} diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 9895a9a..211bce4 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -365,10 +365,14 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src) return insn.length; } 
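The 32-bit FPU probe moves from the deleted check_x87 assembly above into fpu_detect() in i387.c: execute fninit, read back the status and control words, and accept the FPU only if FSW is zero and the masked FCW reads 0x003f. The same instruction sequence runs in user space, so the check can be reproduced outside the kernel; the CR0 handling is left out because it needs ring 0, and on any modern x86 this simply reports that an FPU is present.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint16_t fsw = 0xffff, fcw = 0xffff;

    /* same probe as fpu_detect(): fninit, then read status/control words */
    __asm__ volatile("fninit ; fnstsw %0 ; fnstcw %1"
                     : "+m" (fsw), "+m" (fcw));

    if (fsw == 0 && (fcw & 0x103f) == 0x003f)
        printf("x87 FPU present (fcw=%#x)\n", fcw);
    else
        printf("no usable x87 FPU (fsw=%#x fcw=%#x)\n", fsw, fcw);
    return 0;
}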
-static void __kprobes arch_copy_kprobe(struct kprobe *p) +static int __kprobes arch_copy_kprobe(struct kprobe *p) { + int ret; + /* Copy an instruction with recovering if other optprobe modifies it.*/ - __copy_instruction(p->ainsn.insn, p->addr); + ret = __copy_instruction(p->ainsn.insn, p->addr); + if (!ret) + return -EINVAL; /* * __copy_instruction can modify the displacement of the instruction, @@ -384,6 +388,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) /* Also, displacement change doesn't affect the first byte */ p->opcode = p->ainsn.insn[0]; + + return 0; } int __kprobes arch_prepare_kprobe(struct kprobe *p) @@ -397,8 +403,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) return -ENOMEM; - arch_copy_kprobe(p); - return 0; + + return arch_copy_kprobe(p); } void __kprobes arch_arm_kprobe(struct kprobe *p) diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index efdec7c..47ebb1d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -31,48 +31,12 @@ #include <asm/microcode.h> #include <asm/processor.h> #include <asm/msr.h> +#include <asm/microcode_amd.h> MODULE_DESCRIPTION("AMD Microcode Update Driver"); MODULE_AUTHOR("Peter Oruba"); MODULE_LICENSE("GPL v2"); -#define UCODE_MAGIC 0x00414d44 -#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -#define UCODE_UCODE_TYPE 0x00000001 - -struct equiv_cpu_entry { - u32 installed_cpu; - u32 fixed_errata_mask; - u32 fixed_errata_compare; - u16 equiv_cpu; - u16 res; -} __attribute__((packed)); - -struct microcode_header_amd { - u32 data_code; - u32 patch_id; - u16 mc_patch_data_id; - u8 mc_patch_data_len; - u8 init_flag; - u32 mc_patch_data_checksum; - u32 nb_dev_id; - u32 sb_dev_id; - u16 processor_rev_id; - u8 nb_rev_id; - u8 sb_rev_id; - u8 bios_api_rev; - u8 reserved1[3]; - u32 match_reg[8]; -} __attribute__((packed)); - -struct microcode_amd { - struct microcode_header_amd hdr; - unsigned int mpb[0]; -}; - -#define SECTION_HDR_SIZE 8 -#define CONTAINER_HDR_SZ 12 - static struct equiv_cpu_entry *equiv_cpu_table; struct ucode_patch { @@ -84,21 +48,10 @@ struct ucode_patch { static LIST_HEAD(pcache); -static u16 find_equiv_id(unsigned int cpu) +static u16 __find_equiv_id(unsigned int cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - int i = 0; - - if (!equiv_cpu_table) - return 0; - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } - return 0; + return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig); } static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu) @@ -163,7 +116,7 @@ static struct ucode_patch *find_patch(unsigned int cpu) { u16 equiv_id; - equiv_id = find_equiv_id(cpu); + equiv_id = __find_equiv_id(cpu); if (!equiv_id) return NULL; @@ -173,9 +126,20 @@ static struct ucode_patch *find_patch(unsigned int cpu) static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct ucode_patch *p; csig->sig = cpuid_eax(0x00000001); csig->rev = c->microcode; + + /* + * a patch could have been loaded early, set uci->mc so that + * mc_bp_resume() can call apply_microcode() + */ + p = find_patch(cpu); + if (p && (p->patch_id == csig->rev)) + uci->mc = p->data; + pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); return 0; @@ -215,7 +179,21 @@ static unsigned int verify_patch_size(int cpu, 
u32 patch_size, return patch_size; } -static int apply_microcode_amd(int cpu) +int __apply_microcode_amd(struct microcode_amd *mc_amd) +{ + u32 rev, dummy; + + wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); + + /* verify patch application was successful */ + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev != mc_amd->hdr.patch_id) + return -1; + + return 0; +} + +int apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; @@ -242,19 +220,15 @@ static int apply_microcode_amd(int cpu) return 0; } - wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); - - /* verify patch application was successful */ - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev != mc_amd->hdr.patch_id) { + if (__apply_microcode_amd(mc_amd)) pr_err("CPU%d: update failed for patch_level=0x%08x\n", - cpu, mc_amd->hdr.patch_id); - return -1; - } + cpu, mc_amd->hdr.patch_id); + else + pr_info("CPU%d: new patch_level=0x%08x\n", cpu, + mc_amd->hdr.patch_id); - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); - uci->cpu_sig.rev = rev; - c->microcode = rev; + uci->cpu_sig.rev = mc_amd->hdr.patch_id; + c->microcode = mc_amd->hdr.patch_id; return 0; } @@ -364,7 +338,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover) return crnt_size; } -static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) +static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t size) { enum ucode_state ret = UCODE_ERROR; unsigned int leftover; @@ -398,6 +372,32 @@ static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) return UCODE_OK; } +enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size) +{ + enum ucode_state ret; + + /* free old equiv table */ + free_equiv_cpu_table(); + + ret = __load_microcode_amd(cpu, data, size); + + if (ret != UCODE_OK) + cleanup(); + +#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32) + /* save BSP's matching patch for early load */ + if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) { + struct ucode_patch *p = find_patch(cpu); + if (p) { + memset(amd_bsp_mpb, 0, MPB_MAX_SIZE); + memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data), + MPB_MAX_SIZE)); + } + } +#endif + return ret; +} + /* * AMD microcode firmware naming convention, up to family 15h they are in * the legacy file: @@ -440,12 +440,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, goto fw_release; } - /* free old equiv table */ - free_equiv_cpu_table(); - ret = load_microcode_amd(cpu, fw->data, fw->size); - if (ret != UCODE_OK) - cleanup(); fw_release: release_firmware(fw); diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c new file mode 100644 index 0000000..1ac6e9a --- /dev/null +++ b/arch/x86/kernel/microcode_amd_early.c @@ -0,0 +1,302 @@ +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Jacob Shin <jacob.shin@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
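__apply_microcode_amd() above points MSR_AMD64_PATCH_LOADER at the patch and then re-reads MSR_AMD64_PATCH_LEVEL (0x8b) to confirm the new patch_id took. The read-back half can also be done from user space through the msr driver, which is a convenient way to check what an early load actually applied; the sketch assumes root and a loaded msr module, and the value can be cross-checked against the microcode field in /proc/cpuinfo.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

#define MSR_AMD64_PATCH_LEVEL 0x0000008b

int main(void)
{
    uint64_t level;
    int fd = open("/dev/cpu/0/msr", O_RDONLY);

    if (fd < 0) {
        perror("open /dev/cpu/0/msr");
        return 1;
    }
    /* the msr driver uses the MSR number as the file offset */
    if (pread(fd, &level, sizeof(level), MSR_AMD64_PATCH_LEVEL) != sizeof(level)) {
        perror("pread");
        close(fd);
        return 1;
    }
    close(fd);

    printf("CPU0 patch_level=0x%08x\n", (uint32_t)level);
    return 0;
}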
+ */ + +#include <linux/earlycpio.h> +#include <linux/initrd.h> + +#include <asm/cpu.h> +#include <asm/setup.h> +#include <asm/microcode_amd.h> + +static bool ucode_loaded; +static u32 ucode_new_rev; +static unsigned long ucode_offset; +static size_t ucode_size; + +/* + * Microcode patch container file is prepended to the initrd in cpio format. + * See Documentation/x86/early-microcode.txt + */ +static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin"; + +static struct cpio_data __init find_ucode_in_initrd(void) +{ + long offset = 0; + char *path; + void *start; + size_t size; + unsigned long *uoffset; + size_t *usize; + struct cpio_data cd; + +#ifdef CONFIG_X86_32 + struct boot_params *p; + + /* + * On 32-bit, early load occurs before paging is turned on so we need + * to use physical addresses. + */ + p = (struct boot_params *)__pa_nodebug(&boot_params); + path = (char *)__pa_nodebug(ucode_path); + start = (void *)p->hdr.ramdisk_image; + size = p->hdr.ramdisk_size; + uoffset = (unsigned long *)__pa_nodebug(&ucode_offset); + usize = (size_t *)__pa_nodebug(&ucode_size); +#else + path = ucode_path; + start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); + size = boot_params.hdr.ramdisk_size; + uoffset = &ucode_offset; + usize = &ucode_size; +#endif + + cd = find_cpio_data(path, start, size, &offset); + if (!cd.data) + return cd; + + if (*(u32 *)cd.data != UCODE_MAGIC) { + cd.data = NULL; + cd.size = 0; + return cd; + } + + *uoffset = (u8 *)cd.data - (u8 *)start; + *usize = cd.size; + + return cd; +} + +/* + * Early load occurs before we can vmalloc(). So we look for the microcode + * patch container file in initrd, traverse equivalent cpu table, look for a + * matching microcode patch, and update, all in initrd memory in place. + * When vmalloc() is available for use later -- on 64-bit during first AP load, + * and on 32-bit during save_microcode_in_initrd_amd() -- we can call + * load_microcode_amd() to save equivalent cpu table and microcode patches in + * kernel heap memory. + */ +static void __cpuinit apply_ucode_in_initrd(void *ucode, size_t size) +{ + struct equiv_cpu_entry *eq; + u32 *header; + u8 *data; + u16 eq_id = 0; + int offset, left; + u32 rev, eax; + u32 *new_rev; + unsigned long *uoffset; + size_t *usize; + +#ifdef CONFIG_X86_32 + new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); + uoffset = (unsigned long *)__pa_nodebug(&ucode_offset); + usize = (size_t *)__pa_nodebug(&ucode_size); +#else + new_rev = &ucode_new_rev; + uoffset = &ucode_offset; + usize = &ucode_size; +#endif + + data = ucode; + left = size; + header = (u32 *)data; + + /* find equiv cpu table */ + + if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ + header[2] == 0) /* size */ + return; + + eax = cpuid_eax(0x00000001); + + while (left > 0) { + eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); + + offset = header[2] + CONTAINER_HDR_SZ; + data += offset; + left -= offset; + + eq_id = find_equiv_id(eq, eax); + if (eq_id) + break; + + /* + * support multiple container files appended together. if this + * one does not have a matching equivalent cpu entry, we fast + * forward to the next container file. 
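apply_ucode_in_initrd() above walks the container image in place: a 12-byte header carrying the magic, the equivalence-table type and its size, then the equivalence table, then 8-byte section headers (type, size) each followed by one patch. The same layout can be inspected offline; the sketch below reads a container file (for example the AuthenticAMD.bin image that gets prepended to the initrd) and lists its sections, with the constants copied from the ones this series moves into asm/microcode_amd.h and only minimal error handling.

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define UCODE_MAGIC                     0x00414d44
#define UCODE_EQUIV_CPU_TABLE_TYPE      0x00000000
#define UCODE_UCODE_TYPE                0x00000001
#define CONTAINER_HDR_SZ                12
#define SECTION_HDR_SIZE                8

static void walk_container(const uint8_t *data, size_t size)
{
    const uint32_t *hdr = (const uint32_t *)data;
    size_t off;

    if (size < CONTAINER_HDR_SZ || hdr[0] != UCODE_MAGIC ||
        hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || hdr[2] == 0) {
        fprintf(stderr, "not a microcode container\n");
        return;
    }

    printf("equivalence table: %u bytes\n", (unsigned)hdr[2]);
    off = CONTAINER_HDR_SZ + hdr[2];

    while (off + SECTION_HDR_SIZE <= size) {
        const uint32_t *sec = (const uint32_t *)(data + off);

        if (sec[0] != UCODE_UCODE_TYPE || sec[1] == 0)
            break;                      /* next container or end of data */
        printf("patch section at offset %zu, %u bytes\n",
               off + SECTION_HDR_SIZE, (unsigned)sec[1]);
        off += SECTION_HDR_SIZE + sec[1];
    }
}

int main(int argc, char **argv)
{
    FILE *f;
    long len;
    uint8_t *buf;

    if (argc != 2) {
        fprintf(stderr, "usage: %s <container.bin>\n", argv[0]);
        return 1;
    }
    f = fopen(argv[1], "rb");
    if (!f) {
        perror("fopen");
        return 1;
    }
    fseek(f, 0, SEEK_END);
    len = ftell(f);
    rewind(f);
    buf = malloc(len);
    if (buf && fread(buf, 1, len, f) == (size_t)len)
        walk_container(buf, (size_t)len);
    free(buf);
    fclose(f);
    return 0;
}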
+ */ + while (left > 0) { + header = (u32 *)data; + if (header[0] == UCODE_MAGIC && + header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) + break; + + offset = header[1] + SECTION_HDR_SIZE; + data += offset; + left -= offset; + } + + /* mark where the next microcode container file starts */ + offset = data - (u8 *)ucode; + *uoffset += offset; + *usize -= offset; + ucode = data; + } + + if (!eq_id) { + *usize = 0; + return; + } + + /* find ucode and update if needed */ + + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); + + while (left > 0) { + struct microcode_amd *mc; + + header = (u32 *)data; + if (header[0] != UCODE_UCODE_TYPE || /* type */ + header[1] == 0) /* size */ + break; + + mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); + if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) + if (__apply_microcode_amd(mc) == 0) { + rev = mc->hdr.patch_id; + *new_rev = rev; + } + + offset = header[1] + SECTION_HDR_SIZE; + data += offset; + left -= offset; + } + + /* mark where this microcode container file ends */ + offset = *usize - (data - (u8 *)ucode); + *usize -= offset; + + if (!(*new_rev)) + *usize = 0; +} + +void __init load_ucode_amd_bsp(void) +{ + struct cpio_data cd = find_ucode_in_initrd(); + if (!cd.data) + return; + + apply_ucode_in_initrd(cd.data, cd.size); +} + +#ifdef CONFIG_X86_32 +u8 amd_bsp_mpb[MPB_MAX_SIZE]; + +/* + * On 32-bit, since AP's early load occurs before paging is turned on, we + * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during + * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During + * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which + * is used upon resume from suspend. + */ +void __cpuinit load_ucode_amd_ap(void) +{ + struct microcode_amd *mc; + unsigned long *initrd; + unsigned long *uoffset; + size_t *usize; + void *ucode; + + mc = (struct microcode_amd *)__pa(amd_bsp_mpb); + if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { + __apply_microcode_amd(mc); + return; + } + + initrd = (unsigned long *)__pa(&initrd_start); + uoffset = (unsigned long *)__pa(&ucode_offset); + usize = (size_t *)__pa(&ucode_size); + + if (!*usize || !*initrd) + return; + + ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset); + apply_ucode_in_initrd(ucode, *usize); +} + +static void __init collect_cpu_sig_on_bsp(void *arg) +{ + unsigned int cpu = smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + uci->cpu_sig.sig = cpuid_eax(0x00000001); +} +#else +static void __cpuinit collect_cpu_info_amd_early(struct cpuinfo_x86 *c, + struct ucode_cpu_info *uci) +{ + u32 rev, eax; + + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); + eax = cpuid_eax(0x00000001); + + uci->cpu_sig.sig = eax; + uci->cpu_sig.rev = rev; + c->microcode = rev; + c->x86 = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); +} + +void __cpuinit load_ucode_amd_ap(void) +{ + unsigned int cpu = smp_processor_id(); + + collect_cpu_info_amd_early(&cpu_data(cpu), ucode_cpu_info + cpu); + + if (cpu && !ucode_loaded) { + void *ucode; + + if (!ucode_size || !initrd_start) + return; + + ucode = (void *)(initrd_start + ucode_offset); + if (load_microcode_amd(0, ucode, ucode_size) != UCODE_OK) + return; + ucode_loaded = true; + } + + apply_microcode_amd(cpu); +} +#endif + +int __init save_microcode_in_initrd_amd(void) +{ + enum ucode_state ret; + void *ucode; +#ifdef CONFIG_X86_32 + unsigned int bsp = boot_cpu_data.cpu_index; + struct ucode_cpu_info *uci = ucode_cpu_info + bsp; + + if (!uci->cpu_sig.sig) + smp_call_function_single(bsp, 
collect_cpu_sig_on_bsp, NULL, 1); +#endif + if (ucode_new_rev) + pr_info("microcode: updated early to new patch_level=0x%08x\n", + ucode_new_rev); + + if (ucode_loaded || !ucode_size || !initrd_start) + return 0; + + ucode = (void *)(initrd_start + ucode_offset); + ret = load_microcode_amd(0, ucode, ucode_size); + if (ret != UCODE_OK) + return -EINVAL; + + ucode_loaded = true; + return 0; +} diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c index 833d51d..86119f6 100644 --- a/arch/x86/kernel/microcode_core_early.c +++ b/arch/x86/kernel/microcode_core_early.c @@ -18,6 +18,7 @@ */ #include <linux/module.h> #include <asm/microcode_intel.h> +#include <asm/microcode_amd.h> #include <asm/processor.h> #define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) @@ -81,8 +82,18 @@ void __init load_ucode_bsp(void) vendor = x86_vendor(); x86 = x86_family(); - if (vendor == X86_VENDOR_INTEL && x86 >= 6) - load_ucode_intel_bsp(); + switch (vendor) { + case X86_VENDOR_INTEL: + if (x86 >= 6) + load_ucode_intel_bsp(); + break; + case X86_VENDOR_AMD: + if (x86 >= 0x10) + load_ucode_amd_bsp(); + break; + default: + break; + } } void __cpuinit load_ucode_ap(void) @@ -95,6 +106,36 @@ void __cpuinit load_ucode_ap(void) vendor = x86_vendor(); x86 = x86_family(); - if (vendor == X86_VENDOR_INTEL && x86 >= 6) - load_ucode_intel_ap(); + switch (vendor) { + case X86_VENDOR_INTEL: + if (x86 >= 6) + load_ucode_intel_ap(); + break; + case X86_VENDOR_AMD: + if (x86 >= 0x10) + load_ucode_amd_ap(); + break; + default: + break; + } +} + +int __init save_microcode_in_initrd(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + switch (c->x86_vendor) { + case X86_VENDOR_INTEL: + if (c->x86 >= 6) + save_microcode_in_initrd_intel(); + break; + case X86_VENDOR_AMD: + if (c->x86 >= 0x10) + save_microcode_in_initrd_amd(); + break; + default: + break; + } + + return 0; } diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index 2e9e128..dabef95 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -529,7 +529,7 @@ int save_mc_for_early(u8 *mc) */ ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); if (ret) { - pr_err("Can not save microcode patch.\n"); + pr_err("Cannot save microcode patch.\n"); goto out; } @@ -699,7 +699,7 @@ static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data, * This function converts microcode patch offsets previously stored in * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. 
*/ -int __init save_microcode_in_initrd(void) +int __init save_microcode_in_initrd_intel(void) { unsigned int count = mc_saved_data.mc_saved_count; struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; @@ -711,7 +711,7 @@ int __init save_microcode_in_initrd(void) microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); ret = save_microcode(&mc_saved_data, mc_saved, count); if (ret) - pr_err("Can not save microcod patches from initrd"); + pr_err("Cannot save microcode patches from initrd.\n"); show_saved_mc(); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 6030805..0920212 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -14,6 +14,7 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/nmi.h> +#include <linux/debugfs.h> #include <linux/delay.h> #include <linux/hardirq.h> #include <linux/slab.h> @@ -29,6 +30,9 @@ #include <asm/nmi.h> #include <asm/x86_init.h> +#define CREATE_TRACE_POINTS +#include <trace/events/nmi.h> + struct nmi_desc { spinlock_t lock; struct list_head head; @@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); #define nmi_to_desc(type) (&nmi_desc[type]) +static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC; +static int __init nmi_warning_debugfs(void) +{ + debugfs_create_u64("nmi_longest_ns", 0644, + arch_debugfs_dir, &nmi_longest_ns); + return 0; +} +fs_initcall(nmi_warning_debugfs); + static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); @@ -96,8 +109,27 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 * can be latched at any given time. Walk the whole list * to handle those situations. */ - list_for_each_entry_rcu(a, &desc->head, list) - handled += a->handler(type, regs); + list_for_each_entry_rcu(a, &desc->head, list) { + u64 before, delta, whole_msecs; + int decimal_msecs, thishandled; + + before = local_clock(); + thishandled = a->handler(type, regs); + handled += thishandled; + delta = local_clock() - before; + trace_nmi_handler(a->handler, (int)delta, thishandled); + + if (delta < nmi_longest_ns) + continue; + + nmi_longest_ns = delta; + whole_msecs = do_div(delta, (1000 * 1000)); + decimal_msecs = do_div(delta, 1000) % 1000; + printk_ratelimited(KERN_INFO + "INFO: NMI handler (%ps) took too long to run: " + "%lld.%03d msecs\n", a->handler, whole_msecs, + decimal_msecs); + } rcu_read_unlock(); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7305f7d..f8adefc 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -110,11 +110,16 @@ void __show_regs(struct pt_regs *regs, int all) get_debugreg(d1, 1); get_debugreg(d2, 2); get_debugreg(d3, 3); - printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", - d0, d1, d2, d3); - get_debugreg(d6, 6); get_debugreg(d7, 7); + + /* Only print out debug registers if they are in their non-default state. 
*/ + if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && + (d6 == DR6_RESERVED) && (d7 == 0x400)) + return; + + printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", + d0, d1, d2, d3); printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", d6, d7); } @@ -147,7 +152,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->bp = arg; childregs->orig_ax = -1; childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; + childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; p->fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 355ae06..05646ba 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -105,11 +105,18 @@ void __show_regs(struct pt_regs *regs, int all) get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); + + /* Only print out debug registers if they are in their non-default state. */ + if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) && + (d6 == DR6_RESERVED) && (d7 == 0x400)) + return; + + printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + } void release_thread(struct task_struct *dead_task) @@ -176,7 +183,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->bp = arg; childregs->orig_ax = -1; childregs->cs = __KERNEL_CS | get_kernel_rpl(); - childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; + childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED; return 0; } *childregs = *current_pt_regs(); diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 36818f8..e13f8e7 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S @@ -186,7 +186,7 @@ identity_mapped: movl CP_PA_PGD(%ebx), %eax movl %eax, %cr3 movl %cr0, %eax - orl $(1<<31), %eax + orl $X86_CR0_PG, %eax movl %eax, %cr0 lea PAGE_SIZE(%edi), %esp movl %edi, %eax diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index f2bb9c9..3fd2c69 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -151,21 +151,21 @@ identity_mapped: testq %r11, %r11 jnz 1f - xorq %rax, %rax - xorq %rbx, %rbx - xorq %rcx, %rcx - xorq %rdx, %rdx - xorq %rsi, %rsi - xorq %rdi, %rdi - xorq %rbp, %rbp - xorq %r8, %r8 - xorq %r9, %r9 - xorq %r10, %r10 - xorq %r11, %r11 - xorq %r12, %r12 - xorq %r13, %r13 - xorq %r14, %r14 - xorq %r15, %r15 + xorl %eax, %eax + xorl %ebx, %ebx + xorl %ecx, %ecx + xorl %edx, %edx + xorl %esi, %esi + xorl %edi, %edi + xorl %ebp, %ebp + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d ret @@ -212,8 +212,8 @@ virtual_mapped: /* Do the copies */ swap_pages: movq %rdi, %rcx /* Put the page_list in %rcx */ - xorq %rdi, %rdi - xorq %rsi, %rsi + xorl %edi, %edi + xorl %esi, %esi jmp 1f 0: /* top, read another word for the indirection page */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6956299..cf91358 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -43,12 +43,6 @@ #include <asm/sigframe.h> -#ifdef CONFIG_X86_32 -# define FIX_EFLAGS 
(__FIX_EFLAGS | X86_EFLAGS_RF) -#else -# define FIX_EFLAGS __FIX_EFLAGS -#endif - #define COPY(x) do { \ get_user_ex(regs->x, &sc->x); \ } while (0) @@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) if (!failed) { /* * Clear the direction flag as per the ABI for function entry. - */ - regs->flags &= ~X86_EFLAGS_DF; - /* + * + * Clear RF when entering the signal handler, because + * it might disable possible debug exception from the + * signal handler. + * * Clear TF when entering the signal handler, but * notify any tracer that was single-stepping it. * The tracer may want to single-step inside the * handler too. */ - regs->flags &= ~X86_EFLAGS_TF; + regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF); } signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); } diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index f84fe00..3ff42d2 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -31,6 +31,7 @@ #include <linux/pfn.h> #include <linux/mm.h> #include <linux/tboot.h> +#include <linux/debugfs.h> #include <asm/realmode.h> #include <asm/processor.h> @@ -338,6 +339,73 @@ static struct notifier_block tboot_cpu_notifier __cpuinitdata = .notifier_call = tboot_cpu_callback, }; +#ifdef CONFIG_DEBUG_FS + +#define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \ + 0x4c, 0x84, 0xa3, 0xe9, 0x53, 0xb8, 0x81, 0x74 } + +#define TBOOT_SERIAL_LOG_ADDR 0x60000 +#define TBOOT_SERIAL_LOG_SIZE 0x08000 +#define LOG_MAX_SIZE_OFF 16 +#define LOG_BUF_OFF 24 + +static uint8_t tboot_log_uuid[16] = TBOOT_LOG_UUID; + +static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) +{ + void __iomem *log_base; + u8 log_uuid[16]; + u32 max_size; + void *kbuf; + int ret = -EFAULT; + + log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE); + if (!log_base) + return ret; + + memcpy_fromio(log_uuid, log_base, sizeof(log_uuid)); + if (memcmp(&tboot_log_uuid, log_uuid, sizeof(log_uuid))) + goto err_iounmap; + + max_size = readl(log_base + LOG_MAX_SIZE_OFF); + if (*ppos >= max_size) { + ret = 0; + goto err_iounmap; + } + + if (*ppos + count > max_size) + count = max_size - *ppos; + + kbuf = kmalloc(count, GFP_KERNEL); + if (!kbuf) { + ret = -ENOMEM; + goto err_iounmap; + } + + memcpy_fromio(kbuf, log_base + LOG_BUF_OFF + *ppos, count); + if (copy_to_user(user_buf, kbuf, count)) + goto err_kfree; + + *ppos += count; + + ret = count; + +err_kfree: + kfree(kbuf); + +err_iounmap: + iounmap(log_base); + + return ret; +} + +static const struct file_operations tboot_log_fops = { + .read = tboot_log_read, + .llseek = default_llseek, +}; + +#endif /* CONFIG_DEBUG_FS */ + static __init int tboot_late_init(void) { if (!tboot_enabled()) @@ -348,6 +416,11 @@ static __init int tboot_late_init(void) atomic_set(&ap_wfs_count, 0); register_hotcpu_notifier(&tboot_cpu_notifier); +#ifdef CONFIG_DEBUG_FS + debugfs_create_file("tboot_log", S_IRUSR, + arch_debugfs_dir, NULL, &tboot_log_fops); +#endif + acpi_os_set_prepare_sleep(&tboot_sleep); return 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 772e2a8..ee3d8e5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -254,6 +254,9 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_DF; +#ifdef CONFIG_DOUBLEFAULT + df_debug(regs, error_code); +#endif /* * This is always a kernel trap and never fixable (and thus must * 
never return). @@ -437,7 +440,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; - if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, SIGTRAP) == NOTIFY_STOP) goto exit; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index ada87a3..d6c28ac 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -243,7 +243,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_WRITE, buf, size)) return -EACCES; - if (!HAVE_HWFP) + if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_ia32 __user *) buf) ? -1 : 1; @@ -350,11 +350,10 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!used_math() && init_fpu(tsk)) return -1; - if (!HAVE_HWFP) { + if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), NULL, buf) != 0; - } if (use_xsave()) { struct _fpx_sw_bytes fx_sw_user; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 260a919..b30f5a5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7942,7 +7942,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); - vmx_set_rflags(vcpu, X86_EFLAGS_BIT1); + vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); /* * Note that calling vmx_set_cr0 is important, even if cr0 hasn't * actually changed, because it depends on the current state of diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e8ba99c..292e6ca 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -618,7 +618,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP)) return 1; - if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_RDWRGSFS)) + if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE)) return 1; if (is_long_mode(vcpu)) { diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7114c63..d482bca 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1410,7 +1410,7 @@ __init void lguest_init(void) new_cpu_data.x86_capability[0] = cpuid_edx(1); /* Math is always hard! */ - new_cpu_data.hard_math = 1; + set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); /* We don't have features. We have puppies! Puppies! */ #ifdef CONFIG_X86_MCE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index bb00c46..b3940b6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -368,7 +368,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) * * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) * - * phys_addr holds the negative offset to the kernel, which is added + * phys_base holds the negative offset to the kernel, which is added * to the compile time generated pmds. This results in invalid pmds up * to the point where we hit the physaddr 0 mapping. 
* diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 9a1e658..0215e2c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -501,15 +501,15 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) } if (slot < 0) { - printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n", - (u64)phys_addr, size); + printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n", + __func__, (u64)phys_addr, size); WARN_ON(1); return NULL; } if (early_ioremap_debug) { - printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ", - (u64)phys_addr, size, slot); + printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ", + __func__, (u64)phys_addr, size, slot); dump_stack(); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index d2fbced..b410b71 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -929,6 +929,13 @@ void __init efi_enter_virtual_mode(void) va = efi_ioremap(md->phys_addr, size, md->type, md->attribute); + if (!(md->attribute & EFI_MEMORY_RUNTIME)) { + if (!va) + pr_err("ioremap of 0x%llX failed!\n", + (unsigned long long)md->phys_addr); + continue; + } + md->virt_addr = (u64) (unsigned long) va; if (!va) { diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 0faad64..d6bfb87 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -372,7 +372,7 @@ subsys_initcall(sysenter_setup); /* Register vsyscall32 into the ABI table */ #include <linux/sysctl.h> -static ctl_table abi_table2[] = { +static struct ctl_table abi_table2[] = { { .procname = "vsyscall32", .data = &sysctl_vsyscall32, @@ -383,7 +383,7 @@ static ctl_table abi_table2[] = { {} }; -static ctl_table abi_root_table2[] = { +static struct ctl_table abi_root_table2[] = { { .procname = "abi", .mode = 0555, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a492be2..2fa02bc 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1557,7 +1557,7 @@ asmlinkage void __init xen_start_kernel(void) #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); - new_cpu_data.hard_math = 1; + set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); new_cpu_data.wp_works_ok = 1; new_cpu_data.x86_capability[0] = cpuid_edx(1); #endif |
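
A note on the early-load path added above: the AMD loader expects the patch container to live in an uncompressed cpio archive prepended to the initrd, under the member name kernel/x86/microcode/AuthenticAMD.bin, and it only accepts a member whose first 32-bit word is UCODE_MAGIC. The user-space sketch below is illustrative only and is not the kernel's find_cpio_data(); the newc field offsets, the member path and the 0x00414d44 magic value are assumptions taken from the hunks above and Documentation/x86/early-microcode.txt. It can be handy for checking that an initrd was assembled with the microcode cpio in front.

/*
 * Minimal sketch: scan an uncompressed newc ("070701") cpio image for the
 * early-microcode member and sanity-check its magic.
 * Usage (hypothetical tool name): ./scan-ucode <cpio-or-initrd-image>
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define UCODE_PATH	"kernel/x86/microcode/AuthenticAMD.bin"
#define UCODE_MAGIC	0x00414d44u		/* assumed AMD container magic */

static size_t hex8(const char *p)		/* one 8-digit ASCII hex field */
{
	char buf[9];

	memcpy(buf, p, 8);
	buf[8] = '\0';
	return strtoul(buf, NULL, 16);
}

static const void *cpio_find(const char *base, size_t len,
			     const char *path, size_t *out_size)
{
	size_t off = 0;

	/* each newc header is 110 bytes; name and data are padded to 4 bytes */
	while (off + 110 <= len && !memcmp(base + off, "070701", 6)) {
		size_t filesize = hex8(base + off + 54);	/* c_filesize */
		size_t namesize = hex8(base + off + 94);	/* c_namesize */
		const char *name = base + off + 110;
		size_t data_off = (off + 110 + namesize + 3) & ~(size_t)3;

		if (namesize >= 11 && !memcmp(name, "TRAILER!!!", 11))
			break;
		if (namesize == strlen(path) + 1 && !memcmp(name, path, namesize)) {
			*out_size = filesize;
			return base + data_off;
		}
		off = (data_off + filesize + 3) & ~(size_t)3;
	}
	return NULL;
}

int main(int argc, char **argv)
{
	static char buf[64 << 20];		/* big enough for a sketch */
	const uint32_t *data;
	size_t len, size = 0;
	FILE *f;

	if (argc != 2 || !(f = fopen(argv[1], "rb")))
		return 1;
	len = fread(buf, 1, sizeof(buf), f);
	fclose(f);

	data = cpio_find(buf, len, UCODE_PATH, &size);
	if (!data || size < 4 || *data != UCODE_MAGIC) {
		fprintf(stderr, "no valid AMD microcode container found\n");
		return 1;
	}
	printf("found %s: %zu bytes at offset %zu\n",
	       UCODE_PATH, size, (size_t)((const char *)data - buf));
	return 0;
}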
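
Two of the hunks above also add small debugfs knobs: nmi_longest_ns (the duration, in nanoseconds, above which an NMI handler is reported; the handler path also raises it to the longest duration observed, so it doubles as a high-water mark) and tboot_log (a read-only view of the tboot serial log). Assuming debugfs is mounted at the conventional /sys/kernel/debug, arch_debugfs_dir places both files under /sys/kernel/debug/x86/; the sketch below simply dumps them and is not part of the patch.

/*
 * Sketch: read the new x86 debugfs entries from user space. Assumes
 * CONFIG_DEBUG_FS, debugfs mounted at /sys/kernel/debug, and root
 * privileges; tboot_log only exists when the system was launched via tboot.
 */
#include <stdio.h>

static void dump_file(const char *path)
{
	char buf[4096];
	size_t n;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
		fwrite(buf, 1, n, stdout);
	fclose(f);
}

int main(void)
{
	/* current NMI-handler duration threshold / high-water mark, in ns */
	dump_file("/sys/kernel/debug/x86/nmi_longest_ns");
	/* tboot's serial log, if available */
	dump_file("/sys/kernel/debug/x86/tboot_log");
	return 0;
}

Since nmi_longest_ns is created with mode 0644, root can also write a new value back (e.g. echo 2000000 > /sys/kernel/debug/x86/nmi_longest_ns) to raise or lower the reporting threshold.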