Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig | 91
-rw-r--r--  arch/x86/boot/Makefile | 2
-rw-r--r--  arch/x86/boot/compressed/eboot.c | 65
-rw-r--r--  arch/x86/boot/compressed/head_64.S | 3
-rw-r--r--  arch/x86/entry/calling.h | 33
-rw-r--r--  arch/x86/entry/entry_32.S | 141
-rw-r--r--  arch/x86/entry/entry_64.S | 16
-rw-r--r--  arch/x86/entry/vdso/vma.c | 10
-rw-r--r--  arch/x86/events/amd/core.c | 8
-rw-r--r--  arch/x86/events/core.c | 22
-rw-r--r--  arch/x86/events/intel/core.c | 2
-rw-r--r--  arch/x86/events/intel/cstate.c | 1
-rw-r--r--  arch/x86/events/intel/ds.c | 35
-rw-r--r--  arch/x86/events/intel/pt.c | 45
-rw-r--r--  arch/x86/events/intel/uncore.c | 8
-rw-r--r--  arch/x86/events/intel/uncore_snb.c | 12
-rw-r--r--  arch/x86/events/perf_event.h | 4
-rw-r--r--  arch/x86/include/asm/Kbuild | 4
-rw-r--r--  arch/x86/include/asm/amd_nb.h | 23
-rw-r--r--  arch/x86/include/asm/apic.h | 3
-rw-r--r--  arch/x86/include/asm/compat.h | 4
-rw-r--r--  arch/x86/include/asm/cpufeatures.h | 4
-rw-r--r--  arch/x86/include/asm/efi.h | 16
-rw-r--r--  arch/x86/include/asm/kdebug.h | 1
-rw-r--r--  arch/x86/include/asm/mce.h | 23
-rw-r--r--  arch/x86/include/asm/mmu_context.h | 2
-rw-r--r--  arch/x86/include/asm/msr-index.h | 4
-rw-r--r--  arch/x86/include/asm/msr.h | 46
-rw-r--r--  arch/x86/include/asm/mutex.h | 5
-rw-r--r--  arch/x86/include/asm/mutex_32.h | 110
-rw-r--r--  arch/x86/include/asm/mutex_64.h | 127
-rw-r--r--  arch/x86/include/asm/paravirt.h | 5
-rw-r--r--  arch/x86/include/asm/paravirt_types.h | 16
-rw-r--r--  arch/x86/include/asm/preempt.h | 8
-rw-r--r--  arch/x86/include/asm/processor.h | 16
-rw-r--r--  arch/x86/include/asm/qspinlock.h | 6
-rw-r--r--  arch/x86/include/asm/stacktrace.h | 8
-rw-r--r--  arch/x86/include/asm/topology.h | 32
-rw-r--r--  arch/x86/include/asm/uaccess.h | 13
-rw-r--r--  arch/x86/include/asm/unwind.h | 16
-rw-r--r--  arch/x86/include/asm/vgtod.h | 7
-rw-r--r--  arch/x86/include/uapi/asm/bootparam.h | 1
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h | 4
-rw-r--r--  arch/x86/include/uapi/asm/mce.h | 1
-rw-r--r--  arch/x86/include/uapi/asm/prctl.h | 8
-rw-r--r--  arch/x86/kernel/Makefile | 1
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 2
-rw-r--r--  arch/x86/kernel/amd_nb.c | 164
-rw-r--r--  arch/x86/kernel/apic/apic.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 4
-rw-r--r--  arch/x86/kernel/apm_32.c | 4
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 4
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 31
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 26
-rw-r--r--  arch/x86/kernel/cpu/bugs_64.c | 33
-rw-r--r--  arch/x86/kernel/cpu/common.c | 53
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 72
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 272
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c | 41
-rw-r--r--  arch/x86/kernel/cpu/scattered.c | 57
-rw-r--r--  arch/x86/kernel/cpuid.c | 4
-rw-r--r--  arch/x86/kernel/dumpstack.c | 68
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 56
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 79
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 2
-rw-r--r--  arch/x86/kernel/head_32.S | 49
-rw-r--r--  arch/x86/kernel/head_64.S | 60
-rw-r--r--  arch/x86/kernel/itmt.c | 215
-rw-r--r--  arch/x86/kernel/kvm.c | 17
-rw-r--r--  arch/x86/kernel/ldt.c | 12
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c | 14
-rw-r--r--  arch/x86/kernel/paravirt_patch_32.c | 12
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 12
-rw-r--r--  arch/x86/kernel/process_32.c | 7
-rw-r--r--  arch/x86/kernel/process_64.c | 13
-rw-r--r--  arch/x86/kernel/smp.c | 2
-rw-r--r--  arch/x86/kernel/smpboot.c | 53
-rw-r--r--  arch/x86/kernel/unwind_frame.c | 161
-rw-r--r--  arch/x86/kernel/unwind_guess.c | 9
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/x86/kvm/emulate.c | 36
-rw-r--r--  arch/x86/kvm/ioapic.c | 2
-rw-r--r--  arch/x86/kvm/ioapic.h | 4
-rw-r--r--  arch/x86/kvm/irq_comm.c | 13
-rw-r--r--  arch/x86/kvm/lapic.c | 2
-rw-r--r--  arch/x86/kvm/x86.c | 16
-rw-r--r--  arch/x86/lib/copy_user_64.S | 47
-rw-r--r--  arch/x86/lib/msr.c | 4
-rw-r--r--  arch/x86/lib/usercopy.c | 49
-rw-r--r--  arch/x86/lib/usercopy_32.c | 49
-rw-r--r--  arch/x86/mm/fault.c | 3
-rw-r--r--  arch/x86/mm/pat.c | 7
-rw-r--r--  arch/x86/net/bpf_jit_comp.c | 2
-rw-r--r--  arch/x86/platform/olpc/olpc-xo15-sci.c | 2
-rw-r--r--  arch/x86/platform/uv/uv_nmi.c | 4
-rw-r--r--  arch/x86/ras/mce_amd_inj.c | 2
-rw-r--r--  arch/x86/realmode/rm/Makefile | 2
-rw-r--r--  arch/x86/tools/insn_sanity.c | 3
-rw-r--r--  arch/x86/tools/relocs.h | 2
-rw-r--r--  arch/x86/tools/test_get_len.c | 2
-rw-r--r--  arch/x86/um/asm/processor.h | 1
-rw-r--r--  arch/x86/xen/spinlock.c | 2
103 files changed, 1804 insertions, 1075 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bada636..a219cf2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -9,28 +9,50 @@ config 64BIT
config X86_32
def_bool y
depends on !64BIT
+ # Options that are inherently 32-bit kernel only:
+ select ARCH_WANT_IPC_PARSE_VERSION
+ select CLKSRC_I8253
+ select CLONE_BACKWARDS
+ select HAVE_AOUT
+ select HAVE_GENERIC_DMA_COHERENT
+ select MODULES_USE_ELF_REL
+ select OLD_SIGACTION
config X86_64
def_bool y
depends on 64BIT
+ # Options that are inherently 64-bit kernel only:
+ select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_SUPPORTS_INT128
+ select ARCH_USE_CMPXCHG_LOCKREF
+ select HAVE_ARCH_SOFT_DIRTY
+ select MODULES_USE_ELF_RELA
+ select X86_DEV_DMA_OPS
-### Arch settings
+#
+# Arch settings
+#
+# ( Note that options that are marked 'if X86_64' could in principle be
+# ported to 32-bit as well. )
+#
config X86
def_bool y
+ #
+ # Note: keep this list sorted alphabetically
+ #
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ANON_INODES
select ARCH_CLOCKSOURCE_DATA
select ARCH_DISCARD_MEMBLOCK
- select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_GIGANTIC_PAGE if X86_64
select ARCH_HAS_KCOV if X86_64
- select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_MMIO_FLUSH
+ select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -39,23 +61,17 @@ config X86
select ARCH_MIGHT_HAVE_PC_SERIO
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
- select ARCH_SUPPORTS_INT128 if X86_64
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
select ARCH_USE_BUILTIN_BSWAP
- select ARCH_USE_CMPXCHG_LOCKREF if X86_64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
- select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_FRAME_POINTERS
- select ARCH_WANT_IPC_PARSE_VERSION if X86_32
+ select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select BUILDTIME_EXTABLE_SORT
select CLKEVT_I8253
- select CLKSRC_I8253 if X86_32
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
- select CLONE_BACKWARDS if X86_32
- select COMPAT_OLD_SIGACTION if IA32_EMULATION
select DCACHE_WORD_ACCESS
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
@@ -77,7 +93,6 @@ config X86
select HAVE_ACPI_APEI if ACPI
select HAVE_ACPI_APEI_NMI if ACPI
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
- select HAVE_AOUT if X86_32
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
@@ -88,12 +103,10 @@ config X86
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_SECCOMP_FILTER
- select HAVE_ARCH_SOFT_DIRTY if X86_64
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
- select HAVE_ARCH_WITHIN_STACK_FRAMES
- select HAVE_EBPF_JIT if X86_64
select HAVE_ARCH_VMAP_STACK if X86_64
+ select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_CC_STACKPROTECTOR
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
@@ -106,6 +119,7 @@ config X86
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_EBPF_JIT if X86_64
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EXIT_THREAD
select HAVE_FENTRY if X86_64
@@ -113,7 +127,6 @@ config X86
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
- select HAVE_GENERIC_DMA_COHERENT if X86_32
select HAVE_HW_BREAKPOINT
select HAVE_IDE
select HAVE_IOREMAP_PROT
@@ -142,15 +155,11 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_STACK_VALIDATION if X86_64
select HAVE_SYSCALL_TRACEPOINTS
- select HAVE_UID16 if X86_32 || IA32_EMULATION
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
select IRQ_FORCED_THREADING
- select MODULES_USE_ELF_RELA if X86_64
- select MODULES_USE_ELF_REL if X86_32
- select OLD_SIGACTION if X86_32
- select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
select PERF_EVENTS
select RTC_LIB
select RTC_MC146818_LIB
@@ -160,11 +169,7 @@ config X86
select THREAD_INFO_IN_TASK
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
- select X86_DEV_DMA_OPS if X86_64
select X86_FEATURE_NAMES if PROC_FS
- select HAVE_STACK_VALIDATION if X86_64
- select ARCH_USES_HIGH_VMA_FLAGS if X86_INTEL_MEMORY_PROTECTION_KEYS
- select ARCH_HAS_PKEYS if X86_INTEL_MEMORY_PROTECTION_KEYS
config INSTRUCTION_DECODER
def_bool y
@@ -939,6 +944,27 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
+config SCHED_MC_PRIO
+ bool "CPU core priorities scheduler support"
+ depends on SCHED_MC && CPU_SUP_INTEL
+ select X86_INTEL_PSTATE
+ select CPU_FREQ
+ default y
+ ---help---
+ Intel Turbo Boost Max Technology 3.0 enabled CPUs have a
+ core ordering determined at manufacturing time, which allows
+ certain cores to reach higher turbo frequencies (when running
+ single threaded workloads) than others.
+
+ Enabling this kernel feature teaches the scheduler about
+ the TBM3 (aka ITMT) priority order of the CPU cores and adjusts the
+ scheduler's CPU selection logic accordingly, so that higher
+ overall system performance can be achieved.
+
+ This feature will have no effect on CPUs without this feature.
+
+ If unsure say Y here.
+
source "kernel/Kconfig.preempt"
config UP_LATE_INIT
@@ -1025,7 +1051,7 @@ config X86_MCE_INTEL
config X86_MCE_AMD
def_bool y
prompt "AMD MCE features"
- depends on X86_MCE && X86_LOCAL_APIC
+ depends on X86_MCE && X86_LOCAL_APIC && AMD_NB
---help---
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
@@ -1737,6 +1763,8 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
def_bool y
# Note: only available in 64-bit mode
depends on CPU_SUP_INTEL && X86_64
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_PKEYS
---help---
Memory Protection Keys provides a mechanism for enforcing
page-based protections, but without requiring modification of the
@@ -2092,7 +2120,7 @@ config DEBUG_HOTPLUG_CPU0
config COMPAT_VDSO
def_bool n
prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
- depends on X86_32 || IA32_EMULATION
+ depends on COMPAT_32
---help---
Certain buggy versions of glibc will crash if they are
presented with a 32-bit vDSO that is not mapped at the address
@@ -2694,9 +2722,10 @@ source "fs/Kconfig.binfmt"
config IA32_EMULATION
bool "IA32 Emulation"
depends on X86_64
+ select ARCH_WANT_OLD_COMPAT_IPC
select BINFMT_ELF
select COMPAT_BINFMT_ELF
- select ARCH_WANT_OLD_COMPAT_IPC
+ select COMPAT_OLD_SIGACTION
---help---
Include code to run legacy 32-bit programs under a
64-bit kernel. You should likely turn this on, unless you're
@@ -2721,6 +2750,12 @@ config X86_X32
elf32_x86_64 support enabled to compile a kernel with this
option set.
+config COMPAT_32
+ def_bool y
+ depends on IA32_EMULATION || X86_32
+ select HAVE_UID16
+ select OLD_SIGSUSPEND3
+
config COMPAT
def_bool y
depends on IA32_EMULATION || X86_X32
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 12ea8f8..0d810fb 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -65,7 +65,7 @@ clean-files += cpustr.h
# ---------------------------------------------------------------------------
-KBUILD_CFLAGS := $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
+KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
UBSAN_SANITIZE := n
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index cc69e37..ff01c8f 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -537,6 +537,69 @@ free_handle:
efi_call_early(free_pool, pci_handle);
}
+static void retrieve_apple_device_properties(struct boot_params *boot_params)
+{
+ efi_guid_t guid = APPLE_PROPERTIES_PROTOCOL_GUID;
+ struct setup_data *data, *new;
+ efi_status_t status;
+ u32 size = 0;
+ void *p;
+
+ status = efi_call_early(locate_protocol, &guid, NULL, &p);
+ if (status != EFI_SUCCESS)
+ return;
+
+ if (efi_table_attr(apple_properties_protocol, version, p) != 0x10000) {
+ efi_printk(sys_table, "Unsupported properties proto version\n");
+ return;
+ }
+
+ efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size);
+ if (!size)
+ return;
+
+ do {
+ status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+ size + sizeof(struct setup_data), &new);
+ if (status != EFI_SUCCESS) {
+ efi_printk(sys_table,
+ "Failed to alloc mem for properties\n");
+ return;
+ }
+
+ status = efi_call_proto(apple_properties_protocol, get_all, p,
+ new->data, &size);
+
+ if (status == EFI_BUFFER_TOO_SMALL)
+ efi_call_early(free_pool, new);
+ } while (status == EFI_BUFFER_TOO_SMALL);
+
+ new->type = SETUP_APPLE_PROPERTIES;
+ new->len = size;
+ new->next = 0;
+
+ data = (struct setup_data *)(unsigned long)boot_params->hdr.setup_data;
+ if (!data)
+ boot_params->hdr.setup_data = (unsigned long)new;
+ else {
+ while (data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+ data->next = (unsigned long)new;
+ }
+}
+
+static void setup_quirks(struct boot_params *boot_params)
+{
+ efi_char16_t const apple[] = { 'A', 'p', 'p', 'l', 'e', 0 };
+ efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
+ efi_table_attr(efi_system_table, fw_vendor, sys_table);
+
+ if (!memcmp(fw_vendor, apple, sizeof(apple))) {
+ if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
+ retrieve_apple_device_properties(boot_params);
+ }
+}
+
static efi_status_t
setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height)
{
@@ -1098,6 +1161,8 @@ struct boot_params *efi_main(struct efi_config *c,
setup_efi_pci(boot_params);
+ setup_quirks(boot_params);
+
status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
sizeof(*gdt), (void **)&gdt);
if (status != EFI_SUCCESS) {
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index efdfba2..4d85e60 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -119,8 +119,7 @@ ENTRY(startup_32)
*/
/* Load new GDT with the 64bit segments using 32bit descriptor */
- leal gdt(%ebp), %eax
- movl %eax, gdt+2(%ebp)
+ addl %ebp, gdt+2(%ebp)
lgdt gdt(%ebp)
/* Enable PAE mode */
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9a9e588..05ed3d3 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -90,8 +90,8 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
- .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
- addq $-(15*8+\addskip), %rsp
+ .macro ALLOC_PT_GPREGS_ON_STACK
+ addq $-(15*8), %rsp
.endm
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
@@ -147,15 +147,6 @@ For 32-bit we have the following conventions - kernel is built with
movq 5*8+\offset(%rsp), %rbx
.endm
- .macro ZERO_EXTRA_REGS
- xorl %r15d, %r15d
- xorl %r14d, %r14d
- xorl %r13d, %r13d
- xorl %r12d, %r12d
- xorl %ebp, %ebp
- xorl %ebx, %ebx
- .endm
-
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
movq 6*8(%rsp), %r11
@@ -201,6 +192,26 @@ For 32-bit we have the following conventions - kernel is built with
.byte 0xf1
.endm
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+#ifdef CONFIG_FRAME_POINTER
+ .if \ptregs_offset
+ leaq \ptregs_offset(%rsp), %rbp
+ .else
+ mov %rsp, %rbp
+ .endif
+ orq $0x1, %rbp
+#endif
+.endm
+
#endif /* CONFIG_X86_64 */
/*
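[ Editor's illustration, not part of the patch: the ENCODE_FRAME_POINTER comment above describes the LSB-encoding trick in prose. The minimal C sketch below shows how a frame-pointer unwinder can recognize and decode such a pointer; the helper names are made up for illustration. ]

#include <stdbool.h>
#include <stdint.h>

/* A real saved frame pointer is at least word-aligned, so a set LSB can
 * never be a valid stack address; it flags an encoded pt_regs pointer. */
static bool bp_points_to_pt_regs(uintptr_t bp)
{
	return bp & 0x1;
}

/* Clear the LSB to recover the actual pt_regs address on the stack. */
static uintptr_t decode_pt_regs_pointer(uintptr_t bp)
{
	return bp & ~(uintptr_t)0x1;
}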
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 21b352a..acc0c6f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -45,6 +45,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/frame.h>
.section .entry.text, "ax"
@@ -175,6 +176,22 @@
SET_KERNEL_GS %edx
.endm
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original rbp.
+ */
+.macro ENCODE_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
+ mov %esp, %ebp
+ orl $0x1, %ebp
+#endif
+.endm
+
.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
@@ -238,6 +255,23 @@ ENTRY(__switch_to_asm)
END(__switch_to_asm)
/*
+ * The unwinder expects the last frame on the stack to always be at the same
+ * offset from the end of the page, which allows it to validate the stack.
+ * Calling schedule_tail() directly would break that convention because its an
+ * asmlinkage function so its argument has to be pushed on the stack. This
+ * wrapper creates a proper "end of stack" frame header before the call.
+ */
+ENTRY(schedule_tail_wrapper)
+ FRAME_BEGIN
+
+ pushl %eax
+ call schedule_tail
+ popl %eax
+
+ FRAME_END
+ ret
+ENDPROC(schedule_tail_wrapper)
+/*
* A newly forked process directly context switches into this address.
*
* eax: prev task we switched from
@@ -245,9 +279,7 @@ END(__switch_to_asm)
* edi: kernel thread arg
*/
ENTRY(ret_from_fork)
- pushl %eax
- call schedule_tail
- popl %eax
+ call schedule_tail_wrapper
testl %ebx, %ebx
jnz 1f /* kernel threads are uncommon */
@@ -307,13 +339,13 @@ END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
-need_resched:
+.Lneed_resched:
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz restore_all
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
call preempt_schedule_irq
- jmp need_resched
+ jmp .Lneed_resched
END(resume_kernel)
#endif
@@ -334,7 +366,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
*/
ENTRY(xen_sysenter_target)
addl $5*4, %esp /* remove xen-provided frame */
- jmp sysenter_past_esp
+ jmp .Lsysenter_past_esp
#endif
/*
@@ -371,7 +403,7 @@ ENTRY(xen_sysenter_target)
*/
ENTRY(entry_SYSENTER_32)
movl TSS_sysenter_sp0(%esp), %esp
-sysenter_past_esp:
+.Lsysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
@@ -504,9 +536,9 @@ ENTRY(entry_INT80_32)
restore_all:
TRACE_IRQS_IRET
-restore_all_notrace:
+.Lrestore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
- ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
+ ALTERNATIVE "jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
/*
@@ -518,22 +550,23 @@ restore_all_notrace:
movb PT_CS(%esp), %al
andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
- je ldt_ss # returning to user-space with LDT SS
+ je .Lldt_ss # returning to user-space with LDT SS
#endif
-restore_nocheck:
+.Lrestore_nocheck:
RESTORE_REGS 4 # skip orig_eax/error_code
-irq_return:
+.Lirq_return:
INTERRUPT_RETURN
+
.section .fixup, "ax"
ENTRY(iret_exc )
pushl $0 # no error code
pushl $do_iret_error
- jmp error_code
+ jmp common_exception
.previous
- _ASM_EXTABLE(irq_return, iret_exc)
+ _ASM_EXTABLE(.Lirq_return, iret_exc)
#ifdef CONFIG_X86_ESPFIX32
-ldt_ss:
+.Lldt_ss:
/*
* Setup and switch to ESPFIX stack
*
@@ -562,7 +595,7 @@ ldt_ss:
*/
DISABLE_INTERRUPTS(CLBR_EAX)
lss (%esp), %esp /* switch to espfix segment */
- jmp restore_nocheck
+ jmp .Lrestore_nocheck
#endif
ENDPROC(entry_INT80_32)
@@ -624,6 +657,7 @@ common_interrupt:
ASM_CLAC
addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
SAVE_ALL
+ ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
movl %esp, %eax
call do_IRQ
@@ -635,6 +669,7 @@ ENTRY(name) \
ASM_CLAC; \
pushl $~(nr); \
SAVE_ALL; \
+ ENCODE_FRAME_POINTER; \
TRACE_IRQS_OFF \
movl %esp, %eax; \
call fn; \
@@ -659,7 +694,7 @@ ENTRY(coprocessor_error)
ASM_CLAC
pushl $0
pushl $do_coprocessor_error
- jmp error_code
+ jmp common_exception
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
@@ -673,14 +708,14 @@ ENTRY(simd_coprocessor_error)
#else
pushl $do_simd_coprocessor_error
#endif
- jmp error_code
+ jmp common_exception
END(simd_coprocessor_error)
ENTRY(device_not_available)
ASM_CLAC
pushl $-1 # mark this as an int
pushl $do_device_not_available
- jmp error_code
+ jmp common_exception
END(device_not_available)
#ifdef CONFIG_PARAVIRT
@@ -694,59 +729,59 @@ ENTRY(overflow)
ASM_CLAC
pushl $0
pushl $do_overflow
- jmp error_code
+ jmp common_exception
END(overflow)
ENTRY(bounds)
ASM_CLAC
pushl $0
pushl $do_bounds
- jmp error_code
+ jmp common_exception
END(bounds)
ENTRY(invalid_op)
ASM_CLAC
pushl $0
pushl $do_invalid_op
- jmp error_code
+ jmp common_exception
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
ASM_CLAC
pushl $0
pushl $do_coprocessor_segment_overrun
- jmp error_code
+ jmp common_exception
END(coprocessor_segment_overrun)
ENTRY(invalid_TSS)
ASM_CLAC
pushl $do_invalid_TSS
- jmp error_code
+ jmp common_exception
END(invalid_TSS)
ENTRY(segment_not_present)
ASM_CLAC
pushl $do_segment_not_present
- jmp error_code
+ jmp common_exception
END(segment_not_present)
ENTRY(stack_segment)
ASM_CLAC
pushl $do_stack_segment
- jmp error_code
+ jmp common_exception
END(stack_segment)
ENTRY(alignment_check)
ASM_CLAC
pushl $do_alignment_check
- jmp error_code
+ jmp common_exception
END(alignment_check)
ENTRY(divide_error)
ASM_CLAC
pushl $0 # no error code
pushl $do_divide_error
- jmp error_code
+ jmp common_exception
END(divide_error)
#ifdef CONFIG_X86_MCE
@@ -754,7 +789,7 @@ ENTRY(machine_check)
ASM_CLAC
pushl $0
pushl machine_check_vector
- jmp error_code
+ jmp common_exception
END(machine_check)
#endif
@@ -762,13 +797,14 @@ ENTRY(spurious_interrupt_bug)
ASM_CLAC
pushl $0
pushl $do_spurious_interrupt_bug
- jmp error_code
+ jmp common_exception
END(spurious_interrupt_bug)
#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
+ ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
/*
@@ -823,6 +859,7 @@ ENTRY(xen_failsafe_callback)
jmp iret_exc
5: pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
+ ENCODE_FRAME_POINTER
jmp ret_from_exception
.section .fixup, "ax"
@@ -882,7 +919,7 @@ ftrace_call:
popl %edx
popl %ecx
popl %eax
-ftrace_ret:
+.Lftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
@@ -952,7 +989,7 @@ GLOBAL(ftrace_regs_call)
popl %gs
addl $8, %esp /* Skip orig_ax and ip */
popf /* Pop flags at end (no addl to corrupt flags) */
- jmp ftrace_ret
+ jmp .Lftrace_ret
popf
jmp ftrace_stub
@@ -963,7 +1000,7 @@ ENTRY(mcount)
jb ftrace_stub /* Paging not enabled yet? */
cmpl $ftrace_stub, ftrace_trace_function
- jnz trace
+ jnz .Ltrace
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
cmpl $ftrace_stub, ftrace_graph_return
jnz ftrace_graph_caller
@@ -976,7 +1013,7 @@ ftrace_stub:
ret
/* taken from glibc */
-trace:
+.Ltrace:
pushl %eax
pushl %ecx
pushl %edx
@@ -1027,7 +1064,7 @@ return_to_handler:
ENTRY(trace_page_fault)
ASM_CLAC
pushl $trace_do_page_fault
- jmp error_code
+ jmp common_exception
END(trace_page_fault)
#endif
@@ -1035,7 +1072,10 @@ ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
ALIGN
-error_code:
+ jmp common_exception
+END(page_fault)
+
+common_exception:
/* the function address is in %gs's slot on the stack */
pushl %fs
pushl %es
@@ -1047,6 +1087,7 @@ error_code:
pushl %edx
pushl %ecx
pushl %ebx
+ ENCODE_FRAME_POINTER
cld
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
@@ -1064,7 +1105,7 @@ error_code:
movl %esp, %eax # pt_regs pointer
call *%edi
jmp ret_from_exception
-END(page_fault)
+END(common_exception)
ENTRY(debug)
/*
@@ -1079,6 +1120,7 @@ ENTRY(debug)
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
+ ENCODE_FRAME_POINTER
xorl %edx, %edx # error code 0
movl %esp, %eax # pt_regs pointer
@@ -1094,11 +1136,11 @@ ENTRY(debug)
.Ldebug_from_sysenter_stack:
/* We're on the SYSENTER stack. Switch off. */
- movl %esp, %ebp
+ movl %esp, %ebx
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
TRACE_IRQS_OFF
call do_debug
- movl %ebp, %esp
+ movl %ebx, %esp
jmp ret_from_exception
END(debug)
@@ -1116,11 +1158,12 @@ ENTRY(nmi)
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
popl %eax
- je nmi_espfix_stack
+ je .Lnmi_espfix_stack
#endif
pushl %eax # pt_regs->orig_ax
SAVE_ALL
+ ENCODE_FRAME_POINTER
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
@@ -1132,21 +1175,21 @@ ENTRY(nmi)
/* Not on SYSENTER stack. */
call do_nmi
- jmp restore_all_notrace
+ jmp .Lrestore_all_notrace
.Lnmi_from_sysenter_stack:
/*
* We're on the SYSENTER stack. Switch off. No one (not even debug)
* is using the thread stack right now, so it's safe for us to use it.
*/
- movl %esp, %ebp
+ movl %esp, %ebx
movl PER_CPU_VAR(cpu_current_top_of_stack), %esp
call do_nmi
- movl %ebp, %esp
- jmp restore_all_notrace
+ movl %ebx, %esp
+ jmp .Lrestore_all_notrace
#ifdef CONFIG_X86_ESPFIX32
-nmi_espfix_stack:
+.Lnmi_espfix_stack:
/*
* create the pointer to lss back
*/
@@ -1159,12 +1202,13 @@ nmi_espfix_stack:
.endr
pushl %eax
SAVE_ALL
+ ENCODE_FRAME_POINTER
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx, %edx # zero error code
call do_nmi
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
- jmp irq_return
+ jmp .Lirq_return
#endif
END(nmi)
@@ -1172,6 +1216,7 @@ ENTRY(int3)
ASM_CLAC
pushl $-1 # mark this as an int
SAVE_ALL
+ ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
xorl %edx, %edx # zero error code
movl %esp, %eax # pt_regs pointer
@@ -1181,14 +1226,14 @@ END(int3)
ENTRY(general_protection)
pushl $do_general_protection
- jmp error_code
+ jmp common_exception
END(general_protection)
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
ASM_CLAC
pushl $do_async_page_fault
- jmp error_code
+ jmp common_exception
END(async_page_fault)
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ef766a3..5b21970 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -38,12 +38,6 @@
#include <asm/export.h>
#include <linux/err.h>
-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_64BIT 0x80000000
-#define __AUDIT_ARCH_LE 0x40000000
-
.code64
.section .entry.text, "ax"
@@ -469,6 +463,7 @@ END(irq_entries_start)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
+ ENCODE_FRAME_POINTER
testb $3, CS(%rsp)
jz 1f
@@ -985,6 +980,7 @@ ENTRY(xen_failsafe_callback)
ALLOC_PT_GPREGS_ON_STACK
SAVE_C_REGS
SAVE_EXTRA_REGS
+ ENCODE_FRAME_POINTER
jmp error_exit
END(xen_failsafe_callback)
@@ -1028,6 +1024,7 @@ ENTRY(paranoid_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ ENCODE_FRAME_POINTER 8
movl $1, %ebx
movl $MSR_GS_BASE, %ecx
rdmsr
@@ -1075,6 +1072,7 @@ ENTRY(error_entry)
cld
SAVE_C_REGS 8
SAVE_EXTRA_REGS 8
+ ENCODE_FRAME_POINTER 8
xorl %ebx, %ebx
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
@@ -1257,6 +1255,7 @@ ENTRY(nmi)
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ ENCODE_FRAME_POINTER
/*
* At this point we no longer need to worry about stack damage
@@ -1270,11 +1269,10 @@ ENTRY(nmi)
/*
* Return back to user mode. We must *not* do the normal exit
- * work, because we don't want to enable interrupts. Fortunately,
- * do_nmi doesn't modify pt_regs.
+ * work, because we don't want to enable interrupts.
*/
SWAPGS
- jmp restore_c_regs_and_iret
+ jmp restore_regs_and_iret
.Lnmi_from_kernel:
/*
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 23c881c..e739002 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -161,8 +161,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
}
text_start = addr - image->sym_vvar_start;
- current->mm->context.vdso = (void __user *)text_start;
- current->mm->context.vdso_image = image;
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@@ -189,14 +187,12 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
do_munmap(mm, text_start, image->size);
+ } else {
+ current->mm->context.vdso = (void __user *)text_start;
+ current->mm->context.vdso_image = image;
}
up_fail:
- if (ret) {
- current->mm->context.vdso = NULL;
- current->mm->context.vdso_image = NULL;
- }
-
up_write(&mm->mmap_sem);
return ret;
}
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index f5f4b3f..afb222b 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void)
pr_cont("Fam15h ");
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
break;
-
+ case 0x17:
+ pr_cont("Fam17h ");
+ /*
+ * In family 17h, there are no event constraints in the PMC hardware.
+ * We fallback to using default amd_get_event_constraints.
+ */
+ break;
default:
pr_err("core perfctr but no constraints; unknown hardware!\n");
return -ENODEV;
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d31735f..f1c2258 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -69,7 +69,7 @@ u64 x86_perf_event_update(struct perf_event *event)
int shift = 64 - x86_pmu.cntval_bits;
u64 prev_raw_count, new_raw_count;
int idx = hwc->idx;
- s64 delta;
+ u64 delta;
if (idx == INTEL_PMC_IDX_FIXED_BTS)
return 0;
@@ -365,7 +365,11 @@ int x86_add_exclusive(unsigned int what)
{
int i;
- if (x86_pmu.lbr_pt_coexist)
+ /*
+ * When lbr_pt_coexist we allow PT to coexist with either LBR or BTS.
+ * LBR and BTS are still mutually exclusive.
+ */
+ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return 0;
if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
@@ -388,7 +392,7 @@ fail_unlock:
void x86_del_exclusive(unsigned int what)
{
- if (x86_pmu.lbr_pt_coexist)
+ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return;
atomic_dec(&x86_pmu.lbr_exclusive[what]);
@@ -2299,7 +2303,7 @@ valid_user_frame(const void __user *fp, unsigned long size)
static unsigned long get_segment_base(unsigned int segment)
{
struct desc_struct *desc;
- int idx = segment >> 3;
+ unsigned int idx = segment >> 3;
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
@@ -2352,7 +2356,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
frame.next_frame = 0;
frame.return_address = 0;
- if (!access_ok(VERIFY_READ, fp, 8))
+ if (!valid_user_frame(fp, sizeof(frame)))
break;
bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
@@ -2362,9 +2366,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
if (bytes != 0)
break;
- if (!valid_user_frame(fp, sizeof(frame)))
- break;
-
perf_callchain_store(entry, cs_base + frame.return_address);
fp = compat_ptr(ss_base + frame.next_frame);
}
@@ -2413,7 +2414,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
frame.next_frame = NULL;
frame.return_address = 0;
- if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
+ if (!valid_user_frame(fp, sizeof(frame)))
break;
bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
@@ -2423,9 +2424,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
if (bytes != 0)
break;
- if (!valid_user_frame(fp, sizeof(frame)))
- break;
-
perf_callchain_store(entry, frame.return_address);
fp = (void __user *)frame.next_frame;
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a74a2db..cb85222 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4034,7 +4034,7 @@ __init int intel_pmu_init(void)
/* Support full width counters using alternative MSR range */
if (x86_pmu.intel_cap.full_width_write) {
- x86_pmu.max_period = x86_pmu.cntval_mask;
+ x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
x86_pmu.perfctr = MSR_IA32_PMC0;
pr_cont("full-width counters, ");
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 4f5ac72..da51e5a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -540,6 +540,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 0319311..be20239 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
}
/*
- * We use the interrupt regs as a base because the PEBS record
- * does not contain a full regs set, specifically it seems to
- * lack segment descriptors, which get used by things like
- * user_mode().
+ * We use the interrupt regs as a base because the PEBS record does not
+ * contain a full regs set, specifically it seems to lack segment
+ * descriptors, which get used by things like user_mode().
*
- * In the simple case fix up only the IP and BP,SP regs, for
- * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
- * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+ * In the simple case fix up only the IP for PERF_SAMPLE_IP.
+ *
+ * We must however always use BP,SP from iregs for the unwinder to stay
+ * sane; the record BP,SP can point into thin air when the record is
+ * from a previous PMI context or an (I)RET happend between the record
+ * and PMI.
*/
*regs = *iregs;
regs->flags = pebs->flags;
set_linear_ip(regs, pebs->ip);
- regs->bp = pebs->bp;
- regs->sp = pebs->sp;
if (sample_type & PERF_SAMPLE_REGS_INTR) {
regs->ax = pebs->ax;
@@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
regs->dx = pebs->dx;
regs->si = pebs->si;
regs->di = pebs->di;
- regs->bp = pebs->bp;
- regs->sp = pebs->sp;
- regs->flags = pebs->flags;
+ /*
+ * Per the above; only set BP,SP if we don't need callchains.
+ *
+ * XXX: does this make sense?
+ */
+ if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ regs->bp = pebs->bp;
+ regs->sp = pebs->sp;
+ }
+
+ /*
+ * Preserve PERF_EFLAGS_VM from set_linear_ip().
+ */
+ regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
#ifndef CONFIG_X86_32
regs->r8 = pebs->r8;
regs->r9 = pebs->r9;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index c5047b8..1c1b9fe 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -36,13 +36,6 @@ static DEFINE_PER_CPU(struct pt, pt_ctx);
static struct pt_pmu pt_pmu;
-enum cpuid_regs {
- CR_EAX = 0,
- CR_ECX,
- CR_EDX,
- CR_EBX
-};
-
/*
* Capabilities of Intel PT hardware, such as number of address bits or
* supported output schemes, are cached and exported to userspace as "caps"
@@ -64,21 +57,21 @@ static struct pt_cap_desc {
u8 reg;
u32 mask;
} pt_caps[] = {
- PT_CAP(max_subleaf, 0, CR_EAX, 0xffffffff),
- PT_CAP(cr3_filtering, 0, CR_EBX, BIT(0)),
- PT_CAP(psb_cyc, 0, CR_EBX, BIT(1)),
- PT_CAP(ip_filtering, 0, CR_EBX, BIT(2)),
- PT_CAP(mtc, 0, CR_EBX, BIT(3)),
- PT_CAP(ptwrite, 0, CR_EBX, BIT(4)),
- PT_CAP(power_event_trace, 0, CR_EBX, BIT(5)),
- PT_CAP(topa_output, 0, CR_ECX, BIT(0)),
- PT_CAP(topa_multiple_entries, 0, CR_ECX, BIT(1)),
- PT_CAP(single_range_output, 0, CR_ECX, BIT(2)),
- PT_CAP(payloads_lip, 0, CR_ECX, BIT(31)),
- PT_CAP(num_address_ranges, 1, CR_EAX, 0x3),
- PT_CAP(mtc_periods, 1, CR_EAX, 0xffff0000),
- PT_CAP(cycle_thresholds, 1, CR_EBX, 0xffff),
- PT_CAP(psb_periods, 1, CR_EBX, 0xffff0000),
+ PT_CAP(max_subleaf, 0, CPUID_EAX, 0xffffffff),
+ PT_CAP(cr3_filtering, 0, CPUID_EBX, BIT(0)),
+ PT_CAP(psb_cyc, 0, CPUID_EBX, BIT(1)),
+ PT_CAP(ip_filtering, 0, CPUID_EBX, BIT(2)),
+ PT_CAP(mtc, 0, CPUID_EBX, BIT(3)),
+ PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)),
+ PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)),
+ PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)),
+ PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)),
+ PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)),
+ PT_CAP(payloads_lip, 0, CPUID_ECX, BIT(31)),
+ PT_CAP(num_address_ranges, 1, CPUID_EAX, 0x3),
+ PT_CAP(mtc_periods, 1, CPUID_EAX, 0xffff0000),
+ PT_CAP(cycle_thresholds, 1, CPUID_EBX, 0xffff),
+ PT_CAP(psb_periods, 1, CPUID_EBX, 0xffff0000),
};
static u32 pt_cap_get(enum pt_capabilities cap)
@@ -213,10 +206,10 @@ static int __init pt_pmu_hw_init(void)
for (i = 0; i < PT_CPUID_LEAVES; i++) {
cpuid_count(20, i,
- &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM],
- &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM],
- &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM],
- &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]);
+ &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM],
+ &pt_pmu.caps[CPUID_EBX + i*PT_CPUID_REGS_NUM],
+ &pt_pmu.caps[CPUID_ECX + i*PT_CPUID_REGS_NUM],
+ &pt_pmu.caps[CPUID_EDX + i*PT_CPUID_REGS_NUM]);
}
ret = -ENOMEM;
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index efca268..dbaaf7dc 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
*/
static int uncore_pmu_event_init(struct perf_event *event);
-static bool is_uncore_event(struct perf_event *event)
+static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
{
- return event->pmu->event_init == uncore_pmu_event_init;
+ return &box->pmu->pmu == event->pmu;
}
static int
@@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
n = box->n_events;
- if (is_uncore_event(leader)) {
+ if (is_box_event(box, leader)) {
box->event_list[n] = leader;
n++;
}
@@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
return n;
list_for_each_entry(event, &leader->sibling_list, group_entry) {
- if (!is_uncore_event(event) ||
+ if (!is_box_event(box, event) ||
event->state <= PERF_EVENT_STATE_OFF)
continue;
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 81195cc..a3dcc12 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
snb_uncore_imc_event_start(event, 0);
- box->n_events++;
-
return 0;
}
static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
{
- struct intel_uncore_box *box = uncore_event_to_box(event);
- int i;
-
snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
- for (i = 0; i < box->n_events; i++) {
- if (event == box->event_list[i]) {
- --box->n_events;
- break;
- }
- }
}
int snb_pci2phy_map_init(int devid)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 5874d8d..bcbb1d2 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -113,7 +113,7 @@ struct debug_store {
* Per register state.
*/
struct er_account {
- raw_spinlock_t lock; /* per-core: protect structure */
+ raw_spinlock_t lock; /* per-core: protect structure */
u64 config; /* extra MSR config */
u64 reg; /* extra MSR number */
atomic_t ref; /* reference count */
@@ -604,7 +604,7 @@ struct x86_pmu {
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */
bool lbr_double_abort; /* duplicated lbr aborts */
- bool lbr_pt_coexist; /* LBR may coexist with PT */
+ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */
/*
* Intel PT/LBR/BTS are exclusive
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 2cfed17..2b892e2 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -6,10 +6,6 @@ generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
-genhdr-y += unistd_32.h
-genhdr-y += unistd_64.h
-genhdr-y += unistd_x32.h
-
generic-y += clkdev.h
generic-y += cputime.h
generic-y += dma-contiguous.h
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 5e828da..00c88a0 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -21,6 +21,10 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, unsigned long);
+extern int amd_smn_read(u16 node, u32 address, u32 *value);
+extern int amd_smn_write(u16 node, u32 address, u32 value);
+extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo);
+
struct amd_l3_cache {
unsigned indices;
u8 subcaches[4];
@@ -55,6 +59,7 @@ struct threshold_bank {
};
struct amd_northbridge {
+ struct pci_dev *root;
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
@@ -66,7 +71,6 @@ struct amd_northbridge_info {
u64 flags;
struct amd_northbridge *nb;
};
-extern struct amd_northbridge_info amd_northbridges;
#define AMD_NB_GART BIT(0)
#define AMD_NB_L3_INDEX_DISABLE BIT(1)
@@ -74,20 +78,9 @@ extern struct amd_northbridge_info amd_northbridges;
#ifdef CONFIG_AMD_NB
-static inline u16 amd_nb_num(void)
-{
- return amd_northbridges.num;
-}
-
-static inline bool amd_nb_has_feature(unsigned feature)
-{
- return ((amd_northbridges.flags & feature) == feature);
-}
-
-static inline struct amd_northbridge *node_to_amd_nb(int node)
-{
- return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
-}
+u16 amd_nb_num(void);
+bool amd_nb_has_feature(unsigned int feature);
+struct amd_northbridge *node_to_amd_nb(int node);
static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
{
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f5aaf6c..a5a0bcf 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -196,7 +196,7 @@ static inline void native_apic_msr_write(u32 reg, u32 v)
static inline void native_apic_msr_eoi_write(u32 reg, u32 v)
{
- wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
+ wrmsr_notrace(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0);
}
static inline u32 native_apic_msr_read(u32 reg)
@@ -332,6 +332,7 @@ struct apic {
* on write for EOI.
*/
void (*eoi_write)(u32 reg, u32 v);
+ void (*native_eoi_write)(u32 reg, u32 v);
u64 (*icr_read)(void);
void (*icr_write)(u32 low, u32 high);
void (*wait_icr_idle)(void);
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 03d269b..24118c0 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -272,7 +272,6 @@ struct compat_shmid64_ds {
/*
* The type of struct elf_prstatus.pr_reg in compatible core dumps.
*/
-#ifdef CONFIG_X86_X32_ABI
typedef struct user_regs_struct compat_elf_gregset_t;
/* Full regset -- prstatus on x32, otherwise on ia32 */
@@ -281,10 +280,9 @@ typedef struct user_regs_struct compat_elf_gregset_t;
do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \
while (0)
+#ifdef CONFIG_X86_X32_ABI
#define COMPAT_USE_64BIT_TIME \
(!!(task_pt_regs(current)->orig_ax & __X32_SYSCALL_BIT))
-#else
-typedef struct user_regs_struct32 compat_elf_gregset_t;
#endif
/*
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index cddd5d0..e83f972 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -192,6 +192,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
@@ -225,6 +226,7 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
@@ -278,8 +280,10 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 389d700..e99675b 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -210,12 +210,18 @@ static inline bool efi_is_64bit(void)
return __efi_early()->is64;
}
+#define efi_table_attr(table, attr, instance) \
+ (efi_is_64bit() ? \
+ ((table##_64_t *)(unsigned long)instance)->attr : \
+ ((table##_32_t *)(unsigned long)instance)->attr)
+
+#define efi_call_proto(protocol, f, instance, ...) \
+ __efi_early()->call(efi_table_attr(protocol, f, instance), \
+ instance, ##__VA_ARGS__)
+
#define efi_call_early(f, ...) \
- __efi_early()->call(efi_is_64bit() ? \
- ((efi_boot_services_64_t *)(unsigned long) \
- __efi_early()->boot_services)->f : \
- ((efi_boot_services_32_t *)(unsigned long) \
- __efi_early()->boot_services)->f, __VA_ARGS__)
+ __efi_early()->call(efi_table_attr(efi_boot_services, f, \
+ __efi_early()->boot_services), __VA_ARGS__)
#define __efi_call_early(f, ...) \
__efi_early()->call((unsigned long)f, __VA_ARGS__);
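[ Editor's sketch, derived from the macro definitions above and not authoritative: for readers following the eboot.c hunk earlier in this diff, this is roughly what the new helpers expand to on 64-bit firmware. ]

/*
 *	efi_call_proto(apple_properties_protocol, get_all, p, NULL, &size);
 *
 * expands (when efi_is_64bit()) to approximately:
 *
 *	__efi_early()->call(
 *		((apple_properties_protocol_64_t *)(unsigned long)p)->get_all,
 *		p, NULL, &size);
 *
 * i.e. efi_table_attr() picks the 32- or 64-bit layout of the table or
 * protocol structure at runtime, and efi_call_proto() passes the protocol
 * instance back as the first argument of the method it looked up.
 */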
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index d318811..29a594a 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -21,7 +21,6 @@ enum die_val {
DIE_NMIUNKNOWN,
};
-extern void printk_address(unsigned long address);
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 9bd7ff5..caab413 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -252,8 +252,10 @@ static inline void cmci_recheck(void) {}
#ifdef CONFIG_X86_MCE_AMD
void mce_amd_feature_init(struct cpuinfo_x86 *c);
+int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
#else
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
+static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
#endif
int mce_available(struct cpuinfo_x86 *c);
@@ -356,28 +358,23 @@ enum smca_bank_types {
N_SMCA_BANK_TYPES
};
-struct smca_bank_name {
- const char *name; /* Short name for sysfs */
- const char *long_name; /* Long name for pretty-printing */
-};
-
-extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
-
-#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
+#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype))
-struct smca_hwid_mcatype {
+struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
};
-struct smca_bank_info {
- struct smca_hwid_mcatype *type;
- u32 type_instance;
+struct smca_bank {
+ struct smca_hwid *hwid;
+ /* Instance ID */
+ u32 id;
};
-extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+extern struct smca_bank smca_banks[MAX_NR_BANKS];
+extern const char *smca_get_long_name(enum smca_bank_types t);
#endif
#endif /* _ASM_X86_MCE_H */
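[ Editor's note, with a hypothetical operand, on why the added parentheses in HWID_MCATYPE() matter. ]

/* With the old definition an expression argument binds wrongly, because
 * '&' has lower precedence than '<<':
 *
 *	HWID_MCATYPE(hwid & 0xfff, mcatype)
 *	  old: ((hwid & 0xfff << 16) | mcatype)  == ((hwid & (0xfff << 16)) | mcatype)
 *	  new: (((hwid & 0xfff) << 16) | (mcatype))
 */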
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8e0a9fe..306c7e12 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -47,7 +47,7 @@ struct ldt_struct {
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
- int size;
+ unsigned int size;
};
/*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 78f3760..710273c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -37,6 +37,10 @@
#define EFER_FFXSR (1<<_EFER_FFXSR)
/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index b5fee97..db0b90c 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -70,14 +70,14 @@ extern struct tracepoint __tracepoint_read_msr;
extern struct tracepoint __tracepoint_write_msr;
extern struct tracepoint __tracepoint_rdpmc;
#define msr_tracepoint_active(t) static_key_false(&(t).key)
-extern void do_trace_write_msr(unsigned msr, u64 val, int failed);
-extern void do_trace_read_msr(unsigned msr, u64 val, int failed);
-extern void do_trace_rdpmc(unsigned msr, u64 val, int failed);
+extern void do_trace_write_msr(unsigned int msr, u64 val, int failed);
+extern void do_trace_read_msr(unsigned int msr, u64 val, int failed);
+extern void do_trace_rdpmc(unsigned int msr, u64 val, int failed);
#else
#define msr_tracepoint_active(t) false
-static inline void do_trace_write_msr(unsigned msr, u64 val, int failed) {}
-static inline void do_trace_read_msr(unsigned msr, u64 val, int failed) {}
-static inline void do_trace_rdpmc(unsigned msr, u64 val, int failed) {}
+static inline void do_trace_write_msr(unsigned int msr, u64 val, int failed) {}
+static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {}
+static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {}
#endif
static inline unsigned long long native_read_msr(unsigned int msr)
@@ -115,22 +115,36 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr,
}
/* Can be uninlined because referenced by paravirt */
-notrace static inline void native_write_msr(unsigned int msr,
- unsigned low, unsigned high)
+static inline void notrace
+__native_write_msr_notrace(unsigned int msr, u32 low, u32 high)
{
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe)
: : "c" (msr), "a"(low), "d" (high) : "memory");
+}
+
+/* Can be uninlined because referenced by paravirt */
+static inline void notrace
+native_write_msr(unsigned int msr, u32 low, u32 high)
+{
+ __native_write_msr_notrace(msr, low, high);
if (msr_tracepoint_active(__tracepoint_write_msr))
do_trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
+static inline void
+wrmsr_notrace(unsigned int msr, u32 low, u32 high)
+{
+ __native_write_msr_notrace(msr, low, high);
+}
+
/* Can be uninlined because referenced by paravirt */
-notrace static inline int native_write_msr_safe(unsigned int msr,
- unsigned low, unsigned high)
+static inline int notrace
+native_write_msr_safe(unsigned int msr, u32 low, u32 high)
{
int err;
+
asm volatile("2: wrmsr ; xor %[err],%[err]\n"
"1:\n\t"
".section .fixup,\"ax\"\n\t"
@@ -223,7 +237,7 @@ do { \
(void)((high) = (u32)(__val >> 32)); \
} while (0)
-static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
+static inline void wrmsr(unsigned int msr, u32 low, u32 high)
{
native_write_msr(msr, low, high);
}
@@ -231,13 +245,13 @@ static inline void wrmsr(unsigned msr, unsigned low, unsigned high)
#define rdmsrl(msr, val) \
((val) = native_read_msr((msr)))
-static inline void wrmsrl(unsigned msr, u64 val)
+static inline void wrmsrl(unsigned int msr, u64 val)
{
native_write_msr(msr, (u32)(val & 0xffffffffULL), (u32)(val >> 32));
}
/* wrmsr with exception handling */
-static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
+static inline int wrmsr_safe(unsigned int msr, u32 low, u32 high)
{
return native_write_msr_safe(msr, low, high);
}
@@ -252,7 +266,7 @@ static inline int wrmsr_safe(unsigned msr, unsigned low, unsigned high)
__err; \
})
-static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
+static inline int rdmsrl_safe(unsigned int msr, unsigned long long *p)
{
int err;
@@ -325,12 +339,12 @@ static inline int wrmsrl_on_cpu(unsigned int cpu, u32 msr_no, u64 q)
static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no,
struct msr *msrs)
{
- rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
+ rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h));
}
static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no,
struct msr *msrs)
{
- wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
+ wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h);
}
static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no,
u32 *l, u32 *h)
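[ Editor's usage note on the new wrmsr_notrace() helper; the motivation is inferred from the apic.h hunk earlier in this diff, which switches the EOI write over to it. ]

/*
 * wrmsr()         -> native_write_msr(): performs the write and, if the
 *                    write_msr tracepoint is active, also emits a trace event.
 * wrmsr_notrace() -> __native_write_msr_notrace(): performs only the write,
 *                    so it suits paths the tracing machinery itself may hit,
 *                    such as the APIC EOI write in native_apic_msr_eoi_write().
 */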
diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h
deleted file mode 100644
index 7d3a482..0000000
--- a/arch/x86/include/asm/mutex.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_X86_32
-# include <asm/mutex_32.h>
-#else
-# include <asm/mutex_64.h>
-#endif
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
deleted file mode 100644
index e9355a8..0000000
--- a/arch/x86/include/asm/mutex_32.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- */
-#ifndef _ASM_X86_MUTEX_32_H
-#define _ASM_X86_MUTEX_32_H
-
-#include <asm/alternative.h>
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fn> if it
- * wasn't 1 originally. This function MUST leave the value lower than 1
- * even when the "1" assertion wasn't true.
- */
-#define __mutex_fastpath_lock(count, fail_fn) \
-do { \
- unsigned int dummy; \
- \
- typecheck(atomic_t *, count); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " decl (%%eax)\n" \
- " jns 1f \n" \
- " call " #fail_fn "\n" \
- "1:\n" \
- : "=a" (dummy) \
- : "a" (count) \
- : "memory", "ecx", "edx"); \
-} while (0)
-
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_dec_return(count) < 0))
- return -1;
- else
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * try to promote the mutex from 0 to 1. if it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value
- * to 1, or to set it to a value lower than 1.
- *
- * If the implementation sets it to a value of lower than 1, the
- * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
- * to return 0 otherwise.
- */
-#define __mutex_fastpath_unlock(count, fail_fn) \
-do { \
- unsigned int dummy; \
- \
- typecheck(atomic_t *, count); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " incl (%%eax)\n" \
- " jg 1f\n" \
- " call " #fail_fn "\n" \
- "1:\n" \
- : "=a" (dummy) \
- : "a" (count) \
- : "memory", "ecx", "edx"); \
-} while (0)
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to a value lower than 1, and return 0 (failure)
- * if it wasn't 1 originally, or return 1 (success) otherwise. This function
- * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
- * Additionally, if the value was < 0 originally, this function must not leave
- * it to 0 on failure.
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
- int (*fail_fn)(atomic_t *))
-{
- /* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
- return 1;
-
- return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_32_H */
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
deleted file mode 100644
index d985075..0000000
--- a/arch/x86/include/asm/mutex_64.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Assembly implementation of the mutex fastpath, based on atomic
- * decrement/increment.
- *
- * started by Ingo Molnar:
- *
- * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- */
-#ifndef _ASM_X86_MUTEX_64_H
-#define _ASM_X86_MUTEX_64_H
-
-/**
- * __mutex_fastpath_lock - decrement and call function if negative
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is negative
- *
- * Atomically decrements @v and calls <fail_fn> if the result is negative.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_lock(atomic_t *v,
- void (*fail_fn)(atomic_t *))
-{
- asm_volatile_goto(LOCK_PREFIX " decl %0\n"
- " jns %l[exit]\n"
- : : "m" (v->counter)
- : "memory", "cc"
- : exit);
- fail_fn(v);
-exit:
- return;
-}
-#else
-#define __mutex_fastpath_lock(v, fail_fn) \
-do { \
- unsigned long dummy; \
- \
- typecheck(atomic_t *, v); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " decl (%%rdi)\n" \
- " jns 1f \n" \
- " call " #fail_fn "\n" \
- "1:" \
- : "=D" (dummy) \
- : "D" (v) \
- : "rax", "rsi", "rdx", "rcx", \
- "r8", "r9", "r10", "r11", "memory"); \
-} while (0)
-#endif
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int __mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(atomic_dec_return(count) < 0))
- return -1;
- else
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - increment and call function if nonpositive
- * @v: pointer of type atomic_t
- * @fail_fn: function to call if the result is nonpositive
- *
- * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
- */
-#ifdef CC_HAVE_ASM_GOTO
-static inline void __mutex_fastpath_unlock(atomic_t *v,
- void (*fail_fn)(atomic_t *))
-{
- asm_volatile_goto(LOCK_PREFIX " incl %0\n"
- " jg %l[exit]\n"
- : : "m" (v->counter)
- : "memory", "cc"
- : exit);
- fail_fn(v);
-exit:
- return;
-}
-#else
-#define __mutex_fastpath_unlock(v, fail_fn) \
-do { \
- unsigned long dummy; \
- \
- typecheck(atomic_t *, v); \
- typecheck_fn(void (*)(atomic_t *), fail_fn); \
- \
- asm volatile(LOCK_PREFIX " incl (%%rdi)\n" \
- " jg 1f\n" \
- " call " #fail_fn "\n" \
- "1:" \
- : "=D" (dummy) \
- : "D" (v) \
- : "rax", "rsi", "rdx", "rcx", \
- "r8", "r9", "r10", "r11", "memory"); \
-} while (0)
-#endif
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to 0 and return 1 (success), or return 0 (failure)
- * if it wasn't 1 originally. [the fallback function is never used on
- * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.]
- */
-static inline int __mutex_fastpath_trylock(atomic_t *count,
- int (*fail_fn)(atomic_t *))
-{
- if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
- return 1;
-
- return 0;
-}
-
-#endif /* _ASM_X86_MUTEX_64_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index f1fb4db..1eea6ca 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -673,6 +673,11 @@ static __always_inline void pv_kick(int cpu)
PVOP_VCALL1(pv_lock_ops.kick, cpu);
}
+static __always_inline bool pv_vcpu_is_preempted(int cpu)
+{
+ return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu);
+}
+
#endif /* SMP && PARAVIRT_SPINLOCKS */
#ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 545426a..bb2de45 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -308,6 +308,8 @@ struct pv_lock_ops {
void (*wait)(u8 *ptr, u8 val);
void (*kick)(int cpu);
+
+ struct paravirt_callee_save vcpu_is_preempted;
};
/* This contains all the paravirt structures: we get a convenient
@@ -506,6 +508,18 @@ int paravirt_disable_iospace(void);
#define PVOP_TEST_NULL(op) ((void)op)
#endif
+#define PVOP_RETMASK(rettype) \
+ ({ unsigned long __mask = ~0UL; \
+ switch (sizeof(rettype)) { \
+ case 1: __mask = 0xffUL; break; \
+ case 2: __mask = 0xffffUL; break; \
+ case 4: __mask = 0xffffffffUL; break; \
+ default: break; \
+ } \
+ __mask; \
+ })
+
+
#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \
pre, post, ...) \
({ \
@@ -533,7 +547,7 @@ int paravirt_disable_iospace(void);
paravirt_clobber(clbr), \
##__VA_ARGS__ \
: "memory", "cc" extra_clbr); \
- __ret = (rettype)__eax; \
+ __ret = (rettype)(__eax & PVOP_RETMASK(rettype)); \
} \
__ret; \
})
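
The PVOP_RETMASK() addition above makes the paravirt call wrappers truncate the raw return register to the width of the declared return type, so a one-byte return such as pv_vcpu_is_preempted()'s bool cannot pick up stale upper bits from %eax/%rax. A minimal userspace sketch of the same masking, not kernel code (the register value below is invented):

#include <stdio.h>
#include <stdbool.h>

/* Mirror of the PVOP_RETMASK idea: pick a mask from sizeof() the return type. */
#define RETMASK(type)					\
({							\
	unsigned long __mask = ~0UL;			\
	switch (sizeof(type)) {				\
	case 1: __mask = 0xffUL;       break;		\
	case 2: __mask = 0xffffUL;     break;		\
	case 4: __mask = 0xffffffffUL; break;		\
	default: break;					\
	}						\
	__mask;						\
})

int main(void)
{
	unsigned long raw_eax = 0xdeadbe01UL;	/* pretend callee-returned register */
	bool preempted = (bool)(raw_eax & RETMASK(bool));

	printf("masked %#lx -> preempted=%d\n", raw_eax & RETMASK(bool), preempted);
	return 0;
}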
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 17f2186..ec1f3c6 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -24,7 +24,13 @@ static __always_inline int preempt_count(void)
static __always_inline void preempt_count_set(int pc)
{
- raw_cpu_write_4(__preempt_count, pc);
+ int old, new;
+
+ do {
+ old = raw_cpu_read_4(__preempt_count);
+ new = (old & PREEMPT_NEED_RESCHED) |
+ (pc & ~PREEMPT_NEED_RESCHED);
+ } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
}
/*
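
The reworked preempt_count_set() above preserves whatever PREEMPT_NEED_RESCHED currently holds while replacing the rest of the count. A standalone sketch of that read-modify-cmpxchg loop using C11 atomics in place of the per-cpu cmpxchg; the flag value and the plain variable are stand-ins, not kernel API:

#include <stdio.h>
#include <stdatomic.h>

#define PREEMPT_NEED_RESCHED	0x80000000u	/* illustrative bit position */

static _Atomic unsigned int preempt_count = 0x80000002u; /* flag set, count 2 */

static void preempt_count_set(unsigned int pc)
{
	unsigned int old, new;

	do {
		old = atomic_load(&preempt_count);
		new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED);
	} while (!atomic_compare_exchange_weak(&preempt_count, &old, new));
}

int main(void)
{
	preempt_count_set(1);
	/* Prints 0x80000001: the resched flag survived the write. */
	printf("preempt_count = %#x\n", atomic_load(&preempt_count));
	return 0;
}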
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 984a7bf..1f6a929 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -137,6 +137,17 @@ struct cpuinfo_x86 {
u32 microcode;
};
+struct cpuid_regs {
+ u32 eax, ebx, ecx, edx;
+};
+
+enum cpuid_regs_idx {
+ CPUID_EAX = 0,
+ CPUID_EBX,
+ CPUID_ECX,
+ CPUID_EDX,
+};
+
#define X86_VENDOR_INTEL 0
#define X86_VENDOR_CYRIX 1
#define X86_VENDOR_AMD 2
@@ -178,6 +189,9 @@ extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+ unsigned int sub_leaf,
+ enum cpuid_regs_idx reg);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
@@ -588,8 +602,6 @@ static __always_inline void cpu_relax(void)
rep_nop();
}
-#define cpu_relax_lowlatency() cpu_relax()
-
/* Stop speculative execution and prefetching of modified code. */
static inline void sync_core(void)
{
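
struct cpuid_regs and enum cpuid_regs_idx, added above, give callers one shared way to index the four CPUID output registers instead of private copies (the old one in cpuid.c is removed further down). A userspace illustration of the same indexing, assuming GCC/clang's <cpuid.h> helper rather than the kernel's cpuid_count():

#include <stdio.h>
#include <cpuid.h>

enum cpuid_regs_idx { CPUID_EAX = 0, CPUID_EBX, CPUID_ECX, CPUID_EDX };

int main(void)
{
	unsigned int regs[4];

	/* Leaf 7, sub-leaf 0: structured extended feature flags. */
	if (!__get_cpuid_count(0x7, 0, &regs[CPUID_EAX], &regs[CPUID_EBX],
			       &regs[CPUID_ECX], &regs[CPUID_EDX]))
		return 1;

	printf("leaf 7 EBX = %#x\n", regs[CPUID_EBX]);
	return 0;
}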
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index eaba080..c343ab5 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -32,6 +32,12 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
{
pv_queued_spin_unlock(lock);
}
+
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return pv_vcpu_is_preempted(cpu);
+}
#else
static inline void queued_spin_unlock(struct qspinlock *lock)
{
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 37f2e0b..a3269c8 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -30,8 +30,7 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
int get_stack_info(unsigned long *stack, struct task_struct *task,
struct stack_info *info, unsigned long *visit_mask);
-void stack_type_str(enum stack_type type, const char **begin,
- const char **end);
+const char *stack_type_name(enum stack_type type);
static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
{
@@ -43,8 +42,6 @@ static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
addr + len > begin && addr + len <= end);
}
-extern int kstack_depth_to_print;
-
#ifdef CONFIG_X86_32
#define STACKSLOTS_PER_LINE 8
#else
@@ -86,9 +83,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, char *log_lvl);
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, char *log_lvl);
-
extern unsigned int code_bytes;
/* The form of the top of the frame on the stack */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index cf75871..6358a85 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -146,4 +146,36 @@ struct pci_bus;
int x86_pci_root_bus_node(int bus);
void x86_pci_root_bus_resources(int bus, struct list_head *resources);
+extern bool x86_topology_update;
+
+#ifdef CONFIG_SCHED_MC_PRIO
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
+
+/* Interface to set priority of a cpu */
+void sched_set_itmt_core_prio(int prio, int core_cpu);
+
+/* Interface to notify scheduler that system supports ITMT */
+int sched_set_itmt_support(void);
+
+/* Interface to notify scheduler that system revokes ITMT support */
+void sched_clear_itmt_support(void);
+
+#else /* CONFIG_SCHED_MC_PRIO */
+
+#define sysctl_sched_itmt_enabled 0
+static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+}
+static inline int sched_set_itmt_support(void)
+{
+ return 0;
+}
+static inline void sched_clear_itmt_support(void)
+{
+}
+#endif /* CONFIG_SCHED_MC_PRIO */
+
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index faf3687..ea14831 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -68,6 +68,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
})
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
+#else
+# define WARN_ON_IN_IRQ()
+#endif
+
/**
* access_ok: - Checks if a user space pointer is valid
* @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
@@ -88,8 +94,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
-#define access_ok(type, addr, size) \
- likely(!__range_not_ok(addr, size, user_addr_max()))
+#define access_ok(type, addr, size) \
+({ \
+ WARN_ON_IN_IRQ(); \
+ likely(!__range_not_ok(addr, size, user_addr_max())); \
+})
/*
* These are the main single-value transfer routines. They automatically
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 46de9ac..c5a7f3a 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,6 +13,7 @@ struct unwind_state {
int graph_idx;
#ifdef CONFIG_FRAME_POINTER
unsigned long *bp;
+ struct pt_regs *regs;
#else
unsigned long *sp;
#endif
@@ -47,7 +48,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
if (unwind_done(state))
return NULL;
- return state->bp + 1;
+ return state->regs ? &state->regs->ip : state->bp + 1;
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return NULL;
+
+ return state->regs;
}
#else /* !CONFIG_FRAME_POINTER */
@@ -58,6 +67,11 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL;
}
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+ return NULL;
+}
+
#endif /* CONFIG_FRAME_POINTER */
#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index e728699..3a01996 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -89,8 +89,13 @@ static inline unsigned int __getcpu(void)
* works on all CPUs. This is volatile so that it orders
* correctly wrt barrier() and to keep gcc from cleverly
* hoisting it out of the calling function.
+ *
+ * If RDPID is available, use it.
*/
- asm volatile ("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+ alternative_io ("lsl %[p],%[seg]",
+ ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
+ X86_FEATURE_RDPID,
+ [p] "=a" (p), [seg] "r" (__PER_CPU_SEG));
return p;
}
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c18ce67..b10bf31 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,6 +7,7 @@
#define SETUP_DTB 2
#define SETUP_PCI 3
#define SETUP_EFI 4
+#define SETUP_APPLE_PROPERTIES 5
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 94dc8ca..1421a65 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -45,7 +45,9 @@ struct kvm_steal_time {
__u64 steal;
__u32 version;
__u32 flags;
- __u32 pad[12];
+ __u8 preempted;
+ __u8 u8_pad[3];
+ __u32 pad[11];
};
#define KVM_STEAL_ALIGNMENT_BITS 5
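
The kvm_steal_time change above carves the new preempted byte and three pad bytes out of one of the twelve __u32 pad words, so the guest/host shared-memory layout keeps its size. A standalone check of that arithmetic; the struct names are local stand-ins, not the uapi definitions:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

struct steal_time_old {
	uint64_t steal;
	uint32_t version;
	uint32_t flags;
	uint32_t pad[12];
};

struct steal_time_new {
	uint64_t steal;
	uint32_t version;
	uint32_t flags;
	uint8_t  preempted;
	uint8_t  u8_pad[3];
	uint32_t pad[11];
};

int main(void)
{
	static_assert(sizeof(struct steal_time_old) == sizeof(struct steal_time_new),
		      "layout size must not change");
	printf("both layouts are %zu bytes\n", sizeof(struct steal_time_new));
	return 0;
}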
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 69a6e07..eb6247a 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -28,6 +28,7 @@ struct mce {
__u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
__u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */
__u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */
+ __u64 ppin; /* Protected Processor Inventory Number */
};
#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index ae135de..835aa51 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,10 +6,8 @@
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
-#ifdef CONFIG_CHECKPOINT_RESTORE
-# define ARCH_MAP_VDSO_X32 0x2001
-# define ARCH_MAP_VDSO_32 0x2002
-# define ARCH_MAP_VDSO_64 0x2003
-#endif
+#define ARCH_MAP_VDSO_X32 0x2001
+#define ARCH_MAP_VDSO_32 0x2002
+#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 79076d7..05110c1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -123,6 +123,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
+obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
ifdef CONFIG_FRAME_POINTER
obj-y += unwind_frame.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 931ced8..4764fa5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -76,6 +76,7 @@ int acpi_fix_pin2_polarity __initdata;
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
#endif
+#ifdef CONFIG_X86_IO_APIC
/*
* Locks related to IOAPIC hotplug
* Hotplug side:
@@ -88,6 +89,7 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
* ->ioapic_lock
*/
static DEFINE_MUTEX(acpi_ioapic_lock);
+#endif
/* --------------------------------------------------------------------------
Boot-time Configuration
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4fdf623..458da85 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -13,8 +13,20 @@
#include <linux/spinlock.h>
#include <asm/amd_nb.h>
+#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
+#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463
+#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464
+
+/* Protect the PCI config register pairs used for SMN and DF indirect access. */
+static DEFINE_MUTEX(smn_mutex);
+
static u32 *flush_words;
+static const struct pci_device_id amd_root_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) },
+ {}
+};
+
const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -24,9 +36,10 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
{}
};
-EXPORT_SYMBOL(amd_nb_misc_ids);
+EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
@@ -34,6 +47,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
{}
};
@@ -44,8 +58,25 @@ const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
{ }
};
-struct amd_northbridge_info amd_northbridges;
-EXPORT_SYMBOL(amd_northbridges);
+static struct amd_northbridge_info amd_northbridges;
+
+u16 amd_nb_num(void)
+{
+ return amd_northbridges.num;
+}
+EXPORT_SYMBOL_GPL(amd_nb_num);
+
+bool amd_nb_has_feature(unsigned int feature)
+{
+ return ((amd_northbridges.flags & feature) == feature);
+}
+EXPORT_SYMBOL_GPL(amd_nb_has_feature);
+
+struct amd_northbridge *node_to_amd_nb(int node)
+{
+ return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
+}
+EXPORT_SYMBOL_GPL(node_to_amd_nb);
static struct pci_dev *next_northbridge(struct pci_dev *dev,
const struct pci_device_id *ids)
@@ -58,13 +89,106 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev,
return dev;
}
+static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write)
+{
+ struct pci_dev *root;
+ int err = -ENODEV;
+
+ if (node >= amd_northbridges.num)
+ goto out;
+
+ root = node_to_amd_nb(node)->root;
+ if (!root)
+ goto out;
+
+ mutex_lock(&smn_mutex);
+
+ err = pci_write_config_dword(root, 0x60, address);
+ if (err) {
+ pr_warn("Error programming SMN address 0x%x.\n", address);
+ goto out_unlock;
+ }
+
+ err = (write ? pci_write_config_dword(root, 0x64, *value)
+ : pci_read_config_dword(root, 0x64, value));
+ if (err)
+ pr_warn("Error %s SMN address 0x%x.\n",
+ (write ? "writing to" : "reading from"), address);
+
+out_unlock:
+ mutex_unlock(&smn_mutex);
+
+out:
+ return err;
+}
+
+int amd_smn_read(u16 node, u32 address, u32 *value)
+{
+ return __amd_smn_rw(node, address, value, false);
+}
+EXPORT_SYMBOL_GPL(amd_smn_read);
+
+int amd_smn_write(u16 node, u32 address, u32 value)
+{
+ return __amd_smn_rw(node, address, &value, true);
+}
+EXPORT_SYMBOL_GPL(amd_smn_write);
+
+/*
+ * Data Fabric Indirect Access uses FICAA/FICAD.
+ *
+ * Fabric Indirect Configuration Access Address (FICAA): Constructed based
+ * on the device's Instance Id and the PCI function and register offset of
+ * the desired register.
+ *
+ * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
+ * and FICAD HI registers but so far we only need the LO register.
+ */
+int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
+{
+ struct pci_dev *F4;
+ u32 ficaa;
+ int err = -ENODEV;
+
+ if (node >= amd_northbridges.num)
+ goto out;
+
+ F4 = node_to_amd_nb(node)->link;
+ if (!F4)
+ goto out;
+
+ ficaa = 1;
+ ficaa |= reg & 0x3FC;
+ ficaa |= (func & 0x7) << 11;
+ ficaa |= instance_id << 16;
+
+ mutex_lock(&smn_mutex);
+
+ err = pci_write_config_dword(F4, 0x5C, ficaa);
+ if (err) {
+ pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa);
+ goto out_unlock;
+ }
+
+ err = pci_read_config_dword(F4, 0x98, lo);
+ if (err)
+ pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa);
+
+out_unlock:
+ mutex_unlock(&smn_mutex);
+
+out:
+ return err;
+}
+EXPORT_SYMBOL_GPL(amd_df_indirect_read);
+
int amd_cache_northbridges(void)
{
u16 i = 0;
struct amd_northbridge *nb;
- struct pci_dev *misc, *link;
+ struct pci_dev *root, *misc, *link;
- if (amd_nb_num())
+ if (amd_northbridges.num)
return 0;
misc = NULL;
@@ -74,15 +198,17 @@ int amd_cache_northbridges(void)
if (!i)
return -ENODEV;
- nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
+ nb = kcalloc(i, sizeof(struct amd_northbridge), GFP_KERNEL);
if (!nb)
return -ENOMEM;
amd_northbridges.nb = nb;
amd_northbridges.num = i;
- link = misc = NULL;
- for (i = 0; i != amd_nb_num(); i++) {
+ link = misc = root = NULL;
+ for (i = 0; i != amd_northbridges.num; i++) {
+ node_to_amd_nb(i)->root = root =
+ next_northbridge(root, amd_root_ids);
node_to_amd_nb(i)->misc = misc =
next_northbridge(misc, amd_nb_misc_ids);
node_to_amd_nb(i)->link = link =
@@ -139,13 +265,13 @@ struct resource *amd_get_mmconfig_range(struct resource *res)
{
u32 address;
u64 base, msr;
- unsigned segn_busn_bits;
+ unsigned int segn_busn_bits;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return NULL;
/* assume all cpus from fam10h have mmconfig */
- if (boot_cpu_data.x86 < 0x10)
+ if (boot_cpu_data.x86 < 0x10)
return NULL;
address = MSR_FAM10H_MMIO_CONF_BASE;
@@ -226,14 +352,14 @@ static void amd_cache_gart(void)
if (!amd_nb_has_feature(AMD_NB_GART))
return;
- flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
+ flush_words = kmalloc_array(amd_northbridges.num, sizeof(u32), GFP_KERNEL);
if (!flush_words) {
amd_northbridges.flags &= ~AMD_NB_GART;
pr_notice("Cannot initialize GART flush words, GART support disabled\n");
return;
}
- for (i = 0; i != amd_nb_num(); i++)
+ for (i = 0; i != amd_northbridges.num; i++)
pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]);
}
@@ -246,18 +372,20 @@ void amd_flush_garts(void)
if (!amd_nb_has_feature(AMD_NB_GART))
return;
- /* Avoid races between AGP and IOMMU. In theory it's not needed
- but I'm not sure if the hardware won't lose flush requests
- when another is pending. This whole thing is so expensive anyways
- that it doesn't matter to serialize more. -AK */
+ /*
+ * Avoid races between AGP and IOMMU. In theory it's not needed
+ * but I'm not sure if the hardware won't lose flush requests
+ * when another is pending. This whole thing is so expensive anyways
+ * that it doesn't matter to serialize more. -AK
+ */
spin_lock_irqsave(&gart_lock, flags);
flushed = 0;
- for (i = 0; i < amd_nb_num(); i++) {
+ for (i = 0; i < amd_northbridges.num; i++) {
pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
flush_words[i] | 1);
flushed++;
}
- for (i = 0; i < amd_nb_num(); i++) {
+ for (i = 0; i < amd_northbridges.num; i++) {
u32 w;
/* Make sure the hardware actually executed the flush */
for (;;) {
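
amd_df_indirect_read() above packs the PCI function, register offset, and instance id into the FICAA value written at offset 0x5C before FICAD LO is read back at 0x98. A userspace sketch of just that packing; field positions follow the hunk, the example inputs are made up:

#include <stdio.h>
#include <stdint.h>

static uint32_t build_ficaa(uint8_t func, uint16_t reg, uint8_t instance_id)
{
	uint32_t ficaa = 1;			/* bit 0: target a specific instance */

	ficaa |= reg & 0x3FC;			/* dword-aligned register offset */
	ficaa |= (uint32_t)(func & 0x7) << 11;	/* DF function number */
	ficaa |= (uint32_t)instance_id << 16;	/* coherent slave instance id */

	return ficaa;
}

int main(void)
{
	/* e.g. D18F0x110 (DramBaseAddress) on instance 0 */
	printf("FICAA = %#x\n", (unsigned int)build_ficaa(0, 0x110, 0));
	return 0;
}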
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 88c657b..2686894 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2263,6 +2263,7 @@ void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
/* Should happen once for each apic */
WARN_ON((*drv)->eoi_write == eoi_write);
+ (*drv)->native_eoi_write = (*drv)->eoi_write;
(*drv)->eoi_write = eoi_write;
}
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index aeef53c..35690a1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -815,9 +815,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
l = li;
}
addr1 = (base << shift) +
- f * (unsigned long)(1 << m_io);
+ f * (1ULL << m_io);
addr2 = (base << shift) +
- (l + 1) * (unsigned long)(1 << m_io);
+ (l + 1) * (1ULL << m_io);
pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
id, fi, li, lnasid, addr1, addr2);
if (max_io < l)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 51287cd..643818a7 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -906,14 +906,14 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
- cputime_t stime;
+ cputime_t stime, utime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
unsigned int bucket;
recalc:
- task_cputime(current, NULL, &stime);
+ task_cputime(current, &utime, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
} else if (jiffies_since_last_check > idle_period) {
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4a8697f..33b6367 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -20,13 +20,11 @@ obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
+obj-y += bugs.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_X86_32) += bugs.o
-obj-$(CONFIG_X86_64) += bugs_64.o
-
obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1e81a37..4daad1e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -314,11 +314,30 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
smp_num_siblings = ((ebx >> 8) & 3) + 1;
c->x86_max_cores /= smp_num_siblings;
c->cpu_core_id = ebx & 0xff;
+
+ /*
+ * We may have multiple LLCs if L3 caches exist, so check if we
+ * have an L3 cache by looking at the L3 cache CPUID leaf.
+ */
+ if (cpuid_edx(0x80000006)) {
+ if (c->x86 == 0x17) {
+ /*
+ * LLC is at the core complex level.
+ * Core complex id is ApicId[3].
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+ } else {
+ /* LLC is at the node level. */
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ }
+ }
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
node_id = value & 7;
+
+ per_cpu(cpu_llc_id, cpu) = node_id;
} else
return;
@@ -329,9 +348,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_AMD_DCM);
cus_per_node = c->x86_max_cores / nodes_per_socket;
- /* store NodeID, use llc_shared_map to store sibling info */
- per_cpu(cpu_llc_id, cpu) = node_id;
-
/* core id has to be in the [0 .. cores_per_node - 1] range */
c->cpu_core_id %= cus_per_node;
}
@@ -356,15 +372,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
amd_get_topology(c);
-
- /*
- * Fix percpu cpu_llc_id here as LLC topology is different
- * for Fam17h systems.
- */
- if (c->x86 != 0x17 || !cpuid_edx(0x80000006))
- return;
-
- per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
#endif
}
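
For family 0x17 parts with an L3, the hunk above derives the last-level-cache id directly from the APIC id: the core-complex id starts at ApicId[3], so apicid >> 3 groups eight APIC ids per LLC. A trivial illustration with made-up APIC ids:

#include <stdio.h>

int main(void)
{
	unsigned int apicid;

	for (apicid = 0; apicid < 16; apicid++)
		printf("apicid %2u -> llc_id %u\n", apicid, apicid >> 3);
	return 0;
}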
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bd17db1..a44ef52 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -16,15 +16,19 @@
#include <asm/msr.h>
#include <asm/paravirt.h>
#include <asm/alternative.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>
void __init check_bugs(void)
{
identify_boot_cpu();
-#ifndef CONFIG_SMP
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
+ if (!IS_ENABLED(CONFIG_SMP)) {
+ pr_info("CPU: ");
+ print_cpu_info(&boot_cpu_data);
+ }
+
+#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
*
@@ -40,4 +44,18 @@ void __init check_bugs(void)
alternative_instructions();
fpu__init_check_bugs();
+#else /* CONFIG_X86_64 */
+ alternative_instructions();
+
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages
+ * There are typically fixed size MTRRs in there and overlapping
+ * MTRRs into large pages causes slow downs.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
+#endif
}
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
deleted file mode 100644
index a972ac4..0000000
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- * Copyright (C) 2000 SuSE
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <asm/alternative.h>
-#include <asm/bugs.h>
-#include <asm/processor.h>
-#include <asm/mtrr.h>
-#include <asm/cacheflush.h>
-
-void __init check_bugs(void)
-{
- identify_boot_cpu();
-#if !defined(CONFIG_SMP)
- pr_info("CPU: ");
- print_cpu_info(&boot_cpu_data);
-#endif
- alternative_instructions();
-
- /*
- * Make sure the first 2MB area is not mapped by huge pages
- * There are typically fixed size MTRRs in there and overlapping
- * MTRRs into large pages causes slow downs.
- *
- * Right now we don't do that with gbpages because there seems
- * very little benefit for that case.
- */
- if (!direct_gbpages)
- set_memory_4k((unsigned long)__va(0), 1);
-}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc9e980..9886cf4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1190,51 +1190,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
mtrr_ap_init();
}
-struct msr_range {
- unsigned min;
- unsigned max;
-};
-
-static const struct msr_range msr_range_array[] = {
- { 0x00000000, 0x00000418},
- { 0xc0000000, 0xc000040b},
- { 0xc0010000, 0xc0010142},
- { 0xc0011000, 0xc001103b},
-};
-
-static void __print_cpu_msr(void)
-{
- unsigned index_min, index_max;
- unsigned index;
- u64 val;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
- index_min = msr_range_array[i].min;
- index_max = msr_range_array[i].max;
-
- for (index = index_min; index < index_max; index++) {
- if (rdmsrl_safe(index, &val))
- continue;
- pr_info(" MSR%08x: %016llx\n", index, val);
- }
- }
-}
-
-static int show_msr;
-
-static __init int setup_show_msr(char *arg)
-{
- int num;
-
- get_option(&arg, &num);
-
- if (num > 0)
- show_msr = num;
- return 1;
-}
-__setup("show_msr=", setup_show_msr);
-
static __init int setup_noclflush(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_CLFLUSH);
@@ -1268,14 +1223,6 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(", stepping: 0x%x)\n", c->x86_mask);
else
pr_cont(")\n");
-
- print_cpu_msr(c);
-}
-
-void print_cpu_msr(struct cpuinfo_x86 *c)
-{
- if (c->cpu_index < show_msr)
- __print_cpu_msr();
}
static __init int setup_disablecpuid(char *arg)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 631356c..c7efbcf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -311,7 +311,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
*msg = s->msg;
s->covered = 1;
if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
- if (panic_on_oops || tolerant < 1)
+ if (tolerant < 1)
return MCE_PANIC_SEVERITY;
}
return s->sev;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index a7fdf45..a3cb27a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -43,6 +43,7 @@
#include <linux/export.h>
#include <linux/jump_label.h>
+#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/traps.h>
#include <asm/tlbflush.h>
@@ -135,6 +136,9 @@ void mce_setup(struct mce *m)
m->socketid = cpu_data(m->extcpu).phys_proc_id;
m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
+
+ if (this_cpu_has(X86_FEATURE_INTEL_PPIN))
+ rdmsrl(MSR_PPIN, m->ppin);
}
DEFINE_PER_CPU(struct mce, injectm);
@@ -207,8 +211,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log);
static struct notifier_block mce_srao_nb;
+static atomic_t num_notifiers;
+
void mce_register_decode_chain(struct notifier_block *nb)
{
+ atomic_inc(&num_notifiers);
+
/* Ensure SRAO notifier has the highest priority in the decode chain. */
if (nb != &mce_srao_nb && nb->priority == INT_MAX)
nb->priority -= 1;
@@ -219,6 +227,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain);
void mce_unregister_decode_chain(struct notifier_block *nb)
{
+ atomic_dec(&num_notifiers);
+
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
}
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
@@ -270,17 +280,17 @@ struct mca_msr_regs msr_ops = {
.misc = misc_reg
};
-static void print_mce(struct mce *m)
+static void __print_mce(struct mce *m)
{
- int ret = 0;
-
- pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
- m->extcpu, m->mcgstatus, m->bank, m->status);
+ pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
+ m->extcpu,
+ (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""),
+ m->mcgstatus, m->bank, m->status);
if (m->ip) {
pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->ip);
+ m->cs, m->ip);
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip);
@@ -308,6 +318,13 @@ static void print_mce(struct mce *m)
pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
cpu_data(m->extcpu).microcode);
+}
+
+static void print_mce(struct mce *m)
+{
+ int ret = 0;
+
+ __print_mce(m);
/*
* Print out human-readable details about the MCE error,
@@ -569,6 +586,32 @@ static struct notifier_block mce_srao_nb = {
.priority = INT_MAX,
};
+static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct mce *m = (struct mce *)data;
+
+ if (!m)
+ return NOTIFY_DONE;
+
+ /*
+ * Run the default notifier if we have only the SRAO
+ * notifier and us registered.
+ */
+ if (atomic_read(&num_notifiers) > 2)
+ return NOTIFY_DONE;
+
+ __print_mce(m);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block mce_default_nb = {
+ .notifier_call = mce_default_notifier,
+ /* lowest prio, we want it to run last. */
+ .priority = 0,
+};
+
/*
* Read ADDR and MISC registers.
*/
@@ -667,6 +710,15 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_gather_info(&m, NULL);
+ /*
+ * m.tsc was set in mce_setup(). Clear it if not requested.
+ *
+ * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid
+ * that dance.
+ */
+ if (!(flags & MCP_TIMESTAMP))
+ m.tsc = 0;
+
for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -674,14 +726,12 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
m.misc = 0;
m.addr = 0;
m.bank = i;
- m.tsc = 0;
barrier();
m.status = mce_rdmsrl(msr_ops.status(i));
if (!(m.status & MCI_STATUS_VAL))
continue;
-
/*
* Uncorrected or signalled events are handled by the exception
* handler when it is enabled, so don't process those here.
@@ -696,9 +746,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_read_aux(&m, i);
- if (!(flags & MCP_TIMESTAMP))
- m.tsc = 0;
-
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
@@ -1355,7 +1402,7 @@ static void mce_timer_fn(unsigned long data)
iv = __this_cpu_read(mce_next_interval);
if (mce_available(this_cpu_ptr(&cpu_info))) {
- machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks));
+ machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
if (mce_intel_cmci_poll()) {
iv = mce_adjust_timer(iv);
@@ -2138,6 +2185,7 @@ int __init mcheck_init(void)
{
mcheck_intel_therm_init();
mce_register_decode_chain(&mce_srao_nb);
+ mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity();
INIT_WORK(&mce_work, mce_process_work);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9b54034..1a0b0a0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -69,7 +69,12 @@ static const char * const smca_umc_block_names[] = {
"misc_umc"
};
-struct smca_bank_name smca_bank_names[] = {
+struct smca_bank_name {
+ const char *name; /* Short name for sysfs */
+ const char *long_name; /* Long name for pretty-printing */
+};
+
+static struct smca_bank_name smca_names[] = {
[SMCA_LS] = { "load_store", "Load Store Unit" },
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
@@ -84,9 +89,25 @@ struct smca_bank_name smca_bank_names[] = {
[SMCA_PSP] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" },
};
-EXPORT_SYMBOL_GPL(smca_bank_names);
-static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
+const char *smca_get_name(enum smca_bank_types t)
+{
+ if (t >= N_SMCA_BANK_TYPES)
+ return NULL;
+
+ return smca_names[t].name;
+}
+
+const char *smca_get_long_name(enum smca_bank_types t)
+{
+ if (t >= N_SMCA_BANK_TYPES)
+ return NULL;
+
+ return smca_names[t].long_name;
+}
+EXPORT_SYMBOL_GPL(smca_get_long_name);
+
+static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
/* ZN Core (HWID=0xB0) MCA types */
@@ -116,7 +137,7 @@ static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
};
-struct smca_bank_info smca_banks[MAX_NR_BANKS];
+struct smca_bank smca_banks[MAX_NR_BANKS];
EXPORT_SYMBOL_GPL(smca_banks);
/*
@@ -142,35 +163,34 @@ static void default_deferred_error_interrupt(void)
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
-/*
- * CPU Initialization
- */
-
static void get_smca_bank_info(unsigned int bank)
{
unsigned int i, hwid_mcatype, cpu = smp_processor_id();
- struct smca_hwid_mcatype *type;
- u32 high, instanceId;
- u16 hwid, mcatype;
+ struct smca_hwid *s_hwid;
+ u32 high, instance_id;
/* Collect bank_info using CPU 0 for now. */
if (cpu)
return;
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
+ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instance_id, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
}
- hwid = high & MCI_IPID_HWID;
- mcatype = (high & MCI_IPID_MCATYPE) >> 16;
- hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
+ hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
+ (high & MCI_IPID_MCATYPE) >> 16);
for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
- type = &smca_hwid_mcatypes[i];
- if (hwid_mcatype == type->hwid_mcatype) {
- smca_banks[bank].type = type;
- smca_banks[bank].type_instance = instanceId;
+ s_hwid = &smca_hwid_mcatypes[i];
+ if (hwid_mcatype == s_hwid->hwid_mcatype) {
+
+ WARN(smca_banks[bank].hwid,
+ "Bank %s already initialized!\n",
+ smca_get_name(s_hwid->bank_type));
+
+ smca_banks[bank].hwid = s_hwid;
+ smca_banks[bank].id = instance_id;
break;
}
}
@@ -533,6 +553,206 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
deferred_error_interrupt_enable(c);
}
+int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
+{
+ u64 dram_base_addr, dram_limit_addr, dram_hole_base;
+ /* We start from the normalized address */
+ u64 ret_addr = norm_addr;
+
+ u32 tmp;
+
+ u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
+ u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
+ u8 intlv_addr_sel, intlv_addr_bit;
+ u8 num_intlv_bits, hashed_bit;
+ u8 lgcy_mmio_hole_en, base = 0;
+ u8 cs_mask, cs_id = 0;
+ bool hash_enabled = false;
+
+ /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
+ if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp))
+ goto out_err;
+
+ /* Remove HiAddrOffset from normalized address, if enabled: */
+ if (tmp & BIT(0)) {
+ u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
+
+ if (norm_addr >= hi_addr_offset) {
+ ret_addr -= hi_addr_offset;
+ base = 1;
+ }
+ }
+
+ /* Read D18F0x110 (DramBaseAddress). */
+ if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp))
+ goto out_err;
+
+ /* Check if address range is valid. */
+ if (!(tmp & BIT(0))) {
+ pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n",
+ __func__, tmp);
+ goto out_err;
+ }
+
+ lgcy_mmio_hole_en = tmp & BIT(1);
+ intlv_num_chan = (tmp >> 4) & 0xF;
+ intlv_addr_sel = (tmp >> 8) & 0x7;
+ dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16;
+
+ /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
+ if (intlv_addr_sel > 3) {
+ pr_err("%s: Invalid interleave address select %d.\n",
+ __func__, intlv_addr_sel);
+ goto out_err;
+ }
+
+ /* Read D18F0x114 (DramLimitAddress). */
+ if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
+ goto out_err;
+
+ intlv_num_sockets = (tmp >> 8) & 0x1;
+ intlv_num_dies = (tmp >> 10) & 0x3;
+ dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
+
+ intlv_addr_bit = intlv_addr_sel + 8;
+
+ /* Re-use intlv_num_chan by setting it equal to log2(#channels) */
+ switch (intlv_num_chan) {
+ case 0: intlv_num_chan = 0; break;
+ case 1: intlv_num_chan = 1; break;
+ case 3: intlv_num_chan = 2; break;
+ case 5: intlv_num_chan = 3; break;
+ case 7: intlv_num_chan = 4; break;
+
+ case 8: intlv_num_chan = 1;
+ hash_enabled = true;
+ break;
+ default:
+ pr_err("%s: Invalid number of interleaved channels %d.\n",
+ __func__, intlv_num_chan);
+ goto out_err;
+ }
+
+ num_intlv_bits = intlv_num_chan;
+
+ if (intlv_num_dies > 2) {
+ pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
+ __func__, intlv_num_dies);
+ goto out_err;
+ }
+
+ num_intlv_bits += intlv_num_dies;
+
+ /* Add a bit if sockets are interleaved. */
+ num_intlv_bits += intlv_num_sockets;
+
+ /* Assert num_intlv_bits <= 4 */
+ if (num_intlv_bits > 4) {
+ pr_err("%s: Invalid interleave bits %d.\n",
+ __func__, num_intlv_bits);
+ goto out_err;
+ }
+
+ if (num_intlv_bits > 0) {
+ u64 temp_addr_x, temp_addr_i, temp_addr_y;
+ u8 die_id_bit, sock_id_bit, cs_fabric_id;
+
+ /*
+ * Read FabricBlockInstanceInformation3_CS[BlockFabricID].
+ * This is the fabric id for this coherent slave. Use
+ * umc/channel# as instance id of the coherent slave
+ * for FICAA.
+ */
+ if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp))
+ goto out_err;
+
+ cs_fabric_id = (tmp >> 8) & 0xFF;
+ die_id_bit = 0;
+
+ /* If interleaved over more than 1 channel: */
+ if (intlv_num_chan) {
+ die_id_bit = intlv_num_chan;
+ cs_mask = (1 << die_id_bit) - 1;
+ cs_id = cs_fabric_id & cs_mask;
+ }
+
+ sock_id_bit = die_id_bit;
+
+ /* Read D18F1x208 (SystemFabricIdMask). */
+ if (intlv_num_dies || intlv_num_sockets)
+ if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
+ goto out_err;
+
+ /* If interleaved over more than 1 die. */
+ if (intlv_num_dies) {
+ sock_id_bit = die_id_bit + intlv_num_dies;
+ die_id_shift = (tmp >> 24) & 0xF;
+ die_id_mask = (tmp >> 8) & 0xFF;
+
+ cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
+ }
+
+ /* If interleaved over more than 1 socket. */
+ if (intlv_num_sockets) {
+ socket_id_shift = (tmp >> 28) & 0xF;
+ socket_id_mask = (tmp >> 16) & 0xFF;
+
+ cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
+ }
+
+ /*
+ * The pre-interleaved address consists of XXXXXXIIIYYYYY
+ * where III is the ID for this CS, and XXXXXXYYYYY are the
+ * address bits from the post-interleaved address.
+ * "num_intlv_bits" has been calculated to tell us how many "I"
+ * bits there are. "intlv_addr_bit" tells us how many "Y" bits
+ * there are (where "I" starts).
+ */
+ temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0);
+ temp_addr_i = (cs_id << intlv_addr_bit);
+ temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
+ ret_addr = temp_addr_x | temp_addr_i | temp_addr_y;
+ }
+
+ /* Add dram base address */
+ ret_addr += dram_base_addr;
+
+ /* If legacy MMIO hole enabled */
+ if (lgcy_mmio_hole_en) {
+ if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp))
+ goto out_err;
+
+ dram_hole_base = tmp & GENMASK(31, 24);
+ if (ret_addr >= dram_hole_base)
+ ret_addr += (BIT_ULL(32) - dram_hole_base);
+ }
+
+ if (hash_enabled) {
+ /* Save some parentheses and grab ls-bit at the end. */
+ hashed_bit = (ret_addr >> 12) ^
+ (ret_addr >> 18) ^
+ (ret_addr >> 21) ^
+ (ret_addr >> 30) ^
+ cs_id;
+
+ hashed_bit &= BIT(0);
+
+ if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0)))
+ ret_addr ^= BIT(intlv_addr_bit);
+ }
+
+ /* Is the calculated system address above the DRAM limit address? */
+ if (ret_addr > dram_limit_addr)
+ goto out_err;
+
+ *sys_addr = ret_addr;
+ return 0;
+
+out_err:
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
+
static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
@@ -645,6 +865,7 @@ static void amd_threshold_interrupt(void)
{
u32 low = 0, high = 0, address = 0;
unsigned int bank, block, cpu = smp_processor_id();
+ struct thresh_restart tr;
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -681,6 +902,11 @@ static void amd_threshold_interrupt(void)
log:
__log_error(bank, false, true, ((u64)high << 32) | low);
+
+ /* Reset threshold block after logging error. */
+ memset(&tr, 0, sizeof(tr));
+ tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
+ threshold_restart_bank(&tr);
}
/*
@@ -826,10 +1052,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return th_names[bank];
}
- if (!smca_banks[bank].type)
+ if (!smca_banks[bank].hwid)
return NULL;
- bank_type = smca_banks[bank].type->bank_type;
+ bank_type = smca_banks[bank].hwid->bank_type;
if (b && bank_type == SMCA_UMC) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
@@ -838,8 +1064,8 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
}
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
- "%s_%x", smca_bank_names[bank_type].name,
- smca_banks[bank].type_instance);
+ "%s_%x", smca_get_name(bank_type),
+ smca_banks[bank].id);
return buf_mcatype;
}
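
The interleaving part of umc_normaddr_to_sysaddr() rebuilds the pre-interleaved address as XXXXXX|III|YYYYY: the low Y bits stay put, the cs_id becomes the I bits at intlv_addr_bit, and the remaining X bits shift up by num_intlv_bits. A userspace sketch of just that splice; the bit positions and ids below are example values, not read from hardware:

#include <stdio.h>
#include <stdint.h>

#define GENMASK_ULL(h, l) \
	((~0ULL << (l)) & (~0ULL >> (63 - (h))))

int main(void)
{
	uint64_t norm_addr = 0x12345678ULL;	/* normalized (post-interleave) address */
	unsigned int intlv_addr_bit = 8;	/* interleave select on address bit 8 */
	unsigned int num_intlv_bits = 2;	/* e.g. four channels -> two "I" bits */
	unsigned int cs_id = 0x3;		/* this coherent slave's id */

	uint64_t y = norm_addr & GENMASK_ULL(intlv_addr_bit - 1, 0);
	uint64_t i = (uint64_t)cs_id << intlv_addr_bit;
	uint64_t x = (norm_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;

	printf("pre-interleave address = %#llx\n", (unsigned long long)(x | i | y));
	return 0;
}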
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1defb8e..190b3e6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -11,6 +11,8 @@
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
+#include <asm/cpufeature.h>
+#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
@@ -130,7 +132,7 @@ bool mce_intel_cmci_poll(void)
* Reset the counter if we've logged an error in the last poll
* during the storm.
*/
- if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
+ if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)))
this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
else
this_cpu_dec(cmci_backoff_cnt);
@@ -342,7 +344,7 @@ void cmci_recheck(void)
return;
local_irq_save(flags);
- machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
+ machine_check_poll(0, this_cpu_ptr(&mce_banks_owned));
local_irq_restore(flags);
}
@@ -464,11 +466,46 @@ static void intel_clear_lmce(void)
wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
}
+static void intel_ppin_init(struct cpuinfo_x86 *c)
+{
+ unsigned long long val;
+
+ /*
+ * Even if testing the presence of the MSR would be enough, we don't
+ * want to risk the situation where other models reuse this MSR for
+ * other purposes.
+ */
+ switch (c->x86_model) {
+ case INTEL_FAM6_IVYBRIDGE_X:
+ case INTEL_FAM6_HASWELL_X:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_SKYLAKE_X:
+ if (rdmsrl_safe(MSR_PPIN_CTL, &val))
+ return;
+
+ if ((val & 3UL) == 1UL) {
+ /* PPIN available but disabled: */
+ return;
+ }
+
+ /* If PPIN is disabled, but not locked, try to enable: */
+ if (!(val & 3UL)) {
+ wrmsrl_safe(MSR_PPIN_CTL, val | 2UL);
+ rdmsrl_safe(MSR_PPIN_CTL, &val);
+ }
+
+ if ((val & 3UL) == 2UL)
+ set_cpu_cap(c, X86_FEATURE_INTEL_PPIN);
+ }
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
intel_init_thermal(c);
intel_init_cmci();
intel_init_lmce();
+ intel_ppin_init(c);
}
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
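
intel_ppin_init() above only sets X86_FEATURE_INTEL_PPIN when MSR_PPIN_CTL is already enabled, or is unlocked and can be enabled by setting bit 1. A userspace sketch of that small state machine, with a plain variable standing in for the MSR and the same bit meanings assumed (bit 0 lock, bit 1 enable):

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static uint64_t ppin_ctl;	/* stand-in for MSR_PPIN_CTL: unlocked, disabled */

static bool ppin_usable(void)
{
	uint64_t val = ppin_ctl;

	if ((val & 3) == 1)		/* locked with PPIN disabled: give up */
		return false;

	if (!(val & 3))			/* unlocked and disabled: try to enable */
		ppin_ctl = val | 2;

	return (ppin_ctl & 3) == 2;	/* usable only if now enabled */
}

int main(void)
{
	printf("PPIN usable: %s\n", ppin_usable() ? "yes" : "no");
	return 0;
}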
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 1db8dc4..d1316f9 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -17,11 +17,17 @@ struct cpuid_bit {
u32 sub_leaf;
};
-enum cpuid_regs {
- CR_EAX = 0,
- CR_ECX,
- CR_EDX,
- CR_EBX
+/* Please keep the leaf sorted by cpuid_bit.level for faster search. */
+static const struct cpuid_bit cpuid_bits[] = {
+ { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
+ { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
+ { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
+ { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
+ { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
+ { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
+ { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+ { 0, 0, 0, 0, 0 }
};
void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
@@ -30,18 +36,6 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
u32 regs[4];
const struct cpuid_bit *cb;
- static const struct cpuid_bit cpuid_bits[] = {
- { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 },
- { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 },
- { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 },
- { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
- { X86_FEATURE_PROC_FEEDBACK, CR_EDX,11, 0x80000007, 0 },
- { 0, 0, 0, 0, 0 }
- };
-
for (cb = cpuid_bits; cb->feature; cb++) {
/* Verify that the level is valid */
@@ -50,10 +44,35 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
max_level > (cb->level | 0xffff))
continue;
- cpuid_count(cb->level, cb->sub_leaf, &regs[CR_EAX],
- &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);
+ cpuid_count(cb->level, cb->sub_leaf, &regs[CPUID_EAX],
+ &regs[CPUID_EBX], &regs[CPUID_ECX],
+ &regs[CPUID_EDX]);
if (regs[cb->reg] & (1 << cb->bit))
set_cpu_cap(c, cb->feature);
}
}
+
+u32 get_scattered_cpuid_leaf(unsigned int level, unsigned int sub_leaf,
+ enum cpuid_regs_idx reg)
+{
+ const struct cpuid_bit *cb;
+ u32 cpuid_val = 0;
+
+ for (cb = cpuid_bits; cb->feature; cb++) {
+
+ if (level > cb->level)
+ continue;
+
+ if (level < cb->level)
+ break;
+
+ if (reg == cb->reg && sub_leaf == cb->sub_leaf) {
+ if (cpu_has(&boot_cpu_data, cb->feature))
+ cpuid_val |= BIT(cb->bit);
+ }
+ }
+
+ return cpuid_val;
+}
+EXPORT_SYMBOL_GPL(get_scattered_cpuid_leaf);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 2836de3..9095c80 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -46,10 +46,6 @@
static struct class *cpuid_class;
-struct cpuid_regs {
- u32 eax, ebx, ecx, edx;
-};
-
static void cpuid_smp_cpuid(void *cmd_block)
{
struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 85f854b..0cfd01d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -22,7 +22,6 @@
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
unsigned int code_bytes = 64;
-int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
static int die_counter;
bool in_task_stack(unsigned long *stack, struct task_struct *task,
@@ -46,14 +45,7 @@ static void printk_stack_address(unsigned long address, int reliable,
char *log_lvl)
{
touch_nmi_watchdog();
- printk("%s [<%p>] %s%pB\n",
- log_lvl, (void *)address, reliable ? "" : "? ",
- (void *)address);
-}
-
-void printk_address(unsigned long address)
-{
- pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
+ printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
}
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
@@ -67,6 +59,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%sCall Trace:\n", log_lvl);
unwind_start(&state, task, regs, stack);
+ stack = stack ? : get_stack_pointer(task, regs);
/*
* Iterate through the stacks, starting with the current stack pointer.
@@ -82,8 +75,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* - softirq stack
* - hardirq stack
*/
- for (; stack; stack = stack_info.next_sp) {
- const char *str_begin, *str_end;
+ for (regs = NULL; stack; stack = stack_info.next_sp) {
+ const char *stack_name;
/*
* If we overflowed the task stack into a guard page, jump back
@@ -95,9 +88,9 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (get_stack_info(stack, task, &stack_info, &visit_mask))
break;
- stack_type_str(stack_info.type, &str_begin, &str_end);
- if (str_begin)
- printk("%s <%s> ", log_lvl, str_begin);
+ stack_name = stack_type_name(stack_info.type);
+ if (stack_name)
+ printk("%s <%s>\n", log_lvl, stack_name);
/*
* Scan the stack, printing any text addresses we find. At the
@@ -119,6 +112,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (!__kernel_text_address(addr))
continue;
+ /*
+ * Don't print regs->ip again if it was already printed
+ * by __show_regs() below.
+ */
+ if (regs && stack == &regs->ip) {
+ unwind_next_frame(&state);
+ continue;
+ }
+
if (stack == ret_addr_p)
reliable = 1;
@@ -146,10 +148,15 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
* of the addresses will just be printed as unreliable.
*/
unwind_next_frame(&state);
+
+ /* if the frame has entry regs, print them */
+ regs = unwind_get_entry_regs(&state);
+ if (regs)
+ __show_regs(regs, 0);
}
- if (str_end)
- printk("%s <%s> ", log_lvl, str_end);
+ if (stack_name)
+ printk("%s </%s>\n", log_lvl, stack_name);
}
}
@@ -164,12 +171,12 @@ void show_stack(struct task_struct *task, unsigned long *sp)
if (!sp && task == current)
sp = get_stack_pointer(current, NULL);
- show_stack_log_lvl(task, NULL, sp, "");
+ show_trace_log_lvl(task, NULL, sp, KERN_DEFAULT);
}
void show_stack_regs(struct pt_regs *regs)
{
- show_stack_log_lvl(current, regs, NULL, "");
+ show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
}
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -261,14 +268,11 @@ int __die(const char *str, struct pt_regs *regs, long err)
sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
}
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
- print_symbol("%s", regs->ip);
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
+ printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n",
+ (void *)regs->ip, ss, sp);
#else
/* Executive summary in case the oops scrolled away */
- printk(KERN_ALERT "RIP ");
- printk_address(regs->ip);
- printk(" RSP <%016lx>\n", regs->sp);
+ printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp);
#endif
return 0;
}
@@ -291,22 +295,6 @@ void die(const char *str, struct pt_regs *regs, long err)
oops_end(flags, regs, sig);
}
-static int __init kstack_setup(char *s)
-{
- ssize_t ret;
- unsigned long val;
-
- if (!s)
- return -EINVAL;
-
- ret = kstrtoul(s, 0, &val);
- if (ret)
- return ret;
- kstack_depth_to_print = val;
- return 0;
-}
-early_param("kstack", kstack_setup);
-
static int __init code_bytes_setup(char *s)
{
ssize_t ret;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 06eb322..bb3b5b9 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,18 +16,15 @@
#include <asm/stacktrace.h>
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
{
- switch (type) {
- case STACK_TYPE_IRQ:
- case STACK_TYPE_SOFTIRQ:
- *begin = "IRQ";
- *end = "EOI";
- break;
- default:
- *begin = NULL;
- *end = NULL;
- }
+ if (type == STACK_TYPE_IRQ)
+ return "IRQ";
+
+ if (type == STACK_TYPE_SOFTIRQ)
+ return "SOFTIRQ";
+
+ return NULL;
}
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
@@ -109,8 +106,10 @@ recursion_check:
* just break out and report an unknown stack type.
*/
if (visit_mask) {
- if (*visit_mask & (1UL << info->type))
+ if (*visit_mask & (1UL << info->type)) {
+ printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
+ }
*visit_mask |= 1UL << info->type;
}
@@ -121,36 +120,6 @@ unknown:
return -EINVAL;
}
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, char *log_lvl)
-{
- unsigned long *stack;
- int i;
-
- if (!try_get_task_stack(task))
- return;
-
- sp = sp ? : get_stack_pointer(task, regs);
-
- stack = sp;
- for (i = 0; i < kstack_depth_to_print; i++) {
- if (kstack_end(stack))
- break;
- if ((i % STACKSLOTS_PER_LINE) == 0) {
- if (i != 0)
- pr_cont("\n");
- printk("%s %08lx", log_lvl, *stack++);
- } else
- pr_cont(" %08lx", *stack++);
- touch_nmi_watchdog();
- }
- pr_cont("\n");
- show_trace_log_lvl(task, regs, sp, log_lvl);
-
- put_task_stack(task);
-}
-
-
void show_regs(struct pt_regs *regs)
{
int i;
@@ -168,8 +137,7 @@ void show_regs(struct pt_regs *regs)
unsigned char c;
u8 *ip;
- pr_emerg("Stack:\n");
- show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
+ show_trace_log_lvl(current, regs, NULL, KERN_EMERG);
pr_emerg("Code:");
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 36cf1a4..fac189e 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -28,23 +28,17 @@ static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
-void stack_type_str(enum stack_type type, const char **begin, const char **end)
+const char *stack_type_name(enum stack_type type)
{
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
- switch (type) {
- case STACK_TYPE_IRQ:
- *begin = "IRQ";
- *end = "EOI";
- break;
- case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
- *begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
- *end = "EOE";
- break;
- default:
- *begin = NULL;
- *end = NULL;
- }
+ if (type == STACK_TYPE_IRQ)
+ return "IRQ";
+
+ if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
+ return exception_stack_names[type - STACK_TYPE_EXCEPTION];
+
+ return NULL;
}
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
@@ -128,8 +122,10 @@ recursion_check:
* just break out and report an unknown stack type.
*/
if (visit_mask) {
- if (*visit_mask & (1UL << info->type))
+ if (*visit_mask & (1UL << info->type)) {
+ printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
+ }
*visit_mask |= 1UL << info->type;
}
@@ -140,56 +136,6 @@ unknown:
return -EINVAL;
}
-void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, char *log_lvl)
-{
- unsigned long *irq_stack_end;
- unsigned long *irq_stack;
- unsigned long *stack;
- int i;
-
- if (!try_get_task_stack(task))
- return;
-
- irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
- irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
-
- sp = sp ? : get_stack_pointer(task, regs);
-
- stack = sp;
- for (i = 0; i < kstack_depth_to_print; i++) {
- unsigned long word;
-
- if (stack >= irq_stack && stack <= irq_stack_end) {
- if (stack == irq_stack_end) {
- stack = (unsigned long *) (irq_stack_end[-1]);
- pr_cont(" <EOI> ");
- }
- } else {
- if (kstack_end(stack))
- break;
- }
-
- if (probe_kernel_address(stack, word))
- break;
-
- if ((i % STACKSLOTS_PER_LINE) == 0) {
- if (i != 0)
- pr_cont("\n");
- printk("%s %016lx", log_lvl, word);
- } else
- pr_cont(" %016lx", word);
-
- stack++;
- touch_nmi_watchdog();
- }
-
- pr_cont("\n");
- show_trace_log_lvl(task, regs, sp, log_lvl);
-
- put_task_stack(task);
-}
-
void show_regs(struct pt_regs *regs)
{
int i;
@@ -207,8 +153,7 @@ void show_regs(struct pt_regs *regs)
unsigned char c;
u8 *ip;
- printk(KERN_DEFAULT "Stack:\n");
- show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
+ show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT);
printk(KERN_DEFAULT "Code: ");
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index c7c11cc..1d77704 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -65,6 +65,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX);
setup_clear_cpu_cap(X86_FEATURE_AVX2);
setup_clear_cpu_cap(X86_FEATURE_AVX512F);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
@@ -73,6 +74,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
setup_clear_cpu_cap(X86_FEATURE_MPX);
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
setup_clear_cpu_cap(X86_FEATURE_PKU);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 2dabea4..4e8577d 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -63,6 +63,8 @@
#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
#endif
+#define SIZEOF_PTREGS 17*4
+
/*
* Number of possible pages in the lowmem region.
*
@@ -248,19 +250,19 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
#ifdef CONFIG_PARAVIRT
/* This can only trip for a broken bootloader... */
cmpw $0x207, pa(boot_params + BP_version)
- jb default_entry
+ jb .Ldefault_entry
/* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */
movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax
- jae bad_subarch
+ jae .Lbad_subarch
movl pa(subarch_entries)(,%eax,4), %eax
subl $__PAGE_OFFSET, %eax
jmp *%eax
-bad_subarch:
+.Lbad_subarch:
WEAK(lguest_entry)
WEAK(xen_entry)
/* Unknown implementation; there's really
@@ -270,14 +272,14 @@ WEAK(xen_entry)
__INITDATA
subarch_entries:
- .long default_entry /* normal x86/PC */
+ .long .Ldefault_entry /* normal x86/PC */
.long lguest_entry /* lguest hypervisor */
.long xen_entry /* Xen hypervisor */
- .long default_entry /* Moorestown MID */
+ .long .Ldefault_entry /* Moorestown MID */
num_subarch_entries = (. - subarch_entries) / 4
.previous
#else
- jmp default_entry
+ jmp .Ldefault_entry
#endif /* CONFIG_PARAVIRT */
#ifdef CONFIG_HOTPLUG_CPU
@@ -289,7 +291,8 @@ num_subarch_entries = (. - subarch_entries) / 4
ENTRY(start_cpu0)
movl initial_stack, %ecx
movl %ecx, %esp
- jmp *(initial_code)
+ call *(initial_code)
+1: jmp 1b
ENDPROC(start_cpu0)
#endif
@@ -317,7 +320,7 @@ ENTRY(startup_32_smp)
call load_ucode_ap
#endif
-default_entry:
+.Ldefault_entry:
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
X86_CR0_PG)
@@ -347,7 +350,7 @@ default_entry:
pushfl
popl %eax # get EFLAGS
testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remain set?
- jz enable_paging # hw disallowed setting of ID bit
+ jz .Lenable_paging # hw disallowed setting of ID bit
# which means no CPUID and no CR4
xorl %eax,%eax
@@ -357,13 +360,13 @@ default_entry:
movl $1,%eax
cpuid
andl $~1,%edx # Ignore CPUID.FPU
- jz enable_paging # No flags or only CPUID.FPU = no CR4
+ jz .Lenable_paging # No flags or only CPUID.FPU = no CR4
movl pa(mmu_cr4_features),%eax
movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled
- jz enable_paging
+ jz .Lenable_paging
/* Check if extended functions are implemented */
movl $0x80000000, %eax
@@ -371,7 +374,7 @@ default_entry:
/* Value must be in the range 0x80000001 to 0x8000ffff */
subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
- ja enable_paging
+ ja .Lenable_paging
/* Clear bogus XD_DISABLE bits */
call verify_cpu
@@ -380,7 +383,7 @@ default_entry:
cpuid
/* Execute Disable bit supported? */
btl $(X86_FEATURE_NX & 31), %edx
- jnc enable_paging
+ jnc .Lenable_paging
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
@@ -390,7 +393,7 @@ default_entry:
/* Make changes effective */
wrmsr
-enable_paging:
+.Lenable_paging:
/*
* Enable paging
@@ -419,7 +422,7 @@ enable_paging:
*/
movb $4,X86 # at least 486
cmpl $-1,X86_CPUID
- je is486
+ je .Lis486
/* get vendor info */
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
@@ -430,7 +433,7 @@ enable_paging:
movl %ecx,X86_VENDOR_ID+8 # last 4 chars
orl %eax,%eax # do we have processor info as well?
- je is486
+ je .Lis486
movl $1,%eax # Use the CPUID instruction to get CPU type
cpuid
@@ -444,7 +447,7 @@ enable_paging:
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
-is486:
+.Lis486:
movl $0x50022,%ecx # set AM, WP, NE and MP
movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
@@ -470,8 +473,9 @@ is486:
xorl %eax,%eax # Clear LDT
lldt %ax
- pushl $0 # fake return address for unwinder
- jmp *(initial_code)
+ call *(initial_code)
+1: jmp 1b
+ENDPROC(startup_32_smp)
#include "verify_cpu.S"
@@ -709,7 +713,12 @@ ENTRY(initial_page_table)
.data
.balign 4
ENTRY(initial_stack)
- .long init_thread_union+THREAD_SIZE
+ /*
+ * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+ * unwinder reliably detect the end of the stack.
+ */
+ .long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
+ TOP_OF_KERNEL_STACK_PADDING;
__INITRODATA
int_msg:
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b4421cc..90de288 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,13 +66,8 @@ startup_64:
* tables and then reload them.
*/
- /*
- * Setup stack for verify_cpu(). "-8" because initial_stack is defined
- * this way, see below. Our best guess is a NULL ptr for stack
- * termination heuristics and we don't want to break anything which
- * might depend on it (kgdb, ...).
- */
- leaq (__end_init_task - 8)(%rip), %rsp
+ /* Set up the stack for verify_cpu(), similar to initial_stack below */
+ leaq (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
/* Sanitize CPU configuration */
call verify_cpu
@@ -117,20 +112,20 @@ startup_64:
movq %rdi, %rax
shrq $PGDIR_SHIFT, %rax
- leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx
+ leaq (PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
movq %rdx, 0(%rbx,%rax,8)
movq %rdx, 8(%rbx,%rax,8)
- addq $4096, %rdx
+ addq $PAGE_SIZE, %rdx
movq %rdi, %rax
shrq $PUD_SHIFT, %rax
andl $(PTRS_PER_PUD-1), %eax
- movq %rdx, 4096(%rbx,%rax,8)
+ movq %rdx, PAGE_SIZE(%rbx,%rax,8)
incl %eax
andl $(PTRS_PER_PUD-1), %eax
- movq %rdx, 4096(%rbx,%rax,8)
+ movq %rdx, PAGE_SIZE(%rbx,%rax,8)
- addq $8192, %rbx
+ addq $PAGE_SIZE * 2, %rbx
movq %rdi, %rax
shrq $PMD_SHIFT, %rdi
addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax
@@ -147,6 +142,9 @@ startup_64:
decl %ecx
jnz 1b
+ test %rbp, %rbp
+ jz .Lskip_fixup
+
/*
* Fixup the kernel text+data virtual addresses. Note that
* we might write invalid pmds, when the kernel is relocated
@@ -154,9 +152,9 @@ startup_64:
* beyond _end.
*/
leaq level2_kernel_pgt(%rip), %rdi
- leaq 4096(%rdi), %r8
+ leaq PAGE_SIZE(%rdi), %r8
/* See if it is a valid page table entry */
-1: testb $1, 0(%rdi)
+1: testb $_PAGE_PRESENT, 0(%rdi)
jz 2f
addq %rbp, 0(%rdi)
/* Go to the next page */
@@ -167,6 +165,7 @@ startup_64:
/* Fixup phys_base */
addq %rbp, phys_base(%rip)
+.Lskip_fixup:
movq $(early_level4_pgt - __START_KERNEL_map), %rax
jmp 1f
ENTRY(secondary_startup_64)
@@ -265,13 +264,17 @@ ENTRY(secondary_startup_64)
movl $MSR_GS_BASE,%ecx
movl initial_gs(%rip),%eax
movl initial_gs+4(%rip),%edx
- wrmsr
+ wrmsr
/* rsi is pointer to real mode structure with interesting info.
pass it to C */
movq %rsi, %rdi
-
- /* Finally jump to run C code and to be on real kernel address
+ jmp start_cpu
+ENDPROC(secondary_startup_64)
+
+ENTRY(start_cpu)
+ /*
+ * Jump to run C code and to be on a real kernel address.
* Since we are running on identity-mapped space we have to jump
* to the full 64bit address, this is only possible as indirect
* jump. In addition we need to ensure %cs is set so we make this
@@ -295,12 +298,13 @@ ENTRY(secondary_startup_64)
* REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
* address given in m16:64.
*/
- movq initial_code(%rip),%rax
- pushq $0 # fake return address to stop unwinder
+ call 1f # put return address on stack for unwinder
+1: xorq %rbp, %rbp # clear frame pointer
+ movq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
-ENDPROC(secondary_startup_64)
+ENDPROC(start_cpu)
#include "verify_cpu.S"
@@ -308,15 +312,11 @@ ENDPROC(secondary_startup_64)
/*
* Boot CPU0 entry point. It's called from play_dead(). Everything has been set
* up already except stack. We just set up stack here. Then call
- * start_secondary().
+ * start_secondary() via start_cpu().
*/
ENTRY(start_cpu0)
- movq initial_stack(%rip),%rsp
- movq initial_code(%rip),%rax
- pushq $0 # fake return address to stop unwinder
- pushq $__KERNEL_CS # set correct cs
- pushq %rax # target address in negative space
- lretq
+ movq initial_stack(%rip), %rsp
+ jmp start_cpu
ENDPROC(start_cpu0)
#endif
@@ -328,7 +328,11 @@ ENDPROC(start_cpu0)
GLOBAL(initial_gs)
.quad INIT_PER_CPU_VAR(irq_stack_union)
GLOBAL(initial_stack)
- .quad init_thread_union+THREAD_SIZE-8
+ /*
+ * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
+ * unwinder reliably detect the end of the stack.
+ */
+ .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
__FINITDATA
bad_address:
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
new file mode 100644
index 0000000..cb9c1ed
--- /dev/null
+++ b/arch/x86/kernel/itmt.c
@@ -0,0 +1,215 @@
+/*
+ * itmt.c: Support Intel Turbo Boost Max Technology 3.0
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ * On platforms supporting Intel Turbo Boost Max Technology 3.0 (ITMT),
+ * the maximum turbo frequencies of some cores in a CPU package may be
+ * higher than for the other cores in the same package. In that case,
+ * better performance can be achieved by making the scheduler prefer
+ * to run tasks on the CPUs with higher max turbo frequencies.
+ *
+ * This file provides functions and data structures for enabling the
+ * scheduler to favor scheduling on cores that can be boosted to a higher
+ * frequency under ITMT.
+ */
+
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/cpuset.h>
+#include <linux/mutex.h>
+#include <linux/sysctl.h>
+#include <linux/nodemask.h>
+
+static DEFINE_MUTEX(itmt_update_mutex);
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
+
+/* Boolean to track if system has ITMT capabilities */
+static bool __read_mostly sched_itmt_capable;
+
+/*
+ * Boolean to control whether we want to move processes to CPUs capable
+ * of higher turbo frequencies on systems supporting Intel Turbo Boost Max
+ * Technology 3.0.
+ *
+ * It can be set via /proc/sys/kernel/sched_itmt_enabled
+ */
+unsigned int __read_mostly sysctl_sched_itmt_enabled;
+
+static int sched_itmt_update_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ unsigned int old_sysctl;
+ int ret;
+
+ mutex_lock(&itmt_update_mutex);
+
+ if (!sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return -EINVAL;
+ }
+
+ old_sysctl = sysctl_sched_itmt_enabled;
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+
+ if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
+ mutex_unlock(&itmt_update_mutex);
+
+ return ret;
+}
+
+static unsigned int zero;
+static unsigned int one = 1;
+static struct ctl_table itmt_kern_table[] = {
+ {
+ .procname = "sched_itmt_enabled",
+ .data = &sysctl_sched_itmt_enabled,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = sched_itmt_update_handler,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+ {}
+};
+
+static struct ctl_table itmt_root_table[] = {
+ {
+ .procname = "kernel",
+ .mode = 0555,
+ .child = itmt_kern_table,
+ },
+ {}
+};
+
+static struct ctl_table_header *itmt_sysctl_header;
+
+/**
+ * sched_set_itmt_support() - Indicate platform supports ITMT
+ *
+ * This function is used by the OS to indicate to the scheduler that the
+ * platform is capable of supporting the ITMT feature.
+ *
+ * The current scheme has the pstate driver detect whether the system
+ * is ITMT capable and then call sched_set_itmt_support().
+ *
+ * This must be done only after sched_set_itmt_core_prio()
+ * has been called to set the CPUs' priorities.
+ * It must not be called with the CPU hotplug lock held,
+ * as that lock is needed later to rebuild the sched domains.
+ *
+ * Return: 0 on success
+ */
+int sched_set_itmt_support(void)
+{
+ mutex_lock(&itmt_update_mutex);
+
+ if (sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return 0;
+ }
+
+ itmt_sysctl_header = register_sysctl_table(itmt_root_table);
+ if (!itmt_sysctl_header) {
+ mutex_unlock(&itmt_update_mutex);
+ return -ENOMEM;
+ }
+
+ sched_itmt_capable = true;
+
+ sysctl_sched_itmt_enabled = 1;
+
+ if (sysctl_sched_itmt_enabled) {
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
+ mutex_unlock(&itmt_update_mutex);
+
+ return 0;
+}
+
+/**
+ * sched_clear_itmt_support() - Revoke platform's support of ITMT
+ *
+ * This function is used by the OS to indicate that it has
+ * revoked the platform's support of the ITMT feature.
+ *
+ * It must not be called with the CPU hotplug lock held,
+ * as that lock is needed later to rebuild the sched domains.
+ */
+void sched_clear_itmt_support(void)
+{
+ mutex_lock(&itmt_update_mutex);
+
+ if (!sched_itmt_capable) {
+ mutex_unlock(&itmt_update_mutex);
+ return;
+ }
+ sched_itmt_capable = false;
+
+ if (itmt_sysctl_header) {
+ unregister_sysctl_table(itmt_sysctl_header);
+ itmt_sysctl_header = NULL;
+ }
+
+ if (sysctl_sched_itmt_enabled) {
+ /* disable sched_itmt if we are no longer ITMT capable */
+ sysctl_sched_itmt_enabled = 0;
+ x86_topology_update = true;
+ rebuild_sched_domains();
+ }
+
+ mutex_unlock(&itmt_update_mutex);
+}
+
+int arch_asym_cpu_priority(int cpu)
+{
+ return per_cpu(sched_core_priority, cpu);
+}
+
+/**
+ * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
+ * @prio: Priority of cpu core
+ * @core_cpu: The cpu number associated with the core
+ *
+ * The pstate driver will find out the max boost frequency
+ * and call this function to set a priority proportional
+ * to the max boost frequency. CPUs with a higher boost
+ * frequency will receive a higher priority.
+ *
+ * There is no need to rebuild the sched domains after updating
+ * the CPU priorities; the sched domains have no dependency on
+ * CPU priorities.
+ */
+void sched_set_itmt_core_prio(int prio, int core_cpu)
+{
+ int cpu, i = 1;
+
+ for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
+ int smt_prio;
+
+ /*
+ * Ensure that the siblings are moved to the end
+ * of the priority chain and only used when
+ * all other high priority cpus are out of capacity.
+ */
+ smt_prio = prio * smp_num_siblings / i;
+ per_cpu(sched_core_priority, cpu) = smt_prio;
+ i++;
+ }
+}
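For reference, a minimal usage sketch of the new ITMT interface as described by the kerneldoc above, assuming a hypothetical frequency driver that has already read per-CPU maximum turbo ratios into max_perf[] (the function and array names are made up and not part of this patch):

#include <linux/cpumask.h>
#include <asm/topology.h>	/* sched_set_itmt_core_prio(), sched_set_itmt_support() */

/* Hypothetical illustration: max_perf[cpu] holds the CPU's max turbo ratio. */
static void demo_enable_itmt(const int *max_perf)
{
	int cpu;

	/* Priorities must be in place before ITMT support is declared. */
	for_each_online_cpu(cpu)
		sched_set_itmt_core_prio(max_perf[cpu], cpu);

	/* Registers /proc/sys/kernel/sched_itmt_enabled and triggers a
	 * sched domain rebuild so the asymmetric packing flags take effect. */
	sched_set_itmt_support();
}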
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index edbbfc8..fb5afc6 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -308,7 +308,7 @@ static void kvm_register_steal_time(void)
static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
-static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
+static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
/**
* This relies on __test_and_clear_bit to modify the memory
@@ -319,7 +319,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
*/
if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
return;
- apic_write(APIC_EOI, APIC_EOI_ACK);
+ apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
}
static void kvm_guest_cpu_init(void)
@@ -592,6 +592,14 @@ out:
local_irq_restore(flags);
}
+__visible bool __kvm_vcpu_is_preempted(int cpu)
+{
+ struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
+
+ return !!src->preempted;
+}
+PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
+
/*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/
@@ -608,6 +616,11 @@ void __init kvm_spinlock_init(void)
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.wait = kvm_wait;
pv_lock_ops.kick = kvm_kick_cpu;
+
+ if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+ pv_lock_ops.vcpu_is_preempted =
+ PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
+ }
}
static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 6707039..f09df2f 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -34,10 +34,10 @@ static void flush_ldt(void *current_mm)
}
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
-static struct ldt_struct *alloc_ldt_struct(int size)
+static struct ldt_struct *alloc_ldt_struct(unsigned int size)
{
struct ldt_struct *new_ldt;
- int alloc_size;
+ unsigned int alloc_size;
if (size > LDT_ENTRIES)
return NULL;
@@ -207,11 +207,11 @@ static int read_default_ldt(void __user *ptr, unsigned long bytecount)
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
struct mm_struct *mm = current->mm;
+ struct ldt_struct *new_ldt, *old_ldt;
+ unsigned int oldsize, newsize;
+ struct user_desc ldt_info;
struct desc_struct ldt;
int error;
- struct user_desc ldt_info;
- int oldsize, newsize;
- struct ldt_struct *new_ldt, *old_ldt;
error = -EINVAL;
if (bytecount != sizeof(ldt_info))
@@ -249,7 +249,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
old_ldt = mm->context.ldt;
oldsize = old_ldt ? old_ldt->size : 0;
- newsize = max((int)(ldt_info.entry_number + 1), oldsize);
+ newsize = max(ldt_info.entry_number + 1, oldsize);
error = -ENOMEM;
new_ldt = alloc_ldt_struct(newsize);
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 2c55a00..6d4bf81 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -12,7 +12,6 @@ __visible void __native_queued_spin_unlock(struct qspinlock *lock)
{
native_queued_spin_unlock(lock);
}
-
PV_CALLEE_SAVE_REGS_THUNK(__native_queued_spin_unlock);
bool pv_is_native_spin_unlock(void)
@@ -21,12 +20,25 @@ bool pv_is_native_spin_unlock(void)
__raw_callee_save___native_queued_spin_unlock;
}
+__visible bool __native_vcpu_is_preempted(int cpu)
+{
+ return false;
+}
+PV_CALLEE_SAVE_REGS_THUNK(__native_vcpu_is_preempted);
+
+bool pv_is_native_vcpu_is_preempted(void)
+{
+ return pv_lock_ops.vcpu_is_preempted.func ==
+ __raw_callee_save___native_vcpu_is_preempted;
+}
+
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
.wait = paravirt_nop,
.kick = paravirt_nop,
+ .vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
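As an aside, a hedged sketch of the kind of spin-wait loop the new vcpu_is_preempted hook is meant to short-circuit; struct demo_lock is made up, and vcpu_is_preempted() is assumed to resolve to the paravirt op on PV guests and to false on bare metal:

#include <linux/sched.h>	/* vcpu_is_preempted() */
#include <linux/compiler.h>	/* READ_ONCE() */
#include <asm/processor.h>	/* cpu_relax() */

struct demo_lock {		/* hypothetical lock with an owner CPU field */
	int owner_cpu;
};

/* Spin while the owner still holds the lock, but give up as soon as the
 * owner's vCPU is preempted: spinning on a descheduled vCPU only burns
 * host CPU time. */
static bool demo_spin_on_owner(struct demo_lock *lock, int owner_cpu)
{
	while (READ_ONCE(lock->owner_cpu) == owner_cpu) {
		if (vcpu_is_preempted(owner_cpu))
			return false;
		cpu_relax();
	}
	return true;
}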
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d3f7f14..d33ef16 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -11,6 +11,7 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %eax, %eax");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
@@ -26,6 +27,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
}
extern bool pv_is_native_spin_unlock(void);
+extern bool pv_is_native_vcpu_is_preempted(void);
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
@@ -54,9 +56,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
end = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
+ goto patch_default;
+
+ case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
+ if (pv_is_native_vcpu_is_preempted()) {
+ start = start_pv_lock_ops_vcpu_is_preempted;
+ end = end_pv_lock_ops_vcpu_is_preempted;
+ goto patch_site;
+ }
+ goto patch_default;
#endif
default:
+patch_default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 915a4c0..f4fcf26 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -20,6 +20,7 @@ DEF_NATIVE(, mov64, "mov %rdi, %rax");
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
+DEF_NATIVE(pv_lock_ops, vcpu_is_preempted, "xor %rax, %rax");
#endif
unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
@@ -35,6 +36,7 @@ unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
}
extern bool pv_is_native_spin_unlock(void);
+extern bool pv_is_native_vcpu_is_preempted(void);
unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
unsigned long addr, unsigned len)
@@ -66,9 +68,19 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
end = end_pv_lock_ops_queued_spin_unlock;
goto patch_site;
}
+ goto patch_default;
+
+ case PARAVIRT_PATCH(pv_lock_ops.vcpu_is_preempted):
+ if (pv_is_native_vcpu_is_preempted()) {
+ start = start_pv_lock_ops_vcpu_is_preempted;
+ end = end_pv_lock_ops_vcpu_is_preempted;
+ goto patch_site;
+ }
+ goto patch_default;
#endif
default:
+patch_default:
ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
break;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7dc8c9c..f854404 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -72,10 +72,9 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs);
}
- printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
- (u16)regs->cs, regs->ip, regs->flags,
- smp_processor_id());
- print_symbol("EIP is at %s\n", regs->ip);
+ printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip);
+ printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags,
+ smp_processor_id());
printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 9c3a7b0..6c1b43e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,10 +61,15 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
- printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
- printk_address(regs->ip);
- printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
- regs->sp, regs->flags);
+ printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs & 0xffff,
+ (void *)regs->ip);
+ printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
+ regs->sp, regs->flags);
+ if (regs->orig_ax != -1)
+ pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
+ else
+ pr_cont("\n");
+
printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx);
printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index c00cb64..68f8cc2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -261,10 +261,8 @@ static inline void __smp_reschedule_interrupt(void)
__visible void smp_reschedule_interrupt(struct pt_regs *regs)
{
- irq_enter();
ack_APIC_irq();
__smp_reschedule_interrupt();
- irq_exit();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index d29c852..2a501ab 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -109,6 +109,17 @@ static bool logical_packages_frozen __read_mostly;
/* Maximum number of SMT threads on any online core */
int __max_smt_threads __read_mostly;
+/* Flag to indicate if a complete sched domain rebuild is required */
+bool x86_topology_update;
+
+int arch_update_cpu_topology(void)
+{
+ int retval = x86_topology_update;
+
+ x86_topology_update = false;
+ return retval;
+}
+
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
@@ -471,22 +482,42 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
+#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
+static inline int x86_sched_itmt_flags(void)
+{
+ return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
+}
+
+#ifdef CONFIG_SCHED_MC
+static int x86_core_flags(void)
+{
+ return cpu_core_flags() | x86_sched_itmt_flags();
+}
+#endif
+#ifdef CONFIG_SCHED_SMT
+static int x86_smt_flags(void)
+{
+ return cpu_smt_flags() | x86_sched_itmt_flags();
+}
+#endif
+#endif
+
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ NULL, },
};
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
@@ -821,14 +852,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-void smp_announce(void)
-{
- int num_nodes = num_online_nodes();
-
- printk(KERN_INFO "x86: Booted up %d node%s, %d CPUs\n",
- num_nodes, (num_nodes > 1 ? "s" : ""), num_online_cpus());
-}
-
/* reduce the number of lines printed when booting a large cpu count system */
static void announce_cpu(int cpu, int apicid)
{
@@ -964,9 +987,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int cpu0_nmi_registered = 0;
unsigned long timeout;
- idle->thread.sp = (unsigned long) (((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(idle))) - 1);
-
+ idle->thread.sp = (unsigned long)task_pt_regs(idle);
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
@@ -1331,7 +1352,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
default_setup_apic_routing();
cpu0_logical_apicid = apic_bsp_setup(false);
- pr_info("CPU%d: ", 0);
+ pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
if (is_uv_system())
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index a2456d4..ea7b7f9 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -14,13 +14,55 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
if (unwind_done(state))
return 0;
+ if (state->regs && user_mode(state->regs))
+ return 0;
+
addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
addr_p);
- return __kernel_text_address(addr) ? addr : 0;
+ if (!__kernel_text_address(addr)) {
+ printk_deferred_once(KERN_WARNING
+ "WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
+ (void *)addr, addr_p, state->task->comm,
+ state->task->pid);
+ return 0;
+ }
+
+ return addr;
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);
+static size_t regs_size(struct pt_regs *regs)
+{
+ /* x86_32 regs from kernel mode are two words shorter: */
+ if (IS_ENABLED(CONFIG_X86_32) && !user_mode(regs))
+ return sizeof(*regs) - 2*sizeof(long);
+
+ return sizeof(*regs);
+}
+
+static bool is_last_task_frame(struct unwind_state *state)
+{
+ unsigned long bp = (unsigned long)state->bp;
+ unsigned long regs = (unsigned long)task_pt_regs(state->task);
+
+ return bp == regs - FRAME_HEADER_SIZE;
+}
+
+/*
+ * This determines if the frame pointer actually contains an encoded pointer to
+ * pt_regs on the stack. See ENCODE_FRAME_POINTER.
+ */
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+ unsigned long regs = (unsigned long)bp;
+
+ if (!(regs & 0x1))
+ return NULL;
+
+ return (struct pt_regs *)(regs & ~0x1);
+}
+
static bool update_stack_state(struct unwind_state *state, void *addr,
size_t len)
{
@@ -43,26 +85,117 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
bool unwind_next_frame(struct unwind_state *state)
{
- unsigned long *next_bp;
+ struct pt_regs *regs;
+ unsigned long *next_bp, *next_frame;
+ size_t next_len;
+ enum stack_type prev_type = state->stack_info.type;
if (unwind_done(state))
return false;
- next_bp = (unsigned long *)*state->bp;
+ /* have we reached the end? */
+ if (state->regs && user_mode(state->regs))
+ goto the_end;
+
+ if (is_last_task_frame(state)) {
+ regs = task_pt_regs(state->task);
+
+ /*
+ * kthreads (other than the boot CPU's idle thread) have some
+ * partial regs at the end of their stack which were placed
+ * there by copy_thread_tls(). But the regs don't have any
+ * useful information, so we can skip them.
+ *
+ * This user_mode() check is slightly broader than a PF_KTHREAD
+ * check because it also catches the awkward situation where a
+ * newly forked kthread transitions into a user task by calling
+ * do_execve(), which eventually clears PF_KTHREAD.
+ */
+ if (!user_mode(regs))
+ goto the_end;
+
+ /*
+ * We're almost at the end, but not quite: there's still the
+ * syscall regs frame. Entry code doesn't encode the regs
+ * pointer for syscalls, so we have to set it manually.
+ */
+ state->regs = regs;
+ state->bp = NULL;
+ return true;
+ }
+
+ /* get the next frame pointer */
+ if (state->regs)
+ next_bp = (unsigned long *)state->regs->bp;
+ else
+ next_bp = (unsigned long *)*state->bp;
+
+ /* is the next frame pointer an encoded pointer to pt_regs? */
+ regs = decode_frame_pointer(next_bp);
+ if (regs) {
+ next_frame = (unsigned long *)regs;
+ next_len = sizeof(*regs);
+ } else {
+ next_frame = next_bp;
+ next_len = FRAME_HEADER_SIZE;
+ }
/* make sure the next frame's data is accessible */
- if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
- return false;
+ if (!update_stack_state(state, next_frame, next_len)) {
+ /*
+ * Don't warn on bad regs->bp. An interrupt in entry code
+ * might cause a false positive warning.
+ */
+ if (state->regs)
+ goto the_end;
+
+ goto bad_address;
+ }
+
+ /* Make sure it only unwinds up and doesn't overlap the last frame: */
+ if (state->stack_info.type == prev_type) {
+ if (state->regs && (void *)next_frame < (void *)state->regs + regs_size(state->regs))
+ goto bad_address;
+
+ if (state->bp && (void *)next_frame < (void *)state->bp + FRAME_HEADER_SIZE)
+ goto bad_address;
+ }
/* move to the next frame */
- state->bp = next_bp;
+ if (regs) {
+ state->regs = regs;
+ state->bp = NULL;
+ } else {
+ state->bp = next_bp;
+ state->regs = NULL;
+ }
+
return true;
+
+bad_address:
+ if (state->regs) {
+ printk_deferred_once(KERN_WARNING
+ "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
+ state->regs, state->task->comm,
+ state->task->pid, next_frame);
+ } else {
+ printk_deferred_once(KERN_WARNING
+ "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
+ state->bp, state->task->comm,
+ state->task->pid, next_frame);
+ }
+the_end:
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return false;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
+ unsigned long *bp, *frame;
+ size_t len;
+
memset(state, 0, sizeof(*state));
state->task = task;
@@ -73,12 +206,22 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
}
/* set up the starting stack frame */
- state->bp = get_frame_pointer(task, regs);
+ bp = get_frame_pointer(task, regs);
+ regs = decode_frame_pointer(bp);
+ if (regs) {
+ state->regs = regs;
+ frame = (unsigned long *)regs;
+ len = sizeof(*regs);
+ } else {
+ state->bp = bp;
+ frame = bp;
+ len = FRAME_HEADER_SIZE;
+ }
/* initialize stack info and make sure the frame data is accessible */
- get_stack_info(state->bp, state->task, &state->stack_info,
+ get_stack_info(frame, state->task, &state->stack_info,
&state->stack_mask);
- update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+ update_stack_state(state, frame, len);
/*
* The caller can provide the address of the first frame directly
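For illustration, the low-bit convention that decode_frame_pointer() undoes can be exercised in a small standalone sketch; struct fake_regs stands in for pt_regs, and the encode() helper mirrors what the entry code's ENCODE_FRAME_POINTER is assumed to do:

#include <stdio.h>

/* Stand-in for struct pt_regs; only its address matters here. */
struct fake_regs { unsigned long ip, sp, bp; };

/* Set bit 0 of the saved frame pointer to mark "this points at regs". */
static unsigned long encode(struct fake_regs *regs)
{
	return (unsigned long)regs | 0x1;
}

/* Mirror of decode_frame_pointer(): NULL for a normal frame pointer,
 * otherwise the regs pointer with the marker bit cleared. */
static struct fake_regs *decode(unsigned long bp)
{
	return (bp & 0x1) ? (struct fake_regs *)(bp & ~0x1UL) : NULL;
}

int main(void)
{
	struct fake_regs r = { 0, 0, 0 };
	unsigned long bp = encode(&r);

	printf("encoded bp %#lx decodes to %p (expected %p)\n",
	       bp, (void *)decode(bp), (void *)&r);
	return 0;
}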
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index b80e8bf..22881dd 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -7,11 +7,13 @@
unsigned long unwind_get_return_address(struct unwind_state *state)
{
- unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+ unsigned long addr;
if (unwind_done(state))
return 0;
+ addr = READ_ONCE_NOCHECK(*state->sp);
+
return ftrace_graph_ret_addr(state->task, &state->graph_idx,
addr, state->sp);
}
@@ -25,11 +27,12 @@ bool unwind_next_frame(struct unwind_state *state)
return false;
do {
- unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+ for (state->sp++; state->sp < info->end; state->sp++) {
+ unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
- for (state->sp++; state->sp < info->end; state->sp++)
if (__kernel_text_address(addr))
return true;
+ }
state->sp = info->next_sp;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index dbf67f6..e79f15f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -91,10 +91,10 @@ SECTIONS
/* Text and read-only data */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = .;
+ _stext = .;
/* bootstrapping code */
HEAD_TEXT
. = ALIGN(8);
- _stext = .;
TEXT_TEXT
SCHED_TEXT
CPUIDLE_TEXT
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index cbd7b92..a3ce9d2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2105,16 +2105,10 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned short sel, old_sel;
- struct desc_struct old_desc, new_desc;
- const struct x86_emulate_ops *ops = ctxt->ops;
+ unsigned short sel;
+ struct desc_struct new_desc;
u8 cpl = ctxt->ops->cpl(ctxt);
- /* Assignment of RIP may only fail in 64-bit mode */
- if (ctxt->mode == X86EMUL_MODE_PROT64)
- ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
- VCPU_SREG_CS);
-
memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
@@ -2124,12 +2118,10 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
return rc;
rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
- if (rc != X86EMUL_CONTINUE) {
- WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
- /* assigning eip failed; restore the old cs */
- ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
- return rc;
- }
+ /* Error handling is not implemented. */
+ if (rc != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
return rc;
}
@@ -2189,14 +2181,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
unsigned long eip, cs;
- u16 old_cs;
int cpl = ctxt->ops->cpl(ctxt);
- struct desc_struct old_desc, new_desc;
- const struct x86_emulate_ops *ops = ctxt->ops;
-
- if (ctxt->mode == X86EMUL_MODE_PROT64)
- ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
- VCPU_SREG_CS);
+ struct desc_struct new_desc;
rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
@@ -2213,10 +2199,10 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
if (rc != X86EMUL_CONTINUE)
return rc;
rc = assign_eip_far(ctxt, eip, &new_desc);
- if (rc != X86EMUL_CONTINUE) {
- WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
- ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
- }
+ /* Error handling is not implemented. */
+ if (rc != X86EMUL_CONTINUE)
+ return X86EMUL_UNHANDLEABLE;
+
return rc;
}
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 1a22de7..6e219e5 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
{
ioapic->rtc_status.pending_eoi = 0;
- bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS);
+ bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
}
static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 7d2692a..1cc6e54 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -42,13 +42,13 @@ struct kvm_vcpu;
struct dest_map {
/* vcpu bitmap where IRQ has been sent */
- DECLARE_BITMAP(map, KVM_MAX_VCPUS);
+ DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
/*
* Vector sent to a given vcpu, only valid when
* the vcpu's bit in map is set
*/
- u8 vectors[KVM_MAX_VCPUS];
+ u8 vectors[KVM_MAX_VCPU_ID];
};
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 4da0303..6c01916 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -41,6 +41,15 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
bool line_status)
{
struct kvm_pic *pic = pic_irqchip(kvm);
+
+ /*
+ * XXX: rejecting pic routes when pic isn't in use would be better,
+ * but the default routing table is installed while kvm->arch.vpic is
+ * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE.
+ */
+ if (!pic)
+ return -1;
+
return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
}
@@ -49,6 +58,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
bool line_status)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+ if (!ioapic)
+ return -1;
+
return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
line_status);
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 23b99f3..6f69340 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -138,7 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
*mask = dest_id & 0xff;
return true;
case KVM_APIC_MODE_XAPIC_CLUSTER:
- *cluster = map->xapic_cluster_map[dest_id >> 4];
+ *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
*mask = dest_id & 0xf;
return true;
default:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 56900f3..6f5f465 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2071,6 +2071,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
return;
+ vcpu->arch.st.steal.preempted = 0;
+
if (vcpu->arch.st.steal.version & 1)
vcpu->arch.st.steal.version += 1; /* first time write, random junk */
@@ -2826,8 +2828,22 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
}
+static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
+ return;
+
+ vcpu->arch.st.steal.preempted = 1;
+
+ kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
+ &vcpu->arch.st.steal.preempted,
+ offsetof(struct kvm_steal_time, preempted),
+ sizeof(vcpu->arch.st.steal.preempted));
+}
+
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_steal_time_set_preempted(vcpu);
kvm_x86_ops->vcpu_put(vcpu);
kvm_put_guest_fpu(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index d376e4b..c595957 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,53 +16,6 @@
#include <asm/smap.h>
#include <asm/export.h>
-/* Standard copy_to_user with segment limit checking */
-ENTRY(_copy_to_user)
- mov PER_CPU_VAR(current_task), %rax
- movq %rdi,%rcx
- addq %rdx,%rcx
- jc bad_to_user
- cmpq TASK_addr_limit(%rax),%rcx
- ja bad_to_user
- ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
- "jmp copy_user_generic_string", \
- X86_FEATURE_REP_GOOD, \
- "jmp copy_user_enhanced_fast_string", \
- X86_FEATURE_ERMS
-ENDPROC(_copy_to_user)
-EXPORT_SYMBOL(_copy_to_user)
-
-/* Standard copy_from_user with segment limit checking */
-ENTRY(_copy_from_user)
- mov PER_CPU_VAR(current_task), %rax
- movq %rsi,%rcx
- addq %rdx,%rcx
- jc bad_from_user
- cmpq TASK_addr_limit(%rax),%rcx
- ja bad_from_user
- ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
- "jmp copy_user_generic_string", \
- X86_FEATURE_REP_GOOD, \
- "jmp copy_user_enhanced_fast_string", \
- X86_FEATURE_ERMS
-ENDPROC(_copy_from_user)
-EXPORT_SYMBOL(_copy_from_user)
-
-
- .section .fixup,"ax"
- /* must zero dest */
-ENTRY(bad_from_user)
-bad_from_user:
- movl %edx,%ecx
- xorl %eax,%eax
- rep
- stosb
-bad_to_user:
- movl %edx,%eax
- ret
-ENDPROC(bad_from_user)
- .previous
-
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index d1dee75..0776425 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -113,14 +113,14 @@ int msr_clear_bit(u32 msr, u8 bit)
}
#ifdef CONFIG_TRACEPOINTS
-void do_trace_write_msr(unsigned msr, u64 val, int failed)
+void do_trace_write_msr(unsigned int msr, u64 val, int failed)
{
trace_write_msr(msr, val, failed);
}
EXPORT_SYMBOL(do_trace_write_msr);
EXPORT_TRACEPOINT_SYMBOL(write_msr);
-void do_trace_read_msr(unsigned msr, u64 val, int failed)
+void do_trace_read_msr(unsigned int msr, u64 val, int failed)
{
trace_read_msr(msr, val, failed);
}
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index b490878..c074799 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -34,3 +34,52 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
return ret;
}
EXPORT_SYMBOL_GPL(copy_from_user_nmi);
+
+/**
+ * copy_to_user: - Copy a block of data into user space.
+ * @to: Destination address, in user space.
+ * @from: Source address, in kernel space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * Copy data from kernel space to user space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ */
+unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
+{
+ if (access_ok(VERIFY_WRITE, to, n))
+ n = __copy_to_user(to, from, n);
+ return n;
+}
+EXPORT_SYMBOL(_copy_to_user);
+
+/**
+ * copy_from_user: - Copy a block of data from user space.
+ * @to: Destination address, in kernel space.
+ * @from: Source address, in user space.
+ * @n: Number of bytes to copy.
+ *
+ * Context: User context only. This function may sleep if pagefaults are
+ * enabled.
+ *
+ * Copy data from user space to kernel space.
+ *
+ * Returns number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * If some data could not be copied, this function will pad the copied
+ * data to the requested size using zero bytes.
+ */
+unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
+{
+ if (access_ok(VERIFY_READ, from, n))
+ n = __copy_from_user(to, from, n);
+ else
+ memset(to, 0, n);
+ return n;
+}
+EXPORT_SYMBOL(_copy_from_user);
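The relocated helpers keep their usual calling convention; a hedged sketch of a typical caller follows (the struct, ioctl handler, and field names are hypothetical):

#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/uaccess.h>	/* copy_from_user(), copy_to_user() */

struct demo_args {		/* hypothetical user/kernel ABI structure */
	__u32 flags;
	__u64 value;
};

static long demo_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct demo_args a;

	/* A non-zero return value means some bytes could not be copied. */
	if (copy_from_user(&a, (void __user *)arg, sizeof(a)))
		return -EFAULT;

	a.value++;		/* act on the arguments */

	if (copy_to_user((void __user *)arg, &a, sizeof(a)))
		return -EFAULT;

	return 0;
}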
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 3bc7baf..0b28121 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -640,52 +640,3 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
return n;
}
EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero);
-
-/**
- * copy_to_user: - Copy a block of data into user space.
- * @to: Destination address, in user space.
- * @from: Source address, in kernel space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * Copy data from kernel space to user space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- */
-unsigned long _copy_to_user(void __user *to, const void *from, unsigned n)
-{
- if (access_ok(VERIFY_WRITE, to, n))
- n = __copy_to_user(to, from, n);
- return n;
-}
-EXPORT_SYMBOL(_copy_to_user);
-
-/**
- * copy_from_user: - Copy a block of data from user space.
- * @to: Destination address, in kernel space.
- * @from: Source address, in user space.
- * @n: Number of bytes to copy.
- *
- * Context: User context only. This function may sleep if pagefaults are
- * enabled.
- *
- * Copy data from user space to kernel space.
- *
- * Returns number of bytes that could not be copied.
- * On success, this will be zero.
- *
- * If some data could not be copied, this function will pad the copied
- * data to the requested size using zero bytes.
- */
-unsigned long _copy_from_user(void *to, const void __user *from, unsigned n)
-{
- if (access_ok(VERIFY_READ, from, n))
- n = __copy_from_user(to, from, n);
- else
- memset(to, 0, n);
- return n;
-}
-EXPORT_SYMBOL(_copy_from_user);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9f72ca3..17c55a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -679,8 +679,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
printk(KERN_CONT "paging request");
printk(KERN_CONT " at %p\n", (void *) address);
- printk(KERN_ALERT "IP:");
- printk_address(regs->ip);
+ printk(KERN_ALERT "IP: %pS\n", (void *)regs->ip);
dump_pagetable(address);
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 83e701f..efc32bc 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -986,20 +986,17 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return 0;
}
-int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- pfn_t pfn)
+void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
{
enum page_cache_mode pcm;
if (!pat_enabled())
- return 0;
+ return;
/* Set prot based on lookup */
pcm = lookup_memtype(pfn_t_to_phys(pfn));
*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
-
- return 0;
}
/*
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index fe04a04..e76d1af 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -853,7 +853,7 @@ xadd: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32;
jmp_offset = func - (image + addrs[i]);
if (seen_ld_abs) {
- reload_skb_data = bpf_helper_changes_skb_data(func);
+ reload_skb_data = bpf_helper_changes_pkt_data(func);
if (reload_skb_data) {
EMIT1(0x57); /* push %rdi */
jmp_offset += 22; /* pop, mov, sub, mov */
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 5513084..c0533fb 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -196,6 +196,7 @@ static int xo15_sci_remove(struct acpi_device *device)
return 0;
}
+#ifdef CONFIG_PM_SLEEP
static int xo15_sci_resume(struct device *dev)
{
/* Enable all EC events */
@@ -207,6 +208,7 @@ static int xo15_sci_resume(struct device *dev)
return 0;
}
+#endif
static SIMPLE_DEV_PM_OPS(xo15_sci_pm, NULL, xo15_sci_resume);
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index cd5173a..8410e7d 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -387,8 +387,8 @@ static void uv_nmi_dump_cpu_ip_hdr(void)
/* Dump Instruction Pointer info */
static void uv_nmi_dump_cpu_ip(int cpu, struct pt_regs *regs)
{
- pr_info("UV: %4d %6d %-32.32s ", cpu, current->pid, current->comm);
- printk_address(regs->ip);
+ pr_info("UV: %4d %6d %-32.32s %pS",
+ cpu, current->pid, current->comm, (void *)regs->ip);
}
/*
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 1ac7647..8730c28 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -275,6 +275,8 @@ static void do_inject(void)
unsigned int cpu = i_mce.extcpu;
u8 b = i_mce.bank;
+ rdtscll(i_mce.tsc);
+
if (i_mce.misc)
i_mce.status |= MCI_STATUS_MISCV;
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 25012ab..4463fa7 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -69,7 +69,7 @@ $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE
# ---------------------------------------------------------------------------
-KBUILD_CFLAGS := $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
+KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
-I$(srctree)/arch/x86/boot
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index ba70ff2..1972565 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -269,7 +269,8 @@ int main(int argc, char **argv)
insns++;
}
- fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
+ fprintf((errors) ? stderr : stdout,
+ "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
prog,
(errors) ? "Failure" : "Success",
insns,
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index f595906..1d23bf9 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -16,7 +16,7 @@
#include <regex.h>
#include <tools/le_byteshift.h>
-void die(char *fmt, ...);
+void die(char *fmt, ...) __attribute__((noreturn));
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index 56f04db..ecf31e0 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -167,7 +167,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Warning: decoded and checked %d"
" instructions with %d warnings\n", insns, warnings);
else
- fprintf(stderr, "Succeed: decoded and checked %d"
+ fprintf(stdout, "Success: decoded and checked %d"
" instructions\n", insns);
return 0;
}
diff --git a/arch/x86/um/asm/processor.h b/arch/x86/um/asm/processor.h
index 233ee09..c77db22 100644
--- a/arch/x86/um/asm/processor.h
+++ b/arch/x86/um/asm/processor.h
@@ -26,7 +26,6 @@ static inline void rep_nop(void)
}
#define cpu_relax() rep_nop()
-#define cpu_relax_lowlatency() cpu_relax()
#define task_pt_regs(t) (&(t)->thread.regs)
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 3d6e006..e8a9ea7 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -114,6 +114,7 @@ void xen_uninit_lock_cpu(int cpu)
per_cpu(irq_name, cpu) = NULL;
}
+PV_CALLEE_SAVE_REGS_THUNK(xen_vcpu_stolen);
/*
* Our init of PV spinlocks is split in two init functions due to us
@@ -137,6 +138,7 @@ void __init xen_init_spinlocks(void)
pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
pv_lock_ops.wait = xen_qlock_wait;
pv_lock_ops.kick = xen_qlock_kick;
+ pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen);
}
/*