Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 12
-rw-r--r--  arch/x86/kernel/acpi/Makefile | 1
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 5
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 1
-rw-r--r--  arch/x86/kernel/acpi/sleep.h | 1
-rw-r--r--  arch/x86/kernel/alternative.c | 26
-rw-r--r--  arch/x86/kernel/amd_nb.c | 41
-rw-r--r--  arch/x86/kernel/aperture_64.c | 1
-rw-r--r--  arch/x86/kernel/apic/Makefile | 1
-rw-r--r--  arch/x86/kernel/apic/apic.c | 19
-rw-r--r--  arch/x86/kernel/apic/apic_noop.c | 1
-rw-r--r--  arch/x86/kernel/apic/bigsmp_32.c | 1
-rw-r--r--  arch/x86/kernel/apic/htirq.c | 5
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c | 1
-rw-r--r--  arch/x86/kernel/apic/io_apic.c | 10
-rw-r--r--  arch/x86/kernel/apic/ipi.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_cluster.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_phys.c | 1
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c | 5
-rw-r--r--  arch/x86/kernel/asm-offsets.c | 1
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 1
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 1
-rw-r--r--  arch/x86/kernel/audit_64.c | 1
-rw-r--r--  arch/x86/kernel/bootflag.c | 1
-rw-r--r--  arch/x86/kernel/check.c | 1
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/bugs.c | 1
-rw-r--r--  arch/x86/kernel/cpu/centaur.c | 1
-rw-r--r--  arch/x86/kernel/cpu/common.c | 58
-rw-r--r--  arch/x86/kernel/cpu/cpu.h | 1
-rw-r--r--  arch/x86/kernel/cpu/cpuid-deps.c | 121
-rw-r--r--  arch/x86/kernel/cpu/cyrix.c | 1
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c | 64
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 1
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 2
-rw-r--r--  arch/x86/kernel/cpu/intel_rdt.h | 1
-rw-r--r--  arch/x86/kernel/cpu/match.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/dev-mcelog.c | 121
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h | 8
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c | 9
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c | 13
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c | 1
-rw-r--r--  arch/x86/kernel/cpu/microcode/core.c | 27
-rw-r--r--  arch/x86/kernel/cpu/microcode/intel.c | 19
-rw-r--r--  arch/x86/kernel/cpu/mkcapflags.sh | 1
-rw-r--r--  arch/x86/kernel/cpu/mshyperv.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mtrr/amd.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/centaur.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/cyrix.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/if.c | 1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/mtrr.h | 1
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 1
-rw-r--r--  arch/x86/kernel/cpu/powerflags.c | 1
-rw-r--r--  arch/x86/kernel/cpu/proc.c | 11
-rw-r--r--  arch/x86/kernel/cpu/topology.c | 1
-rw-r--r--  arch/x86/kernel/cpu/transmeta.c | 1
-rw-r--r--  arch/x86/kernel/cpu/umc.c | 1
-rw-r--r--  arch/x86/kernel/cpu/vmware.c | 8
-rw-r--r--  arch/x86/kernel/crash.c | 18
-rw-r--r--  arch/x86/kernel/crash_dump_32.c | 1
-rw-r--r--  arch/x86/kernel/crash_dump_64.c | 1
-rw-r--r--  arch/x86/kernel/devicetree.c | 1
-rw-r--r--  arch/x86/kernel/doublefault.c | 1
-rw-r--r--  arch/x86/kernel/dumpstack_32.c | 1
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 1
-rw-r--r--  arch/x86/kernel/early-quirks.c | 1
-rw-r--r--  arch/x86/kernel/early_printk.c | 1
-rw-r--r--  arch/x86/kernel/ebda.c | 1
-rw-r--r--  arch/x86/kernel/espfix_64.c | 6
-rw-r--r--  arch/x86/kernel/fpu/bugs.c | 1
-rw-r--r--  arch/x86/kernel/fpu/init.c | 11
-rw-r--r--  arch/x86/kernel/fpu/regset.c | 1
-rw-r--r--  arch/x86/kernel/fpu/signal.c | 1
-rw-r--r--  arch/x86/kernel/fpu/xstate.c | 43
-rw-r--r--  arch/x86/kernel/ftrace.c | 1
-rw-r--r--  arch/x86/kernel/ftrace_32.S | 1
-rw-r--r--  arch/x86/kernel/ftrace_64.S | 1
-rw-r--r--  arch/x86/kernel/head32.c | 6
-rw-r--r--  arch/x86/kernel/head64.c | 1
-rw-r--r--  arch/x86/kernel/head_32.S | 6
-rw-r--r--  arch/x86/kernel/head_64.S | 46
-rw-r--r--  arch/x86/kernel/i8253.c | 1
-rw-r--r--  arch/x86/kernel/i8259.c | 1
-rw-r--r--  arch/x86/kernel/idt.c | 2
-rw-r--r--  arch/x86/kernel/io_delay.c | 1
-rw-r--r--  arch/x86/kernel/ioport.c | 1
-rw-r--r--  arch/x86/kernel/irq_32.c | 1
-rw-r--r--  arch/x86/kernel/irq_64.c | 1
-rw-r--r--  arch/x86/kernel/irq_work.c | 1
-rw-r--r--  arch/x86/kernel/irqinit.c | 1
-rw-r--r--  arch/x86/kernel/jump_label.c | 1
-rw-r--r--  arch/x86/kernel/kprobes/common.h | 20
-rw-r--r--  arch/x86/kernel/kprobes/core.c | 63
-rw-r--r--  arch/x86/kernel/kprobes/ftrace.c | 32
-rw-r--r--  arch/x86/kernel/kprobes/opt.c | 79
-rw-r--r--  arch/x86/kernel/kvm.c | 63
-rw-r--r--  arch/x86/kernel/kvmclock.c | 67
-rw-r--r--  arch/x86/kernel/ldt.c | 19
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 1
-rw-r--r--  arch/x86/kernel/module.c | 13
-rw-r--r--  arch/x86/kernel/mpparse.c | 1
-rw-r--r--  arch/x86/kernel/nmi.c | 2
-rw-r--r--  arch/x86/kernel/nmi_selftest.c | 1
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c | 1
-rw-r--r--  arch/x86/kernel/paravirt.c | 14
-rw-r--r--  arch/x86/kernel/paravirt_patch_32.c | 1
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 1
-rw-r--r--  arch/x86/kernel/pci-dma.c | 1
-rw-r--r--  arch/x86/kernel/pci-iommu_table.c | 1
-rw-r--r--  arch/x86/kernel/pci-nommu.c | 1
-rw-r--r--  arch/x86/kernel/pci-swiotlb.c | 1
-rw-r--r--  arch/x86/kernel/pcspeaker.c | 1
-rw-r--r--  arch/x86/kernel/perf_regs.c | 1
-rw-r--r--  arch/x86/kernel/platform-quirks.c | 1
-rw-r--r--  arch/x86/kernel/pmem.c | 3
-rw-r--r--  arch/x86/kernel/probe_roms.c | 1
-rw-r--r--  arch/x86/kernel/process.c | 9
-rw-r--r--  arch/x86/kernel/process_32.c | 6
-rw-r--r--  arch/x86/kernel/process_64.c | 5
-rw-r--r--  arch/x86/kernel/quirks.c | 1
-rw-r--r--  arch/x86/kernel/reboot.c | 5
-rw-r--r--  arch/x86/kernel/reboot_fixups_32.c | 1
-rw-r--r--  arch/x86/kernel/resource.c | 1
-rw-r--r--  arch/x86/kernel/rtc.c | 1
-rw-r--r--  arch/x86/kernel/setup.c | 10
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 1
-rw-r--r--  arch/x86/kernel/signal.c | 1
-rw-r--r--  arch/x86/kernel/signal_compat.c | 1
-rw-r--r--  arch/x86/kernel/smpboot.c | 21
-rw-r--r--  arch/x86/kernel/stacktrace.c | 10
-rw-r--r--  arch/x86/kernel/step.c | 1
-rw-r--r--  arch/x86/kernel/sys_x86_64.c | 1
-rw-r--r--  arch/x86/kernel/time.c | 1
-rw-r--r--  arch/x86/kernel/tls.c | 1
-rw-r--r--  arch/x86/kernel/trace_clock.c | 1
-rw-r--r--  arch/x86/kernel/tracepoint.c | 1
-rw-r--r--  arch/x86/kernel/traps.c | 19
-rw-r--r--  arch/x86/kernel/tsc.c | 8
-rw-r--r--  arch/x86/kernel/tsc_sync.c | 1
-rw-r--r--  arch/x86/kernel/umip.c | 366
-rw-r--r--  arch/x86/kernel/unwind_frame.c | 38
-rw-r--r--  arch/x86/kernel/unwind_orc.c | 31
-rw-r--r--  arch/x86/kernel/uprobes.c | 15
-rw-r--r--  arch/x86/kernel/verify_cpu.S | 3
-rw-r--r--  arch/x86/kernel/vm86_32.c | 21
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S | 1
-rw-r--r--  arch/x86/kernel/x86_init.c | 10
152 files changed, 1338 insertions, 440 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fd0a789..81bb565 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux kernel.
#
@@ -24,9 +25,9 @@ endif
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
-KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_paravirt.o := n
-OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
@@ -126,10 +127,11 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
+obj-$(CONFIG_X86_INTEL_UMIP) += umip.o
-obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
-obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
-obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
+obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
+obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
+obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 85a9e17..f1bb57b 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
obj-$(CONFIG_ACPI) += boot.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 079535e53..ef9e02e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -961,6 +961,11 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
x86_platform.legacy.rtc = 0;
}
+ if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_VGA) {
+ pr_debug("ACPI: probing for VGA not safe\n");
+ x86_platform.legacy.no_vga = 1;
+ }
+
#ifdef CONFIG_X86_PM_TIMER
/* detect the location of the ACPI PM Timer */
if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) {
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ed01481..7188aea 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sleep.c - x86-specific ACPI sleep support.
*
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 65c7b60..fbb60ca 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Variables and functions used by the code in sleep.c
*/
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3344d33..dbaf14d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -442,7 +442,6 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
{
const s32 *poff;
- mutex_lock(&text_mutex);
for (poff = start; poff < end; poff++) {
u8 *ptr = (u8 *)poff + *poff;
@@ -452,7 +451,6 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
if (*ptr == 0x3e)
text_poke(ptr, ((unsigned char []){0xf0}), 1);
}
- mutex_unlock(&text_mutex);
}
static void alternatives_smp_unlock(const s32 *start, const s32 *end,
@@ -460,7 +458,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
{
const s32 *poff;
- mutex_lock(&text_mutex);
for (poff = start; poff < end; poff++) {
u8 *ptr = (u8 *)poff + *poff;
@@ -470,7 +467,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
if (*ptr == 0xf0)
text_poke(ptr, ((unsigned char []){0x3E}), 1);
}
- mutex_unlock(&text_mutex);
}
struct smp_alt_module {
@@ -489,8 +485,7 @@ struct smp_alt_module {
struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
-static DEFINE_MUTEX(smp_alt);
-static bool uniproc_patched = false; /* protected by smp_alt */
+static bool uniproc_patched = false; /* protected by text_mutex */
void __init_or_module alternatives_smp_module_add(struct module *mod,
char *name,
@@ -499,7 +494,7 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
{
struct smp_alt_module *smp;
- mutex_lock(&smp_alt);
+ mutex_lock(&text_mutex);
if (!uniproc_patched)
goto unlock;
@@ -526,14 +521,14 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
smp_unlock:
alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
- mutex_unlock(&smp_alt);
+ mutex_unlock(&text_mutex);
}
void __init_or_module alternatives_smp_module_del(struct module *mod)
{
struct smp_alt_module *item;
- mutex_lock(&smp_alt);
+ mutex_lock(&text_mutex);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
@@ -541,7 +536,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
kfree(item);
break;
}
- mutex_unlock(&smp_alt);
+ mutex_unlock(&text_mutex);
}
void alternatives_enable_smp(void)
@@ -551,7 +546,7 @@ void alternatives_enable_smp(void)
/* Why bother if there are no other CPUs? */
BUG_ON(num_possible_cpus() == 1);
- mutex_lock(&smp_alt);
+ mutex_lock(&text_mutex);
if (uniproc_patched) {
pr_info("switching to SMP code\n");
@@ -563,10 +558,13 @@ void alternatives_enable_smp(void)
mod->text, mod->text_end);
uniproc_patched = false;
}
- mutex_unlock(&smp_alt);
+ mutex_unlock(&text_mutex);
}
-/* Return 1 if the address range is reserved for smp-alternatives */
+/*
+ * Return 1 if the address range is reserved for SMP-alternatives.
+ * Must hold text_mutex.
+ */
int alternatives_text_reserved(void *start, void *end)
{
struct smp_alt_module *mod;
@@ -574,6 +572,8 @@ int alternatives_text_reserved(void *start, void *end)
u8 *text_start = start;
u8 *text_end = end;
+ lockdep_assert_held(&text_mutex);
+
list_for_each_entry(mod, &smp_alt_modules, next) {
if (mod->text > text_end || mod->text_end < text_start)
continue;
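
Illustration, not part of the patch: the alternative.c hunks above drop the private smp_alt mutex and make the global text_mutex cover all SMP-alternatives state, with lockdep_assert_held() documenting the rule in alternatives_text_reserved(). A kernel-style sketch (not a stand-alone program) of the resulting calling convention, using an invented caller name:

/* Sketch only: invented caller showing the new locking rule. */
static bool range_is_pokeable(void *start, void *end)
{
	int reserved;

	mutex_lock(&text_mutex);
	/* alternatives_text_reserved() now asserts text_mutex is held. */
	reserved = alternatives_text_reserved(start, end);
	mutex_unlock(&text_mutex);

	return !reserved;
}
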
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 458da85..6db28f1 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
{}
};
+#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704
+
const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{}
};
EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{}
};
@@ -402,11 +406,48 @@ void amd_flush_garts(void)
}
EXPORT_SYMBOL_GPL(amd_flush_garts);
+static void __fix_erratum_688(void *info)
+{
+#define MSR_AMD64_IC_CFG 0xC0011021
+
+ msr_set_bit(MSR_AMD64_IC_CFG, 3);
+ msr_set_bit(MSR_AMD64_IC_CFG, 14);
+}
+
+/* Apply erratum 688 fix so machines without a BIOS fix work. */
+static __init void fix_erratum_688(void)
+{
+ struct pci_dev *F4;
+ u32 val;
+
+ if (boot_cpu_data.x86 != 0x14)
+ return;
+
+ if (!amd_northbridges.num)
+ return;
+
+ F4 = node_to_amd_nb(0)->link;
+ if (!F4)
+ return;
+
+ if (pci_read_config_dword(F4, 0x164, &val))
+ return;
+
+ if (val & BIT(2))
+ return;
+
+ on_each_cpu(__fix_erratum_688, NULL, 0);
+
+ pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
+}
+
static __init int init_amd_nbs(void)
{
amd_cache_northbridges();
amd_cache_gart();
+ fix_erratum_688();
+
return 0;
}
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index ef2859f..f5d92bc 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Firmware replacement code.
*
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8e63ebd..2fb7309 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for local APIC drivers and for the IO-APIC code
#
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c76..f72ecd5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}
+static u32 skx_deadline_rev(void)
+{
+ switch (boot_cpu_data.x86_mask) {
+ case 0x03: return 0x01000136;
+ case 0x04: return 0x02000014;
+ }
+
+ return ~0U;
+}
+
static const struct x86_cpu_id deadline_match[] = {
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X, hsx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X, 0x0b000020),
DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
- DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X, 0x02000014),
+ DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X, skx_deadline_rev),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE, 0x22),
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT, 0x20),
@@ -600,7 +610,8 @@ static void apic_check_deadline_errata(void)
const struct x86_cpu_id *m;
u32 rev;
- if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+ boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
m = x86_match_cpu(deadline_match);
@@ -1462,7 +1473,7 @@ void setup_local_APIC(void)
/*
* Set up LVT0, LVT1:
*
- * set up through-local-APIC on the BP's LINT0. This is not
+ * set up through-local-APIC on the boot CPU's LINT0. This is not
* strictly necessary in pure symmetric-IO mode, but sometimes
* we delegate interrupts to the 8259A.
*/
@@ -1634,7 +1645,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
* under KVM
*/
if (max_physical_apicid > 255 ||
- !hypervisor_x2apic_available()) {
+ !x86_init.hyper.x2apic_available()) {
pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
x2apic_disable();
return;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 6599f43..c8d2112 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* NOOP APIC driver.
*
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 456e45e..e12fbcf 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
*
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
index 56ccf93..b07075d 100644
--- a/arch/x86/kernel/apic/htirq.c
+++ b/arch/x86/kernel/apic/htirq.c
@@ -112,8 +112,8 @@ static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
-static void htirq_domain_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+static int htirq_domain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
struct ht_irq_msg msg;
struct irq_cfg *cfg = irqd_cfg(irq_data);
@@ -132,6 +132,7 @@ static void htirq_domain_activate(struct irq_domain *domain,
HT_IRQ_LOW_MT_ARBITRATED) |
HT_IRQ_LOW_IRQ_MASKED;
write_ht_irq_msg(irq_data->irq, &msg);
+ return 0;
}
static void htirq_domain_deactivate(struct irq_domain *domain,
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index d6f3877..d1fc62a 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* HW NMI watchdog support
*
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 70e48aa..18c8aca 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel IO-APIC support for multi-Pentium hosts.
*
@@ -2096,7 +2097,7 @@ static inline void __init check_timer(void)
unmask_ioapic_irq(irq_get_irq_data(0));
}
irq_domain_deactivate_irq(irq_data);
- irq_domain_activate_irq(irq_data);
+ irq_domain_activate_irq(irq_data, false);
if (timer_irq_works()) {
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
@@ -2118,7 +2119,7 @@ static inline void __init check_timer(void)
*/
replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
irq_domain_deactivate_irq(irq_data);
- irq_domain_activate_irq(irq_data);
+ irq_domain_activate_irq(irq_data, false);
legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2977,8 +2978,8 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
-void mp_irqdomain_activate(struct irq_domain *domain,
- struct irq_data *irq_data)
+int mp_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irq_data, bool early)
{
unsigned long flags;
struct irq_pin_list *entry;
@@ -2988,6 +2989,7 @@ void mp_irqdomain_activate(struct irq_domain *domain,
for_each_irq_pin(entry, data->irq_2_pin)
__ioapic_write_entry(entry->apic, entry->pin, data->entry);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+ return 0;
}
void mp_irqdomain_deactivate(struct irq_domain *domain,
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 3a205d4..82f9244 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/cpumask.h>
#include <linux/interrupt.h>
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 481237c..e216cf3 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 3baf0c3..b94d3532 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 0d57bb9..c0b6948 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
/*
* percpu heartbeat timer
*/
-static void uv_heartbeat(unsigned long ignored)
+static void uv_heartbeat(struct timer_list *timer)
{
- struct timer_list *timer = &uv_scir_info->timer;
unsigned char bits = uv_scir_info->state;
/* Flip heartbeat bit: */
@@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
- setup_pinned_timer(timer, uv_heartbeat, cpu);
+ timer_setup(timer, uv_heartbeat, TIMER_PINNED);
timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
add_timer_on(timer, cpu);
uv_cpu_scir_info(cpu)->enabled = 1;
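
Illustration, not part of the patch: the x2apic_uv_x.c hunk above follows the tree-wide timer conversion, in which callbacks receive a struct timer_list * instead of an unsigned long cookie and timer_setup() replaces setup_pinned_timer(). A hedged, kernel-style sketch of the general pattern with invented names ("my_state", "my_callback"); the from_timer() helper recovers the containing structure (would need <linux/timer.h> to build):

/* Old style: the callback got an opaque cookie.
 *	static void my_callback(unsigned long data) { ... }
 *	setup_pinned_timer(&state->timer, my_callback, (unsigned long)state);
 * New style: the callback gets the timer and derives its container.
 */
struct my_state {
	struct timer_list timer;
	int value;
};

static void my_callback(struct timer_list *t)
{
	/* Recover the container from the timer_list pointer. */
	struct my_state *state = from_timer(state, t, timer);

	state->value++;
	mod_timer(&state->timer, jiffies + HZ);
}

static void my_start(struct my_state *state)
{
	timer_setup(&state->timer, my_callback, TIMER_PINNED);
	state->timer.expires = jiffies + HZ;
	add_timer(&state->timer);
}
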
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index de827d6..8ea7827 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Generate definitions needed by assembly language modules.
* This code generates raw asm output which is post-processed to extract
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 710edab..dedf428 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#ifndef __LINUX_KBUILD_H
# error "Please do not build this file directly, build asm-offsets.c instead"
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index cf42206..630212f 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#ifndef __LINUX_KBUILD_H
# error "Please do not build this file directly, build asm-offsets.c instead"
#endif
diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c
index f367250..e1efe44 100644
--- a/arch/x86/kernel/audit_64.c
+++ b/arch/x86/kernel/audit_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/types.h>
#include <linux/audit.h>
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 52c8e3c..3fed7ae 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Implement 'Simple Boot Flag Specification 2.0'
*/
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c
index 145863d..3339942 100644
--- a/arch/x86/kernel/check.c
+++ b/arch/x86/kernel/check.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kthread.h>
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index e17942c..90cb82d 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for x86-compatible CPU details, features and quirks
#
@@ -22,6 +23,7 @@ obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
+obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 0af86d9..ba0b242 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1994 Linus Torvalds
*
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 44207b7..68bc6d9 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/sched.h>
#include <linux/sched/clock.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176ba..13ae9e5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -329,6 +329,28 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
}
}
+static __always_inline void setup_umip(struct cpuinfo_x86 *c)
+{
+ /* Check the boot processor, plus build option for UMIP. */
+ if (!cpu_feature_enabled(X86_FEATURE_UMIP))
+ goto out;
+
+ /* Check the current processor's cpuid bits. */
+ if (!cpu_has(c, X86_FEATURE_UMIP))
+ goto out;
+
+ cr4_set_bits(X86_CR4_UMIP);
+
+ return;
+
+out:
+ /*
+ * Make sure UMIP is disabled in case it was enabled in a
+ * previous boot (e.g., via kexec).
+ */
+ cr4_clear_bits(X86_CR4_UMIP);
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -863,8 +885,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
* cache alignment.
* The others are not touched to avoid unwanted side effects.
*
- * WARNING: this function is only called on the BP. Don't add code here
- * that is supposed to run on all CPUs.
+ * WARNING: this function is only called on the boot CPU. Don't add code
+ * here that is supposed to run on all CPUs.
*/
static void __init early_identify_cpu(struct cpuinfo_x86 *c)
{
@@ -1147,9 +1169,10 @@ static void identify_cpu(struct cpuinfo_x86 *c)
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
- /* Set up SMEP/SMAP */
+ /* Set up SMEP/SMAP/UMIP */
setup_smep(c);
setup_smap(c);
+ setup_umip(c);
/*
* The vendor-specific functions might have changed features.
@@ -1301,18 +1324,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
pr_cont(")\n");
}
-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param().
+ * But we need to keep a dummy __setup handler around, otherwise it
+ * would show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
{
- int bit;
-
- if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
- setup_clear_cpu_cap(bit);
- else
- return 0;
-
return 1;
}
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1572,9 +1593,13 @@ void cpu_init(void)
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, me);
- load_sp0(t, &current->thread);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);
clear_all_debug_regs();
@@ -1596,7 +1621,6 @@ void cpu_init(void)
int cpu = smp_processor_id();
struct task_struct *curr = current;
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
- struct thread_struct *thread = &curr->thread;
wait_for_master_cpu(cpu);
@@ -1627,9 +1651,13 @@ void cpu_init(void)
initialize_tlbstate_and_flush();
enter_lazy_tlb(&init_mm, curr);
- load_sp0(t, thread);
+ /*
+ * Initialize the TSS. Don't bother initializing sp0, as the initial
+ * task never enters user mode.
+ */
set_tss_desc(cpu, t);
load_TR_desc();
+
load_mm_ldt(&init_mm);
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
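
Illustration, not part of the patch: the common.c hunks above add setup_umip(), which sets CR4.UMIP when the feature is available; with that bit set, SGDT/SIDT/SLDT/SMSW/STR fault outside ring 0. A hypothetical user-space probe (x86-64, GCC/Clang inline asm assumed): without UMIP, sgdt succeeds and leaks the GDT base, while with UMIP the resulting #GP is either delivered as SIGSEGV or emulated with dummy values by the umip.c code added elsewhere in this series:

/* Hypothetical probe, not kernel code. Build: cc -O2 probe.c */
#include <stdio.h>
#include <stdint.h>

struct __attribute__((packed)) desc_ptr {
	uint16_t limit;
	uint64_t base;	/* 8 bytes in 64-bit mode */
};

int main(void)
{
	struct desc_ptr gdt = { 0, 0 };

	/* Raises #GP from user mode (CPL 3) once CR4.UMIP is set. */
	__asm__ volatile("sgdt %0" : "=m" (gdt));
	printf("GDT base=0x%llx limit=0x%x\n",
	       (unsigned long long)gdt.base, gdt.limit);
	return 0;
}
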
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2584265..f52a370 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ARCH_X86_CPU_H
#define ARCH_X86_CPU_H
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 0000000..904b0a3c4
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,121 @@
+/* Declare dependencies between CPUID features */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+ unsigned int feature;
+ unsigned int depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features part of the base set (like FPU).
+ *
+ * Note this all is not __init / __initdata because it can be
+ * called from cpu hotplug. It shouldn't do anything in this case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+static const struct cpuid_dep cpuid_deps[] = {
+ { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
+ { X86_FEATURE_AVX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_PKU, X86_FEATURE_XSAVE },
+ { X86_FEATURE_MPX, X86_FEATURE_XSAVE },
+ { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
+ { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM, X86_FEATURE_FXSR },
+ { X86_FEATURE_XMM2, X86_FEATURE_XMM },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
+ { X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
+ { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
+ { X86_FEATURE_F16C, X86_FEATURE_XMM2, },
+ { X86_FEATURE_AES, X86_FEATURE_XMM2 },
+ { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
+ { X86_FEATURE_FMA, X86_FEATURE_AVX },
+ { X86_FEATURE_AVX2, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
+ { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
+ { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
+ { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
+ {}
+};
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ /*
+ * Note: This could use the non atomic __*_bit() variants, but the
+ * rest of the cpufeature code uses atomics as well, so keep it for
+ * consistency. Cleanup all of it separately.
+ */
+ if (!c) {
+ clear_cpu_cap(&boot_cpu_data, feature);
+ set_bit(feature, (unsigned long *)cpu_caps_cleared);
+ } else {
+ clear_bit(feature, (unsigned long *)c->x86_capability);
+ }
+}
+
+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
+ const struct cpuid_dep *d;
+ bool changed;
+
+ if (WARN_ON(feature >= MAX_FEATURE_BITS))
+ return;
+
+ clear_feature(c, feature);
+
+ /* Collect all features to disable, handling dependencies */
+ memset(disable, 0, sizeof(disable));
+ __set_bit(feature, disable);
+
+ /* Loop until we get a stable state. */
+ do {
+ changed = false;
+ for (d = cpuid_deps; d->feature; d++) {
+ if (!test_bit(d->depends, disable))
+ continue;
+ if (__test_and_set_bit(d->feature, disable))
+ continue;
+
+ changed = true;
+ clear_feature(c, d->feature);
+ }
+ } while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+ do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+ do_clear_cpu_cap(NULL, feature);
+}
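
Illustration, not part of the patch: do_clear_cpu_cap() above iterates the dependency table to a fixed point, so clearing one feature also clears everything that depends on it, directly or transitively. A self-contained user-space model of the same loop, with a made-up four-feature table:

/* Stand-alone model of the fixed-point dependency walk; the feature
 * numbers and table are invented for illustration.
 */
#include <stdio.h>
#include <stdbool.h>

enum { F_XSAVE, F_AVX, F_AVX2, F_FMA, NFEAT };

struct dep { int feature; int depends; };

static const struct dep deps[] = {
	{ F_AVX,  F_XSAVE },
	{ F_AVX2, F_AVX   },
	{ F_FMA,  F_AVX   },
	{ -1, -1 }
};

static void clear_with_deps(bool *caps, int feature)
{
	bool disable[NFEAT] = { false };
	const struct dep *d;
	bool changed;

	caps[feature] = false;
	disable[feature] = true;

	/* Loop until no enabled feature still depends on a disabled one. */
	do {
		changed = false;
		for (d = deps; d->feature >= 0; d++) {
			if (!disable[d->depends] || disable[d->feature])
				continue;
			disable[d->feature] = true;
			caps[d->feature] = false;
			changed = true;
		}
	} while (changed);
}

int main(void)
{
	bool caps[NFEAT] = { true, true, true, true };
	int i;

	clear_with_deps(caps, F_XSAVE);	/* cascades to AVX, AVX2, FMA */
	for (i = 0; i < NFEAT; i++)
		printf("feature %d: %s\n", i, caps[i] ? "on" : "off");
	return 0;
}
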
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6f07744..6b4bb33 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/pci.h>
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 4fa9000..bea8d3e 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -26,6 +26,12 @@
#include <asm/processor.h>
#include <asm/hypervisor.h>
+extern const struct hypervisor_x86 x86_hyper_vmware;
+extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
+extern const struct hypervisor_x86 x86_hyper_xen_pv;
+extern const struct hypervisor_x86 x86_hyper_xen_hvm;
+extern const struct hypervisor_x86 x86_hyper_kvm;
+
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
#ifdef CONFIG_XEN_PV
@@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
#endif
};
-const struct hypervisor_x86 *x86_hyper;
-EXPORT_SYMBOL(x86_hyper);
+enum x86_hypervisor_type x86_hyper_type;
+EXPORT_SYMBOL(x86_hyper_type);
-static inline void __init
+static inline const struct hypervisor_x86 * __init
detect_hypervisor_vendor(void)
{
- const struct hypervisor_x86 *h, * const *p;
+ const struct hypervisor_x86 *h = NULL, * const *p;
uint32_t pri, max_pri = 0;
for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
- h = *p;
- pri = h->detect();
- if (pri != 0 && pri > max_pri) {
+ pri = (*p)->detect();
+ if (pri > max_pri) {
max_pri = pri;
- x86_hyper = h;
+ h = *p;
}
}
- if (max_pri)
- pr_info("Hypervisor detected: %s\n", x86_hyper->name);
+ if (h)
+ pr_info("Hypervisor detected: %s\n", h->name);
+
+ return h;
}
-void __init init_hypervisor_platform(void)
+static void __init copy_array(const void *src, void *target, unsigned int size)
{
+ unsigned int i, n = size / sizeof(void *);
+ const void * const *from = (const void * const *)src;
+ const void **to = (const void **)target;
- detect_hypervisor_vendor();
-
- if (!x86_hyper)
- return;
-
- if (x86_hyper->init_platform)
- x86_hyper->init_platform();
+ for (i = 0; i < n; i++)
+ if (from[i])
+ to[i] = from[i];
}
-bool __init hypervisor_x2apic_available(void)
+void __init init_hypervisor_platform(void)
{
- return x86_hyper &&
- x86_hyper->x2apic_available &&
- x86_hyper->x2apic_available();
-}
+ const struct hypervisor_x86 *h;
-void hypervisor_pin_vcpu(int cpu)
-{
- if (!x86_hyper)
+ h = detect_hypervisor_vendor();
+
+ if (!h)
return;
- if (x86_hyper->pin_vcpu)
- x86_hyper->pin_vcpu(cpu);
- else
- WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
+ copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
+ copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
+
+ x86_hyper_type = h->type;
+ x86_init.hyper.init_platform();
}
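
Illustration, not part of the patch: the reworked init_hypervisor_platform() above no longer exports a global x86_hyper pointer; instead copy_array() merges the detected hypervisor's callbacks into x86_init.hyper and x86_platform.hyper slot by slot, so a hypervisor overrides only the ops it actually implements. A self-contained user-space model with invented names:

/* Stand-alone model of the per-slot callback merge. Storing function
 * pointers through void * is technically non-portable ISO C, but this
 * mirrors what the kernel code does on the platforms it supports.
 */
#include <stdio.h>
#include <stdbool.h>

struct hyper_init_ops {
	void (*init_platform)(void);
	bool (*x2apic_available)(void);
};

static void default_init_platform(void) { puts("default init"); }
static bool default_x2apic(void) { return true; }
static void vmware_init_platform(void) { puts("vmware init"); }

static void copy_array(const void *src, void *target, unsigned int size)
{
	unsigned int i, n = size / sizeof(void *);
	const void * const *from = (const void * const *)src;
	const void **to = (const void **)target;

	for (i = 0; i < n; i++)
		if (from[i])	/* only non-NULL slots override defaults */
			to[i] = from[i];
}

int main(void)
{
	struct hyper_init_ops global = {
		.init_platform    = default_init_platform,
		.x2apic_available = default_x2apic,
	};
	struct hyper_init_ops vmware = {
		.init_platform = vmware_init_platform,
		/* .x2apic_available left NULL: the default survives */
	};

	copy_array(&vmware, &global, sizeof(global));
	global.init_platform();				/* "vmware init" */
	printf("x2apic: %d\n", (int)global.x2apic_available());
	return 0;
}
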
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index dfa90a3..b720dac 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 24f74932..54d04d5 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Routines to identify caches on Intel CPU.
*
@@ -831,7 +832,6 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
unsigned int apicid, nshared, first, last;
- this_leaf = this_cpu_ci->info_list + index;
nshared = base->eax.split.num_threads_sharing + 1;
apicid = cpu_data(cpu).apicid;
first = apicid - (apicid % nshared);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index ebaddae..a43a72d 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_INTEL_RDT_H
#define _ASM_X86_INTEL_RDT_H
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
index e42117d..3fed388 100644
--- a/arch/x86/kernel/cpu/match.c
+++ b/arch/x86/kernel/cpu/match.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
#include <linux/cpu.h>
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 43051f0..bcc7c54 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
obj-y = mce.o mce-severity.o mce-genpool.o
obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
index 10cec43..7f85b76 100644
--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
static char mce_helper[128];
static char *mce_helper_argv[2] = { mce_helper, NULL };
-#define mce_log_get_idx_check(p) \
-({ \
- RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
- !lockdep_is_held(&mce_chrdev_read_mutex), \
- "suspicious mce_log_get_idx_check() usage"); \
- smp_load_acquire(&(p)); \
-})
-
/*
* Lockless MCE logging infrastructure.
* This avoids deadlocks on printk locks without having to break locks. Also
@@ -53,43 +45,32 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
- unsigned int next, entry;
-
- wmb();
- for (;;) {
- entry = mce_log_get_idx_check(mcelog.next);
- for (;;) {
-
- /*
- * When the buffer fills up discard new entries.
- * Assume that the earlier errors are the more
- * interesting ones:
- */
- if (entry >= MCE_LOG_LEN) {
- set_bit(MCE_OVERFLOW,
- (unsigned long *)&mcelog.flags);
- return NOTIFY_OK;
- }
- /* Old left over entry. Skip: */
- if (mcelog.entry[entry].finished) {
- entry++;
- continue;
- }
- break;
- }
- smp_rmb();
- next = entry + 1;
- if (cmpxchg(&mcelog.next, entry, next) == entry)
- break;
+ unsigned int entry;
+
+ mutex_lock(&mce_chrdev_read_mutex);
+
+ entry = mcelog.next;
+
+ /*
+ * When the buffer fills up discard new entries. Assume that the
+ * earlier errors are the more interesting ones:
+ */
+ if (entry >= MCE_LOG_LEN) {
+ set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+ goto unlock;
}
+
+ mcelog.next = entry + 1;
+
memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
- wmb();
mcelog.entry[entry].finished = 1;
- wmb();
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
+unlock:
+ mutex_unlock(&mce_chrdev_read_mutex);
+
return NOTIFY_OK;
}
@@ -177,13 +158,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file)
return 0;
}
-static void collect_tscs(void *data)
-{
- unsigned long *cpu_tsc = (unsigned long *)data;
-
- cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
static int mce_apei_read_done;
/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
@@ -231,14 +205,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
size_t usize, loff_t *off)
{
char __user *buf = ubuf;
- unsigned long *cpu_tsc;
- unsigned prev, next;
+ unsigned next;
int i, err;
- cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
- if (!cpu_tsc)
- return -ENOMEM;
-
mutex_lock(&mce_chrdev_read_mutex);
if (!mce_apei_read_done) {
@@ -247,65 +216,29 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
goto out;
}
- next = mce_log_get_idx_check(mcelog.next);
-
/* Only supports full reads right now */
err = -EINVAL;
if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
goto out;
+ next = mcelog.next;
err = 0;
- prev = 0;
- do {
- for (i = prev; i < next; i++) {
- unsigned long start = jiffies;
- struct mce *m = &mcelog.entry[i];
-
- while (!m->finished) {
- if (time_after_eq(jiffies, start + 2)) {
- memset(m, 0, sizeof(*m));
- goto timeout;
- }
- cpu_relax();
- }
- smp_rmb();
- err |= copy_to_user(buf, m, sizeof(*m));
- buf += sizeof(*m);
-timeout:
- ;
- }
-
- memset(mcelog.entry + prev, 0,
- (next - prev) * sizeof(struct mce));
- prev = next;
- next = cmpxchg(&mcelog.next, prev, 0);
- } while (next != prev);
-
- synchronize_sched();
- /*
- * Collect entries that were still getting written before the
- * synchronize.
- */
- on_each_cpu(collect_tscs, cpu_tsc, 1);
-
- for (i = next; i < MCE_LOG_LEN; i++) {
+ for (i = 0; i < next; i++) {
struct mce *m = &mcelog.entry[i];
- if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
- err |= copy_to_user(buf, m, sizeof(*m));
- smp_rmb();
- buf += sizeof(*m);
- memset(m, 0, sizeof(*m));
- }
+ err |= copy_to_user(buf, m, sizeof(*m));
+ buf += sizeof(*m);
}
+ memset(mcelog.entry, 0, next * sizeof(struct mce));
+ mcelog.next = 0;
+
if (err)
err = -EFAULT;
out:
mutex_unlock(&mce_chrdev_read_mutex);
- kfree(cpu_tsc);
return err ? err : buf - ubuf;
}
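
Illustration, not part of the patch: the dev-mcelog.c hunks above replace the lock-free cmpxchg ring with plain mce_chrdev_read_mutex protection; the buffer keeps the earliest records and sets an overflow flag when full. A self-contained user-space model of that discipline (pthreads assumed; names invented):

/* Stand-alone model: fixed log array under one mutex, with an overflow
 * flag instead of the old lock-free retry loops. Build: cc model.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define LOG_LEN 8

struct record { int data; };

static struct record log_entries[LOG_LEN];
static unsigned int log_next;
static int log_overflow;
static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;

static void log_add(int data)
{
	pthread_mutex_lock(&log_lock);
	if (log_next >= LOG_LEN) {
		/* Full: keep the earlier (more interesting) records. */
		log_overflow = 1;
	} else {
		log_entries[log_next++].data = data;
	}
	pthread_mutex_unlock(&log_lock);
}

static unsigned int log_drain(struct record *out)
{
	unsigned int n;

	pthread_mutex_lock(&log_lock);
	n = log_next;
	memcpy(out, log_entries, n * sizeof(*out));
	memset(log_entries, 0, n * sizeof(*out));
	log_next = 0;
	pthread_mutex_unlock(&log_lock);

	return n;
}

int main(void)
{
	struct record out[LOG_LEN];
	int i;

	for (i = 0; i < 10; i++)
		log_add(i);
	printf("drained %u records, overflow=%d\n", log_drain(out), log_overflow);
	return 0;
}
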
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 098530a..aa0d5df 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,3 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __X86_MCE_INTERNAL_H__
+#define __X86_MCE_INTERNAL_H__
+
#include <linux/device.h>
#include <asm/mce.h>
@@ -108,3 +112,7 @@ static inline void mce_work_trigger(void) { }
static inline void mce_register_injector_chain(struct notifier_block *nb) { }
static inline void mce_unregister_injector_chain(struct notifier_block *nb) { }
#endif
+
+extern struct mca_config mca_cfg;
+
+#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 87cc9ab..4ca632a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,7 +204,7 @@ static int error_context(struct mce *m)
return IN_KERNEL;
}
-static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
{
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
u32 low, high;
@@ -245,6 +245,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
if (m->status & MCI_STATUS_UC) {
+ if (ctx == IN_KERNEL)
+ return MCE_PANIC_SEVERITY;
+
/*
* On older systems where overflow_recov flag is not present, we
* should simply panic if an error overflow occurs. If
@@ -255,10 +258,6 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
if (mce_flags.smca)
return mce_severity_amd_smca(m, ctx);
- /* software can try to contain */
- if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL))
- return MCE_PANIC_SEVERITY;
-
/* kill current process */
return MCE_AR_SEVERITY;
} else {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3b413065..b1d616d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1367,13 +1367,12 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
local_irq_restore(flags);
}
-static void mce_timer_fn(unsigned long data)
+static void mce_timer_fn(struct timer_list *t)
{
- struct timer_list *t = this_cpu_ptr(&mce_timer);
- int cpu = smp_processor_id();
+ struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
unsigned long iv;
- WARN_ON(cpu != data);
+ WARN_ON(cpu_t != t);
iv = __this_cpu_read(mce_next_interval);
@@ -1763,17 +1762,15 @@ static void mce_start_timer(struct timer_list *t)
static void __mcheck_cpu_setup_timer(void)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- unsigned int cpu = smp_processor_id();
- setup_pinned_timer(t, mce_timer_fn, cpu);
+ timer_setup(t, mce_timer_fn, TIMER_PINNED);
}
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
- unsigned int cpu = smp_processor_id();
- setup_pinned_timer(t, mce_timer_fn, cpu);
+ timer_setup(t, mce_timer_fn, TIMER_PINNED);
mce_start_timer(t);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 40e28ed..486f640 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -28,6 +28,8 @@
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>
+#include "mce-internal.h"
+
#define NR_BLOCKS 5
#define THRESHOLD_MAX 0xFFF
#define INT_TYPE_APIC 0x00020000
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e84db79..d05be30 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index 2a0717b..5cddf83 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* P5 specific Machine Check Exception Reporting
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 5e7249e..2b584b3 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Common corrected MCE threshold handler code:
*/
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index c6a722e..3b45b27 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IDT Winchip specific Machine Check Exception Reporting
* (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 86e8f0b..c4fa4a8 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void)
bool *res = &dis_ucode_ldr;
#endif
- if (!have_cpuid_p())
- return *res;
-
/*
* CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
* completely accurate as xen pv guests don't see that CPUID bit set but
@@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
void __init load_ucode_bsp(void)
{
unsigned int cpuid_1_eax;
+ bool intel = true;
- if (check_loader_disabled_bsp())
+ if (!have_cpuid_p())
return;
cpuid_1_eax = native_cpuid_eax(1);
switch (x86_cpuid_vendor()) {
case X86_VENDOR_INTEL:
- if (x86_family(cpuid_1_eax) >= 6)
- load_ucode_intel_bsp();
+ if (x86_family(cpuid_1_eax) < 6)
+ return;
break;
+
case X86_VENDOR_AMD:
- if (x86_family(cpuid_1_eax) >= 0x10)
- load_ucode_amd_bsp(cpuid_1_eax);
+ if (x86_family(cpuid_1_eax) < 0x10)
+ return;
+ intel = false;
break;
+
default:
- break;
+ return;
}
+
+ if (check_loader_disabled_bsp())
+ return;
+
+ if (intel)
+ load_ucode_intel_bsp();
+ else
+ load_ucode_amd_bsp(cpuid_1_eax);
}
static bool check_loader_disabled_ap(void)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 8f7a9bb..7dbcb7a 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -34,6 +34,7 @@
#include <linux/mm.h>
#include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
@@ -918,6 +919,18 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
return 0;
}
+static bool is_blacklisted(unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
+ pr_err_once("late loading on model 79 is disabled.\n");
+ return true;
+ }
+
+ return false;
+}
+
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
bool refresh_fw)
{
@@ -926,6 +939,9 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
const struct firmware *firmware;
enum ucode_state ret;
+ if (is_blacklisted(cpu))
+ return UCODE_NFOUND;
+
sprintf(name, "intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
@@ -950,6 +966,9 @@ static int get_ucode_user(void *to, const void *from, size_t n)
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
+ if (is_blacklisted(cpu))
+ return UCODE_NFOUND;
+
return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
}
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 6988c74..d0dfb89 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -1,4 +1,5 @@
#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
#
# Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h
#
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 236324e8..85eb5fc 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void)
#endif
}
-const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
+const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
.name = "Microsoft Hyper-V",
.detect = ms_hyperv_platform,
- .init_platform = ms_hyperv_init_platform,
+ .type = X86_HYPER_MS_HYPERV,
+ .init.init_platform = ms_hyperv_init_platform,
};
-EXPORT_SYMBOL(x86_hyper_ms_hyperv);
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index 92ba9cd..a65a027 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/mm.h>
#include <asm/mtrr.h>
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index 3d68993..f271778 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/mm.h>
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index b1086f7..4296c702 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/io.h>
#include <linux/mm.h>
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 6d9b455..558444b 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/capability.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index ad8bd76..2ac99e5 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* local MTRR defines.
*/
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 181eabe..d389083 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* local apic based NMI watchdog for various CPUs.
*
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 1dd8294..fd6ec2a 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Strings for the various x86 power flags
*
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 218f798..6b7e17b 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -1,7 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/smp.h>
#include <linux/timex.h>
#include <linux/string.h>
#include <linux/seq_file.h>
+#include <linux/cpufreq.h>
/*
* Get CPU information for use by the procfs.
@@ -75,9 +77,14 @@ static int show_cpuinfo(struct seq_file *m, void *v)
if (c->microcode)
seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
- if (cpu_has(c, X86_FEATURE_TSC))
+ if (cpu_has(c, X86_FEATURE_TSC)) {
+ unsigned int freq = cpufreq_quick_get(cpu);
+
+ if (!freq)
+ freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- cpu_khz / 1000, (cpu_khz % 1000));
+ freq / 1000, (freq % 1000));
+ }
/* Cache size */
if (c->x86_cache_size >= 0)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index cd53135..b099024 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Check for extended topology enumeration cpuid leaf 0xb and if it
* exists, use it for populating initial_apicid and cpu topology
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index d77d07a..42c9398 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index ef9c2a0..65a58a3 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <asm/processor.h>
#include "cpu.h"
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 40ed268..8e00532 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
(eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
}
-const __refconst struct hypervisor_x86 x86_hyper_vmware = {
+const __initconst struct hypervisor_x86 x86_hyper_vmware = {
.name = "VMware",
.detect = vmware_platform,
- .init_platform = vmware_platform_setup,
- .x2apic_available = vmware_legacy_x2apic_available,
+ .type = X86_HYPER_VMWARE,
+ .init.init_platform = vmware_platform_setup,
+ .init.x2apic_available = vmware_legacy_x2apic_available,
};
-EXPORT_SYMBOL(x86_hyper_vmware);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 44404e2..10e74d4 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -209,7 +209,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
-static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
+static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
@@ -342,7 +342,7 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced,
return ret;
}
-static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
+static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg)
{
struct crash_elf_data *ced = arg;
Elf64_Ehdr *ehdr;
@@ -355,7 +355,7 @@ static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
ehdr = ced->ehdr;
/* Exclude unwanted mem ranges */
- ret = elf_header_exclude_ranges(ced, start, end);
+ ret = elf_header_exclude_ranges(ced, res->start, res->end);
if (ret)
return ret;
@@ -518,14 +518,14 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
return 0;
}
-static int memmap_entry_callback(u64 start, u64 end, void *arg)
+static int memmap_entry_callback(struct resource *res, void *arg)
{
struct crash_memmap_data *cmd = arg;
struct boot_params *params = cmd->params;
struct e820_entry ei;
- ei.addr = start;
- ei.size = end - start + 1;
+ ei.addr = res->start;
+ ei.size = resource_size(res);
ei.type = cmd->type;
add_e820_entry(params, &ei);
@@ -619,12 +619,12 @@ out:
return ret;
}
-static int determine_backup_region(u64 start, u64 end, void *arg)
+static int determine_backup_region(struct resource *res, void *arg)
{
struct kimage *image = arg;
- image->arch.backup_src_start = start;
- image->arch.backup_src_sz = end - start + 1;
+ image->arch.backup_src_start = res->start;
+ image->arch.backup_src_sz = resource_size(res);
/* Expecting only one range for backup region */
return 1;
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index 538fede..33ee476 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Memory preserving reboot related code.
*
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index afa64ad..4f2e077 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Memory preserving reboot related code.
*
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index cbf1f6b..76e0769 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Architecture specific OF callbacks.
*/
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index f9c324e..0e662c5 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 4f04814..daefae8 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 225af41..88ce2ff 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 927abea..1e82f78 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Various workarounds for chipset bugs.
This code runs very early and can't use the regular PCI subsystem
The entries are keyed to PCI bridges which usually identify chipsets
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 0f08403..5e801c8 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/console.h>
#include <linux/kernel.h>
#include <linux/init.h>
diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c
index 4312f8a..38e7d59 100644
--- a/arch/x86/kernel/ebda.c
+++ b/arch/x86/kernel/ebda.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/memblock.h>
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 9c4e7ba..7d7715d 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -155,14 +155,14 @@ void init_espfix_ap(int cpu)
page = cpu/ESPFIX_STACKS_PER_PAGE;
/* Did another CPU already set this up? */
- stack_page = ACCESS_ONCE(espfix_pages[page]);
+ stack_page = READ_ONCE(espfix_pages[page]);
if (likely(stack_page))
goto done;
mutex_lock(&espfix_init_mutex);
/* Did we race on the lock? */
- stack_page = ACCESS_ONCE(espfix_pages[page]);
+ stack_page = READ_ONCE(espfix_pages[page]);
if (stack_page)
goto unlock_done;
@@ -200,7 +200,7 @@ void init_espfix_ap(int cpu)
set_pte(&pte_p[n*PTE_STRIDE], pte);
/* Job is done for this CPU and any CPU which shares this page */
- ACCESS_ONCE(espfix_pages[page]) = stack_page;
+ WRITE_ONCE(espfix_pages[page], stack_page);
unlock_done:
mutex_unlock(&espfix_init_mutex);
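The espfix hunk is a straight ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE() conversion around a check/lock/re-check initialization. A hedged userspace analog of the same pattern, with the ONCE macros modeled as volatile accesses, which is roughly what the kernel macros expand to for word-sized types:

#include <pthread.h>
#include <stdlib.h>

#define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

static void *page_slot;
static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;

void *get_page_slot(void)
{
	void *p = READ_ONCE(page_slot);    /* fast path: already set up? */

	if (p)
		return p;

	pthread_mutex_lock(&init_mutex);
	p = READ_ONCE(page_slot);          /* did we race on the lock? */
	if (!p) {
		p = malloc(4096);
		WRITE_ONCE(page_slot, p);  /* publish only after init */
	}
	pthread_mutex_unlock(&init_mutex);
	return p;
}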
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index d913047..2954fab 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* x86 FPU bug checks:
*/
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e..6abd835 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
*/
static void __init fpu__init_parse_early_param(void)
{
+ char arg[32];
+ char *argptr = arg;
+ int bit;
+
if (cmdline_find_option_bool(boot_command_line, "no387"))
setup_clear_cpu_cap(X86_FEATURE_FPU);
@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+ if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
+ sizeof(arg)) &&
+ get_option(&argptr, &bit) &&
+ bit >= 0 &&
+ bit < NCAPINTS * 32)
+ setup_clear_cpu_cap(bit);
}
/*
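The new clearcpuid= handling above finds the option, parses one integer, and range-checks it against NCAPINTS * 32 before clearing the capability bit. A userspace sketch of the same parse-and-validate flow; the NCAPINTS value is assumed for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NCAPINTS 18   /* assumed value for this tree */

/* Returns the parsed bit, or -1 when absent or out of range. */
static int parse_clearcpuid(const char *cmdline)
{
	const char *p = strstr(cmdline, "clearcpuid=");
	int bit;

	if (!p)
		return -1;
	bit = atoi(p + strlen("clearcpuid="));
	if (bit < 0 || bit >= NCAPINTS * 32)
		return -1;   /* out of range: ignored, as the kernel does */
	return bit;          /* kernel would setup_clear_cpu_cap(bit) here */
}

int main(void)
{
	printf("%d\n", parse_clearcpuid("quiet clearcpuid=154 ro"));
	return 0;
}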
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 3ea1513..bc02f51 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* FPU register's regset abstraction, for ptrace, core dumps, etc.
*/
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index fb639e7..23f1691 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* FPU signal frame handling routines.
*/
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476..87a57b7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
#include <asm/fpu/xstate.h>
#include <asm/tlbflush.h>
+#include <asm/cpufeature.h>
/*
* Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
"unknown xstate feature" ,
};
+static short xsave_cpuid_features[] __initdata = {
+ X86_FEATURE_FPU,
+ X86_FEATURE_XMM,
+ X86_FEATURE_AVX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_MPX,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_AVX512F,
+ X86_FEATURE_INTEL_PT,
+ X86_FEATURE_PKU,
+};
+
/*
* Mask of xstate features supported by the CPU and the kernel:
*/
@@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
void fpu__xstate_clear_all_cpu_caps(void)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
- setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
- setup_clear_cpu_cap(X86_FEATURE_XSAVES);
- setup_clear_cpu_cap(X86_FEATURE_AVX);
- setup_clear_cpu_cap(X86_FEATURE_AVX2);
- setup_clear_cpu_cap(X86_FEATURE_AVX512F);
- setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
- setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
- setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
- setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
- setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
- setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
- setup_clear_cpu_cap(X86_FEATURE_MPX);
- setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
- setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
- setup_clear_cpu_cap(X86_FEATURE_PKU);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
- setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
}
/*
@@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
unsigned int eax, ebx, ecx, edx;
static int on_boot_cpu __initdata = 1;
int err;
+ int i;
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
@@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
goto out_disable;
}
+ /*
+ * Clear XSAVE features that are disabled in the normal CPUID.
+ */
+ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+ if (!boot_cpu_has(xsave_cpuid_features[i]))
+ xfeatures_mask &= ~BIT(i);
+ }
+
xfeatures_mask &= fpu__get_supported_xfeatures_mask();
/* Enable xstate instructions to be able to continue with initialization: */
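The xsave_cpuid_features[] table above maps each xstate component bit to the CPUID feature that guards it; components whose feature is absent get masked out (MPX and AVX-512 each cover several consecutive bits, hence the repeats). A compact sketch of that loop with stand-in feature IDs and a fake probe:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for X86_FEATURE_* and boot_cpu_has(). */
enum { FEAT_FPU, FEAT_XMM, FEAT_AVX, FEAT_MPX, FEAT_AVX512F, FEAT_PT, FEAT_PKU };

static const short xsave_cpuid_features[] = {
	FEAT_FPU, FEAT_XMM, FEAT_AVX, FEAT_MPX, FEAT_MPX,
	FEAT_AVX512F, FEAT_AVX512F, FEAT_AVX512F, FEAT_PT, FEAT_PKU,
};

static bool has_feature(short f)
{
	return f != FEAT_AVX512F;   /* pretend only AVX-512 is missing */
}

int main(void)
{
	uint64_t xfeatures_mask = 0x3ff;   /* bits 0..9 advertised by XSAVE */
	unsigned int i;

	for (i = 0; i < sizeof(xsave_cpuid_features) /
		    sizeof(xsave_cpuid_features[0]); i++)
		if (!has_feature(xsave_cpuid_features[i]))
			xfeatures_mask &= ~(1ULL << i);   /* drop component i */

	printf("xfeatures_mask = %#llx\n", (unsigned long long)xfeatures_mask);
	return 0;
}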
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 9bef1bb..01ebcb6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Dynamic function tracing support.
*
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 722a145..b6c6468 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2017 Steven Rostedt, VMware Inc.
*/
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 1dfac63..c832291 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2014 Steven Rostedt, Red Hat Inc
*/
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index cf2ce06..ec6fefb 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/arch/i386/kernel/head32.c -- prepare to run common code
*
@@ -30,10 +31,11 @@ static void __init i386_default_early_setup(void)
asmlinkage __visible void __init i386_start_kernel(void)
{
- cr4_init_shadow();
-
+ /* Make sure IDT is set up before any exception happens */
idt_setup_early_handler();
+ cr4_init_shadow();
+
sanitize_boot_params(&boot_params);
x86_early_init_platform_quirks();
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index bab4fa5..6a5d757 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* prepare to run common code
*
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 9ed3074..c290209 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
*
* Copyright (C) 1991, 1992 Linus Torvalds
@@ -211,9 +212,6 @@ ENTRY(startup_32_smp)
#endif
.Ldefault_entry:
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $(CR0_STATE & ~X86_CR0_PG),%eax
movl %eax,%cr0
@@ -401,7 +399,7 @@ ENTRY(early_idt_handler_array)
# 24(%esp) error code
# 24(%esp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
pushl $0 # Dummy error code, to make stack frame uniform
.endif
pushl $i # 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 513cbb0..7dca675 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
*
@@ -37,11 +38,12 @@
*
*/
-#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
+#endif
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
@@ -49,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
.code64
.globl startup_64
startup_64:
+ UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
* and someone has loaded an identity mapped page table
@@ -88,6 +91,7 @@ startup_64:
addq $(early_top_pgt - __START_KERNEL_map), %rax
jmp 1f
ENTRY(secondary_startup_64)
+ UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
* and someone has loaded a mapped page table.
@@ -132,6 +136,7 @@ ENTRY(secondary_startup_64)
movq $1f, %rax
jmp *%rax
1:
+ UNWIND_HINT_EMPTY
/* Check if nx is implemented */
movl $0x80000001, %eax
@@ -149,9 +154,6 @@ ENTRY(secondary_startup_64)
1: wrmsr /* Make changes effective */
/* Setup cr0 */
-#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
- X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
- X86_CR0_PG)
movl $CR0_STATE, %eax
/* Make changes effective */
movq %rax, %cr0
@@ -234,7 +236,7 @@ ENTRY(secondary_startup_64)
pushq %rax # target address in negative space
lretq
.Lafter_lret:
-ENDPROC(secondary_startup_64)
+END(secondary_startup_64)
#include "verify_cpu.S"
@@ -246,6 +248,7 @@ ENDPROC(secondary_startup_64)
*/
ENTRY(start_cpu0)
movq initial_stack(%rip), %rsp
+ UNWIND_HINT_EMPTY
jmp .Ljump_to_C_code
ENDPROC(start_cpu0)
#endif
@@ -265,26 +268,24 @@ ENDPROC(start_cpu0)
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
__FINITDATA
-bad_address:
- jmp bad_address
-
__INIT
ENTRY(early_idt_handler_array)
- # 104(%rsp) %rflags
- # 96(%rsp) %cs
- # 88(%rsp) %rip
- # 80(%rsp) error code
i = 0
.rept NUM_EXCEPTION_VECTORS
- .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
- pushq $0 # Dummy error code, to make stack frame uniform
+ .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
+ UNWIND_HINT_IRET_REGS
+ pushq $0 # Dummy error code, to make stack frame uniform
+ .else
+ UNWIND_HINT_IRET_REGS offset=8
.endif
pushq $i # 72(%rsp) Vector number
jmp early_idt_handler_common
+ UNWIND_HINT_IRET_REGS
i = i + 1
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
-ENDPROC(early_idt_handler_array)
+ UNWIND_HINT_IRET_REGS offset=16
+END(early_idt_handler_array)
early_idt_handler_common:
/*
@@ -312,6 +313,7 @@ early_idt_handler_common:
pushq %r13 /* pt_regs->r13 */
pushq %r14 /* pt_regs->r14 */
pushq %r15 /* pt_regs->r15 */
+ UNWIND_HINT_REGS
cmpq $14,%rsi /* Page fault? */
jnz 10f
@@ -326,8 +328,8 @@ early_idt_handler_common:
20:
decl early_recursion_flag(%rip)
- jmp restore_regs_and_iret
-ENDPROC(early_idt_handler_common)
+ jmp restore_regs_and_return_to_kernel
+END(early_idt_handler_common)
__INITDATA
@@ -361,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts)
.data
-#ifndef CONFIG_XEN
-NEXT_PAGE(init_top_pgt)
- .fill 512,8,0
-#else
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -381,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt)
* Don't set NX because code runs from these pages.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#else
+NEXT_PAGE(init_top_pgt)
+ .fill 512,8,0
#endif
#ifdef CONFIG_X86_5LEVEL
@@ -434,7 +436,7 @@ ENTRY(phys_base)
EXPORT_SYMBOL(phys_base)
#include "../../x86/xen/xen-head.S"
-
+
__PAGE_ALIGNED_BSS
NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 6ebe00c..0d307a6 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* 8253/PIT functions
*
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 4e3b8a5..8f5cb2c 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/signal.h>
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 6107ee1..014cb2f 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -92,8 +92,6 @@ static const __initdata struct idt_data def_idts[] = {
INTG(X86_TRAP_DF, double_fault),
#endif
INTG(X86_TRAP_DB, debug),
- INTG(X86_TRAP_NMI, nmi),
- INTG(X86_TRAP_BP, int3),
#ifdef CONFIG_X86_MCE
INTG(X86_TRAP_MC, &machine_check),
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c
index 7ebcc4a..805b7a3 100644
--- a/arch/x86/kernel/io_delay.c
+++ b/arch/x86/kernel/io_delay.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* I/O delay strategies for inb_p/outb_p
*
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 9c3cf09..3feb648 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* This contains the io-permission bitmap code - written by obz, with changes
* by Linus. 32/64 bits code unification by Miguel Botón.
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index d4eb450..a83b334 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
*
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 3be74fb..020efbf 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
*
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 70dee05..80bee76 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* x86 specific code for irq_work
*
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 1add9e0..1e4094e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/signal.h>
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ab4f491..e56c95b 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* jump label x86 support
*
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index db2182d..ae38dcc 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -1,8 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_KPROBES_COMMON_H
#define __X86_KERNEL_KPROBES_COMMON_H
/* Kprobes and Optprobes common header */
+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
+ " mov %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
+#endif
+
#ifdef CONFIG_X86_64
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
@@ -17,7 +27,7 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- " pushq %rbp\n" \
+ SAVE_RBP_STRING \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
@@ -48,7 +58,7 @@
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- " pushl %ebp\n" \
+ SAVE_RBP_STRING \
" pushl %edi\n" \
" pushl %esi\n" \
" pushl %edx\n" \
@@ -75,11 +85,11 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf,
* Copy an instruction and adjust the displacement if the instruction
* uses the %rip-relative addressing mode.
*/
-extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn);
+extern int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn);
/* Generate a relative-jump/call instruction */
-extern void synthesize_reljump(void *from, void *to);
-extern void synthesize_relcall(void *from, void *to);
+extern void synthesize_reljump(void *dest, void *from, void *to);
+extern void synthesize_relcall(void *dest, void *from, void *to);
#ifdef CONFIG_OPTPROBES
extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f015371..bd36f3c 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -119,29 +119,29 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = {
const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist);
static nokprobe_inline void
-__synthesize_relative_insn(void *from, void *to, u8 op)
+__synthesize_relative_insn(void *dest, void *from, void *to, u8 op)
{
struct __arch_relative_insn {
u8 op;
s32 raddr;
} __packed *insn;
- insn = (struct __arch_relative_insn *)from;
+ insn = (struct __arch_relative_insn *)dest;
insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
insn->op = op;
}
/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/
-void synthesize_reljump(void *from, void *to)
+void synthesize_reljump(void *dest, void *from, void *to)
{
- __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE);
+ __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_reljump);
/* Insert a call instruction at address 'from', which calls address 'to'.*/
-void synthesize_relcall(void *from, void *to)
+void synthesize_relcall(void *dest, void *from, void *to)
{
- __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE);
+ __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_relcall);
@@ -346,10 +346,11 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
/*
* Copy an instruction with recovering modified instruction by kprobes
* and adjust the displacement if the instruction uses the %rip-relative
- * addressing mode.
+ * addressing mode. Note that since @real will be the final place of the
+ * copied instruction, the displacement must be adjusted by @real, not @dest.
* This returns the length of copied instruction, or 0 if it has an error.
*/
-int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
+int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
{
kprobe_opcode_t buf[MAX_INSN_SIZE];
unsigned long recovered_insn =
@@ -387,11 +388,11 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
* have given.
*/
newdisp = (u8 *) src + (s64) insn->displacement.value
- - (u8 *) dest;
+ - (u8 *) real;
if ((s64) (s32) newdisp != newdisp) {
pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
pr_err("\tSrc: %p, Dest: %p, old disp: %x\n",
- src, dest, insn->displacement.value);
+ src, real, insn->displacement.value);
return 0;
}
disp = (u8 *) dest + insn_offset_displacement(insn);
@@ -402,20 +403,38 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn)
}
/* Prepare reljump right after instruction to boost */
-static void prepare_boost(struct kprobe *p, struct insn *insn)
+static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p,
+ struct insn *insn)
{
+ int len = insn->length;
+
if (can_boost(insn, p->addr) &&
- MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) {
+ MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) {
/*
* These instructions can be executed directly if it
* jumps back to correct address.
*/
- synthesize_reljump(p->ainsn.insn + insn->length,
+ synthesize_reljump(buf + len, p->ainsn.insn + len,
p->addr + insn->length);
+ len += RELATIVEJUMP_SIZE;
p->ainsn.boostable = true;
} else {
p->ainsn.boostable = false;
}
+
+ return len;
+}
+
+/* Set the page to RO mode when allocating it */
+void *alloc_insn_page(void)
+{
+ void *page;
+
+ page = module_alloc(PAGE_SIZE);
+ if (page)
+ set_memory_ro((unsigned long)page & PAGE_MASK, 1);
+
+ return page;
}
/* Recover page to RW mode before releasing it */
@@ -429,12 +448,11 @@ void free_insn_page(void *page)
static int arch_copy_kprobe(struct kprobe *p)
{
struct insn insn;
+ kprobe_opcode_t buf[MAX_INSN_SIZE];
int len;
- set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
-
/* Copy an instruction with recovering if other optprobe modifies it.*/
- len = __copy_instruction(p->ainsn.insn, p->addr, &insn);
+ len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
if (!len)
return -EINVAL;
@@ -442,15 +460,16 @@ static int arch_copy_kprobe(struct kprobe *p)
* __copy_instruction can modify the displacement of the instruction,
* but it doesn't affect boostable check.
*/
- prepare_boost(p, &insn);
-
- set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1);
+ len = prepare_boost(buf, p, &insn);
/* Check whether the instruction modifies Interrupt Flag or not */
- p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
+ p->ainsn.if_modifier = is_IF_modifier(buf);
/* Also, displacement change doesn't affect the first byte */
- p->opcode = p->ainsn.insn[0];
+ p->opcode = buf[0];
+
+ /* OK, write back the instruction(s) into ROX insn buffer */
+ text_poke(p->ainsn.insn, buf, len);
return 0;
}
@@ -1080,8 +1099,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
* raw stack chunk with redzones:
*/
__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
- regs->flags &= ~X86_EFLAGS_IF;
- trace_hardirqs_off();
regs->ip = (unsigned long)(jp->entry);
/*
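The arch_copy_kprobe() rework above stops flipping the instruction slot between RW and RO; instead the probe is assembled in a scratch buffer and written into the read-only slot in a single step. A heavily simplified sketch, with memcpy() standing in for text_poke() and displacement fixups elided:

#include <string.h>

#define MAX_INSN    16
#define RELJMP_SIZE 5

static int copy_probe(unsigned char *ro_slot, const unsigned char *src,
		      int insn_len)
{
	unsigned char buf[MAX_INSN + RELJMP_SIZE];
	int len = insn_len;

	memcpy(buf, src, insn_len);   /* stage everything in scratch first */
	/* ... a boost reljump back to src + insn_len may be appended here,
	 *     growing len by RELJMP_SIZE ... */
	memcpy(ro_slot, buf, len);    /* single write-back into the RO slot */
	return len;
}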
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 041f7b6..8dc0161 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -26,7 +26,7 @@
#include "common.h"
static nokprobe_inline
-int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+void __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb, unsigned long orig_ip)
{
/*
@@ -41,33 +41,31 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
__this_cpu_write(current_kprobe, NULL);
if (orig_ip)
regs->ip = orig_ip;
- return 1;
}
int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- if (kprobe_ftrace(p))
- return __skip_singlestep(p, regs, kcb, 0);
- else
- return 0;
+ if (kprobe_ftrace(p)) {
+ __skip_singlestep(p, regs, kcb, 0);
+ preempt_enable_no_resched();
+ return 1;
+ }
+ return 0;
}
NOKPROBE_SYMBOL(skip_singlestep);
-/* Ftrace callback handler for kprobes */
+/* Ftrace callback handler for kprobes -- called with preempt disabled */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct pt_regs *regs)
{
struct kprobe *p;
struct kprobe_ctlblk *kcb;
- unsigned long flags;
-
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
+ /* Preempt is disabled by ftrace */
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
- goto end;
+ return;
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
@@ -77,17 +75,19 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
regs->ip = ip + sizeof(kprobe_opcode_t);
+ /* To emulate trap-based kprobes, call preempt_disable() here */
+ preempt_disable();
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
__skip_singlestep(p, regs, kcb, orig_ip);
+ preempt_enable_no_resched();
+ }
/*
* If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
+ * resets the current kprobe, and keeps the preempt count +1.
*/
}
-end:
- local_irq_restore(flags);
}
NOKPROBE_SYMBOL(kprobe_ftrace_handler);
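The ftrace handler changes rebalance the preempt count: ftrace already calls in with preemption off, the handler adds one more level to emulate a trap, and that level is dropped either on the normal skip path or later by whoever consumed a nonzero pre_handler return. A toy model of that balance; all names here are illustrative stubs:

static int preempt_count_sim;

static void preempt_disable_stub(void)           { preempt_count_sim++; }
static void preempt_enable_no_resched_stub(void) { preempt_count_sim--; }

static void ftrace_handler_skeleton(int pre_handler_took_over)
{
	preempt_disable_stub();                   /* emulate trap entry */
	if (!pre_handler_took_over)
		preempt_enable_no_resched_stub(); /* normal skip path */
	/* else: stays +1 until the consumer of the nonzero return drops it */
}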
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4f98aad..e941136 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -142,11 +142,11 @@ void optprobe_template_func(void);
STACK_FRAME_NON_STANDARD(optprobe_template_func);
#define TMPL_MOVE_IDX \
- ((long)&optprobe_template_val - (long)&optprobe_template_entry)
+ ((long)optprobe_template_val - (long)optprobe_template_entry)
#define TMPL_CALL_IDX \
- ((long)&optprobe_template_call - (long)&optprobe_template_entry)
+ ((long)optprobe_template_call - (long)optprobe_template_entry)
#define TMPL_END_IDX \
- ((long)&optprobe_template_end - (long)&optprobe_template_entry)
+ ((long)optprobe_template_end - (long)optprobe_template_entry)
#define INT3_SIZE sizeof(kprobe_opcode_t)
@@ -154,17 +154,15 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func);
static void
optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- unsigned long flags;
-
/* This is possible if op is under delayed unoptimizing */
if (kprobe_disabled(&op->kp))
return;
- local_irq_save(flags);
+ preempt_disable();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
} else {
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
/* Save skipped registers */
#ifdef CONFIG_X86_64
regs->cs = __KERNEL_CS;
@@ -180,17 +178,17 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
opt_pre_handler(&op->kp, regs);
__this_cpu_write(current_kprobe, NULL);
}
- local_irq_restore(flags);
+ preempt_enable_no_resched();
}
NOKPROBE_SYMBOL(optimized_callback);
-static int copy_optimized_instructions(u8 *dest, u8 *src)
+static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
{
struct insn insn;
int len = 0, ret;
while (len < RELATIVEJUMP_SIZE) {
- ret = __copy_instruction(dest + len, src + len, &insn);
+ ret = __copy_instruction(dest + len, src + len, real, &insn);
if (!ret || !can_boost(&insn, src + len))
return -EINVAL;
len += ret;
@@ -343,57 +341,66 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
struct kprobe *__unused)
{
- u8 *buf;
- int ret;
+ u8 *buf = NULL, *slot;
+ int ret, len;
long rel;
if (!can_optimize((unsigned long)op->kp.addr))
return -EILSEQ;
- op->optinsn.insn = get_optinsn_slot();
- if (!op->optinsn.insn)
+ buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL);
+ if (!buf)
return -ENOMEM;
+ op->optinsn.insn = slot = get_optinsn_slot();
+ if (!slot) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
/*
* Verify if the address gap is in 2GB range, because this uses
* a relative jump.
*/
- rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE;
+ rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE;
if (abs(rel) > 0x7fffffff) {
- __arch_remove_optimized_kprobe(op, 0);
- return -ERANGE;
+ ret = -ERANGE;
+ goto err;
}
- buf = (u8 *)op->optinsn.insn;
- set_memory_rw((unsigned long)buf & PAGE_MASK, 1);
+ /* Copy arch-dep-instance from template */
+ memcpy(buf, optprobe_template_entry, TMPL_END_IDX);
/* Copy instructions into the out-of-line buffer */
- ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr);
- if (ret < 0) {
- __arch_remove_optimized_kprobe(op, 0);
- return ret;
- }
+ ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr,
+ slot + TMPL_END_IDX);
+ if (ret < 0)
+ goto err;
op->optinsn.size = ret;
-
- /* Copy arch-dep-instance from template */
- memcpy(buf, &optprobe_template_entry, TMPL_END_IDX);
+ len = TMPL_END_IDX + op->optinsn.size;
/* Set probe information */
synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op);
/* Set probe function call */
- synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
+ synthesize_relcall(buf + TMPL_CALL_IDX,
+ slot + TMPL_CALL_IDX, optimized_callback);
/* Set returning jmp instruction at the tail of out-of-line buffer */
- synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
+ synthesize_reljump(buf + len, slot + len,
(u8 *)op->kp.addr + op->optinsn.size);
-
- set_memory_ro((unsigned long)buf & PAGE_MASK, 1);
-
- flush_icache_range((unsigned long) buf,
- (unsigned long) buf + TMPL_END_IDX +
- op->optinsn.size + RELATIVEJUMP_SIZE);
- return 0;
+ len += RELATIVEJUMP_SIZE;
+
+ /* We have to use text_poke() for the instruction buffer because it is RO */
+ text_poke(slot, buf, len);
+ ret = 0;
+out:
+ kfree(buf);
+ return ret;
+
+err:
+ __arch_remove_optimized_kprobe(op, 0);
+ goto out;
}
/*
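arch_prepare_optimized_kprobe() now builds the detour in kzalloc()ed scratch memory, text_poke()s it into the RO slot, and funnels failures through goto labels so the scratch buffer is freed exactly once. A compact userspace analog of that cleanup shape, with the slot-releasing err: leg elided:

#include <stdlib.h>

#define ENOMEM 12

static int prepare_detour(void **slot_out)
{
	char *buf = calloc(1, 256);   /* kzalloc() analog for the scratch */
	void *slot;
	int ret;

	if (!buf)
		return -ENOMEM;
	slot = malloc(64);            /* get_optinsn_slot() analog */
	if (!slot) {
		ret = -ENOMEM;
		goto out;
	}
	/* ... range check, build instructions in buf,
	 *     then text_poke(slot, buf, len) ... */
	*slot_out = slot;
	ret = 0;
out:
	free(buf);                    /* freed exactly once on every path */
	return ret;
}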
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index e675704..b40ffbf 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -75,8 +75,8 @@ static int parse_no_kvmclock_vsyscall(char *arg)
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
-static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
-static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
+static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
/*
@@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
return NULL;
}
-void kvm_async_pf_task_wait(u32 token)
+/*
+ * @interrupt_kernel: Is this called from a context that interrupted the
+ * kernel (as opposed to user space)?
+ */
+void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
{
u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -140,8 +144,10 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) || preempt_count() > 1 ||
- rcu_preempt_depth();
+ n.halted = is_idle_task(current) ||
+ (IS_ENABLED(CONFIG_PREEMPT_COUNT)
+ ? preempt_count() > 1 || rcu_preempt_depth()
+ : interrupt_kernel);
init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
raw_spin_unlock(&b->lock);
@@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
prev_state = exception_enter();
- kvm_async_pf_task_wait((u32)read_cr2());
+ kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
@@ -306,7 +312,7 @@ static void kvm_register_steal_time(void)
cpu, (unsigned long long) slow_virt_to_phys(st));
}
-static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
+static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
@@ -420,9 +426,42 @@ void kvm_disable_steal_time(void)
wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}
+static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
+{
+ early_set_memory_decrypted((unsigned long) ptr, size);
+}
+
+/*
+ * Iterate through all possible CPUs and map the memory region pointed
+ * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once.
+ *
+ * Note: we iterate through all possible CPUs to ensure that CPUs
+ * hotplugged will have their per-cpu variable already mapped as
+ * decrypted.
+ */
+static void __init sev_map_percpu_data(void)
+{
+ int cpu;
+
+ if (!sev_active())
+ return;
+
+ for_each_possible_cpu(cpu) {
+ __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
+ __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
+ __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));
+ }
+}
+
#ifdef CONFIG_SMP
static void __init kvm_smp_prepare_boot_cpu(void)
{
+ /*
+ * Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
+ * shares the guest physical address with the hypervisor.
+ */
+ sev_map_percpu_data();
+
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
kvm_spinlock_init();
@@ -459,7 +498,7 @@ static void __init kvm_apf_trap_init(void)
update_intr_gate(X86_TRAP_PF, async_page_fault);
}
-void __init kvm_guest_init(void)
+static void __init kvm_guest_init(void)
{
int i;
@@ -490,6 +529,7 @@ void __init kvm_guest_init(void)
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
#else
+ sev_map_percpu_data();
kvm_guest_cpu_init();
#endif
@@ -538,12 +578,13 @@ static uint32_t __init kvm_detect(void)
return kvm_cpuid_base();
}
-const struct hypervisor_x86 x86_hyper_kvm __refconst = {
+const __initconst struct hypervisor_x86 x86_hyper_kvm = {
.name = "KVM",
.detect = kvm_detect,
- .x2apic_available = kvm_para_available,
+ .type = X86_HYPER_KVM,
+ .init.guest_late_init = kvm_guest_init,
+ .init.x2apic_available = kvm_para_available,
};
-EXPORT_SYMBOL_GPL(x86_hyper_kvm);
static __init int activate_jump_labels(void)
{
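sev_map_percpu_data() above must run before any per-cpu address is shared with the hypervisor, and it walks all possible CPUs so later hotplug finds the regions already decrypted. A stub-based sketch of the loop; the stubs only mark where the real SEV primitives would act:

#define NR_CPUS_SIM 4

/* Stub for early_set_memory_decrypted(); the real one clears the SEV
 * C-bit over the given range. */
static void set_decrypted(void *ptr, unsigned long size)
{
	(void)ptr;
	(void)size;
}

static char apf_reason_sim[NR_CPUS_SIM][64];   /* per-cpu region stand-in */

static void sev_map_percpu_data_sketch(int sev_active)
{
	int cpu;

	if (!sev_active)
		return;                            /* nothing to do without SEV */
	for (cpu = 0; cpu < NR_CPUS_SIM; cpu++)    /* all *possible* CPUs */
		set_decrypted(apf_reason_sim[cpu], sizeof(apf_reason_sim[cpu]));
}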
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d889676..77b492c 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -27,6 +27,7 @@
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/kvmclock.h>
@@ -45,7 +46,7 @@ early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
static struct pvclock_vsyscall_time_info *hv_clock;
-static struct pvclock_wall_clock wall_clock;
+static struct pvclock_wall_clock *wall_clock;
struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
{
@@ -64,22 +65,22 @@ static void kvm_get_wallclock(struct timespec *now)
int low, high;
int cpu;
- low = (int)__pa_symbol(&wall_clock);
- high = ((u64)__pa_symbol(&wall_clock) >> 32);
+ low = (int)slow_virt_to_phys(wall_clock);
+ high = ((u64)slow_virt_to_phys(wall_clock) >> 32);
native_write_msr(msr_kvm_wall_clock, low, high);
cpu = get_cpu();
vcpu_time = &hv_clock[cpu].pvti;
- pvclock_read_wallclock(&wall_clock, vcpu_time, now);
+ pvclock_read_wallclock(wall_clock, vcpu_time, now);
put_cpu();
}
static int kvm_set_wallclock(const struct timespec *now)
{
- return -1;
+ return -ENODEV;
}
static u64 kvm_clock_read(void)
@@ -249,11 +250,39 @@ static void kvm_shutdown(void)
native_machine_shutdown();
}
+static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size,
+ phys_addr_t align)
+{
+ phys_addr_t mem;
+
+ mem = memblock_alloc(size, align);
+ if (!mem)
+ return 0;
+
+ if (sev_active()) {
+ if (early_set_memory_decrypted((unsigned long)__va(mem), size))
+ goto e_free;
+ }
+
+ return mem;
+e_free:
+ memblock_free(mem, size);
+ return 0;
+}
+
+static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size)
+{
+ if (sev_active())
+ early_set_memory_encrypted((unsigned long)__va(addr), size);
+
+ memblock_free(addr, size);
+}
+
void __init kvmclock_init(void)
{
struct pvclock_vcpu_time_info *vcpu_time;
- unsigned long mem;
- int size, cpu;
+ unsigned long mem, mem_wall_clock;
+ int size, cpu, wall_clock_size;
u8 flags;
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
@@ -267,21 +296,35 @@ void __init kvmclock_init(void)
} else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)))
return;
- printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
- msr_kvm_system_time, msr_kvm_wall_clock);
+ wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock));
+ mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE);
+ if (!mem_wall_clock)
+ return;
- mem = memblock_alloc(size, PAGE_SIZE);
- if (!mem)
+ wall_clock = __va(mem_wall_clock);
+ memset(wall_clock, 0, wall_clock_size);
+
+ mem = kvm_memblock_alloc(size, PAGE_SIZE);
+ if (!mem) {
+ kvm_memblock_free(mem_wall_clock, wall_clock_size);
+ wall_clock = NULL;
return;
+ }
+
hv_clock = __va(mem);
memset(hv_clock, 0, size);
if (kvm_register_clock("primary cpu clock")) {
hv_clock = NULL;
- memblock_free(mem, size);
+ kvm_memblock_free(mem, size);
+ kvm_memblock_free(mem_wall_clock, wall_clock_size);
+ wall_clock = NULL;
return;
}
+ printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
+ msr_kvm_system_time, msr_kvm_wall_clock);
+
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
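kvm_memblock_alloc()/kvm_memblock_free() above pair each allocation with an encryption-state change under SEV, undoing the allocation if decryption fails. A hedged userspace analog of that pairing, assuming size is page-aligned:

#include <stdlib.h>

/* Stub for early_set_memory_decrypted(); returns 0 on success. */
static int set_decrypted(void *p, unsigned long n)
{
	(void)p;
	(void)n;
	return 0;
}

static void *alloc_shared(size_t size, int sev_active)
{
	void *mem = aligned_alloc(4096, size);   /* memblock_alloc() analog */

	if (mem && sev_active && set_decrypted(mem, size) != 0) {
		free(mem);                       /* undo the allocation */
		return NULL;
	}
	return mem;
}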
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index f0e64db..1c1eae9 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
@@ -12,6 +13,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
@@ -101,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
static void install_ldt(struct mm_struct *current_mm,
struct ldt_struct *ldt)
{
- /* Synchronizes with lockless_dereference in load_mm_ldt. */
+ /* Synchronizes with READ_ONCE in load_mm_ldt. */
smp_store_release(&current_mm->context.ldt, ldt);
/* Activate the LDT for all CPUs using current_mm. */
@@ -294,8 +296,8 @@ out:
return error;
}
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
- unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+ unsigned long , bytecount)
{
int ret = -ENOSYS;
@@ -313,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
ret = write_ldt(ptr, bytecount, 0);
break;
}
- return ret;
+ /*
+ * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+ * return type, but the ABI for sys_modify_ldt() expects
+ * 'int'. This cast gives us an int-sized value in %rax
+ * for the return code. The 'unsigned' is necessary so
+ * the compiler does not try to sign-extend the negative
+ * return codes into the high half of the register when
+ * taking the value from int->long.
+ */
+ return (unsigned int)ret;
}
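The (unsigned int) cast in the new sys_modify_ldt() return path can be demonstrated directly: widening a negative int through unsigned int zero-extends instead of sign-extending, keeping the error code int-sized in %rax:

#include <stdio.h>

int main(void)
{
	int err = -38;   /* -ENOSYS */
	long sign_extended = (long)err;               /* 0xffffffffffffffda */
	long zero_extended = (long)(unsigned int)err; /* 0x00000000ffffffda */

	printf("%lx vs %lx\n", (unsigned long)sign_extended,
	       (unsigned long)zero_extended);
	return 0;
}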
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index f4c886d..b5cb49e 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* AMD Family 10h mmconfig enablement
*/
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 62e7d70..da0c160 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -172,19 +172,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_X86_64_NONE:
break;
case R_X86_64_64:
+ if (*(u64 *)loc != 0)
+ goto invalid_relocation;
*(u64 *)loc = val;
break;
case R_X86_64_32:
+ if (*(u32 *)loc != 0)
+ goto invalid_relocation;
*(u32 *)loc = val;
if (val != *(u32 *)loc)
goto overflow;
break;
case R_X86_64_32S:
+ if (*(s32 *)loc != 0)
+ goto invalid_relocation;
*(s32 *)loc = val;
if ((s64)val != *(s32 *)loc)
goto overflow;
break;
case R_X86_64_PC32:
+ if (*(u32 *)loc != 0)
+ goto invalid_relocation;
val -= (u64)loc;
*(u32 *)loc = val;
#if 0
@@ -200,6 +208,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
}
return 0;
+invalid_relocation:
+ pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info), loc, val);
+ return -ENOEXEC;
+
overflow:
pr_err("overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
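The new invalid_relocation path refuses to apply a relocation whose target bytes are already nonzero, which would indicate a double relocation or a corrupt object. The core check, reduced to the R_X86_64_64 case:

#include <stdint.h>

#define ENOEXEC 8

static int apply_reloc64(uint64_t *loc, uint64_t val)
{
	if (*loc != 0)
		return -ENOEXEC;   /* the new invalid_relocation path */
	*loc = val;
	return 0;
}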
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 5cbb317..410c5da 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Intel Multiprocessor Specification 1.1 and 1.4
* compliant MP-table parsing routines.
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 35aafc9..18bc9b5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -105,7 +105,7 @@ static void nmi_max_handler(struct irq_work *w)
{
struct nmiaction *a = container_of(w, struct nmiaction, irq_work);
int remainder_ns, decimal_msecs;
- u64 whole_msecs = ACCESS_ONCE(a->max_duration);
+ u64 whole_msecs = READ_ONCE(a->max_duration);
remainder_ns = do_div(whole_msecs, (1000 * 1000));
decimal_msecs = remainder_ns / 1000;
diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c
index d27f8d8..a1a96df 100644
--- a/arch/x86/kernel/nmi_selftest.c
+++ b/arch/x86/kernel/nmi_selftest.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* arch/x86/kernel/nmi-selftest.c
*
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 8f2d1c9..71f2d11 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Split spinlock implementation out into its own file, so it can be
* compiled in a FTRACE-compatible way.
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 19a3e8f..041096b 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -115,8 +115,18 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
return 5;
}
-/* Neat trick to map patch type back to the call within the
- * corresponding structure. */
+DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
+
+void __init native_pv_lock_init(void)
+{
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ static_branch_disable(&virt_spin_lock_key);
+}
+
+/*
+ * Neat trick to map patch type back to the call within the
+ * corresponding structure.
+ */
static void *get_call_destination(u8 type)
{
struct paravirt_patch_template tmpl = {
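virt_spin_lock_key above is declared as a static key that defaults to true and is switched off on bare metal, so only guests take the virt-friendly spinlock path. A plain-boolean analog of that decision:

#include <stdbool.h>

/* DEFINE_STATIC_KEY_TRUE() analog: the key starts enabled. */
static bool virt_spin_lock_enabled = true;

static void native_pv_lock_init_sketch(bool running_on_hypervisor)
{
	/* On bare metal the virt-friendly path is patched out. */
	if (!running_on_hypervisor)
		virt_spin_lock_enabled = false;   /* static_branch_disable() */
}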
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 553acbb..758e69d 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <asm/paravirt.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 11aaf1e..ac0be82 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <asm/paravirt.h>
#include <asm/asm-offsets.h>
#include <linux/stringify.h>
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 0accc24..599d746 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/dmar.h>
diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c
index f712dfd..4dfd90a 100644
--- a/arch/x86/kernel/pci-iommu_table.c
+++ b/arch/x86/kernel/pci-iommu_table.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/dma-mapping.h>
#include <asm/iommu_table.h>
#include <linux/string.h>
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 4fc3cb6..b0caae2 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Fallback functions when the main IOMMU code is not compiled in. This
code is roughly equivalent to i386. */
#include <linux/dma-mapping.h>
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 6770775..53bd05e 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Glue code to lib/swiotlb.c */
#include <linux/pci.h>
diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c
index a311ffc..da5190a 100644
--- a/arch/x86/kernel/pcspeaker.c
+++ b/arch/x86/kernel/pcspeaker.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/platform_device.h>
#include <linux/err.h>
#include <linux/init.h>
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 587d887..e47b2db 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/sched.h>
diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c
index 502a77d..39a5929 100644
--- a/arch/x86/kernel/platform-quirks.c
+++ b/arch/x86/kernel/platform-quirks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/init.h>
diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c
index 0c5315d..6b07faa 100644
--- a/arch/x86/kernel/pmem.c
+++ b/arch/x86/kernel/pmem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015, Christoph Hellwig.
* Copyright (c) 2015, Intel Corporation.
@@ -6,7 +7,7 @@
#include <linux/init.h>
#include <linux/ioport.h>
-static int found(u64 start, u64 end, void *data)
+static int found(struct resource *res, void *data)
{
return 1;
}
diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c
index 963e3fb..ee02863 100644
--- a/arch/x86/kernel/probe_roms.c
+++ b/arch/x86/kernel/probe_roms.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index bd6b85f..97fb3e5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/errno.h>
@@ -48,7 +49,13 @@
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
.x86_tss = {
- .sp0 = TOP_OF_INIT_STACK,
+ /*
+ * .sp0 is only used when entering ring 0 from a lower
+ * privilege level. Since the init task never runs anything
+ * but ring 0 code, there is no need for a valid value here.
+ * Poison it.
+ */
+ .sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 1196625..45bf0c5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Reload esp0 and cpu_current_top_of_stack. This changes
- * current_thread_info().
+ * current_thread_info(). Refresh the SYSENTER configuration in
+ * case prev or next is vm86.
*/
- load_sp0(tss, next);
+ update_sp0(next_p);
+ refresh_sysenter_cs(next);
this_cpu_write(cpu_current_top_of_stack,
(unsigned long)task_stack_page(next_p) +
THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 302e7b2..eeeb34f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
struct inactive_task_frame *frame;
struct task_struct *me = current;
- p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
@@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
this_cpu_write(current_task, next_p);
- /* Reload esp0 and ss1. This changes current_thread_info(). */
- load_sp0(tss, next);
+ /* Reload sp0. */
+ update_sp0(next_p);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index eaa591c..697a4ce 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* This file contains work-arounds for x86 and x86_64 platform bugs.
*/
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 54180fa..2126b9d 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/export.h>
@@ -105,6 +106,10 @@ void __noreturn machine_real_restart(unsigned int type)
load_cr3(initial_page_table);
#else
write_cr3(real_mode_header->trampoline_pgd);
+
+ /* Exiting long mode will fail if CR4.PCIDE is set. */
+ if (static_cpu_has(X86_FEATURE_PCID))
+ cr4_clear_bits(X86_CR4_PCIDE);
#endif
/* Jump to the identity-mapped low memory code */
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index c8e41e9..b7c0f14 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* This is a good place to put board specific reboot fixups.
*
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 5ab3895..9b9fb78 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/ioport.h>
#include <asm/e820/api.h>
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 5b21cb7..69ac9cb 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* RTC related functions
*/
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0957dd7..6cced87 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -380,9 +380,11 @@ static void __init reserve_initrd(void)
* If SME is active, this memory will be marked encrypted by the
* kernel when it is accessed (including relocation). However, the
* ramdisk image was loaded decrypted by the bootloader, so make
- * sure that it is encrypted before accessing it.
+ * sure that it is encrypted before accessing it. For SEV the
+ * ramdisk will already be encrypted, so only do this for SME.
*/
- sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
+ if (sme_active())
+ sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image);
initrd_start = 0;
@@ -1045,7 +1047,7 @@ void __init setup_arch(char **cmdline_p)
/*
* VMware detection requires dmi to be available, so this
- * needs to be done after dmi_scan_machine, for the BP.
+ * needs to be done after dmi_scan_machine(), for the boot CPU.
*/
init_hypervisor_platform();
@@ -1294,7 +1296,7 @@ void __init setup_arch(char **cmdline_p)
io_apic_init_mappings();
- kvm_guest_init();
+ x86_init.hyper.guest_late_init();
e820__reserve_resources();
e820__register_nosave_regions(max_low_pfn);
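setup_arch() now calls a generic x86_init.hyper.guest_late_init() hook instead of kvm_guest_init() directly; each hypervisor fills the hook in (see the kvm.c hunk), and the default is presumably an empty stub. A minimal model of that indirection:

/* Default hook assumed to be a no-op; KVM overrides it via
 * .init.guest_late_init = kvm_guest_init in the kvm.c hunk. */
static void default_guest_late_init(void) { }

static struct {
	void (*guest_late_init)(void);
} x86_hyper_sim = { .guest_late_init = default_guest_late_init };

static void setup_arch_tail(void)
{
	x86_hyper_sim.guest_late_init();   /* replaces the direct call */
}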
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 28dafed..497aa76 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4e188fd..b9e00e8 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index ab9feb5..8c6da1a 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/compat.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ad59edd..c58e81a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -77,6 +77,7 @@
#include <asm/i8259.h>
#include <asm/realmode.h>
#include <asm/misc.h>
+#include <asm/qspinlock.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -194,6 +195,12 @@ static void smp_callin(void)
smp_store_cpu_info(cpuid);
/*
+ * The topology information must be up to date before
+ * calibrate_delay() and notify_cpu_starting().
+ */
+ set_cpu_sibling_map(raw_smp_processor_id());
+
+ /*
* Get our bogomips.
* Update loops_per_jiffy in cpu_data. Previous call to
* smp_store_cpu_info() stored a value that is close but not as
@@ -203,11 +210,6 @@ static void smp_callin(void)
cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
pr_debug("Stack at about %p\n", &cpuid);
- /*
- * This must be done before setting cpu_online_mask
- * or calling notify_cpu_starting.
- */
- set_cpu_sibling_map(raw_smp_processor_id());
wmb();
notify_cpu_starting(cpuid);
@@ -249,7 +251,7 @@ static void notrace start_secondary(void *unused)
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
- * Check TSC synchronization with the BP:
+ * Check TSC synchronization with the boot CPU:
*/
check_tsc_sync_target();
@@ -961,8 +963,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
- per_cpu(cpu_current_top_of_stack, cpu) =
- (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+ per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
@@ -1094,7 +1095,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
unsigned long flags;
int err, ret = 0;
- WARN_ON(irqs_disabled());
+ lockdep_assert_irqs_enabled();
pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
@@ -1357,6 +1358,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
+ native_pv_lock_init();
+
uv_system_init();
set_mtrr_aps_delayed_init();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 8dabd7b..77835bc 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -30,7 +30,7 @@ static int save_stack_address(struct stack_trace *trace, unsigned long addr,
return 0;
}
-static void __save_stack_trace(struct stack_trace *trace,
+static void noinline __save_stack_trace(struct stack_trace *trace,
struct task_struct *task, struct pt_regs *regs,
bool nosched)
{
@@ -56,6 +56,7 @@ static void __save_stack_trace(struct stack_trace *trace,
*/
void save_stack_trace(struct stack_trace *trace)
{
+ trace->skip++;
__save_stack_trace(trace, current, NULL, false);
}
EXPORT_SYMBOL_GPL(save_stack_trace);
@@ -70,6 +71,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
if (!try_get_task_stack(tsk))
return;
+ if (tsk == current)
+ trace->skip++;
__save_stack_trace(trace, tsk, NULL, true);
put_task_stack(tsk);
@@ -88,8 +91,9 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
} \
})
-static int __save_stack_trace_reliable(struct stack_trace *trace,
- struct task_struct *task)
+static int __always_inline
+__save_stack_trace_reliable(struct stack_trace *trace,
+ struct task_struct *task)
{
struct unwind_state state;
struct pt_regs *regs;
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 5ee6638..60d2c37 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* x86 single-step support code, common to 32-bit and 64-bit.
*/
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 73e4d28..a63fe77 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e0754cd..879af86 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 1991,1992,1995 Linus Torvalds
* Copyright (c) 1994 Alan Modra
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index a106b97..9a9c9b0 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sched.h>
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c
index 80bb24d..b8e7abe 100644
--- a/arch/x86/kernel/trace_clock.c
+++ b/arch/x86/kernel/trace_clock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* X86 trace clocks
*/
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
index c6636d1..5bd30c4 100644
--- a/arch/x86/kernel/tracepoint.c
+++ b/arch/x86/kernel/tracepoint.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Code for supporting irq vector tracepoints.
*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 67db4f4..c3aca0b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -60,6 +60,7 @@
#include <asm/trace/mpx.h>
#include <asm/mpx.h>
#include <asm/vm86.h>
+#include <asm/umip.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
@@ -141,8 +142,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
* will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault.
*/
- BUG_ON((unsigned long)(current_top_of_stack() -
- current_stack_pointer) >= THREAD_SIZE);
+ BUG_ON(!on_thread_stack());
preempt_enable_no_resched();
}
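
[The BUG_ON() above now delegates the stack arithmetic to a helper. A sketch of what on_thread_stack() looks like, assuming the header definition introduced alongside this change:]

    static inline bool on_thread_stack(void)
    {
            /*
             * SP is on the thread stack iff it lies within THREAD_SIZE
             * below the top of the stack.
             */
            return (unsigned long)(current_top_of_stack() -
                                   current_stack_pointer) < THREAD_SIZE;
    }
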
@@ -209,9 +209,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
if (fixup_exception(regs, trapnr))
return 0;
- if (fixup_bug(regs, trapnr))
- return 0;
-
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = trapnr;
die(str, regs, error_code);
@@ -292,6 +289,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str,
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
+ /*
+ * WARN*()s end up here; fix them up before we call the
+ * notifier chain.
+ */
+ if (!user_mode(regs) && fixup_bug(regs, trapnr))
+ return;
+
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) !=
NOTIFY_STOP) {
cond_local_irq_enable(regs);
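
[The fixup_bug() call moves here because WARN*() expands to a ud2 instruction plus a bug-table entry; the handler looks the faulting address up in that table and, for a warning, steps the instruction pointer past the trap so execution resumes. Roughly as below, with the caveat that the exact length constant has changed names across kernel versions:]

    static int fixup_bug(struct pt_regs *regs, int trapnr)
    {
            if (trapnr != X86_TRAP_UD)
                    return 0;

            switch (report_bug(regs->ip, regs)) {
            case BUG_TRAP_TYPE_WARN:
                    /* Resume execution after the WARN*()'s ud2 */
                    regs->ip += LEN_UD0;
                    return 1;
            default:
                    return 0;
            }
    }
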
@@ -514,6 +518,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
cond_local_irq_enable(regs);
+ if (static_cpu_has(X86_FEATURE_UMIP)) {
+ if (user_mode(regs) && fixup_umip_exception(regs))
+ return;
+ }
+
if (v8086_mode(regs)) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 796d96b..ad2b925 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1346,12 +1346,10 @@ void __init tsc_init(void)
unsigned long calibrate_delay_is_known(void)
{
int sibling, cpu = smp_processor_id();
- struct cpumask *mask = topology_core_cpumask(cpu);
+ int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
+ const struct cpumask *mask = topology_core_cpumask(cpu);
- if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- if (!mask)
+ if (tsc_disabled || !constant_tsc || !mask)
return 0;
sibling = cpumask_any_but(mask, cpu);
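
[This hunk is the consumer of the sibling-map reordering in smpboot.c above: calibrate_delay_is_known() reuses a core sibling's loops_per_jiffy, so the topology must be populated before calibrate_delay() runs. For reference, a sketch of the whole function after this change, with the tail reconstructed from context rather than quoted:]

    unsigned long calibrate_delay_is_known(void)
    {
            int sibling, cpu = smp_processor_id();
            int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
            const struct cpumask *mask = topology_core_cpumask(cpu);

            if (tsc_disabled || !constant_tsc || !mask)
                    return 0;

            /* Borrow the calibration of a sibling that is already up */
            sibling = cpumask_any_but(mask, cpu);
            if (sibling < nr_cpu_ids)
                    return cpu_data(sibling).loops_per_jiffy;

            return 0;
    }
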
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 7842371..e76a988 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* check TSC synchronization.
*
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
new file mode 100644
index 0000000..6ba82be
--- /dev/null
+++ b/arch/x86/kernel/umip.c
@@ -0,0 +1,366 @@
+/*
+ * umip.c Emulation for instructions protected by the Intel User-Mode
+ * Instruction Prevention feature
+ *
+ * Copyright (c) 2017, Intel Corporation.
+ * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+ */
+
+#include <linux/uaccess.h>
+#include <asm/umip.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+#include <linux/ratelimit.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) "umip: " fmt
+
+/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
+ *
+ * The User-Mode Instruction Prevention feature, present in recent Intel
+ * processors, prevents a group of instructions (sgdt, sidt, sldt, smsw, and
+ * str) from being executed with CPL > 0. Attempting to execute them from
+ * user space raises a general protection fault.
+ *
+ * Rather than relaying the general protection fault caused by the
+ * UMIP-protected instructions to user space (in the form of a SIGSEGV
+ * signal), the fault can be trapped and the result of such instructions
+ * emulated to provide dummy values. This both preserves the current kernel
+ * behavior and avoids revealing the system resources that UMIP intends to
+ * protect (i.e., the locations of the global descriptor and interrupt
+ * descriptor tables, the segment selector of the local descriptor table,
+ * the value of the task register and the contents of the CR0 register).
+ *
+ * This emulation is needed because certain applications (e.g., WineHQ and
+ * DOSEMU2) rely on this subset of instructions to function.
+ *
+ * The instructions protected by UMIP can be split into two groups: those
+ * which return a kernel memory address (sgdt and sidt) and those which
+ * return a value (sldt, str and smsw).
+ *
+ * For the instructions that return a kernel memory address, applications
+ * such as WineHQ rely on the result being located in the kernel memory space,
+ * not the actual location of the table. The result is emulated as a hard-coded
+ * value that lies close to the top of the kernel memory. The limits for the
+ * GDT and the IDT are set to zero.
+ *
+ * Given that sldt and str are not commonly used in programs that run on WineHQ
+ * or DOSEMU2, they are not emulated.
+ *
+ * The instruction smsw is emulated to return the value that the register CR0
+ * has at boot time, as set in head_32.S.
+ *
+ * Also, emulation is provided only for 32-bit processes; 64-bit processes
+ * that attempt to use the instructions that UMIP protects will receive the
+ * SIGSEGV signal issued as a consequence of the general protection fault.
+ *
+ * Care is taken to appropriately emulate the results when segmentation is
+ * used. That is, rather than relying on USER_DS and USER_CS, the function
+ * insn_get_addr_ref() inspects the segment descriptor pointed to by the
+ * registers in pt_regs. This ensures that we correctly obtain the segment
+ * base address and the address and operand sizes even if the user space
+ * application uses a local descriptor table.
+ */
+
+#define UMIP_DUMMY_GDT_BASE 0xfffe0000
+#define UMIP_DUMMY_IDT_BASE 0xffff0000
+
+/*
+ * The SGDT and SIDT instructions store the contents of the global descriptor
+ * table and interrupt descriptor table registers, respectively. The
+ * destination is a
+ * memory operand of X+2 bytes. X bytes are used to store the base address of
+ * the table and 2 bytes are used to store the limit. In 32-bit processes, the
+ * only processes for which emulation is provided, X has a value of 4.
+ */
+#define UMIP_GDT_IDT_BASE_SIZE 4
+#define UMIP_GDT_IDT_LIMIT_SIZE 2
+
+#define UMIP_INST_SGDT 0 /* 0F 01 /0 */
+#define UMIP_INST_SIDT 1 /* 0F 01 /1 */
+#define UMIP_INST_SMSW 3 /* 0F 01 /4 */
+
+/**
+ * identify_insn() - Identify a UMIP-protected instruction
+ * @insn: Instruction structure with opcode and ModRM byte.
+ *
+ * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected
+ * instruction that can be emulated.
+ *
+ * Returns:
+ *
+ * On success, a constant identifying a specific UMIP-protected instruction that
+ * can be emulated.
+ *
+ * -EINVAL on error or when not a UMIP-protected instruction that can be
+ * emulated.
+ */
+static int identify_insn(struct insn *insn)
+{
+ /* By getting modrm we also get the opcode. */
+ insn_get_modrm(insn);
+
+ if (!insn->modrm.nbytes)
+ return -EINVAL;
+
+ /* All the instructions of interest start with 0x0f. */
+ if (insn->opcode.bytes[0] != 0xf)
+ return -EINVAL;
+
+ if (insn->opcode.bytes[1] == 0x1) {
+ switch (X86_MODRM_REG(insn->modrm.value)) {
+ case 0:
+ return UMIP_INST_SGDT;
+ case 1:
+ return UMIP_INST_SIDT;
+ case 4:
+ return UMIP_INST_SMSW;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* SLDT and STR are not emulated */
+ return -EINVAL;
+}
+
+/**
+ * emulate_umip_insn() - Emulate UMIP instructions and return dummy values
+ * @insn: Instruction structure with operands
+ * @umip_inst: A constant indicating the instruction to emulate
+ * @data: Buffer into which the dummy result is stored
+ * @data_size: Size of the emulated result
+ *
+ * Emulate an instruction protected by UMIP and provide a dummy result. The
+ * result of the emulation is saved in @data. The size of the results depends
+ * on both the instruction and type of operand (register vs memory address).
+ * The size of the result is updated in @data_size. The caller is responsible
+ * for providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE +
+ * UMIP_GDT_IDT_LIMIT_SIZE bytes.
+ *
+ * Returns:
+ *
+ * 0 on success, -EINVAL on error while emulating.
+ */
+static int emulate_umip_insn(struct insn *insn, int umip_inst,
+ unsigned char *data, int *data_size)
+{
+ unsigned long dummy_base_addr, dummy_value;
+ unsigned short dummy_limit = 0;
+
+ if (!data || !data_size || !insn)
+ return -EINVAL;
+ /*
+ * These two instructions return the base address and limit of the
+ * global and interrupt descriptor table, respectively. According to the
+ * Intel Software Development manual, the base address can be 24-bit,
+ * 32-bit or 64-bit. Limit is always 16-bit. If the operand size is
+ * 16-bit, the returned value of the base address is supposed to be a
+ * zero-extended 24-bit number. However, it seems that a 32-bit number
+ * is always returned irrespective of the operand size.
+ */
+ if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) {
+ /* SGDT and SIDT do not use register operands. */
+ if (X86_MODRM_MOD(insn->modrm.value) == 3)
+ return -EINVAL;
+
+ if (umip_inst == UMIP_INST_SGDT)
+ dummy_base_addr = UMIP_DUMMY_GDT_BASE;
+ else
+ dummy_base_addr = UMIP_DUMMY_IDT_BASE;
+
+ *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE;
+
+ memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE);
+ memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
+
+ } else if (umip_inst == UMIP_INST_SMSW) {
+ dummy_value = CR0_STATE;
+
+ /*
+ * Even though the CR0 register has 4 bytes, the number
+ * of bytes to be copied in the result buffer is determined
+ * by whether the operand is a register or a memory location.
+ * If operand is a register, return as many bytes as the operand
+ * size. If operand is memory, return only the two least
+ * significant bytes of CR0.
+ */
+ if (X86_MODRM_MOD(insn->modrm.value) == 3)
+ *data_size = insn->opnd_bytes;
+ else
+ *data_size = 2;
+
+ memcpy(data, &dummy_value, *data_size);
+ /* STR and SLDT are not emulated */
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR
+ * @addr: Address that caused the signal
+ * @regs: Register set containing the instruction pointer
+ *
+ * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is
+ * intended to be used to provide a segmentation fault when the result of the
+ * UMIP emulation could not be copied to the user space memory.
+ *
+ * Returns: none
+ */
+static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs)
+{
+ siginfo_t info;
+ struct task_struct *tsk = current;
+
+ tsk->thread.cr2 = (unsigned long)addr;
+ tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE;
+ tsk->thread.trap_nr = X86_TRAP_PF;
+
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = addr;
+ force_sig_info(SIGSEGV, &info, tsk);
+
+ if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV)))
+ return;
+
+ pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n",
+ tsk->comm, task_pid_nr(tsk), regs->ip,
+ regs->sp, X86_PF_USER | X86_PF_WRITE,
+ regs->ip);
+}
+
+/**
+ * fixup_umip_exception() - Fixup a general protection fault caused by UMIP
+ * @regs: Registers as saved when entering the #GP handler
+ *
+ * The instructions sgdt, sidt, str, smsw, sldt cause a general protection
+ * fault if executed with CPL > 0 (i.e., from user space). If the offending
+ * user-space process is not in long mode, this function fixes the exception
+ * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not
+ * fixed up. Long-mode user-space processes are likewise not fixed up.
+ *
+ * If operands are memory addresses, results are copied to user-space memory as
+ * indicated by the instruction pointed by eIP using the registers indicated in
+ * the instruction operands. If operands are registers, results are copied into
+ * the context that was saved when entering kernel mode.
+ *
+ * Returns:
+ *
+ * True if emulation was successful; false if not.
+ */
+bool fixup_umip_exception(struct pt_regs *regs)
+{
+ int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst;
+ unsigned long seg_base = 0, *reg_addr;
+ /* 10 bytes is the maximum size of the result of UMIP instructions */
+ unsigned char dummy_data[10] = { 0 };
+ unsigned char buf[MAX_INSN_SIZE];
+ void __user *uaddr;
+ struct insn insn;
+ char seg_defs;
+
+ if (!regs)
+ return false;
+
+ /* Do not emulate 64-bit processes. */
+ if (user_64bit_mode(regs))
+ return false;
+
+ /*
+ * If not in user-space long mode, a custom code segment could be in
+ * use. This is true in protected mode (if the process defined a local
+ * descriptor table), or in virtual-8086 mode. In most cases, seg_base
+ * will be zero, as it is for USER_CS.
+ */
+ if (!user_64bit_mode(regs))
+ seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+
+ if (seg_base == -1L)
+ return false;
+
+ not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
+ sizeof(buf));
+ nr_copied = sizeof(buf) - not_copied;
+
+ /*
+ * The copy_from_user above could have failed if user code is protected
+ * by a memory protection key. Give up on emulation in such a case.
+ * Should we issue a page fault?
+ */
+ if (!nr_copied)
+ return false;
+
+ insn_init(&insn, buf, nr_copied, user_64bit_mode(regs));
+
+ /*
+ * Override the default operand and address sizes with what is specified
+ * in the code segment descriptor. The instruction decoder only sets
+ * the address size to either 4 or 8 address bytes and does nothing
+ * for the operand bytes. This is OK for most cases, but we could
+ * have special cases where, for instance, a 16-bit code segment
+ * descriptor is used.
+ * If there is an address override prefix, the instruction decoder
+ * correctly updates these values, even for 16-bit defaults.
+ */
+ seg_defs = insn_get_code_seg_params(regs);
+ if (seg_defs == -EINVAL)
+ return false;
+
+ insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs);
+ insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs);
+
+ insn_get_length(&insn);
+ if (nr_copied < insn.length)
+ return false;
+
+ umip_inst = identify_insn(&insn);
+ if (umip_inst < 0)
+ return false;
+
+ if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size))
+ return false;
+
+ /*
+ * If operand is a register, write result to the copy of the register
+ * value that was pushed to the stack when entering into kernel mode.
+ * Upon exit, the value we write will be restored to the actual hardware
+ * register.
+ */
+ if (X86_MODRM_MOD(insn.modrm.value) == 3) {
+ reg_offset = insn_get_modrm_rm_off(&insn, regs);
+
+ /*
+ * Negative values are usually errors. In memory addressing,
+ * the exception is -EDOM. Since we expect a register operand,
+ * all negative values are errors.
+ */
+ if (reg_offset < 0)
+ return false;
+
+ reg_addr = (unsigned long *)((unsigned long)regs + reg_offset);
+ memcpy(reg_addr, dummy_data, dummy_data_size);
+ } else {
+ uaddr = insn_get_addr_ref(&insn, regs);
+ if ((unsigned long)uaddr == -1L)
+ return false;
+
+ nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size);
+ if (nr_copied > 0) {
+ /*
+ * If copy fails, send a signal and tell caller that
+ * fault was fixed up.
+ */
+ force_sig_info_umip_fault(uaddr, regs);
+ return true;
+ }
+ }
+
+ /* Advance the instruction pointer to let the program keep going. */
+ regs->ip += insn.length;
+ return true;
+}
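
[To see the emulation from the other side, here is a hypothetical 32-bit user-space probe (build with gcc -m32); the packed layout and the expected dummy values follow the definitions earlier in this file:]

    #include <stdio.h>
    #include <stdint.h>

    struct __attribute__((packed)) table_desc {
            uint16_t limit;
            uint32_t base;
    };

    int main(void)
    {
            struct table_desc gdt = { 0 }, idt = { 0 };

            asm volatile ("sgdt %0" : "=m" (gdt));
            asm volatile ("sidt %0" : "=m" (idt));

            /* With UMIP emulation: bases 0xfffe0000/0xffff0000, limits 0 */
            printf("gdt: base 0x%08x limit 0x%04x\n", gdt.base, gdt.limit);
            printf("idt: base 0x%08x limit 0x%04x\n", idt.base, idt.limit);

            return 0;
    }
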
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index d145a0b..3dc26f9 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state)
state->stack_info.type, state->stack_info.next_sp,
state->stack_mask, state->graph_idx);
- for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+ for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
+ sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
break;
@@ -174,6 +175,7 @@ static bool is_last_task_frame(struct unwind_state *state)
* This determines if the frame pointer actually contains an encoded pointer to
* pt_regs on the stack. See ENCODE_FRAME_POINTER.
*/
+#ifdef CONFIG_X86_64
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
unsigned long regs = (unsigned long)bp;
@@ -183,6 +185,23 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
return (struct pt_regs *)(regs & ~0x1);
}
+#else
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+ unsigned long regs = (unsigned long)bp;
+
+ if (regs & 0x80000000)
+ return NULL;
+
+ return (struct pt_regs *)(regs | 0x80000000);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
+#else
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
+#endif
static bool update_stack_state(struct unwind_state *state,
unsigned long *next_bp)
@@ -202,7 +221,7 @@ static bool update_stack_state(struct unwind_state *state,
regs = decode_frame_pointer(next_bp);
if (regs) {
frame = (unsigned long *)regs;
- len = regs_size(regs);
+ len = KERNEL_REGS_SIZE;
state->got_irq = true;
} else {
frame = next_bp;
@@ -226,6 +245,14 @@ static bool update_stack_state(struct unwind_state *state,
frame < prev_frame_end)
return false;
+ /*
+ * On 32-bit with user mode regs, make sure the last two regs are safe
+ * to access:
+ */
+ if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
+ !on_stack(info, frame, len + 2*sizeof(long)))
+ return false;
+
/* Move state to the next frame: */
if (regs) {
state->regs = regs;
@@ -328,6 +355,13 @@ bad_address:
state->regs->sp < (unsigned long)task_pt_regs(state->task))
goto the_end;
+ /*
+ * There are some known frame pointer issues on 32-bit. Disable
+ * unwinder warnings on 32-bit until it gets objtool support.
+ */
+ if (IS_ENABLED(CONFIG_X86_32))
+ goto the_end;
+
if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 570b70d..a3f973b 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -86,8 +86,8 @@ static struct orc_entry *orc_find(unsigned long ip)
idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
if (unlikely((idx >= lookup_num_blocks-1))) {
- orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
- idx, lookup_num_blocks, ip);
+ orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
+ idx, lookup_num_blocks, (void *)ip);
return NULL;
}
@@ -96,8 +96,8 @@ static struct orc_entry *orc_find(unsigned long ip)
if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
(__start_orc_unwind + stop > __stop_orc_unwind))) {
- orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
- idx, lookup_num_blocks, start, stop, ip);
+ orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
+ idx, lookup_num_blocks, start, stop, (void *)ip);
return NULL;
}
@@ -279,7 +279,7 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
if (!stack_access_ok(state, addr, sizeof(long)))
return false;
- *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
+ *val = READ_ONCE_NOCHECK(*(unsigned long *)addr);
return true;
}
@@ -373,7 +373,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_R10:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R10 at ip %p\n",
+ orc_warn("missing regs for base reg R10 at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -382,7 +382,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_R13:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg R13 at ip %p\n",
+ orc_warn("missing regs for base reg R13 at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -391,7 +391,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_DI:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DI at ip %p\n",
+ orc_warn("missing regs for base reg DI at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -400,7 +400,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_REG_DX:
if (!state->regs || !state->full_regs) {
- orc_warn("missing regs for base reg DX at ip %p\n",
+ orc_warn("missing regs for base reg DX at ip %pB\n",
(void *)state->ip);
goto done;
}
@@ -408,7 +408,7 @@ bool unwind_next_frame(struct unwind_state *state)
break;
default:
- orc_warn("unknown SP base reg %d for ip %p\n",
+ orc_warn("unknown SP base reg %d for ip %pB\n",
orc->sp_reg, (void *)state->ip);
goto done;
}
@@ -436,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
- orc_warn("can't dereference registers at %p for ip %p\n",
+ orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
}
@@ -448,7 +448,7 @@ bool unwind_next_frame(struct unwind_state *state)
case ORC_TYPE_REGS_IRET:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
- orc_warn("can't dereference iret registers at %p for ip %p\n",
+ orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
}
@@ -465,7 +465,8 @@ bool unwind_next_frame(struct unwind_state *state)
break;
default:
- orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
+ orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+ orc->type, (void *)orig_ip);
break;
}
@@ -487,7 +488,7 @@ bool unwind_next_frame(struct unwind_state *state)
break;
default:
- orc_warn("unknown BP base reg %d for ip %p\n",
+ orc_warn("unknown BP base reg %d for ip %pB\n",
orc->bp_reg, (void *)orig_ip);
goto done;
}
@@ -496,7 +497,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (state->stack_info.type == prev_type &&
on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
state->sp <= prev_sp) {
- orc_warn("stack going in the wrong direction? ip=%p\n",
+ orc_warn("stack going in the wrong direction? ip=%pB\n",
(void *)orig_ip);
goto done;
}
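
[The switch from %lx/%p to %pB matters because %pB symbolizes an address with return-address bias, pointing at the call site rather than the instruction after it, which is what an unwinder's output should show. For illustration:]

    printk("%pB\n", (void *)ip);    /* e.g. "unwind_next_frame+0x4f/0x2e0" */
    printk("%pS\n", (void *)ip);    /* symbol+offset without the bias */
    printk("%lx\n", ip);            /* raw address, hard to interpret under KASLR */
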
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 495c776..a3755d2 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -271,12 +271,15 @@ static bool is_prefix_bad(struct insn *insn)
int i;
for (i = 0; i < insn->prefixes.nbytes; i++) {
- switch (insn->prefixes.bytes[i]) {
- case 0x26: /* INAT_PFX_ES */
- case 0x2E: /* INAT_PFX_CS */
- case 0x36: /* INAT_PFX_DS */
- case 0x3E: /* INAT_PFX_SS */
- case 0xF0: /* INAT_PFX_LOCK */
+ insn_attr_t attr;
+
+ attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+ switch (attr) {
+ case INAT_MAKE_PREFIX(INAT_PFX_ES):
+ case INAT_MAKE_PREFIX(INAT_PFX_CS):
+ case INAT_MAKE_PREFIX(INAT_PFX_DS):
+ case INAT_MAKE_PREFIX(INAT_PFX_SS):
+ case INAT_MAKE_PREFIX(INAT_PFX_LOCK):
return true;
}
}
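
[Matching on INAT attributes instead of raw byte values keeps the set of "bad" prefixes defined in one place, the inat tables generated from the opcode map, rather than duplicating magic constants. A small sketch of the same lookup in isolation:]

    insn_attr_t attr = inat_get_opcode_attribute(0x2E);

    if (attr == INAT_MAKE_PREFIX(INAT_PFX_CS))
            pr_debug("0x2E decodes as a CS segment-override prefix\n");
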
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59..3d3c2f7 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
-verify_cpu:
+ENTRY(verify_cpu)
pushf # Save caller passed flags
push $0 # Kill any dangerous flags
popf
@@ -139,3 +139,4 @@ verify_cpu:
popf # Restore caller passed flags
xorl %eax, %eax
ret
+ENDPROC(verify_cpu)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 7924a53..5edb27f 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1994 Linus Torvalds
*
@@ -54,6 +55,7 @@
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>
+#include <asm/switch_to.h>
/*
* Known problems:
@@ -93,7 +95,6 @@
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86plus_struct __user *user;
struct vm86 *vm86 = current->thread.vm86;
@@ -145,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
do_exit(SIGSEGV);
}
- tss = &per_cpu(cpu_tss, get_cpu());
+ preempt_disable();
tsk->thread.sp0 = vm86->saved_sp0;
tsk->thread.sysenter_cs = __KERNEL_CS;
- load_sp0(tss, &tsk->thread);
+ update_sp0(tsk);
+ refresh_sysenter_cs(&tsk->thread);
vm86->saved_sp0 = 0;
- put_cpu();
+ preempt_enable();
memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
@@ -236,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
- struct tss_struct *tss;
struct task_struct *tsk = current;
struct vm86 *vm86 = tsk->thread.vm86;
struct kernel_vm86_regs vm86regs;
@@ -364,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
vm86->saved_sp0 = tsk->thread.sp0;
lazy_save_gs(vm86->regs32.gs);
- tss = &per_cpu(cpu_tss, get_cpu());
/* make room for real-mode segments */
+ preempt_disable();
tsk->thread.sp0 += 16;
- if (static_cpu_has(X86_FEATURE_SEP))
+ if (static_cpu_has(X86_FEATURE_SEP)) {
tsk->thread.sysenter_cs = 0;
+ refresh_sysenter_cs(&tsk->thread);
+ }
- load_sp0(tss, &tsk->thread);
- put_cpu();
+ update_sp0(tsk);
+ preempt_enable();
if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
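
[The get_cpu()/put_cpu() pair becomes a plain preempt_disable()/preempt_enable() because the code no longer captures a TSS pointer; update_sp0() and refresh_sysenter_cs() touch the per-CPU state directly. Sketches of the two helpers, paraphrased from the headers of this series and not guaranteed to match verbatim:]

    static inline void update_sp0(struct task_struct *task)
    {
            load_sp0(task->thread.sp0);     /* single-argument form */
    }

    static inline void refresh_sysenter_cs(struct thread_struct *thread)
    {
            /* Keep MSR_IA32_SYSENTER_CS in sync with the thread's value */
            if (this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs)
                    return;

            this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
            wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
    }
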
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f05f00a..a4009fb 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* ld script for the x86 kernel
*
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a088b2c..c8fa4cd 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -28,6 +28,8 @@ void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
int __init iommu_init_noop(void) { return 0; }
void iommu_shutdown_noop(void) { }
+bool __init bool_x86_init_noop(void) { return false; }
+void x86_op_int_noop(int cpu) { }
/*
* The platform setup functions are preset with the default functions
@@ -81,6 +83,13 @@ struct x86_init_ops x86_init __initdata = {
.init_irq = x86_default_pci_init_irq,
.fixup_irqs = x86_default_pci_fixup_irqs,
},
+
+ .hyper = {
+ .init_platform = x86_init_noop,
+ .guest_late_init = x86_init_noop,
+ .x2apic_available = bool_x86_init_noop,
+ .init_mem_mapping = x86_init_noop,
+ },
};
struct x86_cpuinit_ops x86_cpuinit = {
@@ -101,6 +110,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
.get_nmi_reason = default_get_nmi_reason,
.save_sched_clock_state = tsc_save_sched_clock_state,
.restore_sched_clock_state = tsc_restore_sched_clock_state,
+ .hyper.pin_vcpu = x86_op_int_noop,
};
EXPORT_SYMBOL_GPL(x86_platform);
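
[The new x86_init.hyper group gives guest platforms a single hook point, and because the defaults above are safe noops, callers can invoke them unconditionally. Hypothetical call sites, for illustration only:]

    x86_init.hyper.init_platform();         /* early in setup_arch() */
    x86_init.hyper.init_mem_mapping();      /* once the direct map is up */

    if (x86_init.hyper.x2apic_available())
            /* the guest exposes x2APIC support */;
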