summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-03-01 09:02:26 +0100
committerIngo Molnar <mingo@kernel.org>2017-03-01 09:02:26 +0100
commit0871d5a66da5c41151e0896a90298b163e42f2e0 (patch)
tree1ba71fab9016cb28bb9d18ffd62b6b744f2f761c /arch/x86/kernel
parente22af0be2cf654bb225f19750c6b9aab1627dc9e (diff)
parent2d6be4abf514fc26c83d239c7f31da1f95e4a31d (diff)
downloadop-kernel-dev-0871d5a66da5c41151e0896a90298b163e42f2e0.zip
op-kernel-dev-0871d5a66da5c41151e0896a90298b163e42f2e0.tar.gz
Merge branch 'linus' into WIP.x86/boot, to fix up conflicts and to pick up updates
Conflicts: arch/x86/xen/setup.c Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c14
-rw-r--r--arch/x86/kernel/acpi/cstate.c9
-rw-r--r--arch/x86/kernel/amd_gart_64.c2
-rw-r--r--arch/x86/kernel/apic/apic.c41
-rw-r--r--arch/x86/kernel/apic/io_apic.c6
-rw-r--r--arch/x86/kernel/apic/msi.c2
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c548
-rw-r--r--arch/x86/kernel/apm_32.c6
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/asm-offsets_64.c9
-rw-r--r--arch/x86/kernel/cpu/amd.c22
-rw-r--r--arch/x86/kernel/cpu/centaur.c6
-rw-r--r--arch/x86/kernel/cpu/common.c45
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c50
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-genpool.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c88
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c13
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c36
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c4
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c500
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c101
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c9
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c50
-rw-r--r--arch/x86/kernel/cpu/transmeta.c3
-rw-r--r--arch/x86/kernel/fpu/core.c7
-rw-r--r--arch/x86/kernel/fpu/init.c29
-rw-r--r--arch/x86/kernel/fpu/xstate.c9
-rw-r--r--arch/x86/kernel/hpet.c1
-rw-r--r--arch/x86/kernel/ioport.c5
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/irq_work.c5
-rw-r--r--arch/x86/kernel/itmt.c6
-rw-r--r--arch/x86/kernel/jump_label.c3
-rw-r--r--arch/x86/kernel/kprobes/core.c2
-rw-r--r--arch/x86/kernel/kvm.c40
-rw-r--r--arch/x86/kernel/kvmclock.c7
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c5
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/pci-calgary_64.c8
-rw-r--r--arch/x86/kernel/pci-dma.c7
-rw-r--r--arch/x86/kernel/pci-nommu.c2
-rw-r--r--arch/x86/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86/kernel/process.c10
-rw-r--r--arch/x86/kernel/setup.c18
-rw-r--r--arch/x86/kernel/smp.c15
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kernel/test_nx.c173
-rw-r--r--arch/x86/kernel/test_rodata.c75
-rw-r--r--arch/x86/kernel/traps.c5
-rw-r--r--arch/x86/kernel/tsc.c16
-rw-r--r--arch/x86/kernel/tsc_sync.c16
-rw-r--r--arch/x86/kernel/vm86_32.c5
-rw-r--r--arch/x86/kernel/vmlinux.lds.S1
59 files changed, 915 insertions, 1162 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 581386c..84c0059 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,8 +100,6 @@ obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_APB_TIMER) += apb_timer.o
obj-$(CONFIG_AMD_NB) += amd_nb.o
-obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
-obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8735527..a468f0f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
#include <linux/bootmem.h>
#include <linux/ioport.h>
#include <linux/pci.h>
+#include <linux/efi-bgrt.h>
#include <asm/e820/api.h>
#include <asm/irqdomain.h>
@@ -724,11 +725,12 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
return 0;
}
-int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+ int *pcpu)
{
int cpu;
- cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED);
+ cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED);
if (cpu < 0) {
pr_info(PREFIX "Unable to map lapic to logical cpu number\n");
return cpu;
@@ -1558,6 +1560,12 @@ int __init early_acpi_boot_init(void)
return 0;
}
+static int __init acpi_parse_bgrt(struct acpi_table_header *table)
+{
+ efi_bgrt_init(table);
+ return 0;
+}
+
int __init acpi_boot_init(void)
{
/* those are executed after early-quirks are executed */
@@ -1582,6 +1590,8 @@ int __init acpi_boot_init(void)
acpi_process_madt();
acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
+ if (IS_ENABLED(CONFIG_ACPI_BGRT))
+ acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);
if (!acpi_noirq)
x86_init.pci.init = pci_acpi_init;
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index af15f44..8233a63 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -12,7 +12,6 @@
#include <linux/sched.h>
#include <acpi/processor.h>
-#include <asm/acpi.h>
#include <asm/mwait.h>
#include <asm/special_insns.h>
@@ -89,7 +88,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
retval = 0;
/* If the HW does not support any sub-states in this C-state */
if (num_cstate_subtype == 0) {
- pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n", cx->address, edx_part);
+ pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n",
+ cx->address, edx_part);
retval = -1;
goto out;
}
@@ -104,8 +104,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
if (!mwait_supported[cstate_type]) {
mwait_supported[cstate_type] = 1;
printk(KERN_DEBUG
- "Monitor-Mwait will be used to enter C-%d "
- "state\n", cx->type);
+ "Monitor-Mwait will be used to enter C-%d state\n",
+ cx->type);
}
snprintf(cx->desc,
ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
@@ -166,6 +166,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
static int __init ffh_cstate_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
+
if (c->x86_vendor != X86_VENDOR_INTEL)
return -1;
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 63ff468..82dfe32 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -695,7 +695,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
return -1;
}
-static struct dma_map_ops gart_dma_ops = {
+static const struct dma_map_ops gart_dma_ops = {
.map_sg = gart_map_sg,
.unmap_sg = gart_unmap_sg,
.map_page = gart_map_page,
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4cb43df..4eca103 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -529,18 +529,19 @@ static void lapic_timer_broadcast(const struct cpumask *mask)
* The local apic timer can be used for any function which is CPU local.
*/
static struct clock_event_device lapic_clockevent = {
- .name = "lapic",
- .features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
- | CLOCK_EVT_FEAT_DUMMY,
- .shift = 32,
- .set_state_shutdown = lapic_timer_shutdown,
- .set_state_periodic = lapic_timer_set_periodic,
- .set_state_oneshot = lapic_timer_set_oneshot,
- .set_next_event = lapic_next_event,
- .broadcast = lapic_timer_broadcast,
- .rating = 100,
- .irq = -1,
+ .name = "lapic",
+ .features = CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
+ | CLOCK_EVT_FEAT_DUMMY,
+ .shift = 32,
+ .set_state_shutdown = lapic_timer_shutdown,
+ .set_state_periodic = lapic_timer_set_periodic,
+ .set_state_oneshot = lapic_timer_set_oneshot,
+ .set_state_oneshot_stopped = lapic_timer_shutdown,
+ .set_next_event = lapic_next_event,
+ .broadcast = lapic_timer_broadcast,
+ .rating = 100,
+ .irq = -1,
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
@@ -1245,7 +1246,7 @@ static void lapic_setup_esr(void)
/**
* setup_local_APIC - setup the local APIC
*
- * Used to setup local APIC while initializing BSP or bringin up APs.
+ * Used to setup local APIC while initializing BSP or bringing up APs.
* Always called with preemption disabled.
*/
void setup_local_APIC(void)
@@ -1864,14 +1865,14 @@ static void __smp_spurious_interrupt(u8 vector)
"should never happen.\n", vector, smp_processor_id());
}
-__visible void smp_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_spurious_interrupt(~regs->orig_ax);
exiting_irq();
}
-__visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
{
u8 vector = ~regs->orig_ax;
@@ -1922,14 +1923,14 @@ static void __smp_error_interrupt(struct pt_regs *regs)
}
-__visible void smp_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_error_interrupt(regs);
exiting_irq();
}
-__visible void smp_trace_error_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_error_apic_entry(ERROR_APIC_VECTOR);
@@ -2028,8 +2029,8 @@ void disconnect_bsp_APIC(int virt_wire_setup)
/*
* The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
* contiguously, it equals to current allocated max logical CPU ID plus 1.
- * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of
- * nr_logical_cpuids is nr_cpu_ids.
+ * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range,
+ * so the maximum of nr_logical_cpuids is nr_cpu_ids.
*
* NOTE: Reserve 0 for BSP.
*/
@@ -2094,7 +2095,7 @@ int __generic_processor_info(int apicid, int version, bool enabled)
* Since fixing handling of boot_cpu_physical_apicid requires
* another discussion and tests on each platform, we leave it
* for now and here we use read_apic_id() directly in this
- * function, generic_processor_info().
+ * function, __generic_processor_info().
*/
if (disabled_cpu_apicid != BAD_APICID &&
disabled_cpu_apicid != read_apic_id() &&
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1e35dd0..347bb9f 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1107,12 +1107,12 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info)
ioapic = mp_find_ioapic(gsi);
if (ioapic < 0)
- return -1;
+ return -ENODEV;
pin = mp_find_ioapic_pin(ioapic, gsi);
idx = find_irq_entry(ioapic, pin, mp_INT);
if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
- return -1;
+ return -ENODEV;
return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info);
}
@@ -2117,6 +2117,7 @@ static inline void __init check_timer(void)
if (idx != -1 && irq_trigger(idx))
unmask_ioapic_irq(irq_get_chip_data(0));
}
+ irq_domain_deactivate_irq(irq_data);
irq_domain_activate_irq(irq_data);
if (timer_irq_works()) {
if (disable_timer_pin_1 > 0)
@@ -2138,6 +2139,7 @@ static inline void __init check_timer(void)
* legacy devices should be connected to IO APIC #0
*/
replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
+ irq_domain_deactivate_irq(irq_data);
irq_domain_activate_irq(irq_data);
legacy_pic->unmask(0);
if (timer_irq_works()) {
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 015bbf3..c61aec7 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -82,7 +82,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
if (domain == NULL)
return -ENOSYS;
- return pci_msi_domain_alloc_irqs(domain, dev, nvec, type);
+ return msi_domain_alloc_irqs(domain, &dev->dev, nvec);
}
void native_teardown_msi_irq(unsigned int irq)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 5d30c5e..f3557a1 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg)
__send_cleanup_vector(data);
}
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
{
unsigned vector, me;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c99d933..7f179f1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -42,40 +42,44 @@
DEFINE_PER_CPU(int, x2apic_extra_bits);
-#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args)
-
-static enum uv_system_type uv_system_type;
-static u64 gru_start_paddr, gru_end_paddr;
-static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
-static u64 gru_dist_lmask, gru_dist_umask;
-static union uvh_apicid uvh_apicid;
-
-/* info derived from CPUID */
+static enum uv_system_type uv_system_type;
+static bool uv_hubless_system;
+static u64 gru_start_paddr, gru_end_paddr;
+static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
+static u64 gru_dist_lmask, gru_dist_umask;
+static union uvh_apicid uvh_apicid;
+
+/* Information derived from CPUID: */
static struct {
unsigned int apicid_shift;
unsigned int apicid_mask;
unsigned int socketid_shift; /* aka pnode_shift for UV1/2/3 */
unsigned int pnode_mask;
unsigned int gpa_shift;
+ unsigned int gnode_shift;
} uv_cpuid;
int uv_min_hub_revision_id;
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+
unsigned int uv_apicid_hibits;
EXPORT_SYMBOL_GPL(uv_apicid_hibits);
static struct apic apic_x2apic_uv_x;
static struct uv_hub_info_s uv_hub_info_node0;
-/* Set this to use hardware error handler instead of kernel panic */
+/* Set this to use hardware error handler instead of kernel panic: */
static int disable_uv_undefined_panic = 1;
+
unsigned long uv_undefined(char *str)
{
if (likely(!disable_uv_undefined_panic))
panic("UV: error: undefined MMR: %s\n", str);
else
pr_crit("UV: error: undefined MMR: %s\n", str);
- return ~0ul; /* cause a machine fault */
+
+ /* Cause a machine fault: */
+ return ~0ul;
}
EXPORT_SYMBOL(uv_undefined);
@@ -86,18 +90,19 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
val = *mmr;
early_iounmap(mmr, sizeof(*mmr));
+
return val;
}
static inline bool is_GRU_range(u64 start, u64 end)
{
if (gru_dist_base) {
- u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */
- u64 sl = start & gru_dist_lmask; /* base offset bits */
+ u64 su = start & gru_dist_umask; /* Upper (incl pnode) bits */
+ u64 sl = start & gru_dist_lmask; /* Base offset bits */
u64 eu = end & gru_dist_umask;
u64 el = end & gru_dist_lmask;
- /* Must reside completely within a single GRU range */
+ /* Must reside completely within a single GRU range: */
return (sl == gru_dist_base && el == gru_dist_base &&
su >= gru_first_node_paddr &&
su <= gru_last_node_paddr &&
@@ -134,13 +139,14 @@ static int __init early_get_pnodeid(void)
break;
case UV4_HUB_PART_NUMBER:
uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1;
+ uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */
break;
}
uv_hub_info->hub_revision = uv_min_hub_revision_id;
uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1;
pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask;
- uv_cpuid.gpa_shift = 46; /* default unless changed */
+ uv_cpuid.gpa_shift = 46; /* Default unless changed */
pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n",
node_id.s.revision, node_id.s.part_number, node_id.s.node_id,
@@ -148,11 +154,12 @@ static int __init early_get_pnodeid(void)
return pnode;
}
-/* [copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
-#define SMT_LEVEL 0 /* leaf 0xb SMT level */
-#define INVALID_TYPE 0 /* leaf 0xb sub-leaf types */
-#define SMT_TYPE 1
-#define CORE_TYPE 2
+/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */
+
+#define SMT_LEVEL 0 /* Leaf 0xb SMT level */
+#define INVALID_TYPE 0 /* Leaf 0xb sub-leaf types */
+#define SMT_TYPE 1
+#define CORE_TYPE 2
#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
@@ -166,11 +173,13 @@ static void set_x2apic_bits(void)
pr_info("UV: CPU does not have CPUID.11\n");
return;
}
+
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) {
pr_info("UV: CPUID.11 not implemented\n");
return;
}
+
sid_shift = BITS_SHIFT_NEXT_LEVEL(eax);
sub_index = 1;
do {
@@ -181,8 +190,9 @@ static void set_x2apic_bits(void)
}
sub_index++;
} while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
- uv_cpuid.apicid_shift = 0;
- uv_cpuid.apicid_mask = (~(-1 << sid_shift));
+
+ uv_cpuid.apicid_shift = 0;
+ uv_cpuid.apicid_mask = (~(-1 << sid_shift));
uv_cpuid.socketid_shift = sid_shift;
}
@@ -193,10 +203,8 @@ static void __init early_get_apic_socketid_shift(void)
set_x2apic_bits();
- pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n",
- uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
- pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n",
- uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
+ pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask);
+ pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask);
}
/*
@@ -209,10 +217,8 @@ static void __init uv_set_apicid_hibit(void)
union uv1h_lb_target_physical_apic_id_mask_u apicid_mask;
if (is_uv1_hub()) {
- apicid_mask.v =
- uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK);
- uv_apicid_hibits =
- apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK;
+ apicid_mask.v = uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK);
+ uv_apicid_hibits = apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK;
}
}
@@ -221,20 +227,26 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
int pnodeid;
int uv_apic;
- if (strncmp(oem_id, "SGI", 3) != 0)
+ if (strncmp(oem_id, "SGI", 3) != 0) {
+ if (strncmp(oem_id, "NSGI", 4) == 0) {
+ uv_hubless_system = true;
+ pr_info("UV: OEM IDs %s/%s, HUBLESS\n",
+ oem_id, oem_table_id);
+ }
return 0;
+ }
if (numa_off) {
pr_err("UV: NUMA is off, disabling UV support\n");
return 0;
}
- /* Setup early hub type field in uv_hub_info for Node 0 */
+ /* Set up early hub type field in uv_hub_info for Node 0 */
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
/*
* Determine UV arch type.
- * SGI: UV100/1000
+ * SGI: UV100/1000
* SGI2: UV2000/3000
* SGI3: UV300 (truncated to 4 chars because of different varieties)
* SGI4: UV400 (truncated to 4 chars because of different varieties)
@@ -250,31 +262,32 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
pnodeid = early_get_pnodeid();
early_get_apic_socketid_shift();
- x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
+
+ x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
x86_platform.nmi_init = uv_nmi_init;
- if (!strcmp(oem_table_id, "UVX")) { /* most common */
+ if (!strcmp(oem_table_id, "UVX")) {
+ /* This is the most common hardware variant: */
uv_system_type = UV_X2APIC;
uv_apic = 0;
- } else if (!strcmp(oem_table_id, "UVH")) { /* only UV1 systems */
+ } else if (!strcmp(oem_table_id, "UVH")) {
+ /* Only UV1 systems: */
uv_system_type = UV_NON_UNIQUE_APIC;
- __this_cpu_write(x2apic_extra_bits,
- pnodeid << uvh_apicid.s.pnode_shift);
+ __this_cpu_write(x2apic_extra_bits, pnodeid << uvh_apicid.s.pnode_shift);
uv_set_apicid_hibit();
uv_apic = 1;
- } else if (!strcmp(oem_table_id, "UVL")) { /* only used for */
- uv_system_type = UV_LEGACY_APIC; /* very small systems */
+ } else if (!strcmp(oem_table_id, "UVL")) {
+ /* Only used for very small systems: */
+ uv_system_type = UV_LEGACY_APIC;
uv_apic = 0;
} else {
goto badbios;
}
- pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n",
- oem_id, oem_table_id, uv_system_type,
- uv_min_hub_revision_id, uv_apic);
+ pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic);
return uv_apic;
@@ -295,6 +308,12 @@ int is_uv_system(void)
}
EXPORT_SYMBOL_GPL(is_uv_system);
+int is_uv_hubless(void)
+{
+ return uv_hubless_system;
+}
+EXPORT_SYMBOL_GPL(is_uv_hubless);
+
void **__uv_hub_info_list;
EXPORT_SYMBOL_GPL(__uv_hub_info_list);
@@ -307,16 +326,18 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
-/* the following values are used for the per node hub info struct */
-static __initdata unsigned short *_node_to_pnode;
-static __initdata unsigned short _min_socket, _max_socket;
-static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
-static __initdata struct uv_gam_range_entry *uv_gre_table;
-static __initdata struct uv_gam_parameters *uv_gp_table;
-static __initdata unsigned short *_socket_to_node;
-static __initdata unsigned short *_socket_to_pnode;
-static __initdata unsigned short *_pnode_to_socket;
-static __initdata struct uv_gam_range_s *_gr_table;
+/* The following values are used for the per node hub info struct */
+static __initdata unsigned short *_node_to_pnode;
+static __initdata unsigned short _min_socket, _max_socket;
+static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
+static __initdata struct uv_gam_range_entry *uv_gre_table;
+static __initdata struct uv_gam_parameters *uv_gp_table;
+static __initdata unsigned short *_socket_to_node;
+static __initdata unsigned short *_socket_to_pnode;
+static __initdata unsigned short *_pnode_to_socket;
+
+static __initdata struct uv_gam_range_s *_gr_table;
+
#define SOCK_EMPTY ((unsigned short)~0)
extern int uv_hub_info_version(void)
@@ -325,7 +346,7 @@ extern int uv_hub_info_version(void)
}
EXPORT_SYMBOL(uv_hub_info_version);
-/* Build GAM range lookup table */
+/* Build GAM range lookup table: */
static __init void build_uv_gr_table(void)
{
struct uv_gam_range_entry *gre = uv_gre_table;
@@ -343,25 +364,24 @@ static __init void build_uv_gr_table(void)
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE) {
- if (!ram_limit) { /* mark hole between ram/non-ram */
+ if (!ram_limit) {
+ /* Mark hole between RAM/non-RAM: */
ram_limit = last_limit;
last_limit = gre->limit;
lsid++;
continue;
}
last_limit = gre->limit;
- pr_info("UV: extra hole in GAM RE table @%d\n",
- (int)(gre - uv_gre_table));
+ pr_info("UV: extra hole in GAM RE table @%d\n", (int)(gre - uv_gre_table));
continue;
}
if (_max_socket < gre->sockid) {
- pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n",
- gre->sockid, _max_socket,
- (int)(gre - uv_gre_table));
+ pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n", gre->sockid, _max_socket, (int)(gre - uv_gre_table));
continue;
}
sid = gre->sockid - _min_socket;
- if (lsid < sid) { /* new range */
+ if (lsid < sid) {
+ /* New range: */
grt = &_gr_table[indx];
grt->base = lindx;
grt->nasid = gre->nasid;
@@ -370,27 +390,32 @@ static __init void build_uv_gr_table(void)
lindx = indx++;
continue;
}
- if (lsid == sid && !ram_limit) { /* update range */
- if (grt->limit == last_limit) { /* .. if contiguous */
+ /* Update range: */
+ if (lsid == sid && !ram_limit) {
+ /* .. if contiguous: */
+ if (grt->limit == last_limit) {
grt->limit = last_limit = gre->limit;
continue;
}
}
- if (!ram_limit) { /* non-contiguous ram range */
+ /* Non-contiguous RAM range: */
+ if (!ram_limit) {
grt++;
grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
continue;
}
- grt++; /* non-contiguous/non-ram */
- grt->base = grt - _gr_table; /* base is this entry */
+ /* Non-contiguous/non-RAM: */
+ grt++;
+ /* base is this entry */
+ grt->base = grt - _gr_table;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
lsid++;
}
- /* shorten table if possible */
+ /* Shorten table if possible */
grt++;
i = grt - _gr_table;
if (i < _gr_table_len) {
@@ -404,16 +429,15 @@ static __init void build_uv_gr_table(void)
}
}
- /* display resultant gam range table */
+ /* Display resultant GAM range table: */
for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) {
+ unsigned long start, end;
int gb = grt->base;
- unsigned long start = gb < 0 ? 0 :
- (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT;
- unsigned long end =
- (unsigned long)grt->limit << UV_GAM_RANGE_SHFT;
- pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n",
- i, grt->nasid, start, end, gb);
+ start = gb < 0 ? 0 : (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT;
+ end = (unsigned long)grt->limit << UV_GAM_RANGE_SHFT;
+
+ pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n", i, grt->nasid, start, end, gb);
}
}
@@ -424,16 +448,19 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
pnode = uv_apicid_to_pnode(phys_apicid);
phys_apicid |= uv_apicid_hibits;
+
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
+
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_STARTUP;
+
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
return 0;
@@ -567,7 +594,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
.apic_id_registered = uv_apic_id_registered,
.irq_delivery_mode = dest_Fixed,
- .irq_dest_mode = 0, /* physical */
+ .irq_dest_mode = 0, /* Physical */
.target_cpus = online_target_cpus,
.disable_esr = 0,
@@ -628,23 +655,22 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
switch (i) {
case 0:
m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR;
- m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
+ m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR;
break;
case 1:
m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR;
- m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
+ m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR;
break;
case 2:
m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR;
- m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
+ m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR;
break;
}
alias.v = uv_read_local_mmr(m_overlay);
if (alias.s.enable && alias.s.base == 0) {
*size = (1UL << alias.s.m_alias);
redirect.v = uv_read_local_mmr(m_redirect);
- *base = (unsigned long)redirect.s.dest_base
- << DEST_SHIFT;
+ *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
return;
}
}
@@ -653,8 +679,7 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
enum map_type {map_wb, map_uc};
-static __init void map_high(char *id, unsigned long base, int pshift,
- int bshift, int max_pnode, enum map_type map_type)
+static __init void map_high(char *id, unsigned long base, int pshift, int bshift, int max_pnode, enum map_type map_type)
{
unsigned long bytes, paddr;
@@ -679,16 +704,19 @@ static __init void map_gru_distributed(unsigned long c)
int nid;
gru.v = c;
- /* only base bits 42:28 relevant in dist mode */
+
+ /* Only base bits 42:28 relevant in dist mode */
gru_dist_base = gru.v & 0x000007fff0000000UL;
if (!gru_dist_base) {
pr_info("UV: Map GRU_DIST base address NULL\n");
return;
}
+
bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
+
for_each_online_node(nid) {
paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
gru_dist_base;
@@ -696,11 +724,12 @@ static __init void map_gru_distributed(unsigned long c)
gru_first_node_paddr = min(paddr, gru_first_node_paddr);
gru_last_node_paddr = max(paddr, gru_last_node_paddr);
}
+
/* Save upper (63:M) bits of address only for is_GRU_range */
gru_first_node_paddr &= gru_dist_umask;
gru_last_node_paddr &= gru_dist_umask;
- pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n",
- gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
+
+ pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
}
static __init void map_gru_high(int max_pnode)
@@ -720,6 +749,7 @@ static __init void map_gru_high(int max_pnode)
map_gru_distributed(gru.v);
return;
}
+
base = (gru.v & mask) >> shift;
map_high("GRU", base, shift, shift, max_pnode, map_wb);
gru_start_paddr = ((u64)base << shift);
@@ -773,8 +803,8 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
id = mmiohs[index].id;
overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
- pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n",
- id, overlay.v, overlay.s3.base, overlay.s3.m_io);
+
+ pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", id, overlay.v, overlay.s3.base, overlay.s3.m_io);
if (!overlay.s3.enable) {
pr_info("UV: %s disabled\n", id);
return;
@@ -785,7 +815,8 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
m_io = overlay.s3.m_io;
mmr = mmiohs[index].redirect;
n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
- min_pnode *= 2; /* convert to NASID */
+ /* Convert to NASID: */
+ min_pnode *= 2;
max_pnode *= 2;
max_io = lnasid = fi = li = -1;
@@ -794,16 +825,18 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
redirect.v = uv_read_local_mmr(mmr + i * 8);
nasid = redirect.s3.nasid;
+ /* Invalid NASID: */
if (nasid < min_pnode || max_pnode < nasid)
- nasid = -1; /* invalid NASID */
+ nasid = -1;
if (nasid == lnasid) {
li = i;
- if (i != n-1) /* last entry check */
+ /* Last entry check: */
+ if (i != n-1)
continue;
}
- /* check if we have a cached (or last) redirect to print */
+ /* Check if we have a cached (or last) redirect to print: */
if (lnasid != -1 || (i == n-1 && nasid != -1)) {
unsigned long addr1, addr2;
int f, l;
@@ -815,12 +848,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
f = fi;
l = li;
}
- addr1 = (base << shift) +
- f * (1ULL << m_io);
- addr2 = (base << shift) +
- (l + 1) * (1ULL << m_io);
- pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
- id, fi, li, lnasid, addr1, addr2);
+ addr1 = (base << shift) + f * (1ULL << m_io);
+ addr2 = (base << shift) + (l + 1) * (1ULL << m_io);
+ pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", id, fi, li, lnasid, addr1, addr2);
if (max_io < l)
max_io = l;
}
@@ -828,8 +858,7 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
lnasid = nasid;
}
- pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n",
- id, base, shift, m_io, max_io);
+ pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n", id, base, shift, m_io, max_io);
if (max_io >= 0)
map_high(id, base, shift, m_io, max_io, map_uc);
@@ -842,36 +871,35 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode)
int shift, enable, m_io, n_io;
if (is_uv3_hub() || is_uv4_hub()) {
- /* Map both MMIOH Regions */
+ /* Map both MMIOH regions: */
map_mmioh_high_uv3(0, min_pnode, max_pnode);
map_mmioh_high_uv3(1, min_pnode, max_pnode);
return;
}
if (is_uv1_hub()) {
- mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
- shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
- mmioh.v = uv_read_local_mmr(mmr);
- enable = !!mmioh.s1.enable;
- base = mmioh.s1.base;
- m_io = mmioh.s1.m_io;
- n_io = mmioh.s1.n_io;
+ mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
+ shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ mmioh.v = uv_read_local_mmr(mmr);
+ enable = !!mmioh.s1.enable;
+ base = mmioh.s1.base;
+ m_io = mmioh.s1.m_io;
+ n_io = mmioh.s1.n_io;
} else if (is_uv2_hub()) {
- mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
- shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
- mmioh.v = uv_read_local_mmr(mmr);
- enable = !!mmioh.s2.enable;
- base = mmioh.s2.base;
- m_io = mmioh.s2.m_io;
- n_io = mmioh.s2.n_io;
- } else
+ mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
+ shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ mmioh.v = uv_read_local_mmr(mmr);
+ enable = !!mmioh.s2.enable;
+ base = mmioh.s2.base;
+ m_io = mmioh.s2.m_io;
+ n_io = mmioh.s2.n_io;
+ } else {
return;
+ }
if (enable) {
max_pnode &= (1 << n_io) - 1;
- pr_info(
- "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
- base, shift, m_io, n_io, max_pnode);
+ pr_info("UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n", base, shift, m_io, n_io, max_pnode);
map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
} else {
pr_info("UV: MMIOH disabled\n");
@@ -889,16 +917,16 @@ static __init void uv_rtc_init(void)
long status;
u64 ticks_per_sec;
- status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
- &ticks_per_sec);
+ status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec);
+
if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
- printk(KERN_WARNING
- "unable to determine platform RTC clock frequency, "
- "guessing.\n");
- /* BIOS gives wrong value for clock freq. so guess */
+ pr_warn("UV: unable to determine platform RTC clock frequency, guessing.\n");
+
+ /* BIOS gives wrong value for clock frequency, so guess: */
sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
- } else
+ } else {
sn_rtc_cycles_per_second = ticks_per_sec;
+ }
}
/*
@@ -909,19 +937,19 @@ static void uv_heartbeat(unsigned long ignored)
struct timer_list *timer = &uv_scir_info->timer;
unsigned char bits = uv_scir_info->state;
- /* flip heartbeat bit */
+ /* Flip heartbeat bit: */
bits ^= SCIR_CPU_HEARTBEAT;
- /* is this cpu idle? */
+ /* Is this CPU idle? */
if (idle_cpu(raw_smp_processor_id()))
bits &= ~SCIR_CPU_ACTIVITY;
else
bits |= SCIR_CPU_ACTIVITY;
- /* update system controller interface reg */
+ /* Update system controller interface reg: */
uv_set_scir_bits(bits);
- /* enable next timer period */
+ /* Enable next timer period: */
mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
}
@@ -936,7 +964,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
add_timer_on(timer, cpu);
uv_cpu_scir_info(cpu)->enabled = 1;
- /* also ensure that boot cpu is enabled */
+ /* Also ensure that boot CPU is enabled: */
cpu = 0;
}
return 0;
@@ -969,9 +997,11 @@ static __init int uv_init_heartbeat(void)
{
int cpu;
- if (is_uv_system())
+ if (is_uv_system()) {
for_each_online_cpu(cpu)
uv_heartbeat_enable(cpu);
+ }
+
return 0;
}
@@ -980,14 +1010,10 @@ late_initcall(uv_init_heartbeat);
#endif /* !CONFIG_HOTPLUG_CPU */
/* Direct Legacy VGA I/O traffic to designated IOH */
-int uv_set_vga_state(struct pci_dev *pdev, bool decode,
- unsigned int command_bits, u32 flags)
+int uv_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags)
{
int domain, bus, rc;
- PR_DEVEL("devfn %x decode %d cmd %x flags %d\n",
- pdev->devfn, decode, command_bits, flags);
-
if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
return 0;
@@ -998,13 +1024,12 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
bus = pdev->bus->number;
rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
- PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc);
return rc;
}
/*
- * Called on each cpu to initialize the per_cpu UV data area.
+ * Called on each CPU to initialize the per_cpu UV data area.
* FIXME: hotplug not supported yet
*/
void uv_cpu_init(void)
@@ -1031,90 +1056,79 @@ static void get_mn(struct mn *mnp)
union uvh_rh_gam_config_mmr_u m_n_config;
union uv3h_gr0_gam_gr_config_u m_gr_config;
- m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
- mnp->n_val = m_n_config.s.n_skt;
+ /* Make sure the whole structure is well initialized: */
+ memset(mnp, 0, sizeof(*mnp));
+
+ m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR);
+ mnp->n_val = m_n_config.s.n_skt;
+
if (is_uv4_hub()) {
- mnp->m_val = 0;
- mnp->n_lshift = 0;
+ mnp->m_val = 0;
+ mnp->n_lshift = 0;
} else if (is_uv3_hub()) {
- mnp->m_val = m_n_config.s3.m_skt;
- m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
- mnp->n_lshift = m_gr_config.s3.m_skt;
+ mnp->m_val = m_n_config.s3.m_skt;
+ m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
+ mnp->n_lshift = m_gr_config.s3.m_skt;
} else if (is_uv2_hub()) {
- mnp->m_val = m_n_config.s2.m_skt;
- mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
+ mnp->m_val = m_n_config.s2.m_skt;
+ mnp->n_lshift = mnp->m_val == 40 ? 40 : 39;
} else if (is_uv1_hub()) {
- mnp->m_val = m_n_config.s1.m_skt;
- mnp->n_lshift = mnp->m_val;
+ mnp->m_val = m_n_config.s1.m_skt;
+ mnp->n_lshift = mnp->m_val;
}
mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0;
}
-void __init uv_init_hub_info(struct uv_hub_info_s *hub_info)
+void __init uv_init_hub_info(struct uv_hub_info_s *hi)
{
- struct mn mn = {0}; /* avoid unitialized warnings */
union uvh_node_id_u node_id;
+ struct mn mn;
get_mn(&mn);
- hub_info->m_val = mn.m_val;
- hub_info->n_val = mn.n_val;
- hub_info->m_shift = mn.m_shift;
- hub_info->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
-
- hub_info->hub_revision = uv_hub_info->hub_revision;
- hub_info->pnode_mask = uv_cpuid.pnode_mask;
- hub_info->min_pnode = _min_pnode;
- hub_info->min_socket = _min_socket;
- hub_info->pnode_to_socket = _pnode_to_socket;
- hub_info->socket_to_node = _socket_to_node;
- hub_info->socket_to_pnode = _socket_to_pnode;
- hub_info->gr_table_len = _gr_table_len;
- hub_info->gr_table = _gr_table;
- hub_info->gpa_mask = mn.m_val ?
+ hi->gpa_mask = mn.m_val ?
(1UL << (mn.m_val + mn.n_val)) - 1 :
(1UL << uv_cpuid.gpa_shift) - 1;
- node_id.v = uv_read_local_mmr(UVH_NODE_ID);
- hub_info->gnode_extra =
- (node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1;
-
- hub_info->gnode_upper =
- ((unsigned long)hub_info->gnode_extra << mn.m_val);
+ hi->m_val = mn.m_val;
+ hi->n_val = mn.n_val;
+ hi->m_shift = mn.m_shift;
+ hi->n_lshift = mn.n_lshift ? mn.n_lshift : 0;
+ hi->hub_revision = uv_hub_info->hub_revision;
+ hi->pnode_mask = uv_cpuid.pnode_mask;
+ hi->min_pnode = _min_pnode;
+ hi->min_socket = _min_socket;
+ hi->pnode_to_socket = _pnode_to_socket;
+ hi->socket_to_node = _socket_to_node;
+ hi->socket_to_pnode = _socket_to_pnode;
+ hi->gr_table_len = _gr_table_len;
+ hi->gr_table = _gr_table;
+
+ node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+ uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val);
+ hi->gnode_extra = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1;
+ hi->gnode_upper = (unsigned long)hi->gnode_extra << mn.m_val;
if (uv_gp_table) {
- hub_info->global_mmr_base = uv_gp_table->mmr_base;
- hub_info->global_mmr_shift = uv_gp_table->mmr_shift;
- hub_info->global_gru_base = uv_gp_table->gru_base;
- hub_info->global_gru_shift = uv_gp_table->gru_shift;
- hub_info->gpa_shift = uv_gp_table->gpa_shift;
- hub_info->gpa_mask = (1UL << hub_info->gpa_shift) - 1;
+ hi->global_mmr_base = uv_gp_table->mmr_base;
+ hi->global_mmr_shift = uv_gp_table->mmr_shift;
+ hi->global_gru_base = uv_gp_table->gru_base;
+ hi->global_gru_shift = uv_gp_table->gru_shift;
+ hi->gpa_shift = uv_gp_table->gpa_shift;
+ hi->gpa_mask = (1UL << hi->gpa_shift) - 1;
} else {
- hub_info->global_mmr_base =
- uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
- ~UV_MMR_ENABLE;
- hub_info->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
+ hi->global_mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE;
+ hi->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT;
}
- get_lowmem_redirect(
- &hub_info->lowmem_remap_base, &hub_info->lowmem_remap_top);
-
- hub_info->apic_pnode_shift = uv_cpuid.socketid_shift;
-
- /* show system specific info */
- pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n",
- hub_info->n_val, hub_info->m_val,
- hub_info->m_shift, hub_info->n_lshift);
-
- pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n",
- hub_info->gpa_mask, hub_info->gpa_shift,
- hub_info->pnode_mask, hub_info->apic_pnode_shift);
+ get_lowmem_redirect(&hi->lowmem_remap_base, &hi->lowmem_remap_top);
- pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n",
- hub_info->global_mmr_base, hub_info->global_mmr_shift,
- hub_info->global_gru_base, hub_info->global_gru_shift);
+ hi->apic_pnode_shift = uv_cpuid.socketid_shift;
- pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n",
- hub_info->gnode_upper, hub_info->gnode_extra);
+ /* Show system specific info: */
+ pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", hi->n_val, hi->m_val, hi->m_shift, hi->n_lshift);
+ pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", hi->gpa_mask, hi->gpa_shift, hi->pnode_mask, hi->apic_pnode_shift);
+ pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n", hi->global_mmr_base, hi->global_mmr_shift, hi->global_gru_base, hi->global_gru_shift);
+ pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", hi->gnode_upper, hi->gnode_extra);
}
static void __init decode_gam_params(unsigned long ptr)
@@ -1140,12 +1154,9 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (!index) {
pr_info("UV: GAM Range Table...\n");
- pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
- "Range", "", "Size", "Type", "NASID",
- "SID", "PN");
+ pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
}
- pr_info(
- "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
+ pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
@@ -1163,29 +1174,32 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
if (pnode_max < gre->pnode)
pnode_max = gre->pnode;
}
- _min_socket = sock_min;
- _max_socket = sock_max;
- _min_pnode = pnode_min;
- _max_pnode = pnode_max;
- _gr_table_len = index;
- pr_info(
- "UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n",
- index, _min_socket, _max_socket, _min_pnode, _max_pnode);
+ _min_socket = sock_min;
+ _max_socket = sock_max;
+ _min_pnode = pnode_min;
+ _max_pnode = pnode_max;
+ _gr_table_len = index;
+
+ pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode);
}
-static void __init decode_uv_systab(void)
+static int __init decode_uv_systab(void)
{
struct uv_systab *st;
int i;
+ if (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE)
+ return 0; /* No extended UVsystab required */
+
st = uv_systab;
- if ((!st || st->revision < UV_SYSTAB_VERSION_UV4) && !is_uv4_hub())
- return;
- if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) {
- pr_crit(
- "UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n",
- st->revision, UV_SYSTAB_VERSION_UV4_LATEST);
- BUG();
+ if ((!st) || (st->revision < UV_SYSTAB_VERSION_UV4_LATEST)) {
+ int rev = st ? st->revision : 0;
+
+ pr_err("UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", rev, UV_SYSTAB_VERSION_UV4_LATEST);
+ pr_err("UV: Cannot support UV operations, switching to generic PC\n");
+ uv_system_type = UV_NONE;
+
+ return -EINVAL;
}
for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) {
@@ -1206,10 +1220,11 @@ static void __init decode_uv_systab(void)
break;
}
}
+ return 0;
}
/*
- * Setup physical blade translations from UVH_NODE_PRESENT_TABLE
+ * Set up physical blade translations from UVH_NODE_PRESENT_TABLE
* .. NB: UVH_NODE_PRESENT_TABLE is going away,
* .. being replaced by GAM Range Table
*/
@@ -1245,14 +1260,13 @@ static void __init build_socket_tables(void)
if (!gre) {
if (is_uv1_hub() || is_uv2_hub() || is_uv3_hub()) {
pr_info("UV: No UVsystab socket table, ignoring\n");
- return; /* not required */
+ return;
}
- pr_crit(
- "UV: Error: UVsystab address translations not available!\n");
+ pr_crit("UV: Error: UVsystab address translations not available!\n");
BUG();
}
- /* build socket id -> node id, pnode */
+ /* Build socket id -> node id, pnode */
num = maxsock - minsock + 1;
bytes = num * sizeof(_socket_to_node[0]);
_socket_to_node = kmalloc(bytes, GFP_KERNEL);
@@ -1269,27 +1283,27 @@ static void __init build_socket_tables(void)
for (i = 0; i < nump; i++)
_pnode_to_socket[i] = SOCK_EMPTY;
- /* fill in pnode/node/addr conversion list values */
+ /* Fill in pnode/node/addr conversion list values: */
pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
i = gre->sockid - minsock;
+ /* Duplicate: */
if (_socket_to_pnode[i] != SOCK_EMPTY)
- continue; /* duplicate */
+ continue;
_socket_to_pnode[i] = gre->pnode;
i = gre->pnode - minpnode;
_pnode_to_socket[i] = gre->sockid;
- pr_info(
- "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
+ pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
_socket_to_pnode[gre->sockid - minsock],
_pnode_to_socket[gre->pnode - minpnode]);
}
- /* Set socket -> node values */
+ /* Set socket -> node values: */
lnid = -1;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
@@ -1305,7 +1319,7 @@ static void __init build_socket_tables(void)
sockid, apicid, nid);
}
- /* Setup physical blade to pnode translation from GAM Range Table */
+ /* Set up physical blade to pnode translation from GAM Range Table: */
bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]);
_node_to_pnode = kmalloc(bytes, GFP_KERNEL);
BUG_ON(!_node_to_pnode);
@@ -1315,8 +1329,7 @@ static void __init build_socket_tables(void)
for (sockid = minsock; sockid <= maxsock; sockid++) {
if (lnid == _socket_to_node[sockid - minsock]) {
- _node_to_pnode[lnid] =
- _socket_to_pnode[sockid - minsock];
+ _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock];
break;
}
}
@@ -1333,8 +1346,7 @@ static void __init build_socket_tables(void)
pr_info("UV: Checking socket->node/pnode for identity maps\n");
if (minsock == 0) {
for (i = 0; i < num; i++)
- if (_socket_to_node[i] == SOCK_EMPTY ||
- i != _socket_to_node[i])
+ if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i])
break;
if (i >= num) {
kfree(_socket_to_node);
@@ -1355,7 +1367,7 @@ static void __init build_socket_tables(void)
}
}
-void __init uv_system_init(void)
+static void __init uv_system_init_hub(void)
{
struct uv_hub_info_s hub_info = {0};
int bytes, cpu, nodeid;
@@ -1373,8 +1385,13 @@ void __init uv_system_init(void)
map_low_mmrs();
- uv_bios_init(); /* get uv_systab for decoding */
- decode_uv_systab();
+ /* Get uv_systab for decoding: */
+ uv_bios_init();
+
+ /* If there's an UVsystab problem then abort UV init: */
+ if (decode_uv_systab() < 0)
+ return;
+
build_socket_tables();
build_uv_gr_table();
uv_init_hub_info(&hub_info);
@@ -1382,14 +1399,10 @@ void __init uv_system_init(void)
if (!_node_to_pnode)
boot_init_possible_blades(&hub_info);
- /* uv_num_possible_blades() is really the hub count */
- pr_info("UV: Found %d hubs, %d nodes, %d cpus\n",
- uv_num_possible_blades(),
- num_possible_nodes(),
- num_possible_cpus());
+ /* uv_num_possible_blades() is really the hub count: */
+ pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus());
- uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
- &sn_region_size, &system_serial_number);
+ uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, &sn_region_size, &system_serial_number);
hub_info.coherency_domain_number = sn_coherency_id;
uv_rtc_init();
@@ -1402,33 +1415,31 @@ void __init uv_system_init(void)
struct uv_hub_info_s *new_hub;
if (__uv_hub_info_list[nodeid]) {
- pr_err("UV: Node %d UV HUB already initialized!?\n",
- nodeid);
+ pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid);
BUG();
}
/* Allocate new per hub info list */
- new_hub = (nodeid == 0) ?
- &uv_hub_info_node0 :
- kzalloc_node(bytes, GFP_KERNEL, nodeid);
+ new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid);
BUG_ON(!new_hub);
__uv_hub_info_list[nodeid] = new_hub;
new_hub = uv_hub_info_list(nodeid);
BUG_ON(!new_hub);
*new_hub = hub_info;
- /* Use information from GAM table if available */
+ /* Use information from GAM table if available: */
if (_node_to_pnode)
new_hub->pnode = _node_to_pnode[nodeid];
- else /* Fill in during cpu loop */
+ else /* Or fill in during CPU loop: */
new_hub->pnode = 0xffff;
+
new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
new_hub->memory_nid = -1;
new_hub->nr_possible_cpus = 0;
new_hub->nr_online_cpus = 0;
}
- /* Initialize per cpu info */
+ /* Initialize per CPU info: */
for_each_possible_cpu(cpu) {
int apicid = per_cpu(x86_cpu_to_apicid, cpu);
int numa_node_id;
@@ -1439,22 +1450,24 @@ void __init uv_system_init(void)
pnode = uv_apicid_to_pnode(apicid);
uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
- uv_cpu_info_per(cpu)->blade_cpu_id =
- uv_cpu_hub_info(cpu)->nr_possible_cpus++;
+ uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;
if (uv_cpu_hub_info(cpu)->memory_nid == -1)
uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
- if (nodeid != numa_node_id && /* init memoryless node */
+
+ /* Init memoryless node: */
+ if (nodeid != numa_node_id &&
uv_hub_info_list(numa_node_id)->pnode == 0xffff)
uv_hub_info_list(numa_node_id)->pnode = pnode;
else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
uv_cpu_hub_info(cpu)->pnode = pnode;
+
uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid);
}
for_each_node(nodeid) {
unsigned short pnode = uv_hub_info_list(nodeid)->pnode;
- /* Add pnode info for pre-GAM list nodes without cpus */
+ /* Add pnode info for pre-GAM list nodes without CPUs: */
if (pnode == 0xffff) {
unsigned long paddr;
@@ -1480,15 +1493,30 @@ void __init uv_system_init(void)
uv_scir_register_cpu_notifier();
proc_mkdir("sgi_uv", NULL);
- /* register Legacy VGA I/O redirection handler */
+ /* Register Legacy VGA I/O redirection handler: */
pci_register_set_vga_state(uv_set_vga_state);
/*
* For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
- * EFI is not enabled in the kdump kernel.
+ * EFI is not enabled in the kdump kernel:
*/
if (is_kdump_kernel())
reboot_type = BOOT_ACPI;
}
+/*
+ * There is a small amount of UV specific code needed to initialize a
+ * UV system that does not have a "UV HUB" (referred to as "hubless").
+ */
+void __init uv_system_init(void)
+{
+ if (likely(!is_uv_system() && !is_uv_hubless()))
+ return;
+
+ if (is_uv_system())
+ uv_system_init_hub();
+ else
+ uv_nmi_setup_hubless();
+}
+
apic_driver(apic_x2apic_uv_x);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 45d44c173c..4a7080c 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -905,8 +905,8 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
{
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
- static unsigned int last_stime; /* = 0 */
- cputime_t stime, utime;
+ static u64 last_stime; /* = 0 */
+ u64 stime, utime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -919,7 +919,7 @@ recalc:
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
- idle_percentage = cputime_to_jiffies(stime - last_stime);
+ idle_percentage = nsecs_to_jiffies(stime - last_stime);
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index c62e015..de827d6 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -81,6 +81,7 @@ void common(void) {
BLANK();
OFFSET(BP_scratch, boot_params, scratch);
+ OFFSET(BP_secure_boot, boot_params, secure_boot);
OFFSET(BP_loadflags, boot_params, hdr.loadflags);
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 210927e..99332f5 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -13,6 +13,10 @@ static char syscalls_ia32[] = {
#include <asm/syscalls_32.h>
};
+#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+#include <asm/kvm_para.h>
+#endif
+
int main(void)
{
#ifdef CONFIG_PARAVIRT
@@ -22,6 +26,11 @@ int main(void)
BLANK();
#endif
+#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+ OFFSET(KVM_STEAL_TIME_preempted, kvm_steal_time, preempted);
+ BLANK();
+#endif
+
#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry)
ENTRY(bx);
ENTRY(cx);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1d31672..4e95b2e 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -309,8 +309,22 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ u32 eax, ebx, ecx, edx;
- node_id = cpuid_ecx(0x8000001e) & 7;
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+
+ node_id = ecx & 0xff;
+ smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
+
+ if (c->x86 == 0x15)
+ c->cu_id = ebx & 0xff;
+
+ if (c->x86 >= 0x17) {
+ c->cpu_core_id = ebx & 0xff;
+
+ if (smp_num_siblings > 1)
+ c->x86_max_cores /= smp_num_siblings;
+ }
/*
* We may have multiple LLCs if L3 caches exist, so check if we
@@ -541,8 +555,10 @@ static void early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
- if (!check_tsc_unstable())
- set_sched_clock_stable();
+ if (check_tsc_unstable())
+ clear_sched_clock_stable();
+ } else {
+ clear_sched_clock_stable();
}
/* Bit 12 of 8000_0007 edx is accumulated power mechanism. */
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 48d4ff2..0e150d6 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,5 +1,5 @@
-#include <linux/bitops.h>
-#include <linux/kernel.h>
+
+#include <linux/sched.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
@@ -104,6 +104,8 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif
+
+ clear_sched_clock_stable();
}
static void init_centaur(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9bab7a8..c64ca592 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -35,6 +35,7 @@
#include <asm/desc.h>
#include <asm/fpu/internal.h>
#include <asm/mtrr.h>
+#include <asm/hwcap2.h>
#include <linux/numa.h>
#include <asm/asm.h>
#include <asm/bugs.h>
@@ -51,6 +52,8 @@
#include "cpu.h"
+u32 elf_hwcap2 __read_mostly;
+
/* all of these masks are initialized in setup_cpu_local_masks() */
cpumask_var_t cpu_initialized_mask;
cpumask_var_t cpu_callout_mask;
@@ -83,6 +86,7 @@ static void default_init(struct cpuinfo_x86 *c)
strcpy(c->x86_model_id, "386");
}
#endif
+ clear_sched_clock_stable();
}
static const struct cpu_dev default_cpu = {
@@ -655,6 +659,16 @@ void cpu_detect(struct cpuinfo_x86 *c)
}
}
+static void apply_forced_caps(struct cpuinfo_x86 *c)
+{
+ int i;
+
+ for (i = 0; i < NCAPINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+}
+
void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
@@ -748,6 +762,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
init_scattered_cpuid_features(c);
+
+ /*
+ * Clear/Set all flags overridden by options, after probe.
+ * This needs to happen each time we re-probe, which may happen
+ * several times during CPU initialization.
+ */
+ apply_forced_caps(c);
}
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -801,14 +822,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
memset(&c->x86_capability, 0, sizeof c->x86_capability);
c->extended_cpuid_level = 0;
- if (!have_cpuid_p())
- identify_cpu_without_cpuid(c);
-
/* cyrix could have cpuid enabled via c_identify()*/
if (have_cpuid_p()) {
cpu_detect(c);
get_cpu_vendor(c);
get_cpu_cap(c);
+ setup_force_cpu_cap(X86_FEATURE_CPUID);
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
@@ -818,6 +837,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
+ } else {
+ identify_cpu_without_cpuid(c);
+ setup_clear_cpu_cap(X86_FEATURE_CPUID);
}
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
@@ -1015,6 +1037,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
c->x86_coreid_bits = 0;
+ c->cu_id = 0xff;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
@@ -1034,10 +1057,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_identify(c);
/* Clear/Set all flags overridden by options, after probe */
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
#ifdef CONFIG_X86_64
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
@@ -1055,6 +1075,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
*/
if (this_cpu->c_init)
this_cpu->c_init(c);
+ else
+ clear_sched_clock_stable();
/* Disable the PN if appropriate */
squash_the_stupid_serial_number(c);
@@ -1096,10 +1118,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
* Clear/Set all flags overridden by options, need do it
* before following smp all cpus cap AND.
*/
- for (i = 0; i < NCAPINTS; i++) {
- c->x86_capability[i] &= ~cpu_caps_cleared[i];
- c->x86_capability[i] |= cpu_caps_set[i];
- }
+ apply_forced_caps(c);
/*
* On SMP, boot_cpu_data holds the common feature set between
@@ -1491,7 +1510,7 @@ void cpu_init(void)
for (i = 0; i <= IO_BITMAP_LONGS; i++)
t->io_bitmap[i] = ~0UL;
- atomic_inc(&init_mm.mm_count);
+ mmgrab(&init_mm);
me->active_mm = &init_mm;
BUG_ON(me->mm);
enter_lazy_tlb(&init_mm, me);
@@ -1542,7 +1561,7 @@ void cpu_init(void)
/*
* Set up and load the per-CPU TSS and LDT
*/
- atomic_inc(&init_mm.mm_count);
+ mmgrab(&init_mm);
curr->active_mm = &init_mm;
BUG_ON(curr->mm);
enter_lazy_tlb(&init_mm, curr);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index bd9dcd6..47416f9 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -9,6 +9,7 @@
#include <asm/pci-direct.h>
#include <asm/tsc.h>
#include <asm/cpufeature.h>
+#include <linux/sched.h>
#include "cpu.h"
@@ -183,6 +184,7 @@ static void early_init_cyrix(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
break;
}
+ clear_sched_clock_stable();
}
static void init_cyrix(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 203f860..017ecd3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -15,6 +15,8 @@
#include <asm/cpu.h>
#include <asm/intel-family.h>
#include <asm/microcode_intel.h>
+#include <asm/hwcap2.h>
+#include <asm/elf.h>
#ifdef CONFIG_X86_64
#include <linux/topology.h>
@@ -62,6 +64,46 @@ void check_mpx_erratum(struct cpuinfo_x86 *c)
}
}
+static bool ring3mwait_disabled __read_mostly;
+
+static int __init ring3mwait_disable(char *__unused)
+{
+ ring3mwait_disabled = true;
+ return 0;
+}
+__setup("ring3mwait=disable", ring3mwait_disable);
+
+static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
+{
+ /*
+ * Ring 3 MONITOR/MWAIT feature cannot be detected without
+ * cpu model and family comparison.
+ */
+ if (c->x86 != 6)
+ return;
+ switch (c->x86_model) {
+ case INTEL_FAM6_XEON_PHI_KNL:
+ case INTEL_FAM6_XEON_PHI_KNM:
+ break;
+ default:
+ return;
+ }
+
+ if (ring3mwait_disabled) {
+ msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
+ MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+ return;
+ }
+
+ msr_set_bit(MSR_MISC_FEATURE_ENABLES,
+ MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+
+ set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+
+ if (c == &boot_cpu_data)
+ ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -119,8 +161,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
- if (!check_tsc_unstable())
- set_sched_clock_stable();
+ if (check_tsc_unstable())
+ clear_sched_clock_stable();
+ } else {
+ clear_sched_clock_stable();
}
/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
@@ -560,6 +604,8 @@ static void init_intel(struct cpuinfo_x86 *c)
detect_vmx_virtcap(c);
init_intel_energy_perf(c);
+
+ probe_xeon_phi_r3mwait(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 83f1a98..2eee853 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -52,8 +52,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
if (severity >= GHES_SEV_RECOVERABLE)
m.status |= MCI_STATUS_UC;
- if (severity >= GHES_SEV_PANIC)
+
+ if (severity >= GHES_SEV_PANIC) {
m.status |= MCI_STATUS_PCC;
+ m.tsc = rdtsc();
+ }
m.addr = mem_err->physical_addr;
mce_log(&m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 93d824e..1e5a50c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -72,7 +72,7 @@ struct llist_node *mce_gen_pool_prepare_records(void)
return new_head.first;
}
-void mce_gen_pool_process(void)
+void mce_gen_pool_process(struct work_struct *__unused)
{
struct llist_node *head;
struct mce_evt_llist *node, *tmp;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 517619e..99165b2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -152,7 +152,6 @@ static void raise_mce(struct mce *m)
if (context == MCJ_CTX_RANDOM)
return;
-#ifdef CONFIG_X86_LOCAL_APIC
if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
unsigned long start;
int cpu;
@@ -192,9 +191,7 @@ static void raise_mce(struct mce *m)
raise_local();
put_cpu();
put_online_cpus();
- } else
-#endif
- {
+ } else {
preempt_disable();
raise_local();
preempt_enable();
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index cd74a3f..903043e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -31,7 +31,7 @@ struct mce_evt_llist {
struct mce mce;
};
-void mce_gen_pool_process(void);
+void mce_gen_pool_process(struct work_struct *__unused);
bool mce_gen_pool_empty(void);
int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 00ef432..8e9725c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -128,7 +128,6 @@ void mce_setup(struct mce *m)
{
memset(m, 0, sizeof(struct mce));
m->cpu = m->extcpu = smp_processor_id();
- m->tsc = rdtsc();
/* We hope get_seconds stays lockless */
m->time = get_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
@@ -217,9 +216,7 @@ void mce_register_decode_chain(struct notifier_block *nb)
{
atomic_inc(&num_notifiers);
- /* Ensure SRAO notifier has the highest priority in the decode chain. */
- if (nb != &mce_srao_nb && nb->priority == INT_MAX)
- nb->priority -= 1;
+ WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC);
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
}
@@ -583,7 +580,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
}
static struct notifier_block mce_srao_nb = {
.notifier_call = srao_decode_notifier,
- .priority = INT_MAX,
+ .priority = MCE_PRIO_SRAO,
};
static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
@@ -609,7 +606,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
static struct notifier_block mce_default_nb = {
.notifier_call = mce_default_notifier,
/* lowest prio, we want it to run last. */
- .priority = 0,
+ .priority = MCE_PRIO_LOWEST,
};
/*
@@ -710,14 +707,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_gather_info(&m, NULL);
- /*
- * m.tsc was set in mce_setup(). Clear it if not requested.
- *
- * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid
- * that dance.
- */
- if (!(flags & MCP_TIMESTAMP))
- m.tsc = 0;
+ if (flags & MCP_TIMESTAMP)
+ m.tsc = rdtsc();
for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
@@ -1156,6 +1147,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
goto out;
mce_gather_info(&m, regs);
+ m.tsc = rdtsc();
final = this_cpu_ptr(&mces_seen);
*final = m;
@@ -1322,41 +1314,6 @@ int memory_failure(unsigned long pfn, int vector, int flags)
#endif
/*
- * Action optional processing happens here (picking up
- * from the list of faulting pages that do_machine_check()
- * placed into the genpool).
- */
-static void mce_process_work(struct work_struct *dummy)
-{
- mce_gen_pool_process();
-}
-
-#ifdef CONFIG_X86_MCE_INTEL
-/***
- * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog
- * @cpu: The CPU on which the event occurred.
- * @status: Event status information
- *
- * This function should be called by the thermal interrupt after the
- * event has been processed and the decision was made to log the event
- * further.
- *
- * The status parameter will be saved to the 'status' field of 'struct mce'
- * and historically has been the register value of the
- * MSR_IA32_THERMAL_STATUS (Intel) msr.
- */
-void mce_log_therm_throt_event(__u64 status)
-{
- struct mce m;
-
- mce_setup(&m);
- m.bank = MCE_THERMAL_BANK;
- m.status = status;
- mce_log(&m);
-}
-#endif /* CONFIG_X86_MCE_INTEL */
-
-/*
* Periodic polling timer for "silent" machine check errors. If the
* poller finds an MCE, poll 2x faster. When the poller finds no more
* errors, poll 2x slower (up to check_interval seconds).
@@ -1373,20 +1330,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
-static void __restart_timer(struct timer_list *t, unsigned long interval)
+static void __start_timer(struct timer_list *t, unsigned long interval)
{
unsigned long when = jiffies + interval;
unsigned long flags;
local_irq_save(flags);
- if (timer_pending(t)) {
- if (time_before(when, t->expires))
- mod_timer(t, when);
- } else {
- t->expires = round_jiffies(when);
- add_timer_on(t, smp_processor_id());
- }
+ if (!timer_pending(t) || time_before(when, t->expires))
+ mod_timer(t, round_jiffies(when));
local_irq_restore(flags);
}
@@ -1421,7 +1373,7 @@ static void mce_timer_fn(unsigned long data)
done:
__this_cpu_write(mce_next_interval, iv);
- __restart_timer(t, iv);
+ __start_timer(t, iv);
}
/*
@@ -1432,7 +1384,7 @@ void mce_timer_kick(unsigned long interval)
struct timer_list *t = this_cpu_ptr(&mce_timer);
unsigned long iv = __this_cpu_read(mce_next_interval);
- __restart_timer(t, interval);
+ __start_timer(t, interval);
if (interval < iv)
__this_cpu_write(mce_next_interval, interval);
@@ -1779,17 +1731,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
}
}
-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
if (mca_cfg.ignore_ce || !iv)
return;
- per_cpu(mce_next_interval, cpu) = iv;
-
- t->expires = round_jiffies(jiffies + iv);
- add_timer_on(t, cpu);
+ this_cpu_write(mce_next_interval, iv);
+ __start_timer(t, iv);
}
static void __mcheck_cpu_setup_timer(void)
@@ -1806,7 +1756,7 @@ static void __mcheck_cpu_init_timer(void)
unsigned int cpu = smp_processor_id();
setup_pinned_timer(t, mce_timer_fn, cpu);
- mce_start_timer(cpu, t);
+ mce_start_timer(t);
}
/* Handle unconfigured int18 (should never happen) */
@@ -2196,7 +2146,7 @@ int __init mcheck_init(void)
mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity();
- INIT_WORK(&mce_work, mce_process_work);
+ INIT_WORK(&mce_work, mce_gen_pool_process);
init_irq_work(&mce_irq_work, mce_irq_work_cb);
return 0;
@@ -2566,7 +2516,7 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
- struct timer_list *t = &per_cpu(mce_timer, cpu);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
int ret;
mce_device_create(cpu);
@@ -2577,13 +2527,13 @@ static int mce_cpu_online(unsigned int cpu)
return ret;
}
mce_reenable_cpu();
- mce_start_timer(cpu, t);
+ mce_start_timer(t);
return 0;
}
static int mce_cpu_pre_down(unsigned int cpu)
{
- struct timer_list *t = &per_cpu(mce_timer, cpu);
+ struct timer_list *t = this_cpu_ptr(&mce_timer);
mce_disable_cpu();
del_timer_sync(t);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index a5fd137..524cc57 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -192,6 +192,7 @@ static void get_smca_bank_info(unsigned int bank)
smca_banks[bank].hwid = s_hwid;
smca_banks[bank].id = instance_id;
+ smca_banks[bank].sysfs_id = s_hwid->count++;
break;
}
}
@@ -777,7 +778,8 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
mce_setup(&m);
m.status = status;
- m.bank = bank;
+ m.bank = bank;
+ m.tsc = rdtsc();
if (threshold_err)
m.misc = misc;
@@ -814,14 +816,14 @@ static inline void __smp_deferred_error_interrupt(void)
deferred_error_int_vector();
}
-asmlinkage __visible void smp_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
{
entering_irq();
__smp_deferred_error_interrupt();
exiting_ack_irq();
}
-asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
{
entering_irq();
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
@@ -1064,9 +1066,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b)
return NULL;
}
+ if (smca_banks[bank].hwid->count == 1)
+ return smca_get_name(bank_type);
+
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
"%s_%x", smca_get_name(bank_type),
- smca_banks[bank].id);
+ smca_banks[bank].sysfs_id);
return buf_mcatype;
}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 465aca8..d7cc190 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -6,7 +6,7 @@
*
* Maintains a counter in /sys that keeps track of the number of thermal
* events, such that the user knows how bad the thermal problem might be
- * (since the logging to syslog and mcelog is rate limited).
+ * (since the logging to syslog is rate limited).
*
* Author: Dmitriy Zavin (dmitriyz@google.com)
*
@@ -141,13 +141,8 @@ static struct attribute_group thermal_attr_group = {
* IRQ has been acknowledged.
*
* It will take care of rate limiting and printing messages to the syslog.
- *
- * Returns: 0 : Event should NOT be further logged, i.e. still in
- * "timeout" from previous log message.
- * 1 : Event should be logged further, and a message has been
- * printed to the syslog.
*/
-static int therm_throt_process(bool new_event, int event, int level)
+static void therm_throt_process(bool new_event, int event, int level)
{
struct _thermal_state *state;
unsigned int this_cpu = smp_processor_id();
@@ -162,16 +157,16 @@ static int therm_throt_process(bool new_event, int event, int level)
else if (event == POWER_LIMIT_EVENT)
state = &pstate->core_power_limit;
else
- return 0;
+ return;
} else if (level == PACKAGE_LEVEL) {
if (event == THERMAL_THROTTLING_EVENT)
state = &pstate->package_throttle;
else if (event == POWER_LIMIT_EVENT)
state = &pstate->package_power_limit;
else
- return 0;
+ return;
} else
- return 0;
+ return;
old_event = state->new_event;
state->new_event = new_event;
@@ -181,7 +176,7 @@ static int therm_throt_process(bool new_event, int event, int level)
if (time_before64(now, state->next_check) &&
state->count != state->last_count)
- return 0;
+ return;
state->next_check = now + CHECK_INTERVAL;
state->last_count = state->count;
@@ -193,16 +188,14 @@ static int therm_throt_process(bool new_event, int event, int level)
this_cpu,
level == CORE_LEVEL ? "Core" : "Package",
state->count);
- return 1;
+ return;
}
if (old_event) {
if (event == THERMAL_THROTTLING_EVENT)
pr_info("CPU%d: %s temperature/speed normal\n", this_cpu,
level == CORE_LEVEL ? "Core" : "Package");
- return 1;
+ return;
}
-
- return 0;
}
static int thresh_event_valid(int level, int event)
@@ -365,10 +358,9 @@ static void intel_thermal_interrupt(void)
/* Check for violation of core thermal thresholds*/
notify_thresholds(msr_val);
- if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
- THERMAL_THROTTLING_EVENT,
- CORE_LEVEL) != 0)
- mce_log_therm_throt_event(msr_val);
+ therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
+ THERMAL_THROTTLING_EVENT,
+ CORE_LEVEL);
if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
@@ -404,14 +396,16 @@ static inline void __smp_thermal_interrupt(void)
smp_thermal_vector();
}
-asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
__smp_thermal_interrupt();
exiting_ack_irq();
}
-asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry
+smp_trace_thermal_interrupt(struct pt_regs *regs)
{
entering_irq();
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index 9beb092..bb0e75ee 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void)
mce_threshold_vector();
}
-asmlinkage __visible void smp_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
{
entering_irq();
__smp_threshold_interrupt();
exiting_ack_irq();
}
-asmlinkage __visible void smp_trace_threshold_interrupt(void)
+asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
{
entering_irq();
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 6a31e26..7889ae4 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -42,16 +42,19 @@ static struct equiv_cpu_entry *equiv_cpu_table;
/*
* This points to the current valid container of microcode patches which we will
- * save from the initrd/builtin before jettisoning its contents.
+ * save from the initrd/builtin before jettisoning its contents. @mc is the
+ * microcode patch we found to match.
*/
-struct container {
- u8 *data;
- size_t size;
-} cont;
+struct cont_desc {
+ struct microcode_amd *mc;
+ u32 cpuid_1_eax;
+ u32 psize;
+ u8 *data;
+ size_t size;
+};
static u32 ucode_new_rev;
static u8 amd_ucode_patch[PATCH_MAX_SIZE];
-static u16 this_equiv_id;
/*
* Microcode patch container file is prepended to the initrd in cpio
@@ -60,57 +63,13 @@ static u16 this_equiv_id;
static const char
ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin";
-static size_t compute_container_size(u8 *data, u32 total_size)
+static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig)
{
- size_t size = 0;
- u32 *header = (u32 *)data;
-
- if (header[0] != UCODE_MAGIC ||
- header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
- header[2] == 0) /* size */
- return size;
-
- size = header[2] + CONTAINER_HDR_SZ;
- total_size -= size;
- data += size;
-
- while (total_size) {
- u16 patch_size;
-
- header = (u32 *)data;
-
- if (header[0] != UCODE_UCODE_TYPE)
- break;
-
- /*
- * Sanity-check patch size.
- */
- patch_size = header[1];
- if (patch_size > PATCH_MAX_SIZE)
- break;
-
- size += patch_size + SECTION_HDR_SIZE;
- data += patch_size + SECTION_HDR_SIZE;
- total_size -= patch_size + SECTION_HDR_SIZE;
+ for (; equiv_table && equiv_table->installed_cpu; equiv_table++) {
+ if (sig == equiv_table->installed_cpu)
+ return equiv_table->equiv_cpu;
}
- return size;
-}
-
-static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
- unsigned int sig)
-{
- int i = 0;
-
- if (!equiv_cpu_table)
- return 0;
-
- while (equiv_cpu_table[i].installed_cpu != 0) {
- if (sig == equiv_cpu_table[i].installed_cpu)
- return equiv_cpu_table[i].equiv_cpu;
-
- i++;
- }
return 0;
}
@@ -118,91 +77,109 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table,
* This scans the ucode blob for the proper container as we can have multiple
* containers glued together. Returns the equivalence ID from the equivalence
* table or 0 if none found.
+ * Returns the amount of bytes consumed while scanning. @desc contains all the
+ * data we're going to use in later stages of the application.
*/
-static u16
-find_proper_container(u8 *ucode, size_t size, struct container *ret_cont)
+static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc)
{
- struct container ret = { NULL, 0 };
- u32 eax, ebx, ecx, edx;
struct equiv_cpu_entry *eq;
- int offset, left;
- u16 eq_id = 0;
- u32 *header;
- u8 *data;
+ ssize_t orig_size = size;
+ u32 *hdr = (u32 *)ucode;
+ u16 eq_id;
+ u8 *buf;
- data = ucode;
- left = size;
- header = (u32 *)data;
+ /* Am I looking at an equivalence table header? */
+ if (hdr[0] != UCODE_MAGIC ||
+ hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE ||
+ hdr[2] == 0)
+ return CONTAINER_HDR_SZ;
+ buf = ucode;
- /* find equiv cpu table */
- if (header[0] != UCODE_MAGIC ||
- header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
- header[2] == 0) /* size */
- return eq_id;
+ eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ);
- eax = 0x00000001;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
+ /* Find the equivalence ID of our CPU in this table: */
+ eq_id = find_equiv_id(eq, desc->cpuid_1_eax);
- while (left > 0) {
- eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+ buf += hdr[2] + CONTAINER_HDR_SZ;
+ size -= hdr[2] + CONTAINER_HDR_SZ;
+
+ /*
+ * Scan through the rest of the container to find where it ends. We do
+ * some basic sanity-checking too.
+ */
+ while (size > 0) {
+ struct microcode_amd *mc;
+ u32 patch_size;
- ret.data = data;
+ hdr = (u32 *)buf;
- /* Advance past the container header */
- offset = header[2] + CONTAINER_HDR_SZ;
- data += offset;
- left -= offset;
+ if (hdr[0] != UCODE_UCODE_TYPE)
+ break;
- eq_id = find_equiv_id(eq, eax);
- if (eq_id) {
- ret.size = compute_container_size(ret.data, left + offset);
+ /* Sanity-check patch size. */
+ patch_size = hdr[1];
+ if (patch_size > PATCH_MAX_SIZE)
+ break;
- /*
- * truncate how much we need to iterate over in the
- * ucode update loop below
- */
- left = ret.size - offset;
+ /* Skip patch section header: */
+ buf += SECTION_HDR_SIZE;
+ size -= SECTION_HDR_SIZE;
- *ret_cont = ret;
- return eq_id;
+ mc = (struct microcode_amd *)buf;
+ if (eq_id == mc->hdr.processor_rev_id) {
+ desc->psize = patch_size;
+ desc->mc = mc;
}
- /*
- * support multiple container files appended together. if this
- * one does not have a matching equivalent cpu entry, we fast
- * forward to the next container file.
- */
- while (left > 0) {
- header = (u32 *)data;
-
- if (header[0] == UCODE_MAGIC &&
- header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
- break;
-
- offset = header[1] + SECTION_HDR_SIZE;
- data += offset;
- left -= offset;
- }
+ buf += patch_size;
+ size -= patch_size;
+ }
- /* mark where the next microcode container file starts */
- offset = data - (u8 *)ucode;
- ucode = data;
+ /*
+ * If we have found a patch (desc->mc), it means we're looking at the
+ * container which has a patch for this CPU so return 0 to mean, @ucode
+ * already points to the proper container. Otherwise, we return the size
+ * we scanned so that we can advance to the next container in the
+ * buffer.
+ */
+ if (desc->mc) {
+ desc->data = ucode;
+ desc->size = orig_size - size;
+
+ return 0;
}
- return eq_id;
+ return orig_size - size;
}
-static int __apply_microcode_amd(struct microcode_amd *mc_amd)
+/*
+ * Scan the ucode blob for the proper container as we can have multiple
+ * containers glued together.
+ */
+static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc)
+{
+ ssize_t rem = size;
+
+ while (rem >= 0) {
+ ssize_t s = parse_container(ucode, rem, desc);
+ if (!s)
+ return;
+
+ ucode += s;
+ rem -= s;
+ }
+}
+
+static int __apply_microcode_amd(struct microcode_amd *mc)
{
u32 rev, dummy;
- native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+ native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc->hdr.data_code);
/* verify patch application was successful */
native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- if (rev != mc_amd->hdr.patch_id)
+ if (rev != mc->hdr.patch_id)
return -1;
return 0;
@@ -217,17 +194,16 @@ static int __apply_microcode_amd(struct microcode_amd *mc_amd)
* load_microcode_amd() to save equivalent cpu table and microcode patches in
* kernel heap memory.
*
- * Returns true if container found (sets @ret_cont), false otherwise.
+ * Returns true if container found (sets @desc), false otherwise.
*/
-static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch,
- struct container *ret_cont)
+static bool
+apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch)
{
+ struct cont_desc desc = { 0 };
u8 (*patch)[PATCH_MAX_SIZE];
- u32 rev, *header, *new_rev;
- struct container ret;
- int offset, left;
- u16 eq_id = 0;
- u8 *data;
+ struct microcode_amd *mc;
+ u32 rev, dummy, *new_rev;
+ bool ret = false;
#ifdef CONFIG_X86_32
new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
@@ -237,50 +213,27 @@ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch,
patch = &amd_ucode_patch;
#endif
- if (check_current_patch_level(&rev, true))
- return false;
-
- eq_id = find_proper_container(ucode, size, &ret);
- if (!eq_id)
- return false;
-
- this_equiv_id = eq_id;
- header = (u32 *)ret.data;
-
- /* We're pointing to an equiv table, skip over it. */
- data = ret.data + header[2] + CONTAINER_HDR_SZ;
- left = ret.size - (header[2] + CONTAINER_HDR_SZ);
-
- while (left > 0) {
- struct microcode_amd *mc;
-
- header = (u32 *)data;
- if (header[0] != UCODE_UCODE_TYPE || /* type */
- header[1] == 0) /* size */
- break;
+ desc.cpuid_1_eax = cpuid_1_eax;
- mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+ scan_containers(ucode, size, &desc);
- if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) {
+ mc = desc.mc;
+ if (!mc)
+ return ret;
- if (!__apply_microcode_amd(mc)) {
- rev = mc->hdr.patch_id;
- *new_rev = rev;
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+ if (rev >= mc->hdr.patch_id)
+ return ret;
- if (save_patch)
- memcpy(patch, mc, min_t(u32, header[1], PATCH_MAX_SIZE));
- }
- }
+ if (!__apply_microcode_amd(mc)) {
+ *new_rev = mc->hdr.patch_id;
+ ret = true;
- offset = header[1] + SECTION_HDR_SIZE;
- data += offset;
- left -= offset;
+ if (save_patch)
+ memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE));
}
- if (ret_cont)
- *ret_cont = ret;
-
- return true;
+ return ret;
}
static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
@@ -298,10 +251,9 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family)
#endif
}
-void __init load_ucode_amd_bsp(unsigned int family)
+void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret)
{
struct ucode_cpu_info *uci;
- u32 eax, ebx, ecx, edx;
struct cpio_data cp;
const char *path;
bool use_pa;
@@ -316,183 +268,95 @@ void __init load_ucode_amd_bsp(unsigned int family)
use_pa = false;
}
- if (!get_builtin_microcode(&cp, family))
+ if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax)))
cp = find_microcode_in_initrd(path, use_pa);
- if (!(cp.data && cp.size))
- return;
-
- /* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */
- eax = 1;
- ecx = 0;
- native_cpuid(&eax, &ebx, &ecx, &edx);
- uci->cpu_sig.sig = eax;
+ /* Needed in load_microcode_amd() */
+ uci->cpu_sig.sig = cpuid_1_eax;
- apply_microcode_early_amd(cp.data, cp.size, true, NULL);
+ *ret = cp;
}
-#ifdef CONFIG_X86_32
-/*
- * On 32-bit, since AP's early load occurs before paging is turned on, we
- * cannot traverse cpu_equiv_table and microcode_cache in kernel heap memory.
- * So during cold boot, AP will apply_ucode_in_initrd() just like the BSP.
- * In save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch,
- * which is used upon resume from suspend.
- */
-void load_ucode_amd_ap(unsigned int family)
+void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
{
- struct microcode_amd *mc;
- struct cpio_data cp;
-
- mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
- if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
- __apply_microcode_amd(mc);
- return;
- }
-
- if (!get_builtin_microcode(&cp, family))
- cp = find_microcode_in_initrd((const char *)__pa_nodebug(ucode_path), true);
+ struct cpio_data cp = { };
+ __load_ucode_amd(cpuid_1_eax, &cp);
if (!(cp.data && cp.size))
return;
- /*
- * This would set amd_ucode_patch above so that the following APs can
- * use it directly instead of going down this path again.
- */
- apply_microcode_early_amd(cp.data, cp.size, true, NULL);
+ apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true);
}
-#else
-void load_ucode_amd_ap(unsigned int family)
+
+void load_ucode_amd_ap(unsigned int cpuid_1_eax)
{
- struct equiv_cpu_entry *eq;
struct microcode_amd *mc;
- u32 rev, eax;
- u16 eq_id;
-
- /* 64-bit runs with paging enabled, thus early==false. */
- if (check_current_patch_level(&rev, false))
- return;
-
- /* First AP hasn't cached it yet, go through the blob. */
- if (!cont.data) {
- struct cpio_data cp = { NULL, 0, "" };
+ struct cpio_data cp;
+ u32 *new_rev, rev, dummy;
- if (cont.size == -1)
- return;
+ if (IS_ENABLED(CONFIG_X86_32)) {
+ mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
+ new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+ } else {
+ mc = (struct microcode_amd *)amd_ucode_patch;
+ new_rev = &ucode_new_rev;
+ }
-reget:
- if (!get_builtin_microcode(&cp, family)) {
-#ifdef CONFIG_BLK_DEV_INITRD
- cp = find_cpio_data(ucode_path, (void *)initrd_start,
- initrd_end - initrd_start, NULL);
-#endif
- if (!(cp.data && cp.size)) {
- /*
- * Mark it so that other APs do not scan again
- * for no real reason and slow down boot
- * needlessly.
- */
- cont.size = -1;
- return;
- }
- }
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) {
- cont.size = -1;
+ /* Check whether we have saved a new patch already: */
+ if (*new_rev && rev < mc->hdr.patch_id) {
+ if (!__apply_microcode_amd(mc)) {
+ *new_rev = mc->hdr.patch_id;
return;
}
}
- eax = cpuid_eax(0x00000001);
- eq = (struct equiv_cpu_entry *)(cont.data + CONTAINER_HDR_SZ);
-
- eq_id = find_equiv_id(eq, eax);
- if (!eq_id)
+ __load_ucode_amd(cpuid_1_eax, &cp);
+ if (!(cp.data && cp.size))
return;
- if (eq_id == this_equiv_id) {
- mc = (struct microcode_amd *)amd_ucode_patch;
-
- if (mc && rev < mc->hdr.patch_id) {
- if (!__apply_microcode_amd(mc))
- ucode_new_rev = mc->hdr.patch_id;
- }
-
- } else {
-
- /*
- * AP has a different equivalence ID than BSP, looks like
- * mixed-steppings silicon so go through the ucode blob anew.
- */
- goto reget;
- }
+ apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
}
-#endif /* CONFIG_X86_32 */
static enum ucode_state
load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
-int __init save_microcode_in_initrd_amd(unsigned int fam)
+int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
{
+ struct cont_desc desc = { 0 };
enum ucode_state ret;
- int retval = 0;
- u16 eq_id;
-
- if (!cont.data) {
- if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) {
- struct cpio_data cp = { NULL, 0, "" };
-
-#ifdef CONFIG_BLK_DEV_INITRD
- cp = find_cpio_data(ucode_path, (void *)initrd_start,
- initrd_end - initrd_start, NULL);
-#endif
+ struct cpio_data cp;
- if (!(cp.data && cp.size)) {
- cont.size = -1;
- return -EINVAL;
- }
+ cp = find_microcode_in_initrd(ucode_path, false);
+ if (!(cp.data && cp.size))
+ return -EINVAL;
- eq_id = find_proper_container(cp.data, cp.size, &cont);
- if (!eq_id) {
- cont.size = -1;
- return -EINVAL;
- }
+ desc.cpuid_1_eax = cpuid_1_eax;
- } else
- return -EINVAL;
- }
+ scan_containers(cp.data, cp.size, &desc);
+ if (!desc.mc)
+ return -EINVAL;
- ret = load_microcode_amd(smp_processor_id(), fam, cont.data, cont.size);
+ ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
+ desc.data, desc.size);
if (ret != UCODE_OK)
- retval = -EINVAL;
-
- /*
- * This will be freed any msec now, stash patches for the current
- * family and switch to patch cache for cpu hotplug, etc later.
- */
- cont.data = NULL;
- cont.size = 0;
+ return -EINVAL;
- return retval;
+ return 0;
}
void reload_ucode_amd(void)
{
struct microcode_amd *mc;
- u32 rev;
-
- /*
- * early==false because this is a syscore ->resume path and by
- * that time paging is long enabled.
- */
- if (check_current_patch_level(&rev, false))
- return;
+ u32 rev, dummy;
mc = (struct microcode_amd *)amd_ucode_patch;
if (!mc)
return;
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
if (rev < mc->hdr.patch_id) {
if (!__apply_microcode_amd(mc)) {
ucode_new_rev = mc->hdr.patch_id;
@@ -630,60 +494,13 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
return patch_size;
}
-/*
- * Those patch levels cannot be updated to newer ones and thus should be final.
- */
-static u32 final_levels[] = {
- 0x01000098,
- 0x0100009f,
- 0x010000af,
- 0, /* T-101 terminator */
-};
-
-/*
- * Check the current patch level on this CPU.
- *
- * @rev: Use it to return the patch level. It is set to 0 in the case of
- * error.
- *
- * Returns:
- * - true: if update should stop
- * - false: otherwise
- */
-bool check_current_patch_level(u32 *rev, bool early)
-{
- u32 lvl, dummy, i;
- bool ret = false;
- u32 *levels;
-
- native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
-
- if (IS_ENABLED(CONFIG_X86_32) && early)
- levels = (u32 *)__pa_nodebug(&final_levels);
- else
- levels = final_levels;
-
- for (i = 0; levels[i]; i++) {
- if (lvl == levels[i]) {
- lvl = 0;
- ret = true;
- break;
- }
- }
-
- if (rev)
- *rev = lvl;
-
- return ret;
-}
-
static int apply_microcode_amd(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
- u32 rev;
+ u32 rev, dummy;
BUG_ON(raw_smp_processor_id() != cpu);
@@ -696,8 +513,7 @@ static int apply_microcode_amd(int cpu)
mc_amd = p->data;
uci->mc = p->data;
- if (check_current_patch_level(&rev, false))
- return -1;
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 2af69d2..b4a4cd3 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -46,6 +46,8 @@
static struct microcode_ops *microcode_ops;
static bool dis_ucode_ldr = true;
+bool initrd_gone;
+
LIST_HEAD(microcode_cache);
/*
@@ -64,19 +66,50 @@ static DEFINE_MUTEX(microcode_mutex);
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
-/*
- * Operations that are run on a target cpu:
- */
-
struct cpu_info_ctx {
struct cpu_signature *cpu_sig;
int err;
};
+/*
+ * Those patch levels cannot be updated to newer ones and thus should be final.
+ */
+static u32 final_levels[] = {
+ 0x01000098,
+ 0x0100009f,
+ 0x010000af,
+ 0, /* T-101 terminator */
+};
+
+/*
+ * Check the current patch level on this CPU.
+ *
+ * Returns:
+ * - true: if update should stop
+ * - false: otherwise
+ */
+static bool amd_check_current_patch_level(void)
+{
+ u32 lvl, dummy, i;
+ u32 *levels;
+
+ native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy);
+
+ if (IS_ENABLED(CONFIG_X86_32))
+ levels = (u32 *)__pa_nodebug(&final_levels);
+ else
+ levels = final_levels;
+
+ for (i = 0; levels[i]; i++) {
+ if (lvl == levels[i])
+ return true;
+ }
+ return false;
+}
+
static bool __init check_loader_disabled_bsp(void)
{
static const char *__dis_opt_str = "dis_ucode_ldr";
- u32 a, b, c, d;
#ifdef CONFIG_X86_32
const char *cmdline = (const char *)__pa_nodebug(boot_command_line);
@@ -92,18 +125,19 @@ static bool __init check_loader_disabled_bsp(void)
if (!have_cpuid_p())
return *res;
- a = 1;
- c = 0;
- native_cpuid(&a, &b, &c, &d);
-
/*
* CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
* completely accurate as xen pv guests don't see that CPUID bit set but
* that's good enough as they don't land on the BSP path anyway.
*/
- if (c & BIT(31))
+ if (native_cpuid_ecx(1) & BIT(31))
return *res;
+ if (x86_cpuid_vendor() == X86_VENDOR_AMD) {
+ if (amd_check_current_patch_level())
+ return *res;
+ }
+
if (cmdline_find_option_bool(cmdline, option) <= 0)
*res = false;
@@ -131,23 +165,21 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
void __init load_ucode_bsp(void)
{
- int vendor;
- unsigned int family;
+ unsigned int cpuid_1_eax;
if (check_loader_disabled_bsp())
return;
- vendor = x86_cpuid_vendor();
- family = x86_cpuid_family();
+ cpuid_1_eax = native_cpuid_eax(1);
- switch (vendor) {
+ switch (x86_cpuid_vendor()) {
case X86_VENDOR_INTEL:
- if (family >= 6)
+ if (x86_family(cpuid_1_eax) >= 6)
load_ucode_intel_bsp();
break;
case X86_VENDOR_AMD:
- if (family >= 0x10)
- load_ucode_amd_bsp(family);
+ if (x86_family(cpuid_1_eax) >= 0x10)
+ load_ucode_amd_bsp(cpuid_1_eax);
break;
default:
break;
@@ -165,22 +197,21 @@ static bool check_loader_disabled_ap(void)
void load_ucode_ap(void)
{
- int vendor, family;
+ unsigned int cpuid_1_eax;
if (check_loader_disabled_ap())
return;
- vendor = x86_cpuid_vendor();
- family = x86_cpuid_family();
+ cpuid_1_eax = native_cpuid_eax(1);
- switch (vendor) {
+ switch (x86_cpuid_vendor()) {
case X86_VENDOR_INTEL:
- if (family >= 6)
+ if (x86_family(cpuid_1_eax) >= 6)
load_ucode_intel_ap();
break;
case X86_VENDOR_AMD:
- if (family >= 0x10)
- load_ucode_amd_ap(family);
+ if (x86_family(cpuid_1_eax) >= 0x10)
+ load_ucode_amd_ap(cpuid_1_eax);
break;
default:
break;
@@ -190,21 +221,24 @@ void load_ucode_ap(void)
static int __init save_microcode_in_initrd(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
+ int ret = -EINVAL;
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
if (c->x86 >= 6)
- return save_microcode_in_initrd_intel();
+ ret = save_microcode_in_initrd_intel();
break;
case X86_VENDOR_AMD:
if (c->x86 >= 0x10)
- return save_microcode_in_initrd_amd(c->x86);
+ return save_microcode_in_initrd_amd(cpuid_eax(1));
break;
default:
break;
}
- return -EINVAL;
+ initrd_gone = true;
+
+ return ret;
}
struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
@@ -247,9 +281,16 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
* has the virtual address of the beginning of the initrd. It also
* possibly relocates the ramdisk. In either case, initrd_start contains
* the updated address so use that instead.
+ *
+ * initrd_gone is for the hotplug case where we've thrown out initrd
+ * already.
*/
- if (!use_pa && initrd_start)
- start = initrd_start;
+ if (!use_pa) {
+ if (initrd_gone)
+ return (struct cpio_data){ NULL, 0, "" };
+ if (initrd_start)
+ start = initrd_start;
+ }
return find_cpio_data(path, (void *)start, size, NULL);
#else /* !CONFIG_BLK_DEV_INITRD */
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 3f329b7..8325d8a 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -41,7 +41,7 @@
static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
-/* Current microcode patch used in early patching */
+/* Current microcode patch used in early patching on the APs. */
struct microcode_intel *intel_ucode_patch;
static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1,
@@ -607,12 +607,6 @@ int __init save_microcode_in_initrd_intel(void)
struct ucode_cpu_info uci;
struct cpio_data cp;
- /*
- * AP loading didn't find any microcode patch, no need to save anything.
- */
- if (!intel_ucode_patch || IS_ERR(intel_ucode_patch))
- return 0;
-
if (!load_builtin_intel_microcode(&cp))
cp = find_microcode_in_initrd(ucode_path, false);
@@ -628,7 +622,6 @@ int __init save_microcode_in_initrd_intel(void)
return 0;
}
-
/*
* @res_patch, output: a pointer to the patch we found.
*/
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 65e20c9..b5375b9 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -133,26 +133,6 @@ static uint32_t __init ms_hyperv_platform(void)
return 0;
}
-static u64 read_hv_clock(struct clocksource *arg)
-{
- u64 current_tick;
- /*
- * Read the partition counter to get the current tick count. This count
- * is set to 0 when the partition is created and is incremented in
- * 100 nanosecond units.
- */
- rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
- return current_tick;
-}
-
-static struct clocksource hyperv_cs = {
- .name = "hyperv_clocksource",
- .rating = 400, /* use this when running on Hyperv*/
- .read = read_hv_clock,
- .mask = CLOCKSOURCE_MASK(64),
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
static unsigned char hv_get_nmi_reason(void)
{
return 0;
@@ -180,6 +160,11 @@ static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs)
static void __init ms_hyperv_init_platform(void)
{
+ int hv_host_info_eax;
+ int hv_host_info_ebx;
+ int hv_host_info_ecx;
+ int hv_host_info_edx;
+
/*
* Extract the features and hints
*/
@@ -190,6 +175,21 @@ static void __init ms_hyperv_init_platform(void)
pr_info("HyperV: features 0x%x, hints 0x%x\n",
ms_hyperv.features, ms_hyperv.hints);
+ /*
+ * Extract host information.
+ */
+ if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) {
+ hv_host_info_eax = cpuid_eax(HVCPUID_VERSION);
+ hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION);
+ hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION);
+ hv_host_info_edx = cpuid_edx(HVCPUID_VERSION);
+
+ pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n",
+ hv_host_info_eax, hv_host_info_ebx >> 16,
+ hv_host_info_ebx & 0xFFFF, hv_host_info_ecx,
+ hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF);
+ }
+
#ifdef CONFIG_X86_LOCAL_APIC
if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) {
/*
@@ -208,9 +208,6 @@ static void __init ms_hyperv_init_platform(void)
"hv_nmi_unknown");
#endif
- if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
- clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
-
#ifdef CONFIG_X86_IO_APIC
no_timer_check = 1;
#endif
@@ -227,6 +224,13 @@ static void __init ms_hyperv_init_platform(void)
*/
if (efi_enabled(EFI_BOOT))
x86_platform.get_nmi_reason = hv_get_nmi_reason;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ /*
+ * Setup the hook to get control post apic initialization.
+ */
+ x86_platform.apic_post_init = hyperv_init;
+#endif
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 3417856..c1ea5b9 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -1,4 +1,5 @@
#include <linux/kernel.h>
+#include <linux/sched.h>
#include <linux/mm.h>
#include <asm/cpufeature.h>
#include <asm/msr.h>
@@ -14,6 +15,8 @@ static void early_init_transmeta(struct cpuinfo_x86 *c)
if (xlvl >= 0x80860001)
c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001);
}
+
+ clear_sched_clock_stable();
}
static void init_transmeta(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e4e97a5..e1114f0 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -178,13 +178,8 @@ void fpstate_init(union fpregs_state *state)
memset(state, 0, fpu_kernel_xstate_size);
- /*
- * XRSTORS requires that this bit is set in xcomp_bv, or
- * it will #GP. Make sure it is replaced after the memset().
- */
if (static_cpu_has(X86_FEATURE_XSAVES))
- state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
-
+ fpstate_init_xstate(&state->xsave);
if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
else
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 60dece3..19bdd1b 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -48,13 +48,7 @@ void fpu__init_cpu(void)
fpu__init_cpu_xstate();
}
-/*
- * The earliest FPU detection code.
- *
- * Set the X86_FEATURE_FPU CPU-capability bit based on
- * trying to execute an actual sequence of FPU instructions:
- */
-static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
+static bool fpu__probe_without_cpuid(void)
{
unsigned long cr0;
u16 fsw, fcw;
@@ -65,18 +59,25 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
write_cr0(cr0);
- if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
- asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
- : "+m" (fsw), "+m" (fcw));
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1" : "+m" (fsw), "+m" (fcw));
+
+ pr_info("x86/fpu: Probing for FPU: FSW=0x%04hx FCW=0x%04hx\n", fsw, fcw);
- if (fsw == 0 && (fcw & 0x103f) == 0x003f)
- set_cpu_cap(c, X86_FEATURE_FPU);
+ return fsw == 0 && (fcw & 0x103f) == 0x003f;
+}
+
+static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
+{
+ if (!boot_cpu_has(X86_FEATURE_CPUID) &&
+ !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
+ if (fpu__probe_without_cpuid())
+ setup_force_cpu_cap(X86_FEATURE_FPU);
else
- clear_cpu_cap(c, X86_FEATURE_FPU);
+ setup_clear_cpu_cap(X86_FEATURE_FPU);
}
#ifndef CONFIG_MATH_EMULATION
- if (!boot_cpu_has(X86_FEATURE_FPU)) {
+ if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_FPU)) {
pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
for (;;)
asm volatile("hlt");
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 1d77704..c24ac1e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -78,6 +78,7 @@ void fpu__xstate_clear_all_cpu_caps(void)
setup_clear_cpu_cap(X86_FEATURE_PKU);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
+ setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
}
/*
@@ -705,8 +706,14 @@ void __init fpu__init_system_xstate(void)
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
+ if (!boot_cpu_has(X86_FEATURE_FPU)) {
+ pr_info("x86/fpu: No FPU detected\n");
+ return;
+ }
+
if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
- pr_info("x86/fpu: Legacy x87 FPU detected.\n");
+ pr_info("x86/fpu: x87 FPU will use %s\n",
+ boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE");
return;
}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 85e87b4..dc6ba5b 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -352,6 +352,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer)
} else {
struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+ irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq));
irq_domain_activate_irq(irq_get_irq_data(hdev->irq));
disable_irq(hdev->irq);
irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu));
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 589b319..b01bc8517 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -16,6 +16,7 @@
#include <linux/syscalls.h>
#include <linux/bitmap.h>
#include <asm/syscalls.h>
+#include <asm/desc.h>
/*
* this changes the io permissions bitmap in the current task.
@@ -45,6 +46,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
memset(bitmap, 0xff, IO_BITMAP_BYTES);
t->io_bitmap_ptr = bitmap;
set_thread_flag(TIF_IO_BITMAP);
+
+ preempt_disable();
+ refresh_TR();
+ preempt_enable();
}
/*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 7c6e9ff..4d8183b 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void)
x86_platform_ipi_callback();
}
-__visible void smp_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
}
#endif
-__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 3512ba6..2754878 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -9,6 +9,7 @@
#include <linux/hardirq.h>
#include <asm/apic.h>
#include <asm/trace/irq_vectors.h>
+#include <linux/interrupt.h>
static inline void __smp_irq_work_interrupt(void)
{
@@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void)
irq_work_run();
}
-__visible void smp_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
__smp_irq_work_interrupt();
exiting_irq();
}
-__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
trace_irq_work_entry(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index cb9c1ed..f73f475 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -132,10 +132,8 @@ int sched_set_itmt_support(void)
sysctl_sched_itmt_enabled = 1;
- if (sysctl_sched_itmt_enabled) {
- x86_topology_update = true;
- rebuild_sched_domains();
- }
+ x86_topology_update = true;
+ rebuild_sched_domains();
mutex_unlock(&itmt_update_mutex);
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index fc25f69..c37bd0f 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -32,8 +32,7 @@ static void bug_at(unsigned char *ip, int line)
* Something went wrong. Crash the box, as something could be
* corrupting the kernel.
*/
- pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
- ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
+ pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line);
BUG();
}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index eb35093..520b8df 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -745,7 +745,7 @@ __visible __used void *trampoline_handler(struct pt_regs *regs)
* will be the real return address, and all the rest will
* point to kretprobe_trampoline.
*/
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ hlist_for_each_entry(ri, head, hlist) {
if (ri->task != current)
/* another task is sharing our hash bucket */
continue;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 36bc664..14f65a5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -589,7 +589,8 @@ out:
local_irq_restore(flags);
}
-__visible bool __kvm_vcpu_is_preempted(int cpu)
+#ifdef CONFIG_X86_32
+__visible bool __kvm_vcpu_is_preempted(long cpu)
{
struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
@@ -597,6 +598,29 @@ __visible bool __kvm_vcpu_is_preempted(int cpu)
}
PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
+#else
+
+#include <asm/asm-offsets.h>
+
+extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
+
+/*
+ * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
+ * restoring to/from the stack.
+ */
+asm(
+".pushsection .text;"
+".global __raw_callee_save___kvm_vcpu_is_preempted;"
+".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
+"__raw_callee_save___kvm_vcpu_is_preempted:"
+"movq __per_cpu_offset(,%rdi,8), %rax;"
+"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+"setne %al;"
+"ret;"
+".popsection");
+
+#endif
+
/*
* Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
*/
@@ -620,18 +644,4 @@ void __init kvm_spinlock_init(void)
}
}
-static __init int kvm_spinlock_init_jump(void)
-{
- if (!kvm_para_available())
- return 0;
- if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
- return 0;
-
- static_key_slow_inc(&paravirt_ticketlocks_enabled);
- printk(KERN_INFO "KVM setup paravirtual spinlock\n");
-
- return 0;
-}
-early_initcall(kvm_spinlock_init_jump);
-
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 2a5cafd..bae6ea6 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
#include <asm/x86_init.h>
#include <asm/reboot.h>
+#include <asm/kvmclock.h>
static int kvmclock __ro_after_init = 1;
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
@@ -49,6 +50,7 @@ struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void)
{
return hv_clock;
}
+EXPORT_SYMBOL_GPL(pvclock_pvti_cpu0_va);
/*
* The wallclock is the time of day when we booted. Since then, some time may
@@ -107,12 +109,12 @@ static inline void kvm_sched_clock_init(bool stable)
{
if (!stable) {
pv_time_ops.sched_clock = kvm_clock_read;
+ clear_sched_clock_stable();
return;
}
kvm_sched_clock_offset = kvm_clock_read();
pv_time_ops.sched_clock = kvm_sched_clock_read;
- set_sched_clock_stable();
printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n",
kvm_sched_clock_offset);
@@ -174,13 +176,14 @@ bool kvm_check_and_clear_guest_paused(void)
return ret;
}
-static struct clocksource kvm_clock = {
+struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_get_cycles,
.rating = 400,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+EXPORT_SYMBOL_GPL(kvm_clock);
int kvm_register_clock(char *txt)
{
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 6d4bf81..8f2d1c9 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -20,7 +20,7 @@ bool pv_is_native_spin_unlock(void)
__raw_callee_save___native_queued_spin_unlock;
}
-__visible bool __native_vcpu_is_preempted(int cpu)
+__visible bool __native_vcpu_is_preempted(long cpu)
{
return false;
}
@@ -42,6 +42,3 @@ struct pv_lock_ops pv_lock_ops = {
#endif /* SMP */
};
EXPORT_SYMBOL(pv_lock_ops);
-
-struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
-EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index a1bfba0..4797e87 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
+ .set_pud_at = native_set_pud_at,
.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 5d400ba..0c150c0 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -296,7 +296,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
/* were we called with bad_dma_address? */
badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE);
- if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) {
+ if (unlikely(dma_addr < badend)) {
WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
"address 0x%Lx\n", dma_addr);
return;
@@ -478,7 +478,7 @@ static void calgary_free_coherent(struct device *dev, size_t size,
free_pages((unsigned long)vaddr, get_order(size));
}
-static struct dma_map_ops calgary_dma_ops = {
+static const struct dma_map_ops calgary_dma_ops = {
.alloc = calgary_alloc_coherent,
.free = calgary_free_coherent,
.map_sg = calgary_map_sg,
@@ -1177,7 +1177,7 @@ static int __init calgary_init(void)
tbl = find_iommu_table(&dev->dev);
if (translation_enabled(tbl))
- dev->dev.archdata.dma_ops = &calgary_dma_ops;
+ dev->dev.dma_ops = &calgary_dma_ops;
}
return ret;
@@ -1201,7 +1201,7 @@ error:
calgary_disable_translation(dev);
calgary_free_bus(dev);
pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
- dev->dev.archdata.dma_ops = NULL;
+ dev->dev.dma_ops = NULL;
} while (1);
return ret;
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index d30c377..3a216ec 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -17,7 +17,7 @@
static int forbid_dac __read_mostly;
-struct dma_map_ops *dma_ops = &nommu_dma_ops;
+const struct dma_map_ops *dma_ops = &nommu_dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -91,7 +91,8 @@ again:
page = NULL;
/* CMA can be used only in the context which permits sleeping */
if (gfpflags_allow_blocking(flag)) {
- page = dma_alloc_from_contiguous(dev, count, get_order(size));
+ page = dma_alloc_from_contiguous(dev, count, get_order(size),
+ flag);
if (page && page_to_phys(page) + size > dma_mask) {
dma_release_from_contiguous(dev, page, count);
page = NULL;
@@ -214,7 +215,7 @@ early_param("iommu", iommu_setup);
int dma_supported(struct device *dev, u64 mask)
{
- struct dma_map_ops *ops = get_dma_ops(dev);
+ const struct dma_map_ops *ops = get_dma_ops(dev);
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 00e71ce..a88952e 100644
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -88,7 +88,7 @@ static void nommu_sync_sg_for_device(struct device *dev,
flush_write_buffers();
}
-struct dma_map_ops nommu_dma_ops = {
+const struct dma_map_ops nommu_dma_ops = {
.alloc = dma_generic_alloc_coherent,
.free = dma_generic_free_coherent,
.map_sg = nommu_map_sg,
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 410efb2..1e23577 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -45,7 +45,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
}
-static struct dma_map_ops swiotlb_dma_ops = {
+static const struct dma_map_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc = x86_swiotlb_alloc_coherent,
.free = x86_swiotlb_free_coherent,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b615a11..7780efa 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
#include <asm/mce.h>
#include <asm/vm86.h>
#include <asm/switch_to.h>
+#include <asm/desc.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -64,6 +65,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
+DEFINE_PER_CPU(bool, need_tr_refresh);
+EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh);
+
/*
* this gets called so that we can store lazy state into memory and copy the
* current task into the new thread.
@@ -209,6 +213,12 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
+
+ /*
+ * Make sure that the TSS limit is correct for the CPU
+ * to notice the IO bitmap.
+ */
+ refresh_TR();
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
/*
* Clear any possible leftover bits:
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a1114ff..cab13f7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -552,7 +552,9 @@ static void __init reserve_crashkernel(void)
/* 0 means: find the address automatically */
if (crash_base <= 0) {
/*
- * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
+ * Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
+ * as old kexec-tools loads bzImage below that, unless
+ * "crashkernel=size[KMG],high" is specified.
*/
crash_base = memblock_find_in_range(CRASH_ALIGN,
high ? CRASH_ADDR_HIGH_MAX
@@ -1152,6 +1154,20 @@ void __init setup_arch(char **cmdline_p)
/* Allocate bigger log buffer */
setup_log_buf(1);
+ if (efi_enabled(EFI_BOOT)) {
+ switch (boot_params.secure_boot) {
+ case efi_secureboot_mode_disabled:
+ pr_info("Secure boot disabled\n");
+ break;
+ case efi_secureboot_mode_enabled:
+ pr_info("Secure boot enabled\n");
+ break;
+ default:
+ pr_info("Secure boot could not be determined\n");
+ break;
+ }
+ }
+
reserve_initrd();
acpi_table_upgrade();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 68f8cc2..d3c66a1 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void)
scheduler_ipi();
}
-__visible void smp_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
__smp_reschedule_interrupt();
@@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs)
*/
}
-__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
{
/*
* Need to call irq_enter() before calling the trace point.
@@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void)
inc_irq_stat(irq_call_count);
}
-__visible void smp_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
__smp_call_function_interrupt();
exiting_irq();
}
-__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
@@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void)
inc_irq_stat(irq_call_count);
}
-__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_call_function_single_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
__smp_call_function_single_interrupt();
exiting_irq();
}
-__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+__visible void __irq_entry
+smp_trace_call_function_single_interrupt(struct pt_regs *regs)
{
ipi_entering_ack_irq();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 46732dc..a0d3868 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
- per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
- c->cpu_core_id == o->cpu_core_id)
- return topology_sane(c, o, "smt");
+ per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
+ if (c->cpu_core_id == o->cpu_core_id)
+ return topology_sane(c, o, "smt");
+
+ if ((c->cu_id != 0xff) &&
+ (o->cu_id != 0xff) &&
+ (c->cu_id == o->cu_id))
+ return topology_sane(c, o, "smt");
+ }
} else if (c->phys_proc_id == o->phys_proc_id &&
c->cpu_core_id == o->cpu_core_id) {
@@ -1341,8 +1347,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
pr_info("CPU0: ");
print_cpu_info(&cpu_data(0));
- if (is_uv_system())
- uv_system_init();
+ uv_system_init();
set_mtrr_aps_delayed_init();
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
deleted file mode 100644
index a3b875c..0000000
--- a/arch/x86/kernel/test_nx.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * test_nx.c: functional test for NX functionality
- *
- * (C) Copyright 2008 Intel Corporation
- * Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <linux/module.h>
-#include <linux/sort.h>
-#include <linux/slab.h>
-
-#include <linux/uaccess.h>
-#include <asm/asm.h>
-
-extern int rodata_test_data;
-
-/*
- * This file checks 4 things:
- * 1) Check if the stack is not executable
- * 2) Check if kmalloc memory is not executable
- * 3) Check if the .rodata section is not executable
- * 4) Check if the .data section of a module is not executable
- *
- * To do this, the test code tries to execute memory in stack/kmalloc/etc,
- * and then checks if the expected trap happens.
- *
- * Sadly, this implies having a dynamic exception handling table entry.
- * ... which can be done (and will make Rusty cry)... but it can only
- * be done in a stand-alone module with only 1 entry total.
- * (otherwise we'd have to sort and that's just too messy)
- */
-
-
-
-/*
- * We want to set up an exception handling point on our stack,
- * which means a variable value. This function is rather dirty
- * and walks the exception table of the module, looking for a magic
- * marker and replaces it with a specific function.
- */
-static void fudze_exception_table(void *marker, void *new)
-{
- struct module *mod = THIS_MODULE;
- struct exception_table_entry *extable;
-
- /*
- * Note: This module has only 1 exception table entry,
- * so searching and sorting is not needed. If that changes,
- * this would be the place to search and re-sort the exception
- * table.
- */
- if (mod->num_exentries > 1) {
- printk(KERN_ERR "test_nx: too many exception table entries!\n");
- printk(KERN_ERR "test_nx: test results are not reliable.\n");
- return;
- }
- extable = (struct exception_table_entry *)mod->extable;
- extable[0].insn = (unsigned long)new;
-}
-
-
-/*
- * exception tables get their symbols translated so we need
- * to use a fake function to put in there, which we can then
- * replace at runtime.
- */
-void foo_label(void);
-
-/*
- * returns 0 for not-executable, negative for executable
- *
- * Note: we cannot allow this function to be inlined, because
- * that would give us more than 1 exception table entry.
- * This in turn would break the assumptions above.
- */
-static noinline int test_address(void *address)
-{
- unsigned long result;
-
- /* Set up an exception table entry for our address */
- fudze_exception_table(&foo_label, address);
- result = 1;
- asm volatile(
- "foo_label:\n"
- "0: call *%[fake_code]\n"
- "1:\n"
- ".section .fixup,\"ax\"\n"
- "2: mov %[zero], %[rslt]\n"
- " ret\n"
- ".previous\n"
- _ASM_EXTABLE(0b,2b)
- : [rslt] "=r" (result)
- : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result)
- );
- /* change the exception table back for the next round */
- fudze_exception_table(address, &foo_label);
-
- if (result)
- return -ENODEV;
- return 0;
-}
-
-static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */
-
-static int test_NX(void)
-{
- int ret = 0;
- /* 0xC3 is the opcode for "ret" */
- char stackcode[] = {0xC3, 0x90, 0 };
- char *heap;
-
- test_data = 0xC3;
-
- printk(KERN_INFO "Testing NX protection\n");
-
- /* Test 1: check if the stack is not executable */
- if (test_address(&stackcode)) {
- printk(KERN_ERR "test_nx: stack was executable\n");
- ret = -ENODEV;
- }
-
-
- /* Test 2: Check if the heap is executable */
- heap = kmalloc(64, GFP_KERNEL);
- if (!heap)
- return -ENOMEM;
- heap[0] = 0xC3; /* opcode for "ret" */
-
- if (test_address(heap)) {
- printk(KERN_ERR "test_nx: heap was executable\n");
- ret = -ENODEV;
- }
- kfree(heap);
-
- /*
- * The following 2 tests currently fail, this needs to get fixed
- * Until then, don't run them to avoid too many people getting scared
- * by the error message
- */
-
- /* Test 3: Check if the .rodata section is executable */
- if (rodata_test_data != 0xC3) {
- printk(KERN_ERR "test_nx: .rodata marker has invalid value\n");
- ret = -ENODEV;
- } else if (test_address(&rodata_test_data)) {
- printk(KERN_ERR "test_nx: .rodata section is executable\n");
- ret = -ENODEV;
- }
-
-#if 0
- /* Test 4: Check if the .data section of a module is executable */
- if (test_address(&test_data)) {
- printk(KERN_ERR "test_nx: .data section is executable\n");
- ret = -ENODEV;
- }
-
-#endif
- return ret;
-}
-
-static void test_exit(void)
-{
-}
-
-module_init(test_NX);
-module_exit(test_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Testcase for the NX infrastructure");
-MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
deleted file mode 100644
index 222e84e..0000000
--- a/arch/x86/kernel/test_rodata.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * test_rodata.c: functional test for mark_rodata_ro function
- *
- * (C) Copyright 2008 Intel Corporation
- * Author: Arjan van de Ven <arjan@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-#include <asm/cacheflush.h>
-#include <asm/sections.h>
-#include <asm/asm.h>
-
-int rodata_test(void)
-{
- unsigned long result;
- unsigned long start, end;
-
- /* test 1: read the value */
- /* If this test fails, some previous testrun has clobbered the state */
- if (!rodata_test_data) {
- printk(KERN_ERR "rodata_test: test 1 fails (start data)\n");
- return -ENODEV;
- }
-
- /* test 2: write to the variable; this should fault */
- /*
- * If this test fails, we managed to overwrite the data
- *
- * This is written in assembly to be able to catch the
- * exception that is supposed to happen in the correct
- * case
- */
-
- result = 1;
- asm volatile(
- "0: mov %[zero],(%[rodata_test])\n"
- " mov %[zero], %[rslt]\n"
- "1:\n"
- ".section .fixup,\"ax\"\n"
- "2: jmp 1b\n"
- ".previous\n"
- _ASM_EXTABLE(0b,2b)
- : [rslt] "=r" (result)
- : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL)
- );
-
-
- if (!result) {
- printk(KERN_ERR "rodata_test: test data was not read only\n");
- return -ENODEV;
- }
-
- /* test 3: check the value hasn't changed */
- /* If this test fails, we managed to overwrite the data */
- if (!rodata_test_data) {
- printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n");
- return -ENODEV;
- }
- /* test 4: check if the rodata section is 4Kb aligned */
- start = (unsigned long)__start_rodata;
- end = (unsigned long)__end_rodata;
- if (start & (PAGE_SIZE - 1)) {
- printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n");
- return -ENODEV;
- }
- if (end & (PAGE_SIZE - 1)) {
- printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n");
- return -ENODEV;
- }
-
- return 0;
-}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bf0c6d0..1dc86ee 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -563,11 +563,9 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
- preempt_disable();
cond_local_irq_enable(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
cond_local_irq_disable(regs);
- preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
ist_exit(regs);
@@ -742,14 +740,12 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
- preempt_disable();
cond_local_irq_enable(regs);
if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
cond_local_irq_disable(regs);
- preempt_enable_no_resched();
debug_stack_usage_dec();
goto exit;
}
@@ -769,7 +765,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
cond_local_irq_disable(regs);
- preempt_enable_no_resched();
debug_stack_usage_dec();
exit:
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e41af59..2724dc8 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1107,6 +1107,16 @@ static u64 read_tsc(struct clocksource *cs)
return (u64)rdtsc_ordered();
}
+static void tsc_cs_mark_unstable(struct clocksource *cs)
+{
+ if (tsc_unstable)
+ return;
+ tsc_unstable = 1;
+ clear_sched_clock_stable();
+ disable_sched_clock_irqtime();
+ pr_info("Marking TSC unstable due to clocksource watchdog\n");
+}
+
/*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/
@@ -1119,6 +1129,7 @@ static struct clocksource clocksource_tsc = {
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
+ .mark_unstable = tsc_cs_mark_unstable,
};
void mark_tsc_unstable(char *reason)
@@ -1356,6 +1367,9 @@ void __init tsc_init(void)
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
+ /* Sanitize TSC ADJUST before cyc2ns gets initialized */
+ tsc_store_and_check_tsc_adjust(true);
+
/*
* Secondary CPUs do not run through tsc_init(), so set up
* all the scale factors for all CPUs, assuming the same
@@ -1386,8 +1400,6 @@ void __init tsc_init(void)
if (unsynchronized_tsc())
mark_tsc_unstable("TSCs unsynchronized");
- else
- tsc_store_and_check_tsc_adjust(true);
check_system_tsc_reliable();
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index d0db011..728f753 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -286,13 +286,6 @@ void check_tsc_sync_source(int cpu)
if (unsynchronized_tsc())
return;
- if (tsc_clocksource_reliable) {
- if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
- pr_info(
- "Skipped synchronization checks as TSC is reliable.\n");
- return;
- }
-
/*
* Set the maximum number of test runs to
* 1 if the CPU does not provide the TSC_ADJUST MSR
@@ -380,14 +373,19 @@ void check_tsc_sync_target(void)
int cpus = 2;
/* Also aborts if there is no TSC. */
- if (unsynchronized_tsc() || tsc_clocksource_reliable)
+ if (unsynchronized_tsc())
return;
/*
* Store, verify and sanitize the TSC adjust register. If
* successful skip the test.
+ *
+ * The test is also skipped when the TSC is marked reliable. This
+ * is true for SoCs which have no fallback clocksource. On these
+ * SoCs the TSC is frequency synchronized, but still the TSC ADJUST
+ * register might have been wreckaged by the BIOS..
*/
- if (tsc_store_and_check_tsc_adjust(false)) {
+ if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
atomic_inc(&skip_test);
return;
}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index ec5d754..0442d98 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -160,11 +160,12 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
static void mark_screen_rdonly(struct mm_struct *mm)
{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- spinlock_t *ptl;
int i;
down_write(&mm->mmap_sem);
@@ -177,7 +178,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
pmd = pmd_offset(pud, 0xA0000);
if (pmd_trans_huge(*pmd)) {
- struct vm_area_struct *vma = find_vma(mm, 0xA0000);
+ vma = find_vma(mm, 0xA0000);
split_huge_pmd(vma, pmd, 0xA0000);
}
if (pmd_none_or_clear_bad(pmd))
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e79f15f..ad0118f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -346,6 +346,7 @@ SECTIONS
/DISCARD/ : {
*(.eh_frame)
*(__func_stack_frame_non_standard)
+ *(__unreachable)
}
}
OpenPOWER on IntegriCloud