summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2013-11-14 17:38:05 -0800
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2013-11-14 17:38:05 -0800
commit42249094f79422fbf5ed4b54eeb48ff096809b8f (patch)
tree91e6850c8c7e8cc284cf8bb6363f8662f84011f4 /arch/x86/kernel
parent936816161978ca716a56c5e553c68f25972b1e3a (diff)
parent2c027b7c48a888ab173ba45babb4525e278375d9 (diff)
downloadop-kernel-dev-42249094f79422fbf5ed4b54eeb48ff096809b8f.zip
op-kernel-dev-42249094f79422fbf5ed4b54eeb48ff096809b8f.tar.gz
Merge branch 'next' into for-linus
Merge first round of changes for 3.13 merge window.
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile9
-rw-r--r--arch/x86/kernel/acpi/boot.c38
-rw-r--r--arch/x86/kernel/acpi/sleep.c22
-rw-r--r--arch/x86/kernel/acpi/sleep.h2
-rw-r--r--arch/x86/kernel/alternative.c155
-rw-r--r--arch/x86/kernel/amd_nb.c13
-rw-r--r--arch/x86/kernel/apic/apic.c103
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c1
-rw-r--r--arch/x86/kernel/apic/io_apic.c14
-rw-r--r--arch/x86/kernel/apic/numaq_32.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c82
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c59
-rw-r--r--arch/x86/kernel/cpu/bugs.c21
-rw-r--r--arch/x86/kernel/cpu/centaur.c26
-rw-r--r--arch/x86/kernel/cpu/common.c107
-rw-r--r--arch/x86/kernel/cpu/cyrix.c42
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c17
-rw-r--r--arch/x86/kernel/cpu/intel.c30
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c107
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c56
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c14
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c54
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c139
-rw-r--r--arch/x86/kernel/cpu/mcheck/threshold.c24
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c13
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c25
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c87
-rw-r--r--arch/x86/kernel/cpu/perf_event.c85
-rw-r--r--arch/x86/kernel/cpu/perf_event.h26
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c37
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.c502
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_iommu.h40
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_uncore.c31
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c321
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c211
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_lbr.c69
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c308
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.h14
-rw-r--r--arch/x86/kernel/cpu/powerflags.c8
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/cpu/rdrand.c2
-rw-r--r--arch/x86/kernel/cpu/scattered.c4
-rw-r--r--arch/x86/kernel/cpu/topology.c2
-rw-r--r--arch/x86/kernel/cpu/transmeta.c6
-rw-r--r--arch/x86/kernel/cpu/umc.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c10
-rw-r--r--arch/x86/kernel/cpuid.c7
-rw-r--r--arch/x86/kernel/crash.c4
-rw-r--r--arch/x86/kernel/devicetree.c9
-rw-r--r--arch/x86/kernel/doublefault.c (renamed from arch/x86/kernel/doublefault_32.c)15
-rw-r--r--arch/x86/kernel/e820.c5
-rw-r--r--arch/x86/kernel/early-quirks.c168
-rw-r--r--arch/x86/kernel/entry_32.S15
-rw-r--r--arch/x86/kernel/entry_64.S53
-rw-r--r--arch/x86/kernel/head32.c2
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S24
-rw-r--r--arch/x86/kernel/head_64.S9
-rw-r--r--arch/x86/kernel/hw_breakpoint.c3
-rw-r--r--arch/x86/kernel/i387.c69
-rw-r--r--arch/x86/kernel/irq.c37
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_work.c24
-rw-r--r--arch/x86/kernel/jump_label.c84
-rw-r--r--arch/x86/kernel/kprobes/common.h5
-rw-r--r--arch/x86/kernel/kprobes/core.c18
-rw-r--r--arch/x86/kernel/kprobes/opt.c115
-rw-r--r--arch/x86/kernel/kvm.c278
-rw-r--r--arch/x86/kernel/kvmclock.c12
-rw-r--r--arch/x86/kernel/microcode_amd.c148
-rw-r--r--arch/x86/kernel/microcode_amd_early.c301
-rw-r--r--arch/x86/kernel/microcode_core.c2
-rw-r--r--arch/x86/kernel/microcode_core_early.c55
-rw-r--r--arch/x86/kernel/microcode_intel_early.c32
-rw-r--r--arch/x86/kernel/mmconf-fam10h_64.c12
-rw-r--r--arch/x86/kernel/msr.c6
-rw-r--r--arch/x86/kernel/nmi.c37
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c18
-rw-r--r--arch/x86/kernel/paravirt.c9
-rw-r--r--arch/x86/kernel/process.c16
-rw-r--r--arch/x86/kernel/process_32.c15
-rw-r--r--arch/x86/kernel/process_64.c15
-rw-r--r--arch/x86/kernel/ptrace.c204
-rw-r--r--arch/x86/kernel/pvclock.c44
-rw-r--r--arch/x86/kernel/reboot.c135
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S34
-rw-r--r--arch/x86/kernel/rtc.c17
-rw-r--r--arch/x86/kernel/setup.c31
-rw-r--r--arch/x86/kernel/signal.c28
-rw-r--r--arch/x86/kernel/smp.c72
-rw-r--r--arch/x86/kernel/smpboot.c39
-rw-r--r--arch/x86/kernel/sys_x86_64.c2
-rw-r--r--arch/x86/kernel/syscall_32.c2
-rw-r--r--arch/x86/kernel/syscall_64.c5
-rw-r--r--arch/x86/kernel/sysfb.c74
-rw-r--r--arch/x86/kernel/sysfb_efi.c214
-rw-r--r--arch/x86/kernel/sysfb_simplefb.c95
-rw-r--r--arch/x86/kernel/tboot.c89
-rw-r--r--arch/x86/kernel/tracepoint.c59
-rw-r--r--arch/x86/kernel/traps.c23
-rw-r--r--arch/x86/kernel/tsc.c10
-rw-r--r--arch/x86/kernel/tsc_sync.c18
-rw-r--r--arch/x86/kernel/vsyscall_64.c6
-rw-r--r--arch/x86/kernel/x86_init.c28
-rw-r--r--arch/x86/kernel/xsave.c9
118 files changed, 4356 insertions, 1501 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 7bd3bd3..a5408b9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -16,6 +16,8 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
+CFLAGS_irq.o := -I$(src)/../include/asm/trace
+
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
@@ -67,7 +69,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
+obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
@@ -93,6 +95,7 @@ obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o
+obj-$(CONFIG_MICROCODE_AMD_EARLY) += microcode_amd_early.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
@@ -100,8 +103,12 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o
+obj-y += sysfb.o
+obj-$(CONFIG_X86_SYSFB) += sysfb_simplefb.o
+obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
+obj-$(CONFIG_TRACING) += tracepoint.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 230c8ea..40c7660 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -44,6 +44,7 @@
#include <asm/mpspec.h>
#include <asm/smp.h>
+#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
u32 acpi_rsdt_forced;
int acpi_disabled;
@@ -66,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
+int acpi_disable_cmcff;
u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
@@ -140,16 +142,8 @@ static u32 irq_to_gsi(int irq)
}
/*
- * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
- * to map the target physical address. The problem is that set_fixmap()
- * provides a single page, and it is possible that the page is not
- * sufficient.
- * By using this area, we can map up to MAX_IO_APICS pages temporarily,
- * i.e. until the next __va_range() call.
- *
- * Important Safety Note: The fixed I/O APIC page numbers are *subtracted*
- * from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
- * count idx down while incrementing the phys address.
+ * This is just a simple wrapper around early_ioremap(),
+ * with sanity checks for phys == 0 and size == 0.
*/
char *__init __acpi_map_table(unsigned long phys, unsigned long size)
{
@@ -159,6 +153,7 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
return early_ioremap(phys, size);
}
+
void __init __acpi_unmap_table(char *map, unsigned long size)
{
if (!map || !size)
@@ -194,11 +189,11 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
return 0;
}
-static void __cpuinit acpi_register_lapic(int id, u8 enabled)
+static void acpi_register_lapic(int id, u8 enabled)
{
unsigned int ver = 0;
- if (id >= (MAX_LOCAL_APIC-1)) {
+ if (id >= MAX_LOCAL_APIC) {
printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
return;
}
@@ -559,6 +554,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity) = acpi_register_gsi_pic;
+#ifdef CONFIG_ACPI_SLEEP
+int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
+#else
+int (*acpi_suspend_lowlevel)(void);
+#endif
+
/*
* success: return IRQ number (>=0)
* failure: return < 0
@@ -600,7 +601,7 @@ void __init acpi_set_irq_model_ioapic(void)
#ifdef CONFIG_ACPI_HOTPLUG_CPU
#include <acpi/processor.h>
-static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
{
#ifdef CONFIG_ACPI_NUMA
int nid;
@@ -613,7 +614,7 @@ static void __cpuinit acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
#endif
}
-static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
+static int _acpi_map_lsapic(acpi_handle handle, int *pcpu)
{
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *obj;
@@ -1113,6 +1114,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
int ioapic;
int ioapic_pin;
struct io_apic_irq_attr irq_attr;
+ int ret;
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
return gsi;
@@ -1142,7 +1144,9 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr);
+ ret = io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr);
+ if (ret < 0)
+ gsi = INT_MIN;
return gsi;
}
@@ -1619,6 +1623,10 @@ static int __init parse_acpi(char *arg)
/* "acpi=copy_dsdt" copys DSDT */
else if (strcmp(arg, "copy_dsdt") == 0) {
acpi_gbl_copy_dsdt_locally = 1;
+ }
+ /* "acpi=nocmcff" disables FF mode for corrected errors */
+ else if (strcmp(arg, "nocmcff") == 0) {
+ acpi_disable_cmcff = 1;
} else {
/* Core will printk when we return error. */
return -EINVAL;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index b44577b..3312010 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -26,12 +26,12 @@ static char temp_stack[4096];
#endif
/**
- * acpi_suspend_lowlevel - save kernel state
+ * x86_acpi_suspend_lowlevel - save kernel state
*
* Create an identity mapped page table and copy the wakeup routine to
* low memory.
*/
-int acpi_suspend_lowlevel(void)
+int x86_acpi_suspend_lowlevel(void)
{
struct wakeup_header *header =
(struct wakeup_header *) __va(real_mode_header->wakeup_header);
@@ -48,9 +48,20 @@ int acpi_suspend_lowlevel(void)
#ifndef CONFIG_64BIT
native_store_gdt((struct desc_ptr *)&header->pmode_gdt);
+ /*
+ * We have to check that we can write back the value, and not
+ * just read it. At least on 90 nm Pentium M (Family 6, Model
+ * 13), reading an invalid MSR is not guaranteed to trap, see
+ * Erratum X4 in "Intel Pentium M Processor on 90 nm Process
+ * with 2-MB L2 Cache and Intel® Processor A100 and A110 on 90
+ * nm process with 512-KB L2 Cache Specification Update".
+ */
if (!rdmsr_safe(MSR_EFER,
&header->pmode_efer_low,
- &header->pmode_efer_high))
+ &header->pmode_efer_high) &&
+ !wrmsr_safe(MSR_EFER,
+ header->pmode_efer_low,
+ header->pmode_efer_high))
header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_EFER);
#endif /* !CONFIG_64BIT */
@@ -61,7 +72,10 @@ int acpi_suspend_lowlevel(void)
}
if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
&header->pmode_misc_en_low,
- &header->pmode_misc_en_high))
+ &header->pmode_misc_en_high) &&
+ !wrmsr_safe(MSR_IA32_MISC_ENABLE,
+ header->pmode_misc_en_low,
+ header->pmode_misc_en_high))
header->pmode_behavior |=
(1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
header->realmode_flags = acpi_realmode_flags;
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 67f59f8..c9c2c98 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -15,3 +15,5 @@ extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
extern void do_suspend_lowlevel(void);
+
+extern int x86_acpi_suspend_lowlevel(void);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index c15cf9a..15e8563 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
+#include <linux/kdebug.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -596,97 +597,93 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
return addr;
}
-/*
- * Cross-modifying kernel text with stop_machine().
- * This code originally comes from immediate value.
- */
-static atomic_t stop_machine_first;
-static int wrote_text;
+static void do_sync_core(void *info)
+{
+ sync_core();
+}
-struct text_poke_params {
- struct text_poke_param *params;
- int nparams;
-};
+static bool bp_patching_in_progress;
+static void *bp_int3_handler, *bp_int3_addr;
-static int __kprobes stop_machine_text_poke(void *data)
+int poke_int3_handler(struct pt_regs *regs)
{
- struct text_poke_params *tpp = data;
- struct text_poke_param *p;
- int i;
+ /* bp_patching_in_progress */
+ smp_rmb();
- if (atomic_xchg(&stop_machine_first, 0)) {
- for (i = 0; i < tpp->nparams; i++) {
- p = &tpp->params[i];
- text_poke(p->addr, p->opcode, p->len);
- }
- smp_wmb(); /* Make sure other cpus see that this has run */
- wrote_text = 1;
- } else {
- while (!wrote_text)
- cpu_relax();
- smp_mb(); /* Load wrote_text before following execution */
- }
+ if (likely(!bp_patching_in_progress))
+ return 0;
- for (i = 0; i < tpp->nparams; i++) {
- p = &tpp->params[i];
- flush_icache_range((unsigned long)p->addr,
- (unsigned long)p->addr + p->len);
- }
- /*
- * Intel Archiecture Software Developer's Manual section 7.1.3 specifies
- * that a core serializing instruction such as "cpuid" should be
- * executed on _each_ core before the new instruction is made visible.
- */
- sync_core();
- return 0;
-}
+ if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+ return 0;
+
+ /* set up the specified breakpoint handler */
+ regs->ip = (unsigned long) bp_int3_handler;
+
+ return 1;
-/**
- * text_poke_smp - Update instructions on a live kernel on SMP
- * @addr: address to modify
- * @opcode: source of the copy
- * @len: length to copy
- *
- * Modify multi-byte instruction by using stop_machine() on SMP. This allows
- * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
- * should be allowed, since stop_machine() does _not_ protect code against
- * NMI and MCE.
- *
- * Note: Must be called under get_online_cpus() and text_mutex.
- */
-void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
-{
- struct text_poke_params tpp;
- struct text_poke_param p;
-
- p.addr = addr;
- p.opcode = opcode;
- p.len = len;
- tpp.params = &p;
- tpp.nparams = 1;
- atomic_set(&stop_machine_first, 1);
- wrote_text = 0;
- /* Use __stop_machine() because the caller already got online_cpus. */
- __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
- return addr;
}
/**
- * text_poke_smp_batch - Update instructions on a live kernel on SMP
- * @params: an array of text_poke parameters
- * @n: the number of elements in params.
+ * text_poke_bp() -- update instructions on live kernel on SMP
+ * @addr: address to patch
+ * @opcode: opcode of new instruction
+ * @len: length to copy
+ * @handler: address to jump to when the temporary breakpoint is hit
*
- * Modify multi-byte instruction by using stop_machine() on SMP. Since the
- * stop_machine() is heavy task, it is better to aggregate text_poke requests
- * and do it once if possible.
+ * Modify multi-byte instruction by using int3 breakpoint on SMP.
+ * We completely avoid stop_machine() here, and achieve the
+ * synchronization using int3 breakpoint.
*
- * Note: Must be called under get_online_cpus() and text_mutex.
+ * The way it is done:
+ * - add a int3 trap to the address that will be patched
+ * - sync cores
+ * - update all but the first byte of the patched range
+ * - sync cores
+ * - replace the first byte (int3) by the first byte of
+ * replacing opcode
+ * - sync cores
+ *
+ * Note: must be called under text_mutex.
*/
-void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
- struct text_poke_params tpp = {.params = params, .nparams = n};
+ unsigned char int3 = 0xcc;
+
+ bp_int3_handler = handler;
+ bp_int3_addr = (u8 *)addr + sizeof(int3);
+ bp_patching_in_progress = true;
+ /*
+ * Corresponding read barrier in int3 notifier for
+ * making sure the in_progress flags is correctly ordered wrt.
+ * patching
+ */
+ smp_wmb();
+
+ text_poke(addr, &int3, sizeof(int3));
- atomic_set(&stop_machine_first, 1);
- wrote_text = 0;
- __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
+ on_each_cpu(do_sync_core, NULL, 1);
+
+ if (len - sizeof(int3) > 0) {
+ /* patch all but the first byte */
+ text_poke((char *)addr + sizeof(int3),
+ (const char *) opcode + sizeof(int3),
+ len - sizeof(int3));
+ /*
+ * According to Intel, this core syncing is very likely
+ * not necessary and we'd be safe even without it. But
+ * better safe than sorry (plus there's not only Intel).
+ */
+ on_each_cpu(do_sync_core, NULL, 1);
+ }
+
+ /* patch the first byte */
+ text_poke(addr, opcode, sizeof(int3));
+
+ on_each_cpu(do_sync_core, NULL, 1);
+
+ bp_patching_in_progress = false;
+ smp_wmb();
+
+ return addr;
}
+
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3048ded..59554dc 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{}
};
@@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{}
};
@@ -81,13 +83,20 @@ int amd_cache_northbridges(void)
next_northbridge(misc, amd_nb_misc_ids);
node_to_amd_nb(i)->link = link =
next_northbridge(link, amd_nb_link_ids);
- }
+ }
+ /* GART present only on Fam15h upto model 0fh */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
- boot_cpu_data.x86 == 0x15)
+ (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
amd_northbridges.flags |= AMD_NB_GART;
/*
+ * Check for L3 cache presence.
+ */
+ if (!cpuid_edx(0x80000006))
+ return 0;
+
+ /*
* Some CPU families support L3 Cache Index Disable. There are some
* limitations because of E382 and E388 on family 0x10.
*/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 904611b..a7eb82d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -35,6 +35,7 @@
#include <linux/smp.h>
#include <linux/mm.h>
+#include <asm/trace/irq_vectors.h>
#include <asm/irq_remapping.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
@@ -57,7 +58,7 @@
unsigned int num_processors;
-unsigned disabled_cpus __cpuinitdata;
+unsigned disabled_cpus;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
@@ -543,7 +544,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
* Setup the local APIC timer for this CPU. Copy the initialized values
* of the boot CPU and register the clock event in the framework.
*/
-static void __cpuinit setup_APIC_timer(void)
+static void setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
@@ -865,7 +866,7 @@ void __init setup_boot_APIC_clock(void)
setup_APIC_timer();
}
-void __cpuinit setup_secondary_APIC_clock(void)
+void setup_secondary_APIC_clock(void)
{
setup_APIC_timer();
}
@@ -912,24 +913,42 @@ static void local_apic_timer_interrupt(void)
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
-void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
+__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
/*
* NOTE! We'd better ACK the irq immediately,
* because timer handling can be slow.
+ *
+ * update_process_times() expects us to have done irq_enter().
+ * Besides, if we don't timer interrupts ignore the global
+ * interrupt lock, which is the WrongThing (tm) to do.
*/
- ack_APIC_irq();
+ entering_ack_irq();
+ local_apic_timer_interrupt();
+ exiting_irq();
+
+ set_irq_regs(old_regs);
+}
+
+__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
/*
+ * NOTE! We'd better ACK the irq immediately,
+ * because timer handling can be slow.
+ *
* update_process_times() expects us to have done irq_enter().
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
- irq_enter();
- exit_idle();
+ entering_ack_irq();
+ trace_local_timer_entry(LOCAL_TIMER_VECTOR);
local_apic_timer_interrupt();
- irq_exit();
+ trace_local_timer_exit(LOCAL_TIMER_VECTOR);
+ exiting_irq();
set_irq_regs(old_regs);
}
@@ -1210,7 +1229,7 @@ void __init init_bsp_APIC(void)
apic_write(APIC_LVT1, value);
}
-static void __cpuinit lapic_setup_esr(void)
+static void lapic_setup_esr(void)
{
unsigned int oldvalue, value, maxlvt;
@@ -1257,7 +1276,7 @@ static void __cpuinit lapic_setup_esr(void)
* Used to setup local APIC while initializing BSP or bringin up APs.
* Always called with preemption disabled.
*/
-void __cpuinit setup_local_APIC(void)
+void setup_local_APIC(void)
{
int cpu = smp_processor_id();
unsigned int value, queued;
@@ -1452,7 +1471,7 @@ void __cpuinit setup_local_APIC(void)
#endif
}
-void __cpuinit end_local_APIC_setup(void)
+void end_local_APIC_setup(void)
{
lapic_setup_esr();
@@ -1907,12 +1926,10 @@ int __init APIC_init_uniprocessor(void)
/*
* This interrupt should _never_ happen with our APIC/SMP architecture
*/
-void smp_spurious_interrupt(struct pt_regs *regs)
+static inline void __smp_spurious_interrupt(void)
{
u32 v;
- irq_enter();
- exit_idle();
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1927,13 +1944,28 @@ void smp_spurious_interrupt(struct pt_regs *regs)
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
pr_info("spurious APIC interrupt on CPU#%d, "
"should never happen.\n", smp_processor_id());
- irq_exit();
+}
+
+__visible void smp_spurious_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ __smp_spurious_interrupt();
+ exiting_irq();
+}
+
+__visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
+ __smp_spurious_interrupt();
+ trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
+ exiting_irq();
}
/*
* This interrupt should never happen with our APIC/SMP architecture
*/
-void smp_error_interrupt(struct pt_regs *regs)
+static inline void __smp_error_interrupt(struct pt_regs *regs)
{
u32 v0, v1;
u32 i = 0;
@@ -1948,8 +1980,6 @@ void smp_error_interrupt(struct pt_regs *regs)
"Illegal register address", /* APIC Error Bit 7 */
};
- irq_enter();
- exit_idle();
/* First tickle the hardware, only then report what went on. -- REW */
v0 = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
@@ -1970,7 +2000,22 @@ void smp_error_interrupt(struct pt_regs *regs)
apic_printk(APIC_DEBUG, KERN_CONT "\n");
- irq_exit();
+}
+
+__visible void smp_error_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ __smp_error_interrupt(regs);
+ exiting_irq();
+}
+
+__visible void smp_trace_error_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ trace_error_apic_entry(ERROR_APIC_VECTOR);
+ __smp_error_interrupt(regs);
+ trace_error_apic_exit(ERROR_APIC_VECTOR);
+ exiting_irq();
}
/**
@@ -2062,7 +2107,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
apic_write(APIC_LVT1, value);
}
-void __cpuinit generic_processor_info(int apicid, int version)
+void generic_processor_info(int apicid, int version)
{
int cpu, max = nr_cpu_ids;
bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2302,7 +2347,7 @@ static void lapic_resume(void)
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
+#if defined(CONFIG_X86_MCE_INTEL)
if (maxlvt >= 5)
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
#endif
@@ -2332,7 +2377,7 @@ static struct syscore_ops lapic_syscore_ops = {
.suspend = lapic_suspend,
};
-static void __cpuinit apic_pm_activate(void)
+static void apic_pm_activate(void)
{
apic_pm_state.active = 1;
}
@@ -2357,7 +2402,7 @@ static void apic_pm_activate(void) { }
#ifdef CONFIG_X86_64
-static int __cpuinit apic_cluster_num(void)
+static int apic_cluster_num(void)
{
int i, clusters, zeros;
unsigned id;
@@ -2402,10 +2447,10 @@ static int __cpuinit apic_cluster_num(void)
return clusters;
}
-static int __cpuinitdata multi_checked;
-static int __cpuinitdata multi;
+static int multi_checked;
+static int multi;
-static int __cpuinit set_multi(const struct dmi_system_id *d)
+static int set_multi(const struct dmi_system_id *d)
{
if (multi)
return 0;
@@ -2414,7 +2459,7 @@ static int __cpuinit set_multi(const struct dmi_system_id *d)
return 0;
}
-static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+static const struct dmi_system_id multi_dmi_table[] = {
{
.callback = set_multi,
.ident = "IBM System Summit2",
@@ -2426,7 +2471,7 @@ static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
{}
};
-static void __cpuinit dmi_check_multi(void)
+static void dmi_check_multi(void)
{
if (multi_checked)
return;
@@ -2443,7 +2488,7 @@ static void __cpuinit dmi_check_multi(void)
* multi-chassis.
* Use DMI to check them
*/
-__cpuinit int apic_is_clustered_box(void)
+int apic_is_clustered_box(void)
{
dmi_check_multi();
if (multi)
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 9a91109..3e67f9e 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -74,7 +74,7 @@ static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
return initial_apic_id >> index_msb;
}
-static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
union numachip_csr_g3_ext_irq_gen int_gen;
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 0874799..c552247 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -130,7 +130,7 @@ int es7000_plat;
*/
-static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
{
unsigned long vect = 0, psaival = 0;
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 31cb9ae..a698d71 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -9,6 +9,7 @@
*
*/
#include <asm/apic.h>
+#include <asm/nmi.h>
#include <linux/cpumask.h>
#include <linux/kdebug.h>
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9ed796c..e63a5bd 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1534,6 +1534,11 @@ void intel_ir_io_apic_print_entries(unsigned int apic,
}
}
+void ioapic_zap_locks(void)
+{
+ raw_spin_lock_init(&ioapic_lock);
+}
+
__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
{
union IO_APIC_reg_00 reg_00;
@@ -3375,12 +3380,15 @@ int io_apic_setup_irq_pin_once(unsigned int irq, int node,
{
unsigned int ioapic_idx = attr->ioapic, pin = attr->ioapic_pin;
int ret;
+ struct IO_APIC_route_entry orig_entry;
/* Avoid redundant programming */
if (test_bit(pin, ioapics[ioapic_idx].pin_programmed)) {
- pr_debug("Pin %d-%d already programmed\n",
- mpc_ioapic_id(ioapic_idx), pin);
- return 0;
+ pr_debug("Pin %d-%d already programmed\n", mpc_ioapic_id(ioapic_idx), pin);
+ orig_entry = ioapic_read_entry(attr->ioapic, pin);
+ if (attr->trigger == orig_entry.trigger && attr->polarity == orig_entry.polarity)
+ return 0;
+ return -EBUSY;
}
ret = io_apic_setup_irq_pin(irq, node, attr);
if (!ret)
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index d661ee9..1e42e8f 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -105,7 +105,7 @@ static void __init smp_dump_qct(void)
}
}
-void __cpuinit numaq_tsc_disable(void)
+void numaq_tsc_disable(void)
{
if (!found_numaq)
return;
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index c88baa4..140e29d 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -148,7 +148,7 @@ static void init_x2apic_ldr(void)
/*
* At CPU state changes, update the x2apic cluster sibling info.
*/
-static int __cpuinit
+static int
update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int this_cpu = (unsigned long)hcpu;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 794f6eb..1191ac1 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -25,6 +25,7 @@
#include <linux/kdebug.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
+#include <linux/reboot.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
@@ -36,7 +37,6 @@
#include <asm/ipi.h>
#include <asm/smp.h>
#include <asm/x86_init.h>
-#include <asm/emergency-restart.h>
#include <asm/nmi.h>
/* BMC sets a bit this MMR non-zero before sending an NMI */
@@ -51,6 +51,8 @@ DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
static u64 gru_start_paddr, gru_end_paddr;
+static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
+static u64 gru_dist_lmask, gru_dist_umask;
static union uvh_apicid uvh_apicid;
int uv_min_hub_revision_id;
EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
@@ -72,7 +74,20 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr)
static inline bool is_GRU_range(u64 start, u64 end)
{
- return start >= gru_start_paddr && end <= gru_end_paddr;
+ if (gru_dist_base) {
+ u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */
+ u64 sl = start & gru_dist_lmask; /* base offset bits */
+ u64 eu = end & gru_dist_umask;
+ u64 el = end & gru_dist_lmask;
+
+ /* Must reside completely within a single GRU range */
+ return (sl == gru_dist_base && el == gru_dist_base &&
+ su >= gru_first_node_paddr &&
+ su <= gru_last_node_paddr &&
+ eu == su);
+ } else {
+ return start >= gru_start_paddr && end <= gru_end_paddr;
+ }
}
static bool uv_is_untracked_pat_range(u64 start, u64 end)
@@ -194,7 +209,7 @@ EXPORT_SYMBOL_GPL(uv_possible_blades);
unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
-static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
#ifdef CONFIG_SMP
unsigned long val;
@@ -401,7 +416,7 @@ static struct apic __refdata apic_x2apic_uv_x = {
.safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
};
-static __cpuinit void set_x2apic_extra_bits(int pnode)
+static void set_x2apic_extra_bits(int pnode)
{
__this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
}
@@ -463,26 +478,63 @@ static __init void map_high(char *id, unsigned long base, int pshift,
pr_info("UV: Map %s_HI base address NULL\n", id);
return;
}
- pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
+ pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
init_extra_mapping_wb(paddr, bytes);
}
+static __init void map_gru_distributed(unsigned long c)
+{
+ union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+ u64 paddr;
+ unsigned long bytes;
+ int nid;
+
+ gru.v = c;
+ /* only base bits 42:28 relevant in dist mode */
+ gru_dist_base = gru.v & 0x000007fff0000000UL;
+ if (!gru_dist_base) {
+ pr_info("UV: Map GRU_DIST base address NULL\n");
+ return;
+ }
+ bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+ gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
+ gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
+ gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
+ for_each_online_node(nid) {
+ paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
+ gru_dist_base;
+ init_extra_mapping_wb(paddr, bytes);
+ gru_first_node_paddr = min(paddr, gru_first_node_paddr);
+ gru_last_node_paddr = max(paddr, gru_last_node_paddr);
+ }
+ /* Save upper (63:M) bits of address only for is_GRU_range */
+ gru_first_node_paddr &= gru_dist_umask;
+ gru_last_node_paddr &= gru_dist_umask;
+ pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n",
+ gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
+}
+
static __init void map_gru_high(int max_pnode)
{
union uvh_rh_gam_gru_overlay_config_mmr_u gru;
int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
- if (gru.s.enable) {
- map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
- gru_start_paddr = ((u64)gru.s.base << shift);
- gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
- } else {
+ if (!gru.s.enable) {
pr_info("UV: GRU disabled\n");
+ return;
+ }
+
+ if (is_uv3_hub() && gru.s3.mode) {
+ map_gru_distributed(gru.v);
+ return;
}
+ map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
+ gru_start_paddr = ((u64)gru.s.base << shift);
+ gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
}
static __init void map_mmr_high(int max_pnode)
@@ -683,7 +735,7 @@ static void uv_heartbeat(unsigned long ignored)
mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
}
-static void __cpuinit uv_heartbeat_enable(int cpu)
+static void uv_heartbeat_enable(int cpu)
{
while (!uv_cpu_hub_info(cpu)->scir.enabled) {
struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
@@ -700,7 +752,7 @@ static void __cpuinit uv_heartbeat_enable(int cpu)
}
#ifdef CONFIG_HOTPLUG_CPU
-static void __cpuinit uv_heartbeat_disable(int cpu)
+static void uv_heartbeat_disable(int cpu)
{
if (uv_cpu_hub_info(cpu)->scir.enabled) {
uv_cpu_hub_info(cpu)->scir.enabled = 0;
@@ -712,8 +764,8 @@ static void __cpuinit uv_heartbeat_disable(int cpu)
/*
* cpu hotplug notifier
*/
-static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
long cpu = (long)hcpu;
@@ -783,7 +835,7 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode,
* Called on each cpu to initialize the per_cpu UV data area.
* FIXME: hotplug not supported yet
*/
-void __cpuinit uv_cpu_init(void)
+void uv_cpu_init(void)
{
/* CPU 0 initilization will be done via uv_system_init. */
if (!uv_blade_info)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 53a4e27..3ab0343 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -392,7 +392,7 @@ static struct cpuidle_device apm_cpuidle_device;
/*
* Local variables
*/
-static struct {
+__visible struct {
unsigned long offset;
unsigned short segment;
} apm_bios_entry;
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 0ef4bba..d67c4be 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -28,7 +28,6 @@ void foo(void)
OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
- OFFSET(CPUINFO_hard_math, cpuinfo_x86, hard_math);
OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index b0684e4..47b56a7 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o
+ifdef CONFIG_AMD_IOMMU
+obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
+endif
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
endif
+
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 5013a48..903a264 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -66,10 +66,10 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
* performance at the same time..
*/
-extern void vide(void);
-__asm__(".align 4\nvide: ret");
+extern __visible void vide(void);
+__asm__(".globl vide\n\t.align 4\nvide: ret");
-static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
+static void init_amd_k5(struct cpuinfo_x86 *c)
{
/*
* General Systems BIOSen alias the cpu frequency registers
@@ -87,10 +87,10 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
}
-static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
+static void init_amd_k6(struct cpuinfo_x86 *c)
{
u32 l, h;
- int mbytes = num_physpages >> (20-PAGE_SHIFT);
+ int mbytes = get_num_physpages() >> (20-PAGE_SHIFT);
if (c->x86_model < 6) {
/* Based on AMD doc 20734R - June 2000 */
@@ -179,7 +179,7 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
+static void amd_k7_smp_check(struct cpuinfo_x86 *c)
{
/* calling is from identify_secondary_cpu() ? */
if (!c->cpu_index)
@@ -222,7 +222,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE);
}
-static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
+static void init_amd_k7(struct cpuinfo_x86 *c)
{
u32 l, h;
@@ -267,7 +267,7 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
* To workaround broken NUMA config. Read the comment in
* srat_detect_node().
*/
-static int __cpuinit nearby_node(int apicid)
+static int nearby_node(int apicid)
{
int i, node;
@@ -292,7 +292,7 @@ static int __cpuinit nearby_node(int apicid)
* (2) AMD processors supporting compute units
*/
#ifdef CONFIG_X86_HT
-static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
+static void amd_get_topology(struct cpuinfo_x86 *c)
{
u32 nodes, cores_per_cu = 1;
u8 node_id;
@@ -342,7 +342,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
* On a AMD dual core setup the lower bits of the APIC id distingush the cores.
* Assumes number of cores is a power of two.
*/
-static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
+static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
unsigned bits;
@@ -369,7 +369,7 @@ u16 amd_get_nb_id(int cpu)
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
-static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
+static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
int cpu = smp_processor_id();
@@ -421,7 +421,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
+static void early_init_amd_mc(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
unsigned bits, ecx;
@@ -447,7 +447,7 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
#endif
}
-static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
+static void bsp_init_amd(struct cpuinfo_x86 *c)
{
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
@@ -475,7 +475,7 @@ static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+static void early_init_amd(struct cpuinfo_x86 *c)
{
early_init_amd_mc(c);
@@ -512,9 +512,9 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
static const int amd_erratum_383[];
static const int amd_erratum_400[];
-static bool cpu_has_amd_erratum(const int *erratum);
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
-static void __cpuinit init_amd(struct cpuinfo_x86 *c)
+static void init_amd(struct cpuinfo_x86 *c)
{
u32 dummy;
unsigned long long value;
@@ -729,19 +729,18 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
value &= ~(1ULL << 24);
wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
- if (cpu_has_amd_erratum(amd_erratum_383))
+ if (cpu_has_amd_erratum(c, amd_erratum_383))
set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
}
- if (cpu_has_amd_erratum(amd_erratum_400))
+ if (cpu_has_amd_erratum(c, amd_erratum_400))
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
}
#ifdef CONFIG_X86_32
-static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
- unsigned int size)
+static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
@@ -757,7 +756,7 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
}
#endif
-static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
tlb_flushall_shift = 5;
@@ -765,7 +764,7 @@ static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
tlb_flushall_shift = 4;
}
-static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
{
u32 ebx, eax, ecx, edx;
u16 mask = 0xfff;
@@ -820,7 +819,7 @@ static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
cpu_set_tlb_flushall_shift(c);
}
-static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
+static const struct cpu_dev amd_cpu_dev = {
.c_vendor = "AMD",
.c_ident = { "AuthenticAMD" },
#ifdef CONFIG_X86_32
@@ -879,23 +878,13 @@ static const int amd_erratum_400[] =
static const int amd_erratum_383[] =
AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-static bool cpu_has_amd_erratum(const int *erratum)
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
{
- struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
int osvw_id = *erratum++;
u32 range;
u32 ms;
- /*
- * If called early enough that current_cpu_data hasn't been initialized
- * yet, fall back to boot_cpu_data.
- */
- if (cpu->x86 == 0)
- cpu = &boot_cpu_data;
-
- if (cpu->x86_vendor != X86_VENDOR_AMD)
- return false;
-
if (osvw_id >= 0 && osvw_id < 65536 &&
cpu_has(cpu, X86_FEATURE_OSVW)) {
u64 osvw_len;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4112be9..0344534 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -17,15 +17,6 @@
#include <asm/paravirt.h>
#include <asm/alternative.h>
-static int __init no_387(char *s)
-{
- boot_cpu_data.hard_math = 0;
- write_cr0(X86_CR0_TS | X86_CR0_EM | X86_CR0_MP | read_cr0());
- return 1;
-}
-
-__setup("no387", no_387);
-
static double __initdata x = 4195835.0;
static double __initdata y = 3145727.0;
@@ -44,15 +35,6 @@ static void __init check_fpu(void)
{
s32 fdiv_bug;
- if (!boot_cpu_data.hard_math) {
-#ifndef CONFIG_MATH_EMULATION
- pr_emerg("No coprocessor found and no math emulation present\n");
- pr_emerg("Giving up\n");
- for (;;) ;
-#endif
- return;
- }
-
kernel_fpu_begin();
/*
@@ -107,5 +89,6 @@ void __init check_bugs(void)
* kernel_fpu_begin/end() in check_fpu() relies on the patched
* alternative instructions.
*/
- check_fpu();
+ if (cpu_has_fpu)
+ check_fpu();
}
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 159103c..fbf6c3b 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -11,7 +11,7 @@
#ifdef CONFIG_X86_OOSTORE
-static u32 __cpuinit power2(u32 x)
+static u32 power2(u32 x)
{
u32 s = 1;
@@ -25,7 +25,7 @@ static u32 __cpuinit power2(u32 x)
/*
* Set up an actual MCR
*/
-static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
+static void centaur_mcr_insert(int reg, u32 base, u32 size, int key)
{
u32 lo, hi;
@@ -42,7 +42,7 @@ static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
*
* Shortcut: We know you can't put 4Gig of RAM on a winchip
*/
-static u32 __cpuinit ramtop(void)
+static u32 ramtop(void)
{
u32 clip = 0xFFFFFFFFUL;
u32 top = 0;
@@ -91,7 +91,7 @@ static u32 __cpuinit ramtop(void)
/*
* Compute a set of MCR's to give maximum coverage
*/
-static int __cpuinit centaur_mcr_compute(int nr, int key)
+static int centaur_mcr_compute(int nr, int key)
{
u32 mem = ramtop();
u32 root = power2(mem);
@@ -157,7 +157,7 @@ static int __cpuinit centaur_mcr_compute(int nr, int key)
return ct;
}
-static void __cpuinit centaur_create_optimal_mcr(void)
+static void centaur_create_optimal_mcr(void)
{
int used;
int i;
@@ -181,7 +181,7 @@ static void __cpuinit centaur_create_optimal_mcr(void)
wrmsr(MSR_IDT_MCR0+i, 0, 0);
}
-static void __cpuinit winchip2_create_optimal_mcr(void)
+static void winchip2_create_optimal_mcr(void)
{
u32 lo, hi;
int used;
@@ -217,7 +217,7 @@ static void __cpuinit winchip2_create_optimal_mcr(void)
/*
* Handle the MCR key on the Winchip 2.
*/
-static void __cpuinit winchip2_unprotect_mcr(void)
+static void winchip2_unprotect_mcr(void)
{
u32 lo, hi;
u32 key;
@@ -229,7 +229,7 @@ static void __cpuinit winchip2_unprotect_mcr(void)
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}
-static void __cpuinit winchip2_protect_mcr(void)
+static void winchip2_protect_mcr(void)
{
u32 lo, hi;
@@ -247,7 +247,7 @@ static void __cpuinit winchip2_protect_mcr(void)
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
-static void __cpuinit init_c3(struct cpuinfo_x86 *c)
+static void init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
@@ -318,7 +318,7 @@ enum {
EAMD3D = 1<<20,
};
-static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+static void early_init_centaur(struct cpuinfo_x86 *c)
{
switch (c->x86) {
#ifdef CONFIG_X86_32
@@ -337,7 +337,7 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
#endif
}
-static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
+static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
char *name;
@@ -468,7 +468,7 @@ static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
#endif
}
-static unsigned int __cpuinit
+static unsigned int
centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
#ifdef CONFIG_X86_32
@@ -488,7 +488,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
return size;
}
-static const struct cpu_dev __cpuinitconst centaur_cpu_dev = {
+static const struct cpu_dev centaur_cpu_dev = {
.c_vendor = "Centaur",
.c_ident = { "CentaurHauls" },
.c_early_init = early_init_centaur,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 22018f7..2793d1f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -63,7 +63,7 @@ void __init setup_cpu_local_masks(void)
alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
}
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
+static void default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
cpu_detect_cache_sizes(c);
@@ -80,13 +80,13 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
#endif
}
-static const struct cpu_dev __cpuinitconst default_cpu = {
+static const struct cpu_dev default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
.c_x86_vendor = X86_VENDOR_UNKNOWN,
};
-static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
+static const struct cpu_dev *this_cpu = &default_cpu;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
@@ -160,8 +160,8 @@ static int __init x86_xsaveopt_setup(char *s)
__setup("noxsaveopt", x86_xsaveopt_setup);
#ifdef CONFIG_X86_32
-static int cachesize_override __cpuinitdata = -1;
-static int disable_x86_serial_nr __cpuinitdata = 1;
+static int cachesize_override = -1;
+static int disable_x86_serial_nr = 1;
static int __init cachesize_setup(char *str)
{
@@ -215,12 +215,12 @@ static inline int flag_is_changeable_p(u32 flag)
}
/* Probe for the CPUID instruction */
-int __cpuinit have_cpuid_p(void)
+int have_cpuid_p(void)
{
return flag_is_changeable_p(X86_EFLAGS_ID);
}
-static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
+static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@@ -298,7 +298,7 @@ struct cpuid_dependent_feature {
u32 level;
};
-static const struct cpuid_dependent_feature __cpuinitconst
+static const struct cpuid_dependent_feature
cpuid_dependent_features[] = {
{ X86_FEATURE_MWAIT, 0x00000005 },
{ X86_FEATURE_DCA, 0x00000009 },
@@ -306,7 +306,7 @@ cpuid_dependent_features[] = {
{ 0, 0 }
};
-static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
+static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
{
const struct cpuid_dependent_feature *df;
@@ -344,7 +344,7 @@ static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
*/
/* Look up CPU names by table lookup. */
-static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
+static const char *table_lookup_model(struct cpuinfo_x86 *c)
{
const struct cpu_model_info *info;
@@ -364,8 +364,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
-__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS];
+__u32 cpu_caps_set[NCAPINTS];
void load_percpu_segment(int cpu)
{
@@ -394,9 +394,9 @@ void switch_to_new_gdt(int cpu)
load_percpu_segment(cpu);
}
-static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
+static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
-static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
+static void get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
char *p, *q;
@@ -425,7 +425,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
}
}
-void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
+void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -479,7 +479,7 @@ u16 __read_mostly tlb_lld_4m[NR_INFO];
*/
s8 __read_mostly tlb_flushall_shift = -1;
-void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
+void cpu_detect_tlb(struct cpuinfo_x86 *c)
{
if (this_cpu->c_detect_tlb)
this_cpu->c_detect_tlb(c);
@@ -493,7 +493,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
tlb_flushall_shift);
}
-void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+void detect_ht(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_HT
u32 eax, ebx, ecx, edx;
@@ -544,7 +544,7 @@ out:
#endif
}
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
+static void get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
int i;
@@ -571,7 +571,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
this_cpu = &default_cpu;
}
-void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
+void cpu_detect(struct cpuinfo_x86 *c)
{
/* Get vendor name */
cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
@@ -601,7 +601,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
}
}
-void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
+void get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
u32 ebx;
@@ -652,7 +652,7 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
}
-static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
int i;
@@ -711,10 +711,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
return;
cpu_detect(c);
-
get_cpu_vendor(c);
-
get_cpu_cap(c);
+ fpu_detect(c);
if (this_cpu->c_early_init)
this_cpu->c_early_init(c);
@@ -724,6 +723,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (this_cpu->c_bsp_init)
this_cpu->c_bsp_init(c);
+
+ setup_force_cpu_cap(X86_FEATURE_ALWAYS);
}
void __init early_cpu_init(void)
@@ -768,7 +769,7 @@ void __init early_cpu_init(void)
* unless we can find a reliable way to detect all the broken cases.
* Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
*/
-static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+static void detect_nopl(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
clear_cpu_cap(c, X86_FEATURE_NOPL);
@@ -777,7 +778,7 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
#endif
}
-static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
+static void generic_identify(struct cpuinfo_x86 *c)
{
c->extended_cpuid_level = 0;
@@ -814,7 +815,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
-static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void identify_cpu(struct cpuinfo_x86 *c)
{
int i;
@@ -959,7 +960,7 @@ void __init identify_boot_cpu(void)
cpu_detect_tlb(&boot_cpu_data);
}
-void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+void identify_secondary_cpu(struct cpuinfo_x86 *c)
{
BUG_ON(c == &boot_cpu_data);
identify_cpu(c);
@@ -974,14 +975,14 @@ struct msr_range {
unsigned max;
};
-static const struct msr_range msr_range_array[] __cpuinitconst = {
+static const struct msr_range msr_range_array[] = {
{ 0x00000000, 0x00000418},
{ 0xc0000000, 0xc000040b},
{ 0xc0010000, 0xc0010142},
{ 0xc0011000, 0xc001103b},
};
-static void __cpuinit __print_cpu_msr(void)
+static void __print_cpu_msr(void)
{
unsigned index_min, index_max;
unsigned index;
@@ -1000,7 +1001,7 @@ static void __cpuinit __print_cpu_msr(void)
}
}
-static int show_msr __cpuinitdata;
+static int show_msr;
static __init int setup_show_msr(char *arg)
{
@@ -1021,7 +1022,7 @@ static __init int setup_noclflush(char *arg)
}
__setup("noclflush", setup_noclflush);
-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
+void print_cpu_info(struct cpuinfo_x86 *c)
{
const char *vendor = NULL;
@@ -1050,7 +1051,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
print_cpu_msr(c);
}
-void __cpuinit print_cpu_msr(struct cpuinfo_x86 *c)
+void print_cpu_msr(struct cpuinfo_x86 *c)
{
if (c->cpu_index < show_msr)
__print_cpu_msr();
@@ -1071,11 +1072,11 @@ __setup("clearcpuid=", setup_disablecpuid);
#ifdef CONFIG_X86_64
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr nmi_idt_descr = { NR_VECTORS * 16 - 1,
- (unsigned long) nmi_idt_table };
+struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) debug_idt_table };
DEFINE_PER_CPU_FIRST(union irq_stack_union,
- irq_stack_union) __aligned(PAGE_SIZE);
+ irq_stack_union) __aligned(PAGE_SIZE) __visible;
/*
* The following four percpu variables are hot. Align current_task to
@@ -1092,7 +1093,7 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
DEFINE_PER_CPU(char *, irq_stack_ptr) =
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
-DEFINE_PER_CPU(unsigned int, irq_count) = -1;
+DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
@@ -1148,20 +1149,20 @@ int is_debug_stack(unsigned long addr)
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}
-static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+DEFINE_PER_CPU(u32, debug_idt_ctr);
void debug_stack_set_zero(void)
{
- this_cpu_inc(debug_stack_use_ctr);
- load_idt((const struct desc_ptr *)&nmi_idt_descr);
+ this_cpu_inc(debug_idt_ctr);
+ load_current_idt();
}
void debug_stack_reset(void)
{
- if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+ if (WARN_ON(!this_cpu_read(debug_idt_ctr)))
return;
- if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
- load_idt((const struct desc_ptr *)&idt_descr);
+ if (this_cpu_dec_return(debug_idt_ctr) == 0)
+ load_current_idt();
}
#else /* CONFIG_X86_64 */
@@ -1215,7 +1216,7 @@ static void dbg_restore_debug_regs(void)
*/
#ifdef CONFIG_X86_64
-void __cpuinit cpu_init(void)
+void cpu_init(void)
{
struct orig_ist *oist;
struct task_struct *me;
@@ -1257,7 +1258,7 @@ void __cpuinit cpu_init(void)
switch_to_new_gdt(cpu);
loadsegment(fs, 0);
- load_idt((const struct desc_ptr *)&idt_descr);
+ load_current_idt();
memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
syscall_init();
@@ -1314,7 +1315,7 @@ void __cpuinit cpu_init(void)
#else
-void __cpuinit cpu_init(void)
+void cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
@@ -1334,7 +1335,7 @@ void __cpuinit cpu_init(void)
if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- load_idt(&idt_descr);
+ load_current_idt();
switch_to_new_gdt(cpu);
/*
@@ -1363,3 +1364,17 @@ void __cpuinit cpu_init(void)
fpu_init();
}
#endif
+
+#ifdef CONFIG_X86_DEBUG_STATIC_CPU_HAS
+void warn_pre_alternatives(void)
+{
+ WARN(1, "You're using static_cpu_has before alternatives have run!\n");
+}
+EXPORT_SYMBOL_GPL(warn_pre_alternatives);
+#endif
+
+inline bool __static_cpu_has_safe(u16 bit)
+{
+ return boot_cpu_has(bit);
+}
+EXPORT_SYMBOL_GPL(__static_cpu_has_safe);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index d048d5c..d0969c7 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -15,7 +15,7 @@
/*
* Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
*/
-static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
@@ -44,7 +44,7 @@ static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
}
}
-static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned long flags;
@@ -59,25 +59,25 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
* Actually since bugs.h doesn't even reference this perhaps someone should
* fix the documentation ???
*/
-static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
+static unsigned char Cx86_dir0_msb = 0;
-static const char __cpuinitconst Cx86_model[][9] = {
+static const char Cx86_model[][9] = {
"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
"M II ", "Unknown"
};
-static const char __cpuinitconst Cx486_name[][5] = {
+static const char Cx486_name[][5] = {
"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
"SRx2", "DRx2"
};
-static const char __cpuinitconst Cx486S_name[][4] = {
+static const char Cx486S_name[][4] = {
"S", "S2", "Se", "S2e"
};
-static const char __cpuinitconst Cx486D_name[][4] = {
+static const char Cx486D_name[][4] = {
"DX", "DX2", "?", "?", "?", "DX4"
};
-static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
-static const char __cpuinitconst cyrix_model_mult1[] = "12??43";
-static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
+static char Cx86_cb[] = "?.5x Core/Bus Clock";
+static const char cyrix_model_mult1[] = "12??43";
+static const char cyrix_model_mult2[] = "12233445";
/*
* Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -87,7 +87,7 @@ static const char __cpuinitconst cyrix_model_mult2[] = "12233445";
* FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
*/
-static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
+static void check_cx686_slop(struct cpuinfo_x86 *c)
{
unsigned long flags;
@@ -112,7 +112,7 @@ static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
}
-static void __cpuinit set_cx86_reorder(void)
+static void set_cx86_reorder(void)
{
u8 ccr3;
@@ -127,7 +127,7 @@ static void __cpuinit set_cx86_reorder(void)
setCx86(CX86_CCR3, ccr3);
}
-static void __cpuinit set_cx86_memwb(void)
+static void set_cx86_memwb(void)
{
printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
@@ -143,7 +143,7 @@ static void __cpuinit set_cx86_memwb(void)
* Configure later MediaGX and/or Geode processor.
*/
-static void __cpuinit geode_configure(void)
+static void geode_configure(void)
{
unsigned long flags;
u8 ccr3;
@@ -166,7 +166,7 @@ static void __cpuinit geode_configure(void)
local_irq_restore(flags);
}
-static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
+static void early_init_cyrix(struct cpuinfo_x86 *c)
{
unsigned char dir0, dir0_msn, dir1 = 0;
@@ -185,7 +185,7 @@ static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
+static void init_cyrix(struct cpuinfo_x86 *c)
{
unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
char *buf = c->x86_model_id;
@@ -333,7 +333,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
- p = Cx486_name[(c->hard_math) ? 1 : 0];
+ p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
break;
case 0xe: /* a 486S A step */
@@ -356,7 +356,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
/*
* Handle National Semiconductor branded processors
*/
-static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
+static void init_nsc(struct cpuinfo_x86 *c)
{
/*
* There may be GX1 processors in the wild that are branded
@@ -405,7 +405,7 @@ static inline int test_cyrix_52div(void)
return (unsigned char) (test >> 8) == 0x02;
}
-static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
+static void cyrix_identify(struct cpuinfo_x86 *c)
{
/* Detect Cyrix with disabled CPUID */
if (c->x86 == 4 && test_cyrix_52div()) {
@@ -441,7 +441,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
}
}
-static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
+static const struct cpu_dev cyrix_cpu_dev = {
.c_vendor = "Cyrix",
.c_ident = { "CyrixInstead" },
.c_early_init = early_init_cyrix,
@@ -452,7 +452,7 @@ static const struct cpu_dev __cpuinitconst cyrix_cpu_dev = {
cpu_dev_register(cyrix_cpu_dev);
-static const struct cpu_dev __cpuinitconst nsc_cpu_dev = {
+static const struct cpu_dev nsc_cpu_dev = {
.c_vendor = "NSC",
.c_ident = { "Geode by NSC" },
.c_init = init_nsc,
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 1e7e84a..36ce402 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -25,11 +25,6 @@
#include <asm/processor.h>
#include <asm/hypervisor.h>
-/*
- * Hypervisor detect order. This is specified explicitly here because
- * some hypervisors might implement compatibility modes for other
- * hypervisors and therefore need to be detected in specific sequence.
- */
static const __initconst struct hypervisor_x86 * const hypervisors[] =
{
#ifdef CONFIG_XEN_PVHVM
@@ -49,18 +44,22 @@ static inline void __init
detect_hypervisor_vendor(void)
{
const struct hypervisor_x86 *h, * const *p;
+ uint32_t pri, max_pri = 0;
for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
h = *p;
- if (h->detect()) {
+ pri = h->detect();
+ if (pri != 0 && pri > max_pri) {
+ max_pri = pri;
x86_hyper = h;
- printk(KERN_INFO "Hypervisor detected: %s\n", h->name);
- break;
}
}
+
+ if (max_pri)
+ printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name);
}
-void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
+void init_hypervisor(struct cpuinfo_x86 *c)
{
if (x86_hyper && x86_hyper->set_cpu_features)
x86_hyper->set_cpu_features(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 9b0c441..ec72995 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -26,7 +26,7 @@
#include <asm/apic.h>
#endif
-static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -163,7 +163,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
* This is called before we do cpu ident work
*/
-int __cpuinit ppro_with_ram_bug(void)
+int ppro_with_ram_bug(void)
{
/* Uses data from early_cpu_detect now */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -176,7 +176,7 @@ int __cpuinit ppro_with_ram_bug(void)
return 0;
}
-static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
+static void intel_smp_check(struct cpuinfo_x86 *c)
{
/* calling is from identify_secondary_cpu() ? */
if (!c->cpu_index)
@@ -196,7 +196,7 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
+static void intel_workarounds(struct cpuinfo_x86 *c)
{
unsigned long lo, hi;
@@ -275,12 +275,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
intel_smp_check(c);
}
#else
-static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
+static void intel_workarounds(struct cpuinfo_x86 *c)
{
}
#endif
-static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
+static void srat_detect_node(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_NUMA
unsigned node;
@@ -300,7 +300,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
/*
* find out the number of processor cores on the die
*/
-static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
+static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
@@ -315,7 +315,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
return 1;
}
-static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
+static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
/* Intel VMX MSR indicated features */
#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
@@ -353,7 +353,7 @@ static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit init_intel(struct cpuinfo_x86 *c)
+static void init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
@@ -472,7 +472,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}
#ifdef CONFIG_X86_32
-static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
+static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/*
* Intel PIII Tualatin. This comes in two flavours.
@@ -506,7 +506,7 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
#define STLB_4K 0x41
-static const struct _tlb_table intel_tlb_table[] __cpuinitconst = {
+static const struct _tlb_table intel_tlb_table[] = {
{ 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" },
{ 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" },
{ 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" },
@@ -536,7 +536,7 @@ static const struct _tlb_table intel_tlb_table[] __cpuinitconst = {
{ 0x00, 0, 0 }
};
-static void __cpuinit intel_tlb_lookup(const unsigned char desc)
+static void intel_tlb_lookup(const unsigned char desc)
{
unsigned char k;
if (desc == 0)
@@ -605,7 +605,7 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
}
}
-static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
+static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
{
switch ((c->x86 << 8) + c->x86_model) {
case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
@@ -634,7 +634,7 @@ static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
+static void intel_detect_tlb(struct cpuinfo_x86 *c)
{
int i, j, n;
unsigned int regs[4];
@@ -661,7 +661,7 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
intel_tlb_flushall_shift_set(c);
}
-static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
+static const struct cpu_dev intel_cpu_dev = {
.c_vendor = "Intel",
.c_ident = { "GenuineIntel" },
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 7c6f7d5..1414c90 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -37,7 +37,7 @@ struct _cache_table {
/* All the cache descriptor types we care about (no TLB or
trace cache entries) */
-static const struct _cache_table __cpuinitconst cache_table[] =
+static const struct _cache_table cache_table[] =
{
{ 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
{ 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
@@ -203,7 +203,7 @@ union l3_cache {
unsigned val;
};
-static const unsigned short __cpuinitconst assocs[] = {
+static const unsigned short assocs[] = {
[1] = 1,
[2] = 2,
[4] = 4,
@@ -217,10 +217,10 @@ static const unsigned short __cpuinitconst assocs[] = {
[0xf] = 0xffff /* fully associative - no way to show this currently */
};
-static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
-static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
+static const unsigned char levels[] = { 1, 1, 2, 3 };
+static const unsigned char types[] = { 1, 2, 3, 3 };
-static void __cpuinit
+static void
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
union _cpuid4_leaf_ebx *ebx,
union _cpuid4_leaf_ecx *ecx)
@@ -302,7 +302,7 @@ struct _cache_attr {
/*
* L3 cache descriptors
*/
-static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
+static void amd_calc_l3_indices(struct amd_northbridge *nb)
{
struct amd_l3_cache *l3 = &nb->l3_cache;
unsigned int sc0, sc1, sc2, sc3;
@@ -325,7 +325,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb)
l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
-static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
+static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
{
int node;
@@ -528,8 +528,7 @@ static struct _cache_attr subcaches =
#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
static int
-__cpuinit cpuid4_cache_lookup_regs(int index,
- struct _cpuid4_info_regs *this_leaf)
+cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
{
union _cpuid4_leaf_eax eax;
union _cpuid4_leaf_ebx ebx;
@@ -560,7 +559,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
return 0;
}
-static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
+static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx, op;
union _cpuid4_leaf_eax cache_eax;
@@ -580,7 +579,7 @@ static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
-void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
+void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
if (cpu_has_topoext) {
@@ -593,7 +592,7 @@ void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
-unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
+unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
@@ -618,36 +617,34 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
* parameters cpuid leaf to find the cache details
*/
for (i = 0; i < num_cache_leaves; i++) {
- struct _cpuid4_info_regs this_leaf;
+ struct _cpuid4_info_regs this_leaf = {};
int retval;
retval = cpuid4_cache_lookup_regs(i, &this_leaf);
- if (retval >= 0) {
- switch (this_leaf.eax.split.level) {
- case 1:
- if (this_leaf.eax.split.type ==
- CACHE_TYPE_DATA)
- new_l1d = this_leaf.size/1024;
- else if (this_leaf.eax.split.type ==
- CACHE_TYPE_INST)
- new_l1i = this_leaf.size/1024;
- break;
- case 2:
- new_l2 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(num_threads_sharing);
- l2_id = c->apicid & ~((1 << index_msb) - 1);
- break;
- case 3:
- new_l3 = this_leaf.size/1024;
- num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
- index_msb = get_count_order(
- num_threads_sharing);
- l3_id = c->apicid & ~((1 << index_msb) - 1);
- break;
- default:
- break;
- }
+ if (retval < 0)
+ continue;
+
+ switch (this_leaf.eax.split.level) {
+ case 1:
+ if (this_leaf.eax.split.type == CACHE_TYPE_DATA)
+ new_l1d = this_leaf.size/1024;
+ else if (this_leaf.eax.split.type == CACHE_TYPE_INST)
+ new_l1i = this_leaf.size/1024;
+ break;
+ case 2:
+ new_l2 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l2_id = c->apicid & ~((1 << index_msb) - 1);
+ break;
+ case 3:
+ new_l3 = this_leaf.size/1024;
+ num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
+ index_msb = get_count_order(num_threads_sharing);
+ l3_id = c->apicid & ~((1 << index_msb) - 1);
+ break;
+ default:
+ break;
}
}
}
@@ -746,7 +743,7 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#ifdef CONFIG_SMP
-static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
+static int cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf;
int i, sibling;
@@ -795,7 +792,7 @@ static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
return 1;
}
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf, *sibling_leaf;
unsigned long num_threads_sharing;
@@ -830,7 +827,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
}
}
}
-static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf, *sibling_leaf;
int sibling;
@@ -843,16 +840,16 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
}
}
#else
-static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
+static void cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}
-static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
+static void cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif
-static void __cpuinit free_cache_attributes(unsigned int cpu)
+static void free_cache_attributes(unsigned int cpu)
{
int i;
@@ -863,7 +860,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
per_cpu(ici_cpuid4_info, cpu) = NULL;
}
-static void __cpuinit get_cpu_leaves(void *_retval)
+static void get_cpu_leaves(void *_retval)
{
int j, *retval = _retval, cpu = smp_processor_id();
@@ -883,7 +880,7 @@ static void __cpuinit get_cpu_leaves(void *_retval)
}
}
-static int __cpuinit detect_cache_attributes(unsigned int cpu)
+static int detect_cache_attributes(unsigned int cpu)
{
int retval;
@@ -1017,7 +1014,7 @@ static struct attribute *default_attrs[] = {
};
#ifdef CONFIG_AMD_NB
-static struct attribute ** __cpuinit amd_l3_attrs(void)
+static struct attribute **amd_l3_attrs(void)
{
static struct attribute **attrs;
int n;
@@ -1093,7 +1090,7 @@ static struct kobj_type ktype_percpu_entry = {
.sysfs_ops = &sysfs_ops,
};
-static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
+static void cpuid4_cache_sysfs_exit(unsigned int cpu)
{
kfree(per_cpu(ici_cache_kobject, cpu));
kfree(per_cpu(ici_index_kobject, cpu));
@@ -1102,7 +1099,7 @@ static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
free_cache_attributes(cpu);
}
-static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
+static int cpuid4_cache_sysfs_init(unsigned int cpu)
{
int err;
@@ -1134,7 +1131,7 @@ err_out:
static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
/* Add/Remove cache interface for CPU device */
-static int __cpuinit cache_add_dev(struct device *dev)
+static int cache_add_dev(struct device *dev)
{
unsigned int cpu = dev->id;
unsigned long i, j;
@@ -1185,7 +1182,7 @@ static int __cpuinit cache_add_dev(struct device *dev)
return 0;
}
-static void __cpuinit cache_remove_dev(struct device *dev)
+static void cache_remove_dev(struct device *dev)
{
unsigned int cpu = dev->id;
unsigned long i;
@@ -1202,8 +1199,8 @@ static void __cpuinit cache_remove_dev(struct device *dev)
cpuid4_cache_sysfs_exit(cpu);
}
-static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int cacheinfo_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct device *dev;
@@ -1222,7 +1219,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
+static struct notifier_block cacheinfo_cpu_notifier = {
.notifier_call = cacheinfo_cpu_callback,
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index ddc72f8..5ac2d1f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -153,7 +153,7 @@ static void raise_mce(struct mce *m)
return;
#ifdef CONFIG_X86_LOCAL_APIC
- if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
+ if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
unsigned long start;
int cpu;
@@ -167,7 +167,7 @@ static void raise_mce(struct mce *m)
cpumask_clear_cpu(cpu, mce_inject_cpumask);
}
if (!cpumask_empty(mce_inject_cpumask)) {
- if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+ if (m->inject_flags & MCJ_IRQ_BROADCAST) {
/*
* don't wait because mce_irq_ipi is necessary
* to be sync with following raise_local
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 5b7d4fa..09edd0b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
extern struct mce_bank *mce_banks;
+extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
+void cmci_disable_bank(int bank);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+static inline void cmci_disable_bank(int bank) { }
#endif
void mce_timer_kick(unsigned long interval);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index beb1f16..c370e1c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -110,22 +110,17 @@ static struct severity {
/* known AR MCACODs: */
#ifdef CONFIG_MEMORY_FAILURE
MCESEV(
- KEEP, "HT thread notices Action required: data load error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
- MCGMASK(MCG_STATUS_EIPV, 0)
+ KEEP, "Action required but unaffected thread is continuable",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
+ MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
),
MCESEV(
- AR, "Action required: data load error",
+ AR, "Action required: data load error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
USER
),
MCESEV(
- KEEP, "HT thread notices Action required: instruction fetch error",
- SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
- MCGMASK(MCG_STATUS_EIPV, 0)
- ),
- MCESEV(
- AR, "Action required: instruction fetch error",
+ AR, "Action required: instruction fetch error in a user process",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
USER
),
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504..b3218cd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -89,11 +89,23 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
-/* MCA banks polled by the period polling timer for corrected events */
+/*
+ * MCA banks polled by the period polling timer for corrected events.
+ * With Intel CMCI, this only has MCA banks which do not support CMCI (if any).
+ */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
+/*
+ * MCA banks controlled through firmware first for corrected errors.
+ * This is a global list of banks for which we won't enable CMCI and we
+ * won't poll. Firmware controls these banks and is responsible for
+ * reporting corrected errors through GHES. Uncorrected/recoverable
+ * errors are still notified through a machine check.
+ */
+mce_banks_t mce_banks_ce_disabled;
+
static DEFINE_PER_CPU(struct work_struct, mce_work);
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
@@ -1360,7 +1372,7 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
-static int __cpuinit __mcheck_cpu_mce_banks_init(void)
+static int __mcheck_cpu_mce_banks_init(void)
{
int i;
u8 num_banks = mca_cfg.banks;
@@ -1381,7 +1393,7 @@ static int __cpuinit __mcheck_cpu_mce_banks_init(void)
/*
* Initialize Machine Checks for a CPU.
*/
-static int __cpuinit __mcheck_cpu_cap_init(void)
+static int __mcheck_cpu_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1480,7 +1492,7 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
}
/* Add per CPU specific workarounds here */
-static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
+static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
struct mca_config *cfg = &mca_cfg;
@@ -1590,7 +1602,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
return 0;
}
-static int __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
+static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
return 0;
@@ -1661,7 +1673,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
*/
-void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
+void mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mca_cfg.disabled)
return;
@@ -1932,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = {
&mce_chrdev_ops,
};
+static void __mce_disable_bank(void *arg)
+{
+ int bank = *((int *)arg);
+ __clear_bit(bank, __get_cpu_var(mce_poll_banks));
+ cmci_disable_bank(bank);
+}
+
+void mce_disable_bank(int bank)
+{
+ if (bank >= mca_cfg.banks) {
+ pr_warn(FW_BUG
+ "Ignoring request to disable invalid MCA bank %d.\n",
+ bank);
+ return;
+ }
+ set_bit(bank, mce_banks_ce_disabled);
+ on_each_cpu(__mce_disable_bank, &bank, 1);
+}
+
/*
* mce=off Disables machine check
* mce=no_cmci Disables CMCI
@@ -2079,7 +2110,6 @@ static struct bus_type mce_subsys = {
DEFINE_PER_CPU(struct device *, mce_device);
-__cpuinitdata
void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
static inline struct mce_bank *attr_to_bank(struct device_attribute *attr)
@@ -2225,7 +2255,7 @@ static void mce_device_release(struct device *dev)
}
/* Per cpu device init. All of the cpus still share the same ctrl bank: */
-static __cpuinit int mce_device_create(unsigned int cpu)
+static int mce_device_create(unsigned int cpu)
{
struct device *dev;
int err;
@@ -2271,7 +2301,7 @@ error:
return err;
}
-static __cpuinit void mce_device_remove(unsigned int cpu)
+static void mce_device_remove(unsigned int cpu)
{
struct device *dev = per_cpu(mce_device, cpu);
int i;
@@ -2291,7 +2321,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
}
/* Make sure there are no machine checks on offlined CPUs. */
-static void __cpuinit mce_disable_cpu(void *h)
+static void mce_disable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
@@ -2309,7 +2339,7 @@ static void __cpuinit mce_disable_cpu(void *h)
}
}
-static void __cpuinit mce_reenable_cpu(void *h)
+static void mce_reenable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
@@ -2328,7 +2358,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static int __cpuinit
+static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
@@ -2364,7 +2394,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
-static struct notifier_block mce_cpu_notifier __cpuinitdata = {
+static struct notifier_block mce_cpu_notifier = {
.notifier_call = mce_cpu_callback,
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9cb5276..603df4f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -458,10 +458,8 @@ static struct kobj_type threshold_ktype = {
.default_attrs = default_attrs,
};
-static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
- unsigned int bank,
- unsigned int block,
- u32 address)
+static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
+ unsigned int block, u32 address)
{
struct threshold_block *b = NULL;
u32 low, high;
@@ -543,7 +541,7 @@ out_free:
return err;
}
-static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
+static int __threshold_add_blocks(struct threshold_bank *b)
{
struct list_head *head = &b->blocks->miscj;
struct threshold_block *pos = NULL;
@@ -567,7 +565,7 @@ static __cpuinit int __threshold_add_blocks(struct threshold_bank *b)
return err;
}
-static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
+static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
struct device *dev = per_cpu(mce_device, cpu);
struct amd_northbridge *nb = NULL;
@@ -632,7 +630,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
}
/* create dir/files for all valid threshold banks */
-static __cpuinit int threshold_create_device(unsigned int cpu)
+static int threshold_create_device(unsigned int cpu)
{
unsigned int bank;
struct threshold_bank **bp;
@@ -736,7 +734,7 @@ static void threshold_remove_device(unsigned int cpu)
}
/* get notified when a cpu comes on/off */
-static void __cpuinit
+static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
switch (action) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index ae1697c..4cfe045 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -24,6 +24,18 @@
* Also supports reliable discovery of shared banks.
*/
+/*
+ * CMCI can be delivered to multiple cpus that share a machine check bank
+ * so we need to designate a single cpu to process errors logged in each bank
+ * in the interrupt handler (otherwise we would have many races and potential
+ * double reporting of the same error).
+ * Note that this can change when a cpu is offlined or brought online since
+ * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
+ * disables CMCI on all banks owned by the cpu and clears this bitfield. At
+ * this point, cmci_rediscover() kicks in and a different cpu may end up
+ * taking ownership of some of the shared MCA banks that were previously
+ * owned by the offlined cpu.
+ */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
/*
@@ -191,6 +203,10 @@ static void cmci_discover(int banks)
if (test_bit(i, owned))
continue;
+ /* Skip banks in firmware first mode */
+ if (test_bit(i, mce_banks_ce_disabled))
+ continue;
+
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */
@@ -259,6 +275,19 @@ void cmci_recheck(void)
local_irq_restore(flags);
}
+/* Caller must hold the lock on cmci_discover_lock */
+static void __cmci_disable_bank(int bank)
+{
+ u64 val;
+
+ if (!test_bit(bank, __get_cpu_var(mce_banks_owned)))
+ return;
+ rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ val &= ~MCI_CTL2_CMCI_EN;
+ wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+ __clear_bit(bank, __get_cpu_var(mce_banks_owned));
+}
+
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
@@ -268,20 +297,12 @@ void cmci_clear(void)
unsigned long flags;
int i;
int banks;
- u64 val;
if (!cmci_supported(&banks))
return;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
- for (i = 0; i < banks; i++) {
- if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
- continue;
- /* Disable CMCI */
- rdmsrl(MSR_IA32_MCx_CTL2(i), val);
- val &= ~MCI_CTL2_CMCI_EN;
- wrmsrl(MSR_IA32_MCx_CTL2(i), val);
- __clear_bit(i, __get_cpu_var(mce_banks_owned));
- }
+ for (i = 0; i < banks; i++)
+ __cmci_disable_bank(i);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
@@ -315,6 +336,19 @@ void cmci_reenable(void)
cmci_discover(banks);
}
+void cmci_disable_bank(int bank)
+{
+ int banks;
+ unsigned long flags;
+
+ if (!cmci_supported(&banks))
+ return;
+
+ raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ __cmci_disable_bank(bank);
+ raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
static void intel_init_cmci(void)
{
int banks;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 47a1870..3eec7de 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -29,6 +29,7 @@
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
+#include <asm/trace/irq_vectors.h>
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
@@ -54,12 +55,24 @@ struct thermal_state {
struct _thermal_state package_power_limit;
struct _thermal_state core_thresh0;
struct _thermal_state core_thresh1;
+ struct _thermal_state pkg_thresh0;
+ struct _thermal_state pkg_thresh1;
};
/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);
+/* Callback to handle core package threshold_interrupts */
+int (*platform_thermal_package_notify)(__u64 msr_val);
+EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
+
+/* Callback support of rate control, return true, if
+ * callback has rate control */
+bool (*platform_thermal_package_rate_control)(void);
+EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
+
+
static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -181,11 +194,6 @@ static int therm_throt_process(bool new_event, int event, int level)
this_cpu,
level == CORE_LEVEL ? "Core" : "Package",
state->count);
- else
- printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
- this_cpu,
- level == CORE_LEVEL ? "Core" : "Package",
- state->count);
return 1;
}
if (old_event) {
@@ -193,36 +201,46 @@ static int therm_throt_process(bool new_event, int event, int level)
printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
this_cpu,
level == CORE_LEVEL ? "Core" : "Package");
- else
- printk(KERN_INFO "CPU%d: %s power limit normal\n",
- this_cpu,
- level == CORE_LEVEL ? "Core" : "Package");
return 1;
}
return 0;
}
-static int thresh_event_valid(int event)
+static int thresh_event_valid(int level, int event)
{
struct _thermal_state *state;
unsigned int this_cpu = smp_processor_id();
struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
u64 now = get_jiffies_64();
- state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+ if (level == PACKAGE_LEVEL)
+ state = (event == 0) ? &pstate->pkg_thresh0 :
+ &pstate->pkg_thresh1;
+ else
+ state = (event == 0) ? &pstate->core_thresh0 :
+ &pstate->core_thresh1;
if (time_before64(now, state->next_check))
return 0;
state->next_check = now + CHECK_INTERVAL;
+
+ return 1;
+}
+
+static bool int_pln_enable;
+static int __init int_pln_enable_setup(char *s)
+{
+ int_pln_enable = true;
+
return 1;
}
+__setup("int_pln_enable", int_pln_enable_setup);
#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
-static __cpuinit int thermal_throttle_add_dev(struct device *dev,
- unsigned int cpu)
+static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
{
int err;
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -231,7 +249,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
if (err)
return err;
- if (cpu_has(c, X86_FEATURE_PLN))
+ if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_core_power_limit_count.attr,
thermal_attr_group.name);
@@ -239,7 +257,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_throttle_count.attr,
thermal_attr_group.name);
- if (cpu_has(c, X86_FEATURE_PLN))
+ if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
err = sysfs_add_file_to_group(&dev->kobj,
&dev_attr_package_power_limit_count.attr,
thermal_attr_group.name);
@@ -248,7 +266,7 @@ static __cpuinit int thermal_throttle_add_dev(struct device *dev,
return err;
}
-static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
+static void thermal_throttle_remove_dev(struct device *dev)
{
sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}
@@ -257,7 +275,7 @@ static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
static DEFINE_MUTEX(therm_cpu_lock);
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
-static __cpuinit int
+static int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
@@ -288,7 +306,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
return notifier_from_errno(err);
}
-static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
+static struct notifier_block thermal_throttle_cpu_notifier =
{
.notifier_call = thermal_throttle_cpu_callback,
};
@@ -321,6 +339,39 @@ device_initcall(thermal_throttle_init_device);
#endif /* CONFIG_SYSFS */
+static void notify_package_thresholds(__u64 msr_val)
+{
+ bool notify_thres_0 = false;
+ bool notify_thres_1 = false;
+
+ if (!platform_thermal_package_notify)
+ return;
+
+ /* lower threshold check */
+ if (msr_val & THERM_LOG_THRESHOLD0)
+ notify_thres_0 = true;
+ /* higher threshold check */
+ if (msr_val & THERM_LOG_THRESHOLD1)
+ notify_thres_1 = true;
+
+ if (!notify_thres_0 && !notify_thres_1)
+ return;
+
+ if (platform_thermal_package_rate_control &&
+ platform_thermal_package_rate_control()) {
+ /* Rate control is implemented in callback */
+ platform_thermal_package_notify(msr_val);
+ return;
+ }
+
+ /* lower threshold reached */
+ if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
+ platform_thermal_package_notify(msr_val);
+ /* higher threshold reached */
+ if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
+ platform_thermal_package_notify(msr_val);
+}
+
static void notify_thresholds(__u64 msr_val)
{
/* check whether the interrupt handler is defined;
@@ -330,10 +381,12 @@ static void notify_thresholds(__u64 msr_val)
return;
/* lower threshold reached */
- if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
+ if ((msr_val & THERM_LOG_THRESHOLD0) &&
+ thresh_event_valid(CORE_LEVEL, 0))
platform_thermal_notify(msr_val);
/* higher threshold reached */
- if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+ if ((msr_val & THERM_LOG_THRESHOLD1) &&
+ thresh_event_valid(CORE_LEVEL, 1))
platform_thermal_notify(msr_val);
}
@@ -352,17 +405,19 @@ static void intel_thermal_interrupt(void)
CORE_LEVEL) != 0)
mce_log_therm_throt_event(msr_val);
- if (this_cpu_has(X86_FEATURE_PLN))
+ if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
CORE_LEVEL);
if (this_cpu_has(X86_FEATURE_PTS)) {
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
+ /* check violations of package thermal thresholds */
+ notify_package_thresholds(msr_val);
therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
PACKAGE_LEVEL);
- if (this_cpu_has(X86_FEATURE_PLN))
+ if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
therm_throt_process(msr_val &
PACKAGE_THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
@@ -378,15 +433,26 @@ static void unexpected_thermal_interrupt(void)
static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
-asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+static inline void __smp_thermal_interrupt(void)
{
- irq_enter();
- exit_idle();
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
- irq_exit();
- /* Ack only at the end to avoid potential reentry */
- ack_APIC_irq();
+}
+
+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ __smp_thermal_interrupt();
+ exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_thermal_interrupt(struct pt_regs *regs)
+{
+ entering_irq();
+ trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
+ __smp_thermal_interrupt();
+ trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
+ exiting_ack_irq();
}
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
@@ -470,9 +536,13 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
apic_write(APIC_LVTTHMR, h);
rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
- if (cpu_has(c, X86_FEATURE_PLN))
+ if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+ wrmsr(MSR_IA32_THERM_INTERRUPT,
+ (l | (THERM_INT_LOW_ENABLE
+ | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
+ else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
wrmsr(MSR_IA32_THERM_INTERRUPT,
- l | (THERM_INT_LOW_ENABLE
+ l | (THERM_INT_LOW_ENABLE
| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
else
wrmsr(MSR_IA32_THERM_INTERRUPT,
@@ -480,9 +550,14 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_PTS)) {
rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
- if (cpu_has(c, X86_FEATURE_PLN))
+ if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
- l | (PACKAGE_THERM_INT_LOW_ENABLE
+ (l | (PACKAGE_THERM_INT_LOW_ENABLE
+ | PACKAGE_THERM_INT_HIGH_ENABLE))
+ & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
+ else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l | (PACKAGE_THERM_INT_LOW_ENABLE
| PACKAGE_THERM_INT_HIGH_ENABLE
| PACKAGE_THERM_INT_PLN_ENABLE), h);
else
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index aa578ca..fe6b1c8 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -8,6 +8,7 @@
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
+#include <asm/trace/irq_vectors.h>
static void default_threshold_interrupt(void)
{
@@ -17,13 +18,24 @@ static void default_threshold_interrupt(void)
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-asmlinkage void smp_threshold_interrupt(void)
+static inline void __smp_threshold_interrupt(void)
{
- irq_enter();
- exit_idle();
inc_irq_stat(irq_threshold_count);
mce_threshold_vector();
- irq_exit();
- /* Ack only at the end to avoid potential reentry */
- ack_APIC_irq();
+}
+
+asmlinkage void smp_threshold_interrupt(void)
+{
+ entering_irq();
+ __smp_threshold_interrupt();
+ exiting_ack_irq();
+}
+
+asmlinkage void smp_trace_threshold_interrupt(void)
+{
+ entering_irq();
+ trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
+ __smp_threshold_interrupt();
+ trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
+ exiting_ack_irq();
}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 8f4be53..71a39f3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -27,20 +27,23 @@
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
-static bool __init ms_hyperv_platform(void)
+static uint32_t __init ms_hyperv_platform(void)
{
u32 eax;
u32 hyp_signature[3];
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return false;
+ return 0;
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
- return eax >= HYPERV_CPUID_MIN &&
- eax <= HYPERV_CPUID_MAX &&
- !memcmp("Microsoft Hv", hyp_signature, 12);
+ if (eax >= HYPERV_CPUID_MIN &&
+ eax <= HYPERV_CPUID_MAX &&
+ !memcmp("Microsoft Hv", hyp_signature, 12))
+ return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+
+ return 0;
}
static cycle_t read_hv_clock(struct clocksource *arg)
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 35ffda5..5f90b85 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -714,15 +714,15 @@ int __init mtrr_cleanup(unsigned address_bits)
if (mtrr_tom2)
x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
- nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
/*
* [0, 1M) should always be covered by var mtrr with WB
* and fixed mtrrs should take effect before var mtrr for it:
*/
- nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
+ nr_range = add_range_with_merge(range, RANGE_NUM, 0, 0,
1ULL<<(20 - PAGE_SHIFT));
- /* Sort the ranges: */
- sort_range(range, nr_range);
+ /* add from var mtrr at last */
+ nr_range = x86_get_mtrr_mem_range(range, nr_range,
+ x_remove_base, x_remove_size);
range_sums = sum_ranges(range, nr_range);
printk(KERN_INFO "total RAM covered: %ldM\n",
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index 68a3343..9e451b0 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -167,7 +167,7 @@ static void post_set(void)
setCx86(CX86_CCR3, ccr3);
/* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
+ write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index fa72a39..ce2d0a2 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -510,8 +510,9 @@ generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
static void generic_get_mtrr(unsigned int reg, unsigned long *base,
unsigned long *size, mtrr_type *type)
{
- unsigned int mask_lo, mask_hi, base_lo, base_hi;
- unsigned int tmp, hi;
+ u32 mask_lo, mask_hi, base_lo, base_hi;
+ unsigned int hi;
+ u64 tmp, mask;
/*
* get_mtrr doesn't need to update mtrr_state, also it could be called
@@ -532,18 +533,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
/* Work out the shifted address mask: */
- tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
- mask_lo = size_or_mask | tmp;
+ tmp = (u64)mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT;
+ mask = size_or_mask | tmp;
/* Expand tmp with high bits to all 1s: */
- hi = fls(tmp);
+ hi = fls64(tmp);
if (hi > 0) {
- tmp |= ~((1<<(hi - 1)) - 1);
+ tmp |= ~((1ULL<<(hi - 1)) - 1);
- if (tmp != mask_lo) {
+ if (tmp != mask) {
printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
- mask_lo = tmp;
+ mask = tmp;
}
}
@@ -551,8 +552,8 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
* This works correctly if size is a power of two, i.e. a
* contiguous range:
*/
- *size = -mask_lo;
- *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+ *size = -mask;
+ *base = (u64)base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
*type = base_lo & 0xff;
out_put_cpu:
@@ -682,6 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
}
/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
+ count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Save MTRR state */
@@ -695,13 +697,14 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
static void post_set(void) __releases(set_atomicity_lock)
{
/* Flush TLBs (no need to flush caches - they are disabled) */
+ count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
__flush_tlb();
/* Intel (P6) standard MTRRs */
mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
/* Enable caches */
- write_cr0(read_cr0() & 0xbfffffff);
+ write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
if (cpu_has_pge)
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 726bf96..f961de9 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -51,9 +51,13 @@
#include <asm/e820.h>
#include <asm/mtrr.h>
#include <asm/msr.h>
+#include <asm/pat.h>
#include "mtrr.h"
+/* arch_phys_wc_add returns an MTRR register index plus this offset. */
+#define MTRR_TO_PHYS_WC_OFFSET 1000
+
u32 num_var_ranges;
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
@@ -305,7 +309,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return -EINVAL;
}
- if (base & size_or_mask || size & size_or_mask) {
+ if ((base | (base + size - 1)) >>
+ (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
pr_warning("mtrr: base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -524,6 +529,73 @@ int mtrr_del(int reg, unsigned long base, unsigned long size)
}
EXPORT_SYMBOL(mtrr_del);
+/**
+ * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
+ * @base: Physical base address
+ * @size: Size of region
+ *
+ * If PAT is available, this does nothing. If PAT is unavailable, it
+ * attempts to add a WC MTRR covering size bytes starting at base and
+ * logs an error if this fails.
+ *
+ * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
+ * but drivers should not try to interpret that return value.
+ */
+int arch_phys_wc_add(unsigned long base, unsigned long size)
+{
+ int ret;
+
+ if (pat_enabled)
+ return 0; /* Success! (We don't need to do anything.) */
+
+ ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
+ if (ret < 0) {
+ pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
+ (void *)base, (void *)(base + size - 1));
+ return ret;
+ }
+ return ret + MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL(arch_phys_wc_add);
+
+/*
+ * arch_phys_wc_del - undoes arch_phys_wc_add
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This cleans up after mtrr_add_wc_if_needed.
+ *
+ * The API guarantees that mtrr_del_wc_if_needed(error code) and
+ * mtrr_del_wc_if_needed(0) do nothing.
+ */
+void arch_phys_wc_del(int handle)
+{
+ if (handle >= 1) {
+ WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
+ mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
+ }
+}
+EXPORT_SYMBOL(arch_phys_wc_del);
+
+/*
+ * phys_wc_to_mtrr_index - translates arch_phys_wc_add's return value
+ * @handle: Return value from arch_phys_wc_add
+ *
+ * This will turn the return value from arch_phys_wc_add into an mtrr
+ * index suitable for debugging.
+ *
+ * Note: There is no legitimate use for this function, except possibly
+ * in printk line. Alas there is an illegitimate use in some ancient
+ * drm ioctls.
+ */
+int phys_wc_to_mtrr_index(int handle)
+{
+ if (handle < MTRR_TO_PHYS_WC_OFFSET)
+ return -1;
+ else
+ return handle - MTRR_TO_PHYS_WC_OFFSET;
+}
+EXPORT_SYMBOL_GPL(phys_wc_to_mtrr_index);
+
/*
* HACK ALERT!
* These should be called implicitly, but we can't yet until all the initcall
@@ -583,6 +655,7 @@ static struct syscore_ops mtrr_syscore_ops = {
int __initdata changed_by_mtrr_cleanup;
+#define SIZE_OR_MASK_BITS(n) (~((1ULL << ((n) - PAGE_SHIFT)) - 1))
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -600,7 +673,7 @@ void __init mtrr_bp_init(void)
if (cpu_has_mtrr) {
mtrr_if = &generic_mtrr_ops;
- size_or_mask = 0xff000000; /* 36 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(36);
size_and_mask = 0x00f00000;
phys_addr = 36;
@@ -619,7 +692,7 @@ void __init mtrr_bp_init(void)
boot_cpu_data.x86_mask == 0x4))
phys_addr = 36;
- size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1);
+ size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
@@ -627,7 +700,7 @@ void __init mtrr_bp_init(void)
* VIA C* family have Intel style MTRRs,
* but don't support PAE
*/
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
phys_addr = 32;
}
@@ -637,21 +710,21 @@ void __init mtrr_bp_init(void)
if (cpu_has_k6_mtrr) {
/* Pre-Athlon (K6) AMD CPU MTRRs */
mtrr_if = mtrr_ops[X86_VENDOR_AMD];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CENTAUR:
if (cpu_has_centaur_mcr) {
mtrr_if = mtrr_ops[X86_VENDOR_CENTAUR];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
case X86_VENDOR_CYRIX:
if (cpu_has_cyrix_arr) {
mtrr_if = mtrr_ops[X86_VENDOR_CYRIX];
- size_or_mask = 0xfff00000; /* 32 bits */
+ size_or_mask = SIZE_OR_MASK_BITS(32);
size_and_mask = 0;
}
break;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1025f3c..897783b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -403,7 +403,8 @@ int x86_pmu_hw_config(struct perf_event *event)
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
- if (event->attr.precise_ip > 1) {
+ if (event->attr.precise_ip > 1 &&
+ x86_pmu.intel_cap.pebs_format < 2) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
@@ -568,7 +569,7 @@ struct sched_state {
struct perf_sched {
int max_weight;
int max_events;
- struct event_constraint **constraints;
+ struct perf_event **events;
struct sched_state state;
int saved_states;
struct sched_state saved[SCHED_STATES_MAX];
@@ -577,7 +578,7 @@ struct perf_sched {
/*
* Initialize interator that runs through all events and counters.
*/
-static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+static void perf_sched_init(struct perf_sched *sched, struct perf_event **events,
int num, int wmin, int wmax)
{
int idx;
@@ -585,10 +586,10 @@ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **
memset(sched, 0, sizeof(*sched));
sched->max_events = num;
sched->max_weight = wmax;
- sched->constraints = c;
+ sched->events = events;
for (idx = 0; idx < num; idx++) {
- if (c[idx]->weight == wmin)
+ if (events[idx]->hw.constraint->weight == wmin)
break;
}
@@ -635,8 +636,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)
if (sched->state.event >= sched->max_events)
return false;
- c = sched->constraints[sched->state.event];
-
+ c = sched->events[sched->state.event]->hw.constraint;
/* Prefer fixed purpose counters */
if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {
idx = INTEL_PMC_IDX_FIXED;
@@ -694,7 +694,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)
if (sched->state.weight > sched->max_weight)
return false;
}
- c = sched->constraints[sched->state.event];
+ c = sched->events[sched->state.event]->hw.constraint;
} while (c->weight != sched->state.weight);
sched->state.counter = 0; /* start with first counter */
@@ -705,12 +705,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)
/*
* Assign a counter for each event.
*/
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
int wmin, int wmax, int *assign)
{
struct perf_sched sched;
- perf_sched_init(&sched, constraints, n, wmin, wmax);
+ perf_sched_init(&sched, events, n, wmin, wmax);
do {
if (!perf_sched_find_counter(&sched))
@@ -724,16 +724,19 @@ int perf_assign_events(struct event_constraint **constraints, int n,
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
{
- struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
+ struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ struct perf_event *e;
int i, wmin, wmax, num = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ hwc = &cpuc->event_list[i]->hw;
c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
- constraints[i] = c;
+ hwc->constraint = c;
+
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -743,7 +746,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
*/
for (i = 0; i < n; i++) {
hwc = &cpuc->event_list[i]->hw;
- c = constraints[i];
+ c = hwc->constraint;
/* never assigned */
if (hwc->idx == -1)
@@ -764,16 +767,35 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
/* slow path */
if (i != n)
- num = perf_assign_events(constraints, n, wmin, wmax, assign);
+ num = perf_assign_events(cpuc->event_list, n, wmin,
+ wmax, assign);
/*
+ * Mark the event as committed, so we do not put_constraint()
+ * in case new events are added and fail scheduling.
+ */
+ if (!num && assign) {
+ for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ e->hw.flags |= PERF_X86_EVENT_COMMITTED;
+ }
+ }
+ /*
* scheduling failed or is just a simulation,
* free resources if necessary
*/
if (!assign || num) {
for (i = 0; i < n; i++) {
+ e = cpuc->event_list[i];
+ /*
+ * do not put_constraint() on comitted events,
+ * because they are good to go
+ */
+ if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
+ continue;
+
if (x86_pmu.put_event_constraints)
- x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
+ x86_pmu.put_event_constraints(cpuc, e);
}
}
return num ? -EINVAL : 0;
@@ -1153,6 +1175,11 @@ static void x86_pmu_del(struct perf_event *event, int flags)
int i;
/*
+ * event is descheduled
+ */
+ event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
+
+ /*
* If we're called during a txn, we don't need to do anything.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
@@ -1249,16 +1276,26 @@ void perf_events_lapic_init(void)
static int __kprobes
perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs)
{
+ int ret;
+ u64 start_clock;
+ u64 finish_clock;
+
if (!atomic_read(&active_events))
return NMI_DONE;
- return x86_pmu.handle_irq(regs);
+ start_clock = local_clock();
+ ret = x86_pmu.handle_irq(regs);
+ finish_clock = local_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+
+ return ret;
}
struct event_constraint emptyconstraint;
struct event_constraint unconstrained;
-static int __cpuinit
+static int
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
@@ -1469,7 +1506,7 @@ static int __init init_hw_perf_events(void)
err = amd_pmu_init();
break;
default:
- return 0;
+ err = -ENOTSUPP;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
@@ -1846,8 +1883,9 @@ static struct pmu pmu = {
void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
- userpg->cap_usr_time = 0;
- userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
userpg->pmc_width = x86_pmu.cntval_bits;
if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@ -1856,10 +1894,15 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
return;
- userpg->cap_usr_time = 1;
+ userpg->cap_user_time = 1;
userpg->time_mult = this_cpu_read(cyc2ns);
userpg->time_shift = CYC2NS_SCALE_FACTOR;
userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+
+ if (sched_clock_stable && !check_tsc_disabled()) {
+ userpg->cap_user_time_zero = 1;
+ userpg->time_zero = this_cpu_read(cyc2ns_offset);
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ba9aadf..cc16faa 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -63,10 +63,12 @@ struct event_constraint {
int flags;
};
/*
- * struct event_constraint flags
+ * struct hw_perf_event.flags flags
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */
+#define PERF_X86_EVENT_PEBS_ST_HSW 0x4 /* haswell style st data sampling */
+#define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -227,11 +229,14 @@ struct cpu_hw_events {
* - inv
* - edge
* - cnt-mask
+ * - in_tx
+ * - in_tx_checkpointed
* The other filters are supported by fixed counters.
* The any-thread option is supported starting with v3.
*/
+#define FIXED_EVENT_FLAGS (X86_RAW_EVENT_MASK|HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)
#define FIXED_EVENT_CONSTRAINT(c, n) \
- EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+ EVENT_CONSTRAINT(c, (1ULL << (32+n)), FIXED_EVENT_FLAGS)
/*
* Constraint on the Event code + UMask
@@ -247,6 +252,11 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+/* DataLA version of store sampling without extra enable bit. */
+#define INTEL_PST_HSW_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
+
#define EVENT_CONSTRAINT_END \
EVENT_CONSTRAINT(0, 0, 0)
@@ -301,6 +311,11 @@ union perf_capabilities {
u64 pebs_arch_reg:1;
u64 pebs_format:4;
u64 smm_freeze:1;
+ /*
+ * PMU supports separate counter range for writing
+ * values > 32bit.
+ */
+ u64 full_width_write:1;
};
u64 capabilities;
};
@@ -375,6 +390,7 @@ struct x86_pmu {
struct event_constraint *event_constraints;
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ bool late_ack;
/*
* sysfs attrs
@@ -528,7 +544,7 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
void x86_pmu_enable_all(int added);
-int perf_assign_events(struct event_constraint **constraints, int n,
+int perf_assign_events(struct perf_event **events, int n,
int wmin, int wmax, int *assign);
int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);
@@ -625,6 +641,8 @@ extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
+extern struct event_constraint intel_slm_pebs_event_constraints[];
+
extern struct event_constraint intel_nehalem_pebs_event_constraints[];
extern struct event_constraint intel_westmere_pebs_event_constraints[];
@@ -633,6 +651,8 @@ extern struct event_constraint intel_snb_pebs_event_constraints[];
extern struct event_constraint intel_ivb_pebs_event_constraints[];
+extern struct event_constraint intel_hsw_pebs_event_constraints[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 7e28d94..beeb7cc 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -347,8 +347,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
struct amd_nb *nb;
int i;
- nb = kmalloc_node(sizeof(struct amd_nb), GFP_KERNEL | __GFP_ZERO,
- cpu_to_node(cpu));
+ nb = kzalloc_node(sizeof(struct amd_nb), GFP_KERNEL, cpu_to_node(cpu));
if (!nb)
return NULL;
@@ -648,48 +647,48 @@ static __initconst const struct x86_pmu amd_pmu = {
.cpu_dead = amd_pmu_cpu_dead,
};
-static int setup_event_constraints(void)
+static int __init amd_core_pmu_init(void)
{
- if (boot_cpu_data.x86 == 0x15)
+ if (!cpu_has_perfctr_core)
+ return 0;
+
+ switch (boot_cpu_data.x86) {
+ case 0x15:
+ pr_cont("Fam15h ");
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
- return 0;
-}
+ break;
-static int setup_perfctr_core(void)
-{
- if (!cpu_has_perfctr_core) {
- WARN(x86_pmu.get_event_constraints == amd_get_event_constraints_f15h,
- KERN_ERR "Odd, counter constraints enabled but no core perfctrs detected!");
+ default:
+ pr_err("core perfctr but no constraints; unknown hardware!\n");
return -ENODEV;
}
- WARN(x86_pmu.get_event_constraints == amd_get_event_constraints,
- KERN_ERR "hw perf events core counters need constraints handler!");
-
/*
* If core performance counter extensions exists, we must use
* MSR_F15H_PERF_CTL/MSR_F15H_PERF_CTR msrs. See also
- * x86_pmu_addr_offset().
+ * amd_pmu_addr_offset().
*/
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
- printk(KERN_INFO "perf: AMD core performance counters detected\n");
-
+ pr_cont("core perfctr, ");
return 0;
}
__init int amd_pmu_init(void)
{
+ int ret;
+
/* Performance-monitoring supported from K7 and later: */
if (boot_cpu_data.x86 < 6)
return -ENODEV;
x86_pmu = amd_pmu;
- setup_event_constraints();
- setup_perfctr_core();
+ ret = amd_core_pmu_init();
+ if (ret)
+ return ret;
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 5f0581e..e09f0bf 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -851,7 +851,7 @@ static void clear_APIC_ibs(void *dummy)
setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
}
-static int __cpuinit
+static int
perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
switch (action & ~CPU_TASKS_FROZEN) {
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
new file mode 100644
index 0000000..639d128
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
+
+#include "perf_event.h"
+#include "perf_event_amd_iommu.h"
+
+#define COUNTER_SHIFT 16
+
+#define _GET_BANK(ev) ((u8)(ev->hw.extra_reg.reg >> 8))
+#define _GET_CNTR(ev) ((u8)(ev->hw.extra_reg.reg))
+
+/* iommu pmu config masks */
+#define _GET_CSOURCE(ev) ((ev->hw.config & 0xFFULL))
+#define _GET_DEVID(ev) ((ev->hw.config >> 8) & 0xFFFFULL)
+#define _GET_PASID(ev) ((ev->hw.config >> 24) & 0xFFFFULL)
+#define _GET_DOMID(ev) ((ev->hw.config >> 40) & 0xFFFFULL)
+#define _GET_DEVID_MASK(ev) ((ev->hw.extra_reg.config) & 0xFFFFULL)
+#define _GET_PASID_MASK(ev) ((ev->hw.extra_reg.config >> 16) & 0xFFFFULL)
+#define _GET_DOMID_MASK(ev) ((ev->hw.extra_reg.config >> 32) & 0xFFFFULL)
+
+static struct perf_amd_iommu __perf_iommu;
+
+struct perf_amd_iommu {
+ struct pmu pmu;
+ u8 max_banks;
+ u8 max_counters;
+ u64 cntr_assign_mask;
+ raw_spinlock_t lock;
+ const struct attribute_group *attr_groups[4];
+};
+
+#define format_group attr_groups[0]
+#define cpumask_group attr_groups[1]
+#define events_group attr_groups[2]
+#define null_group attr_groups[3]
+
+/*---------------------------------------------
+ * sysfs format attributes
+ *---------------------------------------------*/
+PMU_FORMAT_ATTR(csource, "config:0-7");
+PMU_FORMAT_ATTR(devid, "config:8-23");
+PMU_FORMAT_ATTR(pasid, "config:24-39");
+PMU_FORMAT_ATTR(domid, "config:40-55");
+PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
+PMU_FORMAT_ATTR(pasid_mask, "config1:16-31");
+PMU_FORMAT_ATTR(domid_mask, "config1:32-47");
+
+static struct attribute *iommu_format_attrs[] = {
+ &format_attr_csource.attr,
+ &format_attr_devid.attr,
+ &format_attr_pasid.attr,
+ &format_attr_domid.attr,
+ &format_attr_devid_mask.attr,
+ &format_attr_pasid_mask.attr,
+ &format_attr_domid_mask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_format_group = {
+ .name = "format",
+ .attrs = iommu_format_attrs,
+};
+
+/*---------------------------------------------
+ * sysfs events attributes
+ *---------------------------------------------*/
+struct amd_iommu_event_desc {
+ struct kobj_attribute attr;
+ const char *event;
+};
+
+static ssize_t _iommu_event_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct amd_iommu_event_desc *event =
+ container_of(attr, struct amd_iommu_event_desc, attr);
+ return sprintf(buf, "%s\n", event->event);
+}
+
+#define AMD_IOMMU_EVENT_DESC(_name, _event) \
+{ \
+ .attr = __ATTR(_name, 0444, _iommu_event_show, NULL), \
+ .event = _event, \
+}
+
+static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
+ AMD_IOMMU_EVENT_DESC(mem_pass_untrans, "csource=0x01"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_pretrans, "csource=0x02"),
+ AMD_IOMMU_EVENT_DESC(mem_pass_excl, "csource=0x03"),
+ AMD_IOMMU_EVENT_DESC(mem_target_abort, "csource=0x04"),
+ AMD_IOMMU_EVENT_DESC(mem_trans_total, "csource=0x05"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit, "csource=0x06"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis, "csource=0x07"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit, "csource=0x08"),
+ AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis, "csource=0x09"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_hit, "csource=0x0a"),
+ AMD_IOMMU_EVENT_DESC(mem_dte_mis, "csource=0x0b"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_tot, "csource=0x0c"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_nst, "csource=0x0d"),
+ AMD_IOMMU_EVENT_DESC(page_tbl_read_gst, "csource=0x0e"),
+ AMD_IOMMU_EVENT_DESC(int_dte_hit, "csource=0x0f"),
+ AMD_IOMMU_EVENT_DESC(int_dte_mis, "csource=0x10"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"),
+ AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"),
+ AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"),
+ { /* end: all zeroes */ },
+};
+
+/*---------------------------------------------
+ * sysfs cpumask attributes
+ *---------------------------------------------*/
+static cpumask_t iommu_cpumask;
+
+static ssize_t _iommu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &iommu_cpumask);
+ buf[n++] = '\n';
+ buf[n] = '\0';
+ return n;
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, _iommu_cpumask_show, NULL);
+
+static struct attribute *iommu_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group amd_iommu_cpumask_group = {
+ .attrs = iommu_cpumask_attrs,
+};
+
+/*---------------------------------------------*/
+
+static int get_next_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu)
+{
+ unsigned long flags;
+ int shift, bank, cntr, retval;
+ int max_banks = perf_iommu->max_banks;
+ int max_cntrs = perf_iommu->max_counters;
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+
+ for (bank = 0, shift = 0; bank < max_banks; bank++) {
+ for (cntr = 0; cntr < max_cntrs; cntr++) {
+ shift = bank + (bank*3) + cntr;
+ if (perf_iommu->cntr_assign_mask & (1ULL<<shift)) {
+ continue;
+ } else {
+ perf_iommu->cntr_assign_mask |= (1ULL<<shift);
+ retval = ((u16)((u16)bank<<8) | (u8)(cntr));
+ goto out;
+ }
+ }
+ }
+ retval = -ENOSPC;
+out:
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+ return retval;
+}
+
+static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
+ u8 bank, u8 cntr)
+{
+ unsigned long flags;
+ int max_banks, max_cntrs;
+ int shift = 0;
+
+ max_banks = perf_iommu->max_banks;
+ max_cntrs = perf_iommu->max_counters;
+
+ if ((bank > max_banks) || (cntr > max_cntrs))
+ return -EINVAL;
+
+ shift = bank + cntr + (bank*3);
+
+ raw_spin_lock_irqsave(&perf_iommu->lock, flags);
+ perf_iommu->cntr_assign_mask &= ~(1ULL<<shift);
+ raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);
+
+ return 0;
+}
+
+static int perf_iommu_event_init(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct perf_amd_iommu *perf_iommu;
+ u64 config, config1;
+
+ /* test the event attr type check for PMU enumeration */
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /*
+ * IOMMU counters are shared across all cores.
+ * Therefore, it does not support per-process mode.
+ * Also, it does not support event sampling mode.
+ */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ /* IOMMU counters do not have usr/os/guest/host bits */
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ perf_iommu = &__perf_iommu;
+
+ if (event->pmu != &perf_iommu->pmu)
+ return -ENOENT;
+
+ if (perf_iommu) {
+ config = event->attr.config;
+ config1 = event->attr.config1;
+ } else {
+ return -EINVAL;
+ }
+
+ /* integrate with iommu base devid (0000), assume one iommu */
+ perf_iommu->max_banks =
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID);
+ perf_iommu->max_counters =
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID);
+ if ((perf_iommu->max_banks == 0) || (perf_iommu->max_counters == 0))
+ return -EINVAL;
+
+ /* update the hw_perf_event struct with the iommu config data */
+ hwc->config = config;
+ hwc->extra_reg.config = config1;
+
+ return 0;
+}
+
+static void perf_iommu_enable_event(struct perf_event *ev)
+{
+ u8 csource = _GET_CSOURCE(ev);
+ u16 devid = _GET_DEVID(ev);
+ u64 reg = 0ULL;
+
+ reg = csource;
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+
+ reg = 0ULL | devid | (_GET_DEVID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DEVID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_PASID(ev) | (_GET_PASID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_PASID_MATCH_REG, &reg, true);
+
+ reg = 0ULL | _GET_DOMID(ev) | (_GET_DOMID_MASK(ev) << 32);
+ if (reg)
+ reg |= (1UL << 31);
+ amd_iommu_pc_get_set_reg_val(devid,
+ _GET_BANK(ev), _GET_CNTR(ev) ,
+ IOMMU_PC_DOMID_MATCH_REG, &reg, true);
+}
+
+static void perf_iommu_disable_event(struct perf_event *event)
+{
+ u64 reg = 0ULL;
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_SRC_REG, &reg, true);
+}
+
+static void perf_iommu_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ pr_debug("perf: amd_iommu:perf_iommu_start\n");
+ if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
+ return;
+
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+ hwc->state = 0;
+
+ if (flags & PERF_EF_RELOAD) {
+ u64 prev_raw_count = local64_read(&hwc->prev_count);
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &prev_raw_count, true);
+ }
+
+ perf_iommu_enable_event(event);
+ perf_event_update_userpage(event);
+
+}
+
+static void perf_iommu_read(struct perf_event *event)
+{
+ u64 count = 0ULL;
+ u64 prev_raw_count = 0ULL;
+ u64 delta = 0ULL;
+ struct hw_perf_event *hwc = &event->hw;
+ pr_debug("perf: amd_iommu:perf_iommu_read\n");
+
+ amd_iommu_pc_get_set_reg_val(_GET_DEVID(event),
+ _GET_BANK(event), _GET_CNTR(event),
+ IOMMU_PC_COUNTER_REG, &count, false);
+
+ /* IOMMU pc counter register is only 48 bits */
+ count &= 0xFFFFFFFFFFFFULL;
+
+ prev_raw_count = local64_read(&hwc->prev_count);
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ count) != prev_raw_count)
+ return;
+
+ /* Handling 48-bit counter overflowing */
+ delta = (count << COUNTER_SHIFT) - (prev_raw_count << COUNTER_SHIFT);
+ delta >>= COUNTER_SHIFT;
+ local64_add(delta, &event->count);
+
+}
+
+static void perf_iommu_stop(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 config;
+
+ pr_debug("perf: amd_iommu:perf_iommu_stop\n");
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ perf_iommu_disable_event(event);
+ WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
+ hwc->state |= PERF_HES_STOPPED;
+
+ if (hwc->state & PERF_HES_UPTODATE)
+ return;
+
+ config = hwc->config;
+ perf_iommu_read(event);
+ hwc->state |= PERF_HES_UPTODATE;
+}
+
+static int perf_iommu_add(struct perf_event *event, int flags)
+{
+ int retval;
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_add\n");
+ event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+ /* request an iommu bank/counter */
+ retval = get_next_avail_iommu_bnk_cntr(perf_iommu);
+ if (retval != -ENOSPC)
+ event->hw.extra_reg.reg = (u16)retval;
+ else
+ return retval;
+
+ if (flags & PERF_EF_START)
+ perf_iommu_start(event, PERF_EF_RELOAD);
+
+ return 0;
+}
+
+static void perf_iommu_del(struct perf_event *event, int flags)
+{
+ struct perf_amd_iommu *perf_iommu =
+ container_of(event->pmu, struct perf_amd_iommu, pmu);
+
+ pr_debug("perf: amd_iommu:perf_iommu_del\n");
+ perf_iommu_stop(event, PERF_EF_UPDATE);
+
+ /* clear the assigned iommu bank/counter */
+ clear_avail_iommu_bnk_cntr(perf_iommu,
+ _GET_BANK(event),
+ _GET_CNTR(event));
+
+ perf_event_update_userpage(event);
+}
+
+static __init int _init_events_attrs(struct perf_amd_iommu *perf_iommu)
+{
+ struct attribute **attrs;
+ struct attribute_group *attr_group;
+ int i = 0, j;
+
+ while (amd_iommu_v2_event_descs[i].attr.attr.name)
+ i++;
+
+ attr_group = kzalloc(sizeof(struct attribute *)
+ * (i + 1) + sizeof(*attr_group), GFP_KERNEL);
+ if (!attr_group)
+ return -ENOMEM;
+
+ attrs = (struct attribute **)(attr_group + 1);
+ for (j = 0; j < i; j++)
+ attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;
+
+ attr_group->name = "events";
+ attr_group->attrs = attrs;
+ perf_iommu->events_group = attr_group;
+
+ return 0;
+}
+
+static __init void amd_iommu_pc_exit(void)
+{
+ if (__perf_iommu.events_group != NULL) {
+ kfree(__perf_iommu.events_group);
+ __perf_iommu.events_group = NULL;
+ }
+}
+
+static __init int _init_perf_amd_iommu(
+ struct perf_amd_iommu *perf_iommu, char *name)
+{
+ int ret;
+
+ raw_spin_lock_init(&perf_iommu->lock);
+
+ /* Init format attributes */
+ perf_iommu->format_group = &amd_iommu_format_group;
+
+ /* Init cpumask attributes to only core 0 */
+ cpumask_set_cpu(0, &iommu_cpumask);
+ perf_iommu->cpumask_group = &amd_iommu_cpumask_group;
+
+ /* Init events attributes */
+ if (_init_events_attrs(perf_iommu) != 0)
+ pr_err("perf: amd_iommu: Only support raw events.\n");
+
+ /* Init null attributes */
+ perf_iommu->null_group = NULL;
+ perf_iommu->pmu.attr_groups = perf_iommu->attr_groups;
+
+ ret = perf_pmu_register(&perf_iommu->pmu, name, -1);
+ if (ret) {
+ pr_err("perf: amd_iommu: Failed to initialized.\n");
+ amd_iommu_pc_exit();
+ } else {
+ pr_info("perf: amd_iommu: Detected. (%d banks, %d counters/bank)\n",
+ amd_iommu_pc_get_max_banks(IOMMU_BASE_DEVID),
+ amd_iommu_pc_get_max_counters(IOMMU_BASE_DEVID));
+ }
+
+ return ret;
+}
+
+static struct perf_amd_iommu __perf_iommu = {
+ .pmu = {
+ .event_init = perf_iommu_event_init,
+ .add = perf_iommu_add,
+ .del = perf_iommu_del,
+ .start = perf_iommu_start,
+ .stop = perf_iommu_stop,
+ .read = perf_iommu_read,
+ },
+ .max_banks = 0x00,
+ .max_counters = 0x00,
+ .cntr_assign_mask = 0ULL,
+ .format_group = NULL,
+ .cpumask_group = NULL,
+ .events_group = NULL,
+ .null_group = NULL,
+};
+
+static __init int amd_iommu_pc_init(void)
+{
+ /* Make sure the IOMMU PC resource is available */
+ if (!amd_iommu_pc_supported())
+ return -ENODEV;
+
+ _init_perf_amd_iommu(&__perf_iommu, "amd_iommu");
+
+ return 0;
+}
+
+device_initcall(amd_iommu_pc_init);
diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
new file mode 100644
index 0000000..845d173
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_amd_iommu.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Steven Kinney <Steven.Kinney@amd.com>
+ * Author: Suravee Suthikulpanit <Suraveee.Suthikulpanit@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PERF_EVENT_AMD_IOMMU_H_
+#define _PERF_EVENT_AMD_IOMMU_H_
+
+/* iommu pc mmio region register indexes */
+#define IOMMU_PC_COUNTER_REG 0x00
+#define IOMMU_PC_COUNTER_SRC_REG 0x08
+#define IOMMU_PC_PASID_MATCH_REG 0x10
+#define IOMMU_PC_DOMID_MATCH_REG 0x18
+#define IOMMU_PC_DEVID_MATCH_REG 0x20
+#define IOMMU_PC_COUNTER_REPORT_REG 0x28
+
+/* maximun specified bank/counters */
+#define PC_MAX_SPEC_BNKS 64
+#define PC_MAX_SPEC_CNTRS 16
+
+/* iommu pc reg masks*/
+#define IOMMU_BASE_DEVID 0x0000
+
+/* amd_iommu_init.c external support functions */
+extern bool amd_iommu_pc_supported(void);
+
+extern u8 amd_iommu_pc_get_max_banks(u16 devid);
+
+extern u8 amd_iommu_pc_get_max_counters(u16 devid);
+
+extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr,
+ u8 fxn, u64 *value, bool is_write);
+
+#endif /*_PERF_EVENT_AMD_IOMMU_H_*/
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
index c0c661a..754291a 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c
@@ -288,13 +288,13 @@ static struct pmu amd_l2_pmu = {
.read = amd_uncore_read,
};
-static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu)
+static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
{
return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL,
cpu_to_node(cpu));
}
-static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu)
+static void amd_uncore_cpu_up_prepare(unsigned int cpu)
{
struct amd_uncore *uncore;
@@ -322,8 +322,8 @@ static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu)
}
static struct amd_uncore *
-__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this,
- struct amd_uncore * __percpu *uncores)
+amd_uncore_find_online_sibling(struct amd_uncore *this,
+ struct amd_uncore * __percpu *uncores)
{
unsigned int cpu;
struct amd_uncore *that;
@@ -348,7 +348,7 @@ __cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this,
return this;
}
-static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu)
+static void amd_uncore_cpu_starting(unsigned int cpu)
{
unsigned int eax, ebx, ecx, edx;
struct amd_uncore *uncore;
@@ -376,8 +376,8 @@ static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu)
}
}
-static void __cpuinit uncore_online(unsigned int cpu,
- struct amd_uncore * __percpu *uncores)
+static void uncore_online(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
{
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
@@ -388,7 +388,7 @@ static void __cpuinit uncore_online(unsigned int cpu,
cpumask_set_cpu(cpu, uncore->active_mask);
}
-static void __cpuinit amd_uncore_cpu_online(unsigned int cpu)
+static void amd_uncore_cpu_online(unsigned int cpu)
{
if (amd_uncore_nb)
uncore_online(cpu, amd_uncore_nb);
@@ -397,8 +397,8 @@ static void __cpuinit amd_uncore_cpu_online(unsigned int cpu)
uncore_online(cpu, amd_uncore_l2);
}
-static void __cpuinit uncore_down_prepare(unsigned int cpu,
- struct amd_uncore * __percpu *uncores)
+static void uncore_down_prepare(unsigned int cpu,
+ struct amd_uncore * __percpu *uncores)
{
unsigned int i;
struct amd_uncore *this = *per_cpu_ptr(uncores, cpu);
@@ -423,7 +423,7 @@ static void __cpuinit uncore_down_prepare(unsigned int cpu,
}
}
-static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu)
+static void amd_uncore_cpu_down_prepare(unsigned int cpu)
{
if (amd_uncore_nb)
uncore_down_prepare(cpu, amd_uncore_nb);
@@ -432,8 +432,7 @@ static void __cpuinit amd_uncore_cpu_down_prepare(unsigned int cpu)
uncore_down_prepare(cpu, amd_uncore_l2);
}
-static void __cpuinit uncore_dead(unsigned int cpu,
- struct amd_uncore * __percpu *uncores)
+static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores)
{
struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
@@ -445,7 +444,7 @@ static void __cpuinit uncore_dead(unsigned int cpu,
*per_cpu_ptr(amd_uncore_nb, cpu) = NULL;
}
-static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu)
+static void amd_uncore_cpu_dead(unsigned int cpu)
{
if (amd_uncore_nb)
uncore_dead(cpu, amd_uncore_nb);
@@ -454,7 +453,7 @@ static void __cpuinit amd_uncore_cpu_dead(unsigned int cpu)
uncore_dead(cpu, amd_uncore_l2);
}
-static int __cpuinit
+static int
amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
void *hcpu)
{
@@ -489,7 +488,7 @@ amd_uncore_cpu_notifier(struct notifier_block *self, unsigned long action,
return NOTIFY_OK;
}
-static struct notifier_block amd_uncore_cpu_notifier_block __cpuinitdata = {
+static struct notifier_block amd_uncore_cpu_notifier_block = {
.notifier_call = amd_uncore_cpu_notifier,
.priority = CPU_PRI_PERF + 1,
};
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f60d41f..f31a165 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/export.h>
+#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/apic.h>
@@ -80,7 +81,8 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
{
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -122,6 +124,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
@@ -142,8 +145,9 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
{
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
EVENT_EXTRA_END
};
@@ -161,17 +165,28 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_slm_event_constraints[] __read_mostly =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
+ EVENT_CONSTRAINT_END
+};
+
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
- INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
EVENT_EXTRA_END
};
@@ -190,6 +205,22 @@ struct attribute *snb_events_attrs[] = {
NULL,
};
+static struct event_constraint intel_hsw_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.* */
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+ /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
+ INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+ /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
+ INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -865,6 +896,140 @@ static __initconst const u64 atom_hw_cache_event_ids
},
};
+static struct extra_reg intel_slm_extra_regs[] __read_mostly =
+{
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+#define SLM_DMND_READ SNB_DMND_DATA_RD
+#define SLM_DMND_WRITE SNB_DMND_RFO
+#define SLM_DMND_PREFETCH (SNB_PF_DATA_RD|SNB_PF_RFO)
+
+#define SLM_SNP_ANY (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
+#define SLM_LLC_ACCESS SNB_RESP_ANY
+#define SLM_LLC_MISS (SLM_SNP_ANY|SNB_NON_DRAM)
+
+static __initconst const u64 slm_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_WRITE|SLM_LLC_MISS,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
+ [ C(RESULT_MISS) ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
+ },
+ },
+};
+
+static __initconst const u64 slm_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0104, /* LD_DCU_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACGE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01b7,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0804, /* LD_DTLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
+ [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
{
/* user explicitly requested branch sampling */
@@ -872,7 +1037,8 @@ static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event)
return true;
/* implicit branch sampling to correct PEBS skid */
- if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+ if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1 &&
+ x86_pmu.intel_cap.pebs_format < 2)
return true;
return false;
@@ -1167,15 +1333,11 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
cpuc = &__get_cpu_var(cpu_hw_events);
/*
- * Some chipsets need to unmask the LVTPC in a particular spot
- * inside the nmi handler. As a result, the unmasking was pushed
- * into all the nmi handlers.
- *
- * This handler doesn't seem to have any issues with the unmasking
- * so it was left at the top.
+ * No known reason to not always do late ACK,
+ * but just in case do it opt-in.
*/
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-
+ if (!x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_pmu_disable_all();
handled = intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status();
@@ -1188,8 +1350,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
again:
intel_pmu_ack_status(status);
if (++loops > 100) {
- WARN_ONCE(1, "perfevents: irq loop stuck!\n");
- perf_event_print_debug();
+ static bool warned = false;
+ if (!warned) {
+ WARN(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
+ warned = true;
+ }
intel_pmu_reset();
goto done;
}
@@ -1235,6 +1401,13 @@ again:
done:
intel_pmu_enable_all(0);
+ /*
+ * Only unmask the NMI after the overflow counters
+ * have been reset. This avoids spurious NMIs on
+ * Haswell CPUs.
+ */
+ if (x86_pmu.late_ack)
+ apic_write(APIC_LVTPC, APIC_DM_NMI);
return handled;
}
@@ -1276,11 +1449,11 @@ static void intel_fixup_er(struct perf_event *event, int idx)
if (idx == EXTRA_REG_RSP_0) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01b7;
+ event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
} else if (idx == EXTRA_REG_RSP_1) {
event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
- event->hw.config |= 0x01bb;
+ event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
}
}
@@ -1425,7 +1598,6 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
if ((event->hw.config & c->cmask) == c->code) {
- /* hw.flags zeroed at initialization */
event->hw.flags |= c->flags;
return c;
}
@@ -1473,7 +1645,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
- event->hw.flags = 0;
intel_put_shared_regs_event_constraints(cpuc, event);
}
@@ -1646,6 +1817,47 @@ static void core_pmu_enable_all(int added)
}
}
+static int hsw_hw_config(struct perf_event *event)
+{
+ int ret = intel_pmu_hw_config(event);
+
+ if (ret)
+ return ret;
+ if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
+ return 0;
+ event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
+
+ /*
+ * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
+ * PEBS or in ANY thread mode. Since the results are non-sensical forbid
+ * this combination.
+ */
+ if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
+ ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
+ event->attr.precise_ip > 0))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static struct event_constraint counter2_constraint =
+ EVENT_CONSTRAINT(0, 0x4, 0);
+
+static struct event_constraint *
+hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ struct event_constraint *c = intel_get_event_constraints(cpuc, event);
+
+ /* Handle special quirk on in_tx_checkpointed only in counter 2 */
+ if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
+ if (c->idxmsk64 & (1U << 2))
+ return &counter2_constraint;
+ return &emptyconstraint;
+ }
+
+ return c;
+}
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -1653,6 +1865,8 @@ PMU_FORMAT_ATTR(pc, "config:19" );
PMU_FORMAT_ATTR(any, "config:21" ); /* v3 + */
PMU_FORMAT_ATTR(inv, "config:23" );
PMU_FORMAT_ATTR(cmask, "config:24-31" );
+PMU_FORMAT_ATTR(in_tx, "config:32");
+PMU_FORMAT_ATTR(in_tx_cp, "config:33");
static struct attribute *intel_arch_formats_attr[] = {
&format_attr_event.attr,
@@ -1807,6 +2021,8 @@ static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_any.attr,
&format_attr_inv.attr,
&format_attr_cmask.attr,
+ &format_attr_in_tx.attr,
+ &format_attr_in_tx_cp.attr,
&format_attr_offcore_rsp.attr, /* XXX do NHM/WSM + SNB breakout */
&format_attr_ldlat.attr, /* PEBS load latency */
@@ -1966,6 +2182,15 @@ static __init void intel_nehalem_quirk(void)
}
}
+EVENT_ATTR_STR(mem-loads, mem_ld_hsw, "event=0xcd,umask=0x1,ldlat=3");
+EVENT_ATTR_STR(mem-stores, mem_st_hsw, "event=0xd0,umask=0x82")
+
+static struct attribute *hsw_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL
+};
+
__init int intel_pmu_init(void)
{
union cpuid10_edx edx;
@@ -2099,6 +2324,22 @@ __init int intel_pmu_init(void)
pr_cont("Atom events, ");
break;
+ case 55: /* Atom 22nm "Silvermont" */
+ case 77: /* Avoton "Silvermont" */
+ memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_atom();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_slm_extra_regs;
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ pr_cont("Silvermont events, ");
+ break;
+
case 37: /* 32 nm nehalem, "Clarkdale" */
case 44: /* 32 nm nehalem, "Gulftown" */
case 47: /* 32 nm Xeon E7 */
@@ -2189,6 +2430,31 @@ __init int intel_pmu_init(void)
break;
+ case 60: /* Haswell Client */
+ case 70:
+ case 71:
+ case 63:
+ case 69:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_hsw_event_constraints;
+ x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_snb_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.cpu_events = hsw_events_attrs;
+ pr_cont("Haswell events, ");
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
@@ -2227,7 +2493,7 @@ __init int intel_pmu_init(void)
* counter, so do not extend mask to generic counters
*/
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if (c->cmask != X86_RAW_EVENT_MASK
+ if (c->cmask != FIXED_EVENT_FLAGS
|| c->idxmsk64 == INTEL_PMC_MSK_FIXED_REF_CYCLES) {
continue;
}
@@ -2237,5 +2503,12 @@ __init int intel_pmu_init(void)
}
}
+ /* Support full width counters using alternative MSR range */
+ if (x86_pmu.intel_cap.full_width_write) {
+ x86_pmu.max_period = x86_pmu.cntval_mask;
+ x86_pmu.perfctr = MSR_IA32_PMC0;
+ pr_cont("full-width counters, ");
+ }
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 60250f6..ab3ba1c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status)
return val;
}
+static u64 precise_store_data_hsw(u64 status)
+{
+ union perf_mem_data_src dse;
+
+ dse.val = 0;
+ dse.mem_op = PERF_MEM_OP_STORE;
+ dse.mem_lvl = PERF_MEM_LVL_NA;
+ if (status & 1)
+ dse.mem_lvl = PERF_MEM_LVL_L1;
+ /* Nothing else supported. Sorry. */
+ return dse.val;
+}
+
static u64 load_latency_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -165,6 +178,22 @@ struct pebs_record_nhm {
u64 status, dla, dse, lat;
};
+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+ struct pebs_record_nhm nhm;
+ /*
+ * Real IP of the event. In the Intel documentation this
+ * is called eventingrip.
+ */
+ u64 real_ip;
+ /*
+ * TSX tuning information field: abort cycles and abort flags.
+ */
+ u64 tsx_tuning;
+};
+
void init_debug_store_on_cpu(int cpu)
{
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -195,7 +224,7 @@ static int alloc_pebs_buffer(int cpu)
if (!x86_pmu.pebs)
return 0;
- buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
@@ -233,7 +262,7 @@ static int alloc_bts_buffer(int cpu)
if (!x86_pmu.bts)
return 0;
- buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
+ buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
if (unlikely(!buffer))
return -ENOMEM;
@@ -266,7 +295,7 @@ static int alloc_ds_buffer(int cpu)
int node = cpu_to_node(cpu);
struct debug_store *ds;
- ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
+ ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
if (unlikely(!ds))
return -ENOMEM;
@@ -488,6 +517,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_slm_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
+ INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */
+ INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */
+ INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */
+ INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */
+ INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */
+ INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */
+ INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint intel_nehalem_pebs_event_constraints[] = {
INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
@@ -529,6 +584,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+ INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
EVENT_CONSTRAINT_END
};
@@ -548,6 +604,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_hsw_pebs_event_constraints[] = {
+ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
+ INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+ INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+ INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */
+ INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */
+ /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
+ INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf),
+ /* MEM_UOPS_RETIRED.STLB_MISS_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
+ INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */
+ /* MEM_UOPS_RETIRED.SPLIT_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */
+ INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */
+ INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */
+ /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+ INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf),
+ /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+ INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf),
+ /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+ INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf),
+ /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */
+ INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */
+ INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */
+
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -588,6 +680,12 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+
+ if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT)
+ cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
+ else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST)
+ cpuc->pebs_enabled &= ~(1ULL << 63);
+
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
@@ -697,6 +795,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
*/
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct pebs_record_nhm *pebs = __pebs;
+ struct pebs_record_hsw *pebs_hsw = __pebs;
struct perf_sample_data data;
struct pt_regs regs;
u64 sample_type;
@@ -706,7 +805,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
return;
fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
- fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
+ fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST |
+ PERF_X86_EVENT_PEBS_ST_HSW);
perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -717,9 +817,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
* if PEBS-LL or PreciseStore
*/
if (fll || fst) {
- if (sample_type & PERF_SAMPLE_ADDR)
- data.addr = pebs->dla;
-
/*
* Use latency for weight (only avail with PEBS-LL)
*/
@@ -732,6 +829,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC) {
if (fll)
data.data_src.val = load_latency_data(pebs->dse);
+ else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ data.data_src.val =
+ precise_store_data_hsw(pebs->dse);
else
data.data_src.val = precise_store_data(pebs->dse);
}
@@ -753,11 +853,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
regs.bp = pebs->bp;
regs.sp = pebs->sp;
- if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+ if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+ regs.ip = pebs_hsw->real_ip;
+ regs.flags |= PERF_EFLAGS_EXACT;
+ } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
regs.flags |= PERF_EFLAGS_EXACT;
else
regs.flags &= ~PERF_EFLAGS_EXACT;
+ if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
+ x86_pmu.intel_cap.pebs_format >= 1)
+ data.addr = pebs->dla;
+
if (has_branch_stack(event))
data.br_stack = &cpuc->lbr_stack;
@@ -806,35 +913,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
__intel_pmu_pebs_event(event, iregs, at);
}
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
+ void *top)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct debug_store *ds = cpuc->ds;
- struct pebs_record_nhm *at, *top;
struct perf_event *event = NULL;
u64 status = 0;
- int bit, n;
-
- if (!x86_pmu.pebs_active)
- return;
-
- at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
- top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+ int bit;
ds->pebs_index = ds->pebs_buffer_base;
- n = top - at;
- if (n <= 0)
- return;
+ for (; at < top; at += x86_pmu.pebs_record_size) {
+ struct pebs_record_nhm *p = at;
- /*
- * Should not happen, we program the threshold at 1 and do not
- * set a reset value.
- */
- WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
-
- for ( ; at < top; at++) {
- for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+ for_each_set_bit(bit, (unsigned long *)&p->status,
+ x86_pmu.max_pebs_events) {
event = cpuc->events[bit];
if (!test_bit(bit, cpuc->active_mask))
continue;
@@ -857,6 +951,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
}
}
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct pebs_record_nhm *at, *top;
+ int n;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ n = top - at;
+ if (n <= 0)
+ return;
+
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(n > x86_pmu.max_pebs_events,
+ "Unexpected number of pebs records %d\n", n);
+
+ return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
+static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
+{
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct pebs_record_hsw *at, *top;
+ int n;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
+
+ n = top - at;
+ if (n <= 0)
+ return;
+ /*
+ * Should not happen, we program the threshold at 1 and do not
+ * set a reset value.
+ */
+ WARN_ONCE(n > x86_pmu.max_pebs_events,
+ "Unexpected number of pebs records %d\n", n);
+
+ return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
/*
* BTS, PEBS probe and setup
*/
@@ -888,6 +1037,12 @@ void intel_ds_init(void)
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
break;
+ case 2:
+ pr_cont("PEBS fmt2%c, ", pebs_type);
+ x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+ break;
+
default:
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d978353..d5be06a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -12,6 +12,16 @@ enum {
LBR_FORMAT_LIP = 0x01,
LBR_FORMAT_EIP = 0x02,
LBR_FORMAT_EIP_FLAGS = 0x03,
+ LBR_FORMAT_EIP_FLAGS2 = 0x04,
+ LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_EIP_FLAGS2,
+};
+
+static enum {
+ LBR_EIP_FLAGS = 1,
+ LBR_TSX = 2,
+} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
+ [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
+ [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};
/*
@@ -56,6 +66,8 @@ enum {
LBR_FAR)
#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+#define LBR_FROM_FLAG_IN_TX (1ULL << 62)
+#define LBR_FROM_FLAG_ABORT (1ULL << 61)
#define for_each_branch_sample_type(x) \
for ((x) = PERF_SAMPLE_BRANCH_USER; \
@@ -81,9 +93,13 @@ enum {
X86_BR_JMP = 1 << 9, /* jump */
X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */
X86_BR_IND_CALL = 1 << 11,/* indirect calls */
+ X86_BR_ABORT = 1 << 12,/* transaction abort */
+ X86_BR_IN_TX = 1 << 13,/* in transaction */
+ X86_BR_NO_TX = 1 << 14,/* not in transaction */
};
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
+#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
#define X86_BR_ANY \
(X86_BR_CALL |\
@@ -95,6 +111,7 @@ enum {
X86_BR_JCC |\
X86_BR_JMP |\
X86_BR_IRQ |\
+ X86_BR_ABORT |\
X86_BR_IND_CALL)
#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -270,21 +287,31 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
unsigned long lbr_idx = (tos - i) & mask;
- u64 from, to, mis = 0, pred = 0;
+ u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
+ int skip = 0;
+ int lbr_flags = lbr_desc[lbr_format];
rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
- if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
+ if (lbr_flags & LBR_EIP_FLAGS) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
- from = (u64)((((s64)from) << 1) >> 1);
+ skip = 1;
+ }
+ if (lbr_flags & LBR_TSX) {
+ in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+ abort = !!(from & LBR_FROM_FLAG_ABORT);
+ skip = 3;
}
+ from = (u64)((((s64)from) << skip) >> skip);
cpuc->lbr_entries[i].from = from;
cpuc->lbr_entries[i].to = to;
cpuc->lbr_entries[i].mispred = mis;
cpuc->lbr_entries[i].predicted = pred;
+ cpuc->lbr_entries[i].in_tx = in_tx;
+ cpuc->lbr_entries[i].abort = abort;
cpuc->lbr_entries[i].reserved = 0;
}
cpuc->lbr_stack.nr = i;
@@ -310,7 +337,7 @@ void intel_pmu_lbr_read(void)
* - in case there is no HW filter
* - in case the HW filter has errata or limitations
*/
-static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
+static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
u64 br_type = event->attr.branch_sample_type;
int mask = 0;
@@ -318,11 +345,8 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_USER)
mask |= X86_BR_USER;
- if (br_type & PERF_SAMPLE_BRANCH_KERNEL) {
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
mask |= X86_BR_KERNEL;
- }
/* we ignore BRANCH_HV here */
@@ -337,13 +361,21 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
mask |= X86_BR_IND_CALL;
+
+ if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
+ mask |= X86_BR_ABORT;
+
+ if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
+ mask |= X86_BR_IN_TX;
+
+ if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
+ mask |= X86_BR_NO_TX;
+
/*
* stash actual user request into reg, it may
* be used by fixup code for some CPU
*/
event->hw.branch_reg.reg = mask;
-
- return 0;
}
/*
@@ -391,9 +423,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
/*
* setup SW LBR filter
*/
- ret = intel_pmu_setup_sw_lbr_filter(event);
- if (ret)
- return ret;
+ intel_pmu_setup_sw_lbr_filter(event);
/*
* setup HW LBR filter, if any
@@ -415,7 +445,7 @@ int intel_pmu_setup_lbr_filter(struct perf_event *event)
* decoded (e.g., text page not present), then X86_BR_NONE is
* returned.
*/
-static int branch_type(unsigned long from, unsigned long to)
+static int branch_type(unsigned long from, unsigned long to, int abort)
{
struct insn insn;
void *addr;
@@ -435,6 +465,9 @@ static int branch_type(unsigned long from, unsigned long to)
if (from == 0 || to == 0)
return X86_BR_NONE;
+ if (abort)
+ return X86_BR_ABORT | to_plm;
+
if (from_plm == X86_BR_USER) {
/*
* can happen if measuring at the user level only
@@ -581,7 +614,13 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
from = cpuc->lbr_entries[i].from;
to = cpuc->lbr_entries[i].to;
- type = branch_type(from, to);
+ type = branch_type(from, to, cpuc->lbr_entries[i].abort);
+ if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
+ if (cpuc->lbr_entries[i].in_tx)
+ type |= X86_BR_IN_TX;
+ else
+ type |= X86_BR_NO_TX;
+ }
/* if type does not correspond, then discard */
if (type == X86_BR_NONE || (br_sel & type) != type) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 52441a2..4118f9f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -6,6 +6,8 @@ static struct intel_uncore_type **pci_uncores = empty_uncore;
/* pci bus to socket mapping */
static int pcibus_to_physid[256] = { [0 ... 255] = -1, };
+static struct pci_dev *extra_pci_dev[UNCORE_SOCKET_MAX][UNCORE_EXTRA_PCI_DEV_MAX];
+
static DEFINE_RAW_SPINLOCK(uncore_box_lock);
/* mask of cpus that collect uncore events */
@@ -45,6 +47,24 @@ DEFINE_UNCORE_FORMAT_ATTR(filter_band0, filter_band0, "config1:0-7");
DEFINE_UNCORE_FORMAT_ATTR(filter_band1, filter_band1, "config1:8-15");
DEFINE_UNCORE_FORMAT_ATTR(filter_band2, filter_band2, "config1:16-23");
DEFINE_UNCORE_FORMAT_ATTR(filter_band3, filter_band3, "config1:24-31");
+DEFINE_UNCORE_FORMAT_ATTR(match_rds, match_rds, "config1:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid30, match_rnid30, "config1:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(match_rnid4, match_rnid4, "config1:31");
+DEFINE_UNCORE_FORMAT_ATTR(match_dnid, match_dnid, "config1:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(match_mc, match_mc, "config1:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(match_opc, match_opc, "config1:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(match_vnw, match_vnw, "config1:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(match0, match0, "config1:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(match1, match1, "config1:32-63");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rds, mask_rds, "config2:48-51");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid30, mask_rnid30, "config2:32-35");
+DEFINE_UNCORE_FORMAT_ATTR(mask_rnid4, mask_rnid4, "config2:31");
+DEFINE_UNCORE_FORMAT_ATTR(mask_dnid, mask_dnid, "config2:13-17");
+DEFINE_UNCORE_FORMAT_ATTR(mask_mc, mask_mc, "config2:9-12");
+DEFINE_UNCORE_FORMAT_ATTR(mask_opc, mask_opc, "config2:5-8");
+DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4");
+DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31");
+DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63");
static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event)
{
@@ -281,7 +301,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = {
};
static struct attribute *snbep_uncore_pcu_formats_attr[] = {
- &format_attr_event.attr,
+ &format_attr_event_ext.attr,
&format_attr_occ_sel.attr,
&format_attr_edge.attr,
&format_attr_inv.attr,
@@ -301,6 +321,24 @@ static struct attribute *snbep_uncore_qpi_formats_attr[] = {
&format_attr_edge.attr,
&format_attr_inv.attr,
&format_attr_thresh8.attr,
+ &format_attr_match_rds.attr,
+ &format_attr_match_rnid30.attr,
+ &format_attr_match_rnid4.attr,
+ &format_attr_match_dnid.attr,
+ &format_attr_match_mc.attr,
+ &format_attr_match_opc.attr,
+ &format_attr_match_vnw.attr,
+ &format_attr_match0.attr,
+ &format_attr_match1.attr,
+ &format_attr_mask_rds.attr,
+ &format_attr_mask_rnid30.attr,
+ &format_attr_mask_rnid4.attr,
+ &format_attr_mask_dnid.attr,
+ &format_attr_mask_mc.attr,
+ &format_attr_mask_opc.attr,
+ &format_attr_mask_vnw.attr,
+ &format_attr_mask0.attr,
+ &format_attr_mask1.attr,
NULL,
};
@@ -314,8 +352,8 @@ static struct uncore_event_desc snbep_uncore_imc_events[] = {
static struct uncore_event_desc snbep_uncore_qpi_events[] = {
INTEL_UNCORE_EVENT_DESC(clockticks, "event=0x14"),
INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
- INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x02,umask=0x08"),
- INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x03,umask=0x04"),
+ INTEL_UNCORE_EVENT_DESC(drs_data, "event=0x102,umask=0x08"),
+ INTEL_UNCORE_EVENT_DESC(ncb_data, "event=0x103,umask=0x04"),
{ /* end: all zeroes */ },
};
@@ -356,13 +394,16 @@ static struct intel_uncore_ops snbep_uncore_msr_ops = {
SNBEP_UNCORE_MSR_OPS_COMMON_INIT(),
};
+#define SNBEP_UNCORE_PCI_OPS_COMMON_INIT() \
+ .init_box = snbep_uncore_pci_init_box, \
+ .disable_box = snbep_uncore_pci_disable_box, \
+ .enable_box = snbep_uncore_pci_enable_box, \
+ .disable_event = snbep_uncore_pci_disable_event, \
+ .read_counter = snbep_uncore_pci_read_counter
+
static struct intel_uncore_ops snbep_uncore_pci_ops = {
- .init_box = snbep_uncore_pci_init_box,
- .disable_box = snbep_uncore_pci_disable_box,
- .enable_box = snbep_uncore_pci_enable_box,
- .disable_event = snbep_uncore_pci_disable_event,
- .enable_event = snbep_uncore_pci_enable_event,
- .read_counter = snbep_uncore_pci_read_counter,
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_uncore_pci_enable_event, \
};
static struct event_constraint snbep_uncore_cbox_constraints[] = {
@@ -536,7 +577,7 @@ __snbep_cbox_get_constraint(struct intel_uncore_box *box, struct perf_event *eve
if (!uncore_box_is_fake(box))
reg1->alloc |= alloc;
- return 0;
+ return NULL;
fail:
for (; i >= 0; i--) {
if (alloc & (0x1 << i))
@@ -644,7 +685,7 @@ snbep_pcu_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
(!uncore_box_is_fake(box) && reg1->alloc))
return NULL;
again:
- mask = 0xff << (idx * 8);
+ mask = 0xffULL << (idx * 8);
raw_spin_lock_irqsave(&er->lock, flags);
if (!__BITS_VALUE(atomic_read(&er->ref), idx, 8) ||
!((config1 ^ er->config) & mask)) {
@@ -726,6 +767,61 @@ static struct intel_uncore_type *snbep_msr_uncores[] = {
NULL,
};
+enum {
+ SNBEP_PCI_QPI_PORT0_FILTER,
+ SNBEP_PCI_QPI_PORT1_FILTER,
+};
+
+static int snbep_qpi_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if ((hwc->config & SNBEP_PMON_CTL_EV_SEL_MASK) == 0x38) {
+ reg1->idx = 0;
+ reg1->reg = SNBEP_Q_Py_PCI_PMON_PKT_MATCH0;
+ reg1->config = event->attr.config1;
+ reg2->reg = SNBEP_Q_Py_PCI_PMON_PKT_MASK0;
+ reg2->config = event->attr.config2;
+ }
+ return 0;
+}
+
+static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct pci_dev *pdev = box->pci_dev;
+ struct hw_perf_event *hwc = &event->hw;
+ struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
+ struct hw_perf_event_extra *reg2 = &hwc->branch_reg;
+
+ if (reg1->idx != EXTRA_REG_NONE) {
+ int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER;
+ struct pci_dev *filter_pdev = extra_pci_dev[box->phys_id][idx];
+ WARN_ON_ONCE(!filter_pdev);
+ if (filter_pdev) {
+ pci_write_config_dword(filter_pdev, reg1->reg,
+ (u32)reg1->config);
+ pci_write_config_dword(filter_pdev, reg1->reg + 4,
+ (u32)(reg1->config >> 32));
+ pci_write_config_dword(filter_pdev, reg2->reg,
+ (u32)reg2->config);
+ pci_write_config_dword(filter_pdev, reg2->reg + 4,
+ (u32)(reg2->config >> 32));
+ }
+ }
+
+ pci_write_config_dword(pdev, hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops snbep_uncore_qpi_ops = {
+ SNBEP_UNCORE_PCI_OPS_COMMON_INIT(),
+ .enable_event = snbep_qpi_enable_event,
+ .hw_config = snbep_qpi_hw_config,
+ .get_constraint = uncore_get_constraint,
+ .put_constraint = uncore_put_constraint,
+};
+
#define SNBEP_UNCORE_PCI_COMMON_INIT() \
.perf_ctr = SNBEP_PCI_PMON_CTR0, \
.event_ctl = SNBEP_PCI_PMON_CTL0, \
@@ -755,17 +851,18 @@ static struct intel_uncore_type snbep_uncore_imc = {
};
static struct intel_uncore_type snbep_uncore_qpi = {
- .name = "qpi",
- .num_counters = 4,
- .num_boxes = 2,
- .perf_ctr_bits = 48,
- .perf_ctr = SNBEP_PCI_PMON_CTR0,
- .event_ctl = SNBEP_PCI_PMON_CTL0,
- .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
- .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
- .ops = &snbep_uncore_pci_ops,
- .event_descs = snbep_uncore_qpi_events,
- .format_group = &snbep_uncore_qpi_format_group,
+ .name = "qpi",
+ .num_counters = 4,
+ .num_boxes = 2,
+ .perf_ctr_bits = 48,
+ .perf_ctr = SNBEP_PCI_PMON_CTR0,
+ .event_ctl = SNBEP_PCI_PMON_CTL0,
+ .event_mask = SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+ .box_ctl = SNBEP_PCI_PMON_BOX_CTL,
+ .num_shared_regs = 1,
+ .ops = &snbep_uncore_qpi_ops,
+ .event_descs = snbep_uncore_qpi_events,
+ .format_group = &snbep_uncore_qpi_format_group,
};
@@ -807,43 +904,53 @@ static struct intel_uncore_type *snbep_pci_uncores[] = {
static DEFINE_PCI_DEVICE_TABLE(snbep_uncore_pci_ids) = {
{ /* Home Agent */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_HA),
- .driver_data = SNBEP_PCI_UNCORE_HA,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_HA, 0),
},
{ /* MC Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC0),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 0),
},
{ /* MC Channel 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC1),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 1),
},
{ /* MC Channel 2 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC2),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 2),
},
{ /* MC Channel 3 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_IMC3),
- .driver_data = SNBEP_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_IMC, 3),
},
{ /* QPI Port 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI0),
- .driver_data = SNBEP_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 0),
},
{ /* QPI Port 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_QPI1),
- .driver_data = SNBEP_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_QPI, 1),
},
{ /* R2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R2PCIE),
- .driver_data = SNBEP_PCI_UNCORE_R2PCIE,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R2PCIE, 0),
},
{ /* R3QPI Link 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI0),
- .driver_data = SNBEP_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 0),
},
{ /* R3QPI Link 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_UNC_R3QPI1),
- .driver_data = SNBEP_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(SNBEP_PCI_UNCORE_R3QPI, 1),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c86),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT0_FILTER),
+ },
+ { /* QPI Port 0 filter */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3c96),
+ .driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
+ SNBEP_PCI_QPI_PORT1_FILTER),
},
{ /* end: all zeroes */ }
};
@@ -1256,71 +1363,71 @@ static struct intel_uncore_type *ivt_pci_uncores[] = {
static DEFINE_PCI_DEVICE_TABLE(ivt_uncore_pci_ids) = {
{ /* Home Agent 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe30),
- .driver_data = IVT_PCI_UNCORE_HA,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 0),
},
{ /* Home Agent 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe38),
- .driver_data = IVT_PCI_UNCORE_HA,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_HA, 1),
},
{ /* MC0 Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb4),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 0),
},
{ /* MC0 Channel 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb5),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 1),
},
{ /* MC0 Channel 3 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb0),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 2),
},
{ /* MC0 Channel 4 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xeb1),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 3),
},
{ /* MC1 Channel 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef4),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 4),
},
{ /* MC1 Channel 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef5),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 5),
},
{ /* MC1 Channel 3 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef0),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 6),
},
{ /* MC1 Channel 4 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xef1),
- .driver_data = IVT_PCI_UNCORE_IMC,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_IMC, 7),
},
{ /* QPI0 Port 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe32),
- .driver_data = IVT_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 0),
},
{ /* QPI0 Port 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe33),
- .driver_data = IVT_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 1),
},
{ /* QPI1 Port 2 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3a),
- .driver_data = IVT_PCI_UNCORE_QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_QPI, 2),
},
{ /* R2PCIe */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe34),
- .driver_data = IVT_PCI_UNCORE_R2PCIE,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R2PCIE, 0),
},
{ /* R3QPI0 Link 0 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe36),
- .driver_data = IVT_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 0),
},
{ /* R3QPI0 Link 1 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe37),
- .driver_data = IVT_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 1),
},
{ /* R3QPI1 Link 2 */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xe3e),
- .driver_data = IVT_PCI_UNCORE_R3QPI,
+ .driver_data = UNCORE_PCI_DEV_DATA(IVT_PCI_UNCORE_R3QPI, 2),
},
{ /* end: all zeroes */ }
};
@@ -1923,7 +2030,7 @@ static u64 nhmex_mbox_alter_er(struct perf_event *event, int new_idx, bool modif
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
+ u64 idx, orig_idx = __BITS_VALUE(reg1->idx, 0, 8);
u64 config = reg1->config;
/* get the non-shared control bits and shift them */
@@ -2599,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
box->hrtimer.function = uncore_pmu_hrtimer;
}
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
{
struct intel_uncore_box *box;
int i, size;
size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
- box = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
+ box = kzalloc_node(size, GFP_KERNEL, node);
if (!box)
return NULL;
@@ -2701,7 +2808,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve
return c;
}
- if (event->hw.config == ~0ULL)
+ if (event->attr.config == UNCORE_FIXED_EVENT)
return &constraint_fixed;
if (type->constraints) {
@@ -2723,15 +2830,16 @@ static void uncore_put_event_constraint(struct intel_uncore_box *box, struct per
static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int n)
{
unsigned long used_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];
- struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
+ struct event_constraint *c;
int i, wmin, wmax, ret = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);
for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) {
+ hwc = &box->event_list[i]->hw;
c = uncore_get_event_constraint(box, box->event_list[i]);
- constraints[i] = c;
+ hwc->constraint = c;
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
}
@@ -2739,7 +2847,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
/* fastpath, try to reuse previous register */
for (i = 0; i < n; i++) {
hwc = &box->event_list[i]->hw;
- c = constraints[i];
+ c = hwc->constraint;
/* never assigned */
if (hwc->idx == -1)
@@ -2759,7 +2867,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int
}
/* slow path */
if (i != n)
- ret = perf_assign_events(constraints, n, wmin, wmax, assign);
+ ret = perf_assign_events(box->event_list, n,
+ wmin, wmax, assign);
if (!assign || ret) {
for (i = 0; i < n; i++)
@@ -2922,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
struct intel_uncore_box *fake_box;
int ret = -EINVAL, n;
- fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+ fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
if (!fake_box)
return -ENOMEM;
@@ -3003,7 +3112,9 @@ static int uncore_pmu_event_init(struct perf_event *event)
*/
if (pmu->type->single_fixed && pmu->pmu_idx > 0)
return -EINVAL;
- hwc->config = ~0ULL;
+
+ /* fixed counters have event field hardcoded to zero */
+ hwc->config = 0ULL;
} else {
hwc->config = event->attr.config & pmu->type->event_mask;
if (pmu->type->ops->hw_config) {
@@ -3165,17 +3276,25 @@ static bool pcidrv_registered;
/*
* add a pci uncore device
*/
-static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
+static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
- int i, phys_id;
+ struct intel_uncore_type *type;
+ int phys_id;
phys_id = pcibus_to_physid[pdev->bus->number];
if (phys_id < 0)
return -ENODEV;
- box = uncore_alloc_box(type, 0);
+ if (UNCORE_PCI_DEV_TYPE(id->driver_data) == UNCORE_EXTRA_PCI_DEV) {
+ extra_pci_dev[phys_id][UNCORE_PCI_DEV_IDX(id->driver_data)] = pdev;
+ pci_set_drvdata(pdev, NULL);
+ return 0;
+ }
+
+ type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
+ box = uncore_alloc_box(type, NUMA_NO_NODE);
if (!box)
return -ENOMEM;
@@ -3183,21 +3302,11 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
* for performance monitoring unit with multiple boxes,
* each box has a different function id.
*/
- for (i = 0; i < type->num_boxes; i++) {
- pmu = &type->pmus[i];
- if (pmu->func_id == pdev->devfn)
- break;
- if (pmu->func_id < 0) {
- pmu->func_id = pdev->devfn;
- break;
- }
- pmu = NULL;
- }
-
- if (!pmu) {
- kfree(box);
- return -EINVAL;
- }
+ pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)];
+ if (pmu->func_id < 0)
+ pmu->func_id = pdev->devfn;
+ else
+ WARN_ON_ONCE(pmu->func_id != pdev->devfn);
box->phys_id = phys_id;
box->pci_dev = pdev;
@@ -3215,9 +3324,22 @@ static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
static void uncore_pci_remove(struct pci_dev *pdev)
{
struct intel_uncore_box *box = pci_get_drvdata(pdev);
- struct intel_uncore_pmu *pmu = box->pmu;
- int cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+ struct intel_uncore_pmu *pmu;
+ int i, cpu, phys_id = pcibus_to_physid[pdev->bus->number];
+ box = pci_get_drvdata(pdev);
+ if (!box) {
+ for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) {
+ if (extra_pci_dev[phys_id][i] == pdev) {
+ extra_pci_dev[phys_id][i] = NULL;
+ break;
+ }
+ }
+ WARN_ON_ONCE(i >= UNCORE_EXTRA_PCI_DEV_MAX);
+ return;
+ }
+
+ pmu = box->pmu;
if (WARN_ON_ONCE(phys_id != box->phys_id))
return;
@@ -3238,12 +3360,6 @@ static void uncore_pci_remove(struct pci_dev *pdev)
kfree(box);
}
-static int uncore_pci_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
-{
- return uncore_pci_add(pci_uncores[id->driver_data], pdev);
-}
-
static int __init uncore_pci_init(void)
{
int ret;
@@ -3295,7 +3411,7 @@ static void __init uncore_pci_exit(void)
/* CPU hot plug/unplug are serialized by cpu_add_remove_lock mutex */
static LIST_HEAD(boxes_to_free);
-static void __cpuinit uncore_kfree_boxes(void)
+static void uncore_kfree_boxes(void)
{
struct intel_uncore_box *box;
@@ -3307,7 +3423,7 @@ static void __cpuinit uncore_kfree_boxes(void)
}
}
-static void __cpuinit uncore_cpu_dying(int cpu)
+static void uncore_cpu_dying(int cpu)
{
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
@@ -3326,7 +3442,7 @@ static void __cpuinit uncore_cpu_dying(int cpu)
}
}
-static int __cpuinit uncore_cpu_starting(int cpu)
+static int uncore_cpu_starting(int cpu)
{
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
@@ -3369,7 +3485,7 @@ static int __cpuinit uncore_cpu_starting(int cpu)
return 0;
}
-static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
+static int uncore_cpu_prepare(int cpu, int phys_id)
{
struct intel_uncore_type *type;
struct intel_uncore_pmu *pmu;
@@ -3383,7 +3499,7 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
if (pmu->func_id < 0)
pmu->func_id = j;
- box = uncore_alloc_box(type, cpu);
+ box = uncore_alloc_box(type, cpu_to_node(cpu));
if (!box)
return -ENOMEM;
@@ -3395,7 +3511,7 @@ static int __cpuinit uncore_cpu_prepare(int cpu, int phys_id)
return 0;
}
-static void __cpuinit
+static void
uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_cpu)
{
struct intel_uncore_type *type;
@@ -3433,7 +3549,7 @@ uncore_change_context(struct intel_uncore_type **uncores, int old_cpu, int new_c
}
}
-static void __cpuinit uncore_event_exit_cpu(int cpu)
+static void uncore_event_exit_cpu(int cpu)
{
int i, phys_id, target;
@@ -3461,7 +3577,7 @@ static void __cpuinit uncore_event_exit_cpu(int cpu)
uncore_change_context(pci_uncores, cpu, target);
}
-static void __cpuinit uncore_event_init_cpu(int cpu)
+static void uncore_event_init_cpu(int cpu)
{
int i, phys_id;
@@ -3477,8 +3593,8 @@ static void __cpuinit uncore_event_init_cpu(int cpu)
uncore_change_context(pci_uncores, -1, cpu);
}
-static int
- __cpuinit uncore_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+static int uncore_cpu_notifier(struct notifier_block *self,
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
@@ -3518,7 +3634,7 @@ static int
return NOTIFY_OK;
}
-static struct notifier_block uncore_cpu_nb __cpuinitdata = {
+static struct notifier_block uncore_cpu_nb = {
.notifier_call = uncore_cpu_notifier,
/*
* to migrate uncore events, our notifier should be executed
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index f952891..a80ab71 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -12,6 +12,15 @@
#define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC
#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1)
+#define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx)
+#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
+#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
+#define UNCORE_EXTRA_PCI_DEV 0xff
+#define UNCORE_EXTRA_PCI_DEV_MAX 2
+
+/* support up to 8 sockets */
+#define UNCORE_SOCKET_MAX 8
+
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
/* SNB event control */
@@ -108,6 +117,7 @@
(SNBEP_PMON_CTL_EV_SEL_MASK | \
SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \
SNBEP_PMON_CTL_EDGE_DET | \
+ SNBEP_PMON_CTL_EV_SEL_EXT | \
SNBEP_PMON_CTL_INVERT | \
SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \
SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
@@ -337,10 +347,10 @@
NHMEX_M_PMON_CTL_SET_FLAG_SEL_MASK)
#define NHMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 11) - 1) | (1 << 23))
-#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (11 + 3 * (n)))
+#define NHMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (11 + 3 * (n)))
#define WSMEX_M_PMON_ZDP_CTL_FVC_MASK (((1 << 12) - 1) | (1 << 24))
-#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7 << (12 + 3 * (n)))
+#define WSMEX_M_PMON_ZDP_CTL_FVC_EVENT_MASK(n) (0x7ULL << (12 + 3 * (n)))
/*
* use the 9~13 bits to select event If the 7th bit is not set,
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
index 7b3fe56..31f0f33 100644
--- a/arch/x86/kernel/cpu/powerflags.c
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -11,10 +11,10 @@ const char *const x86_power_flags[32] = {
"fid", /* frequency id control */
"vid", /* voltage id control */
"ttp", /* thermal trip */
- "tm",
- "stc",
- "100mhzsteps",
- "hwpstate",
+ "tm", /* hardware thermal control */
+ "stc", /* software thermal control */
+ "100mhzsteps", /* 100 MHz multiplier control */
+ "hwpstate", /* hardware P-state control */
"", /* tsc invariant mapped to constant_tsc */
"cpb", /* core performance boost */
"eff_freq_ro", /* Readonly aperf/mperf */
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 37a198b..aee6317 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -37,8 +37,8 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
- c->hard_math ? "yes" : "no",
- c->hard_math ? "yes" : "no",
+ static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+ static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
c->cpuid_level,
c->wp_works_ok ? "yes" : "no");
}
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index feca286..88db010 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -52,7 +52,7 @@ static inline int rdrand_long(unsigned long *v)
*/
#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
-void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c)
+void x86_init_rdrand(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_ARCH_RANDOM
unsigned long tmp;
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d92b5da..f2cc63e 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -24,13 +24,13 @@ enum cpuid_regs {
CR_EBX
};
-void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
+void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{
u32 max_level;
u32 regs[4];
const struct cpuid_bit *cb;
- static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
+ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 4397e98..4c60eaf 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -26,7 +26,7 @@
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
-void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
+void detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index 2800074..aa0430d 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -5,7 +5,7 @@
#include <asm/msr.h>
#include "cpu.h"
-static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c)
+static void early_init_transmeta(struct cpuinfo_x86 *c)
{
u32 xlvl;
@@ -17,7 +17,7 @@ static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c)
}
}
-static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
+static void init_transmeta(struct cpuinfo_x86 *c)
{
unsigned int cap_mask, uk, max, dummy;
unsigned int cms_rev1, cms_rev2;
@@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
#endif
}
-static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = {
+static const struct cpu_dev transmeta_cpu_dev = {
.c_vendor = "Transmeta",
.c_ident = { "GenuineTMx86", "TransmetaCPU" },
.c_early_init = early_init_transmeta,
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index fd2c37b..202759a 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -8,7 +8,7 @@
* so no special init takes place.
*/
-static const struct cpu_dev __cpuinitconst umc_cpu_dev = {
+static const struct cpu_dev umc_cpu_dev = {
.c_vendor = "UMC",
.c_ident = { "UMC UMC UMC" },
.c_models = {
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 03a3632..628a059 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -93,7 +93,7 @@ static void __init vmware_platform_setup(void)
* serial key should be enough, as this will always have a VMware
* specific string when running under VMware hypervisor.
*/
-static bool __init vmware_platform(void)
+static uint32_t __init vmware_platform(void)
{
if (cpu_has_hypervisor) {
unsigned int eax;
@@ -102,12 +102,12 @@ static bool __init vmware_platform(void)
cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
&hyper_vendor_id[1], &hyper_vendor_id[2]);
if (!memcmp(hyper_vendor_id, "VMwareVMware", 12))
- return true;
+ return CPUID_VMWARE_INFO_LEAF;
} else if (dmi_available && dmi_name_in_serial("VMware") &&
__vmware_platform())
- return true;
+ return 1;
- return false;
+ return 0;
}
/*
@@ -122,7 +122,7 @@ static bool __init vmware_platform(void)
* so that the kernel could just trust the hypervisor with providing a
* reliable virtual TSC that is suitable for timekeeping.
*/
-static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
+static void vmware_set_cpu_features(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 1e4dbcf..7d9481c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -137,7 +137,7 @@ static const struct file_operations cpuid_fops = {
.open = cpuid_open,
};
-static __cpuinit int cpuid_device_create(int cpu)
+static int cpuid_device_create(int cpu)
{
struct device *dev;
@@ -151,9 +151,8 @@ static void cpuid_device_destroy(int cpu)
device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
}
-static int __cpuinit cpuid_class_cpu_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static int cpuid_class_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
int err = 0;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 74467fe..e0e0841 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -128,7 +128,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
cpu_emergency_svm_disable();
lapic_shutdown();
-#if defined(CONFIG_X86_IO_APIC)
+#ifdef CONFIG_X86_IO_APIC
+ /* Prevent crash_kexec() from deadlocking on ioapic_lock. */
+ ioapic_zap_locks();
disable_IO_APIC();
#endif
#ifdef CONFIG_HPET_TIMER
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index b158152..376dc78 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -52,8 +52,7 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
}
#ifdef CONFIG_BLK_DEV_INITRD
-void __init early_init_dt_setup_initrd_arch(unsigned long start,
- unsigned long end)
+void __init early_init_dt_setup_initrd_arch(u64 start, u64 end)
{
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
@@ -133,7 +132,7 @@ static void x86_of_pci_irq_disable(struct pci_dev *dev)
{
}
-void __cpuinit x86_of_pci_init(void)
+void x86_of_pci_init(void)
{
pcibios_enable_irq = x86_of_pci_irq_enable;
pcibios_disable_irq = x86_of_pci_irq_disable;
@@ -364,9 +363,7 @@ static void dt_add_ioapic_domain(unsigned int ioapic_num,
* and assigned so we can keep the 1:1 mapping which the ioapic
* is having.
*/
- ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
- if (ret)
- pr_err("Error mapping legacy IRQs: %d\n", ret);
+ irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
if (num > NR_IRQS_LEGACY) {
ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault.c
index 155a13f..5d3fe8d 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault.c
@@ -9,6 +9,8 @@
#include <asm/processor.h>
#include <asm/desc.h>
+#ifdef CONFIG_X86_32
+
#define DOUBLEFAULT_STACKSIZE (1024)
static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
@@ -67,3 +69,16 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
.__cr3 = __pa_nodebug(swapper_pg_dir),
}
};
+
+/* dummy for do_double_fault() call */
+void df_debug(struct pt_regs *regs, long error_code) {}
+
+#else /* !CONFIG_X86_32 */
+
+void df_debug(struct pt_regs *regs, long error_code)
+{
+ pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
+ show_regs(regs);
+ panic("Machine halted.");
+}
+#endif
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d32abea..174da5f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -658,15 +658,18 @@ __init void e820_setup_gap(void)
* boot_params.e820_map, others are passed via SETUP_E820_EXT node of
* linked list of struct setup_data, which is parsed here.
*/
-void __init parse_e820_ext(struct setup_data *sdata)
+void __init parse_e820_ext(u64 phys_addr, u32 data_len)
{
int entries;
struct e820entry *extmap;
+ struct setup_data *sdata;
+ sdata = early_memremap(phys_addr, data_len);
entries = sdata->len / sizeof(struct e820entry);
extmap = (struct e820entry *)(sdata->data);
__append_e820_map(extmap, entries);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ early_iounmap(sdata, data_len);
printk(KERN_INFO "e820: extended physical RAM map:\n");
e820_print_map("extended");
}
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 94ab6b9..b3cd3eb 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/pci_ids.h>
+#include <drm/i915_drm.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/io_apic.h>
@@ -196,16 +197,175 @@ static void __init ati_bugs_contd(int num, int slot, int func)
static void __init intel_remapping_check(int num, int slot, int func)
{
u8 revision;
+ u16 device;
+ device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID);
/*
- * Revision 0x13 of this chipset supports irq remapping
- * but has an erratum that breaks its behavior, flag it as such
+ * Revision 13 of all triggering devices id in this quirk have
+ * a problem draining interrupts when irq remapping is enabled,
+ * and should be flagged as broken. Additionally revisions 0x12
+ * and 0x22 of device id 0x3405 has this problem.
*/
if (revision == 0x13)
set_irq_remapping_broken();
+ else if ((device == 0x3405) &&
+ ((revision == 0x12) ||
+ (revision == 0x22)))
+ set_irq_remapping_broken();
+
+}
+
+/*
+ * Systems with Intel graphics controllers set aside memory exclusively
+ * for gfx driver use. This memory is not marked in the E820 as reserved
+ * or as RAM, and so is subject to overlap from E820 manipulation later
+ * in the boot process. On some systems, MMIO space is allocated on top,
+ * despite the efforts of the "RAM buffer" approach, which simply rounds
+ * memory boundaries up to 64M to try to catch space that may decode
+ * as RAM and so is not suitable for MMIO.
+ *
+ * And yes, so far on current devices the base addr is always under 4G.
+ */
+static u32 __init intel_stolen_base(int num, int slot, int func)
+{
+ u32 base;
+
+ /*
+ * For the PCI IDs in this quirk, the stolen base is always
+ * in 0x5c, aka the BDSM register (yes that's really what
+ * it's called).
+ */
+ base = read_pci_config(num, slot, func, 0x5c);
+ base &= ~((1<<20) - 1);
+
+ return base;
+}
+
+#define KB(x) ((x) * 1024)
+#define MB(x) (KB (KB (x)))
+#define GB(x) (MB (KB (x)))
+
+static size_t __init gen3_stolen_size(int num, int slot, int func)
+{
+ size_t stolen_size;
+ u16 gmch_ctrl;
+
+ gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL);
+
+ switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
+ case I855_GMCH_GMS_STOLEN_1M:
+ stolen_size = MB(1);
+ break;
+ case I855_GMCH_GMS_STOLEN_4M:
+ stolen_size = MB(4);
+ break;
+ case I855_GMCH_GMS_STOLEN_8M:
+ stolen_size = MB(8);
+ break;
+ case I855_GMCH_GMS_STOLEN_16M:
+ stolen_size = MB(16);
+ break;
+ case I855_GMCH_GMS_STOLEN_32M:
+ stolen_size = MB(32);
+ break;
+ case I915_GMCH_GMS_STOLEN_48M:
+ stolen_size = MB(48);
+ break;
+ case I915_GMCH_GMS_STOLEN_64M:
+ stolen_size = MB(64);
+ break;
+ case G33_GMCH_GMS_STOLEN_128M:
+ stolen_size = MB(128);
+ break;
+ case G33_GMCH_GMS_STOLEN_256M:
+ stolen_size = MB(256);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_96M:
+ stolen_size = MB(96);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_160M:
+ stolen_size = MB(160);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_224M:
+ stolen_size = MB(224);
+ break;
+ case INTEL_GMCH_GMS_STOLEN_352M:
+ stolen_size = MB(352);
+ break;
+ default:
+ stolen_size = 0;
+ break;
+ }
+
+ return stolen_size;
+}
+
+static size_t __init gen6_stolen_size(int num, int slot, int func)
+{
+ u16 gmch_ctrl;
+
+ gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
+ gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
+ gmch_ctrl &= SNB_GMCH_GMS_MASK;
+
+ return gmch_ctrl << 25; /* 32 MB units */
+}
+
+typedef size_t (*stolen_size_fn)(int num, int slot, int func);
+
+static struct pci_device_id intel_stolen_ids[] __initdata = {
+ INTEL_I915G_IDS(gen3_stolen_size),
+ INTEL_I915GM_IDS(gen3_stolen_size),
+ INTEL_I945G_IDS(gen3_stolen_size),
+ INTEL_I945GM_IDS(gen3_stolen_size),
+ INTEL_VLV_M_IDS(gen3_stolen_size),
+ INTEL_VLV_D_IDS(gen3_stolen_size),
+ INTEL_PINEVIEW_IDS(gen3_stolen_size),
+ INTEL_I965G_IDS(gen3_stolen_size),
+ INTEL_G33_IDS(gen3_stolen_size),
+ INTEL_I965GM_IDS(gen3_stolen_size),
+ INTEL_GM45_IDS(gen3_stolen_size),
+ INTEL_G45_IDS(gen3_stolen_size),
+ INTEL_IRONLAKE_D_IDS(gen3_stolen_size),
+ INTEL_IRONLAKE_M_IDS(gen3_stolen_size),
+ INTEL_SNB_D_IDS(gen6_stolen_size),
+ INTEL_SNB_M_IDS(gen6_stolen_size),
+ INTEL_IVB_M_IDS(gen6_stolen_size),
+ INTEL_IVB_D_IDS(gen6_stolen_size),
+ INTEL_HSW_D_IDS(gen6_stolen_size),
+ INTEL_HSW_M_IDS(gen6_stolen_size),
+};
+static void __init intel_graphics_stolen(int num, int slot, int func)
+{
+ size_t size;
+ int i;
+ u32 start;
+ u16 device, subvendor, subdevice;
+
+ device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
+ subvendor = read_pci_config_16(num, slot, func,
+ PCI_SUBSYSTEM_VENDOR_ID);
+ subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID);
+
+ for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) {
+ if (intel_stolen_ids[i].device == device) {
+ stolen_size_fn stolen_size =
+ (stolen_size_fn)intel_stolen_ids[i].driver_data;
+ size = stolen_size(num, slot, func);
+ start = intel_stolen_base(num, slot, func);
+ if (size && start) {
+ /* Mark this space as reserved */
+ e820_add_region(start, size, E820_RESERVED);
+ sanitize_e820_map(e820.map,
+ ARRAY_SIZE(e820.map),
+ &e820.nr_map);
+ }
+ return;
+ }
+ }
}
#define QFLAG_APPLY_ONCE 0x1
@@ -239,8 +399,12 @@ static struct chipset early_qrk[] __initdata = {
PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
{ PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
+ { PCI_VENDOR_ID_INTEL, 0x3405, PCI_CLASS_BRIDGE_HOST,
+ PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
+ { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
+ QFLAG_APPLY_ONCE, intel_graphics_stolen },
{}
};
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 8f3e2de..f0dcb0c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -801,7 +801,17 @@ ENTRY(name) \
CFI_ENDPROC; \
ENDPROC(name)
-#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
+
+#ifdef CONFIG_TRACING
+#define TRACE_BUILD_INTERRUPT(name, nr) \
+ BUILD_INTERRUPT3(trace_##name, nr, smp_trace_##name)
+#else
+#define TRACE_BUILD_INTERRUPT(name, nr)
+#endif
+
+#define BUILD_INTERRUPT(name, nr) \
+ BUILD_INTERRUPT3(name, nr, smp_##name); \
+ TRACE_BUILD_INTERRUPT(name, nr)
/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>
@@ -1166,6 +1176,9 @@ ftrace_restore_flags:
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
+ cmpl $__PAGE_OFFSET, %esp
+ jb ftrace_stub /* Paging not enabled yet? */
+
cmpl $0, function_trace_stop
jne ftrace_stub
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7272089..b077f4c 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -365,7 +365,7 @@ ENDPROC(native_usergs_sysret64)
/*CFI_REL_OFFSET ss,0*/
pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */
+ pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_FIXED) /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
@@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64)
TRACE_IRQS_OFF
.endm
-ENTRY(save_rest)
- PARTIAL_FRAME 1 (REST_SKIP+8)
- movq 5*8+16(%rsp), %r11 /* save return address */
- movq_cfi rbx, RBX+16
- movq_cfi rbp, RBP+16
- movq_cfi r12, R12+16
- movq_cfi r13, R13+16
- movq_cfi r14, R14+16
- movq_cfi r15, R15+16
- movq %r11, 8(%rsp) /* return address */
- FIXUP_TOP_OF_STACK %r11, 16
- ret
- CFI_ENDPROC
-END(save_rest)
-
/* save complete stack frame */
.pushsection .kprobes.text, "ax"
ENTRY(save_paranoid)
@@ -1138,7 +1123,7 @@ END(common_interrupt)
/*
* APIC interrupts.
*/
-.macro apicinterrupt num sym do_sym
+.macro apicinterrupt3 num sym do_sym
ENTRY(\sym)
INTR_FRAME
ASM_CLAC
@@ -1150,15 +1135,32 @@ ENTRY(\sym)
END(\sym)
.endm
+#ifdef CONFIG_TRACING
+#define trace(sym) trace_##sym
+#define smp_trace(sym) smp_trace_##sym
+
+.macro trace_apicinterrupt num sym
+apicinterrupt3 \num trace(\sym) smp_trace(\sym)
+.endm
+#else
+.macro trace_apicinterrupt num sym do_sym
+.endm
+#endif
+
+.macro apicinterrupt num sym do_sym
+apicinterrupt3 \num \sym \do_sym
+trace_apicinterrupt \num \sym
+.endm
+
#ifdef CONFIG_SMP
-apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
+apicinterrupt3 IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
-apicinterrupt REBOOT_VECTOR \
+apicinterrupt3 REBOOT_VECTOR \
reboot_interrupt smp_reboot_interrupt
#endif
#ifdef CONFIG_X86_UV
-apicinterrupt UV_BAU_MESSAGE \
+apicinterrupt3 UV_BAU_MESSAGE \
uv_bau_message_intr1 uv_bau_message_interrupt
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
@@ -1167,14 +1169,19 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_HAVE_KVM
-apicinterrupt POSTED_INTR_VECTOR \
+apicinterrupt3 POSTED_INTR_VECTOR \
kvm_posted_intr_ipi smp_kvm_posted_intr_ipi
#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
apicinterrupt THRESHOLD_APIC_VECTOR \
threshold_interrupt smp_threshold_interrupt
+#endif
+
+#ifdef CONFIG_X86_THERMAL_VECTOR
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
+#endif
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
@@ -1451,13 +1458,13 @@ ENTRY(xen_failsafe_callback)
CFI_ENDPROC
END(xen_failsafe_callback)
-apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
xen_hvm_callback_vector xen_evtchn_do_upcall
#endif /* CONFIG_XEN */
#if IS_ENABLED(CONFIG_HYPERV)
-apicinterrupt HYPERVISOR_CALLBACK_VECTOR \
+apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
hyperv_callback_vector hyperv_vector_handler
#endif /* CONFIG_HYPERV */
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 138463a..06f87be 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -29,7 +29,7 @@ static void __init i386_default_early_setup(void)
reserve_ebda_region();
}
-void __init i386_start_kernel(void)
+asmlinkage void __init i386_start_kernel(void)
{
sanitize_boot_params(&boot_params);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 55b6761..1be8e43 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -137,7 +137,7 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
-void __init x86_64_start_kernel(char * real_mode_data)
+asmlinkage void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 73afd11..81ba276 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -292,7 +292,6 @@ ENDPROC(start_cpu0)
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
*/
-__CPUINIT
ENTRY(startup_32_smp)
cld
movl $(__BOOT_DS),%eax
@@ -410,6 +409,7 @@ enable_paging:
/*
* Check if it is 486
*/
+ movb $4,X86 # at least 486
cmpl $-1,X86_CPUID
je is486
@@ -437,14 +437,12 @@ enable_paging:
movl %edx,X86_CAPABILITY
is486:
- movb $4,X86
movl $0x50022,%ecx # set AM, WP, NE and MP
movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
orl %ecx,%eax
movl %eax,%cr0
- call check_x87
lgdt early_gdt_descr
lidt idt_descr
ljmp $(__KERNEL_CS),$1f
@@ -467,26 +465,6 @@ is486:
pushl $0 # fake return address for unwinder
jmp *(initial_code)
-/*
- * We depend on ET to be correct. This checks for 287/387.
- */
-check_x87:
- movb $0,X86_HARD_MATH
- clts
- fninit
- fstsw %ax
- cmpb $0,%al
- je 1f
- movl %cr0,%eax /* no coprocessor: have to set bits */
- xorl $4,%eax /* set EM */
- movl %eax,%cr0
- ret
- ALIGN
-1: movb $1,X86_HARD_MATH
- .byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
- ret
-
-
#include "verify_cpu.S"
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 321d65e..e1aabdb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -512,15 +512,6 @@ ENTRY(phys_base)
#include "../../x86/xen/xen-head.S"
- .section .bss, "aw", @nobits
- .align L1_CACHE_BYTES
-ENTRY(idt_table)
- .skip IDT_ENTRIES * 16
-
- .align L1_CACHE_BYTES
-ENTRY(nmi_idt_table)
- .skip IDT_ENTRIES * 16
-
__PAGE_ALIGNED_BSS
NEXT_PAGE(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 02f0763..f66ff16 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -393,6 +393,9 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
unregister_hw_breakpoint(t->ptrace_bps[i]);
t->ptrace_bps[i] = NULL;
}
+
+ t->debugreg6 = 0;
+ t->ptrace_dr7 = 0;
}
void hw_breakpoint_restore(void)
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index cb33909..5d576ab 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -108,15 +108,15 @@ EXPORT_SYMBOL(unlazy_fpu);
unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
-static struct i387_fxsave_struct fx_scratch __cpuinitdata;
+static struct i387_fxsave_struct fx_scratch;
-static void __cpuinit mxcsr_feature_mask_init(void)
+static void mxcsr_feature_mask_init(void)
{
unsigned long mask = 0;
if (cpu_has_fxsr) {
memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
- asm volatile("fxsave %0" : : "m" (fx_scratch));
+ asm volatile("fxsave %0" : "+m" (fx_scratch));
mask = fx_scratch.mxcsr_mask;
if (mask == 0)
mask = 0x0000ffbf;
@@ -124,14 +124,14 @@ static void __cpuinit mxcsr_feature_mask_init(void)
mxcsr_feature_mask &= mask;
}
-static void __cpuinit init_thread_xstate(void)
+static void init_thread_xstate(void)
{
/*
* Note that xstate_size might be overwriten later during
* xsave_init().
*/
- if (!HAVE_HWFP) {
+ if (!cpu_has_fpu) {
/*
* Disable xsave as we do not support it if i387
* emulation is enabled.
@@ -153,11 +153,19 @@ static void __cpuinit init_thread_xstate(void)
* into all processes.
*/
-void __cpuinit fpu_init(void)
+void fpu_init(void)
{
unsigned long cr0;
unsigned long cr4_mask = 0;
+#ifndef CONFIG_MATH_EMULATION
+ if (!cpu_has_fpu) {
+ pr_emerg("No FPU found and no math emulation present\n");
+ pr_emerg("Giving up\n");
+ for (;;)
+ asm volatile("hlt");
+ }
+#endif
if (cpu_has_fxsr)
cr4_mask |= X86_CR4_OSFXSR;
if (cpu_has_xmm)
@@ -167,7 +175,7 @@ void __cpuinit fpu_init(void)
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
- if (!HAVE_HWFP)
+ if (!cpu_has_fpu)
cr0 |= X86_CR0_EM;
write_cr0(cr0);
@@ -185,7 +193,7 @@ void __cpuinit fpu_init(void)
void fpu_finit(struct fpu *fpu)
{
- if (!HAVE_HWFP) {
+ if (!cpu_has_fpu) {
finit_soft_fpu(&fpu->state->soft);
return;
}
@@ -214,7 +222,7 @@ int init_fpu(struct task_struct *tsk)
int ret;
if (tsk_used_math(tsk)) {
- if (HAVE_HWFP && tsk == current)
+ if (cpu_has_fpu && tsk == current)
unlazy_fpu(tsk);
tsk->thread.fpu.last_cpu = ~0;
return 0;
@@ -511,14 +519,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- if (!HAVE_HWFP)
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr) {
+ if (!cpu_has_fxsr)
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->fsave, 0,
-1);
- }
sanitize_i387_state(target);
@@ -545,13 +552,13 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
sanitize_i387_state(target);
- if (!HAVE_HWFP)
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr) {
+ if (!cpu_has_fxsr)
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.state->fsave, 0, -1);
- }
+ &target->thread.fpu.state->fsave, 0,
+ -1);
if (pos > 0 || count < sizeof(env))
convert_from_fxsr(&env, target);
@@ -592,3 +599,33 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
EXPORT_SYMBOL(dump_fpu);
#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+
+static int __init no_387(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_FPU);
+ return 1;
+}
+
+__setup("no387", no_387);
+
+void fpu_detect(struct cpuinfo_x86 *c)
+{
+ unsigned long cr0;
+ u16 fsw, fcw;
+
+ fsw = fcw = 0xffff;
+
+ cr0 = read_cr0();
+ cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
+ write_cr0(cr0);
+
+ asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+ : "+m" (fsw), "+m" (fcw));
+
+ if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+ set_cpu_cap(c, X86_FEATURE_FPU);
+ else
+ clear_cpu_cap(c, X86_FEATURE_FPU);
+
+ /* The final cr0 value is set in fpu_init() */
+}
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index ac0631d..22d0687 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -18,6 +18,9 @@
#include <asm/mce.h>
#include <asm/hw_irq.h>
+#define CREATE_TRACE_POINTS
+#include <asm/trace/irq_vectors.h>
+
atomic_t irq_err_count;
/* Function pointer for generic interrupt vector handling */
@@ -174,7 +177,7 @@ u64 arch_irq_stat(void)
* SMP cross-CPU interrupts have their own specific
* handlers).
*/
-unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
+__visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -204,23 +207,21 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
/*
* Handler for X86_PLATFORM_IPI_VECTOR.
*/
-void smp_x86_platform_ipi(struct pt_regs *regs)
+void __smp_x86_platform_ipi(void)
{
- struct pt_regs *old_regs = set_irq_regs(regs);
-
- ack_APIC_irq();
-
- irq_enter();
-
- exit_idle();
-
inc_irq_stat(x86_platform_ipis);
if (x86_platform_ipi_callback)
x86_platform_ipi_callback();
+}
- irq_exit();
+__visible void smp_x86_platform_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ entering_ack_irq();
+ __smp_x86_platform_ipi();
+ exiting_irq();
set_irq_regs(old_regs);
}
@@ -228,7 +229,7 @@ void smp_x86_platform_ipi(struct pt_regs *regs)
/*
* Handler for POSTED_INTERRUPT_VECTOR.
*/
-void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
+__visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -246,6 +247,18 @@ void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
}
#endif
+__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ entering_ack_irq();
+ trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
+ __smp_x86_platform_ipi();
+ trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
+ exiting_irq();
+ set_irq_regs(old_regs);
+}
+
EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 344faf8..4186755 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -119,7 +119,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
/*
* allocate per-cpu stacks for hardirq and for softirq processing
*/
-void __cpuinit irq_ctx_init(int cpu)
+void irq_ctx_init(int cpu)
{
union irq_ctx *irqctx;
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703..1de84e3a 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -8,14 +8,34 @@
#include <linux/irq_work.h>
#include <linux/hardirq.h>
#include <asm/apic.h>
+#include <asm/trace/irq_vectors.h>
-void smp_irq_work_interrupt(struct pt_regs *regs)
+static inline void irq_work_entering_irq(void)
{
irq_enter();
ack_APIC_irq();
+}
+
+static inline void __smp_irq_work_interrupt(void)
+{
inc_irq_stat(apic_irq_work_irqs);
irq_work_run();
- irq_exit();
+}
+
+__visible void smp_irq_work_interrupt(struct pt_regs *regs)
+{
+ irq_work_entering_irq();
+ __smp_irq_work_interrupt();
+ exiting_irq();
+}
+
+__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs)
+{
+ irq_work_entering_irq();
+ trace_irq_work_entry(IRQ_WORK_VECTOR);
+ __smp_irq_work_interrupt();
+ trace_irq_work_exit(IRQ_WORK_VECTOR);
+ exiting_irq();
}
void arch_irq_work_raise(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 2889b3d..ee11b7d 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,20 +24,71 @@ union jump_code_union {
} __attribute__((packed));
};
+static void bug_at(unsigned char *ip, int line)
+{
+ /*
+ * The location is not an op that we were expecting.
+ * Something went wrong. Crash the box, as something could be
+ * corrupting the kernel.
+ */
+ pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n",
+ ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line);
+ BUG();
+}
+
static void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
- void *(*poker)(void *, const void *, size_t))
+ void *(*poker)(void *, const void *, size_t),
+ int init)
{
union jump_code_union code;
+ const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
if (type == JUMP_LABEL_ENABLE) {
+ /*
+ * We are enabling this jump label. If it is not a nop
+ * then something must have gone wrong.
+ */
+ if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+
code.jump = 0xe9;
code.offset = entry->target -
(entry->code + JUMP_LABEL_NOP_SIZE);
- } else
+ } else {
+ /*
+ * We are disabling this jump label. If it is not what
+ * we think it is, then something must have gone wrong.
+ * If this is the first initialization call, then we
+ * are converting the default nop to the ideal nop.
+ */
+ if (init) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ } else {
+ code.jump = 0xe9;
+ code.offset = entry->target -
+ (entry->code + JUMP_LABEL_NOP_SIZE);
+ if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
+ bug_at((void *)entry->code, __LINE__);
+ }
memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+ }
- (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+ /*
+ * Make text_poke_bp() a default fallback poker.
+ *
+ * At the time the change is being done, just ignore whether we
+ * are doing nop -> jump or jump -> nop transition, and assume
+ * always nop being the 'currently valid' instruction
+ *
+ */
+ if (poker)
+ (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+ else
+ text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE,
+ (void *)entry->code + JUMP_LABEL_NOP_SIZE);
}
void arch_jump_label_transform(struct jump_entry *entry,
@@ -45,15 +96,38 @@ void arch_jump_label_transform(struct jump_entry *entry,
{
get_online_cpus();
mutex_lock(&text_mutex);
- __jump_label_transform(entry, type, text_poke_smp);
+ __jump_label_transform(entry, type, NULL, 0);
mutex_unlock(&text_mutex);
put_online_cpus();
}
+static enum {
+ JL_STATE_START,
+ JL_STATE_NO_UPDATE,
+ JL_STATE_UPDATE,
+} jlstate __initdata_or_module = JL_STATE_START;
+
__init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
enum jump_label_type type)
{
- __jump_label_transform(entry, type, text_poke_early);
+ /*
+ * This function is called at boot up and when modules are
+ * first loaded. Check if the default nop, the one that is
+ * inserted at compile time, is the ideal nop. If it is, then
+ * we do not need to update the nop, and we can leave it as is.
+ * If it is not, then we need to update the nop to the ideal nop.
+ */
+ if (jlstate == JL_STATE_START) {
+ const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
+ const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
+
+ if (memcmp(ideal_nop, default_nop, 5) != 0)
+ jlstate = JL_STATE_UPDATE;
+ else
+ jlstate = JL_STATE_NO_UPDATE;
+ }
+ if (jlstate == JL_STATE_UPDATE)
+ __jump_label_transform(entry, type, text_poke_early, 1);
}
#endif
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2e9d4b5..c6ee63f 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -82,14 +82,9 @@ extern void synthesize_reljump(void *from, void *to);
extern void synthesize_relcall(void *from, void *to);
#ifdef CONFIG_OPTPROBES
-extern int arch_init_optprobes(void);
extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter);
extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr);
#else /* !CONFIG_OPTPROBES */
-static inline int arch_init_optprobes(void)
-{
- return 0;
-}
static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
{
return 0;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 9895a9a..79a3f96 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -365,10 +365,14 @@ int __kprobes __copy_instruction(u8 *dest, u8 *src)
return insn.length;
}
-static void __kprobes arch_copy_kprobe(struct kprobe *p)
+static int __kprobes arch_copy_kprobe(struct kprobe *p)
{
+ int ret;
+
/* Copy an instruction with recovering if other optprobe modifies it.*/
- __copy_instruction(p->ainsn.insn, p->addr);
+ ret = __copy_instruction(p->ainsn.insn, p->addr);
+ if (!ret)
+ return -EINVAL;
/*
* __copy_instruction can modify the displacement of the instruction,
@@ -384,6 +388,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
/* Also, displacement change doesn't affect the first byte */
p->opcode = p->ainsn.insn[0];
+
+ return 0;
}
int __kprobes arch_prepare_kprobe(struct kprobe *p)
@@ -397,8 +403,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
return -ENOMEM;
- arch_copy_kprobe(p);
- return 0;
+
+ return arch_copy_kprobe(p);
}
void __kprobes arch_arm_kprobe(struct kprobe *p)
@@ -655,7 +661,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void)
/*
* Called from kretprobe_trampoline
*/
-static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
+__visible __used __kprobes void *trampoline_handler(struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head, empty_rp;
@@ -1062,7 +1068,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
int __init arch_init_kprobes(void)
{
- return arch_init_optprobes();
+ return 0;
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 76dc6f0..898160b 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -88,9 +88,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long v
*(unsigned long *)addr = val;
}
-static void __used __kprobes kprobes_optinsn_template_holder(void)
-{
- asm volatile (
+asm (
".global optprobe_template_entry\n"
"optprobe_template_entry:\n"
#ifdef CONFIG_X86_64
@@ -129,7 +127,6 @@ static void __used __kprobes kprobes_optinsn_template_holder(void)
#endif
".global optprobe_template_end\n"
"optprobe_template_end:\n");
-}
#define TMPL_MOVE_IDX \
((long)&optprobe_template_val - (long)&optprobe_template_entry)
@@ -371,31 +368,6 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
return 0;
}
-#define MAX_OPTIMIZE_PROBES 256
-static struct text_poke_param *jump_poke_params;
-static struct jump_poke_buffer {
- u8 buf[RELATIVEJUMP_SIZE];
-} *jump_poke_bufs;
-
-static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
-{
- s32 rel = (s32)((long)op->optinsn.insn -
- ((long)op->kp.addr + RELATIVEJUMP_SIZE));
-
- /* Backup instructions which will be replaced by jump address */
- memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
- RELATIVE_ADDR_SIZE);
-
- insn_buf[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&insn_buf[1]) = rel;
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
-}
-
/*
* Replace breakpoints (int3) with relative jumps.
* Caller must call with locking kprobe_mutex and text_mutex.
@@ -403,37 +375,38 @@ static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
void __kprobes arch_optimize_kprobes(struct list_head *oplist)
{
struct optimized_kprobe *op, *tmp;
- int c = 0;
+ u8 insn_buf[RELATIVEJUMP_SIZE];
list_for_each_entry_safe(op, tmp, oplist, list) {
+ s32 rel = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
WARN_ON(kprobe_disabled(&op->kp));
- /* Setup param */
- setup_optimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
+
+ /* Backup instructions which will be replaced by jump address */
+ memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
+ RELATIVE_ADDR_SIZE);
+
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ op->optinsn.insn);
+
list_del_init(&op->list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
}
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
}
-static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
- u8 *insn_buf,
- struct optimized_kprobe *op)
+/* Replace a relative jump with a breakpoint (int3). */
+void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
+ u8 insn_buf[RELATIVEJUMP_SIZE];
+
/* Set int3 to first byte for kprobes */
insn_buf[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-
- tprm->addr = op->kp.addr;
- tprm->opcode = insn_buf;
- tprm->len = RELATIVEJUMP_SIZE;
+ text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+ op->optinsn.insn);
}
/*
@@ -444,34 +417,11 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
struct list_head *done_list)
{
struct optimized_kprobe *op, *tmp;
- int c = 0;
list_for_each_entry_safe(op, tmp, oplist, list) {
- /* Setup param */
- setup_unoptimize_kprobe(&jump_poke_params[c],
- jump_poke_bufs[c].buf, op);
+ arch_unoptimize_kprobe(op);
list_move(&op->list, done_list);
- if (++c >= MAX_OPTIMIZE_PROBES)
- break;
}
-
- /*
- * text_poke_smp doesn't support NMI/MCE code modifying.
- * However, since kprobes itself also doesn't support NMI/MCE
- * code probing, it's not a problem.
- */
- text_poke_smp_batch(jump_poke_params, c);
-}
-
-/* Replace a relative jump with a breakpoint (int3). */
-void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op)
-{
- u8 buf[RELATIVEJUMP_SIZE];
-
- /* Set int3 to first byte for kprobes */
- buf[0] = BREAKPOINT_INSTRUCTION;
- memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
- text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE);
}
int __kprobes
@@ -491,22 +441,3 @@ setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter)
}
return 0;
}
-
-int __kprobes arch_init_optprobes(void)
-{
- /* Allocate code buffer and parameter array */
- jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_bufs)
- return -ENOMEM;
-
- jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
- MAX_OPTIMIZE_PROBES, GFP_KERNEL);
- if (!jump_poke_params) {
- kfree(jump_poke_bufs);
- jump_poke_bufs = NULL;
- return -ENOMEM;
- }
-
- return 0;
-}
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index cd6d9a5..697b93a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,6 +34,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
+#include <linux/debugfs.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -320,7 +321,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
apic_write(APIC_EOI, APIC_EOI_ACK);
}
-void __cpuinit kvm_guest_cpu_init(void)
+void kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
return;
@@ -419,9 +420,10 @@ static void __init kvm_smp_prepare_boot_cpu(void)
WARN_ON(kvm_register_clock("primary cpu clock"));
kvm_guest_cpu_init();
native_smp_prepare_boot_cpu();
+ kvm_spinlock_init();
}
-static void __cpuinit kvm_guest_cpu_online(void *dummy)
+static void kvm_guest_cpu_online(void *dummy)
{
kvm_guest_cpu_init();
}
@@ -435,8 +437,8 @@ static void kvm_guest_cpu_offline(void *dummy)
apf_task_wake_all();
}
-static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int kvm_cpu_notify(struct notifier_block *self, unsigned long action,
+ void *hcpu)
{
int cpu = (unsigned long)hcpu;
switch (action) {
@@ -455,7 +457,7 @@ static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
+static struct notifier_block kvm_cpu_notifier = {
.notifier_call = kvm_cpu_notify,
};
#endif
@@ -498,11 +500,9 @@ void __init kvm_guest_init(void)
#endif
}
-static bool __init kvm_detect(void)
+static uint32_t __init kvm_detect(void)
{
- if (!kvm_para_available())
- return false;
- return true;
+ return kvm_cpuid_base();
}
const struct hypervisor_x86 x86_hyper_kvm __refconst = {
@@ -523,3 +523,263 @@ static __init int activate_jump_labels(void)
return 0;
}
arch_initcall(activate_jump_labels);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+
+/* Kick a cpu by its apicid. Used to wake up a halted vcpu */
+static void kvm_kick_cpu(int cpu)
+{
+ int apicid;
+ unsigned long flags = 0;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu);
+ kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
+}
+
+enum kvm_contention_stat {
+ TAKEN_SLOW,
+ TAKEN_SLOW_PICKUP,
+ RELEASED_SLOW,
+ RELEASED_SLOW_KICKED,
+ NR_CONTENTION_STATS
+};
+
+#ifdef CONFIG_KVM_DEBUG_FS
+#define HISTO_BUCKETS 30
+
+static struct kvm_spinlock_stats
+{
+ u32 contention_stats[NR_CONTENTION_STATS];
+ u32 histo_spin_blocked[HISTO_BUCKETS+1];
+ u64 time_blocked;
+} spinlock_stats;
+
+static u8 zero_stats;
+
+static inline void check_zero(void)
+{
+ u8 ret;
+ u8 old;
+
+ old = ACCESS_ONCE(zero_stats);
+ if (unlikely(old)) {
+ ret = cmpxchg(&zero_stats, old, 0);
+ /* This ensures only one fellow resets the stat */
+ if (ret == old)
+ memset(&spinlock_stats, 0, sizeof(spinlock_stats));
+ }
+}
+
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+ check_zero();
+ spinlock_stats.contention_stats[var] += val;
+}
+
+
+static inline u64 spin_time_start(void)
+{
+ return sched_clock();
+}
+
+static void __spin_time_accum(u64 delta, u32 *array)
+{
+ unsigned index;
+
+ index = ilog2(delta);
+ check_zero();
+
+ if (index < HISTO_BUCKETS)
+ array[index]++;
+ else
+ array[HISTO_BUCKETS]++;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+ u32 delta;
+
+ delta = sched_clock() - start;
+ __spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
+ spinlock_stats.time_blocked += delta;
+}
+
+static struct dentry *d_spin_debug;
+static struct dentry *d_kvm_debug;
+
+struct dentry *kvm_init_debugfs(void)
+{
+ d_kvm_debug = debugfs_create_dir("kvm", NULL);
+ if (!d_kvm_debug)
+ printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
+
+ return d_kvm_debug;
+}
+
+static int __init kvm_spinlock_debugfs(void)
+{
+ struct dentry *d_kvm;
+
+ d_kvm = kvm_init_debugfs();
+ if (d_kvm == NULL)
+ return -ENOMEM;
+
+ d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
+
+ debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
+
+ debugfs_create_u32("taken_slow", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[TAKEN_SLOW]);
+ debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
+
+ debugfs_create_u32("released_slow", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[RELEASED_SLOW]);
+ debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
+ &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
+
+ debugfs_create_u64("time_blocked", 0444, d_spin_debug,
+ &spinlock_stats.time_blocked);
+
+ debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
+ spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
+
+ return 0;
+}
+fs_initcall(kvm_spinlock_debugfs);
+#else /* !CONFIG_KVM_DEBUG_FS */
+static inline void add_stats(enum kvm_contention_stat var, u32 val)
+{
+}
+
+static inline u64 spin_time_start(void)
+{
+ return 0;
+}
+
+static inline void spin_time_accum_blocked(u64 start)
+{
+}
+#endif /* CONFIG_KVM_DEBUG_FS */
+
+struct kvm_lock_waiting {
+ struct arch_spinlock *lock;
+ __ticket_t want;
+};
+
+/* cpus 'waiting' on a spinlock to become available */
+static cpumask_t waiting_cpus;
+
+/* Track spinlock on which a cpu is waiting */
+static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
+
+static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+{
+ struct kvm_lock_waiting *w;
+ int cpu;
+ u64 start;
+ unsigned long flags;
+
+ if (in_nmi())
+ return;
+
+ w = &__get_cpu_var(klock_waiting);
+ cpu = smp_processor_id();
+ start = spin_time_start();
+
+ /*
+ * Make sure an interrupt handler can't upset things in a
+ * partially setup state.
+ */
+ local_irq_save(flags);
+
+ /*
+ * The ordering protocol on this is that the "lock" pointer
+ * may only be set non-NULL if the "want" ticket is correct.
+ * If we're updating "want", we must first clear "lock".
+ */
+ w->lock = NULL;
+ smp_wmb();
+ w->want = want;
+ smp_wmb();
+ w->lock = lock;
+
+ add_stats(TAKEN_SLOW, 1);
+
+ /*
+ * This uses set_bit, which is atomic but we should not rely on its
+ * reordering gurantees. So barrier is needed after this call.
+ */
+ cpumask_set_cpu(cpu, &waiting_cpus);
+
+ barrier();
+
+ /*
+ * Mark entry to slowpath before doing the pickup test to make
+ * sure we don't deadlock with an unlocker.
+ */
+ __ticket_enter_slowpath(lock);
+
+ /*
+ * check again make sure it didn't become free while
+ * we weren't looking.
+ */
+ if (ACCESS_ONCE(lock->tickets.head) == want) {
+ add_stats(TAKEN_SLOW_PICKUP, 1);
+ goto out;
+ }
+
+ /*
+ * halt until it's our turn and kicked. Note that we do safe halt
+ * for irq enabled case to avoid hang when lock info is overwritten
+ * in irq spinlock slowpath and no spurious interrupt occur to save us.
+ */
+ if (arch_irqs_disabled_flags(flags))
+ halt();
+ else
+ safe_halt();
+
+out:
+ cpumask_clear_cpu(cpu, &waiting_cpus);
+ w->lock = NULL;
+ local_irq_restore(flags);
+ spin_time_accum_blocked(start);
+}
+PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
+
+/* Kick vcpu waiting on @lock->head to reach value @ticket */
+static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
+{
+ int cpu;
+
+ add_stats(RELEASED_SLOW, 1);
+ for_each_cpu(cpu, &waiting_cpus) {
+ const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
+ if (ACCESS_ONCE(w->lock) == lock &&
+ ACCESS_ONCE(w->want) == ticket) {
+ add_stats(RELEASED_SLOW_KICKED, 1);
+ kvm_kick_cpu(cpu);
+ break;
+ }
+ }
+}
+
+/*
+ * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
+ */
+void __init kvm_spinlock_init(void)
+{
+ if (!kvm_para_available())
+ return;
+ /* Does host kernel support KVM_FEATURE_PV_UNHALT? */
+ if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
+ return;
+
+ printk(KERN_INFO "KVM setup paravirtual spinlock\n");
+
+ static_key_slow_inc(&paravirt_ticketlocks_enabled);
+
+ pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
+ pv_lock_ops.unlock_kick = kvm_unlock_kick;
+}
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d2c3812..1570e07 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -48,10 +48,9 @@ static struct pvclock_wall_clock wall_clock;
* have elapsed since the hypervisor wrote the data. So we try to account for
* that with system time
*/
-static unsigned long kvm_get_wallclock(void)
+static void kvm_get_wallclock(struct timespec *now)
{
struct pvclock_vcpu_time_info *vcpu_time;
- struct timespec ts;
int low, high;
int cpu;
@@ -64,14 +63,12 @@ static unsigned long kvm_get_wallclock(void)
cpu = smp_processor_id();
vcpu_time = &hv_clock[cpu].pvti;
- pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+ pvclock_read_wallclock(&wall_clock, vcpu_time, now);
preempt_enable();
-
- return ts.tv_sec;
}
-static int kvm_set_wallclock(unsigned long now)
+static int kvm_set_wallclock(const struct timespec *now)
{
return -1;
}
@@ -185,7 +182,7 @@ static void kvm_restore_sched_clock_state(void)
}
#ifdef CONFIG_X86_LOCAL_APIC
-static void __cpuinit kvm_setup_secondary_clock(void)
+static void kvm_setup_secondary_clock(void)
{
/*
* Now that the first cpu already had this clocksource initialized,
@@ -242,6 +239,7 @@ void __init kvmclock_init(void)
if (!mem)
return;
hv_clock = __va(mem);
+ memset(hv_clock, 0, size);
if (kvm_register_clock("boot clock")) {
hv_clock = NULL;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index efdec7c..af99f71 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -31,48 +31,12 @@
#include <asm/microcode.h>
#include <asm/processor.h>
#include <asm/msr.h>
+#include <asm/microcode_amd.h>
MODULE_DESCRIPTION("AMD Microcode Update Driver");
MODULE_AUTHOR("Peter Oruba");
MODULE_LICENSE("GPL v2");
-#define UCODE_MAGIC 0x00414d44
-#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
-#define UCODE_UCODE_TYPE 0x00000001
-
-struct equiv_cpu_entry {
- u32 installed_cpu;
- u32 fixed_errata_mask;
- u32 fixed_errata_compare;
- u16 equiv_cpu;
- u16 res;
-} __attribute__((packed));
-
-struct microcode_header_amd {
- u32 data_code;
- u32 patch_id;
- u16 mc_patch_data_id;
- u8 mc_patch_data_len;
- u8 init_flag;
- u32 mc_patch_data_checksum;
- u32 nb_dev_id;
- u32 sb_dev_id;
- u16 processor_rev_id;
- u8 nb_rev_id;
- u8 sb_rev_id;
- u8 bios_api_rev;
- u8 reserved1[3];
- u32 match_reg[8];
-} __attribute__((packed));
-
-struct microcode_amd {
- struct microcode_header_amd hdr;
- unsigned int mpb[0];
-};
-
-#define SECTION_HDR_SIZE 8
-#define CONTAINER_HDR_SZ 12
-
static struct equiv_cpu_entry *equiv_cpu_table;
struct ucode_patch {
@@ -84,21 +48,10 @@ struct ucode_patch {
static LIST_HEAD(pcache);
-static u16 find_equiv_id(unsigned int cpu)
+static u16 __find_equiv_id(unsigned int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- int i = 0;
-
- if (!equiv_cpu_table)
- return 0;
-
- while (equiv_cpu_table[i].installed_cpu != 0) {
- if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
- return equiv_cpu_table[i].equiv_cpu;
-
- i++;
- }
- return 0;
+ return find_equiv_id(equiv_cpu_table, uci->cpu_sig.sig);
}
static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
@@ -163,7 +116,7 @@ static struct ucode_patch *find_patch(unsigned int cpu)
{
u16 equiv_id;
- equiv_id = find_equiv_id(cpu);
+ equiv_id = __find_equiv_id(cpu);
if (!equiv_id)
return NULL;
@@ -173,18 +126,28 @@ static struct ucode_patch *find_patch(unsigned int cpu)
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ struct ucode_patch *p;
csig->sig = cpuid_eax(0x00000001);
csig->rev = c->microcode;
+
+ /*
+ * a patch could have been loaded early, set uci->mc so that
+ * mc_bp_resume() can call apply_microcode()
+ */
+ p = find_patch(cpu);
+ if (p && (p->patch_id == csig->rev))
+ uci->mc = p->data;
+
pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
return 0;
}
-static unsigned int verify_patch_size(int cpu, u32 patch_size,
+static unsigned int verify_patch_size(u8 family, u32 patch_size,
unsigned int size)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 max_size;
#define F1XH_MPB_MAX_SIZE 2048
@@ -192,7 +155,7 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
#define F15H_MPB_MAX_SIZE 4096
#define F16H_MPB_MAX_SIZE 3458
- switch (c->x86) {
+ switch (family) {
case 0x14:
max_size = F14H_MPB_MAX_SIZE;
break;
@@ -215,7 +178,21 @@ static unsigned int verify_patch_size(int cpu, u32 patch_size,
return patch_size;
}
-static int apply_microcode_amd(int cpu)
+int __apply_microcode_amd(struct microcode_amd *mc_amd)
+{
+ u32 rev, dummy;
+
+ wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+
+ /* verify patch application was successful */
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+ if (rev != mc_amd->hdr.patch_id)
+ return -1;
+
+ return 0;
+}
+
+int apply_microcode_amd(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_amd *mc_amd;
@@ -239,22 +216,20 @@ static int apply_microcode_amd(int cpu)
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
c->microcode = rev;
+ uci->cpu_sig.rev = rev;
return 0;
}
- wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
-
- /* verify patch application was successful */
- rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
- if (rev != mc_amd->hdr.patch_id) {
+ if (__apply_microcode_amd(mc_amd)) {
pr_err("CPU%d: update failed for patch_level=0x%08x\n",
- cpu, mc_amd->hdr.patch_id);
+ cpu, mc_amd->hdr.patch_id);
return -1;
}
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
+ mc_amd->hdr.patch_id);
- pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
- uci->cpu_sig.rev = rev;
- c->microcode = rev;
+ uci->cpu_sig.rev = mc_amd->hdr.patch_id;
+ c->microcode = mc_amd->hdr.patch_id;
return 0;
}
@@ -302,9 +277,8 @@ static void cleanup(void)
* driver cannot continue functioning normally. In such cases, we tear
* down everything we've used up so far and exit.
*/
-static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
+static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_header_amd *mc_hdr;
struct ucode_patch *patch;
unsigned int patch_size, crnt_size, ret;
@@ -324,7 +298,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
/* check if patch is for the current family */
proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
- if (proc_fam != c->x86)
+ if (proc_fam != family)
return crnt_size;
if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
@@ -333,7 +307,7 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
return crnt_size;
}
- ret = verify_patch_size(cpu, patch_size, leftover);
+ ret = verify_patch_size(family, patch_size, leftover);
if (!ret) {
pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
return crnt_size;
@@ -364,7 +338,8 @@ static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
return crnt_size;
}
-static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
+ size_t size)
{
enum ucode_state ret = UCODE_ERROR;
unsigned int leftover;
@@ -387,7 +362,7 @@ static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
}
while (leftover) {
- crnt_size = verify_and_add_patch(cpu, fw, leftover);
+ crnt_size = verify_and_add_patch(family, fw, leftover);
if (crnt_size < 0)
return ret;
@@ -398,6 +373,32 @@ static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
return UCODE_OK;
}
+enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
+{
+ enum ucode_state ret;
+
+ /* free old equiv table */
+ free_equiv_cpu_table();
+
+ ret = __load_microcode_amd(family, data, size);
+
+ if (ret != UCODE_OK)
+ cleanup();
+
+#if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
+ /* save BSP's matching patch for early load */
+ if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) {
+ struct ucode_patch *p = find_patch(smp_processor_id());
+ if (p) {
+ memset(amd_bsp_mpb, 0, MPB_MAX_SIZE);
+ memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data),
+ MPB_MAX_SIZE));
+ }
+ }
+#endif
+ return ret;
+}
+
/*
* AMD microcode firmware naming convention, up to family 15h they are in
* the legacy file:
@@ -440,12 +441,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
goto fw_release;
}
- /* free old equiv table */
- free_equiv_cpu_table();
-
- ret = load_microcode_amd(cpu, fw->data, fw->size);
- if (ret != UCODE_OK)
- cleanup();
+ ret = load_microcode_amd(c->x86, fw->data, fw->size);
fw_release:
release_firmware(fw);
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
new file mode 100644
index 0000000..6073104
--- /dev/null
+++ b/arch/x86/kernel/microcode_amd_early.c
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Jacob Shin <jacob.shin@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+
+#include <asm/cpu.h>
+#include <asm/setup.h>
+#include <asm/microcode_amd.h>
+
+static bool ucode_loaded;
+static u32 ucode_new_rev;
+static unsigned long ucode_offset;
+static size_t ucode_size;
+
+/*
+ * Microcode patch container file is prepended to the initrd in cpio format.
+ * See Documentation/x86/early-microcode.txt
+ */
+static __initdata char ucode_path[] = "kernel/x86/microcode/AuthenticAMD.bin";
+
+static struct cpio_data __init find_ucode_in_initrd(void)
+{
+ long offset = 0;
+ char *path;
+ void *start;
+ size_t size;
+ unsigned long *uoffset;
+ size_t *usize;
+ struct cpio_data cd;
+
+#ifdef CONFIG_X86_32
+ struct boot_params *p;
+
+ /*
+ * On 32-bit, early load occurs before paging is turned on so we need
+ * to use physical addresses.
+ */
+ p = (struct boot_params *)__pa_nodebug(&boot_params);
+ path = (char *)__pa_nodebug(ucode_path);
+ start = (void *)p->hdr.ramdisk_image;
+ size = p->hdr.ramdisk_size;
+ uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
+ usize = (size_t *)__pa_nodebug(&ucode_size);
+#else
+ path = ucode_path;
+ start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET);
+ size = boot_params.hdr.ramdisk_size;
+ uoffset = &ucode_offset;
+ usize = &ucode_size;
+#endif
+
+ cd = find_cpio_data(path, start, size, &offset);
+ if (!cd.data)
+ return cd;
+
+ if (*(u32 *)cd.data != UCODE_MAGIC) {
+ cd.data = NULL;
+ cd.size = 0;
+ return cd;
+ }
+
+ *uoffset = (u8 *)cd.data - (u8 *)start;
+ *usize = cd.size;
+
+ return cd;
+}
+
+/*
+ * Early load occurs before we can vmalloc(). So we look for the microcode
+ * patch container file in initrd, traverse equivalent cpu table, look for a
+ * matching microcode patch, and update, all in initrd memory in place.
+ * When vmalloc() is available for use later -- on 64-bit during first AP load,
+ * and on 32-bit during save_microcode_in_initrd_amd() -- we can call
+ * load_microcode_amd() to save equivalent cpu table and microcode patches in
+ * kernel heap memory.
+ */
+static void apply_ucode_in_initrd(void *ucode, size_t size)
+{
+ struct equiv_cpu_entry *eq;
+ u32 *header;
+ u8 *data;
+ u16 eq_id = 0;
+ int offset, left;
+ u32 rev, eax;
+ u32 *new_rev;
+ unsigned long *uoffset;
+ size_t *usize;
+
+#ifdef CONFIG_X86_32
+ new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
+ uoffset = (unsigned long *)__pa_nodebug(&ucode_offset);
+ usize = (size_t *)__pa_nodebug(&ucode_size);
+#else
+ new_rev = &ucode_new_rev;
+ uoffset = &ucode_offset;
+ usize = &ucode_size;
+#endif
+
+ data = ucode;
+ left = size;
+ header = (u32 *)data;
+
+ /* find equiv cpu table */
+
+ if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */
+ header[2] == 0) /* size */
+ return;
+
+ eax = cpuid_eax(0x00000001);
+
+ while (left > 0) {
+ eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ);
+
+ offset = header[2] + CONTAINER_HDR_SZ;
+ data += offset;
+ left -= offset;
+
+ eq_id = find_equiv_id(eq, eax);
+ if (eq_id)
+ break;
+
+ /*
+ * support multiple container files appended together. if this
+ * one does not have a matching equivalent cpu entry, we fast
+ * forward to the next container file.
+ */
+ while (left > 0) {
+ header = (u32 *)data;
+ if (header[0] == UCODE_MAGIC &&
+ header[1] == UCODE_EQUIV_CPU_TABLE_TYPE)
+ break;
+
+ offset = header[1] + SECTION_HDR_SIZE;
+ data += offset;
+ left -= offset;
+ }
+
+ /* mark where the next microcode container file starts */
+ offset = data - (u8 *)ucode;
+ *uoffset += offset;
+ *usize -= offset;
+ ucode = data;
+ }
+
+ if (!eq_id) {
+ *usize = 0;
+ return;
+ }
+
+ /* find ucode and update if needed */
+
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+
+ while (left > 0) {
+ struct microcode_amd *mc;
+
+ header = (u32 *)data;
+ if (header[0] != UCODE_UCODE_TYPE || /* type */
+ header[1] == 0) /* size */
+ break;
+
+ mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE);
+ if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id)
+ if (__apply_microcode_amd(mc) == 0) {
+ rev = mc->hdr.patch_id;
+ *new_rev = rev;
+ }
+
+ offset = header[1] + SECTION_HDR_SIZE;
+ data += offset;
+ left -= offset;
+ }
+
+ /* mark where this microcode container file ends */
+ offset = *usize - (data - (u8 *)ucode);
+ *usize -= offset;
+
+ if (!(*new_rev))
+ *usize = 0;
+}
+
+void __init load_ucode_amd_bsp(void)
+{
+ struct cpio_data cd = find_ucode_in_initrd();
+ if (!cd.data)
+ return;
+
+ apply_ucode_in_initrd(cd.data, cd.size);
+}
+
+#ifdef CONFIG_X86_32
+u8 amd_bsp_mpb[MPB_MAX_SIZE];
+
+/*
+ * On 32-bit, since AP's early load occurs before paging is turned on, we
+ * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during
+ * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During
+ * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which
+ * is used upon resume from suspend.
+ */
+void load_ucode_amd_ap(void)
+{
+ struct microcode_amd *mc;
+ unsigned long *initrd;
+ unsigned long *uoffset;
+ size_t *usize;
+ void *ucode;
+
+ mc = (struct microcode_amd *)__pa(amd_bsp_mpb);
+ if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
+ __apply_microcode_amd(mc);
+ return;
+ }
+
+ initrd = (unsigned long *)__pa(&initrd_start);
+ uoffset = (unsigned long *)__pa(&ucode_offset);
+ usize = (size_t *)__pa(&ucode_size);
+
+ if (!*usize || !*initrd)
+ return;
+
+ ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset);
+ apply_ucode_in_initrd(ucode, *usize);
+}
+
+static void __init collect_cpu_sig_on_bsp(void *arg)
+{
+ unsigned int cpu = smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ uci->cpu_sig.sig = cpuid_eax(0x00000001);
+}
+#else
+void load_ucode_amd_ap(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ u32 rev, eax;
+
+ rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
+ eax = cpuid_eax(0x00000001);
+
+ uci->cpu_sig.rev = rev;
+ uci->cpu_sig.sig = eax;
+
+ if (cpu && !ucode_loaded) {
+ void *ucode;
+
+ if (!ucode_size || !initrd_start)
+ return;
+
+ ucode = (void *)(initrd_start + ucode_offset);
+ eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+ if (load_microcode_amd(eax, ucode, ucode_size) != UCODE_OK)
+ return;
+
+ ucode_loaded = true;
+ }
+
+ apply_microcode_amd(cpu);
+}
+#endif
+
+int __init save_microcode_in_initrd_amd(void)
+{
+ enum ucode_state ret;
+ void *ucode;
+ u32 eax;
+
+#ifdef CONFIG_X86_32
+ unsigned int bsp = boot_cpu_data.cpu_index;
+ struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
+
+ if (!uci->cpu_sig.sig)
+ smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1);
+#endif
+ if (ucode_new_rev)
+ pr_info("microcode: updated early to new patch_level=0x%08x\n",
+ ucode_new_rev);
+
+ if (ucode_loaded || !ucode_size || !initrd_start)
+ return 0;
+
+ ucode = (void *)(initrd_start + ucode_offset);
+ eax = cpuid_eax(0x00000001);
+ eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+
+ ret = load_microcode_amd(eax, ucode, ucode_size);
+ if (ret != UCODE_OK)
+ return -EINVAL;
+
+ ucode_loaded = true;
+ return 0;
+}
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 22db92b..15c9876 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -468,7 +468,7 @@ static struct syscore_ops mc_syscore_ops = {
.resume = mc_bp_resume,
};
-static __cpuinit int
+static int
mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
index 833d51d..be7f851 100644
--- a/arch/x86/kernel/microcode_core_early.c
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -18,6 +18,7 @@
*/
#include <linux/module.h>
#include <asm/microcode_intel.h>
+#include <asm/microcode_amd.h>
#include <asm/processor.h>
#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
@@ -40,7 +41,7 @@
*
* x86_vendor() gets vendor information directly through cpuid.
*/
-static int __cpuinit x86_vendor(void)
+static int x86_vendor(void)
{
u32 eax = 0x00000000;
u32 ebx, ecx = 0, edx;
@@ -56,7 +57,7 @@ static int __cpuinit x86_vendor(void)
return X86_VENDOR_UNKNOWN;
}
-static int __cpuinit x86_family(void)
+static int x86_family(void)
{
u32 eax = 0x00000001;
u32 ebx, ecx = 0, edx;
@@ -81,11 +82,21 @@ void __init load_ucode_bsp(void)
vendor = x86_vendor();
x86 = x86_family();
- if (vendor == X86_VENDOR_INTEL && x86 >= 6)
- load_ucode_intel_bsp();
+ switch (vendor) {
+ case X86_VENDOR_INTEL:
+ if (x86 >= 6)
+ load_ucode_intel_bsp();
+ break;
+ case X86_VENDOR_AMD:
+ if (x86 >= 0x10)
+ load_ucode_amd_bsp();
+ break;
+ default:
+ break;
+ }
}
-void __cpuinit load_ucode_ap(void)
+void load_ucode_ap(void)
{
int vendor, x86;
@@ -95,6 +106,36 @@ void __cpuinit load_ucode_ap(void)
vendor = x86_vendor();
x86 = x86_family();
- if (vendor == X86_VENDOR_INTEL && x86 >= 6)
- load_ucode_intel_ap();
+ switch (vendor) {
+ case X86_VENDOR_INTEL:
+ if (x86 >= 6)
+ load_ucode_intel_ap();
+ break;
+ case X86_VENDOR_AMD:
+ if (x86 >= 0x10)
+ load_ucode_amd_ap();
+ break;
+ default:
+ break;
+ }
+}
+
+int __init save_microcode_in_initrd(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (c->x86 >= 6)
+ save_microcode_in_initrd_intel();
+ break;
+ case X86_VENDOR_AMD:
+ if (c->x86 >= 0x10)
+ save_microcode_in_initrd_amd();
+ break;
+ default:
+ break;
+ }
+
+ return 0;
}
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
index 2e9e128..1575deb 100644
--- a/arch/x86/kernel/microcode_intel_early.c
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -34,7 +34,7 @@ struct mc_saved_data {
struct microcode_intel **mc_saved;
} mc_saved_data;
-static enum ucode_state __cpuinit
+static enum ucode_state
generic_load_microcode_early(struct microcode_intel **mc_saved_p,
unsigned int mc_saved_count,
struct ucode_cpu_info *uci)
@@ -69,7 +69,7 @@ out:
return state;
}
-static void __cpuinit
+static void
microcode_pointer(struct microcode_intel **mc_saved,
unsigned long *mc_saved_in_initrd,
unsigned long initrd_start, int mc_saved_count)
@@ -82,7 +82,7 @@ microcode_pointer(struct microcode_intel **mc_saved,
}
#ifdef CONFIG_X86_32
-static void __cpuinit
+static void
microcode_phys(struct microcode_intel **mc_saved_tmp,
struct mc_saved_data *mc_saved_data)
{
@@ -101,7 +101,7 @@ microcode_phys(struct microcode_intel **mc_saved_tmp,
}
#endif
-static enum ucode_state __cpuinit
+static enum ucode_state
load_microcode(struct mc_saved_data *mc_saved_data,
unsigned long *mc_saved_in_initrd,
unsigned long initrd_start,
@@ -375,7 +375,7 @@ do { \
#define native_wrmsr(msr, low, high) \
native_write_msr(msr, low, high);
-static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci)
+static int collect_cpu_info_early(struct ucode_cpu_info *uci)
{
unsigned int val[2];
u8 x86, x86_model;
@@ -529,7 +529,7 @@ int save_mc_for_early(u8 *mc)
*/
ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count);
if (ret) {
- pr_err("Can not save microcode patch.\n");
+ pr_err("Cannot save microcode patch.\n");
goto out;
}
@@ -584,7 +584,7 @@ scan_microcode(unsigned long start, unsigned long end,
/*
* Print ucode update info.
*/
-static void __cpuinit
+static void
print_ucode_info(struct ucode_cpu_info *uci, unsigned int date)
{
int cpu = smp_processor_id();
@@ -605,7 +605,7 @@ static int current_mc_date;
/*
* Print early updated ucode info after printk works. This is delayed info dump.
*/
-void __cpuinit show_ucode_info_early(void)
+void show_ucode_info_early(void)
{
struct ucode_cpu_info uci;
@@ -621,7 +621,7 @@ void __cpuinit show_ucode_info_early(void)
* mc_saved_data.mc_saved and delay printing microcode info in
* show_ucode_info_early() until printk() works.
*/
-static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
+static void print_ucode(struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_intel;
int *delay_ucode_info_p;
@@ -643,12 +643,12 @@ static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
* Flush global tlb. We only do this in x86_64 where paging has been enabled
* already and PGE should be enabled as well.
*/
-static inline void __cpuinit flush_tlb_early(void)
+static inline void flush_tlb_early(void)
{
__native_flush_tlb_global_irq_disabled();
}
-static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
+static inline void print_ucode(struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_intel;
@@ -660,8 +660,8 @@ static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
}
#endif
-static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
- struct ucode_cpu_info *uci)
+static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
+ struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_intel;
unsigned int val[2];
@@ -699,7 +699,7 @@ static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
* This function converts microcode patch offsets previously stored in
* mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data.
*/
-int __init save_microcode_in_initrd(void)
+int __init save_microcode_in_initrd_intel(void)
{
unsigned int count = mc_saved_data.mc_saved_count;
struct microcode_intel *mc_saved[MAX_UCODE_COUNT];
@@ -711,7 +711,7 @@ int __init save_microcode_in_initrd(void)
microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count);
ret = save_microcode(&mc_saved_data, mc_saved, count);
if (ret)
- pr_err("Can not save microcod patches from initrd");
+ pr_err("Cannot save microcode patches from initrd.\n");
show_saved_mc();
@@ -763,7 +763,7 @@ load_ucode_intel_bsp(void)
#endif
}
-void __cpuinit load_ucode_intel_ap(void)
+void load_ucode_intel_ap(void)
{
struct mc_saved_data *mc_saved_data_p;
struct ucode_cpu_info uci;
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index ac861b8..f4c886d 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -24,14 +24,14 @@ struct pci_hostbridge_probe {
u32 device;
};
-static u64 __cpuinitdata fam10h_pci_mmconf_base;
+static u64 fam10h_pci_mmconf_base;
-static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
+static struct pci_hostbridge_probe pci_probes[] = {
{ 0, 0x18, PCI_VENDOR_ID_AMD, 0x1200 },
{ 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
};
-static int __cpuinit cmp_range(const void *x1, const void *x2)
+static int cmp_range(const void *x1, const void *x2)
{
const struct range *r1 = x1;
const struct range *r2 = x2;
@@ -49,7 +49,7 @@ static int __cpuinit cmp_range(const void *x1, const void *x2)
/* need to avoid (0xfd<<32), (0xfe<<32), and (0xff<<32), ht used space */
#define FAM10H_PCI_MMCONF_BASE (0xfcULL<<32)
#define BASE_VALID(b) ((b) + MMCONF_SIZE <= (0xfdULL<<32) || (b) >= (1ULL<<40))
-static void __cpuinit get_fam10h_pci_mmconf_base(void)
+static void get_fam10h_pci_mmconf_base(void)
{
int i;
unsigned bus;
@@ -166,7 +166,7 @@ out:
fam10h_pci_mmconf_base = base;
}
-void __cpuinit fam10h_check_enable_mmcfg(void)
+void fam10h_check_enable_mmcfg(void)
{
u64 val;
u32 address;
@@ -230,7 +230,7 @@ static const struct dmi_system_id __initconst mmconf_dmi_table[] = {
{}
};
-/* Called from a __cpuinit function, but only on the BSP. */
+/* Called from a non __init function, but only on the BSP. */
void __ref check_enable_amd_mmconf_dmi(void)
{
dmi_check_system(mmconf_dmi_table);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index ce13049..88458fa 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -200,7 +200,7 @@ static const struct file_operations msr_fops = {
.compat_ioctl = msr_ioctl,
};
-static int __cpuinit msr_device_create(int cpu)
+static int msr_device_create(int cpu)
{
struct device *dev;
@@ -214,8 +214,8 @@ static void msr_device_destroy(int cpu)
device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
}
-static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int msr_class_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
int err = 0;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 6030805..ba77ebc 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -14,6 +14,7 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/nmi.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
@@ -29,6 +30,9 @@
#include <asm/nmi.h>
#include <asm/x86_init.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/nmi.h>
+
struct nmi_desc {
spinlock_t lock;
struct list_head head;
@@ -82,6 +86,15 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
#define nmi_to_desc(type) (&nmi_desc[type])
+static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;
+static int __init nmi_warning_debugfs(void)
+{
+ debugfs_create_u64("nmi_longest_ns", 0644,
+ arch_debugfs_dir, &nmi_longest_ns);
+ return 0;
+}
+fs_initcall(nmi_warning_debugfs);
+
static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b)
{
struct nmi_desc *desc = nmi_to_desc(type);
@@ -96,8 +109,28 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2
* can be latched at any given time. Walk the whole list
* to handle those situations.
*/
- list_for_each_entry_rcu(a, &desc->head, list)
- handled += a->handler(type, regs);
+ list_for_each_entry_rcu(a, &desc->head, list) {
+ u64 before, delta, whole_msecs;
+ int remainder_ns, decimal_msecs, thishandled;
+
+ before = local_clock();
+ thishandled = a->handler(type, regs);
+ handled += thishandled;
+ delta = local_clock() - before;
+ trace_nmi_handler(a->handler, (int)delta, thishandled);
+
+ if (delta < nmi_longest_ns)
+ continue;
+
+ nmi_longest_ns = delta;
+ whole_msecs = delta;
+ remainder_ns = do_div(whole_msecs, (1000 * 1000));
+ decimal_msecs = remainder_ns / 1000;
+ printk_ratelimited(KERN_INFO
+ "INFO: NMI handler (%ps) took too long to run: "
+ "%lld.%03d msecs\n", a->handler, whole_msecs,
+ decimal_msecs);
+ }
rcu_read_unlock();
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 676b8c7..bbb6c73 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -4,25 +4,17 @@
*/
#include <linux/spinlock.h>
#include <linux/module.h>
+#include <linux/jump_label.h>
#include <asm/paravirt.h>
-static inline void
-default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
- arch_spin_lock(lock);
-}
-
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
- .spin_is_locked = __ticket_spin_is_locked,
- .spin_is_contended = __ticket_spin_is_contended,
-
- .spin_lock = __ticket_spin_lock,
- .spin_lock_flags = default_spin_lock_flags,
- .spin_trylock = __ticket_spin_trylock,
- .spin_unlock = __ticket_spin_unlock,
+ .lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
+ .unlock_kick = paravirt_nop,
#endif
};
EXPORT_SYMBOL(pv_lock_ops);
+struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL(paravirt_ticketlocks_enabled);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index cd6de64..1b10af8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -62,11 +62,6 @@ void __init default_banner(void)
pv_info.name);
}
-/* Simple instruction patching code. */
-#define DEF_NATIVE(ops, name, code) \
- extern const char start_##ops##_##name[], end_##ops##_##name[]; \
- asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
-
/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };
@@ -324,7 +319,7 @@ struct pv_time_ops pv_time_ops = {
.steal_clock = native_steal_clock,
};
-struct pv_irq_ops pv_irq_ops = {
+__visible struct pv_irq_ops pv_irq_ops = {
.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
@@ -336,7 +331,7 @@ struct pv_irq_ops pv_irq_ops = {
#endif
};
-struct pv_cpu_ops pv_cpu_ops = {
+__visible struct pv_cpu_ops pv_cpu_ops = {
.cpuid = native_cpuid,
.get_debugreg = native_get_debugreg,
.set_debugreg = native_set_debugreg,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4e7a37f..c83516b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -36,7 +36,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -277,18 +277,6 @@ void exit_idle(void)
}
#endif
-void arch_cpu_idle_prepare(void)
-{
- /*
- * If we're the non-boot CPU, nothing set the stack canary up
- * for us. CPU0 already has it initialized but no harm in
- * doing it again. This is a good place for updating it, as
- * we wont ever return from this function (so the invalid
- * canaries already on the stack wont ever trigger).
- */
- boot_init_stack_canary();
-}
-
void arch_cpu_idle_enter(void)
{
local_touch_nmi();
@@ -410,7 +398,7 @@ static void amd_e400_idle(void)
default_idle();
}
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7305f7d..884f98f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -110,11 +110,16 @@ void __show_regs(struct pt_regs *regs, int all)
get_debugreg(d1, 1);
get_debugreg(d2, 2);
get_debugreg(d3, 3);
- printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
- d0, d1, d2, d3);
-
get_debugreg(d6, 6);
get_debugreg(d7, 7);
+
+ /* Only print out debug registers if they are in their non-default state. */
+ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+ (d6 == DR6_RESERVED) && (d7 == 0x400))
+ return;
+
+ printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+ d0, d1, d2, d3);
printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
d6, d7);
}
@@ -147,7 +152,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -242,7 +247,7 @@ EXPORT_SYMBOL_GPL(start_thread);
* the task-switch, and shows up in ret_from_fork in entry.S,
* for example.
*/
-__notrace_funcgraph struct task_struct *
+__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 355ae06..bb1dc51 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void);
-DEFINE_PER_CPU(unsigned long, old_rsp);
+asmlinkage DEFINE_PER_CPU(unsigned long, old_rsp);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
@@ -105,11 +105,18 @@ void __show_regs(struct pt_regs *regs, int all)
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
- printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
get_debugreg(d3, 3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
+
+ /* Only print out debug registers if they are in their non-default state. */
+ if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
+ (d6 == DR6_RESERVED) && (d7 == 0x400))
+ return;
+
+ printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+
}
void release_thread(struct task_struct *dead_task)
@@ -176,7 +183,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->bp = arg;
childregs->orig_ax = -1;
childregs->cs = __KERNEL_CS | get_kernel_rpl();
- childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
return 0;
}
*childregs = *current_pt_regs();
@@ -267,7 +274,7 @@ void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
* Kprobes not supported here. Set the probe on schedule instead.
* Function graph tracer not supported too.
*/
-__notrace_funcgraph struct task_struct *
+__visible __notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 29a8120..7461f50 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -601,30 +601,48 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[])
return dr7;
}
-static int
-ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
- struct task_struct *tsk, int disabled)
+static int ptrace_fill_bp_fields(struct perf_event_attr *attr,
+ int len, int type, bool disabled)
+{
+ int err, bp_len, bp_type;
+
+ err = arch_bp_generic_fields(len, type, &bp_len, &bp_type);
+ if (!err) {
+ attr->bp_len = bp_len;
+ attr->bp_type = bp_type;
+ attr->disabled = disabled;
+ }
+
+ return err;
+}
+
+static struct perf_event *
+ptrace_register_breakpoint(struct task_struct *tsk, int len, int type,
+ unsigned long addr, bool disabled)
{
- int err;
- int gen_len, gen_type;
struct perf_event_attr attr;
+ int err;
- /*
- * We should have at least an inactive breakpoint at this
- * slot. It means the user is writing dr7 without having
- * written the address register first
- */
- if (!bp)
- return -EINVAL;
+ ptrace_breakpoint_init(&attr);
+ attr.bp_addr = addr;
- err = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+ err = ptrace_fill_bp_fields(&attr, len, type, disabled);
if (err)
- return err;
+ return ERR_PTR(err);
+
+ return register_user_hw_breakpoint(&attr, ptrace_triggered,
+ NULL, tsk);
+}
- attr = bp->attr;
- attr.bp_len = gen_len;
- attr.bp_type = gen_type;
- attr.disabled = disabled;
+static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
+ int disabled)
+{
+ struct perf_event_attr attr = bp->attr;
+ int err;
+
+ err = ptrace_fill_bp_fields(&attr, len, type, disabled);
+ if (err)
+ return err;
return modify_user_hw_breakpoint(bp, &attr);
}
@@ -634,67 +652,50 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type,
*/
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
- struct thread_struct *thread = &(tsk->thread);
+ struct thread_struct *thread = &tsk->thread;
unsigned long old_dr7;
- int i, orig_ret = 0, rc = 0;
- int enabled, second_pass = 0;
- unsigned len, type;
- struct perf_event *bp;
-
- if (ptrace_get_breakpoints(tsk) < 0)
- return -ESRCH;
+ bool second_pass = false;
+ int i, rc, ret = 0;
data &= ~DR_CONTROL_RESERVED;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+
restore:
- /*
- * Loop through all the hardware breakpoints, making the
- * appropriate changes to each.
- */
+ rc = 0;
for (i = 0; i < HBP_NUM; i++) {
- enabled = decode_dr7(data, i, &len, &type);
- bp = thread->ptrace_bps[i];
-
- if (!enabled) {
- if (bp) {
- /*
- * Don't unregister the breakpoints right-away,
- * unless all register_user_hw_breakpoint()
- * requests have succeeded. This prevents
- * any window of opportunity for debug
- * register grabbing by other users.
- */
- if (!second_pass)
- continue;
-
- rc = ptrace_modify_breakpoint(bp, len, type,
- tsk, 1);
- if (rc)
- break;
+ unsigned len, type;
+ bool disabled = !decode_dr7(data, i, &len, &type);
+ struct perf_event *bp = thread->ptrace_bps[i];
+
+ if (!bp) {
+ if (disabled)
+ continue;
+
+ bp = ptrace_register_breakpoint(tsk,
+ len, type, 0, disabled);
+ if (IS_ERR(bp)) {
+ rc = PTR_ERR(bp);
+ break;
}
+
+ thread->ptrace_bps[i] = bp;
continue;
}
- rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0);
+ rc = ptrace_modify_breakpoint(bp, len, type, disabled);
if (rc)
break;
}
- /*
- * Make a second pass to free the remaining unused breakpoints
- * or to restore the original breakpoints if an error occurred.
- */
- if (!second_pass) {
- second_pass = 1;
- if (rc < 0) {
- orig_ret = rc;
- data = old_dr7;
- }
+
+ /* Restore if the first pass failed, second_pass shouldn't fail. */
+ if (rc && !WARN_ON(second_pass)) {
+ ret = rc;
+ data = old_dr7;
+ second_pass = true;
goto restore;
}
- ptrace_put_breakpoints(tsk);
-
- return ((orig_ret < 0) ? orig_ret : rc);
+ return ret;
}
/*
@@ -702,25 +703,17 @@ restore:
*/
static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
{
- struct thread_struct *thread = &(tsk->thread);
+ struct thread_struct *thread = &tsk->thread;
unsigned long val = 0;
if (n < HBP_NUM) {
- struct perf_event *bp;
+ struct perf_event *bp = thread->ptrace_bps[n];
- if (ptrace_get_breakpoints(tsk) < 0)
- return -ESRCH;
-
- bp = thread->ptrace_bps[n];
- if (!bp)
- val = 0;
- else
+ if (bp)
val = bp->hw.info.address;
-
- ptrace_put_breakpoints(tsk);
} else if (n == 6) {
val = thread->debugreg6;
- } else if (n == 7) {
+ } else if (n == 7) {
val = thread->ptrace_dr7;
}
return val;
@@ -729,29 +722,14 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
unsigned long addr)
{
- struct perf_event *bp;
struct thread_struct *t = &tsk->thread;
- struct perf_event_attr attr;
+ struct perf_event *bp = t->ptrace_bps[nr];
int err = 0;
- if (ptrace_get_breakpoints(tsk) < 0)
- return -ESRCH;
-
- if (!t->ptrace_bps[nr]) {
- ptrace_breakpoint_init(&attr);
- /*
- * Put stub len and type to register (reserve) an inactive but
- * correct bp
- */
- attr.bp_addr = addr;
- attr.bp_len = HW_BREAKPOINT_LEN_1;
- attr.bp_type = HW_BREAKPOINT_W;
- attr.disabled = 1;
-
- bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
- NULL, tsk);
-
+ if (!bp) {
/*
+ * Put stub len and type to create an inactive but correct bp.
+ *
* CHECKME: the previous code returned -EIO if the addr wasn't
* a valid task virtual addr. The new one will return -EINVAL in
* this case.
@@ -760,22 +738,20 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
* writing for the user. And anyway this is the previous
* behaviour.
*/
- if (IS_ERR(bp)) {
+ bp = ptrace_register_breakpoint(tsk,
+ X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE,
+ addr, true);
+ if (IS_ERR(bp))
err = PTR_ERR(bp);
- goto put;
- }
-
- t->ptrace_bps[nr] = bp;
+ else
+ t->ptrace_bps[nr] = bp;
} else {
- bp = t->ptrace_bps[nr];
+ struct perf_event_attr attr = bp->attr;
- attr = bp->attr;
attr.bp_addr = addr;
err = modify_user_hw_breakpoint(bp, &attr);
}
-put:
- ptrace_put_breakpoints(tsk);
return err;
}
@@ -785,30 +761,20 @@ put:
static int ptrace_set_debugreg(struct task_struct *tsk, int n,
unsigned long val)
{
- struct thread_struct *thread = &(tsk->thread);
- int rc = 0;
-
+ struct thread_struct *thread = &tsk->thread;
/* There are no DR4 or DR5 registers */
- if (n == 4 || n == 5)
- return -EIO;
+ int rc = -EIO;
- if (n == 6) {
- thread->debugreg6 = val;
- goto ret_path;
- }
if (n < HBP_NUM) {
rc = ptrace_set_breakpoint_addr(tsk, n, val);
- if (rc)
- return rc;
- }
- /* All that's left is DR7 */
- if (n == 7) {
+ } else if (n == 6) {
+ thread->debugreg6 = val;
+ rc = 0;
+ } else if (n == 7) {
rc = ptrace_write_dr7(tsk, val);
if (!rc)
thread->ptrace_dr7 = val;
}
-
-ret_path:
return rc;
}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2cb9470..a16bae3 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -128,46 +128,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
-static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
-
-static struct pvclock_vsyscall_time_info *
-pvclock_get_vsyscall_user_time_info(int cpu)
-{
- if (!pvclock_vdso_info) {
- BUG();
- return NULL;
- }
-
- return &pvclock_vdso_info[cpu];
-}
-
-struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
-{
- return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
-}
-
#ifdef CONFIG_X86_64
-static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
- void *v)
-{
- struct task_migration_notifier *mn = v;
- struct pvclock_vsyscall_time_info *pvti;
-
- pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
-
- /* this is NULL when pvclock vsyscall is not initialized */
- if (unlikely(pvti == NULL))
- return NOTIFY_DONE;
-
- pvti->migrate_count++;
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block pvclock_migrate = {
- .notifier_call = pvclock_task_migrate,
-};
-
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
@@ -181,17 +142,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
- pvclock_vdso_info = i;
-
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
-
- register_task_migration_notifier(&pvclock_migrate);
-
return 0;
}
#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 76fa1e9..e643e74 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -36,22 +36,6 @@ void (*pm_power_off)(void);
EXPORT_SYMBOL(pm_power_off);
static const struct desc_ptr no_idt = {};
-static int reboot_mode;
-enum reboot_type reboot_type = BOOT_ACPI;
-int reboot_force;
-
-/*
- * This variable is used privately to keep track of whether or not
- * reboot_type is still set to its default value (i.e., reboot= hasn't
- * been set on the command line). This is needed so that we can
- * suppress DMI scanning for reboot quirks. Without it, it's
- * impossible to override a faulty reboot quirk without recompiling.
- */
-static int reboot_default = 1;
-
-#ifdef CONFIG_SMP
-static int reboot_cpu = -1;
-#endif
/*
* This is set if we need to go through the 'emergency' path.
@@ -64,79 +48,6 @@ static int reboot_emergency;
bool port_cf9_safe = false;
/*
- * reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci]
- * warm Don't set the cold reboot flag
- * cold Set the cold reboot flag
- * bios Reboot by jumping through the BIOS
- * smp Reboot by executing reset on BSP or other CPU
- * triple Force a triple fault (init)
- * kbd Use the keyboard controller. cold reset (default)
- * acpi Use the RESET_REG in the FADT
- * efi Use efi reset_system runtime service
- * pci Use the so-called "PCI reset register", CF9
- * force Avoid anything that could hang.
- */
-static int __init reboot_setup(char *str)
-{
- for (;;) {
- /*
- * Having anything passed on the command line via
- * reboot= will cause us to disable DMI checking
- * below.
- */
- reboot_default = 0;
-
- switch (*str) {
- case 'w':
- reboot_mode = 0x1234;
- break;
-
- case 'c':
- reboot_mode = 0;
- break;
-
-#ifdef CONFIG_SMP
- case 's':
- if (isdigit(*(str+1))) {
- reboot_cpu = (int) (*(str+1) - '0');
- if (isdigit(*(str+2)))
- reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
- }
- /*
- * We will leave sorting out the final value
- * when we are ready to reboot, since we might not
- * have detected BSP APIC ID or smp_num_cpu
- */
- break;
-#endif /* CONFIG_SMP */
-
- case 'b':
- case 'a':
- case 'k':
- case 't':
- case 'e':
- case 'p':
- reboot_type = *str;
- break;
-
- case 'f':
- reboot_force = 1;
- break;
- }
-
- str = strchr(str, ',');
- if (str)
- str++;
- else
- break;
- }
- return 1;
-}
-
-__setup("reboot=", reboot_setup);
-
-
-/*
* Reboot options and system auto-detection code provided by
* Dell Inc. so their systems "just work". :-)
*/
@@ -441,12 +352,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
{ /* Handle problems with rebooting on the Precision M6600. */
.callback = set_pci_reboot,
- .ident = "Dell OptiPlex 990",
+ .ident = "Dell Precision M6600",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
},
},
+ { /* Handle problems with rebooting on the Dell PowerEdge C6100. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
+ { /* Some C6100 machines were shipped with vendor being 'Dell'. */
+ .callback = set_pci_reboot,
+ .ident = "Dell PowerEdge C6100",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+ },
+ },
{ }
};
@@ -536,6 +463,7 @@ static void native_machine_emergency_restart(void)
int i;
int attempt = 0;
int orig_reboot_type = reboot_type;
+ unsigned short mode;
if (reboot_emergency)
emergency_vmx_disable_all();
@@ -543,7 +471,8 @@ static void native_machine_emergency_restart(void)
tboot_shutdown(TB_SHUTDOWN_REBOOT);
/* Tell the BIOS if we want cold or warm reboot */
- *((unsigned short *)__va(0x472)) = reboot_mode;
+ mode = reboot_mode == REBOOT_WARM ? 0x1234 : 0;
+ *((unsigned short *)__va(0x472)) = mode;
for (;;) {
/* Could also try the reset bit in the Hammer NB */
@@ -585,7 +514,7 @@ static void native_machine_emergency_restart(void)
case BOOT_EFI:
if (efi_enabled(EFI_RUNTIME_SERVICES))
- efi.reset_system(reboot_mode ?
+ efi.reset_system(reboot_mode == REBOOT_WARM ?
EFI_RESET_WARM :
EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
@@ -614,26 +543,10 @@ void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
-
- /* The boot cpu is always logical cpu 0 */
- int reboot_cpu_id = 0;
-
- /* See if there has been given a command line override */
- if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
- cpu_online(reboot_cpu))
- reboot_cpu_id = reboot_cpu;
-
- /* Make certain the cpu I'm about to reboot on is online */
- if (!cpu_online(reboot_cpu_id))
- reboot_cpu_id = smp_processor_id();
-
- /* Make certain I only run on the appropriate processor */
- set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));
-
/*
- * O.K Now that I'm on the appropriate processor, stop all of the
- * others. Also disable the local irq to not receive the per-cpu
- * timer interrupt which may trigger scheduler's load balance.
+ * Stop all of the others. Also disable the local irq to
+ * not receive the per-cpu timer interrupt which may trigger
+ * scheduler's load balance.
*/
local_irq_disable();
stop_other_cpus();
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 36818f8..e13f8e7 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -186,7 +186,7 @@ identity_mapped:
movl CP_PA_PGD(%ebx), %eax
movl %eax, %cr3
movl %cr0, %eax
- orl $(1<<31), %eax
+ orl $X86_CR0_PG, %eax
movl %eax, %cr0
lea PAGE_SIZE(%edi), %esp
movl %edi, %eax
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 7a6f3b3..3fd2c69 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -151,21 +151,21 @@ identity_mapped:
testq %r11, %r11
jnz 1f
- xorq %rax, %rax
- xorq %rbx, %rbx
- xorq %rcx, %rcx
- xorq %rdx, %rdx
- xorq %rsi, %rsi
- xorq %rdi, %rdi
- xorq %rbp, %rbp
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r9
- xorq %r11, %r11
- xorq %r12, %r12
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
+ xorl %eax, %eax
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ xorl %edx, %edx
+ xorl %esi, %esi
+ xorl %edi, %edi
+ xorl %ebp, %ebp
+ xorl %r8d, %r8d
+ xorl %r9d, %r9d
+ xorl %r10d, %r10d
+ xorl %r11d, %r11d
+ xorl %r12d, %r12d
+ xorl %r13d, %r13d
+ xorl %r14d, %r14d
+ xorl %r15d, %r15d
ret
@@ -212,8 +212,8 @@ virtual_mapped:
/* Do the copies */
swap_pages:
movq %rdi, %rcx /* Put the page_list in %rcx */
- xorq %rdi, %rdi
- xorq %rsi, %rsi
+ xorl %edi, %edi
+ xorl %esi, %esi
jmp 1f
0: /* top, read another word for the indirection page */
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 198eb20..0aa2939 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -38,8 +38,9 @@ EXPORT_SYMBOL(rtc_lock);
* jump to the next second precisely 500 ms later. Check the Motorola
* MC146818A or Dallas DS12887 data sheet for details.
*/
-int mach_set_rtc_mmss(unsigned long nowtime)
+int mach_set_rtc_mmss(const struct timespec *now)
{
+ unsigned long nowtime = now->tv_sec;
struct rtc_time tm;
int retval = 0;
@@ -58,7 +59,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
return retval;
}
-unsigned long mach_get_cmos_time(void)
+void mach_get_cmos_time(struct timespec *now)
{
unsigned int status, year, mon, day, hour, min, sec, century = 0;
unsigned long flags;
@@ -107,7 +108,8 @@ unsigned long mach_get_cmos_time(void)
} else
year += CMOS_YEARS_OFFS;
- return mktime(year, mon, day, hour, min, sec);
+ now->tv_sec = mktime(year, mon, day, hour, min, sec);
+ now->tv_nsec = 0;
}
/* Routines for accessing the CMOS RAM/RTC. */
@@ -135,18 +137,13 @@ EXPORT_SYMBOL(rtc_cmos_write);
int update_persistent_clock(struct timespec now)
{
- return x86_platform.set_wallclock(now.tv_sec);
+ return x86_platform.set_wallclock(&now);
}
/* not static: needed by APM */
void read_persistent_clock(struct timespec *ts)
{
- unsigned long retval;
-
- retval = x86_platform.get_wallclock();
-
- ts->tv_sec = retval;
- ts->tv_nsec = 0;
+ x86_platform.get_wallclock(ts);
}
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 56f7fcf..f0de629 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -170,7 +170,7 @@ static struct resource bss_resource = {
#ifdef CONFIG_X86_32
/* cpu data as detected by the assembly code in head.S */
-struct cpuinfo_x86 new_cpu_data __cpuinitdata = {
+struct cpuinfo_x86 new_cpu_data = {
.wp_works_ok = -1,
};
/* common cpu data for all cpus */
@@ -206,9 +206,9 @@ EXPORT_SYMBOL(boot_cpu_data);
#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-unsigned long mmu_cr4_features;
+__visible unsigned long mmu_cr4_features;
#else
-unsigned long mmu_cr4_features = X86_CR4_PAE;
+__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
#endif
/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -426,25 +426,23 @@ static void __init reserve_initrd(void)
static void __init parse_setup_data(void)
{
struct setup_data *data;
- u64 pa_data;
+ u64 pa_data, pa_next;
pa_data = boot_params.hdr.setup_data;
while (pa_data) {
- u32 data_len, map_len;
+ u32 data_len, map_len, data_type;
map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK),
(u64)sizeof(struct setup_data));
data = early_memremap(pa_data, map_len);
data_len = data->len + sizeof(struct setup_data);
- if (data_len > map_len) {
- early_iounmap(data, map_len);
- data = early_memremap(pa_data, data_len);
- map_len = data_len;
- }
+ data_type = data->type;
+ pa_next = data->next;
+ early_iounmap(data, map_len);
- switch (data->type) {
+ switch (data_type) {
case SETUP_E820_EXT:
- parse_e820_ext(data);
+ parse_e820_ext(pa_data, data_len);
break;
case SETUP_DTB:
add_dtb(pa_data);
@@ -452,8 +450,7 @@ static void __init parse_setup_data(void)
default:
break;
}
- pa_data = data->next;
- early_iounmap(data, map_len);
+ pa_data = pa_next;
}
}
@@ -1040,8 +1037,6 @@ void __init setup_arch(char **cmdline_p)
/* max_low_pfn get updated here */
find_low_pfn_range();
#else
- num_physpages = max_pfn;
-
check_x2apic();
/* How many end-of-memory variables you have, grandma! */
@@ -1072,7 +1067,7 @@ void __init setup_arch(char **cmdline_p)
cleanup_highmap();
- memblock.current_limit = ISA_END_ADDRESS;
+ memblock_set_current_limit(ISA_END_ADDRESS);
memblock_x86_fill();
/*
@@ -1105,7 +1100,7 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
- memblock.current_limit = get_max_mapped();
+ memblock_set_current_limit(get_max_mapped());
dma_contiguous_reserve(0);
/*
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6956299..9e5de68 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -43,12 +43,6 @@
#include <asm/sigframe.h>
-#ifdef CONFIG_X86_32
-# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
-#else
-# define FIX_EFLAGS __FIX_EFLAGS
-#endif
-
#define COPY(x) do { \
get_user_ex(regs->x, &sc->x); \
} while (0)
@@ -364,7 +358,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+ save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -429,7 +423,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
+ save_altstack_ex(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -496,7 +490,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
+ compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
if (ksig->ka.sa.sa_flags & SA_RESTORER) {
@@ -539,7 +533,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
* Do a signal return; undo the signal stack.
*/
#ifdef CONFIG_X86_32
-unsigned long sys_sigreturn(void)
+asmlinkage unsigned long sys_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct sigframe __user *frame;
@@ -568,7 +562,7 @@ badframe:
}
#endif /* CONFIG_X86_32 */
-long sys_rt_sigreturn(void)
+asmlinkage long sys_rt_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
@@ -668,15 +662,17 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
if (!failed) {
/*
* Clear the direction flag as per the ABI for function entry.
- */
- regs->flags &= ~X86_EFLAGS_DF;
- /*
+ *
+ * Clear RF when entering the signal handler, because
+ * it might disable possible debug exception from the
+ * signal handler.
+ *
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
- regs->flags &= ~X86_EFLAGS_TF;
+ regs->flags &= ~(X86_EFLAGS_DF|X86_EFLAGS_RF|X86_EFLAGS_TF);
}
signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP));
}
@@ -732,7 +728,7 @@ static void do_signal(struct pt_regs *regs)
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
-void
+__visible void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
user_exit();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 48d2b7d..7c3a5a6 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -30,6 +30,7 @@
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/nmi.h>
+#include <asm/trace/irq_vectors.h>
/*
* Some notes on x86 processor bugs affecting SMP operation:
*
@@ -249,32 +250,87 @@ finish:
/*
* Reschedule call back.
*/
-void smp_reschedule_interrupt(struct pt_regs *regs)
+static inline void __smp_reschedule_interrupt(void)
{
- ack_APIC_irq();
inc_irq_stat(irq_resched_count);
scheduler_ipi();
+}
+
+__visible void smp_reschedule_interrupt(struct pt_regs *regs)
+{
+ ack_APIC_irq();
+ __smp_reschedule_interrupt();
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
}
-void smp_call_function_interrupt(struct pt_regs *regs)
+static inline void smp_entering_irq(void)
{
ack_APIC_irq();
irq_enter();
+}
+
+__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs)
+{
+ /*
+ * Need to call irq_enter() before calling the trace point.
+ * __smp_reschedule_interrupt() calls irq_enter/exit() too (in
+ * scheduler_ipi(). This is OK, since those functions are allowed
+ * to nest.
+ */
+ smp_entering_irq();
+ trace_reschedule_entry(RESCHEDULE_VECTOR);
+ __smp_reschedule_interrupt();
+ trace_reschedule_exit(RESCHEDULE_VECTOR);
+ exiting_irq();
+ /*
+ * KVM uses this interrupt to force a cpu out of guest mode
+ */
+}
+
+static inline void __smp_call_function_interrupt(void)
+{
generic_smp_call_function_interrupt();
inc_irq_stat(irq_call_count);
- irq_exit();
}
-void smp_call_function_single_interrupt(struct pt_regs *regs)
+__visible void smp_call_function_interrupt(struct pt_regs *regs)
+{
+ smp_entering_irq();
+ __smp_call_function_interrupt();
+ exiting_irq();
+}
+
+__visible void smp_trace_call_function_interrupt(struct pt_regs *regs)
+{
+ smp_entering_irq();
+ trace_call_function_entry(CALL_FUNCTION_VECTOR);
+ __smp_call_function_interrupt();
+ trace_call_function_exit(CALL_FUNCTION_VECTOR);
+ exiting_irq();
+}
+
+static inline void __smp_call_function_single_interrupt(void)
{
- ack_APIC_irq();
- irq_enter();
generic_smp_call_function_single_interrupt();
inc_irq_stat(irq_call_count);
- irq_exit();
+}
+
+__visible void smp_call_function_single_interrupt(struct pt_regs *regs)
+{
+ smp_entering_irq();
+ __smp_call_function_single_interrupt();
+ exiting_irq();
+}
+
+__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs)
+{
+ smp_entering_irq();
+ trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
+ __smp_call_function_single_interrupt();
+ trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
+ exiting_irq();
}
static int __init nonmi_ipi_setup(char *str)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9c73b51..6cacab6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -130,7 +130,7 @@ atomic_t init_deasserted;
* Report back to the Boot Processor during boot time or to the caller processor
* during CPU online.
*/
-static void __cpuinit smp_callin(void)
+static void smp_callin(void)
{
int cpuid, phys_id;
unsigned long timeout;
@@ -237,7 +237,7 @@ static int enable_start_cpu0;
/*
* Activate a secondary processor.
*/
-notrace static void __cpuinit start_secondary(void *unused)
+static void notrace start_secondary(void *unused)
{
/*
* Don't put *anything* before cpu_init(), SMP booting is too
@@ -300,7 +300,7 @@ void __init smp_store_boot_cpu_info(void)
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
*/
-void __cpuinit smp_store_cpu_info(int id)
+void smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
@@ -313,7 +313,7 @@ void __cpuinit smp_store_cpu_info(int id)
identify_secondary_cpu(c);
}
-static bool __cpuinit
+static bool
topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
@@ -330,7 +330,7 @@ do { \
cpumask_set_cpu((c2), cpu_##_m##_mask(c1)); \
} while (0)
-static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (cpu_has_topoext) {
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
@@ -348,7 +348,7 @@ static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
-static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
@@ -359,7 +359,7 @@ static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
-static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (c->phys_proc_id == o->phys_proc_id) {
if (cpu_has(c, X86_FEATURE_AMD_DCM))
@@ -370,17 +370,17 @@ static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
-void __cpuinit set_cpu_sibling_map(int cpu)
+void set_cpu_sibling_map(int cpu)
{
- bool has_mc = boot_cpu_data.x86_max_cores > 1;
bool has_smt = smp_num_siblings > 1;
+ bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct cpuinfo_x86 *o;
int i;
cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
- if (!has_smt && !has_mc) {
+ if (!has_mp) {
cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
cpumask_set_cpu(cpu, cpu_core_mask(cpu));
@@ -394,7 +394,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
if ((i == cpu) || (has_smt && match_smt(c, o)))
link_mask(sibling, cpu, i);
- if ((i == cpu) || (has_mc && match_llc(c, o)))
+ if ((i == cpu) || (has_mp && match_llc(c, o)))
link_mask(llc_shared, cpu, i);
}
@@ -406,7 +406,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
- if ((i == cpu) || (has_mc && match_mc(c, o))) {
+ if ((i == cpu) || (has_mp && match_mc(c, o))) {
link_mask(core, cpu, i);
/*
@@ -499,7 +499,7 @@ void __inquire_remote_apic(int apicid)
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
*/
-int __cpuinit
+int
wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
@@ -533,7 +533,7 @@ wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
return (send_status | accept_status);
}
-static int __cpuinit
+static int
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
@@ -649,10 +649,11 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
}
/* reduce the number of lines printed when booting a large cpu count system */
-static void __cpuinit announce_cpu(int cpu, int apicid)
+static void announce_cpu(int cpu, int apicid)
{
static int current_node = -1;
int node = early_cpu_to_node(cpu);
+ int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
if (system_state == SYSTEM_BOOTING) {
if (node != current_node) {
@@ -661,7 +662,7 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
current_node = node;
pr_info("Booting Node %3d, Processors ", node);
}
- pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
+ pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : "");
return;
} else
pr_info("Booting Node %d Processor %d APIC 0x%x\n",
@@ -691,7 +692,7 @@ static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
* We'll change this code in the future to wake up hard offlined CPU0 if
* real platform and request are available.
*/
-static int __cpuinit
+static int
wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
int *cpu0_nmi_registered)
{
@@ -731,7 +732,7 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
* Returns zero if CPU booted OK, else error code from
* ->wakeup_secondary_cpu.
*/
-static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
volatile u32 *trampoline_status =
(volatile u32 *) __va(real_mode_header->trampoline_status);
@@ -872,7 +873,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
return boot_error;
}
-int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
int apicid = apic->cpu_present_to_apicid(cpu);
unsigned long flags;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index dbded5a..30277e2 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
*begin = new_begin;
}
} else {
- *begin = TASK_UNMAPPED_BASE;
+ *begin = current->mm->mmap_legacy_base;
*end = TASK_SIZE;
}
}
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
index 147fcd4..e9bcd57 100644
--- a/arch/x86/kernel/syscall_32.c
+++ b/arch/x86/kernel/syscall_32.c
@@ -15,7 +15,7 @@ typedef asmlinkage void (*sys_call_ptr_t)(void);
extern asmlinkage void sys_ni_syscall(void);
-const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 5c7f8c2..4ac730b 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -4,6 +4,7 @@
#include <linux/sys.h>
#include <linux/cache.h>
#include <asm/asm-offsets.h>
+#include <asm/syscall.h>
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
@@ -19,11 +20,9 @@
#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
-typedef void (*sys_call_ptr_t)(void);
-
extern void sys_ni_syscall(void);
-const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
diff --git a/arch/x86/kernel/sysfb.c b/arch/x86/kernel/sysfb.c
new file mode 100644
index 0000000..193ec2c
--- /dev/null
+++ b/arch/x86/kernel/sysfb.c
@@ -0,0 +1,74 @@
+/*
+ * Generic System Framebuffers on x86
+ * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ * Simple-Framebuffer support for x86 systems
+ * Create a platform-device for any available boot framebuffer. The
+ * simple-framebuffer platform device is already available on DT systems, so
+ * this module parses the global "screen_info" object and creates a suitable
+ * platform device compatible with the "simple-framebuffer" DT object. If
+ * the framebuffer is incompatible, we instead create a legacy
+ * "vesa-framebuffer", "efi-framebuffer" or "platform-framebuffer" device and
+ * pass the screen_info as platform_data. This allows legacy drivers
+ * to pick these devices up without messing with simple-framebuffer drivers.
+ * The global "screen_info" is still valid at all times.
+ *
+ * If CONFIG_X86_SYSFB is not selected, we never register "simple-framebuffer"
+ * platform devices, but only use legacy framebuffer devices for
+ * backwards compatibility.
+ *
+ * TODO: We set the dev_id field of all platform-devices to 0. This allows
+ * other x86 OF/DT parsers to create such devices, too. However, they must
+ * start at offset 1 for this to work.
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/platform_data/simplefb.h>
+#include <linux/platform_device.h>
+#include <linux/screen_info.h>
+#include <asm/sysfb.h>
+
+static __init int sysfb_init(void)
+{
+ struct screen_info *si = &screen_info;
+ struct simplefb_platform_data mode;
+ struct platform_device *pd;
+ const char *name;
+ bool compatible;
+ int ret;
+
+ sysfb_apply_efi_quirks();
+
+ /* try to create a simple-framebuffer device */
+ compatible = parse_mode(si, &mode);
+ if (compatible) {
+ ret = create_simplefb(si, &mode);
+ if (!ret)
+ return 0;
+ }
+
+ /* if the FB is incompatible, create a legacy framebuffer device */
+ if (si->orig_video_isVGA == VIDEO_TYPE_EFI)
+ name = "efi-framebuffer";
+ else if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
+ name = "vesa-framebuffer";
+ else
+ name = "platform-framebuffer";
+
+ pd = platform_device_register_resndata(NULL, name, 0,
+ NULL, 0, si, sizeof(*si));
+ return IS_ERR(pd) ? PTR_ERR(pd) : 0;
+}
+
+/* must execute after PCI subsystem for EFI quirks */
+device_initcall(sysfb_init);
diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
new file mode 100644
index 0000000..b285d4e
--- /dev/null
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -0,0 +1,214 @@
+/*
+ * Generic System Framebuffers on x86
+ * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * EFI Quirks Copyright (c) 2006 Edgar Hucek <gimli@dark-green.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ * EFI Quirks
+ * Several EFI systems do not correctly advertise their boot framebuffers.
+ * Hence, we use this static table of known broken machines and fix up the
+ * information so framebuffer drivers can load corectly.
+ */
+
+#include <linux/dmi.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pci.h>
+#include <linux/screen_info.h>
+#include <video/vga.h>
+#include <asm/sysfb.h>
+
+enum {
+ OVERRIDE_NONE = 0x0,
+ OVERRIDE_BASE = 0x1,
+ OVERRIDE_STRIDE = 0x2,
+ OVERRIDE_HEIGHT = 0x4,
+ OVERRIDE_WIDTH = 0x8,
+};
+
+struct efifb_dmi_info efifb_dmi_list[] = {
+ [M_I17] = { "i17", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_I20] = { "i20", 0x80010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE }, /* guess */
+ [M_I20_SR] = { "imac7", 0x40010000, 1728 * 4, 1680, 1050, OVERRIDE_NONE },
+ [M_I24] = { "i24", 0x80010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE }, /* guess */
+ [M_I24_8_1] = { "imac8", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_I24_10_1] = { "imac10", 0xc0010000, 2048 * 4, 1920, 1080, OVERRIDE_NONE },
+ [M_I27_11_1] = { "imac11", 0xc0010000, 2560 * 4, 2560, 1440, OVERRIDE_NONE },
+ [M_MINI]= { "mini", 0x80000000, 2048 * 4, 1024, 768, OVERRIDE_NONE },
+ [M_MINI_3_1] = { "mini31", 0x40010000, 1024 * 4, 1024, 768, OVERRIDE_NONE },
+ [M_MINI_4_1] = { "mini41", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MB] = { "macbook", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MB_5_1] = { "macbook51", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MB_6_1] = { "macbook61", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MB_7_1] = { "macbook71", 0x80010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MBA] = { "mba", 0x80000000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ /* 11" Macbook Air 3,1 passes the wrong stride */
+ [M_MBA_3] = { "mba3", 0, 2048 * 4, 0, 0, OVERRIDE_STRIDE },
+ [M_MBP] = { "mbp", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_2] = { "mbp2", 0, 0, 0, 0, OVERRIDE_NONE }, /* placeholder */
+ [M_MBP_2_2] = { "mbp22", 0x80010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_SR] = { "mbp3", 0x80030000, 2048 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_4] = { "mbp4", 0xc0060000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MBP_5_1] = { "mbp51", 0xc0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_5_2] = { "mbp52", 0xc0010000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MBP_5_3] = { "mbp53", 0xd0010000, 2048 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_MBP_6_1] = { "mbp61", 0x90030000, 2048 * 4, 1920, 1200, OVERRIDE_NONE },
+ [M_MBP_6_2] = { "mbp62", 0x90030000, 2048 * 4, 1680, 1050, OVERRIDE_NONE },
+ [M_MBP_7_1] = { "mbp71", 0xc0010000, 2048 * 4, 1280, 800, OVERRIDE_NONE },
+ [M_MBP_8_2] = { "mbp82", 0x90010000, 1472 * 4, 1440, 900, OVERRIDE_NONE },
+ [M_UNKNOWN] = { NULL, 0, 0, 0, 0, OVERRIDE_NONE }
+};
+
+#define choose_value(dmivalue, fwvalue, field, flags) ({ \
+ typeof(fwvalue) _ret_ = fwvalue; \
+ if ((flags) & (field)) \
+ _ret_ = dmivalue; \
+ else if ((fwvalue) == 0) \
+ _ret_ = dmivalue; \
+ _ret_; \
+ })
+
+static int __init efifb_set_system(const struct dmi_system_id *id)
+{
+ struct efifb_dmi_info *info = id->driver_data;
+
+ if (info->base == 0 && info->height == 0 && info->width == 0 &&
+ info->stride == 0)
+ return 0;
+
+ /* Trust the bootloader over the DMI tables */
+ if (screen_info.lfb_base == 0) {
+#if defined(CONFIG_PCI)
+ struct pci_dev *dev = NULL;
+ int found_bar = 0;
+#endif
+ if (info->base) {
+ screen_info.lfb_base = choose_value(info->base,
+ screen_info.lfb_base, OVERRIDE_BASE,
+ info->flags);
+
+#if defined(CONFIG_PCI)
+ /* make sure that the address in the table is actually
+ * on a VGA device's PCI BAR */
+
+ for_each_pci_dev(dev) {
+ int i;
+ if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+ continue;
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ resource_size_t start, end;
+
+ start = pci_resource_start(dev, i);
+ if (start == 0)
+ break;
+ end = pci_resource_end(dev, i);
+ if (screen_info.lfb_base >= start &&
+ screen_info.lfb_base < end) {
+ found_bar = 1;
+ }
+ }
+ }
+ if (!found_bar)
+ screen_info.lfb_base = 0;
+#endif
+ }
+ }
+ if (screen_info.lfb_base) {
+ screen_info.lfb_linelength = choose_value(info->stride,
+ screen_info.lfb_linelength, OVERRIDE_STRIDE,
+ info->flags);
+ screen_info.lfb_width = choose_value(info->width,
+ screen_info.lfb_width, OVERRIDE_WIDTH,
+ info->flags);
+ screen_info.lfb_height = choose_value(info->height,
+ screen_info.lfb_height, OVERRIDE_HEIGHT,
+ info->flags);
+ if (screen_info.orig_video_isVGA == 0)
+ screen_info.orig_video_isVGA = VIDEO_TYPE_EFI;
+ } else {
+ screen_info.lfb_linelength = 0;
+ screen_info.lfb_width = 0;
+ screen_info.lfb_height = 0;
+ screen_info.orig_video_isVGA = 0;
+ return 0;
+ }
+
+ printk(KERN_INFO "efifb: dmi detected %s - framebuffer at 0x%08x "
+ "(%dx%d, stride %d)\n", id->ident,
+ screen_info.lfb_base, screen_info.lfb_width,
+ screen_info.lfb_height, screen_info.lfb_linelength);
+
+ return 1;
+}
+
+#define EFIFB_DMI_SYSTEM_ID(vendor, name, enumid) \
+ { \
+ efifb_set_system, \
+ name, \
+ { \
+ DMI_MATCH(DMI_BIOS_VENDOR, vendor), \
+ DMI_MATCH(DMI_PRODUCT_NAME, name) \
+ }, \
+ &efifb_dmi_list[enumid] \
+ }
+
+static const struct dmi_system_id efifb_dmi_system_table[] __initconst = {
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac4,1", M_I17),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac5,1", M_I20),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac5,1", M_I20),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac6,1", M_I24),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac6,1", M_I24),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac7,1", M_I20_SR),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac8,1", M_I24_8_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac10,1", M_I24_10_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac11,1", M_I27_11_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "Macmini1,1", M_MINI),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini3,1", M_MINI_3_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini4,1", M_MINI_4_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook1,1", M_MB),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook2,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook2,1", M_MB),
+ /* At least one of these two will be right; maybe both? */
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook3,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook3,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook4,1", M_MB),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook5,1", M_MB_5_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook6,1", M_MB_6_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook7,1", M_MB_7_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir1,1", M_MBA),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir3,1", M_MBA_3),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro1,1", M_MBP),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,1", M_MBP_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,2", M_MBP_2_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro2,1", M_MBP_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro3,1", M_MBP_SR),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro3,1", M_MBP_SR),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro4,1", M_MBP_4),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,1", M_MBP_5_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,2", M_MBP_5_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,3", M_MBP_5_3),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,1", M_MBP_6_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,2", M_MBP_6_2),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro7,1", M_MBP_7_1),
+ EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro8,2", M_MBP_8_2),
+ {},
+};
+
+__init void sysfb_apply_efi_quirks(void)
+{
+ if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
+ !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
+ dmi_check_system(efifb_dmi_system_table);
+}
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c
new file mode 100644
index 0000000..86179d4
--- /dev/null
+++ b/arch/x86/kernel/sysfb_simplefb.c
@@ -0,0 +1,95 @@
+/*
+ * Generic System Framebuffers on x86
+ * Copyright (c) 2012-2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+/*
+ * simple-framebuffer probing
+ * Try to convert "screen_info" into a "simple-framebuffer" compatible mode.
+ * If the mode is incompatible, we return "false" and let the caller create
+ * legacy nodes instead.
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/platform_data/simplefb.h>
+#include <linux/platform_device.h>
+#include <linux/screen_info.h>
+#include <asm/sysfb.h>
+
+static const char simplefb_resname[] = "BOOTFB";
+static const struct simplefb_format formats[] = SIMPLEFB_FORMATS;
+
+/* try parsing x86 screen_info into a simple-framebuffer mode struct */
+__init bool parse_mode(const struct screen_info *si,
+ struct simplefb_platform_data *mode)
+{
+ const struct simplefb_format *f;
+ __u8 type;
+ unsigned int i;
+
+ type = si->orig_video_isVGA;
+ if (type != VIDEO_TYPE_VLFB && type != VIDEO_TYPE_EFI)
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(formats); ++i) {
+ f = &formats[i];
+ if (si->lfb_depth == f->bits_per_pixel &&
+ si->red_size == f->red.length &&
+ si->red_pos == f->red.offset &&
+ si->green_size == f->green.length &&
+ si->green_pos == f->green.offset &&
+ si->blue_size == f->blue.length &&
+ si->blue_pos == f->blue.offset &&
+ si->rsvd_size == f->transp.length &&
+ si->rsvd_pos == f->transp.offset) {
+ mode->format = f->name;
+ mode->width = si->lfb_width;
+ mode->height = si->lfb_height;
+ mode->stride = si->lfb_linelength;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+__init int create_simplefb(const struct screen_info *si,
+ const struct simplefb_platform_data *mode)
+{
+ struct platform_device *pd;
+ struct resource res;
+ unsigned long len;
+
+ /* don't use lfb_size as it may contain the whole VMEM instead of only
+ * the part that is occupied by the framebuffer */
+ len = mode->height * mode->stride;
+ len = PAGE_ALIGN(len);
+ if (len > (u64)si->lfb_size << 16) {
+ printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
+ return -EINVAL;
+ }
+
+ /* setup IORESOURCE_MEM as framebuffer memory */
+ memset(&res, 0, sizeof(res));
+ res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res.name = simplefb_resname;
+ res.start = si->lfb_base;
+ res.end = si->lfb_base + len - 1;
+ if (res.end <= res.start)
+ return -EINVAL;
+
+ pd = platform_device_register_resndata(NULL, "simple-framebuffer", 0,
+ &res, 1, mode, sizeof(*mode));
+ if (IS_ERR(pd))
+ return PTR_ERR(pd);
+
+ return 0;
+}
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index f84fe00..91a4496 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -31,6 +31,7 @@
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/tboot.h>
+#include <linux/debugfs.h>
#include <asm/realmode.h>
#include <asm/processor.h>
@@ -300,6 +301,15 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
return 0;
}
+static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b)
+{
+ if (!tboot_enabled())
+ return 0;
+
+ pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
+ return -ENODEV;
+}
+
static atomic_t ap_wfs_count;
static int tboot_wait_for_aps(int num_aps)
@@ -319,8 +329,8 @@ static int tboot_wait_for_aps(int num_aps)
return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
}
-static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
- unsigned long action, void *hcpu)
+static int tboot_cpu_callback(struct notifier_block *nfb, unsigned long action,
+ void *hcpu)
{
switch (action) {
case CPU_DYING:
@@ -333,11 +343,78 @@ static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block tboot_cpu_notifier __cpuinitdata =
+static struct notifier_block tboot_cpu_notifier =
{
.notifier_call = tboot_cpu_callback,
};
+#ifdef CONFIG_DEBUG_FS
+
+#define TBOOT_LOG_UUID { 0x26, 0x25, 0x19, 0xc0, 0x30, 0x6b, 0xb4, 0x4d, \
+ 0x4c, 0x84, 0xa3, 0xe9, 0x53, 0xb8, 0x81, 0x74 }
+
+#define TBOOT_SERIAL_LOG_ADDR 0x60000
+#define TBOOT_SERIAL_LOG_SIZE 0x08000
+#define LOG_MAX_SIZE_OFF 16
+#define LOG_BUF_OFF 24
+
+static uint8_t tboot_log_uuid[16] = TBOOT_LOG_UUID;
+
+static ssize_t tboot_log_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
+{
+ void __iomem *log_base;
+ u8 log_uuid[16];
+ u32 max_size;
+ void *kbuf;
+ int ret = -EFAULT;
+
+ log_base = ioremap_nocache(TBOOT_SERIAL_LOG_ADDR, TBOOT_SERIAL_LOG_SIZE);
+ if (!log_base)
+ return ret;
+
+ memcpy_fromio(log_uuid, log_base, sizeof(log_uuid));
+ if (memcmp(&tboot_log_uuid, log_uuid, sizeof(log_uuid)))
+ goto err_iounmap;
+
+ max_size = readl(log_base + LOG_MAX_SIZE_OFF);
+ if (*ppos >= max_size) {
+ ret = 0;
+ goto err_iounmap;
+ }
+
+ if (*ppos + count > max_size)
+ count = max_size - *ppos;
+
+ kbuf = kmalloc(count, GFP_KERNEL);
+ if (!kbuf) {
+ ret = -ENOMEM;
+ goto err_iounmap;
+ }
+
+ memcpy_fromio(kbuf, log_base + LOG_BUF_OFF + *ppos, count);
+ if (copy_to_user(user_buf, kbuf, count))
+ goto err_kfree;
+
+ *ppos += count;
+
+ ret = count;
+
+err_kfree:
+ kfree(kbuf);
+
+err_iounmap:
+ iounmap(log_base);
+
+ return ret;
+}
+
+static const struct file_operations tboot_log_fops = {
+ .read = tboot_log_read,
+ .llseek = default_llseek,
+};
+
+#endif /* CONFIG_DEBUG_FS */
+
static __init int tboot_late_init(void)
{
if (!tboot_enabled())
@@ -348,7 +425,13 @@ static __init int tboot_late_init(void)
atomic_set(&ap_wfs_count, 0);
register_hotcpu_notifier(&tboot_cpu_notifier);
+#ifdef CONFIG_DEBUG_FS
+ debugfs_create_file("tboot_log", S_IRUSR,
+ arch_debugfs_dir, NULL, &tboot_log_fops);
+#endif
+
acpi_os_set_prepare_sleep(&tboot_sleep);
+ acpi_os_set_prepare_extended_sleep(&tboot_extended_sleep);
return 0;
}
diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c
new file mode 100644
index 0000000..1c113db
--- /dev/null
+++ b/arch/x86/kernel/tracepoint.c
@@ -0,0 +1,59 @@
+/*
+ * Code for supporting irq vector tracepoints.
+ *
+ * Copyright (C) 2013 Seiji Aguchi <seiji.aguchi@hds.com>
+ *
+ */
+#include <asm/hw_irq.h>
+#include <asm/desc.h>
+#include <linux/atomic.h>
+
+atomic_t trace_idt_ctr = ATOMIC_INIT(0);
+struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) trace_idt_table };
+
+/* No need to be aligned, but done to keep all IDTs defined the same way. */
+gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+
+static int trace_irq_vector_refcount;
+static DEFINE_MUTEX(irq_vector_mutex);
+
+static void set_trace_idt_ctr(int val)
+{
+ atomic_set(&trace_idt_ctr, val);
+ /* Ensure the trace_idt_ctr is set before sending IPI */
+ wmb();
+}
+
+static void switch_idt(void *arg)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ load_current_idt();
+ local_irq_restore(flags);
+}
+
+void trace_irq_vector_regfunc(void)
+{
+ mutex_lock(&irq_vector_mutex);
+ if (!trace_irq_vector_refcount) {
+ set_trace_idt_ctr(1);
+ smp_call_function(switch_idt, NULL, 0);
+ switch_idt(NULL);
+ }
+ trace_irq_vector_refcount++;
+ mutex_unlock(&irq_vector_mutex);
+}
+
+void trace_irq_vector_unregfunc(void)
+{
+ mutex_lock(&irq_vector_mutex);
+ trace_irq_vector_refcount--;
+ if (!trace_irq_vector_refcount) {
+ set_trace_idt_ctr(0);
+ smp_call_function(switch_idt, NULL, 0);
+ switch_idt(NULL);
+ }
+ mutex_unlock(&irq_vector_mutex);
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 772e2a8..8c8093b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -58,24 +58,25 @@
#include <asm/mce.h>
#include <asm/fixmap.h>
#include <asm/mach_traps.h>
+#include <asm/alternative.h>
#ifdef CONFIG_X86_64
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
+
+/* No need to be aligned, but done to keep all IDTs defined the same way. */
+gate_desc debug_idt_table[NR_VECTORS] __page_aligned_bss;
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
asmlinkage int system_call(void);
-
-/*
- * The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.
- */
-gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
#endif
+/* Must be page-aligned because the real IDT is used in a fixmap. */
+gate_desc idt_table[NR_VECTORS] __page_aligned_bss;
+
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
@@ -254,6 +255,9 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_DF;
+#ifdef CONFIG_DOUBLEFAULT
+ df_debug(regs, error_code);
+#endif
/*
* This is always a kernel trap and never fixable (and thus must
* never return).
@@ -324,6 +328,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
ftrace_int3_handler(regs))
return;
#endif
+ if (poke_int3_handler(regs))
+ return;
+
prev_state = exception_enter();
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
@@ -437,7 +444,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
/* Store the virtualized DR6 value */
tsk->thread.debugreg6 = dr6;
- if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+ if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code,
SIGTRAP) == NOTIFY_STOP)
goto exit;
@@ -785,7 +792,7 @@ void __init trap_init(void)
x86_init.irqs.trap_init();
#ifdef CONFIG_X86_64
- memcpy(&nmi_idt_table, &idt_table, IDT_ENTRIES * 16);
+ memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
set_nmi_gate(X86_TRAP_DB, &debug);
set_nmi_gate(X86_TRAP_BP, &int3);
#endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 098b3cf..930e5d4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -89,6 +89,12 @@ int check_tsc_unstable(void)
}
EXPORT_SYMBOL_GPL(check_tsc_unstable);
+int check_tsc_disabled(void)
+{
+ return tsc_disabled;
+}
+EXPORT_SYMBOL_GPL(check_tsc_disabled);
+
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
@@ -824,7 +830,7 @@ static void __init check_system_tsc_reliable(void)
* Make an educated guess if the TSC is trustworthy and synchronized
* over all CPUs.
*/
-__cpuinit int unsynchronized_tsc(void)
+int unsynchronized_tsc(void)
{
if (!cpu_has_tsc || tsc_unstable)
return 1;
@@ -1020,7 +1026,7 @@ void __init tsc_init(void)
* been calibrated. This assumes that CONSTANT_TSC applies to all
* cpus in the socket - this should be a safe assumption.
*/
-unsigned long __cpuinit calibrate_delay_is_known(void)
+unsigned long calibrate_delay_is_known(void)
{
int i, cpu = smp_processor_id();
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index fc25e60..adfdf56 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -25,24 +25,24 @@
* Entry/exit counters that make sure that both CPUs
* run the measurement code at once:
*/
-static __cpuinitdata atomic_t start_count;
-static __cpuinitdata atomic_t stop_count;
+static atomic_t start_count;
+static atomic_t stop_count;
/*
* We use a raw spinlock in this exceptional case, because
* we want to have the fastest, inlined, non-debug version
* of a critical section, to be able to prove TSC time-warps:
*/
-static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-static __cpuinitdata cycles_t last_tsc;
-static __cpuinitdata cycles_t max_warp;
-static __cpuinitdata int nr_warps;
+static cycles_t last_tsc;
+static cycles_t max_warp;
+static int nr_warps;
/*
* TSC-warp measurement loop running on both CPUs:
*/
-static __cpuinit void check_tsc_warp(unsigned int timeout)
+static void check_tsc_warp(unsigned int timeout)
{
cycles_t start, now, prev, end;
int i;
@@ -121,7 +121,7 @@ static inline unsigned int loop_timeout(int cpu)
* Source CPU calls into this - it waits for the freshly booted
* target CPU to arrive and then starts the measurement:
*/
-void __cpuinit check_tsc_sync_source(int cpu)
+void check_tsc_sync_source(int cpu)
{
int cpus = 2;
@@ -187,7 +187,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
/*
* Freshly booted CPUs call into this:
*/
-void __cpuinit check_tsc_sync_target(void)
+void check_tsc_sync_target(void)
{
int cpus = 2;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 9a907a6..1f96f93 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -331,7 +331,7 @@ sigsegv:
* Assume __initcall executes before all user space. Hopefully kmod
* doesn't violate that. We'll find out if it does.
*/
-static void __cpuinit vsyscall_set_cpu(int cpu)
+static void vsyscall_set_cpu(int cpu)
{
unsigned long d;
unsigned long node = 0;
@@ -353,13 +353,13 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
-static void __cpuinit cpu_vsyscall_init(void *arg)
+static void cpu_vsyscall_init(void *arg)
{
/* preemption should be already off */
vsyscall_set_cpu(raw_smp_processor_id());
}
-static int __cpuinit
+static int
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
long cpu = (long)arg;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 45a14db..8ce0072 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -25,7 +25,7 @@
#include <asm/iommu.h>
#include <asm/mach_traps.h>
-void __cpuinit x86_init_noop(void) { }
+void x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
int __init iommu_init_noop(void) { return 0; }
void iommu_shutdown_noop(void) { }
@@ -85,7 +85,7 @@ struct x86_init_ops x86_init __initdata = {
},
};
-struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
+struct x86_cpuinit_ops x86_cpuinit = {
.early_percpu_clock_init = x86_init_noop,
.setup_percpu_clockev = setup_secondary_APIC_clock,
};
@@ -107,6 +107,8 @@ struct x86_platform_ops x86_platform = {
};
EXPORT_SYMBOL_GPL(x86_platform);
+
+#if defined(CONFIG_PCI_MSI)
struct x86_msi_ops x86_msi = {
.setup_msi_irqs = native_setup_msi_irqs,
.compose_msi_msg = native_compose_msi_msg,
@@ -116,6 +118,28 @@ struct x86_msi_ops x86_msi = {
.setup_hpet_msi = default_setup_hpet_msi,
};
+/* MSI arch specific hooks */
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ return x86_msi.setup_msi_irqs(dev, nvec, type);
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *dev)
+{
+ x86_msi.teardown_msi_irqs(dev);
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+ x86_msi.teardown_msi_irq(irq);
+}
+
+void arch_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+ x86_msi.restore_msi_irqs(dev, irq);
+}
+#endif
+
struct x86_io_apic_ops x86_io_apic_ops = {
.init = native_io_apic_init_mappings,
.read = native_io_apic_read,
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index ada87a3..422fd82 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -243,7 +243,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
if (!access_ok(VERIFY_WRITE, buf, size))
return -EACCES;
- if (!HAVE_HWFP)
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(current, NULL, 0,
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
@@ -350,11 +350,10 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
if (!used_math() && init_fpu(tsk))
return -1;
- if (!HAVE_HWFP) {
+ if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, buf) != 0;
- }
if (use_xsave()) {
struct _fpx_sw_bytes fx_sw_user;
@@ -574,7 +573,7 @@ static void __init xstate_enable_boot_cpu(void)
* This is somewhat obfuscated due to the lack of powerful enough
* overrides for the section checks.
*/
-void __cpuinit xsave_init(void)
+void xsave_init(void)
{
static __refdata void (*next_func)(void) = xstate_enable_boot_cpu;
void (*this_func)(void);
@@ -595,7 +594,7 @@ static inline void __init eager_fpu_init_bp(void)
setup_init_fpu_buf();
}
-void __cpuinit eager_fpu_init(void)
+void eager_fpu_init(void)
{
static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
OpenPOWER on IntegriCloud