summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-07-18 22:50:34 +0200
committerIngo Molnar <mingo@elte.hu>2008-07-18 22:50:34 +0200
commita208f37a465e222218974ab20a31b42b7b4893b2 (patch)
tree77c6acdd4be32024330a14f2618b814126ce7a20 /arch/x86
parent511d9d34183662aada3890883e860b151d707e22 (diff)
parent5b664cb235e97afbf34db9c4d77f08ebd725335e (diff)
downloadop-kernel-dev-a208f37a465e222218974ab20a31b42b7b4893b2.zip
op-kernel-dev-a208f37a465e222218974ab20a31b42b7b4893b2.tar.gz
Merge branch 'linus' into x86/x2apic
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig25
-rw-r--r--arch/x86/Kconfig.debug27
-rw-r--r--arch/x86/kernel/Makefile7
-rw-r--r--arch/x86/kernel/acpi/boot.c1
-rw-r--r--arch/x86/kernel/acpi/processor.c6
-rw-r--r--arch/x86/kernel/acpi/sleep.c32
-rw-r--r--arch/x86/kernel/alternative.c22
-rw-r--r--arch/x86/kernel/apic_32.c4
-rw-r--r--arch/x86/kernel/apm_32.c12
-rw-r--r--arch/x86/kernel/cpu/amd_64.c1
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c10
-rw-r--r--arch/x86/kernel/cpu/common_64.c14
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/intel_64.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/cpuid.c27
-rw-r--r--arch/x86/kernel/e820.c19
-rw-r--r--arch/x86/kernel/early-quirks.c26
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/entry_32.S72
-rw-r--r--arch/x86/kernel/entry_64.S109
-rw-r--r--arch/x86/kernel/ftrace.c141
-rw-r--r--arch/x86/kernel/hpet.c20
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c9
-rw-r--r--arch/x86/kernel/io_apic_32.c12
-rw-r--r--arch/x86/kernel/io_apic_64.c12
-rw-r--r--arch/x86/kernel/irqinit_64.c4
-rw-r--r--arch/x86/kernel/ldt.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/microcode.c6
-rw-r--r--arch/x86/kernel/msr.c16
-rw-r--r--arch/x86/kernel/nmi.c6
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/pci-gart_64.c8
-rw-r--r--arch/x86/kernel/process.c30
-rw-r--r--arch/x86/kernel/process_32.c3
-rw-r--r--arch/x86/kernel/process_64.c3
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/setup.c13
-rw-r--r--arch/x86/kernel/setup_percpu.c8
-rw-r--r--arch/x86/kernel/smp.c158
-rw-r--r--arch/x86/kernel/smpboot.c4
-rw-r--r--arch/x86/kernel/smpcommon.c56
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/tlb_32.c2
-rw-r--r--arch/x86/kernel/tlb_64.c2
-rw-r--r--arch/x86/kernel/traps_64.c25
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S8
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S10
-rw-r--r--arch/x86/kernel/vsyscall_64.c7
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c11
-rw-r--r--arch/x86/kvm/vmx.c4
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/msr-on-cpu.c8
-rw-r--r--arch/x86/lib/thunk_32.S47
-rw-r--r--arch/x86/lib/thunk_64.S19
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c98
-rw-r--r--arch/x86/mm/Makefile5
-rw-r--r--arch/x86/mm/fault.c13
-rw-r--r--arch/x86/mm/init_32.c4
-rw-r--r--arch/x86/mm/init_64.c26
-rw-r--r--arch/x86/mm/ioremap.c11
-rw-r--r--arch/x86/mm/kmmio.c510
-rw-r--r--arch/x86/mm/mmio-mod.c515
-rw-r--r--arch/x86/mm/pageattr.c9
-rw-r--r--arch/x86/mm/pat.c4
-rw-r--r--arch/x86/mm/pf_in.c489
-rw-r--r--arch/x86/mm/pf_in.h39
-rw-r--r--arch/x86/mm/srat_32.c3
-rw-r--r--arch/x86/mm/testmmiotrace.c71
-rw-r--r--arch/x86/oprofile/nmi_int.c10
-rw-r--r--arch/x86/pci/amd_bus.c2
-rw-r--r--arch/x86/pci/common.c30
-rw-r--r--arch/x86/pci/early.c60
-rw-r--r--arch/x86/pci/i386.c4
-rw-r--r--arch/x86/pci/irq.c120
-rw-r--r--arch/x86/pci/pci.h1
-rw-r--r--arch/x86/vdso/vclock_gettime.c15
-rw-r--r--arch/x86/vdso/vgetcpu.c3
-rw-r--r--arch/x86/xen/enlighten.c4
-rw-r--r--arch/x86/xen/mmu.c2
-rw-r--r--arch/x86/xen/smp.c135
-rw-r--r--arch/x86/xen/xen-ops.h9
89 files changed, 2669 insertions, 651 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0bf8391..baca554 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
select HAVE_OPROFILE
select HAVE_KPROBES
select HAVE_KRETPROBES
+ select HAVE_DYNAMIC_FTRACE
+ select HAVE_FTRACE
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
select HAVE_ARCH_KGDB if !X86_VOYAGER
@@ -168,6 +170,7 @@ config GENERIC_PENDING_IRQ
config X86_SMP
bool
depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
+ select USE_GENERIC_SMP_HELPERS
default y
config X86_32_SMP
@@ -445,7 +448,6 @@ config PARAVIRT_DEBUG
config MEMTEST
bool "Memtest"
depends on X86_64
- default y
help
This option adds a kernel parameter 'memtest', which allows memtest
to be set.
@@ -453,7 +455,7 @@ config MEMTEST
memtest=1, mean do 1 test pattern;
...
memtest=4, mean do 4 test patterns.
- If you are unsure how to answer this question, answer Y.
+ If you are unsure how to answer this question, answer N.
config X86_SUMMIT_NUMA
def_bool y
@@ -1133,21 +1135,18 @@ config MTRR
See <file:Documentation/mtrr.txt> for more information.
config MTRR_SANITIZER
- def_bool y
+ bool
prompt "MTRR cleanup support"
depends on MTRR
help
- Convert MTRR layout from continuous to discrete, so some X driver
- could add WB entries.
+ Convert MTRR layout from continuous to discrete, so X drivers can
+ add writeback entries.
- Say N here if you see bootup problems (boot crash, boot hang,
- spontaneous reboots).
+ Can be disabled with disable_mtrr_cleanup on the kernel command line.
+ The largest mtrr entry size for a continous block can be set with
+ mtrr_chunk_size.
- Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
- could be used to send largest mtrr entry size for continuous block
- to hold holes (aka. UC entries)
-
- If unsure, say Y.
+ If unsure, say N.
config MTRR_SANITIZER_ENABLE_DEFAULT
int "MTRR cleanup enable value (0-1)"
@@ -1164,7 +1163,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
depends on MTRR_SANITIZER
help
mtrr cleanup spare entries default, it can be changed via
- mtrr_spare_reg_nr=
+ mtrr_spare_reg_nr=N on the kernel command line.
config X86_PAT
bool
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index acc0271..ae36bfa 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -171,6 +171,33 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
+config MMIOTRACE_HOOKS
+ bool
+
+config MMIOTRACE
+ bool "Memory mapped IO tracing"
+ depends on DEBUG_KERNEL && PCI
+ select TRACING
+ select MMIOTRACE_HOOKS
+ help
+ Mmiotrace traces Memory Mapped I/O access and is meant for
+ debugging and reverse engineering. It is called from the ioremap
+ implementation and works via page faults. Tracing is disabled by
+ default and can be enabled at run-time.
+
+ See Documentation/tracers/mmiotrace.txt.
+ If you are not helping to develop drivers, say N.
+
+config MMIOTRACE_TEST
+ tristate "Test module for mmiotrace"
+ depends on MMIOTRACE && m
+ help
+ This is a dumb module for testing mmiotrace. It is very dangerous
+ as it will write garbage to IO memory starting at a given address.
+ However, it should be safe to use on e.g. unused portion of VRAM.
+
+ Say N, unless you absolutely know what you are doing.
+
#
# IO delay types:
#
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 81280e9..673f1d1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,12 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
+ifdef CONFIG_FTRACE
+# Do not profile debug utilities
+CFLAGS_REMOVE_tsc.o = -pg
+CFLAGS_REMOVE_rtc.o = -pg
+endif
+
#
# vsyscalls (which work on the user stack) should have
# no stack-protector checks:
@@ -57,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b314bcd..b41b27a 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1411,7 +1411,6 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
acpi_skip_timer_override = 1;
- force_mask_ioapic_irq_2();
return 0;
}
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index de2d2e4..7c074ee 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_ACPI))
buf[2] |= ACPI_PDC_T_FFH;
+ /*
+ * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+ */
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
+ buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+
obj->type = ACPI_TYPE_BUFFER;
obj->buffer.length = 12;
obj->buffer.pointer = (u8 *) buf;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e6a4b56..868de3d 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
static char temp_stack[10240];
#endif
+/* XXX: this macro should move to asm-x86/segment.h and be shared with the
+ boot code... */
+#define GDT_ENTRY(flags, base, limit) \
+ (((u64)(base & 0xff000000) << 32) | \
+ ((u64)flags << 40) | \
+ ((u64)(limit & 0x00ff0000) << 32) | \
+ ((u64)(base & 0x00ffffff) << 16) | \
+ ((u64)(limit & 0x0000ffff)))
+
/**
* acpi_save_state_mem - save kernel state
*
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
header->video_mode = saved_video_mode;
header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
+
+ /*
+ * Set up the wakeup GDT. We set these up as Big Real Mode,
+ * that is, with limits set to 4 GB. At least the Lenovo
+ * Thinkpad X61 is known to need this for the video BIOS
+ * initialization quirk to work; this is likely to also
+ * be the case for other laptops or integrated video devices.
+ */
+
/* GDT[0]: GDT self-pointer */
header->wakeup_gdt[0] =
(u64)(sizeof(header->wakeup_gdt) - 1) +
((u64)(acpi_wakeup_address +
((char *)&header->wakeup_gdt - (char *)acpi_realmode))
<< 16);
- /* GDT[1]: real-mode-like code segment */
- header->wakeup_gdt[1] = (0x009bULL << 40) +
- ((u64)acpi_wakeup_address << 16) + 0xffff;
- /* GDT[2]: real-mode-like data segment */
- header->wakeup_gdt[2] = (0x0093ULL << 40) +
- ((u64)acpi_wakeup_address << 16) + 0xffff;
+ /* GDT[1]: big real mode-like code segment */
+ header->wakeup_gdt[1] =
+ GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
+ /* GDT[2]: big real mode-like data segment */
+ header->wakeup_gdt[2] =
+ GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -140,6 +158,8 @@ static int __init acpi_sleep_setup(char *str)
acpi_realmode_flags |= 2;
if (strncmp(str, "s3_beep", 7) == 0)
acpi_realmode_flags |= 4;
+ if (strncmp(str, "old_ordering", 12) == 0)
+ acpi_old_suspend_ordering();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857..2763cb3 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
#ifdef CONFIG_X86_64
extern char __vsyscall_0;
-static inline const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
{
return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
{ -1, NULL }
};
-static const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
{
const unsigned char *const *noptable = intel_nops;
int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
+static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1; /* protected by smp_alt */
void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
__func__, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
list_add_tail(&smp->next, &smp_alt_modules);
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(smp->locks, smp->locks_end,
smp->text, smp->text_end);
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
if (smp_alt_once || noreplace_smp)
return;
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
list_del(&item->next);
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
DPRINTK("%s: %s\n", __func__, item->name);
kfree(item);
return;
}
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
- spin_lock(&smp_alt);
+ mutex_lock(&smp_alt);
/*
* Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
mod->text, mod->text_end);
}
smp_mode = smp;
- spin_unlock(&smp_alt);
+ mutex_unlock(&smp_alt);
}
#endif
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index cb54d9e..3410196 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1374,6 +1374,10 @@ void __init smp_intr_init(void)
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+ /* IPI for single call function */
+ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
}
#endif
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 00e6d13..bf9b441 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -204,6 +204,7 @@
#include <linux/module.h>
#include <linux/poll.h>
+#include <linux/smp_lock.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/timer.h>
@@ -1212,9 +1213,9 @@ static int suspend(int vetoable)
if (err != APM_SUCCESS)
apm_error("suspend", err);
err = (err == APM_SUCCESS) ? 0 : -EIO;
- device_power_up();
+ device_power_up(PMSG_RESUME);
local_irq_enable();
- device_resume();
+ device_resume(PMSG_RESUME);
queue_event(APM_NORMAL_RESUME, NULL);
spin_lock(&user_list_lock);
for (as = user_list; as != NULL; as = as->next) {
@@ -1239,7 +1240,7 @@ static void standby(void)
apm_error("standby", err);
local_irq_disable();
- device_power_up();
+ device_power_up(PMSG_RESUME);
local_irq_enable();
}
@@ -1325,7 +1326,7 @@ static void check_events(void)
ignore_bounce = 1;
if ((event != APM_NORMAL_RESUME)
|| (ignore_normal_resume == 0)) {
- device_resume();
+ device_resume(PMSG_RESUME);
queue_event(event, NULL);
}
ignore_normal_resume = 0;
@@ -1549,10 +1550,12 @@ static int do_open(struct inode *inode, struct file *filp)
{
struct apm_user *as;
+ lock_kernel();
as = kmalloc(sizeof(*as), GFP_KERNEL);
if (as == NULL) {
printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
sizeof(*as));
+ unlock_kernel();
return -ENOMEM;
}
as->magic = APM_BIOS_MAGIC;
@@ -1574,6 +1577,7 @@ static int do_open(struct inode *inode, struct file *filp)
user_list = as;
spin_unlock(&user_list_lock);
filp->private_data = as;
+ unlock_kernel();
return 0;
}
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index bd182b76..7c36fb8 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -200,6 +200,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* benefit in doing so.
*/
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+ printk(KERN_DEBUG "tseg: %010llx\n", tseg);
if ((tseg>>PMD_SHIFT) <
(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
((tseg>>PMD_SHIFT) <
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 2026d21..1d181c4 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -16,16 +16,6 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
- /* Cache sizes */
- unsigned n;
-
- n = c->extended_cpuid_level;
- if (n >= 0x80000008) {
- unsigned eax = cpuid_eax(0x80000008);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
-
if (c->x86 == 0x6 && c->x86_model >= 0xf) {
c->x86_cache_alignment = c->x86_clflush_size * 2;
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index e7bf3c2..c6bee77 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -98,7 +98,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
{
- unsigned int n, dummy, eax, ebx, ecx, edx;
+ unsigned int n, dummy, ebx, ecx, edx;
n = c->extended_cpuid_level;
@@ -121,11 +121,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
c->x86_cache_size, ecx & 0xFF);
}
- if (n >= 0x80000008) {
- cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
}
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -314,6 +309,13 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
+ if (c->extended_cpuid_level >= 0x80000008) {
+ u32 eax = cpuid_eax(0x80000008);
+
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
+ }
+
/* Assume all 64-bit CPUs support 32-bit syscall */
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c..70609ef 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (cpu_has_bts)
ds_init_intel(c);
+
+#ifdef CONFIG_X86_NUMAQ
+ numaq_tsc_disable();
+#endif
}
static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
index 02f7733..1019c58 100644
--- a/arch/x86/kernel/cpu/intel_64.c
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -54,9 +54,6 @@ static void __cpuinit srat_detect_node(void)
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
- /* Cache sizes */
- unsigned n;
-
init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
@@ -78,13 +75,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (cpu_has_bts)
ds_init_intel(c);
- n = c->extended_cpuid_level;
- if (n >= 0x80000008) {
- unsigned eax = cpuid_eax(0x80000008);
- c->x86_virt_bits = (eax >> 8) & 0xff;
- c->x86_phys_bits = eax & 0xff;
- }
-
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
if (c->x86 == 6)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 501ca1c..c4a7ec3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include <linux/string.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info)
static void mcheck_timer(struct work_struct *work)
{
- on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
+ on_each_cpu(mcheck_check_cpu, NULL, 1);
/*
* Alert userspace if needed. If we logged an MCE, reduce the
@@ -532,10 +533,12 @@ static int open_exclu; /* already open exclusive? */
static int mce_open(struct inode *inode, struct file *file)
{
+ lock_kernel();
spin_lock(&mce_state_lock);
if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
spin_unlock(&mce_state_lock);
+ unlock_kernel();
return -EBUSY;
}
@@ -544,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file)
open_count++;
spin_unlock(&mce_state_lock);
+ unlock_kernel();
return nonseekable_open(inode, file);
}
@@ -617,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
* Collect entries that were still getting written before the
* synchronize.
*/
- on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
+ on_each_cpu(collect_tscs, cpu_tsc, 1);
for (i = next; i < MCE_LOG_LEN; i++) {
if (mcelog.entry[i].finished &&
mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -742,7 +746,7 @@ static void mce_restart(void)
if (next_interval)
cancel_delayed_work(&mcheck_work);
/* Timer race is harmless here */
- on_each_cpu(mce_init, NULL, 1, 1);
+ on_each_cpu(mce_init, NULL, 1);
next_interval = check_interval * HZ;
if (next_interval)
schedule_delayed_work(&mcheck_work,
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c..cc1fccd 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
static void mce_work_fn(struct work_struct *work)
{
- on_each_cpu(mce_checkregs, NULL, 1, 1);
+ on_each_cpu(mce_checkregs, NULL, 1);
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 105afe1..6f23969 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -223,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
atomic_set(&data.gate,0);
/* Start the ball rolling on other CPUs */
- if (smp_call_function(ipi_handler, &data, 1, 0) != 0)
+ if (smp_call_function(ipi_handler, &data, 0) != 0)
panic("mtrr: timed out waiting for other CPUs\n");
local_irq_save(flags);
@@ -1682,7 +1682,7 @@ void mtrr_ap_init(void)
*/
void mtrr_save_state(void)
{
- smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+ smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
}
static int __init mtrr_init_finialize(void)
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 2e9bef6..6d4bdc0 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -189,7 +189,7 @@ void disable_lapic_nmi_watchdog(void)
if (atomic_read(&nmi_active) <= 0)
return;
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+ on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
if (wd_ops)
wd_ops->unreserve();
@@ -213,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
return;
}
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+ on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
touch_nmi_watchdog();
}
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index daff52a..2de5fa2 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,6 +33,7 @@
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/smp.h>
+#include <linux/smp_lock.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
@@ -95,7 +96,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
for (; count; count -= 16) {
cmd.eax = pos;
cmd.ecx = pos >> 32;
- smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1);
+ smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
if (copy_to_user(tmp, &cmd, 16))
return -EFAULT;
tmp += 16;
@@ -107,15 +108,23 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
static int cpuid_open(struct inode *inode, struct file *file)
{
- unsigned int cpu = iminor(file->f_path.dentry->d_inode);
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
+ unsigned int cpu;
+ struct cpuinfo_x86 *c;
+ int ret = 0;
+
+ lock_kernel();
+
+ cpu = iminor(file->f_path.dentry->d_inode);
+ if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ ret = -ENXIO; /* No such CPU */
+ goto out;
+ }
+ c = &cpu_data(cpu);
if (c->cpuid_level < 0)
- return -EIO; /* CPUID not supported */
-
- return 0;
+ ret = -EIO; /* CPUID not supported */
+out:
+ unlock_kernel();
+ return ret;
}
/*
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a5383ae..28c2918 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1049,11 +1049,6 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
#endif
/*
- * Last pfn which the user wants to use.
- */
-unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
-
-/*
* Find the highest page frame number we have available
*/
static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
@@ -1085,8 +1080,6 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
if (last_pfn > max_arch_pfn)
last_pfn = max_arch_pfn;
- if (last_pfn > end_user_pfn)
- last_pfn = end_user_pfn;
printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
last_pfn, max_arch_pfn);
@@ -1131,12 +1124,6 @@ int __init e820_find_active_region(const struct e820entry *ei,
if (*ei_endpfn > last_pfn)
*ei_endpfn = last_pfn;
- /* Obey end_user_pfn to save on memmap */
- if (*ei_startpfn >= end_user_pfn)
- return 0;
- if (*ei_endpfn > end_user_pfn)
- *ei_endpfn = end_user_pfn;
-
return 1;
}
@@ -1201,7 +1188,6 @@ static int __init parse_memopt(char *p)
userdef = 1;
mem_size = memparse(p, &p);
- end_user_pfn = mem_size>>PAGE_SHIFT;
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
@@ -1245,10 +1231,9 @@ static int __init parse_memmap_opt(char *p)
} else if (*p == '$') {
start_at = memparse(p+1, &p);
e820_add_region(start_at, mem_size, E820_RESERVED);
- } else {
- end_user_pfn = (mem_size >> PAGE_SHIFT);
+ } else
e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
- }
+
return *p == '\0' ? 0 : -EINVAL;
}
early_param("memmap", parse_memmap_opt);
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a4665f3..a0e11c0 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = {
{}
};
-static void __init check_dev_quirk(int num, int slot, int func)
+/**
+ * check_dev_quirk - apply early quirks to a given PCI device
+ * @num: bus number
+ * @slot: slot number
+ * @func: PCI function
+ *
+ * Check the vendor & device ID against the early quirks table.
+ *
+ * If the device is single function, let early_quirks() know so we don't
+ * poke at this device again.
+ */
+static int __init check_dev_quirk(int num, int slot, int func)
{
u16 class;
u16 vendor;
@@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func)
class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
if (class == 0xffff)
- return;
+ return -1; /* no class, treat as single function */
vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
@@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func)
type = read_pci_config_byte(num, slot, func,
PCI_HEADER_TYPE);
if (!(type & 0x80))
- return;
+ return -1;
+
+ return 0;
}
void __init early_quirks(void)
@@ -167,6 +180,9 @@ void __init early_quirks(void)
/* Poor man's PCI discovery */
for (num = 0; num < 32; num++)
for (slot = 0; slot < 32; slot++)
- for (func = 0; func < 8; func++)
- check_dev_quirk(num, slot, func);
+ for (func = 0; func < 8; func++) {
+ /* Only probe function 0 on single fn devices */
+ if (check_dev_quirk(num, slot, func))
+ break;
+ }
}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 643fd86..ff9e735 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -196,7 +196,7 @@ static struct console simnow_console = {
static struct console *early_console = &early_vga_console;
static int early_console_initialized;
-void early_printk(const char *fmt, ...)
+asmlinkage void early_printk(const char *fmt, ...)
{
char buf[512];
int n;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index cfe28a7..6bc07f0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
+#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
/*
@@ -1111,6 +1112,77 @@ ENDPROC(xen_failsafe_callback)
#endif /* CONFIG_XEN */
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+ENTRY(mcount)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl mcount_call
+mcount_call:
+ call ftrace_stub
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+ ret
+END(mcount)
+
+ENTRY(ftrace_caller)
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+.globl ftrace_stub
+ftrace_stub:
+ ret
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+
+ENTRY(mcount)
+ cmpl $ftrace_stub, ftrace_trace_function
+ jnz trace
+.globl ftrace_stub
+ftrace_stub:
+ ret
+
+ /* taken from glibc */
+trace:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ movl 0xc(%esp), %eax
+ movl 0x4(%ebp), %edx
+ subl $MCOUNT_INSN_SIZE, %eax
+
+ call *ftrace_trace_function
+
+ popl %edx
+ popl %ecx
+ popl %eax
+
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
.section .rodata,"a"
#include "syscall_table_32.S"
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bb4e22f..ae63e58 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
#include <asm/page.h>
#include <asm/irqflags.h>
#include <asm/paravirt.h>
+#include <asm/ftrace.h>
.code64
+#ifdef CONFIG_FTRACE
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(mcount)
+
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl mcount_call
+mcount_call:
+ call ftrace_stub
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+ retq
+END(mcount)
+
+ENTRY(ftrace_caller)
+
+ /* taken from glibc */
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ movq 8(%rbp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+.globl ftrace_call
+ftrace_call:
+ call ftrace_stub
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+.globl ftrace_stub
+ftrace_stub:
+ retq
+END(ftrace_caller)
+
+#else /* ! CONFIG_DYNAMIC_FTRACE */
+ENTRY(mcount)
+ cmpq $ftrace_stub, ftrace_trace_function
+ jnz trace
+.globl ftrace_stub
+ftrace_stub:
+ retq
+
+trace:
+ /* taken from glibc */
+ subq $0x38, %rsp
+ movq %rax, (%rsp)
+ movq %rcx, 8(%rsp)
+ movq %rdx, 16(%rsp)
+ movq %rsi, 24(%rsp)
+ movq %rdi, 32(%rsp)
+ movq %r8, 40(%rsp)
+ movq %r9, 48(%rsp)
+
+ movq 0x38(%rsp), %rdi
+ movq 8(%rbp), %rsi
+ subq $MCOUNT_INSN_SIZE, %rdi
+
+ call *ftrace_trace_function
+
+ movq 48(%rsp), %r9
+ movq 40(%rsp), %r8
+ movq 32(%rsp), %rdi
+ movq 24(%rsp), %rsi
+ movq 16(%rsp), %rdx
+ movq 8(%rsp), %rcx
+ movq (%rsp), %rax
+ addq $0x38, %rsp
+
+ jmp ftrace_stub
+END(mcount)
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FTRACE */
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -710,6 +816,9 @@ END(invalidate_interrupt\num)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
+ENTRY(call_function_single_interrupt)
+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
+END(call_function_single_interrupt)
ENTRY(irq_move_cleanup_interrupt)
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
END(irq_move_cleanup_interrupt)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 0000000..ab115cd
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes to Ingo Molnar, for suggesting the idea.
+ * Mathieu Desnoyers, for suggesting postponing the modifications.
+ * Arjan van de Ven, for keeping me straight, and explaining to me
+ * the dangers of modifying code on the run.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/alternative.h>
+#include <asm/ftrace.h>
+
+
+/* Long is fine, even if it is only 4 bytes ;-) */
+static long *ftrace_nop;
+
+union ftrace_code_union {
+ char code[MCOUNT_INSN_SIZE];
+ struct {
+ char e8;
+ int offset;
+ } __attribute__((packed));
+};
+
+
+static int notrace ftrace_calc_offset(long ip, long addr)
+{
+ return (int)(addr - ip);
+}
+
+notrace unsigned char *ftrace_nop_replace(void)
+{
+ return (char *)ftrace_nop;
+}
+
+notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+ static union ftrace_code_union calc;
+
+ calc.e8 = 0xe8;
+ calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
+
+ /*
+ * No locking needed, this must be called via kstop_machine
+ * which in essence is like running on a uniprocessor machine.
+ */
+ return calc.code;
+}
+
+notrace int
+ftrace_modify_code(unsigned long ip, unsigned char *old_code,
+ unsigned char *new_code)
+{
+ unsigned replaced;
+ unsigned old = *(unsigned *)old_code; /* 4 bytes */
+ unsigned new = *(unsigned *)new_code; /* 4 bytes */
+ unsigned char newch = new_code[4];
+ int faulted = 0;
+
+ /*
+ * Note: Due to modules and __init, code can
+ * disappear and change, we need to protect against faulting
+ * as well as code changing.
+ *
+ * No real locking needed, this code is run through
+ * kstop_machine.
+ */
+ asm volatile (
+ "1: lock\n"
+ " cmpxchg %3, (%2)\n"
+ " jnz 2f\n"
+ " movb %b4, 4(%2)\n"
+ "2:\n"
+ ".section .fixup, \"ax\"\n"
+ "3: movl $1, %0\n"
+ " jmp 2b\n"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : "=r"(faulted), "=a"(replaced)
+ : "r"(ip), "r"(new), "c"(newch),
+ "0"(faulted), "a"(old)
+ : "memory");
+ sync_core();
+
+ if (replaced != old && replaced != new)
+ faulted = 2;
+
+ return faulted;
+}
+
+notrace int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ unsigned long ip = (unsigned long)(&ftrace_call);
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+ int ret;
+
+ memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, (unsigned long)func);
+ ret = ftrace_modify_code(ip, old, new);
+
+ return ret;
+}
+
+notrace int ftrace_mcount_set(unsigned long *data)
+{
+ unsigned long ip = (long)(&mcount_call);
+ unsigned long *addr = data;
+ unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+ /*
+ * Replace the mcount stub with a pointer to the
+ * ip recorder function.
+ */
+ memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
+ new = ftrace_call_replace(ip, *addr);
+ *addr = ftrace_modify_code(ip, old, new);
+
+ return 0;
+}
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ const unsigned char *const *noptable = find_nop_table();
+
+ /* This is running in kstop_machine */
+
+ ftrace_mcount_set(data);
+
+ ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+
+ return 0;
+}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ea230ec..0ea6a19 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
}
#ifdef CONFIG_X86_64
-
#include <asm/pgtable.h>
-
-static inline void hpet_set_mapping(void)
-{
- set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
- __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
- hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-}
-
-static inline void hpet_clear_mapping(void)
-{
- hpet_virt_address = NULL;
-}
-
-#else
+#endif
static inline void hpet_set_mapping(void)
{
hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+#ifdef CONFIG_X86_64
+ __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+#endif
}
static inline void hpet_clear_mapping(void)
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void)
iounmap(hpet_virt_address);
hpet_virt_address = NULL;
}
-#endif
/*
* HPET command line enable / disable
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb4378..dd7ebee 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
#include <linux/module.h>
+
#include <asm/checksum.h>
-#include <asm/desc.h>
#include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 382208d1..a82065b 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -59,13 +59,6 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
static DEFINE_SPINLOCK(vector_lock);
-static bool mask_ioapic_irq_2 __initdata;
-
-void __init force_mask_ioapic_irq_2(void)
-{
- mask_ioapic_irq_2 = true;
-}
-
int timer_through_8259 __initdata;
/*
@@ -1576,7 +1569,7 @@ void /*__init*/ print_local_APIC(void *dummy)
void print_all_local_APICs(void)
{
- on_each_cpu(print_local_APIC, NULL, 1, 1);
+ on_each_cpu(print_local_APIC, NULL, 1);
}
void /*__init*/ print_PIC(void)
@@ -2186,9 +2179,6 @@ static inline void __init check_timer(void)
printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
- if (mask_ioapic_irq_2)
- mask_IO_APIC_irq(2);
-
/*
* Some BIOS writers are clueless and report the ExtINTA
* I/O APIC input from the cascaded 8259A as the timer
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 2db0f98..39f0be3 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -96,13 +96,6 @@ static int no_timer_check;
static int disable_timer_pin_1 __initdata;
-static bool mask_ioapic_irq_2 __initdata;
-
-void __init force_mask_ioapic_irq_2(void)
-{
- mask_ioapic_irq_2 = true;
-}
-
int timer_through_8259 __initdata;
/* Where if anywhere is the i8259 connect in external int mode */
@@ -1314,7 +1307,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
void print_all_local_APICs (void)
{
- on_each_cpu(print_local_APIC, NULL, 1, 1);
+ on_each_cpu(print_local_APIC, NULL, 1);
}
void __apicdebuginit print_PIC(void)
@@ -2020,9 +2013,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
cfg->vector, apic1, pin1, apic2, pin2);
- if (mask_ioapic_irq_2)
- mask_IO_APIC_irq(2);
-
/*
* Some BIOS writers are clueless and report the ExtINTA
* I/O APIC input from the cascaded 8259A as the timer
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 31f49e8..0373e88 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -199,6 +199,10 @@ void __init native_init_IRQ(void)
/* IPI for generic function call */
alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+ /* IPI for generic single function call */
+ alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+ call_function_single_interrupt);
+
/* Low priority IPI to cleanup after moving an irq */
set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
#endif
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 21f2bae..a844957 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
load_LDT(pc);
mask = cpumask_of_cpu(smp_processor_id());
if (!cpus_equal(current->mm->cpu_vm_mask, mask))
- smp_call_function(flush_ldt, current->mm, 1, 1);
+ smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
load_LDT(pc);
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f496017..8864230 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/numa.h>
+#include <linux/ftrace.h>
+
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
+ tracer_disable();
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7830dc4..9dd9262 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
#include <linux/string.h>
#include <linux/reboot.h>
#include <linux/numa.h>
+#include <linux/ftrace.h>
+
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
unsigned long page_list[PAGES_NR];
void *control_page;
+ tracer_disable();
+
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 9758fea..56b9331 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -76,6 +76,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -423,6 +424,7 @@ out:
static int microcode_open (struct inode *unused1, struct file *unused2)
{
+ cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
@@ -489,7 +491,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#define microcode_dev_exit() do { } while(0)
#endif
-static long get_next_ucode_from_buffer(void **mc, void *buf,
+static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
unsigned long size, long offset)
{
microcode_header_t *mc_header;
@@ -523,7 +525,7 @@ static int cpu_request_microcode(int cpu)
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
- void *buf;
+ const u8 *buf;
unsigned long size;
long offset = 0;
int error;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 1f3abe0..a153b39 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -117,12 +117,20 @@ static int msr_open(struct inode *inode, struct file *file)
{
unsigned int cpu = iminor(file->f_path.dentry->d_inode);
struct cpuinfo_x86 *c = &cpu_data(cpu);
+ int ret = 0;
- if (cpu >= NR_CPUS || !cpu_online(cpu))
- return -ENXIO; /* No such CPU */
- if (!cpu_has(c, X86_FEATURE_MSR))
- return -EIO; /* MSR not supported */
+ lock_kernel();
+ cpu = iminor(file->f_path.dentry->d_inode);
+ if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+ ret = -ENXIO; /* No such CPU */
+ goto out;
+ }
+ c = &cpu_data(cpu);
+ if (!cpu_has(c, X86_FEATURE_MSR))
+ ret = -EIO; /* MSR not supported */
+out:
+ unlock_kernel();
return 0;
}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 716b892..ec024b3 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -130,7 +130,7 @@ int __init check_nmi_watchdog(void)
#ifdef CONFIG_SMP
if (nmi_watchdog == NMI_LOCAL_APIC)
- smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
+ smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
#endif
for_each_possible_cpu(cpu)
@@ -272,7 +272,7 @@ static void __acpi_nmi_enable(void *__unused)
void acpi_nmi_enable(void)
{
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+ on_each_cpu(__acpi_nmi_enable, NULL, 1);
}
static void __acpi_nmi_disable(void *__unused)
@@ -286,7 +286,7 @@ static void __acpi_nmi_disable(void *__unused)
void acpi_nmi_disable(void)
{
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+ on_each_cpu(__acpi_nmi_disable, NULL, 1);
}
void setup_apic_nmi_watchdog(void *unused)
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f0f1de1..a23e823 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -93,12 +93,13 @@ int __init get_memcfg_numaq(void)
return 1;
}
-static int __init numaq_tsc_disable(void)
+void __init numaq_tsc_disable(void)
{
+ if (!found_numaq)
+ return;
+
if (num_online_nodes() > 1) {
printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
setup_clear_cpu_cap(X86_FEATURE_TSC);
}
- return 0;
}
-arch_initcall(numaq_tsc_disable);
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d0d18db..c3fe784 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -630,6 +630,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
struct pci_dev *dev;
void *gatt;
int i, error;
+ unsigned long start_pfn, end_pfn;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
@@ -674,6 +675,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
aper_base, aper_size>>10);
+
+ /* need to map that range */
+ end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+ if (end_pfn > max_low_pfn_mapped) {
+ start_pfn = (aper_base>>PAGE_SHIFT);
+ init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
+ }
return 0;
nommu:
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4061d63..4d629c6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,12 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
+#include <asm/system.h>
+
+unsigned long idle_halt;
+EXPORT_SYMBOL(idle_halt);
+unsigned long idle_nomwait;
+EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
@@ -132,7 +138,7 @@ void cpu_idle_wait(void)
{
smp_mb();
/* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
+ smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
@@ -325,7 +331,27 @@ static int __init idle_setup(char *str)
pm_idle = poll_idle;
} else if (!strcmp(str, "mwait"))
force_mwait = 1;
- else
+ else if (!strcmp(str, "halt")) {
+ /*
+ * When the boot option of idle=halt is added, halt is
+ * forced to be used for CPU idle. In such case CPU C2/C3
+ * won't be used again.
+ * To continue to load the CPU idle driver, don't touch
+ * the boot_option_idle_override.
+ */
+ pm_idle = default_idle;
+ idle_halt = 1;
+ return 0;
+ } else if (!strcmp(str, "nomwait")) {
+ /*
+ * If the boot option of "idle=nomwait" is added,
+ * it means that mwait will be disabled for CPU C2/C3
+ * states. In such case it won't touch the variable
+ * of boot_option_idle_override.
+ */
+ idle_nomwait = 1;
+ return 0;
+ } else
return -1;
boot_option_idle_override = 1;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9a139f6..0c3927a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -142,7 +142,10 @@ void cpu_idle(void)
local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
pm_idle();
+ start_critical_timings();
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index db5eb96..a8e5362 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -134,7 +134,10 @@ void cpu_idle(void)
*/
local_irq_disable();
enter_idle();
+ /* Don't trace irqs off for idle */
+ stop_critical_timings();
pm_idle();
+ start_critical_timings();
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 79bdcd1..d138588 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
hpet_print_force_info();
}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
+ old_ich_force_enable_hpet_user);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
old_ich_force_enable_hpet_user);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 2e78a14..6121ffd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -684,6 +684,11 @@ void __init setup_arch(char **cmdline_p)
clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
}
+#ifdef CONFIG_PCI
+ if (pci_early_dump_regs)
+ early_dump_pci_devices();
+#endif
+
finish_e820_parsing();
#ifdef CONFIG_X86_32
@@ -851,6 +856,14 @@ void __init setup_arch(char **cmdline_p)
init_cpu_to_node();
#endif
+#ifdef CONFIG_X86_NUMAQ
+ /*
+ * need to check online nodes num, call it
+ * here before time_init/tsc_init
+ */
+ numaq_tsc_disable();
+#endif
+
init_apic_mappings();
ioapic_init_mappings();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5fc310f..cac6843 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -343,23 +343,23 @@ static const cpumask_t cpu_mask_none;
/*
* Returns a pointer to the bitmask of CPUs on Node 'node'.
*/
-cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *_node_to_cpumask_ptr(int node)
{
if (node_to_cpumask_map == NULL) {
printk(KERN_WARNING
"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
node);
dump_stack();
- return &cpu_online_map;
+ return (const cpumask_t *)&cpu_online_map;
}
if (node >= nr_node_ids) {
printk(KERN_WARNING
"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
node, nr_node_ids);
dump_stack();
- return (cpumask_t *)&cpu_mask_none;
+ return &cpu_mask_none;
}
- return (cpumask_t *)&node_to_cpumask_map[node];
+ return &node_to_cpumask_map[node];
}
EXPORT_SYMBOL(_node_to_cpumask_ptr);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 0cb7aad..361b7a4 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,132 +121,23 @@ static void native_smp_send_reschedule(int cpu)
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
}
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-void lock_ipi_call_lock(void)
+void native_send_call_func_single_ipi(int cpu)
{
- spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
-static struct call_data_struct *call_data;
-
-static void __smp_call_function(void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = num_online_cpus() - 1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
}
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int
-native_smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
+void native_send_call_func_ipi(cpumask_t mask)
{
- struct call_data_struct data;
cpumask_t allbutself;
- int cpus;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
allbutself = cpu_online_map;
cpu_clear(smp_processor_id(), allbutself);
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
-
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
-
- /* Send a message to other CPUs */
if (cpus_equal(mask, allbutself) &&
cpus_equal(cpu_online_map, cpu_callout_map))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- spin_unlock(&call_lock);
-
- return 0;
}
static void stop_this_cpu(void *dummy)
@@ -268,18 +159,13 @@ static void stop_this_cpu(void *dummy)
static void native_smp_send_stop(void)
{
- int nolock;
unsigned long flags;
if (reboot_force)
return;
- /* Don't deadlock on the call lock in panic */
- nolock = !spin_trylock(&call_lock);
+ smp_call_function(stop_this_cpu, NULL, 0);
local_irq_save(flags);
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
- if (!nolock)
- spin_unlock(&call_lock);
disable_local_APIC();
local_irq_restore(flags);
}
@@ -301,33 +187,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
void smp_call_function_interrupt(struct pt_regs *regs)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
#ifdef CONFIG_X86_32
__get_cpu_var(irq_stat).irq_call_count++;
#else
add_pda(irq_call_count, 1);
#endif
irq_exit();
+}
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
+void smp_call_function_single_interrupt(struct pt_regs *regs)
+{
+ ack_APIC_irq();
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
+ irq_exit();
}
struct smp_ops smp_ops = {
@@ -338,7 +219,8 @@ struct smp_ops smp_ops = {
.smp_send_stop = native_smp_send_stop,
.smp_send_reschedule = native_smp_send_reschedule,
- .smp_call_function_mask = native_smp_call_function_mask,
+
+ .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
EXPORT_SYMBOL_GPL(smp_ops);
-
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6cd002f..23c3b3d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -328,12 +328,12 @@ static void __cpuinit start_secondary(void *unused)
* lock helps us to not include this cpu in a currently in progress
* smp_call_function().
*/
- lock_ipi_call_lock();
+ ipi_call_lock_irq();
#ifdef CONFIG_X86_IO_APIC
setup_vector_irq(smp_processor_id());
#endif
cpu_set(smp_processor_id(), cpu_online_map);
- unlock_ipi_call_lock();
+ ipi_call_unlock_irq();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
setup_secondary_clock();
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064..99941b3 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
per_cpu(cpu_number, cpu) = cpu;
}
#endif
-
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on a specific CPU
- * @cpu: The target CPU. Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int ret;
- int me = get_cpu();
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c28c342..a03e7f6 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace)
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
+EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 9bb2363..fec1ece 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
}
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 5039d0f..dcbf7a1 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -275,5 +275,5 @@ static void do_flush_tlb_all(void *info)
void flush_tlb_all(void)
{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
}
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 74e9929..2696a68 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -105,30 +105,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
void printk_address(unsigned long address, int reliable)
{
-#ifdef CONFIG_KALLSYMS
- unsigned long offset = 0, symsize;
- const char *symname;
- char *modname;
- char *delim = ":";
- char namebuf[KSYM_NAME_LEN];
- char reliab[4] = "";
-
- symname = kallsyms_lookup(address, &symsize, &offset,
- &modname, namebuf);
- if (!symname) {
- printk(" [<%016lx>]\n", address);
- return;
- }
- if (!reliable)
- strcpy(reliab, "? ");
-
- if (!modname)
- modname = delim = "";
- printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
- address, reliab, delim, modname, delim, symname, offset, symsize);
-#else
- printk(" [<%016lx>]\n", address);
-#endif
+ printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
}
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3c36f92..7603c05 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -358,6 +358,7 @@ static cycle_t read_tsc(void)
ret : clocksource_tsc.cycle_last;
}
+#ifdef CONFIG_X86_64
static cycle_t __vsyscall_fn vread_tsc(void)
{
cycle_t ret = (cycle_t)vget_cycles();
@@ -365,6 +366,7 @@ static cycle_t __vsyscall_fn vread_tsc(void)
return ret >= __vsyscall_gtod_data.clock.cycle_last ?
ret : __vsyscall_gtod_data.clock.cycle_last;
}
+#endif
static struct clocksource clocksource_tsc = {
.name = "tsc",
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 2674f57..cdb2363 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -49,16 +49,14 @@ SECTIONS
_etext = .; /* End of text section */
} :text = 0x9090
+ NOTES :text :note
+
. = ALIGN(16); /* Exception table */
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
*(__ex_table)
__stop___ex_table = .;
- }
-
- NOTES :text :note
-
- BUG_TABLE :text
+ } :text = 0x9090
RODATA
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fd246e2..63e5c1a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -19,7 +19,7 @@ PHDRS {
data PT_LOAD FLAGS(7); /* RWE */
user PT_LOAD FLAGS(7); /* RWE */
data.init PT_LOAD FLAGS(7); /* RWE */
- note PT_NOTE FLAGS(4); /* R__ */
+ note PT_NOTE FLAGS(0); /* ___ */
}
SECTIONS
{
@@ -40,16 +40,14 @@ SECTIONS
_etext = .; /* End of text section */
} :text = 0x9090
+ NOTES :text :note
+
. = ALIGN(16); /* Exception table */
__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
__start___ex_table = .;
*(__ex_table)
__stop___ex_table = .;
- }
-
- NOTES :text :note
-
- BUG_TABLE :text
+ } :text = 0x9090
RODATA
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index c87cbd8..0b8b6690 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
#include <asm/topology.h>
#include <asm/vgtod.h>
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+ __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"
/*
@@ -278,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
long cpu = (long)arg;
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
return NOTIFY_DONE;
}
@@ -301,7 +302,7 @@ static int __init vsyscall_init(void)
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2);
#endif
- on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+ on_each_cpu(cpu_vsyscall_init, NULL, 1);
hotcpu_notifier(cpu_vsyscall_notifier, 0);
return 0;
}
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 2f306a8..b545f37 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
All C exports should go in the respective C files. */
#include <linux/module.h>
-#include <net/checksum.h>
#include <linux/smp.h>
+#include <net/checksum.h>
+
#include <asm/processor.h>
-#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#include <asm/uaccess.h>
#include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 540e951..10ce6ee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -335,7 +335,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
{
if (vmx->vcpu.cpu == -1)
return;
- smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1);
+ smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1);
vmx->launched = 0;
}
@@ -2968,7 +2968,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (vmx->vmcs) {
- on_each_cpu(__vcpu_clear, vmx, 0, 1);
+ on_each_cpu(__vcpu_clear, vmx, 1);
free_vmcs(vmx->vmcs);
vmx->vmcs = NULL;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63a77ca..0faa254 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4044,6 +4044,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
* So need not to call smp_call_function_single() in that case.
*/
if (vcpu->guest_mode && vcpu->cpu != cpu)
- smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+ smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
put_cpu();
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 83226e0..aa3fa41 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_SMP) := msr-on-cpu.o
lib-y := delay.o
+lib-y += thunk_$(BITS).o
lib-y += usercopy_$(BITS).o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 57d043f..d5a2b39 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -30,10 +30,10 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
rv.msr_no = msr_no;
if (safe) {
- smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1);
+ smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
err = rv.err;
} else {
- smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1);
+ smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
}
*l = rv.l;
*h = rv.h;
@@ -64,10 +64,10 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
rv.l = l;
rv.h = h;
if (safe) {
- smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1);
+ smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
err = rv.err;
} else {
- smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1);
+ smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
}
return err;
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 0000000..650b11e
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
+/*
+ * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
+ * Copyright 2008 by Steven Rostedt, Red Hat, Inc
+ * (inspired by Andi Kleen's thunk_64.S)
+ * Subject to the GNU public license, v.2. No warranty of any kind.
+ */
+
+ #include <linux/linkage.h>
+
+#define ARCH_TRACE_IRQS_ON \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call trace_hardirqs_on; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+
+#define ARCH_TRACE_IRQS_OFF \
+ pushl %eax; \
+ pushl %ecx; \
+ pushl %edx; \
+ call trace_hardirqs_off; \
+ popl %edx; \
+ popl %ecx; \
+ popl %eax;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* put return address in eax (arg1) */
+ .macro thunk_ra name,func
+ .globl \name
+\name:
+ pushl %eax
+ pushl %ecx
+ pushl %edx
+ /* Place EIP in the arg1 */
+ movl 3*4(%esp), %eax
+ call \func
+ popl %edx
+ popl %ecx
+ popl %eax
+ ret
+ .endm
+
+ thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+ thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
+#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251..bf9a7d5 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
* Save registers before calling assembly functions. This avoids
* disturbance of register allocation in some inline assembly constructs.
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
+ * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
* Subject to the GNU public license, v.2. No warranty of any kind.
*/
@@ -42,8 +43,22 @@
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
- thunk trace_hardirqs_on_thunk,trace_hardirqs_on
- thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+ /* put return address in rdi (arg1) */
+ .macro thunk_ra name,func
+ .globl \name
+\name:
+ CFI_STARTPROC
+ SAVE_ARGS
+ /* SAVE_ARGS pushs 9 elements */
+ /* the next element would be the rip */
+ movq 9*8(%rsp), %rdi
+ call \func
+ jmp restore
+ CFI_ENDPROC
+ .endm
+
+ thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+ thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 8dedd01..ee0fba0 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -950,94 +950,24 @@ static void smp_stop_cpu_function(void *dummy)
halt();
}
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- volatile unsigned long started;
- volatile unsigned long finished;
- int wait;
-};
-
-static struct call_data_struct *call_data;
-
/* execute a thread on a new CPU. The function to be called must be
* previously set up. This is used to schedule a function for
* execution on all CPUs - set up the function then broadcast a
* function_interrupt CPI to come here on each CPU */
static void smp_call_function_interrupt(void)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- /* must take copy of wait because call_data may be replaced
- * unless the function is waiting for us to finish */
- int wait = call_data->wait;
- __u8 cpu = smp_processor_id();
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- if (!test_and_clear_bit(cpu, &call_data->started)) {
- /* If the bit wasn't set, this could be a replay */
- printk(KERN_WARNING "VOYAGER SMP: CPU %d received call funtion"
- " with no call pending\n", cpu);
- return;
- }
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func) (info);
+ generic_smp_call_function_interrupt();
__get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
- if (wait) {
- mb();
- clear_bit(cpu, &call_data->finished);
- }
}
-static int
-voyager_smp_call_function_mask(cpumask_t cpumask,
- void (*func) (void *info), void *info, int wait)
+static void smp_call_function_single_interrupt(void)
{
- struct call_data_struct data;
- u32 mask = cpus_addr(cpumask)[0];
-
- mask &= ~(1 << smp_processor_id());
-
- if (!mask)
- return 0;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- data.started = mask;
- data.wait = wait;
- if (wait)
- data.finished = mask;
-
- spin_lock(&call_lock);
- call_data = &data;
- wmb();
- /* Send a message to all other CPUs and wait for them to respond */
- send_CPI(mask, VIC_CALL_FUNCTION_CPI);
-
- /* Wait for response */
- while (data.started)
- barrier();
-
- if (wait)
- while (data.finished)
- barrier();
-
- spin_unlock(&call_lock);
-
- return 0;
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ __get_cpu_var(irq_stat).irq_call_count++;
+ irq_exit();
}
/* Sorry about the name. In an APIC based system, the APICs
@@ -1094,6 +1024,12 @@ void smp_qic_call_function_interrupt(struct pt_regs *regs)
smp_call_function_interrupt();
}
+void smp_qic_call_function_single_interrupt(struct pt_regs *regs)
+{
+ ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI);
+ smp_call_function_single_interrupt();
+}
+
void smp_vic_cpi_interrupt(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1114,6 +1050,8 @@ void smp_vic_cpi_interrupt(struct pt_regs *regs)
smp_enable_irq_interrupt();
if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu]))
smp_call_function_interrupt();
+ if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu]))
+ smp_call_function_single_interrupt();
set_irq_regs(old_regs);
}
@@ -1129,7 +1067,7 @@ static void do_flush_tlb_all(void *info)
/* flush the TLB of every active CPU in the system */
void flush_tlb_all(void)
{
- on_each_cpu(do_flush_tlb_all, 0, 1, 1);
+ on_each_cpu(do_flush_tlb_all, 0, 1);
}
/* send a reschedule CPI to one CPU by physical CPU number*/
@@ -1161,7 +1099,7 @@ int safe_smp_processor_id(void)
/* broadcast a halt to all other CPUs */
static void voyager_smp_send_stop(void)
{
- smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
+ smp_call_function(smp_stop_cpu_function, NULL, 1);
}
/* this function is triggered in time.c when a clock tick fires
@@ -1848,5 +1786,7 @@ struct smp_ops smp_ops = {
.smp_send_stop = voyager_smp_send_stop,
.smp_send_reschedule = voyager_smp_send_reschedule,
- .smp_call_function_mask = voyager_smp_call_function_mask,
+
+ .send_call_func_ipi = native_send_call_func_ipi,
+ .send_call_func_single_ipi = native_send_call_func_single_ipi,
};
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index c107641..9873716 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_HIGHMEM) += highmem_32.o
+obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
+obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
+mmiotrace-y := pf_in.o mmio-mod.o
+obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
+
ifeq ($(CONFIG_X86_32),y)
obj-$(CONFIG_NUMA) += discontig_32.o
else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d0f5fce..455f3fe 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,6 +10,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/mmiotrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
@@ -49,6 +50,16 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
+static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+{
+#ifdef CONFIG_MMIOTRACE_HOOKS
+ if (unlikely(is_kmmio_active()))
+ if (kmmio_handler(regs, addr) == 1)
+ return -1;
+#endif
+ return 0;
+}
+
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
@@ -598,6 +609,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (notify_page_fault(regs))
return;
+ if (unlikely(kmmio_fault(regs, address)))
+ return;
/*
* We fault-in kernel-space virtual memory on-demand. The
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 029e8cf..9689a51 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -1035,6 +1035,8 @@ void mark_rodata_ro(void)
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
+#ifndef CONFIG_DYNAMIC_FTRACE
+ /* Dynamic tracing modifies the kernel text section */
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel text: %luk\n",
size >> 10);
@@ -1047,6 +1049,8 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: write protecting again\n");
set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
start += size;
size = (unsigned long)__end_rodata - start;
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 122bcef..306049e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -644,7 +644,7 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start,
unsigned long pud_phys;
pud_t *pud;
- next = start + PGDIR_SIZE;
+ next = (start + PGDIR_SIZE) & PGDIR_MASK;
if (next > end)
next = end;
@@ -763,6 +763,20 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
end_pfn = end>>PAGE_SHIFT;
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+ /* try to merge same page size and continuous */
+ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
+ unsigned long old_start;
+ if (mr[i].end != mr[i+1].start ||
+ mr[i].page_size_mask != mr[i+1].page_size_mask)
+ continue;
+ /* move it */
+ old_start = mr[i].start;
+ memmove(&mr[i], &mr[i+1],
+ (nr_range - 1 - i) * sizeof (struct map_range));
+ mr[i].start = old_start;
+ nr_range--;
+ }
+
for (i = 0; i < nr_range; i++)
printk(KERN_DEBUG " %010lx - %010lx page %s\n",
mr[i].start, mr[i].end,
@@ -977,6 +991,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+ unsigned long rodata_start =
+ ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* Dynamic tracing modifies the kernel text section */
+ start = rodata_start;
+#endif
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -986,8 +1007,7 @@ void mark_rodata_ro(void)
* The rodata section (but not the kernel text!) should also be
* not-executable.
*/
- start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
- set_memory_nx(start, (end - start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
rodata_test();
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 115f13e..24c1d3c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/mmiotrace.h>
#include <asm/cacheflush.h>
#include <asm/e820.h>
@@ -122,10 +123,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
{
unsigned long pfn, offset, vaddr;
resource_size_t last_addr;
+ const resource_size_t unaligned_phys_addr = phys_addr;
+ const unsigned long unaligned_size = size;
struct vm_struct *area;
unsigned long new_prot_val;
pgprot_t prot;
int retval;
+ void __iomem *ret_addr;
/* Don't allow wraparound or zero size */
last_addr = phys_addr + size - 1;
@@ -233,7 +237,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
return NULL;
}
- return (void __iomem *) (vaddr + offset);
+ ret_addr = (void __iomem *) (vaddr + offset);
+ mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
+
+ return ret_addr;
}
/**
@@ -348,6 +355,8 @@ void iounmap(volatile void __iomem *addr)
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
+ mmiotrace_iounmap(addr);
+
/* Use the vm area unlocked, assuming the caller
ensures there isn't another iounmap for the same address
in parallel. Reuse of the virtual address is prevented by
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
new file mode 100644
index 0000000..93d82038
--- /dev/null
+++ b/arch/x86/mm/kmmio.c
@@ -0,0 +1,510 @@
+/* Support for MMIO probes.
+ * Benfit many code from kprobes
+ * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
+ * 2007 Alexander Eichner
+ * 2008 Pekka Paalanen <pq@iki.fi>
+ */
+
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <linux/errno.h>
+#include <asm/debugreg.h>
+#include <linux/mmiotrace.h>
+
+#define KMMIO_PAGE_HASH_BITS 4
+#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
+
+struct kmmio_fault_page {
+ struct list_head list;
+ struct kmmio_fault_page *release_next;
+ unsigned long page; /* location of the fault page */
+
+ /*
+ * Number of times this page has been registered as a part
+ * of a probe. If zero, page is disarmed and this may be freed.
+ * Used only by writers (RCU).
+ */
+ int count;
+};
+
+struct kmmio_delayed_release {
+ struct rcu_head rcu;
+ struct kmmio_fault_page *release_list;
+};
+
+struct kmmio_context {
+ struct kmmio_fault_page *fpage;
+ struct kmmio_probe *probe;
+ unsigned long saved_flags;
+ unsigned long addr;
+ int active;
+};
+
+static DEFINE_SPINLOCK(kmmio_lock);
+
+/* Protected by kmmio_lock */
+unsigned int kmmio_count;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
+static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
+static LIST_HEAD(kmmio_probes);
+
+static struct list_head *kmmio_page_list(unsigned long page)
+{
+ return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+}
+
+/* Accessed per-cpu */
+static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
+
+/*
+ * this is basically a dynamic stabbing problem:
+ * Could use the existing prio tree code or
+ * Possible better implementations:
+ * The Interval Skip List: A Data Structure for Finding All Intervals That
+ * Overlap a Point (might be simple)
+ * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
+ */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
+static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
+{
+ struct kmmio_probe *p;
+ list_for_each_entry_rcu(p, &kmmio_probes, list) {
+ if (addr >= p->addr && addr <= (p->addr + p->len))
+ return p;
+ }
+ return NULL;
+}
+
+/* You must be holding RCU read lock. */
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+{
+ struct list_head *head;
+ struct kmmio_fault_page *p;
+
+ page &= PAGE_MASK;
+ head = kmmio_page_list(page);
+ list_for_each_entry_rcu(p, head, list) {
+ if (p->page == page)
+ return p;
+ }
+ return NULL;
+}
+
+static void set_page_present(unsigned long addr, bool present,
+ unsigned int *pglevel)
+{
+ pteval_t pteval;
+ pmdval_t pmdval;
+ unsigned int level;
+ pmd_t *pmd;
+ pte_t *pte = lookup_address(addr, &level);
+
+ if (!pte) {
+ pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+ return;
+ }
+
+ if (pglevel)
+ *pglevel = level;
+
+ switch (level) {
+ case PG_LEVEL_2M:
+ pmd = (pmd_t *)pte;
+ pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
+ if (present)
+ pmdval |= _PAGE_PRESENT;
+ set_pmd(pmd, __pmd(pmdval));
+ break;
+
+ case PG_LEVEL_4K:
+ pteval = pte_val(*pte) & ~_PAGE_PRESENT;
+ if (present)
+ pteval |= _PAGE_PRESENT;
+ set_pte_atomic(pte, __pte(pteval));
+ break;
+
+ default:
+ pr_err("kmmio: unexpected page level 0x%x.\n", level);
+ return;
+ }
+
+ __flush_tlb_one(addr);
+}
+
+/** Mark the given page as not present. Access to it will trigger a fault. */
+static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+{
+ set_page_present(page & PAGE_MASK, false, pglevel);
+}
+
+/** Mark the given page as present. */
+static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
+{
+ set_page_present(page & PAGE_MASK, true, pglevel);
+}
+
+/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefecthing may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
+ * Interrupts are disabled on entry as trap3 is an interrupt gate
+ * and they remain disabled thorough out this function.
+ */
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+{
+ struct kmmio_context *ctx;
+ struct kmmio_fault_page *faultpage;
+ int ret = 0; /* default to fault not handled */
+
+ /*
+ * Preemption is now disabled to prevent process switch during
+ * single stepping. We can only handle one active kmmio trace
+ * per cpu, so ensure that we finish it before something else
+ * gets to run. We also hold the RCU read lock over single
+ * stepping to avoid looking up the probe and kmmio_fault_page
+ * again.
+ */
+ preempt_disable();
+ rcu_read_lock();
+
+ faultpage = get_kmmio_fault_page(addr);
+ if (!faultpage) {
+ /*
+ * Either this page fault is not caused by kmmio, or
+ * another CPU just pulled the kmmio probe from under
+ * our feet. The latter case should not be possible.
+ */
+ goto no_kmmio;
+ }
+
+ ctx = &get_cpu_var(kmmio_ctx);
+ if (ctx->active) {
+ disarm_kmmio_fault_page(faultpage->page, NULL);
+ if (addr == ctx->addr) {
+ /*
+ * On SMP we sometimes get recursive probe hits on the
+ * same address. Context is already saved, fall out.
+ */
+ pr_debug("kmmio: duplicate probe hit on CPU %d, for "
+ "address 0x%08lx.\n",
+ smp_processor_id(), addr);
+ ret = 1;
+ goto no_kmmio_ctx;
+ }
+ /*
+ * Prevent overwriting already in-flight context.
+ * This should not happen, let's hope disarming at least
+ * prevents a panic.
+ */
+ pr_emerg("kmmio: recursive probe hit on CPU %d, "
+ "for address 0x%08lx. Ignoring.\n",
+ smp_processor_id(), addr);
+ pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
+ ctx->addr);
+ goto no_kmmio_ctx;
+ }
+ ctx->active++;
+
+ ctx->fpage = faultpage;
+ ctx->probe = get_kmmio_probe(addr);
+ ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
+ ctx->addr = addr;
+
+ if (ctx->probe && ctx->probe->pre_handler)
+ ctx->probe->pre_handler(ctx->probe, regs, addr);
+
+ /*
+ * Enable single-stepping and disable interrupts for the faulting
+ * context. Local interrupts must not get enabled during stepping.
+ */
+ regs->flags |= X86_EFLAGS_TF;
+ regs->flags &= ~X86_EFLAGS_IF;
+
+ /* Now we set present bit in PTE and single step. */
+ disarm_kmmio_fault_page(ctx->fpage->page, NULL);
+
+ /*
+ * If another cpu accesses the same page while we are stepping,
+ * the access will not be caught. It will simply succeed and the
+ * only downside is we lose the event. If this becomes a problem,
+ * the user should drop to single cpu before tracing.
+ */
+
+ put_cpu_var(kmmio_ctx);
+ return 1; /* fault handled */
+
+no_kmmio_ctx:
+ put_cpu_var(kmmio_ctx);
+no_kmmio:
+ rcu_read_unlock();
+ preempt_enable_no_resched();
+ return ret;
+}
+
+/*
+ * Interrupts are disabled on entry as trap1 is an interrupt gate
+ * and they remain disabled thorough out this function.
+ * This must always get called as the pair to kmmio_handler().
+ */
+static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
+{
+ int ret = 0;
+ struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
+
+ if (!ctx->active) {
+ pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+ smp_processor_id());
+ goto out;
+ }
+
+ if (ctx->probe && ctx->probe->post_handler)
+ ctx->probe->post_handler(ctx->probe, condition, regs);
+
+ arm_kmmio_fault_page(ctx->fpage->page, NULL);
+
+ regs->flags &= ~X86_EFLAGS_TF;
+ regs->flags |= ctx->saved_flags;
+
+ /* These were acquired in kmmio_handler(). */
+ ctx->active--;
+ BUG_ON(ctx->active);
+ rcu_read_unlock();
+ preempt_enable_no_resched();
+
+ /*
+ * if somebody else is singlestepping across a probe point, flags
+ * will have TF set, in which case, continue the remaining processing
+ * of do_debug, as if this is not a probe hit.
+ */
+ if (!(regs->flags & X86_EFLAGS_TF))
+ ret = 1;
+out:
+ put_cpu_var(kmmio_ctx);
+ return ret;
+}
+
+/* You must be holding kmmio_lock. */
+static int add_kmmio_fault_page(unsigned long page)
+{
+ struct kmmio_fault_page *f;
+
+ page &= PAGE_MASK;
+ f = get_kmmio_fault_page(page);
+ if (f) {
+ if (!f->count)
+ arm_kmmio_fault_page(f->page, NULL);
+ f->count++;
+ return 0;
+ }
+
+ f = kmalloc(sizeof(*f), GFP_ATOMIC);
+ if (!f)
+ return -1;
+
+ f->count = 1;
+ f->page = page;
+ list_add_rcu(&f->list, kmmio_page_list(f->page));
+
+ arm_kmmio_fault_page(f->page, NULL);
+
+ return 0;
+}
+
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long page,
+ struct kmmio_fault_page **release_list)
+{
+ struct kmmio_fault_page *f;
+
+ page &= PAGE_MASK;
+ f = get_kmmio_fault_page(page);
+ if (!f)
+ return;
+
+ f->count--;
+ BUG_ON(f->count < 0);
+ if (!f->count) {
+ disarm_kmmio_fault_page(f->page, NULL);
+ f->release_next = *release_list;
+ *release_list = f;
+ }
+}
+
+/*
+ * With page-unaligned ioremaps, one or two armed pages may contain
+ * addresses from outside the intended mapping. Events for these addresses
+ * are currently silently dropped. The events may result only from programming
+ * mistakes by accessing addresses before the beginning or past the end of a
+ * mapping.
+ */
+int register_kmmio_probe(struct kmmio_probe *p)
+{
+ unsigned long flags;
+ int ret = 0;
+ unsigned long size = 0;
+ const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+
+ spin_lock_irqsave(&kmmio_lock, flags);
+ if (get_kmmio_probe(p->addr)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ kmmio_count++;
+ list_add_rcu(&p->list, &kmmio_probes);
+ while (size < size_lim) {
+ if (add_kmmio_fault_page(p->addr + size))
+ pr_err("kmmio: Unable to set page fault.\n");
+ size += PAGE_SIZE;
+ }
+out:
+ spin_unlock_irqrestore(&kmmio_lock, flags);
+ /*
+ * XXX: What should I do here?
+ * Here was a call to global_flush_tlb(), but it does not exist
+ * anymore. It seems it's not needed after all.
+ */
+ return ret;
+}
+EXPORT_SYMBOL(register_kmmio_probe);
+
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+ struct kmmio_delayed_release *dr = container_of(
+ head,
+ struct kmmio_delayed_release,
+ rcu);
+ struct kmmio_fault_page *p = dr->release_list;
+ while (p) {
+ struct kmmio_fault_page *next = p->release_next;
+ BUG_ON(p->count);
+ kfree(p);
+ p = next;
+ }
+ kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+ struct kmmio_delayed_release *dr = container_of(
+ head,
+ struct kmmio_delayed_release,
+ rcu);
+ struct kmmio_fault_page *p = dr->release_list;
+ struct kmmio_fault_page **prevp = &dr->release_list;
+ unsigned long flags;
+ spin_lock_irqsave(&kmmio_lock, flags);
+ while (p) {
+ if (!p->count)
+ list_del_rcu(&p->list);
+ else
+ *prevp = p->release_next;
+ prevp = &p->release_next;
+ p = p->release_next;
+ }
+ spin_unlock_irqrestore(&kmmio_lock, flags);
+ /* This is the real RCU destroy call. */
+ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore. Only after that
+ * you may actually release your struct kmmio_probe.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ * Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ * Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ * Actally free the kmmio_fault_page structs as with RCU.
+ */
+void unregister_kmmio_probe(struct kmmio_probe *p)
+{
+ unsigned long flags;
+ unsigned long size = 0;
+ const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+ struct kmmio_fault_page *release_list = NULL;
+ struct kmmio_delayed_release *drelease;
+
+ spin_lock_irqsave(&kmmio_lock, flags);
+ while (size < size_lim) {
+ release_kmmio_fault_page(p->addr + size, &release_list);
+ size += PAGE_SIZE;
+ }
+ list_del_rcu(&p->list);
+ kmmio_count--;
+ spin_unlock_irqrestore(&kmmio_lock, flags);
+
+ drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+ if (!drelease) {
+ pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
+ return;
+ }
+ drelease->release_list = release_list;
+
+ /*
+ * This is not really RCU here. We have just disarmed a set of
+ * pages so that they cannot trigger page faults anymore. However,
+ * we cannot remove the pages from kmmio_page_table,
+ * because a probe hit might be in flight on another CPU. The
+ * pages are collected into a list, and they will be removed from
+ * kmmio_page_table when it is certain that no probe hit related to
+ * these pages can be in flight. RCU grace period sounds like a
+ * good choice.
+ *
+ * If we removed the pages too early, kmmio page fault handler might
+ * not find the respective kmmio_fault_page and determine it's not
+ * a kmmio fault, when it actually is. This would lead to madness.
+ */
+ call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
+}
+EXPORT_SYMBOL(unregister_kmmio_probe);
+
+static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
+ void *args)
+{
+ struct die_args *arg = args;
+
+ if (val == DIE_DEBUG && (arg->err & DR_STEP))
+ if (post_kmmio_handler(arg->err, arg->regs) == 1)
+ return NOTIFY_STOP;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nb_die = {
+ .notifier_call = kmmio_die_notifier
+};
+
+static int __init init_kmmio(void)
+{
+ int i;
+ for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+ INIT_LIST_HEAD(&kmmio_page_table[i]);
+ return register_die_notifier(&nb_die);
+}
+fs_initcall(init_kmmio); /* should be before device_initcall() */
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
new file mode 100644
index 0000000..e7397e1
--- /dev/null
+++ b/arch/x86/mm/mmio-mod.c
@@ -0,0 +1,515 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2005
+ * Jeff Muizelaar, 2006, 2007
+ * Pekka Paalanen, 2008 <pq@iki.fi>
+ *
+ * Derived from the read-mod example from relay-examples by Tom Zanussi.
+ */
+#define DEBUG 1
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/version.h>
+#include <linux/kallsyms.h>
+#include <asm/pgtable.h>
+#include <linux/mmiotrace.h>
+#include <asm/e820.h> /* for ISA_START_ADDRESS */
+#include <asm/atomic.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include "pf_in.h"
+
+#define NAME "mmiotrace: "
+
+struct trap_reason {
+ unsigned long addr;
+ unsigned long ip;
+ enum reason_type type;
+ int active_traces;
+};
+
+struct remap_trace {
+ struct list_head list;
+ struct kmmio_probe probe;
+ resource_size_t phys;
+ unsigned long id;
+};
+
+/* Accessed per-cpu. */
+static DEFINE_PER_CPU(struct trap_reason, pf_reason);
+static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
+
+#if 0 /* XXX: no way gather this info anymore */
+/* Access to this is not per-cpu. */
+static DEFINE_PER_CPU(atomic_t, dropped);
+#endif
+
+static struct dentry *marker_file;
+
+static DEFINE_MUTEX(mmiotrace_mutex);
+static DEFINE_SPINLOCK(trace_lock);
+static atomic_t mmiotrace_enabled;
+static LIST_HEAD(trace_list); /* struct remap_trace */
+
+/*
+ * Locking in this file:
+ * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
+ * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
+ * and trace_lock.
+ * - Routines depending on is_enabled() must take trace_lock.
+ * - trace_list users must hold trace_lock.
+ * - is_enabled() guarantees that mmio_trace_record is allowed.
+ * - pre/post callbacks assume the effect of is_enabled() being true.
+ */
+
+/* module parameters */
+static unsigned long filter_offset;
+static int nommiotrace;
+static int trace_pc;
+
+module_param(filter_offset, ulong, 0);
+module_param(nommiotrace, bool, 0);
+module_param(trace_pc, bool, 0);
+
+MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
+MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
+MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions.");
+
+static bool is_enabled(void)
+{
+ return atomic_read(&mmiotrace_enabled);
+}
+
+#if 0 /* XXX: needs rewrite */
+/*
+ * Write callback for the debugfs entry:
+ * Read a marker and write it to the mmio trace log
+ */
+static ssize_t write_marker(struct file *file, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ char *event = NULL;
+ struct mm_io_header *headp;
+ ssize_t len = (count > 65535) ? 65535 : count;
+
+ event = kzalloc(sizeof(*headp) + len, GFP_KERNEL);
+ if (!event)
+ return -ENOMEM;
+
+ headp = (struct mm_io_header *)event;
+ headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT);
+ headp->data_len = len;
+
+ if (copy_from_user(event + sizeof(*headp), buffer, len)) {
+ kfree(event);
+ return -EFAULT;
+ }
+
+ spin_lock_irq(&trace_lock);
+#if 0 /* XXX: convert this to use tracing */
+ if (is_enabled())
+ relay_write(chan, event, sizeof(*headp) + len);
+ else
+#endif
+ len = -EINVAL;
+ spin_unlock_irq(&trace_lock);
+ kfree(event);
+ return len;
+}
+#endif
+
+static void print_pte(unsigned long address)
+{
+ unsigned int level;
+ pte_t *pte = lookup_address(address, &level);
+
+ if (!pte) {
+ pr_err(NAME "Error in %s: no pte for page 0x%08lx\n",
+ __func__, address);
+ return;
+ }
+
+ if (level == PG_LEVEL_2M) {
+ pr_emerg(NAME "4MB pages are not currently supported: "
+ "0x%08lx\n", address);
+ BUG();
+ }
+ pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address,
+ (unsigned long long)pte_val(*pte),
+ (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
+}
+
+/*
+ * For some reason the pre/post pairs have been called in an
+ * unmatched order. Report and die.
+ */
+static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
+{
+ const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+ pr_emerg(NAME "unexpected fault for address: 0x%08lx, "
+ "last fault for address: 0x%08lx\n",
+ addr, my_reason->addr);
+ print_pte(addr);
+ print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip);
+ print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip);
+#ifdef __i386__
+ pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->ax, regs->bx, regs->cx, regs->dx);
+ pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
+ regs->si, regs->di, regs->bp, regs->sp);
+#else
+ pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
+ regs->ax, regs->cx, regs->dx);
+ pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
+ regs->si, regs->di, regs->bp, regs->sp);
+#endif
+ put_cpu_var(pf_reason);
+ BUG();
+}
+
+static void pre(struct kmmio_probe *p, struct pt_regs *regs,
+ unsigned long addr)
+{
+ struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+ struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+ const unsigned long instptr = instruction_pointer(regs);
+ const enum reason_type type = get_ins_type(instptr);
+ struct remap_trace *trace = p->private;
+
+ /* it doesn't make sense to have more than one active trace per cpu */
+ if (my_reason->active_traces)
+ die_kmmio_nesting_error(regs, addr);
+ else
+ my_reason->active_traces++;
+
+ my_reason->type = type;
+ my_reason->addr = addr;
+ my_reason->ip = instptr;
+
+ my_trace->phys = addr - trace->probe.addr + trace->phys;
+ my_trace->map_id = trace->id;
+
+ /*
+ * Only record the program counter when requested.
+ * It may taint clean-room reverse engineering.
+ */
+ if (trace_pc)
+ my_trace->pc = instptr;
+ else
+ my_trace->pc = 0;
+
+ /*
+ * XXX: the timestamp recorded will be *after* the tracing has been
+ * done, not at the time we hit the instruction. SMP implications
+ * on event ordering?
+ */
+
+ switch (type) {
+ case REG_READ:
+ my_trace->opcode = MMIO_READ;
+ my_trace->width = get_ins_mem_width(instptr);
+ break;
+ case REG_WRITE:
+ my_trace->opcode = MMIO_WRITE;
+ my_trace->width = get_ins_mem_width(instptr);
+ my_trace->value = get_ins_reg_val(instptr, regs);
+ break;
+ case IMM_WRITE:
+ my_trace->opcode = MMIO_WRITE;
+ my_trace->width = get_ins_mem_width(instptr);
+ my_trace->value = get_ins_imm_val(instptr);
+ break;
+ default:
+ {
+ unsigned char *ip = (unsigned char *)instptr;
+ my_trace->opcode = MMIO_UNKNOWN_OP;
+ my_trace->width = 0;
+ my_trace->value = (*ip) << 16 | *(ip + 1) << 8 |
+ *(ip + 2);
+ }
+ }
+ put_cpu_var(cpu_trace);
+ put_cpu_var(pf_reason);
+}
+
+static void post(struct kmmio_probe *p, unsigned long condition,
+ struct pt_regs *regs)
+{
+ struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+ struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+
+ /* this should always return the active_trace count to 0 */
+ my_reason->active_traces--;
+ if (my_reason->active_traces) {
+ pr_emerg(NAME "unexpected post handler");
+ BUG();
+ }
+
+ switch (my_reason->type) {
+ case REG_READ:
+ my_trace->value = get_ins_reg_val(my_reason->ip, regs);
+ break;
+ default:
+ break;
+ }
+
+ mmio_trace_rw(my_trace);
+ put_cpu_var(cpu_trace);
+ put_cpu_var(pf_reason);
+}
+
+static void ioremap_trace_core(resource_size_t offset, unsigned long size,
+ void __iomem *addr)
+{
+ static atomic_t next_id;
+ struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
+ /* These are page-unaligned. */
+ struct mmiotrace_map map = {
+ .phys = offset,
+ .virt = (unsigned long)addr,
+ .len = size,
+ .opcode = MMIO_PROBE
+ };
+
+ if (!trace) {
+ pr_err(NAME "kmalloc failed in ioremap\n");
+ return;
+ }
+
+ *trace = (struct remap_trace) {
+ .probe = {
+ .addr = (unsigned long)addr,
+ .len = size,
+ .pre_handler = pre,
+ .post_handler = post,
+ .private = trace
+ },
+ .phys = offset,
+ .id = atomic_inc_return(&next_id)
+ };
+ map.map_id = trace->id;
+
+ spin_lock_irq(&trace_lock);
+ if (!is_enabled())
+ goto not_enabled;
+
+ mmio_trace_mapping(&map);
+ list_add_tail(&trace->list, &trace_list);
+ if (!nommiotrace)
+ register_kmmio_probe(&trace->probe);
+
+not_enabled:
+ spin_unlock_irq(&trace_lock);
+}
+
+void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
+ void __iomem *addr)
+{
+ if (!is_enabled()) /* recheck and proper locking in *_core() */
+ return;
+
+ pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n",
+ (unsigned long long)offset, size, addr);
+ if ((filter_offset) && (offset != filter_offset))
+ return;
+ ioremap_trace_core(offset, size, addr);
+}
+
+static void iounmap_trace_core(volatile void __iomem *addr)
+{
+ struct mmiotrace_map map = {
+ .phys = 0,
+ .virt = (unsigned long)addr,
+ .len = 0,
+ .opcode = MMIO_UNPROBE
+ };
+ struct remap_trace *trace;
+ struct remap_trace *tmp;
+ struct remap_trace *found_trace = NULL;
+
+ pr_debug(NAME "Unmapping %p.\n", addr);
+
+ spin_lock_irq(&trace_lock);
+ if (!is_enabled())
+ goto not_enabled;
+
+ list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+ if ((unsigned long)addr == trace->probe.addr) {
+ if (!nommiotrace)
+ unregister_kmmio_probe(&trace->probe);
+ list_del(&trace->list);
+ found_trace = trace;
+ break;
+ }
+ }
+ map.map_id = (found_trace) ? found_trace->id : -1;
+ mmio_trace_mapping(&map);
+
+not_enabled:
+ spin_unlock_irq(&trace_lock);
+ if (found_trace) {
+ synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+ kfree(found_trace);
+ }
+}
+
+void mmiotrace_iounmap(volatile void __iomem *addr)
+{
+ might_sleep();
+ if (is_enabled()) /* recheck and proper locking in *_core() */
+ iounmap_trace_core(addr);
+}
+
+static void clear_trace_list(void)
+{
+ struct remap_trace *trace;
+ struct remap_trace *tmp;
+
+ /*
+ * No locking required, because the caller ensures we are in a
+ * critical section via mutex, and is_enabled() is false,
+ * i.e. nothing can traverse or modify this list.
+ * Caller also ensures is_enabled() cannot change.
+ */
+ list_for_each_entry(trace, &trace_list, list) {
+ pr_notice(NAME "purging non-iounmapped "
+ "trace @0x%08lx, size 0x%lx.\n",
+ trace->probe.addr, trace->probe.len);
+ if (!nommiotrace)
+ unregister_kmmio_probe(&trace->probe);
+ }
+ synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+
+ list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+ list_del(&trace->list);
+ kfree(trace);
+ }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static cpumask_t downed_cpus;
+
+static void enter_uniprocessor(void)
+{
+ int cpu;
+ int err;
+
+ get_online_cpus();
+ downed_cpus = cpu_online_map;
+ cpu_clear(first_cpu(cpu_online_map), downed_cpus);
+ if (num_online_cpus() > 1)
+ pr_notice(NAME "Disabling non-boot CPUs...\n");
+ put_online_cpus();
+
+ for_each_cpu_mask(cpu, downed_cpus) {
+ err = cpu_down(cpu);
+ if (!err)
+ pr_info(NAME "CPU%d is down.\n", cpu);
+ else
+ pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err);
+ }
+ if (num_online_cpus() > 1)
+ pr_warning(NAME "multiple CPUs still online, "
+ "may miss events.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+ int cpu;
+ int err;
+
+ if (cpus_weight(downed_cpus) == 0)
+ return;
+ pr_notice(NAME "Re-enabling CPUs...\n");
+ for_each_cpu_mask(cpu, downed_cpus) {
+ err = cpu_up(cpu);
+ if (!err)
+ pr_info(NAME "enabled CPU%d.\n", cpu);
+ else
+ pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err);
+ }
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static void enter_uniprocessor(void)
+{
+ if (num_online_cpus() > 1)
+ pr_warning(NAME "multiple CPUs are online, may miss events. "
+ "Suggest booting with maxcpus=1 kernel argument.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+}
+#endif
+
+#if 0 /* XXX: out of order */
+static struct file_operations fops_marker = {
+ .owner = THIS_MODULE,
+ .write = write_marker
+};
+#endif
+
+void enable_mmiotrace(void)
+{
+ mutex_lock(&mmiotrace_mutex);
+ if (is_enabled())
+ goto out;
+
+#if 0 /* XXX: tracing does not support text entries */
+ marker_file = debugfs_create_file("marker", 0660, dir, NULL,
+ &fops_marker);
+ if (!marker_file)
+ pr_err(NAME "marker file creation failed.\n");
+#endif
+
+ if (nommiotrace)
+ pr_info(NAME "MMIO tracing disabled.\n");
+ enter_uniprocessor();
+ spin_lock_irq(&trace_lock);
+ atomic_inc(&mmiotrace_enabled);
+ spin_unlock_irq(&trace_lock);
+ pr_info(NAME "enabled.\n");
+out:
+ mutex_unlock(&mmiotrace_mutex);
+}
+
+void disable_mmiotrace(void)
+{
+ mutex_lock(&mmiotrace_mutex);
+ if (!is_enabled())
+ goto out;
+
+ spin_lock_irq(&trace_lock);
+ atomic_dec(&mmiotrace_enabled);
+ BUG_ON(is_enabled());
+ spin_unlock_irq(&trace_lock);
+
+ clear_trace_list(); /* guarantees: no more kmmio callbacks */
+ leave_uniprocessor();
+ if (marker_file) {
+ debugfs_remove(marker_file);
+ marker_file = NULL;
+ }
+
+ pr_info(NAME "disabled.\n");
+out:
+ mutex_unlock(&mmiotrace_mutex);
+}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 0389cb8..65c6e46 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -141,7 +141,7 @@ static void cpa_flush_all(unsigned long cache)
{
BUG_ON(irqs_disabled());
- on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
+ on_each_cpu(__cpa_flush_all, (void *) cache, 1);
}
static void __cpa_flush_range(void *arg)
@@ -162,7 +162,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
BUG_ON(irqs_disabled());
WARN_ON(PAGE_ALIGN(start) != start);
- on_each_cpu(__cpa_flush_range, NULL, 1, 1);
+ on_each_cpu(__cpa_flush_range, NULL, 1);
if (!cache)
return;
@@ -262,6 +262,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
return pte_offset_kernel(pmd, address);
}
+EXPORT_SYMBOL_GPL(lookup_address);
/*
* Set the new pmd in all the pgds we know about:
@@ -658,11 +659,11 @@ static int cpa_process_alias(struct cpa_data *cpa)
struct cpa_data alias_cpa;
int ret = 0;
- if (cpa->pfn > max_pfn_mapped)
+ if (cpa->pfn >= max_pfn_mapped)
return 0;
#ifdef CONFIG_X86_64
- if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+ if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
return 0;
#endif
/*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 749766c3..d458507 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -449,8 +449,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
if (retval < 0)
return 0;
- if (((pfn <= max_low_pfn_mapped) ||
- (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
+ if (((pfn < max_low_pfn_mapped) ||
+ (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
free_memtype(offset, offset + size);
printk(KERN_INFO
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
new file mode 100644
index 0000000..efa1911
--- /dev/null
+++ b/arch/x86/mm/pf_in.c
@@ -0,0 +1,489 @@
+/*
+ * Fault Injection Test harness (FI)
+ * Copyright (C) Intel Crop.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ *
+ */
+
+/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
+ * Copyright by Intel Crop., 2002
+ * Louis Zhuang (louis.zhuang@intel.com)
+ *
+ * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007
+ */
+
+#include <linux/module.h>
+#include <linux/ptrace.h> /* struct pt_regs */
+#include "pf_in.h"
+
+#ifdef __i386__
+/* IA32 Manual 3, 2-1 */
+static unsigned char prefix_codes[] = {
+ 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64,
+ 0x65, 0x2E, 0x3E, 0x66, 0x67
+};
+/* IA32 Manual 3, 3-432*/
+static unsigned int reg_rop[] = {
+ 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int imm_wop[] = { 0xC6, 0xC7 };
+/* IA32 Manual 3, 3-432*/
+static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static unsigned int rw32[] = {
+ 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+static unsigned int mw16[] = { 0xB70F, 0xBF0F };
+static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+static unsigned int mw64[] = {};
+#else /* not __i386__ */
+static unsigned char prefix_codes[] = {
+ 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36,
+ 0xF0, 0xF3, 0xF2,
+ /* REX Prefixes */
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f
+};
+/* AMD64 Manual 3, Appendix A*/
+static unsigned int reg_rop[] = {
+ 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int imm_wop[] = { 0xC6, 0xC7 };
+static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static unsigned int rw32[] = {
+ 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+};
+/* 8 bit only */
+static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+/* 16 bit only */
+static unsigned int mw16[] = { 0xB70F, 0xBF0F };
+/* 16 or 32 bit */
+static unsigned int mw32[] = { 0xC7 };
+/* 16, 32 or 64 bit */
+static unsigned int mw64[] = { 0x89, 0x8B };
+#endif /* not __i386__ */
+
+static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged,
+ int *rexr)
+{
+ int i;
+ unsigned char *p = addr;
+ *shorted = 0;
+ *enlarged = 0;
+ *rexr = 0;
+
+restart:
+ for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) {
+ if (*p == prefix_codes[i]) {
+ if (*p == 0x66)
+ *shorted = 1;
+#ifdef __amd64__
+ if ((*p & 0xf8) == 0x48)
+ *enlarged = 1;
+ if ((*p & 0xf4) == 0x44)
+ *rexr = 1;
+#endif
+ p++;
+ goto restart;
+ }
+ }
+
+ return (p - addr);
+}
+
+static int get_opcode(unsigned char *addr, unsigned int *opcode)
+{
+ int len;
+
+ if (*addr == 0x0F) {
+ /* 0x0F is extension instruction */
+ *opcode = *(unsigned short *)addr;
+ len = 2;
+ } else {
+ *opcode = *addr;
+ len = 1;
+ }
+
+ return len;
+}
+
+#define CHECK_OP_TYPE(opcode, array, type) \
+ for (i = 0; i < ARRAY_SIZE(array); i++) { \
+ if (array[i] == opcode) { \
+ rv = type; \
+ goto exit; \
+ } \
+ }
+
+enum reason_type get_ins_type(unsigned long ins_addr)
+{
+ unsigned int opcode;
+ unsigned char *p;
+ int shorted, enlarged, rexr;
+ int i;
+ enum reason_type rv = OTHERS;
+
+ p = (unsigned char *)ins_addr;
+ p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += get_opcode(p, &opcode);
+
+ CHECK_OP_TYPE(opcode, reg_rop, REG_READ);
+ CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE);
+ CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE);
+
+exit:
+ return rv;
+}
+#undef CHECK_OP_TYPE
+
+static unsigned int get_ins_reg_width(unsigned long ins_addr)
+{
+ unsigned int opcode;
+ unsigned char *p;
+ int i, shorted, enlarged, rexr;
+
+ p = (unsigned char *)ins_addr;
+ p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += get_opcode(p, &opcode);
+
+ for (i = 0; i < ARRAY_SIZE(rw8); i++)
+ if (rw8[i] == opcode)
+ return 1;
+
+ for (i = 0; i < ARRAY_SIZE(rw32); i++)
+ if (rw32[i] == opcode)
+ return (shorted ? 2 : (enlarged ? 8 : 4));
+
+ printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
+ return 0;
+}
+
+unsigned int get_ins_mem_width(unsigned long ins_addr)
+{
+ unsigned int opcode;
+ unsigned char *p;
+ int i, shorted, enlarged, rexr;
+
+ p = (unsigned char *)ins_addr;
+ p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += get_opcode(p, &opcode);
+
+ for (i = 0; i < ARRAY_SIZE(mw8); i++)
+ if (mw8[i] == opcode)
+ return 1;
+
+ for (i = 0; i < ARRAY_SIZE(mw16); i++)
+ if (mw16[i] == opcode)
+ return 2;
+
+ for (i = 0; i < ARRAY_SIZE(mw32); i++)
+ if (mw32[i] == opcode)
+ return shorted ? 2 : 4;
+
+ for (i = 0; i < ARRAY_SIZE(mw64); i++)
+ if (mw64[i] == opcode)
+ return shorted ? 2 : (enlarged ? 8 : 4);
+
+ printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode);
+ return 0;
+}
+
+/*
+ * Define register ident in mod/rm byte.
+ * Note: these are NOT the same as in ptrace-abi.h.
+ */
+enum {
+ arg_AL = 0,
+ arg_CL = 1,
+ arg_DL = 2,
+ arg_BL = 3,
+ arg_AH = 4,
+ arg_CH = 5,
+ arg_DH = 6,
+ arg_BH = 7,
+
+ arg_AX = 0,
+ arg_CX = 1,
+ arg_DX = 2,
+ arg_BX = 3,
+ arg_SP = 4,
+ arg_BP = 5,
+ arg_SI = 6,
+ arg_DI = 7,
+#ifdef __amd64__
+ arg_R8 = 8,
+ arg_R9 = 9,
+ arg_R10 = 10,
+ arg_R11 = 11,
+ arg_R12 = 12,
+ arg_R13 = 13,
+ arg_R14 = 14,
+ arg_R15 = 15
+#endif
+};
+
+static unsigned char *get_reg_w8(int no, struct pt_regs *regs)
+{
+ unsigned char *rv = NULL;
+
+ switch (no) {
+ case arg_AL:
+ rv = (unsigned char *)&regs->ax;
+ break;
+ case arg_BL:
+ rv = (unsigned char *)&regs->bx;
+ break;
+ case arg_CL:
+ rv = (unsigned char *)&regs->cx;
+ break;
+ case arg_DL:
+ rv = (unsigned char *)&regs->dx;
+ break;
+ case arg_AH:
+ rv = 1 + (unsigned char *)&regs->ax;
+ break;
+ case arg_BH:
+ rv = 1 + (unsigned char *)&regs->bx;
+ break;
+ case arg_CH:
+ rv = 1 + (unsigned char *)&regs->cx;
+ break;
+ case arg_DH:
+ rv = 1 + (unsigned char *)&regs->dx;
+ break;
+#ifdef __amd64__
+ case arg_R8:
+ rv = (unsigned char *)&regs->r8;
+ break;
+ case arg_R9:
+ rv = (unsigned char *)&regs->r9;
+ break;
+ case arg_R10:
+ rv = (unsigned char *)&regs->r10;
+ break;
+ case arg_R11:
+ rv = (unsigned char *)&regs->r11;
+ break;
+ case arg_R12:
+ rv = (unsigned char *)&regs->r12;
+ break;
+ case arg_R13:
+ rv = (unsigned char *)&regs->r13;
+ break;
+ case arg_R14:
+ rv = (unsigned char *)&regs->r14;
+ break;
+ case arg_R15:
+ rv = (unsigned char *)&regs->r15;
+ break;
+#endif
+ default:
+ printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+ break;
+ }
+ return rv;
+}
+
+static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
+{
+ unsigned long *rv = NULL;
+
+ switch (no) {
+ case arg_AX:
+ rv = &regs->ax;
+ break;
+ case arg_BX:
+ rv = &regs->bx;
+ break;
+ case arg_CX:
+ rv = &regs->cx;
+ break;
+ case arg_DX:
+ rv = &regs->dx;
+ break;
+ case arg_SP:
+ rv = &regs->sp;
+ break;
+ case arg_BP:
+ rv = &regs->bp;
+ break;
+ case arg_SI:
+ rv = &regs->si;
+ break;
+ case arg_DI:
+ rv = &regs->di;
+ break;
+#ifdef __amd64__
+ case arg_R8:
+ rv = &regs->r8;
+ break;
+ case arg_R9:
+ rv = &regs->r9;
+ break;
+ case arg_R10:
+ rv = &regs->r10;
+ break;
+ case arg_R11:
+ rv = &regs->r11;
+ break;
+ case arg_R12:
+ rv = &regs->r12;
+ break;
+ case arg_R13:
+ rv = &regs->r13;
+ break;
+ case arg_R14:
+ rv = &regs->r14;
+ break;
+ case arg_R15:
+ rv = &regs->r15;
+ break;
+#endif
+ default:
+ printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no);
+ }
+
+ return rv;
+}
+
+unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
+{
+ unsigned int opcode;
+ unsigned char mod_rm;
+ int reg;
+ unsigned char *p;
+ int i, shorted, enlarged, rexr;
+ unsigned long rv;
+
+ p = (unsigned char *)ins_addr;
+ p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += get_opcode(p, &opcode);
+ for (i = 0; i < ARRAY_SIZE(reg_rop); i++)
+ if (reg_rop[i] == opcode) {
+ rv = REG_READ;
+ goto do_work;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(reg_wop); i++)
+ if (reg_wop[i] == opcode) {
+ rv = REG_WRITE;
+ goto do_work;
+ }
+
+ printk(KERN_ERR "mmiotrace: Not a register instruction, opcode "
+ "0x%02x\n", opcode);
+ goto err;
+
+do_work:
+ mod_rm = *p;
+ reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);
+ switch (get_ins_reg_width(ins_addr)) {
+ case 1:
+ return *get_reg_w8(reg, regs);
+
+ case 2:
+ return *(unsigned short *)get_reg_w32(reg, regs);
+
+ case 4:
+ return *(unsigned int *)get_reg_w32(reg, regs);
+
+#ifdef __amd64__
+ case 8:
+ return *(unsigned long *)get_reg_w32(reg, regs);
+#endif
+
+ default:
+ printk(KERN_ERR "mmiotrace: Error width# %d\n", reg);
+ }
+
+err:
+ return 0;
+}
+
+unsigned long get_ins_imm_val(unsigned long ins_addr)
+{
+ unsigned int opcode;
+ unsigned char mod_rm;
+ unsigned char mod;
+ unsigned char *p;
+ int i, shorted, enlarged, rexr;
+ unsigned long rv;
+
+ p = (unsigned char *)ins_addr;
+ p += skip_prefix(p, &shorted, &enlarged, &rexr);
+ p += get_opcode(p, &opcode);
+ for (i = 0; i < ARRAY_SIZE(imm_wop); i++)
+ if (imm_wop[i] == opcode) {
+ rv = IMM_WRITE;
+ goto do_work;
+ }
+
+ printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode "
+ "0x%02x\n", opcode);
+ goto err;
+
+do_work:
+ mod_rm = *p;
+ mod = mod_rm >> 6;
+ p++;
+ switch (mod) {
+ case 0:
+ /* if r/m is 5 we have a 32 disp (IA32 Manual 3, Table 2-2) */
+ /* AMD64: XXX Check for address size prefix? */
+ if ((mod_rm & 0x7) == 0x5)
+ p += 4;
+ break;
+
+ case 1:
+ p += 1;
+ break;
+
+ case 2:
+ p += 4;
+ break;
+
+ case 3:
+ default:
+ printk(KERN_ERR "mmiotrace: not a memory access instruction "
+ "at 0x%lx, rm_mod=0x%02x\n",
+ ins_addr, mod_rm);
+ }
+
+ switch (get_ins_reg_width(ins_addr)) {
+ case 1:
+ return *(unsigned char *)p;
+
+ case 2:
+ return *(unsigned short *)p;
+
+ case 4:
+ return *(unsigned int *)p;
+
+#ifdef __amd64__
+ case 8:
+ return *(unsigned long *)p;
+#endif
+
+ default:
+ printk(KERN_ERR "mmiotrace: Error: width.\n");
+ }
+
+err:
+ return 0;
+}
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h
new file mode 100644
index 0000000..e05341a
--- /dev/null
+++ b/arch/x86/mm/pf_in.h
@@ -0,0 +1,39 @@
+/*
+ * Fault Injection Test harness (FI)
+ * Copyright (C) Intel Crop.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ * USA.
+ *
+ */
+
+#ifndef __PF_H_
+#define __PF_H_
+
+enum reason_type {
+ NOT_ME, /* page fault is not in regions */
+ NOTHING, /* access others point in regions */
+ REG_READ, /* read from addr to reg */
+ REG_WRITE, /* write from reg to addr */
+ IMM_WRITE, /* write from imm to addr */
+ OTHERS /* Other instructions can not intercept */
+};
+
+enum reason_type get_ins_type(unsigned long ins_addr);
+unsigned int get_ins_mem_width(unsigned long ins_addr);
+unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs);
+unsigned long get_ins_imm_val(unsigned long ins_addr);
+
+#endif /* __PF_H_ */
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c
index f41d67f..1eb2973 100644
--- a/arch/x86/mm/srat_32.c
+++ b/arch/x86/mm/srat_32.c
@@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity)
num_memory_chunks++;
- printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)"
+ printk(KERN_DEBUG "Memory range %08lx to %08lx"
" in proximity domain %02x %s\n",
start_pfn, end_pfn,
- memory_affinity->memory_type,
pxm,
((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ?
"enabled and removable" : "enabled" ) );
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
new file mode 100644
index 0000000..d877c5b
--- /dev/null
+++ b/arch/x86/mm/testmmiotrace.c
@@ -0,0 +1,71 @@
+/*
+ * Written by Pekka Paalanen, 2008 <pq@iki.fi>
+ */
+#include <linux/module.h>
+#include <linux/io.h>
+
+#define MODULE_NAME "testmmiotrace"
+
+static unsigned long mmio_address;
+module_param(mmio_address, ulong, 0);
+MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB.");
+
+static void do_write_test(void __iomem *p)
+{
+ unsigned int i;
+ for (i = 0; i < 256; i++)
+ iowrite8(i, p + i);
+ for (i = 1024; i < (5 * 1024); i += 2)
+ iowrite16(i * 12 + 7, p + i);
+ for (i = (5 * 1024); i < (16 * 1024); i += 4)
+ iowrite32(i * 212371 + 13, p + i);
+}
+
+static void do_read_test(void __iomem *p)
+{
+ unsigned int i;
+ for (i = 0; i < 256; i++)
+ ioread8(p + i);
+ for (i = 1024; i < (5 * 1024); i += 2)
+ ioread16(p + i);
+ for (i = (5 * 1024); i < (16 * 1024); i += 4)
+ ioread32(p + i);
+}
+
+static void do_test(void)
+{
+ void __iomem *p = ioremap_nocache(mmio_address, 0x4000);
+ if (!p) {
+ pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
+ return;
+ }
+ do_write_test(p);
+ do_read_test(p);
+ iounmap(p);
+}
+
+static int __init init(void)
+{
+ if (mmio_address == 0) {
+ pr_err(MODULE_NAME ": you have to use the module argument "
+ "mmio_address.\n");
+ pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
+ " YOU REALLY KNOW WHAT YOU ARE DOING!\n");
+ return -ENXIO;
+ }
+
+ pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
+ "in PCI address space, and writing "
+ "rubbish in there.\n", mmio_address);
+ do_test();
+ return 0;
+}
+
+static void __exit cleanup(void)
+{
+ pr_debug(MODULE_NAME ": unloaded.\n");
+}
+
+module_init(init);
+module_exit(cleanup);
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 2b6ad5b..7f3329b 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -218,8 +218,8 @@ static int nmi_setup(void)
}
}
- on_each_cpu(nmi_save_registers, NULL, 0, 1);
- on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+ on_each_cpu(nmi_save_registers, NULL, 1);
+ on_each_cpu(nmi_cpu_setup, NULL, 1);
nmi_enabled = 1;
return 0;
}
@@ -271,7 +271,7 @@ static void nmi_shutdown(void)
{
struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
nmi_enabled = 0;
- on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+ on_each_cpu(nmi_cpu_shutdown, NULL, 1);
unregister_die_notifier(&profile_exceptions_nb);
model->shutdown(msrs);
free_msrs();
@@ -286,7 +286,7 @@ static void nmi_cpu_start(void *dummy)
static int nmi_start(void)
{
- on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+ on_each_cpu(nmi_cpu_start, NULL, 1);
return 0;
}
@@ -298,7 +298,7 @@ static void nmi_cpu_stop(void *dummy)
static void nmi_stop(void)
{
- on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+ on_each_cpu(nmi_cpu_stop, NULL, 1);
}
struct op_counter_config counter_config[OP_MAX_COUNTER];
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index a18141a..dbf5323 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -578,7 +578,7 @@ static int __init enable_pci_io_ecs(void)
/* assume all cpus from fam10h have IO ECS */
if (boot_cpu_data.x86 < 0x10)
return 0;
- on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1);
+ on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1);
pci_probe |= PCI_HAS_IO_ECS;
return 0;
}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 20b9f59..b67732b 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -20,6 +20,7 @@
unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
PCI_PROBE_MMCONF;
+unsigned int pci_early_dump_regs;
static int pci_bf_sort;
int pci_routeirq;
int pcibios_last_bus = -1;
@@ -31,7 +32,7 @@ struct pci_raw_ops *raw_pci_ext_ops;
int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val)
{
- if (reg < 256 && raw_pci_ops)
+ if (domain == 0 && reg < 256 && raw_pci_ops)
return raw_pci_ops->read(domain, bus, devfn, reg, len, val);
if (raw_pci_ext_ops)
return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val);
@@ -41,7 +42,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 val)
{
- if (reg < 256 && raw_pci_ops)
+ if (domain == 0 && reg < 256 && raw_pci_ops)
return raw_pci_ops->write(domain, bus, devfn, reg, len, val);
if (raw_pci_ext_ops)
return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val);
@@ -121,6 +122,21 @@ void __init dmi_check_skip_isa_align(void)
dmi_check_system(can_skip_pciprobe_dmi_table);
}
+static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+{
+ struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
+
+ if (pci_probe & PCI_NOASSIGN_ROMS) {
+ if (rom_r->parent)
+ return;
+ if (rom_r->start) {
+ /* we deal with BIOS assigned ROM later */
+ return;
+ }
+ rom_r->start = rom_r->end = rom_r->flags = 0;
+ }
+}
+
/*
* Called after each bus is probed, but before its children
* are examined.
@@ -128,7 +144,11 @@ void __init dmi_check_skip_isa_align(void)
void __devinit pcibios_fixup_bus(struct pci_bus *b)
{
+ struct pci_dev *dev;
+
pci_read_bridge_bases(b);
+ list_for_each_entry(dev, &b->devices, bus_list)
+ pcibios_fixup_device_resources(dev);
}
/*
@@ -481,12 +501,18 @@ char * __devinit pcibios_setup(char *str)
else if (!strcmp(str, "rom")) {
pci_probe |= PCI_ASSIGN_ROMS;
return NULL;
+ } else if (!strcmp(str, "norom")) {
+ pci_probe |= PCI_NOASSIGN_ROMS;
+ return NULL;
} else if (!strcmp(str, "assign-busses")) {
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
return NULL;
} else if (!strcmp(str, "use_crs")) {
pci_probe |= PCI_USE__CRS;
return NULL;
+ } else if (!strcmp(str, "earlydump")) {
+ pci_early_dump_regs = 1;
+ return NULL;
} else if (!strcmp(str, "routeirq")) {
pci_routeirq = 1;
return NULL;
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 42df4b6..858dbe3 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val)
{
PDprintk("%x writing to %x: %x\n", slot, offset, val);
outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
- outb(val, 0xcfc);
+ outb(val, 0xcfc + (offset&3));
+}
+
+void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val)
+{
+ PDprintk("%x writing to %x: %x\n", slot, offset, val);
+ outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+ outw(val, 0xcfc + (offset&2));
}
int early_pci_allowed(void)
@@ -57,3 +64,54 @@ int early_pci_allowed(void)
return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
PCI_PROBE_CONF1;
}
+
+void early_dump_pci_device(u8 bus, u8 slot, u8 func)
+{
+ int i;
+ int j;
+ u32 val;
+
+ printk("PCI: %02x:%02x:%02x", bus, slot, func);
+
+ for (i = 0; i < 256; i += 4) {
+ if (!(i & 0x0f))
+ printk("\n%04x:",i);
+
+ val = read_pci_config(bus, slot, func, i);
+ for (j = 0; j < 4; j++) {
+ printk(" %02x", val & 0xff);
+ val >>= 8;
+ }
+ }
+ printk("\n");
+}
+
+void early_dump_pci_devices(void)
+{
+ unsigned bus, slot, func;
+
+ if (!early_pci_allowed())
+ return;
+
+ for (bus = 0; bus < 256; bus++) {
+ for (slot = 0; slot < 32; slot++) {
+ for (func = 0; func < 8; func++) {
+ u32 class;
+ u8 type;
+ class = read_pci_config(bus, slot, func,
+ PCI_CLASS_REVISION);
+ if (class == 0xffffffff)
+ break;
+
+ early_dump_pci_device(bus, slot, func);
+
+ /* No multi-function device? */
+ type = read_pci_config_byte(bus, slot, func,
+ PCI_HEADER_TYPE);
+ if (!(type & 0x80))
+ break;
+ }
+ }
+ }
+}
+
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5281e34..2aafb67 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -334,9 +334,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
flags = new_flags;
}
- if (((vma->vm_pgoff <= max_low_pfn_mapped) ||
+ if (((vma->vm_pgoff < max_low_pfn_mapped) ||
(vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
- vma->vm_pgoff <= max_pfn_mapped)) &&
+ vma->vm_pgoff < max_pfn_mapped)) &&
ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
free_memtype(addr, addr + len);
return -EINVAL;
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index dc568c6..6a06a2e 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -45,7 +45,8 @@ struct irq_router {
char *name;
u16 vendor, device;
int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
- int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+ int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq,
+ int new);
};
struct irq_router_handler {
@@ -77,7 +78,8 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
for (i = 0; i < rt->size; i++)
sum += addr[i];
if (!sum) {
- DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt);
+ DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
+ rt);
return rt;
}
return NULL;
@@ -183,7 +185,8 @@ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset,
return (nr & 1) ? (x >> 4) : (x & 0xf);
}
-static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+static void write_config_nybble(struct pci_dev *router, unsigned offset,
+ unsigned nr, unsigned int val)
{
u8 x;
unsigned reg = offset + (nr >> 1);
@@ -467,7 +470,8 @@ static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int
return inb(0xc01) & 0xf;
}
-static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev,
+ int pirq, int irq)
{
outb(pirq, 0xc00);
outb(irq, 0xc01);
@@ -660,7 +664,8 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router
}
-static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+static __init int serverworks_router_probe(struct irq_router *r,
+ struct pci_dev *router, u16 device)
{
switch (device) {
case PCI_DEVICE_ID_SERVERWORKS_OSB4:
@@ -827,10 +832,12 @@ static void __init pirq_find_router(struct irq_router *r)
for (h = pirq_routers; h->vendor; h++) {
/* First look for a router match */
- if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
+ if (rt->rtr_vendor == h->vendor &&
+ h->probe(r, pirq_router_dev, rt->rtr_device))
break;
/* Fall back to a device match */
- if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
+ if (pirq_router_dev->vendor == h->vendor &&
+ h->probe(r, pirq_router_dev, pirq_router_dev->device))
break;
}
printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
@@ -845,11 +852,13 @@ static void __init pirq_find_router(struct irq_router *r)
static struct irq_info *pirq_get_info(struct pci_dev *dev)
{
struct irq_routing_table *rt = pirq_table;
- int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+ int entries = (rt->size - sizeof(struct irq_routing_table)) /
+ sizeof(struct irq_info);
struct irq_info *info;
for (info = rt->slots; entries--; info++)
- if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+ if (info->bus == dev->bus->number &&
+ PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
return info;
return NULL;
}
@@ -890,7 +899,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
DBG(" -> not routed\n" KERN_DEBUG);
return 0;
}
- DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+ DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask,
+ pirq_table->exclusive_irqs);
mask &= pcibios_irq_mask;
/* Work around broken HP Pavilion Notebooks which assign USB to
@@ -903,7 +913,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
}
/* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
- if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
+ if (acer_tm360_irqrouting && dev->irq == 11 &&
+ dev->vendor == PCI_VENDOR_ID_O2) {
pirq = 0x68;
mask = 0x400;
dev->irq = r->get(pirq_router_dev, dev, pirq);
@@ -920,15 +931,16 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
newirq = 0;
else
printk("\n" KERN_WARNING
- "PCI: IRQ %i for device %s doesn't match PIRQ mask "
- "- try pci=usepirqmask\n" KERN_DEBUG, newirq,
- pci_name(dev));
+ "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n"
+ KERN_DEBUG, newirq,
+ pci_name(dev));
}
if (!newirq && assign) {
for (i = 0; i < 16; i++) {
if (!(mask & (1 << i)))
continue;
- if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED))
+ if (pirq_penalty[i] < pirq_penalty[newirq] &&
+ can_request_irq(i, IRQF_SHARED))
newirq = i;
}
}
@@ -944,7 +956,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
DBG(" -> got IRQ %d\n", irq);
msg = "Found";
eisa_set_level_irq(irq);
- } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+ } else if (newirq && r->set &&
+ (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
DBG(" -> assigning IRQ %d", newirq);
if (r->set(pirq_router_dev, dev, pirq, newirq)) {
eisa_set_level_irq(newirq);
@@ -962,7 +975,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
} else
return 0;
}
- printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
+ printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq,
+ pci_name(dev));
/* Update IRQ for all devices with the same pirq value */
while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
@@ -974,7 +988,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
if (!info)
continue;
if (info->irq[pin].link == pirq) {
- /* We refuse to override the dev->irq information. Give a warning! */
+ /*
+ * We refuse to override the dev->irq
+ * information. Give a warning!
+ */
if (dev2->irq && dev2->irq != irq && \
(!(pci_probe & PCI_USE_PIRQ_MASK) || \
((1 << dev2->irq) & mask))) {
@@ -987,7 +1004,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
dev2->irq = irq;
pirq_penalty[irq]++;
if (dev != dev2)
- printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
+ printk(KERN_INFO
+ "PCI: Sharing IRQ %d with %s\n",
+ irq, pci_name(dev2));
}
}
return 1;
@@ -1001,15 +1020,21 @@ static void __init pcibios_fixup_irqs(void)
DBG(KERN_DEBUG "PCI: IRQ fixup\n");
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
/*
- * If the BIOS has set an out of range IRQ number, just ignore it.
- * Also keep track of which IRQ's are already in use.
+ * If the BIOS has set an out of range IRQ number, just
+ * ignore it. Also keep track of which IRQ's are
+ * already in use.
*/
if (dev->irq >= 16) {
- DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
+ DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n",
+ pci_name(dev), dev->irq);
dev->irq = 0;
}
- /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
- if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+ /*
+ * If the IRQ is already assigned to a PCI device,
+ * ignore its ISA use penalty
+ */
+ if (pirq_penalty[dev->irq] >= 100 &&
+ pirq_penalty[dev->irq] < 100000)
pirq_penalty[dev->irq] = 0;
pirq_penalty[dev->irq]++;
}
@@ -1025,8 +1050,13 @@ static void __init pcibios_fixup_irqs(void)
int irq;
if (pin) {
- pin--; /* interrupt pins are numbered starting from 1 */
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+ /*
+ * interrupt pins are numbered starting
+ * from 1
+ */
+ pin--;
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+ PCI_SLOT(dev->devfn), pin);
/*
* Busses behind bridges are typically not listed in the MP-table.
* In this case we have to look up the IRQ based on the parent bus,
@@ -1067,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d)
{
if (!broken_hp_bios_irq9) {
broken_hp_bios_irq9 = 1;
- printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+ d->ident);
}
return 0;
}
@@ -1080,7 +1111,8 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d)
{
if (!acer_tm360_irqrouting) {
acer_tm360_irqrouting = 1;
- printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+ d->ident);
}
return 0;
}
@@ -1092,7 +1124,8 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = {
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,
+ "HP Pavilion Notebook Model GE"),
DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
},
},
@@ -1131,7 +1164,10 @@ int __init pcibios_irq_init(void)
if (!(pirq_table->exclusive_irqs & (1 << i)))
pirq_penalty[i] += 100;
}
- /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+ /*
+ * If we're using the I/O APIC, avoid using the PCI IRQ
+ * routing table
+ */
if (io_apic_assign_pci_irqs)
pirq_table = NULL;
}
@@ -1175,7 +1211,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
char *msg = "";
- pin--; /* interrupt pins are numbered starting from 1 */
+ pin--; /* interrupt pins are numbered starting from 1 */
if (io_apic_assign_pci_irqs) {
int irq;
@@ -1195,13 +1231,16 @@ static int pirq_enable_irq(struct pci_dev *dev)
irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
PCI_SLOT(bridge->devfn), pin);
if (irq >= 0)
- printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
- pci_name(bridge), 'A' + pin, irq);
+ printk(KERN_WARNING
+ "PCI: using PPB %s[%c] to get irq %d\n",
+ pci_name(bridge),
+ 'A' + pin, irq);
dev = bridge;
}
dev = temp_dev;
if (irq >= 0) {
- printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+ printk(KERN_INFO
+ "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
pci_name(dev), 'A' + pin, irq);
dev->irq = irq;
return 0;
@@ -1212,12 +1251,17 @@ static int pirq_enable_irq(struct pci_dev *dev)
else
msg = " Please try using pci=biosirq.";
- /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
- if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+ /*
+ * With IDE legacy devices the IRQ lookup failure is not
+ * a problem..
+ */
+ if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE &&
+ !(dev->class & 0x5))
return 0;
- printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
- 'A' + pin, pci_name(dev), msg);
+ printk(KERN_WARNING
+ "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+ 'A' + pin, pci_name(dev), msg);
}
return 0;
}
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index b2270a5..3e25deb 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -28,6 +28,7 @@
#define PCI_USE__CRS 0x10000
#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000
#define PCI_HAS_IO_ECS 0x40000
+#define PCI_NOASSIGN_ROMS 0x80000
extern unsigned int pci_probe;
extern unsigned long pirq_table_addr;
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index efa2ba7..1ef0f90 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -23,7 +23,7 @@
#define gtod vdso_vsyscall_gtod_data
-static long vdso_fallback_gettime(long clock, struct timespec *ts)
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
asm("syscall" : "=a" (ret) :
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts)
return ret;
}
-static inline long vgetns(void)
+notrace static inline long vgetns(void)
{
long v;
cycles_t (*vread)(void);
@@ -40,7 +40,7 @@ static inline long vgetns(void)
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
-static noinline int do_realtime(struct timespec *ts)
+notrace static noinline int do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
do {
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts)
}
/* Copy of the version in kernel/time.c which we cannot directly access */
-static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+notrace static void
+vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
ts->tv_nsec = nsec;
}
-static noinline int do_monotonic(struct timespec *ts)
+notrace static noinline int do_monotonic(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts)
return 0;
}
-int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
if (likely(gtod->sysctl_enabled && gtod->clock.vread))
switch (clock) {
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
-int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index c8097f1..9fbc6b2 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,7 +13,8 @@
#include <asm/vgtod.h>
#include "vextern.h"
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
+notrace long
+__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
unsigned int p;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d11dda7e..402f3e2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1243,7 +1243,9 @@ static const struct smp_ops xen_smp_ops __initdata = {
.smp_send_stop = xen_smp_send_stop,
.smp_send_reschedule = xen_smp_send_reschedule,
- .smp_call_function_mask = xen_smp_call_function_mask,
+
+ .send_call_func_ipi = xen_smp_send_call_function_ipi,
+ .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};
#endif /* CONFIG_SMP */
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 42b3b9e..ff0aa74 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -796,7 +796,7 @@ static void drop_mm_ref(struct mm_struct *mm)
}
if (!cpus_empty(mask))
- xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
+ smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
static void drop_mm_ref(struct mm_struct *mm)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d2e3c20..233156f 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,27 +36,14 @@
#include "mmu.h"
cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq) = -1;
-static DEFINE_PER_CPU(int, callfunc_irq) = -1;
-static DEFINE_PER_CPU(int, debug_irq) = -1;
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, callfuncsingle_irq);
+static DEFINE_PER_CPU(int, debug_irq) = -1;
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
-
-static struct call_data_struct *call_data;
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
/*
* Reschedule call back. Nothing to do,
@@ -128,6 +115,17 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(debug_irq, cpu) = rc;
+ callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
+ cpu,
+ xen_call_function_single_interrupt,
+ IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
+ callfunc_name,
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(callfuncsingle_irq, cpu) = rc;
+
return 0;
fail:
@@ -137,6 +135,9 @@ static int xen_smp_intr_init(unsigned int cpu)
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
if (per_cpu(debug_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
+ if (per_cpu(callfuncsingle_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
+
return rc;
}
@@ -336,7 +337,7 @@ static void stop_self(void *v)
void xen_smp_send_stop(void)
{
- smp_call_function(stop_self, NULL, 0, 0);
+ smp_call_function(stop_self, NULL, 0);
}
void xen_smp_send_reschedule(int cpu)
@@ -344,7 +345,6 @@ void xen_smp_send_reschedule(int cpu)
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}
-
static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
unsigned cpu;
@@ -355,83 +355,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
xen_send_IPI_one(cpu, vector);
}
+void xen_smp_send_call_function_ipi(cpumask_t mask)
+{
+ int cpu;
+
+ xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
+
+ /* Make sure other vcpus get a chance to run if they need to. */
+ for_each_cpu_mask(cpu, mask) {
+ if (xen_vcpu_stolen(cpu)) {
+ HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ break;
+ }
+ }
+}
+
+void xen_smp_send_call_function_single_ipi(int cpu)
+{
+ xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR);
+}
+
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
irq_enter();
- (*func)(info);
+ generic_smp_call_function_interrupt();
__get_cpu_var(irq_stat).irq_call_count++;
irq_exit();
- if (wait) {
- mb(); /* commit everything before setting finished */
- atomic_inc(&call_data->finished);
- }
-
return IRQ_HANDLED;
}
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait)
+static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
- struct call_data_struct data;
- int cpus, cpu;
- bool yield;
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
-
- cpu_clear(smp_processor_id(), mask);
-
- cpus = cpus_weight(mask);
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb(); /* write everything before IPI */
-
- /* Send a message to other CPUs and wait for them to respond */
- xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
-
- /* Make sure other vcpus get a chance to run if they need to. */
- yield = false;
- for_each_cpu_mask(cpu, mask)
- if (xen_vcpu_stolen(cpu))
- yield = true;
-
- if (yield)
- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus ||
- (wait && atomic_read(&data.finished) != cpus))
- cpu_relax();
-
- spin_unlock(&call_lock);
+ irq_enter();
+ generic_smp_call_function_single_interrupt();
+ __get_cpu_var(irq_stat).irq_call_count++;
+ irq_exit();
- return 0;
+ return IRQ_HANDLED;
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d852ddb..6f4b104 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -55,13 +55,8 @@ void xen_smp_cpus_done(unsigned int max_cpus);
void xen_smp_send_stop(void);
void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait);
-int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait);
-
-int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
- void *info, int wait);
+void xen_smp_send_call_function_ipi(cpumask_t mask);
+void xen_smp_send_call_function_single_ipi(int cpu);
extern cpumask_t xen_cpu_initialized_map;
OpenPOWER on IntegriCloud