Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 4
-rw-r--r--  arch/x86/kernel/apic_32.c | 3
-rw-r--r--  arch/x86/kernel/apic_64.c | 3
-rw-r--r--  arch/x86/kernel/apm_32.c | 3
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 1
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 6
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c | 18
-rw-r--r--  arch/x86/kernel/cpu/nexgen.c | 59
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c | 14
-rw-r--r--  arch/x86/kernel/crash.c | 3
-rw-r--r--  arch/x86/kernel/e820_64.c | 35
-rw-r--r--  arch/x86/kernel/genapic_64.c | 2
-rw-r--r--  arch/x86/kernel/head64.c | 25
-rw-r--r--  arch/x86/kernel/hpet.c | 2
-rw-r--r--  arch/x86/kernel/i8253.c | 6
-rw-r--r--  arch/x86/kernel/io_apic_32.c | 2
-rw-r--r--  arch/x86/kernel/io_apic_64.c | 2
-rw-r--r--  arch/x86/kernel/irq_32.c | 2
-rw-r--r--  arch/x86/kernel/kdebugfs.c | 163
-rw-r--r--  arch/x86/kernel/kvm.c | 248
-rw-r--r--  arch/x86/kernel/kvmclock.c | 187
-rw-r--r--  arch/x86/kernel/mfgpt_32.c | 3
-rw-r--r--  arch/x86/kernel/mpparse.c | 39
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c | 1
-rw-r--r--  arch/x86/kernel/process.c | 117
-rw-r--r--  arch/x86/kernel/process_32.c | 118
-rw-r--r--  arch/x86/kernel/process_64.c | 123
-rw-r--r--  arch/x86/kernel/ptrace.c | 95
-rw-r--r--  arch/x86/kernel/reboot.c | 13
-rw-r--r--  arch/x86/kernel/setup_32.c | 10
-rw-r--r--  arch/x86/kernel/setup_64.c | 36
-rw-r--r--  arch/x86/kernel/signal_32.c | 35
-rw-r--r--  arch/x86/kernel/signal_64.c | 30
-rw-r--r--  arch/x86/kernel/smpboot.c | 4
-rw-r--r--  arch/x86/kernel/summit_32.c | 5
-rw-r--r--  arch/x86/kernel/tlb_64.c | 4
-rw-r--r--  arch/x86/kernel/trampoline_32.S | 2
-rw-r--r--  arch/x86/kernel/traps_32.c | 2
39 files changed, 919 insertions, 508 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90e092d..fa19c38 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o
+obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 057ccf1..977ed5c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
#define HPET_RESOURCE_NAME_SIZE 9
hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
- if (!hpet_res)
- return 0;
-
- memset(hpet_res, 0, sizeof(*hpet_res));
hpet_res->name = (void *)&hpet_res[1];
hpet_res->flags = IORESOURCE_MEM;
snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8317401..4b99b1b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void)
}
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32);
+ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+ lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
lapic_clockevent.min_delta_ns =
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index bf83157..5910020 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void)
result / 1000 / 1000, result / 1000 % 1000);
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+ lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+ lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
lapic_clockevent.min_delta_ns =
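[Illustration, not part of the patch: the two apic hunks above, like the hpet.c, i8253.c and mfgpt_32.c hunks further down, replace a hard-coded shift of 32 with the clockevent's own .shift field when building .mult. A standalone userspace sketch of the fixed-point relation involved, assuming div_sc(ticks, nsec, shift) computes (ticks << shift) / nsec as in kernel/time/clockevents.c and that the programming path later inverts it as ticks = ns * mult >> shift.]

/*
 * Sketch: how .mult and .shift cooperate for a clock event device.
 * Numbers are the PIT's, purely for illustration.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t div_sc(uint64_t ticks, uint64_t nsec, unsigned int shift)
{
	return (ticks << shift) / nsec;		/* assumed div_sc() behaviour */
}

int main(void)
{
	uint64_t rate = 1193182;		/* PIT input clock, Hz */
	unsigned int shift = 32;		/* must equal the device's .shift */
	uint64_t mult = div_sc(rate, 1000000000ULL, shift);

	uint64_t delta_ns = 10 * 1000 * 1000;	/* ask for a 10 ms event */
	uint64_t ticks = (delta_ns * mult) >> shift;	/* what program_event() does */

	/* 10 ms at 1.193182 MHz is ~11932 ticks */
	printf("mult=%llu, 10ms -> %llu ticks\n",
	       (unsigned long long)mult, (unsigned long long)ticks);
	return 0;
}

[Deriving mult from the structure's own .shift keeps the pair consistent if the shift value is ever changed.]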
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f0030a0..e4ea362 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -904,6 +904,7 @@ recalc:
original_pm_idle();
else
default_idle();
+ local_irq_disable();
jiffies_since_last_check = jiffies - last_jiffies;
if (jiffies_since_last_check > idle_period)
goto recalc;
@@ -911,6 +912,8 @@ recalc:
if (apm_idle_done)
apm_do_busy();
+
+ local_irq_enable();
}
/**
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee7c452..a0c6f81 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32) += cyrix.o
obj-$(CONFIG_X86_32) += centaur.o
obj-$(CONFIG_X86_32) += transmeta.o
obj-$(CONFIG_X86_32) += intel.o
-obj-$(CONFIG_X86_32) += nexgen.o
obj-$(CONFIG_X86_32) += umc.o
obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0173065..2458668 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
.c_size_cache = amd_size_cache,
};
-int __init amd_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
- return 0;
-}
-
cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 9a699ed..e07e8c0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -49,7 +49,7 @@ static int banks;
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
static unsigned long notify_user;
static int rip_msr;
-static int mce_bootlog = 1;
+static int mce_bootlog = -1;
static atomic_t mce_events;
static char trigger[128];
@@ -471,13 +471,15 @@ static void mce_init(void *dummy)
static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
{
/* This should be disabled by the BIOS, but isn't always */
- if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
- /* disable GART TBL walk error reporting, which trips off
- incorrectly with the IOMMU & 3ware & Cerberus. */
- clear_bit(10, &bank[4]);
- /* Lots of broken BIOS around that don't clear them
- by default and leave crap in there. Don't log. */
- mce_bootlog = 0;
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if(c->x86 == 15)
+ /* disable GART TBL walk error reporting, which trips off
+ incorrectly with the IOMMU & 3ware & Cerberus. */
+ clear_bit(10, &bank[4]);
+ if(c->x86 <= 17 && mce_bootlog < 0)
+ /* Lots of broken BIOS around that don't clear them
+ by default and leave crap in there. Don't log. */
+ mce_bootlog = 0;
}
}
diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c
deleted file mode 100644
index 5d5e1c1..0000000
--- a/arch/x86/kernel/cpu/nexgen.c
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-/*
- * Detect a NexGen CPU running without BIOS hypercode new enough
- * to have CPUID. (Thanks to Herbert Oppmann)
- */
-
-static int __cpuinit deep_magic_nexgen_probe(void)
-{
- int ret;
-
- __asm__ __volatile__ (
- " movw $0x5555, %%ax\n"
- " xorw %%dx,%%dx\n"
- " movw $2, %%cx\n"
- " divw %%cx\n"
- " movl $0, %%eax\n"
- " jnz 1f\n"
- " movl $1, %%eax\n"
- "1:\n"
- : "=a" (ret) : : "cx", "dx");
- return ret;
-}
-
-static void __cpuinit init_nexgen(struct cpuinfo_x86 *c)
-{
- c->x86_cache_size = 256; /* A few had 1 MB... */
-}
-
-static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c)
-{
- /* Detect NexGen with old hypercode */
- if (deep_magic_nexgen_probe())
- strcpy(c->x86_vendor_id, "NexGenDriven");
-}
-
-static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
- .c_vendor = "Nexgen",
- .c_ident = { "NexGenDriven" },
- .c_models = {
- { .vendor = X86_VENDOR_NEXGEN,
- .family = 5,
- .model_names = { [1] = "Nx586" }
- },
- },
- .c_init = init_nexgen,
- .c_identify = nexgen_identify,
-};
-
-int __init nexgen_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
- return 0;
-}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index b943e10..f9ae93a 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = {
.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};
-static struct wd_ops coreduo_wd_ops = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_intel_arch_watchdog,
- .rearm = p6_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
-};
-
static void probe_nmi_watchdog(void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void)
/* Work around Core Duo (Yonah) errata AE49 where perfctr1
doesn't have a working enable bit. */
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
- wd_ops = &coreduo_wd_ops;
- break;
+ intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
+ intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
}
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
wd_ops = &intel_arch_wd_ops;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2251d0a..2685538 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
#include <asm/hpet.h>
#include <linux/kdebug.h>
#include <asm/smp.h>
+#include <asm/reboot.h>
#include <mach_ipi.h>
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void)
}
#endif
-void machine_crash_shutdown(struct pt_regs *regs)
+void native_machine_crash_shutdown(struct pt_regs *regs)
{
/* This function is only called after the system
* has panicked or is otherwise in a critical state.
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index cbd42e5..645ee5e 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -84,14 +84,41 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name)
strncpy(r->name, name, sizeof(r->name) - 1);
}
-void __init early_res_to_bootmem(void)
+void __init free_early(unsigned long start, unsigned long end)
+{
+ struct early_res *r;
+ int i, j;
+
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ r = &early_res[i];
+ if (start == r->start && end == r->end)
+ break;
+ }
+ if (i >= MAX_EARLY_RES || !early_res[i].end)
+ panic("free_early on not reserved area: %lx-%lx!", start, end);
+
+ for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
+ ;
+
+ memcpy(&early_res[i], &early_res[i + 1],
+ (j - 1 - i) * sizeof(struct early_res));
+
+ early_res[j - 1].end = 0;
+}
+
+void __init early_res_to_bootmem(unsigned long start, unsigned long end)
{
int i;
+ unsigned long final_start, final_end;
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
struct early_res *r = &early_res[i];
- printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
- r->start, r->end - 1, r->name);
- reserve_bootmem_generic(r->start, r->end - r->start);
+ final_start = max(start, r->start);
+ final_end = min(end, r->end);
+ if (final_start >= final_end)
+ continue;
+ printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
+ final_start, final_end - 1, r->name);
+ reserve_bootmem_generic(final_start, final_end - final_start);
}
}
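[Illustration, not part of the patch: early_res_to_bootmem() now takes a window and intersects each recorded early reservation with it, skipping empty intersections. A small userspace sketch of that clamping, with made-up reservations and window bounds.]

#include <stdio.h>

struct res { unsigned long start, end; const char *name; };

int main(void)
{
	struct res early[] = {			/* invented example reservations */
		{ 0x00000,  0x01000,  "BIOS data page" },
		{ 0x9f000,  0xa0000,  "EBDA" },
		{ 0x200000, 0x400000, "TEXT DATA BSS" },
	};
	unsigned long start = 0x100000, end = 0x1000000;	/* bootmem window */

	for (int i = 0; i < 3; i++) {
		unsigned long s = early[i].start > start ? early[i].start : start;
		unsigned long e = early[i].end   < end   ? early[i].end   : end;

		if (s >= e)
			continue;	/* reservation lies outside the window */
		printf("reserve [%lx-%lx] %s\n", s, e - 1, early[i].name);
	}
	return 0;
}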
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 9546ef4..021624c 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
else
#endif
- if (cpus_weight(cpu_possible_map) <= 8)
+ if (num_possible_cpus() <= 8)
genapic = &apic_flat;
else
genapic = &apic_physflat;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 993c767..e25c57b 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/percpu.h>
#include <linux/start_kernel.h>
+#include <linux/io.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -22,6 +23,7 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820.h>
+#include <asm/bios_ebda.h>
static void __init zap_identity_mappings(void)
{
@@ -49,7 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
-#define BIOS_EBDA_SEGMENT 0x40E
#define BIOS_LOWMEM_KILOBYTES 0x413
/*
@@ -80,8 +81,7 @@ static void __init reserve_ebda_region(void)
lowmem <<= 10;
/* start of EBDA area */
- ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
- ebda_addr <<= 4;
+ ebda_addr = get_bios_ebda();
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
@@ -101,6 +101,24 @@ static void __init reserve_ebda_region(void)
reserve_early(lowmem, 0x100000, "BIOS reserved");
}
+static void __init reserve_setup_data(void)
+{
+ struct setup_data *data;
+ unsigned long pa_data;
+ char buf[32];
+
+ if (boot_params.hdr.version < 0x0209)
+ return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, sizeof(*data));
+ sprintf(buf, "setup data %x", data->type);
+ reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+ pa_data = data->next;
+ early_iounmap(data, sizeof(*data));
+ }
+}
+
void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
@@ -157,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
#endif
reserve_ebda_region();
+ reserve_setup_data();
/*
* At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 36652ea..9007f9e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void)
hpet_freq = 1000000000000000ULL;
do_div(hpet_freq, hpet_period);
hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
- NSEC_PER_SEC, 32);
+ NSEC_PER_SEC, hpet_clockevent.shift);
/* Calculate the min / max delta */
hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
&hpet_clockevent);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 8540abe..c1b5e3e 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -115,7 +115,8 @@ void __init setup_pit_timer(void)
* IO_APIC has been initialized.
*/
pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
- pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
+ pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
+ pit_clockevent.shift);
pit_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFF, &pit_clockevent);
pit_clockevent.min_delta_ns =
@@ -224,7 +225,8 @@ static int __init init_pit_clocksource(void)
pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
return 0;
- clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+ clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
+ clocksource_pit.shift);
return clocksource_register(&clocksource_pit);
}
arch_initcall(init_pit_clocksource);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 2e2f420..696b8e4 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2068,7 +2068,7 @@ static void __init setup_nmi(void)
* cycles as some i82489DX-based boards have glue logic that keeps the
* 8259A interrupt line asserted until INTA. --macro
*/
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
{
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 9ba11d0..ef1a8df 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1599,7 +1599,7 @@ static void __init setup_nmi(void)
* cycles as some i82489DX-based boards have glue logic that keeps the
* 8259A interrupt line asserted until INTA. --macro
*/
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
{
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 6ea67b7..00bda7b 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
: "=a" (arg1), "=d" (arg2), "=b" (bx)
: "0" (irq), "1" (desc), "2" (isp),
"D" (desc->handle_irq)
- : "memory", "cc"
+ : "memory", "cc", "ecx"
);
} else
#endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 7335430..c032059 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -6,23 +6,171 @@
*
* This file is released under the GPLv2.
*/
-
#include <linux/debugfs.h>
+#include <linux/uaccess.h>
#include <linux/stat.h>
#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/mm.h>
#include <asm/setup.h>
#ifdef CONFIG_DEBUG_BOOT_PARAMS
+struct setup_data_node {
+ u64 paddr;
+ u32 type;
+ u32 len;
+};
+
+static ssize_t
+setup_data_read(struct file *file, char __user *user_buf, size_t count,
+ loff_t *ppos)
+{
+ struct setup_data_node *node = file->private_data;
+ unsigned long remain;
+ loff_t pos = *ppos;
+ struct page *pg;
+ void *p;
+ u64 pa;
+
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= node->len)
+ return 0;
+
+ if (count > node->len - pos)
+ count = node->len - pos;
+ pa = node->paddr + sizeof(struct setup_data) + pos;
+ pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
+ if (PageHighMem(pg)) {
+ p = ioremap_cache(pa, count);
+ if (!p)
+ return -ENXIO;
+ } else {
+ p = __va(pa);
+ }
+
+ remain = copy_to_user(user_buf, p, count);
+
+ if (PageHighMem(pg))
+ iounmap(p);
+
+ if (remain)
+ return -EFAULT;
+
+ *ppos = pos + count;
+
+ return count;
+}
+
+static int setup_data_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+static const struct file_operations fops_setup_data = {
+ .read = setup_data_read,
+ .open = setup_data_open,
+};
+
+static int __init
+create_setup_data_node(struct dentry *parent, int no,
+ struct setup_data_node *node)
+{
+ struct dentry *d, *type, *data;
+ char buf[16];
+ int error;
+
+ sprintf(buf, "%d", no);
+ d = debugfs_create_dir(buf, parent);
+ if (!d) {
+ error = -ENOMEM;
+ goto err_return;
+ }
+ type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
+ if (!type) {
+ error = -ENOMEM;
+ goto err_dir;
+ }
+ data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
+ if (!data) {
+ error = -ENOMEM;
+ goto err_type;
+ }
+ return 0;
+
+err_type:
+ debugfs_remove(type);
+err_dir:
+ debugfs_remove(d);
+err_return:
+ return error;
+}
+
+static int __init create_setup_data_nodes(struct dentry *parent)
+{
+ struct setup_data_node *node;
+ struct setup_data *data;
+ int error, no = 0;
+ struct dentry *d;
+ struct page *pg;
+ u64 pa_data;
+
+ d = debugfs_create_dir("setup_data", parent);
+ if (!d) {
+ error = -ENOMEM;
+ goto err_return;
+ }
+
+ pa_data = boot_params.hdr.setup_data;
+
+ while (pa_data) {
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+ if (!node) {
+ error = -ENOMEM;
+ goto err_dir;
+ }
+ pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
+ if (PageHighMem(pg)) {
+ data = ioremap_cache(pa_data, sizeof(*data));
+ if (!data) {
+ error = -ENXIO;
+ goto err_dir;
+ }
+ } else {
+ data = __va(pa_data);
+ }
+
+ node->paddr = pa_data;
+ node->type = data->type;
+ node->len = data->len;
+ error = create_setup_data_node(d, no, node);
+ pa_data = data->next;
+
+ if (PageHighMem(pg))
+ iounmap(data);
+ if (error)
+ goto err_dir;
+ no++;
+ }
+ return 0;
+
+err_dir:
+ debugfs_remove(d);
+err_return:
+ return error;
+}
+
static struct debugfs_blob_wrapper boot_params_blob = {
- .data = &boot_params,
- .size = sizeof(boot_params),
+ .data = &boot_params,
+ .size = sizeof(boot_params),
};
static int __init boot_params_kdebugfs_init(void)
{
- int error;
struct dentry *dbp, *version, *data;
+ int error;
dbp = debugfs_create_dir("boot_params", NULL);
if (!dbp) {
@@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void)
error = -ENOMEM;
goto err_version;
}
+ error = create_setup_data_nodes(dbp);
+ if (error)
+ goto err_data;
return 0;
+
+err_data:
+ debugfs_remove(data);
err_version:
debugfs_remove(version);
err_dir:
@@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void)
return error;
}
-
arch_initcall(arch_kdebugfs_init);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
new file mode 100644
index 0000000..8b7a3cf
--- /dev/null
+++ b/arch/x86/kernel/kvm.c
@@ -0,0 +1,248 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright IBM Corporation, 2007
+ * Authors: Anthony Liguori <aliguori@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+
+#define MMU_QUEUE_SIZE 1024
+
+struct kvm_para_state {
+ u8 mmu_queue[MMU_QUEUE_SIZE];
+ int mmu_queue_len;
+ enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static struct kvm_para_state *kvm_para_state(void)
+{
+ return &per_cpu(para_state, raw_smp_processor_id());
+}
+
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static void kvm_mmu_op(void *buffer, unsigned len)
+{
+ int r;
+ unsigned long a1, a2;
+
+ do {
+ a1 = __pa(buffer);
+ a2 = 0; /* on i386 __pa() always returns <4G */
+ r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
+ buffer += r;
+ len -= r;
+ } while (len);
+}
+
+static void mmu_queue_flush(struct kvm_para_state *state)
+{
+ if (state->mmu_queue_len) {
+ kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
+ state->mmu_queue_len = 0;
+ }
+}
+
+static void kvm_deferred_mmu_op(void *buffer, int len)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ if (state->mode != PARAVIRT_LAZY_MMU) {
+ kvm_mmu_op(buffer, len);
+ return;
+ }
+ if (state->mmu_queue_len + len > sizeof state->mmu_queue)
+ mmu_queue_flush(state);
+ memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
+ state->mmu_queue_len += len;
+}
+
+static void kvm_mmu_write(void *dest, u64 val)
+{
+ __u64 pte_phys;
+ struct kvm_mmu_op_write_pte wpte;
+
+#ifdef CONFIG_HIGHPTE
+ struct page *page;
+ unsigned long dst = (unsigned long) dest;
+
+ page = kmap_atomic_to_page(dest);
+ pte_phys = page_to_pfn(page);
+ pte_phys <<= PAGE_SHIFT;
+ pte_phys += (dst & ~(PAGE_MASK));
+#else
+ pte_phys = (unsigned long)__pa(dest);
+#endif
+ wpte.header.op = KVM_MMU_OP_WRITE_PTE;
+ wpte.pte_val = val;
+ wpte.pte_phys = pte_phys;
+
+ kvm_deferred_mmu_op(&wpte, sizeof wpte);
+}
+
+/*
+ * We only need to hook operations that are MMU writes. We hook these so that
+ * we can use lazy MMU mode to batch these operations. We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+ kvm_mmu_write(pmdp, pmd_val(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ kvm_mmu_write(ptep, 0);
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+ kvm_mmu_write(pmdp, 0);
+}
+#endif
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+ kvm_mmu_write(pudp, pud_val(pud));
+}
+
+#if PAGETABLE_LEVELS == 4
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ kvm_mmu_write(pgdp, pgd_val(pgd));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+ struct kvm_mmu_op_flush_tlb ftlb = {
+ .header.op = KVM_MMU_OP_FLUSH_TLB,
+ };
+
+ kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+ struct kvm_mmu_op_release_pt rpt = {
+ .header.op = KVM_MMU_OP_RELEASE_PT,
+ .pt_phys = (u64)pfn << PAGE_SHIFT,
+ };
+
+ kvm_mmu_op(&rpt, sizeof rpt);
+}
+
+static void kvm_enter_lazy_mmu(void)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ paravirt_enter_lazy_mmu();
+ state->mode = paravirt_get_lazy_mode();
+}
+
+static void kvm_leave_lazy_mmu(void)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ mmu_queue_flush(state);
+ paravirt_leave_lazy(paravirt_get_lazy_mode());
+ state->mode = paravirt_get_lazy_mode();
+}
+
+static void paravirt_ops_setup(void)
+{
+ pv_info.name = "KVM";
+ pv_info.paravirt_enabled = 1;
+
+ if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
+ pv_cpu_ops.io_delay = kvm_io_delay;
+
+ if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
+ pv_mmu_ops.set_pte = kvm_set_pte;
+ pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+ pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+ pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+ pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+ pv_mmu_ops.pte_clear = kvm_pte_clear;
+ pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+ pv_mmu_ops.set_pud = kvm_set_pud;
+#if PAGETABLE_LEVELS == 4
+ pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+#endif
+ pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+ pv_mmu_ops.release_pte = kvm_release_pt;
+ pv_mmu_ops.release_pmd = kvm_release_pt;
+ pv_mmu_ops.release_pud = kvm_release_pt;
+
+ pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
+ pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
+ }
+}
+
+void __init kvm_guest_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ paravirt_ops_setup();
+}
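[Illustration, not part of the patch: the new kvm.c defers PTE writes while in lazy-MMU mode by appending the encoded operations to a per-cpu byte queue and draining it in one go when the queue fills up or lazy mode ends. A standalone sketch of that batching pattern, with submit() standing in for the kvm_hypercall3(KVM_HC_MMU_OP, ...) call.]

#include <stdio.h>
#include <string.h>

#define MMU_QUEUE_SIZE 1024

static unsigned char mmu_queue[MMU_QUEUE_SIZE];
static int mmu_queue_len;
static int lazy_mode;

static void submit(const void *buf, int len)	/* hypercall stand-in */
{
	(void)buf;
	printf("submitting %d bytes of MMU ops\n", len);
}

static void mmu_queue_flush(void)
{
	if (mmu_queue_len) {
		submit(mmu_queue, mmu_queue_len);
		mmu_queue_len = 0;
	}
}

static void deferred_mmu_op(const void *buf, int len)
{
	if (!lazy_mode) {			/* not batching: go straight out */
		submit(buf, len);
		return;
	}
	if (mmu_queue_len + len > MMU_QUEUE_SIZE)	/* queue full: drain first */
		mmu_queue_flush();
	memcpy(mmu_queue + mmu_queue_len, buf, len);
	mmu_queue_len += len;
}

int main(void)
{
	char op[16] = { 0 };			/* fake encoded write-pte op */

	lazy_mode = 1;				/* paravirt_enter_lazy_mmu() */
	for (int i = 0; i < 5; i++)
		deferred_mmu_op(op, sizeof(op));
	mmu_queue_flush();			/* kvm_leave_lazy_mmu() drains the queue */
	lazy_mode = 0;
	return 0;
}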
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 0000000..ddee040
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,187 @@
+/* KVM paravirtual clock driver. A clocksource implementation
+ Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <linux/clocksource.h>
+#include <linux/kvm_para.h>
+#include <asm/arch_hooks.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <linux/percpu.h>
+#include <asm/reboot.h>
+
+#define KVM_SCALE 22
+
+static int kvmclock = 1;
+
+static int parse_no_kvmclock(char *arg)
+{
+ kvmclock = 0;
+ return 0;
+}
+early_param("no-kvmclock", parse_no_kvmclock);
+
+/* The hypervisor will put information about time periodically here */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
+#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+
+static inline u64 kvm_get_delta(u64 last_tsc)
+{
+ int cpu = smp_processor_id();
+ u64 delta = native_read_tsc() - last_tsc;
+ return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
+}
+
+static struct kvm_wall_clock wall_clock;
+static cycle_t kvm_clock_read(void);
+/*
+ * The wallclock is the time of day when we booted. Since then, some time may
+ * have elapsed since the hypervisor wrote the data. So we try to account for
+ * that with system time
+ */
+unsigned long kvm_get_wallclock(void)
+{
+ u32 wc_sec, wc_nsec;
+ u64 delta;
+ struct timespec ts;
+ int version, nsec;
+ int low, high;
+
+ low = (int)__pa(&wall_clock);
+ high = ((u64)__pa(&wall_clock) >> 32);
+
+ delta = kvm_clock_read();
+
+ native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+ do {
+ version = wall_clock.wc_version;
+ rmb();
+ wc_sec = wall_clock.wc_sec;
+ wc_nsec = wall_clock.wc_nsec;
+ rmb();
+ } while ((wall_clock.wc_version != version) || (version & 1));
+
+ delta = kvm_clock_read() - delta;
+ delta += wc_nsec;
+ nsec = do_div(delta, NSEC_PER_SEC);
+ set_normalized_timespec(&ts, wc_sec + delta, nsec);
+ /*
+ * Of all mechanisms of time adjustment I've tested, this one
+ * was the champion!
+ */
+ return ts.tv_sec + 1;
+}
+
+int kvm_set_wallclock(unsigned long now)
+{
+ return 0;
+}
+
+/*
+ * This is our read_clock function. The host puts an tsc timestamp each time
+ * it updates a new time. Without the tsc adjustment, we can have a situation
+ * in which a vcpu starts to run earlier (smaller system_time), but probes
+ * time later (compared to another vcpu), leading to backwards time
+ */
+static cycle_t kvm_clock_read(void)
+{
+ u64 last_tsc, now;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ last_tsc = get_clock(cpu, tsc_timestamp);
+ now = get_clock(cpu, system_time);
+
+ now += kvm_get_delta(last_tsc);
+ preempt_enable();
+
+ return now;
+}
+static struct clocksource kvm_clock = {
+ .name = "kvm-clock",
+ .read = kvm_clock_read,
+ .rating = 400,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 1 << KVM_SCALE,
+ .shift = KVM_SCALE,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int kvm_register_clock(void)
+{
+ int cpu = smp_processor_id();
+ int low, high;
+ low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
+ high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+
+ return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+}
+
+static void kvm_setup_secondary_clock(void)
+{
+ /*
+ * Now that the first cpu already had this clocksource initialized,
+ * we shouldn't fail.
+ */
+ WARN_ON(kvm_register_clock());
+ /* ok, done with our trickery, call native */
+ setup_secondary_APIC_clock();
+}
+
+/*
+ * After the clock is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written. So before any
+ * kind of shutdown from our side, we unregister the clock by writting anything
+ * that does not have the 'enable' bit set in the msr
+ */
+#ifdef CONFIG_KEXEC
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_crash_shutdown(regs);
+}
+#endif
+
+static void kvm_shutdown(void)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_shutdown();
+}
+
+void __init kvmclock_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ if (kvm_register_clock())
+ return;
+ pv_time_ops.get_wallclock = kvm_get_wallclock;
+ pv_time_ops.set_wallclock = kvm_set_wallclock;
+ pv_time_ops.sched_clock = kvm_clock_read;
+ pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+ machine_ops.shutdown = kvm_shutdown;
+#ifdef CONFIG_KEXEC
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+ clocksource_register(&kvm_clock);
+ }
+}
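[Illustration, not part of the patch: kvm_clock_read() combines the host-supplied system_time with a TSC delta scaled by tsc_to_system_mul using KVM_SCALE (22) fixed-point bits, the relation kvm_get_delta() implements. A userspace sketch with invented values for the hv_clock fields.]

#include <stdio.h>
#include <stdint.h>

#define KVM_SCALE 22

int main(void)
{
	/* Invented stand-ins for the per-cpu struct kvm_vcpu_time_info fields */
	uint64_t tsc_timestamp = 1000000000ULL;	/* guest TSC when the host wrote the record */
	uint64_t system_time   = 5000000000ULL;	/* guest nanoseconds at that same moment */
	uint32_t tsc_to_system_mul = (1UL << KVM_SCALE) / 3;	/* ~1/3 ns per cycle (3 GHz TSC) */

	uint64_t tsc_now = tsc_timestamp + 3000000;	/* read 3,000,000 cycles later */
	uint64_t delta_ns = ((tsc_now - tsc_timestamp) *
			     (uint64_t)tsc_to_system_mul) >> KVM_SCALE;

	/* roughly system_time + 1,000,000 ns */
	printf("kvm_clock_read() would return ~%llu ns\n",
	       (unsigned long long)(system_time + delta_ns));
	return 0;
}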
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index b402c0f..cfc2648 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void)
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
/* Set up the clock event */
- mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32);
+ mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
+ mfgpt_clockevent.shift);
mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
&mfgpt_clockevent);
mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70744e3..3e2c54d 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -686,13 +686,11 @@ void __init get_smp_config(void)
static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned reserve)
{
- extern void __bad_mpf_size(void);
unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
- if (sizeof(*mpf) != 16)
- __bad_mpf_size();
+ BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
mpf = (struct intel_mp_floating *)bp;
@@ -801,7 +799,6 @@ void __init find_smp_config(void)
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
-#define MP_MAX_IOAPIC_PIN 127
extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
@@ -820,7 +817,7 @@ static int mp_find_ioapic(int gsi)
return -1;
}
-static u8 uniq_ioapic_id(u8 id)
+static u8 __init uniq_ioapic_id(u8 id)
{
#ifdef CONFIG_X86_32
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
@@ -909,14 +906,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
intsrc.mpc_dstirq = pin; /* INTIN# */
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
+ MP_intsrc_info(&intsrc);
}
int es7000_plat;
@@ -985,23 +975,14 @@ void __init mp_config_acpi_legacy_irqs(void)
intsrc.mpc_srcbusirq = i; /* Identity mapped */
intsrc.mpc_dstirq = i;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
- intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
+ MP_intsrc_info(&intsrc);
}
}
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
+ int ioapic;
+ int ioapic_pin;
#ifdef CONFIG_X86_32
#define MAX_GSI_NUM 4096
#define IRQ_COMPRESSION_START 64
@@ -1041,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
* with redundant pin->gsi mappings (but unique PCI devices);
* we only program the IOAPIC on the first.
*/
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
+ if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
printk(KERN_ERR "Invalid reference to IOAPIC pin "
"%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
ioapic_pin);
return gsi;
}
- if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
#ifdef CONFIG_X86_32
@@ -1059,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
#endif
}
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit);
+ set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
#ifdef CONFIG_X86_32
/*
* For GSI >= 64, use IRQ compression
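[Illustration, not part of the patch: mp_register_gsi() used to compute a word index and bit position by hand to track which IO-APIC pins were already programmed; the hunk above switches to generic test_bit()/set_bit() on the same pin_programmed[] storage. A userspace sketch of the bookkeeping both forms express, using 32-bit words to mirror the old /32 and %32 arithmetic.]

#include <stdio.h>
#include <stdint.h>

#define MP_MAX_IOAPIC_PIN 127

static uint32_t pin_programmed[(MP_MAX_IOAPIC_PIN + 1) / 32];	/* 4 x 32 pins */

static int pin_test_and_set(int pin)
{
	int idx = pin / 32, bit = pin % 32;	/* what the old code computed by hand */
	int was_set = (pin_programmed[idx] >> bit) & 1;

	pin_programmed[idx] |= UINT32_C(1) << bit;	/* i.e. set_bit(pin, pin_programmed) */
	return was_set;
}

int main(void)
{
	printf("pin 70 first seen:  %d\n", pin_test_and_set(70));	/* 0: program it */
	printf("pin 70 seen again:  %d\n", pin_test_and_set(70));	/* 1: skip reprogramming */
	return 0;
}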
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2edee22..e28ec49 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -43,6 +43,7 @@
#include <asm/system.h>
#include <asm/dma.h>
#include <asm/rio.h>
+#include <asm/bios_ebda.h>
#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
int use_calgary __read_mostly = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3004d71..67e9b4a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -4,6 +4,8 @@
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/pm.h>
struct kmem_cache *task_xstate_cachep;
@@ -42,3 +44,118 @@ void arch_task_cache_init(void)
__alignof__(union thread_xstate),
SLAB_PANIC, NULL);
}
+
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 0, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+ if (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __mwait(ax, cx);
+ }
+}
+
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+ if (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+ } else
+ local_irq_enable();
+}
+
+
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+ if (force_mwait)
+ return 1;
+ /* Any C1 states supported? */
+ return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+ local_irq_enable();
+ cpu_relax();
+}
+
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
+ static int selected;
+
+ if (selected)
+ return;
+#ifdef CONFIG_X86_SMP
+ if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+ " performance may degrade.\n");
+ }
+#endif
+ if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
+ /*
+ * Skip, if setup has overridden idle.
+ * One CPU supports mwait => All CPUs supports mwait
+ */
+ if (!pm_idle) {
+ printk(KERN_INFO "using mwait in idle threads.\n");
+ pm_idle = mwait_idle;
+ }
+ }
+ selected = 1;
+}
+
+static int __init idle_setup(char *str)
+{
+ if (!strcmp(str, "poll")) {
+ printk("using polling idle threads.\n");
+ pm_idle = poll_idle;
+ } else if (!strcmp(str, "mwait"))
+ force_mwait = 1;
+ else
+ return -1;
+
+ boot_option_idle_override = 1;
+ return 0;
+}
+early_param("idle", idle_setup);
+
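[Illustration, not part of the patch: the mwait_usable() helper consolidated into process.c tests (cpuid_edx(5) >> 4) & 0xf, which in the MONITOR/MWAIT CPUID leaf counts the C1 sub-states MWAIT can request (4 bits per C-state, per the documented leaf layout). A userspace sketch decoding an example EDX value.]

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Invented CPUID.05H:EDX value; each nibble counts sub-states for one C-state */
	uint32_t edx = 0x00002220;

	unsigned int c1_substates = (edx >> 4) & 0xf;	/* the field mwait_usable() tests */

	if (c1_substates > 0)
		printf("MWAIT C1 sub-states: %u -> mwait idle is usable\n", c1_substates);
	else
		printf("no MWAIT C1 sub-states -> keep the default idle routine\n");
	return 0;
}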
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77de848..f8476df 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,12 +111,10 @@ void default_idle(void)
*/
smp_mb();
- local_irq_disable();
- if (!need_resched()) {
+ if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
- local_irq_disable();
- }
- local_irq_enable();
+ else
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
} else {
local_irq_enable();
@@ -128,17 +126,6 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
- local_irq_enable();
- cpu_relax();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
/* We don't actually take CPU down, just spin without interrupts. */
@@ -196,6 +183,7 @@ void cpu_idle(void)
if (cpu_is_offline(cpu))
play_dead();
+ local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
@@ -206,104 +194,6 @@ void cpu_idle(void)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(ax, cx);
- else
- local_irq_enable();
- } else
- local_irq_enable();
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- local_irq_enable();
- mwait_idle_with_hints(0, 0);
-}
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
- if (force_mwait)
- return 1;
- /* Any C1 states supported? */
- return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- static int selected;
-
- if (selected)
- return;
-#ifdef CONFIG_X86_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
- " performance may degrade.\n");
- }
-#endif
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
- }
- }
- selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
- if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- } else if (!strcmp(str, "mwait"))
- force_mwait = 1;
- else
- return -1;
-
- boot_option_idle_override = 1;
- return 0;
-}
-early_param("idle", idle_setup);
-
void __show_registers(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 131c2ee..e2319f3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -106,26 +106,13 @@ void default_idle(void)
* test NEED_RESCHED:
*/
smp_mb();
- local_irq_disable();
- if (!need_resched()) {
+ if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
- local_irq_disable();
- }
- local_irq_enable();
+ else
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
}
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
- local_irq_enable();
- cpu_relax();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
@@ -192,110 +179,6 @@ void cpu_idle(void)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __mwait(ax, cx);
- }
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(0, 0);
- else
- local_irq_enable();
- } else {
- local_irq_enable();
- }
-}
-
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
- if (force_mwait)
- return 1;
- /* Any C1 states supported? */
- return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- static int selected;
-
- if (selected)
- return;
-#ifdef CONFIG_X86_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
- " performance may degrade.\n");
- }
-#endif
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
- }
- }
- selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
- if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- } else if (!strcmp(str, "mwait"))
- force_mwait = 1;
- else
- return -1;
-
- boot_option_idle_override = 1;
- return 0;
-}
-early_param("idle", idle_setup);
-
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 559c1b0..fb03ef3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1207,97 +1207,16 @@ static int genregs32_set(struct task_struct *target,
return ret;
}
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t caddr, compat_ulong_t cdata)
{
- siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
- compat_siginfo_t __user *si32 = compat_ptr(data);
- siginfo_t ssi;
- int ret;
-
- if (request == PTRACE_SETSIGINFO) {
- memset(&ssi, 0, sizeof(siginfo_t));
- ret = copy_siginfo_from_user32(&ssi, si32);
- if (ret)
- return ret;
- if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
- return -EFAULT;
- }
- ret = sys_ptrace(request, pid, addr, (unsigned long)si);
- if (ret)
- return ret;
- if (request == PTRACE_GETSIGINFO) {
- if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
- return -EFAULT;
- ret = copy_siginfo_to_user32(si32, &ssi);
- }
- return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
- struct task_struct *child;
- struct pt_regs *childregs;
+ unsigned long addr = caddr;
+ unsigned long data = cdata;
void __user *datap = compat_ptr(data);
int ret;
__u32 val;
switch (request) {
- case PTRACE_TRACEME:
- case PTRACE_ATTACH:
- case PTRACE_KILL:
- case PTRACE_CONT:
- case PTRACE_SINGLESTEP:
- case PTRACE_SINGLEBLOCK:
- case PTRACE_DETACH:
- case PTRACE_SYSCALL:
- case PTRACE_OLDSETOPTIONS:
- case PTRACE_SETOPTIONS:
- case PTRACE_SET_THREAD_AREA:
- case PTRACE_GET_THREAD_AREA:
-#ifdef X86_BTS
- case PTRACE_BTS_CONFIG:
- case PTRACE_BTS_STATUS:
- case PTRACE_BTS_SIZE:
- case PTRACE_BTS_GET:
- case PTRACE_BTS_CLEAR:
- case PTRACE_BTS_DRAIN:
-#endif
- return sys_ptrace(request, pid, addr, data);
-
- default:
- return -EINVAL;
-
- case PTRACE_PEEKTEXT:
- case PTRACE_PEEKDATA:
- case PTRACE_POKEDATA:
- case PTRACE_POKETEXT:
- case PTRACE_POKEUSR:
- case PTRACE_PEEKUSR:
- case PTRACE_GETREGS:
- case PTRACE_SETREGS:
- case PTRACE_SETFPREGS:
- case PTRACE_GETFPREGS:
- case PTRACE_SETFPXREGS:
- case PTRACE_GETFPXREGS:
- case PTRACE_GETEVENTMSG:
- break;
-
- case PTRACE_SETSIGINFO:
- case PTRACE_GETSIGINFO:
- return ptrace32_siginfo(request, pid, addr, data);
- }
-
- child = ptrace_get_task_struct(pid);
- if (IS_ERR(child))
- return PTR_ERR(child);
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out;
-
- childregs = task_pt_regs(child);
-
- switch (request) {
case PTRACE_PEEKUSR:
ret = getreg32(child, addr, &val);
if (ret == 0)
@@ -1343,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
sizeof(struct user32_fxsr_struct),
datap);
+ case PTRACE_GET_THREAD_AREA:
+ case PTRACE_SET_THREAD_AREA:
+ return arch_ptrace(child, request, addr, data);
+
default:
return compat_ptrace_request(child, request, addr, data);
}
- out:
- put_task_struct(child);
return ret;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 1791a75..a4a8383 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void)
}
}
-static void native_machine_shutdown(void)
+void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = {
.shutdown = native_machine_shutdown,
.emergency_restart = native_machine_emergency_restart,
.restart = native_machine_restart,
- .halt = native_machine_halt
+ .halt = native_machine_halt,
+#ifdef CONFIG_KEXEC
+ .crash_shutdown = native_machine_crash_shutdown,
+#endif
};
void machine_power_off(void)
@@ -498,3 +501,9 @@ void machine_halt(void)
machine_ops.halt();
}
+#ifdef CONFIG_KEXEC
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ machine_ops.crash_shutdown(regs);
+}
+#endif
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 455d3c8..2283422 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
#include <linux/pfn.h>
#include <linux/pci.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <video/edid.h>
@@ -389,7 +390,6 @@ unsigned long __init find_max_low_pfn(void)
return max_low_pfn;
}
-#define BIOS_EBDA_SEGMENT 0x40E
#define BIOS_LOWMEM_KILOBYTES 0x413
/*
@@ -420,8 +420,7 @@ static void __init reserve_ebda_region(void)
lowmem <<= 10;
/* start of EBDA area */
- ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
- ebda_addr <<= 4;
+ ebda_addr = get_bios_ebda();
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
@@ -822,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = setup_memory();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_VMI
/*
* Must be after max_low_pfn is determined, and before kernel
@@ -829,6 +832,7 @@ void __init setup_arch(char **cmdline_p)
*/
vmi_init();
#endif
+ kvm_guest_init();
/*
* NOTE: before this point _nobody_ is allowed to allocate
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c2ec3dcb..a94fb95 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
#include <linux/ctype.h>
#include <linux/uaccess.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
@@ -116,7 +117,7 @@ extern int root_mountflags;
char __initdata command_line[COMMAND_LINE_SIZE];
-struct resource standard_io_resources[] = {
+static struct resource standard_io_resources[] = {
{ .name = "dma1", .start = 0x00, .end = 0x1f,
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
{ .name = "pic1", .start = 0x20, .end = 0x21,
@@ -190,6 +191,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
+ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}
#endif
@@ -264,6 +266,28 @@ void __attribute__((weak)) __init memory_setup(void)
machine_specific_memory_setup();
}
+static void __init parse_setup_data(void)
+{
+ struct setup_data *data;
+ unsigned long pa_data;
+
+ if (boot_params.hdr.version < 0x0209)
+ return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, PAGE_SIZE);
+ switch (data->type) {
+ default:
+ break;
+ }
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+ free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+ pa_data = data->next;
+ early_iounmap(data, PAGE_SIZE);
+ }
+}
+
/*
* setup_arch - architecture-specific boot-time initializations
*
@@ -316,6 +340,8 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
+ parse_setup_data();
+
parse_early_param();
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
@@ -359,6 +385,10 @@ void __init setup_arch(char **cmdline_p)
io_delay_init();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_SMP
/* setup to use the early static init tables during kernel startup */
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
@@ -397,8 +427,6 @@ void __init setup_arch(char **cmdline_p)
contig_initmem_init(0, end_pfn);
#endif
- early_res_to_bootmem();
-
dma32_reserve_bootmem();
#ifdef CONFIG_ACPI_SLEEP
@@ -465,6 +493,8 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
+ kvm_guest_init();
+
/*
* We trust e820 completely. No explicit ROM probing in memory.
*/
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index f1b1179..8e05e7f 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -413,16 +413,6 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
regs->ss = __USER_DS;
regs->cs = __USER_CS;
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
@@ -501,16 +491,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->ss = __USER_DS;
regs->cs = __USER_CS;
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
@@ -566,6 +546,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
if (ret)
return ret;
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
+
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 827179c..ccb2a45 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -285,14 +285,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
even if the handler happens to be interrupting 32-bit code. */
regs->cs = __USER_CS;
- /* This, by contrast, has nothing to do with segment registers -
- see include/asm-x86_64/uaccess.h for details. */
- set_fs(USER_DS);
-
- regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
@@ -380,6 +372,28 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret == 0) {
+ /*
+ * This has nothing to do with segment registers,
+ * despite the name. This magic affects uaccess.h
+ * macros' behavior. Reset it to the normal setting.
+ */
+ set_fs(USER_DS);
+
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
+
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index eef79e8..04c662b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1058,7 +1058,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
check_tsc_sync_source(cpu);
local_irq_restore(flags);
- while (!cpu_isset(cpu, cpu_online_map)) {
+ while (!cpu_online(cpu)) {
cpu_relax();
touch_nmi_watchdog();
}
@@ -1168,7 +1168,7 @@ static void __init smp_cpu_index_default(void)
int i;
struct cpuinfo_x86 *c;
- for_each_cpu_mask(i, cpu_possible_map) {
+ for_each_possible_cpu(i) {
c = &cpu_data(i);
/* mark all to hotplug */
c->cpu_index = NR_CPUS;
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 6878a9c..ae75109 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/io.h>
+#include <asm/bios_ebda.h>
#include <asm/mach-summit/mach_mpparse.h>
static struct rio_table_hdr *rio_table_hdr __initdata;
@@ -140,8 +141,8 @@ void __init setup_summit(void)
int i, next_wpeg, next_bus = 0;
/* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
- ptr = *(unsigned short *)phys_to_virt(0x40Eul);
- ptr = (unsigned long)phys_to_virt(ptr << 4);
+ ptr = get_bios_ebda();
+ ptr = (unsigned long)phys_to_virt(ptr);
rio_table_hdr = NULL;
offset = 0x180;
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index df224a8..a1f07d7 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -195,9 +195,9 @@ static int __cpuinit init_smp_flush(void)
{
int i;
- for_each_cpu_mask(i, cpu_possible_map) {
+ for_each_possible_cpu(i)
spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
- }
+
return 0;
}
core_initcall(init_smp_flush);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 6458067..d8ccc3c 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -33,7 +33,7 @@
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU
-.section ".init.data","aw",@progbits
+.section ".cpuinit.data","aw",@progbits
#else
.section .rodata,"a",@progbits
#endif
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 471e694..bde6f63 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -602,7 +602,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
{