From cbf74cea070fa1f705de4712e25d9e56ae6543c7 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Mon, 30 May 2011 16:31:11 +0200
Subject: oprofile, x86: Add comments to IBS LVT offset initialization

Adding a comment in the code as IBS LVT setup is not obvious at all ...

Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/kernel/apic/apic.c      |  3 ++-
 arch/x86/oprofile/op_model_amd.c | 13 +++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index fabf01e..a0bf78a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -390,7 +390,8 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
 
 /*
  * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
+ * enables the vector. See also the BKDGs. Must be called with
+ * preemption disabled.
  */
 
 int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c
index 9fd8a56..9cbb710 100644
--- a/arch/x86/oprofile/op_model_amd.c
+++ b/arch/x86/oprofile/op_model_amd.c
@@ -609,16 +609,21 @@ static int setup_ibs_ctl(int ibs_eilvt_off)
 	return 0;
 }
 
+/*
+ * This runs only on the current cpu. We try to find an LVT offset and
+ * setup the local APIC. For this we must disable preemption. On
+ * success we initialize all nodes with this offset. This updates then
+ * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
+ * the IBS interrupt vector is called from op_amd_setup_ctrs()/op_-
+ * amd_cpu_shutdown() using the new offset.
+ */
 static int force_ibs_eilvt_setup(void)
 {
 	int offset;
 	int ret;
 
-	/*
-	 * find the next free available EILVT entry, skip offset 0,
-	 * pin search to this cpu
-	 */
 	preempt_disable();
+	/* find the next free available EILVT entry, skip offset 0 */
 	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
 		if (get_eilvt(offset))
 			break;
-- 
cgit v1.1


From 6e33a852a37dee02979ec9d82bea26c07cee5bce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rton=20N=C3=A9meth?= <nm127@freemail.hu>
Date: Sat, 14 May 2011 19:27:33 +0200
Subject: x86/PCI/ACPI: fix type mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The flags field of struct resource from linux/ioport.h is "unsigned
long". Change the "type" parameter of coalesce_windows() function to
match that field. This fixes the following warning messages when
compiling with "make C=1 W=1 bzImage modules":

arch/x86/pci/acpi.c: In function ‘coalesce_windows’:
arch/x86/pci/acpi.c:198: warning: conversion to ‘long unsigned int’ from ‘int’ may change the sign of the result
arch/x86/pci/acpi.c:203: warning: conversion to ‘long unsigned int’ from ‘int’ may change the sign of the result

Signed-off-by: Márton Németh <nm127@freemail.hu>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 0972315..68c3c13 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -188,7 +188,7 @@ static bool resource_contains(struct resource *res, resource_size_t point)
 	return false;
 }
 
-static void coalesce_windows(struct pci_root_info *info, int type)
+static void coalesce_windows(struct pci_root_info *info, unsigned long type)
 {
 	int i, j;
 	struct resource *res1, *res2;
-- 
cgit v1.1


From fd8a7de177b6f56a0fc59ad211c197a7df06b1ad Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 20 Jul 2010 14:34:50 +0200
Subject: x86: cpu-hotplug: Prevent softirq wakeup on wrong CPU

After a newly plugged CPU sets the cpu_online bit it enables
interrupts and goes idle. The cpu which brought up the new cpu waits
for the cpu_online bit and when it observes it, it sets the cpu_active
bit for this cpu. The cpu_active bit is the relevant one for the
scheduler to consider the cpu as a viable target.

With forced threaded interrupt handlers which imply forced threaded
softirqs we observed the following race:

cpu 0                         cpu 1

bringup(cpu1);
                              set_cpu_online(smp_processor_id(), true);
		              local_irq_enable();
while (!cpu_online(cpu1));
                              timer_interrupt()
                                -> wake_up(softirq_thread_cpu1);
                                     -> enqueue_on(softirq_thread_cpu1, cpu0);

                                                                        ^^^^

cpu_notify(CPU_ONLINE, cpu1);
  -> sched_cpu_active(cpu1)
     -> set_cpu_active((cpu1, true);

When an interrupt happens before the cpu_active bit is set by the cpu
which brought up the newly onlined cpu, then the scheduler refuses to
enqueue the woken thread which is bound to that newly onlined cpu on
that newly onlined cpu due to the not yet set cpu_active bit and
selects a fallback runqueue. Not really an expected and desirable
behaviour.

So far this has only been observed with forced hard/softirq threading,
but in theory this could happen without forced threaded hard/softirqs
as well. It's probably unobservable as it would take a massive
interrupt storm on the newly onlined cpu which causes the softirq loop
to wake up the softirq thread and an even longer delay of the cpu
which waits for the cpu_online bit.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Peter Zijlstra <peterz@infradead.org>
Cc: stable@kernel.org # 2.6.39
---
 arch/x86/kernel/smpboot.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33a0c11..9fd3137 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -285,6 +285,19 @@ notrace static void __cpuinit start_secondary(void *unused)
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 	x86_platform.nmi_init();
 
+	/*
+	 * Wait until the cpu which brought this one up marked it
+	 * online before enabling interrupts. If we don't do that then
+	 * we can end up waking up the softirq thread before this cpu
+	 * reached the active state, which makes the scheduler unhappy
+	 * and schedule the softirq thread on the wrong cpu. This is
+	 * only observable with forced threaded interrupts, but in
+	 * theory it could also happen w/o them. It's just way harder
+	 * to achieve.
+	 */
+	while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
+		cpu_relax();
+
 	/* enable local interrupts */
 	local_irq_enable();
 
-- 
cgit v1.1


From a91d92875ee94e4703fd017ccaadb48cfb344994 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 3 Jun 2011 09:51:34 +0000
Subject: xen: partially revert "xen: set max_pfn_mapped to the last pfn
 mapped"

We only need to set max_pfn_mapped to the last pfn mapped on x86_64 to
make sure that cleanup_highmap doesn't remove important mappings at
_end.

We don't need to do this on x86_32 because cleanup_highmap is not called
on x86_32. Besides lowering max_pfn_mapped on x86_32 has the unwanted
side effect of limiting the amount of memory available for the 1:1
kernel pagetable allocation.

This patch reverts the x86_32 part of the original patch.

CC: stable@kernel.org
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dc708dc..afe1d54 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1599,6 +1599,11 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 		for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
 			pte_t pte;
 
+#ifdef CONFIG_X86_32
+			if (pfn > max_pfn_mapped)
+				max_pfn_mapped = pfn;
+#endif
+
 			if (!pte_none(pte_page[pteidx]))
 				continue;
 
@@ -1766,7 +1771,9 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 	initial_kernel_pmd =
 		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
 
-	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
+	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
+				  xen_start_info->nr_pt_frames * PAGE_SIZE +
+				  512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
 	memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
-- 
cgit v1.1


From 977cb76d52e7aa040e18a84b29fe6fd80d79319b Mon Sep 17 00:00:00 2001
From: Florian Fainelli <ffainelli@freebox.fr>
Date: Mon, 6 Jun 2011 10:15:49 +0200
Subject: x86: devicetree: Add missing early_init_dt_setup_initrd_arch stub

This patch fixes the following build failure:

drivers/built-in.o: In function `early_init_dt_check_for_initrd':
/home/florian/dev/kernel/x86/linux-2.6-x86/drivers/of/fdt.c:571:
undefined reference to `early_init_dt_setup_initrd_arch'
make: *** [.tmp_vmlinux1] Error 1

which happens as soon as we enable initrd support on a x86 devicetree
platform such as Intel CE4100.

Signed-off-by: Florian Fainelli <ffainelli@freebox.fr>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Cc: Maxime Bizon <mbizon@freebox.fr>
Acked-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: stable@kernel.org # 2.6.39
Link: http://lkml.kernel.org/r/201106061015.50039.ffainelli@freebox.fr
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/devicetree.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 690bc84..9aeb78a 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/of_pci.h>
+#include <linux/initrd.h>
 
 #include <asm/hpet.h>
 #include <asm/irq_controller.h>
@@ -98,6 +99,16 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 	return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS));
 }
 
+#ifdef CONFIG_BLK_DEV_INITRD
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+					    unsigned long end)
+{
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+}
+#endif
+
 void __init add_dtb(u64 data)
 {
 	initial_dtb = data + offsetof(struct setup_data, data);
-- 
cgit v1.1


From 7ad35cf288fd63a19bf50e490440a992de808b2b Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 25 May 2011 14:00:49 +1000
Subject: x86/uv/x2apic: update for change in pci bridge handling.

When I added 3448a19da479b6bd1e28e2a2be9fa16c6a6feb39
I forgot about the special uv handling code for this, so this
patch fixes it up.

Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Acked-by: Ingo Molnar
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index b511a01..adc66c3 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -632,14 +632,14 @@ late_initcall(uv_init_heartbeat);
 
 /* Direct Legacy VGA I/O traffic to designated IOH */
 int uv_set_vga_state(struct pci_dev *pdev, bool decode,
-		      unsigned int command_bits, bool change_bridge)
+		      unsigned int command_bits, u32 flags)
 {
 	int domain, bus, rc;
 
-	PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n",
-			pdev->devfn, decode, command_bits, change_bridge);
+	PR_DEVEL("devfn %x decode %d cmd %x flags %d\n",
+			pdev->devfn, decode, command_bits, flags);
 
-	if (!change_bridge)
+	if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
 		return 0;
 
 	if ((command_bits & PCI_COMMAND_IO) == 0)
-- 
cgit v1.1


From 60b8b1de0dd2bf246f0e074d287bb3f0bc42a755 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@canonical.com>
Date: Tue, 14 Jun 2011 12:45:10 -0700
Subject: x86 idle: APM requires pm_idle/default_idle unconditionally when a
 module

[ Also from Ben Hutchings <ben@decadent.org.uk> and Vitaliy Ivanov
  <vitalivanov@gmail.com> ]

Commit 06ae40ce073d ("x86 idle: EXPORT_SYMBOL(default_idle, pm_idle)
only when APM demands it") removed the export for pm_idle/default_idle
unless the apm module was modularised and CONFIG_APM_CPU_IDLE was set.

But the apm module uses pm_idle/default_idle unconditionally,
CONFIG_APM_CPU_IDLE only affects the bios idle threshold.  Adjust the
export accordingly.

[ Used #ifdef instead of #if defined() as it's shorter, and what both
  Ben and Vitaliy used.. Andy, you're out-voted ;)    - Linus ]

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Len Brown <len.brown@intel.com>
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Vitaliy Ivanov <vitalivanov@gmail.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/process.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2e4928d..e1ba8cb2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -337,7 +337,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void);
-#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE)
+#ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(pm_idle);
 #endif
 
@@ -399,7 +399,7 @@ void default_idle(void)
 		cpu_relax();
 	}
 }
-#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE)
+#ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(default_idle);
 #endif
 
-- 
cgit v1.1


From 8fe7e94eb71430cf63a742f3c19739d82a662758 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 1 Jun 2011 15:31:44 +0200
Subject: oprofile, x86: Fix race in nmi handler while starting counters

In some rare cases, nmis are generated immediately after the nmi
handler of the cpu was started. This causes the counter not to be
enabled. Before enabling the nmi handlers we need to set variable
ctr_running first and make sure its value is written to memory.

Also, the patch makes all existing barriers a memory barrier instead
of a compiler barrier only.

Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Cc: <stable@kernel.org> # .35+
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/nmi_int.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index cf97500..68894fd 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -112,8 +112,10 @@ static void nmi_cpu_start(void *dummy)
 static int nmi_start(void)
 {
 	get_online_cpus();
-	on_each_cpu(nmi_cpu_start, NULL, 1);
 	ctr_running = 1;
+	/* make ctr_running visible to the nmi handler: */
+	smp_mb();
+	on_each_cpu(nmi_cpu_start, NULL, 1);
 	put_online_cpus();
 	return 0;
 }
@@ -504,15 +506,18 @@ static int nmi_setup(void)
 
 	nmi_enabled = 0;
 	ctr_running = 0;
-	barrier();
+	/* make variables visible to the nmi handler: */
+	smp_mb();
 	err = register_die_notifier(&profile_exceptions_nb);
 	if (err)
 		goto fail;
 
 	get_online_cpus();
 	register_cpu_notifier(&oprofile_cpu_nb);
-	on_each_cpu(nmi_cpu_setup, NULL, 1);
 	nmi_enabled = 1;
+	/* make nmi_enabled visible to the nmi handler: */
+	smp_mb();
+	on_each_cpu(nmi_cpu_setup, NULL, 1);
 	put_online_cpus();
 
 	return 0;
@@ -531,7 +536,8 @@ static void nmi_shutdown(void)
 	nmi_enabled = 0;
 	ctr_running = 0;
 	put_online_cpus();
-	barrier();
+	/* make variables visible to the nmi handler: */
+	smp_mb();
 	unregister_die_notifier(&profile_exceptions_nb);
 	msrs = &get_cpu_var(cpu_msrs);
 	model->shutdown(msrs);
-- 
cgit v1.1


From 900cba8881b39dfbc7c8062098504ab93f5387a8 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Fri, 18 Dec 2009 10:31:31 +0100
Subject: xen: support CONFIG_MAXSMP

The MAXSMP config option requires CPUMASK_OFFSTACK, which in turn
requires we init the memory for the maps while we bring up the cpus.
MAXSMP also increases NR_CPUS to 4096. This increase in size exposed an
issue in the argument construction for multicalls from
xen_flush_tlb_others. The args should only need space for the actual
number of cpus.

Also in 2.6.39 it exposes a bootup problem.

BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff8157a1d3>] set_cpu_sibling_map+0x123/0x30d
...
Call Trace:
[<ffffffff81039a3f>] ? xen_restore_fl_direct_reloc+0x4/0x4
[<ffffffff819dc4db>] xen_smp_prepare_cpus+0x36/0x135
..

CC: stable@kernel.org
Signed-off-by: Andrew Jones <drjones@redhat.com>
[v2: Updated to compile on 3.0]
[v3: Updated to compile when CONFIG_SMP is not defined]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 3 ++-
 arch/x86/xen/smp.c | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index afe1d54..673e968 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -59,6 +59,7 @@
 #include <asm/page.h>
 #include <asm/init.h>
 #include <asm/pat.h>
+#include <asm/smp.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -1231,7 +1232,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 {
 	struct {
 		struct mmuext_op op;
-		DECLARE_BITMAP(mask, NR_CPUS);
+		DECLARE_BITMAP(mask, num_processors);
 	} *args;
 	struct multicall_space mcs;
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 41038c0..b4533a8 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -205,11 +205,18 @@ static void __init xen_smp_prepare_boot_cpu(void)
 static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned cpu;
+	unsigned int i;
 
 	xen_init_lock_cpu(0);
 
 	smp_store_cpu_info(0);
 	cpu_data(0).x86_max_cores = 1;
+
+	for_each_possible_cpu(i) {
+		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
+	}
 	set_cpu_sibling_map(0);
 
 	if (xen_smp_intr_init(0))
-- 
cgit v1.1


From b2abe50688dcb470e2e46109da7e7e02245ed59b Mon Sep 17 00:00:00 2001
From: Tom Goetz <tom.goetz@virtualcomputer.com>
Date: Mon, 16 May 2011 15:06:26 -0400
Subject: xen: When calling power_off, don't call the halt function.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.. As it won't actually power off the machine.

Reported-by: Sven Köhler <sven.koehler@gmail.com>
Tested-by: Sven Köhler <sven.koehler@gmail.com>
Signed-off-by: Tom Goetz <tom.goetz@virtualcomputer.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index dd7b88f..5525163 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1033,6 +1033,13 @@ static void xen_machine_halt(void)
 	xen_reboot(SHUTDOWN_poweroff);
 }
 
+static void xen_machine_power_off(void)
+{
+	if (pm_power_off)
+		pm_power_off();
+	xen_reboot(SHUTDOWN_poweroff);
+}
+
 static void xen_crash_shutdown(struct pt_regs *regs)
 {
 	xen_reboot(SHUTDOWN_crash);
@@ -1058,7 +1065,7 @@ int xen_panic_handler_init(void)
 static const struct machine_ops xen_machine_ops __initconst = {
 	.restart = xen_restart,
 	.halt = xen_machine_halt,
-	.power_off = xen_machine_halt,
+	.power_off = xen_machine_power_off,
 	.shutdown = xen_machine_halt,
 	.crash_shutdown = xen_crash_shutdown,
 	.emergency_restart = xen_emergency_restart,
-- 
cgit v1.1


From acd049c6e99d2ad1195666195230f6881d1c1588 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 16 Jun 2011 13:07:19 -0400
Subject: xen/setup: Fix for incorrect xen_extra_mem_start.

The earlier attempts (24bdb0b62cc82120924762ae6bc85afc8c3f2b26)
at fixing this problem caused other problems to surface (PV guests
with no PCI passthrough would have SWIOTLB turned on - which meant
64MB of precious contingous DMA32 memory being eaten up per guest).
The problem was: "on xen we add an extra memory region at the end of
the e820, and on this particular machine this extra memory region
would start below 4g and cross over the 4g boundary:

[0xfee01000-0x192655000)

Unfortunately e820_end_of_low_ram_pfn does not expect an
e820 layout like that so it returns 4g, therefore initial_memory_mapping
will map [0 - 0x100000000), that is a memory range that includes some
reserved memory regions."

The memory range was the IOAPIC regions, and with the 1-1 mapping
turned on, it would map them as RAM, not as MMIO regions. This caused
the hypervisor to complain. Fortunately this is experienced only under
the initial domain so we guard for it.

Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index be1a464..60aeeb5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -227,11 +227,7 @@ char * __init xen_memory_setup(void)
 
 	memcpy(map_raw, map, sizeof(map));
 	e820.nr_map = 0;
-#ifdef CONFIG_X86_32
 	xen_extra_mem_start = mem_end;
-#else
-	xen_extra_mem_start = max((1ULL << 32), mem_end);
-#endif
 	for (i = 0; i < memmap.nr_entries; i++) {
 		unsigned long long end;
 
@@ -266,6 +262,12 @@ char * __init xen_memory_setup(void)
 		if (map[i].size > 0)
 			e820_add_region(map[i].addr, map[i].size, map[i].type);
 	}
+	/* Align the balloon area so that max_low_pfn does not get set
+	 * to be at the _end_ of the PCI gap at the far end (fee01000).
+	 * Note that xen_extra_mem_start gets set in the loop above to be
+	 * past the last E820 region. */
+	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
+		xen_extra_mem_start = (1ULL<<32);
 
 	/*
 	 * In domU, the ISA region is normal, usable memory, but we
-- 
cgit v1.1


From 7d68dc3f1003a38948c55c803c32d1989dd49198 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <m.b.lankhorst@gmail.com>
Date: Tue, 14 Jun 2011 19:53:09 +0200
Subject: x86, efi: Do not reserve boot services regions within reserved areas

Commit 916f676f8dc started reserving boot service code since some systems
require you to keep that code around until SetVirtualAddressMap is called.

However, in some cases those areas will overlap with reserved regions.
The proper medium-term fix is to fix the bootloader to prevent the
conflicts from occurring by moving the kernel to a better position,
but the kernel should check for this possibility, and only reserve regions
which can be reserved.

Signed-off-by: Maarten Lankhorst <m.b.lankhorst@gmail.com>
Link: http://lkml.kernel.org/r/4DF7A005.1050407@gmail.com
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/memblock.h |  2 +-
 arch/x86/mm/memblock.c          |  4 ++--
 arch/x86/platform/efi/efi.c     | 29 +++++++++++++++++++++++++----
 3 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
index 19ae14b..0cd3800 100644
--- a/arch/x86/include/asm/memblock.h
+++ b/arch/x86/include/asm/memblock.h
@@ -4,7 +4,6 @@
 #define ARCH_DISCARD_MEMBLOCK
 
 u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
-void memblock_x86_to_bootmem(u64 start, u64 end);
 
 void memblock_x86_reserve_range(u64 start, u64 end, char *name);
 void memblock_x86_free_range(u64 start, u64 end);
@@ -19,5 +18,6 @@ u64 memblock_x86_hole_size(u64 start, u64 end);
 u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
 u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
 u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
+bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
 
 #endif
diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
index aa11693..992da5e 100644
--- a/arch/x86/mm/memblock.c
+++ b/arch/x86/mm/memblock.c
@@ -8,7 +8,7 @@
 #include <linux/range.h>
 
 /* Check for already reserved areas */
-static bool __init check_with_memblock_reserved_size(u64 *addrp, u64 *sizep, u64 align)
+bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
 {
 	struct memblock_region *r;
 	u64 addr = *addrp, last;
@@ -59,7 +59,7 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
 		if (addr >= ei_last)
 			continue;
 		*sizep = ei_last - addr;
-		while (check_with_memblock_reserved_size(&addr, sizep, align))
+		while (memblock_x86_check_reserved_size(&addr, sizep, align))
 			;
 
 		if (*sizep)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0d3a4fa..474356b 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -310,14 +310,31 @@ void __init efi_reserve_boot_services(void)
 
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		efi_memory_desc_t *md = p;
-		unsigned long long start = md->phys_addr;
-		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+		u64 start = md->phys_addr;
+		u64 size = md->num_pages << EFI_PAGE_SHIFT;
 
 		if (md->type != EFI_BOOT_SERVICES_CODE &&
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
-
-		memblock_x86_reserve_range(start, start + size, "EFI Boot");
+		/* Only reserve where possible:
+		 * - Not within any already allocated areas
+		 * - Not over any memory area (really needed, if above?)
+		 * - Not within any part of the kernel
+		 * - Not the bios reserved area
+		*/
+		if ((start+size >= virt_to_phys(_text)
+				&& start <= virt_to_phys(_end)) ||
+			!e820_all_mapped(start, start+size, E820_RAM) ||
+			memblock_x86_check_reserved_size(&start, &size,
+							1<<EFI_PAGE_SHIFT)) {
+			/* Could not reserve, skip it */
+			md->num_pages = 0;
+			memblock_dbg(PFX "Could not reserve boot range "
+					"[0x%010llx-0x%010llx]\n",
+						start, start+size-1);
+		} else
+			memblock_x86_reserve_range(start, start+size,
+							"EFI Boot");
 	}
 }
 
@@ -334,6 +351,10 @@ static void __init efi_free_boot_services(void)
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
 
+		/* Could not reserve boot area */
+		if (!size)
+			continue;
+
 		free_bootmem_late(start, size);
 	}
 }
-- 
cgit v1.1


From b72336355bb4c92d4a2be3f975dbea47089c83c1 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@alien8.de>
Date: Mon, 30 May 2011 22:11:17 +0200
Subject: KVM: MMU: Fix build warnings in walk_addr_generic()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On 3.0-rc1 I get

In file included from arch/x86/kvm/mmu.c:2856:
arch/x86/kvm/paging_tmpl.h: In function ‘paging32_walk_addr_generic’:
arch/x86/kvm/paging_tmpl.h:124: warning: ‘ptep_user’ may be used uninitialized in this function
In file included from arch/x86/kvm/mmu.c:2852:
arch/x86/kvm/paging_tmpl.h: In function ‘paging64_walk_addr_generic’:
arch/x86/kvm/paging_tmpl.h:124: warning: ‘ptep_user’ may be used uninitialized in this function

caused by 6e2ca7d1802bf8ed9908435e34daa116662e7790. According to Takuya
Yoshikawa, ptep_user won't be used uninitialized so shut up gcc.

Cc: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Link: http://lkml.kernel.org/r/20110530094604.GC21833@liondog.tnic
Signed-off-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6c4dc010..9d03ad4 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -121,7 +121,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    gva_t addr, u32 access)
 {
 	pt_element_t pte;
-	pt_element_t __user *ptep_user;
+	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
 	unsigned index, pt_access, uninitialized_var(pte_access);
 	gpa_t pte_gpa;
-- 
cgit v1.1


From 5233dd51ece1615d54ab96c4cbe9ac3cc595e955 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 6 Jun 2011 14:27:47 -0300
Subject: KVM: VMX: do not overwrite uptodate vcpu->arch.cr3 on KVM_SET_SREGS

Only decache guest CR3 value if vcpu->arch.cr3 is stale.
Fixes loadvm with live guest.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Tested-by: Markus Schade <markus.schade@gmail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4c3fa0f..d48ec60 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2047,7 +2047,8 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
 					unsigned long cr0,
 					struct kvm_vcpu *vcpu)
 {
-	vmx_decache_cr3(vcpu);
+	if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
+		vmx_decache_cr3(vcpu);
 	if (!(cr0 & X86_CR0_PG)) {
 		/* From paging/starting to nonpaging */
 		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-- 
cgit v1.1


From a0a8eaba1661232f094654422bdabe2df4e26863 Mon Sep 17 00:00:00 2001
From: Steve <stefan.bosak@gmail.com>
Date: Fri, 17 Jun 2011 10:25:39 +0800
Subject: KVM: MMU: fix opposite condition in mapping_level_dirty_bitmap

The condition is opposite, it always maps huge page for the dirty tracked page

Reported-by: Steve <stefan.bosak@gmail.com>
Signed-off-by: Steve <stefan.bosak@gmail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index bd14bb4..aee3862 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -565,7 +565,7 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 {
-	return gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true);
+	return !gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true);
 }
 
 static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
-- 
cgit v1.1


From de2d1a524e94a79078d9fe22c57c0c6009237547 Mon Sep 17 00:00:00 2001
From: Zachary Amsden <zamsden@redhat.com>
Date: Wed, 15 Jun 2011 20:50:04 -0700
Subject: KVM: Fix register corruption in pvclock_scale_delta

The 128-bit multiply in pvclock.h was missing an output constraint for
EDX which caused a register corruption to appear.  Thanks to Ulrich for
diagnosing the EDX corruption and Avi for providing this fix.

Signed-off-by: Zachary Amsden <zamsden@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/pvclock.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 31d84ac..a518c0a 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -22,6 +22,8 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 	u64 product;
 #ifdef __i386__
 	u32 tmp1, tmp2;
+#else
+	ulong tmp;
 #endif
 
 	if (shift < 0)
@@ -42,8 +44,11 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
 #elif defined(__x86_64__)
 	__asm__ (
-		"mul %%rdx ; shrd $32,%%rdx,%%rax"
-		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+		"mul %[mul_frac] ; shrd $32, %[hi], %[lo]"
+		: [lo]"=a"(product),
+		  [hi]"=d"(tmp)
+		: "0"(delta),
+		  [mul_frac]"rm"((u64)mul_frac));
 #else
 #error implement me!
 #endif
-- 
cgit v1.1


From c6830c22603aaecf65405af23f6da2d55892f9cb Mon Sep 17 00:00:00 2001
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Date: Thu, 16 Jun 2011 17:28:07 +0900
Subject: Fix node_start/end_pfn() definition for mm/page_cgroup.c

commit 21a3c96 uses node_start/end_pfn(nid) for detection start/end
of nodes. But, it's not defined in linux/mmzone.h but defined in
/arch/???/include/mmzone.h which is included only under
CONFIG_NEED_MULTIPLE_NODES=y.

Then, we see
  mm/page_cgroup.c: In function 'page_cgroup_init':
  mm/page_cgroup.c:308: error: implicit declaration of function 'node_start_pfn'
  mm/page_cgroup.c:309: error: implicit declaration of function 'node_end_pfn'

So, fixiing page_cgroup.c is an idea...

But node_start_pfn()/node_end_pfn() is a very generic macro and
should be implemented in the same manner for all archs.
(m32r has different implementation...)

This patch removes definitions of node_start/end_pfn() in each archs
and defines a unified one in linux/mmzone.h. It's not under
CONFIG_NEED_MULTIPLE_NODES, now.

A result of macro expansion is here (mm/page_cgroup.c)

for !NUMA
 start_pfn = ((&contig_page_data)->node_start_pfn);
  end_pfn = ({ pg_data_t *__pgdat = (&contig_page_data); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;});

for NUMA (x86-64)
  start_pfn = ((node_data[nid])->node_start_pfn);
  end_pfn = ({ pg_data_t *__pgdat = (node_data[nid]); __pgdat->node_start_pfn + __pgdat->node_spanned_pages;});

Changelog:
 - fixed to avoid using "nid" twice in node_end_pfn() macro.

Reported-and-acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Reported-and-tested-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/mmzone_32.h | 11 -----------
 arch/x86/include/asm/mmzone_64.h |  3 ---
 2 files changed, 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 5e83a41..224e8c5 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -48,17 +48,6 @@ static inline int pfn_to_nid(unsigned long pfn)
 #endif
 }
 
-/*
- * Following are macros that each numa implmentation must define.
- */
-
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)						\
-({									\
-	pg_data_t *__pgdat = NODE_DATA(nid);				\
-	__pgdat->node_start_pfn + __pgdat->node_spanned_pages;		\
-})
-
 static inline int pfn_valid(int pfn)
 {
 	int nid = pfn_to_nid(pfn);
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
index b3f88d7..129d9aa 100644
--- a/arch/x86/include/asm/mmzone_64.h
+++ b/arch/x86/include/asm/mmzone_64.h
@@ -13,8 +13,5 @@ extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid)		(node_data[nid])
 
-#define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
-#define node_end_pfn(nid)       (NODE_DATA(nid)->node_start_pfn +	\
-				 NODE_DATA(nid)->node_spanned_pages)
 #endif
 #endif /* _ASM_X86_MMZONE_64_H */
-- 
cgit v1.1


From e376fd664b1547e29e264e3cfb97553a1be9054b Mon Sep 17 00:00:00 2001
From: Jesper Juhl <jj@chaosbits.net>
Date: Thu, 26 May 2011 11:12:53 +0200
Subject: watchdog: Intel SCU Watchdog: Fix build and remove duplicate code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trying to build the Intel SCU Watchdog fails for me with gcc 4.6.0 -
$ gcc --version | head -n 1
gcc (GCC) 4.6.0 20110513 (prerelease)

like this :
  CC      drivers/watchdog/intel_scu_watchdog.o
In file included from drivers/watchdog/intel_scu_watchdog.c:49:0:
/home/jj/src/linux-2.6/arch/x86/include/asm/apb_timer.h: In function ‘apbt_time_init’:
/home/jj/src/linux-2.6/arch/x86/include/asm/apb_timer.h:65:42: warning: ‘return’ with a value, in function returning void [enabled by default]
drivers/watchdog/intel_scu_watchdog.c: In function ‘intel_scu_watchdog_init’:
drivers/watchdog/intel_scu_watchdog.c:468:2: error: implicit declaration of function ‘sfi_get_mtmr’ [-Werror=implicit-function-declaration]
drivers/watchdog/intel_scu_watchdog.c:468:32: warning: assignment makes pointer from integer without a cast [enabled by default]
cc1: some warnings being treated as errors

make[1]: *** [drivers/watchdog/intel_scu_watchdog.o] Error 1
make: *** [drivers/watchdog/intel_scu_watchdog.o] Error 2

Additionally, linux/types.h is needlessly being included twice in
drivers/watchdog/intel_scu_watchdog.c

Signed-off-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 arch/x86/include/asm/apb_timer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
index 2fefa50..af60d8a 100644
--- a/arch/x86/include/asm/apb_timer.h
+++ b/arch/x86/include/asm/apb_timer.h
@@ -62,7 +62,7 @@ extern int sfi_mtimer_num;
 #else /* CONFIG_APB_TIMER */
 
 static inline unsigned long apbt_quick_calibrate(void) {return 0; }
-static inline void apbt_time_init(void) {return 0; }
+static inline void apbt_time_init(void) { }
 
 #endif
 #endif /* ASM_X86_APBT_H */
-- 
cgit v1.1


From cb16c348760ad2bc79b67b20aefac05529569ed7 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 19 Jun 2011 19:21:11 +0300
Subject: KVM: x86 emulator: fix %rip-relative addressing with immediate source
 operand

%rip-relative addressing is relative to the first byte of the next instruction,
so we need to add %rip only after we've fetched any immediate bytes.

Based on original patch by Li Xin <xin.li@intel.com>.

Signed-off-by: Avi Kivity <avi@redhat.com>
Acked-by: Li Xin <xin.li@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 6df88c7..adc9867 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3372,7 +3372,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
 	int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
 	bool op_prefix = false;
 	struct opcode opcode;
-	struct operand memop = { .type = OP_NONE };
+	struct operand memop = { .type = OP_NONE }, *memopp = NULL;
 
 	c->eip = ctxt->eip;
 	c->fetch.start = c->eip;
@@ -3547,9 +3547,6 @@ done_prefixes:
 	if (memop.type == OP_MEM && c->ad_bytes != 8)
 		memop.addr.mem.ea = (u32)memop.addr.mem.ea;
 
-	if (memop.type == OP_MEM && c->rip_relative)
-		memop.addr.mem.ea += c->eip;
-
 	/*
 	 * Decode and fetch the source operand: register, memory
 	 * or immediate.
@@ -3571,6 +3568,7 @@ done_prefixes:
 							   c->op_bytes;
 	srcmem_common:
 		c->src = memop;
+		memopp = &c->src;
 		break;
 	case SrcImmU16:
 		rc = decode_imm(ctxt, &c->src, 2, false);
@@ -3667,6 +3665,7 @@ done_prefixes:
 	case DstMem:
 	case DstMem64:
 		c->dst = memop;
+		memopp = &c->dst;
 		if ((c->d & DstMask) == DstMem64)
 			c->dst.bytes = 8;
 		else
@@ -3700,10 +3699,13 @@ done_prefixes:
 		/* Special instructions do their own operand decoding. */
 	default:
 		c->dst.type = OP_NONE; /* Disable writeback. */
-		return 0;
+		break;
 	}
 
 done:
+	if (memopp && memopp->type == OP_MEM && c->rip_relative)
+		memopp->addr.mem.ea += c->eip;
+
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
-- 
cgit v1.1


From 32dd11942aeb47f91209a446d6b10063c5b69389 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 30 Jun 2011 09:12:40 -0400
Subject: xen/mmu: Fix for linker errors when CONFIG_SMP is not defined.

Simple enough - we use an extern defined symbol which is not
defined when CONFIG_SMP is not defined. This fixes the linker
dying.

CC: stable@kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 673e968..0ccccb6 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1232,7 +1232,11 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 {
 	struct {
 		struct mmuext_op op;
+#ifdef CONFIG_SMP
 		DECLARE_BITMAP(mask, num_processors);
+#else
+		DECLARE_BITMAP(mask, NR_CPUS);
+#endif
 	} *args;
 	struct multicall_space mcs;
 
-- 
cgit v1.1


From 155a16f21923bc2f04161ac92acca986371ef27b Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 30 Jun 2011 09:18:27 -0400
Subject: xen/pci: Use the INT_SRC_OVR IRQ (instead of GSI) to preset the ACPI
 SCI IRQ.

In the past we would use the GSI value to preset the ACPI SCI
IRQ which worked great as GSI == IRQ:

ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)

While that is most often seen, there are some oddities:

ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)

which means that GSI 20 (or pin 20) is to be overriden for IRQ 9.
Our code that presets the interrupt for ACPI SCI however would
use the GSI 20 instead of IRQ 9 ending up with:

xen: sci override: global_irq=20 trigger=0 polarity=1
xen: registering gsi 20 triggering 0 polarity 1
xen: --> pirq=20 -> irq=20
xen: acpi sci 20
.. snip..
calling  acpi_init+0x0/0xbc @ 1
ACPI: SCI (IRQ9) allocation failed
ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20110413/evevent-119)
ACPI: Unable to start the ACPI Interpreter

as the ACPI interpreter made a call to 'acpi_gsi_to_irq' which got nine.
It used that value to request an IRQ (request_irq) and since that was not
present it failed.

The fix is to recognize that for interrupts that are overriden (in our
case we only care about the ACPI SCI) we should use the IRQ number
to present the IRQ instead of the using GSI. End result is that we get:

xen: sci override: global_irq=20 trigger=0 polarity=1
xen: registering gsi 20 triggering 0 polarity 1
xen: --> pirq=20 -> irq=9 (gsi=9)
xen: acpi sci 9

which fixes the ACPI interpreter failing on startup.

CC: stable@kernel.org
Reported-by: Liwei <xieliwei@gmail.com>
Tested-by: Liwei <xieliwei@gmail.com>
[http://lists.xensource.com/archives/html/xen-devel/2011-06/msg01727.html]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/pci/xen.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 8214724..fe00830 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -333,6 +333,7 @@ static int xen_register_pirq(u32 gsi, int triggering)
 	struct physdev_map_pirq map_irq;
 	int shareable = 0;
 	char *name;
+	bool gsi_override = false;
 
 	if (!xen_pv_domain())
 		return -1;
@@ -349,11 +350,32 @@ static int xen_register_pirq(u32 gsi, int triggering)
 	if (pirq < 0)
 		goto out;
 
-	irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name);
+	/* Before we bind the GSI to a Linux IRQ, check whether
+	 * we need to override it with bus_irq (IRQ) value. Usually for
+	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
+	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
+	 * but there are oddballs where the IRQ != GSI:
+	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
+	 * which ends up being: gsi_to_irq[9] == 20
+	 * (which is what acpi_gsi_to_irq ends up calling when starting the
+	 * the ACPI interpreter and keels over since IRQ 9 has not been
+	 * setup as we had setup IRQ 20 for it).
+	 */
+	if (gsi == acpi_sci_override_gsi) {
+		/* Check whether the GSI != IRQ */
+		acpi_gsi_to_irq(gsi, &irq);
+		if (irq != gsi)
+			/* Bugger, we MUST have that IRQ. */
+			gsi_override = true;
+	}
+	if (gsi_override)
+		irq = xen_bind_pirq_gsi_to_irq(irq, pirq, shareable, name);
+	else
+		irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name);
 	if (irq < 0)
 		goto out;
 
-	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d\n", pirq, irq);
+	printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d (gsi=%d)\n", pirq, irq, gsi);
 
 	map_irq.domid = DOMID_SELF;
 	map_irq.type = MAP_PIRQ_TYPE_GSI;
-- 
cgit v1.1


From a26474e8649643e82d71e3a386d5c4bcc0b207ef Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 28 Jun 2011 11:41:07 +0200
Subject: x86-32, NUMA: Fix boot regression caused by NUMA init unification on
 highmem machines

During 32/64 NUMA init unification, commit 797390d855 ("x86-32,
NUMA: use sparse_memory_present_with_active_regions()") made
32bit mm init call memory_present() automatically from
active_regions instead of leaving it to each NUMA init path.

This commit description is inaccurate - memory_present() calls
aren't the same for flat and numaq.  After the commit,
memory_present() is only called for the intersection of e820 and
NUMA layout.  Before, on flatmem, memory_present() would be
called from 0 to max_pfn.  After, it would be called only on the
areas that e820 indicates to be populated.

This is how x86_64 works and should be okay as memmap is allowed
to contain holes; however, x86_32 DISCONTIGMEM is missing
early_pfn_valid(), which makes memmap_init_zone() assume that
memmap doesn't contain any hole.  This leads to the following
oops if e820 map contains holes as it often does on machine with
near or more 4GiB of memory by calling pfn_to_page() on a pfn
which isn't mapped to a NUMA node, a reported by Conny Seidel:

  BUG: unable to handle kernel paging request at 000012b0
  IP: [<c1aa13ce>] memmap_init_zone+0x6c/0xf2
  *pdpt =3D 0000000000000000 *pde =3D f000eef3f000ee00
  Oops: 0000 [#1] SMP
  last sysfs file:
  Modules linked in:

  Pid: 0, comm: swapper Not tainted 2.6.39-rc5-00164-g797390d #1 To Be Filled By O.E.M. To Be Filled By O.E.M./E350M1
  EIP: 0060:[<c1aa13ce>] EFLAGS: 00010012 CPU: 0
  EIP is at memmap_init_zone+0x6c/0xf2
  EAX: 00000000 EBX: 000a8000 ECX: 000a7fff EDX: f2c00b80
  ESI: 000a8000 EDI: f2c00800 EBP: c19ffe54 ESP: c19ffe34
   DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
  Process swapper (pid: 0, ti=3Dc19fe000 task=3Dc1a07f60 task.ti=3Dc19fe000)
  Stack:
   00000002 00000000 0023f000 00000000 10000000 00000a00 f2c00000 f2c00b58
   c19ffeb0 c1a80f24 000375fe 00000000 f2c00800 00000800 00000100 00000030
   c1abb768 0000003c 00000000 00000000 00000004 00207a02 f2c00800 000375fe
  Call Trace:
   [<c1a80f24>] free_area_init_node+0x358/0x385
   [<c1a81384>] free_area_init_nodes+0x420/0x487
   [<c1a79326>] paging_init+0x114/0x11b
   [<c1a6cb13>] setup_arch+0xb37/0xc0a
   [<c1a69554>] start_kernel+0x76/0x316
   [<c1a690a8>] i386_start_kernel+0xa8/0xb0

This patch fixes the bug by defining early_pfn_valid() to be the
same as pfn_valid() when DISCONTIGMEM.

Reported-bisected-and-tested-by: Conny Seidel <conny.seidel@amd.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: hans.rosenfeld@amd.com
Cc: Christoph Lameter <cl@linux.com>
Cc: Conny Seidel <conny.seidel@amd.com>
Link: http://lkml.kernel.org/r/20110628094107.GB3386@htj.dyndns.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mmzone_32.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 224e8c5..ffa037f 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -57,6 +57,8 @@ static inline int pfn_valid(int pfn)
 	return 0;
 }
 
+#define early_pfn_valid(pfn)	pfn_valid((pfn))
+
 #endif /* CONFIG_DISCONTIGMEM */
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-- 
cgit v1.1


From b49c78d4827be8d7e67e5b94adac6b30a4a9ad14 Mon Sep 17 00:00:00 2001
From: Peter Chubb <peter.chubb@nicta.com.au>
Date: Wed, 6 Jul 2011 10:56:30 +1000
Subject: x86, reboot: Acer Aspire One A110 reboot quirk

Since git commit
  660e34cebf0a11d54f2d5dd8838607452355f321 x86: reorder reboot method
  preferences,
my Acer Aspire One hangs on reboot.  It appears that its ACPI method
for rebooting is broken.  The attached patch adds a quirk so that the
machine will reboot via the BIOS.

[ hpa: verified that the ACPI control on this machine is just plain broken. ]

Signed-off-by: Peter Chubb <peter.chubb@nicta.com.au>
Link: http://lkml.kernel.org/r/w439iki5vl.wl%25peter@chubb.wattle.id.au
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/reboot.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 0c016f7..4f0d46f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -294,6 +294,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "VersaLogic Menlow board"),
 		},
 	},
+	{ /* Handle reboot issue on Acer Aspire one */
+		.callback = set_bios_reboot,
+		.ident = "Acer Aspire One A110",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "AOA110"),
+		},
+	},
 	{ }
 };
 
-- 
cgit v1.1


From 7a3136666bc0f0419f7aaa7b1fabb4b0e0a7fb76 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees.cook@canonical.com>
Date: Wed, 6 Jul 2011 18:10:34 -0700
Subject: x86, suspend: Restore MISC_ENABLE MSR in realmode wakeup

Some BIOSes will reset the Intel MISC_ENABLE MSR (specifically the
XD_DISABLE bit) when resuming from S3, which can interact poorly with
ebba638ae723d8a8fc2f7abce5ec18b688b791d7. In 32bit PAE mode, this can
lead to a fault when EFER is restored by the kernel wakeup routines,
due to it setting the NX bit for a CPU that (thanks to the BIOS reset)
now incorrectly thinks it lacks the NX feature. (64bit is not affected
because it uses a common CPU bring-up that specifically handles the
XD_DISABLE bit.)

The need for MISC_ENABLE being restored so early is specific to the S3
resume path. Normally, MISC_ENABLE is saved in save_processor_state(),
but this happens after the resume header is created, so just reproduce
the logic here. (acpi_suspend_lowlevel() creates the header, calls
do_suspend_lowlevel, which calls save_processor_state(), so the saved
processor context isn't available during resume header creation.)

[ hpa: Consider for stable if OK in mainline ]

Signed-off-by: Kees Cook <kees.cook@canonical.com>
Link: http://lkml.kernel.org/r/20110707011034.GA8523@outflux.net
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: <stable@kernel.org> 2.6.38+
---
 arch/x86/kernel/acpi/realmode/wakeup.S | 14 ++++++++++++++
 arch/x86/kernel/acpi/realmode/wakeup.h |  6 ++++++
 arch/x86/kernel/acpi/sleep.c           |  6 ++++++
 3 files changed, 26 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index ead21b6..b4fd836 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -28,6 +28,8 @@ pmode_cr3:	.long	0	/* Saved %cr3 */
 pmode_cr4:	.long	0	/* Saved %cr4 */
 pmode_efer:	.quad	0	/* Saved EFER */
 pmode_gdt:	.quad	0
+pmode_misc_en:	.quad	0	/* Saved MISC_ENABLE MSR */
+pmode_behavior:	.long	0	/* Wakeup behavior flags */
 realmode_flags:	.long	0
 real_magic:	.long	0
 trampoline_segment:	.word 0
@@ -91,6 +93,18 @@ wakeup_code:
 	/* Call the C code */
 	calll	main
 
+	/* Restore MISC_ENABLE before entering protected mode, in case
+	   BIOS decided to clear XD_DISABLE during S3. */
+	movl	pmode_behavior, %eax
+	btl	$WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE, %eax
+	jnc	1f
+
+	movl	pmode_misc_en, %eax
+	movl	pmode_misc_en + 4, %edx
+	movl	$MSR_IA32_MISC_ENABLE, %ecx
+	wrmsr
+1:
+
 	/* Do any other stuff... */
 
 #ifndef CONFIG_64BIT
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
index e1828c0..97a29e1 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -21,6 +21,9 @@ struct wakeup_header {
 	u32 pmode_efer_low;	/* Protected mode EFER */
 	u32 pmode_efer_high;
 	u64 pmode_gdt;
+	u32 pmode_misc_en_low;	/* Protected mode MISC_ENABLE */
+	u32 pmode_misc_en_high;
+	u32 pmode_behavior;	/* Wakeup routine behavior flags */
 	u32 realmode_flags;
 	u32 real_magic;
 	u16 trampoline_segment;	/* segment with trampoline code, 64-bit only */
@@ -39,4 +42,7 @@ extern struct wakeup_header wakeup_header;
 #define WAKEUP_HEADER_SIGNATURE 0x51ee1111
 #define WAKEUP_END_SIGNATURE	0x65a22c82
 
+/* Wakeup behavior bits */
+#define WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE     0
+
 #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 18a857b..103b6ab 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -77,6 +77,12 @@ int acpi_suspend_lowlevel(void)
 
 	header->pmode_cr0 = read_cr0();
 	header->pmode_cr4 = read_cr4_safe();
+	header->pmode_behavior = 0;
+	if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
+			&header->pmode_misc_en_low,
+			&header->pmode_misc_en_high))
+		header->pmode_behavior |=
+			(1 << WAKEUP_BEHAVIOR_RESTORE_MISC_ENABLE);
 	header->realmode_flags = acpi_realmode_flags;
 	header->real_magic = 0x12345678;
 
-- 
cgit v1.1


From f70e957cda22d309c769805cbb932407a5232219 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Wed, 6 Jul 2011 16:52:37 -0400
Subject: x86: Don't use the EFI reboot method by default

Testing suggests that at least some Lenovos and some Intels will
fail to reboot via EFI, attempting to jump to an unmapped
physical address. In the long run we could handle this by
providing a page table with a 1:1 mapping of physical addresses,
but for now it's probably just easier to assume that ACPI or
legacy methods will be present and reboot via those.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alan Cox <alan@linux.intel.com>
Link: http://lkml.kernel.org/r/1309985557-15350-1-git-send-email-mjg@redhat.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/platform/efi/efi.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 474356b..899e393 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -504,9 +504,6 @@ void __init efi_init(void)
 	x86_platform.set_wallclock = efi_set_rtc_mmss;
 #endif
 
-	/* Setup for EFI runtime service */
-	reboot_type = BOOT_EFI;
-
 #if EFI_DEBUG
 	print_efi_memmap();
 #endif
-- 
cgit v1.1


From ee339fe63ac408e4604c1c88b1f9a428f2511b70 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 6 Jul 2011 09:43:16 -0400
Subject: xen/pci: Move check for acpi_sci_override_gsi to xen_setup_acpi_sci.

Previously we would check for acpi_sci_override_gsi == gsi every time
a PCI device was enabled. That works during early bootup, but later
on it could lead to triggering unnecessarily the acpi_gsi_to_irq(..) lookup.
The reason is that acpi_sci_override_gsi was declared in __initdata and
after early bootup could contain bogus values.

This patch moves the check for acpi_sci_override_gsi to the
site where the ACPI SCI is preset.

CC: stable@kernel.org
Reported-by: Raghavendra D Prabhu <rprabhu@wnohang.net>
Tested-by: Raghavendra D Prabhu <rprabhu@wnohang.net>
[http://lists.xensource.com/archives/html/xen-devel/2011-07/msg00154.html]
Suggested-by:  Ian Campbell <ijc@hellion.org.uk>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/pci/xen.c | 56 +++++++++++++++++++++++++++---------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index fe00830..f567965 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -327,13 +327,12 @@ int __init pci_xen_hvm_init(void)
 }
 
 #ifdef CONFIG_XEN_DOM0
-static int xen_register_pirq(u32 gsi, int triggering)
+static int xen_register_pirq(u32 gsi, int gsi_override, int triggering)
 {
 	int rc, pirq, irq = -1;
 	struct physdev_map_pirq map_irq;
 	int shareable = 0;
 	char *name;
-	bool gsi_override = false;
 
 	if (!xen_pv_domain())
 		return -1;
@@ -345,31 +344,12 @@ static int xen_register_pirq(u32 gsi, int triggering)
 		shareable = 1;
 		name = "ioapic-level";
 	}
-
 	pirq = xen_allocate_pirq_gsi(gsi);
 	if (pirq < 0)
 		goto out;
 
-	/* Before we bind the GSI to a Linux IRQ, check whether
-	 * we need to override it with bus_irq (IRQ) value. Usually for
-	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
-	 * but there are oddballs where the IRQ != GSI:
-	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
-	 * which ends up being: gsi_to_irq[9] == 20
-	 * (which is what acpi_gsi_to_irq ends up calling when starting the
-	 * the ACPI interpreter and keels over since IRQ 9 has not been
-	 * setup as we had setup IRQ 20 for it).
-	 */
-	if (gsi == acpi_sci_override_gsi) {
-		/* Check whether the GSI != IRQ */
-		acpi_gsi_to_irq(gsi, &irq);
-		if (irq != gsi)
-			/* Bugger, we MUST have that IRQ. */
-			gsi_override = true;
-	}
-	if (gsi_override)
-		irq = xen_bind_pirq_gsi_to_irq(irq, pirq, shareable, name);
+	if (gsi_override >= 0)
+		irq = xen_bind_pirq_gsi_to_irq(gsi_override, pirq, shareable, name);
 	else
 		irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name);
 	if (irq < 0)
@@ -392,7 +372,7 @@ out:
 	return irq;
 }
 
-static int xen_register_gsi(u32 gsi, int triggering, int polarity)
+static int xen_register_gsi(u32 gsi, int gsi_override, int triggering, int polarity)
 {
 	int rc, irq;
 	struct physdev_setup_gsi setup_gsi;
@@ -403,7 +383,7 @@ static int xen_register_gsi(u32 gsi, int triggering, int polarity)
 	printk(KERN_DEBUG "xen: registering gsi %u triggering %d polarity %d\n",
 			gsi, triggering, polarity);
 
-	irq = xen_register_pirq(gsi, triggering);
+	irq = xen_register_pirq(gsi, gsi_override, triggering);
 
 	setup_gsi.gsi = gsi;
 	setup_gsi.triggering = (triggering == ACPI_EDGE_SENSITIVE ? 0 : 1);
@@ -425,6 +405,8 @@ static __init void xen_setup_acpi_sci(void)
 	int rc;
 	int trigger, polarity;
 	int gsi = acpi_sci_override_gsi;
+	int irq = -1;
+	int gsi_override = -1;
 
 	if (!gsi)
 		return;
@@ -441,7 +423,25 @@ static __init void xen_setup_acpi_sci(void)
 	printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d "
 			"polarity=%d\n", gsi, trigger, polarity);
 
-	gsi = xen_register_gsi(gsi, trigger, polarity);
+	/* Before we bind the GSI to a Linux IRQ, check whether
+	 * we need to override it with bus_irq (IRQ) value. Usually for
+	 * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so:
+	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level)
+	 * but there are oddballs where the IRQ != GSI:
+	 *  ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level)
+	 * which ends up being: gsi_to_irq[9] == 20
+	 * (which is what acpi_gsi_to_irq ends up calling when starting the
+	 * the ACPI interpreter and keels over since IRQ 9 has not been
+	 * setup as we had setup IRQ 20 for it).
+	 */
+	/* Check whether the GSI != IRQ */
+	if (acpi_gsi_to_irq(gsi, &irq) == 0) {
+		if (irq >= 0 && irq != gsi)
+			/* Bugger, we MUST have that IRQ. */
+			gsi_override = irq;
+	}
+
+	gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity);
 	printk(KERN_INFO "xen: acpi sci %d\n", gsi);
 
 	return;
@@ -450,7 +450,7 @@ static __init void xen_setup_acpi_sci(void)
 static int acpi_register_gsi_xen(struct device *dev, u32 gsi,
 				 int trigger, int polarity)
 {
-	return xen_register_gsi(gsi, trigger, polarity);
+	return xen_register_gsi(gsi, -1 /* no GSI override */, trigger, polarity);
 }
 
 static int __init pci_xen_initial_domain(void)
@@ -489,7 +489,7 @@ void __init xen_setup_pirqs(void)
 		if (acpi_get_override_irq(irq, &trigger, &polarity) == -1)
 			continue;
 
-		xen_register_pirq(irq,
+		xen_register_pirq(irq, -1 /* no GSI override */,
 			trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE);
 	}
 }
-- 
cgit v1.1