From 9b4e27b52853c5da77e61a4e36fbc40688b7a829 Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 22 Aug 2008 20:23:37 +0200
Subject: x86: fix: do not run code in amd_bus.c on non-AMD CPUs

Jan Beulich wrote:

> Even worse - this would even try to access the MSR on non-AMD CPUs
> (currently probably prevented just by the fact that only AMD ones use
> family values of 0x10 or higher).

This patch adds cpu vendor check to the postcore_initcalls.

Reported-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/amd_bus.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index dbf5323..4a6f1a6 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -555,9 +555,11 @@ static int __init early_fill_mp_bus_info(void)
 	return 0;
 }
 
-postcore_initcall(early_fill_mp_bus_info);
+#else  /* !CONFIG_X86_64 */
 
-#endif
+static int __init early_fill_mp_bus_info(void) { return 0; }
+
+#endif /* !CONFIG_X86_64 */
 
 /* common 32/64 bit code */
 
@@ -583,4 +585,15 @@ static int __init enable_pci_io_ecs(void)
 	return 0;
 }
 
-postcore_initcall(enable_pci_io_ecs);
+static int __init amd_postcore_init(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return 0;
+
+	early_fill_mp_bus_info();
+	enable_pci_io_ecs();
+
+	return 0;
+}
+
+postcore_initcall(amd_postcore_init);
-- 
cgit v1.1


From 91ede005d72df60d6b3f252be177a4743a6aa46a Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Fri, 22 Aug 2008 20:23:38 +0200
Subject: x86: fix: make PCI ECS for AMD CPUs hotplug capable

Until now, PCI ECS setup was performed at boot time only and for cpus
that are enabled then. This patch fixes this and adds cpu hotplug.

Tests sequence (check if ECS bit is set when bringing cpu online again):

 # ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' )   < /dev/cpu/1/msr
 00000008 00404010
 # ( perl -e 'sysseek(STDOUT, 0xC001001F, 0); print pack "l*", 8, 0x00400010' ) > /dev/cpu/1/msr
 # ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' )   < /dev/cpu/1/msr
 00000008 00400010
 # echo 0 > /sys/devices/system/cpu/cpu1/online
 # echo 1 > /sys/devices/system/cpu/cpu1/online
 # ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' )   < /dev/cpu/1/msr
 00000008 00404010

Reported-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/amd_bus.c | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 4a6f1a6..6a0fca7 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -1,6 +1,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/topology.h>
+#include <linux/cpu.h>
 #include "pci.h"
 
 #ifdef CONFIG_X86_64
@@ -565,7 +566,7 @@ static int __init early_fill_mp_bus_info(void) { return 0; }
 
 #define ENABLE_CF8_EXT_CFG      (1ULL << 46)
 
-static void enable_pci_io_ecs_per_cpu(void *unused)
+static void enable_pci_io_ecs(void *unused)
 {
 	u64 reg;
 	rdmsrl(MSR_AMD64_NB_CFG, reg);
@@ -575,13 +576,39 @@ static void enable_pci_io_ecs_per_cpu(void *unused)
 	}
 }
 
-static int __init enable_pci_io_ecs(void)
+static int __cpuinit amd_cpu_notify(struct notifier_block *self,
+				    unsigned long action, void *hcpu)
 {
+	int cpu = (long)hcpu;
+	switch(action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata amd_cpu_notifier = {
+	.notifier_call	= amd_cpu_notify,
+};
+
+static int __init pci_io_ecs_init(void)
+{
+	int cpu;
+
 	/* assume all cpus from fam10h have IO ECS */
         if (boot_cpu_data.x86 < 0x10)
 		return 0;
-	on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1);
+
+	register_cpu_notifier(&amd_cpu_notifier);
+	for_each_online_cpu(cpu)
+		amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
+			       (void *)(long)cpu);
 	pci_probe |= PCI_HAS_IO_ECS;
+
 	return 0;
 }
 
@@ -591,7 +618,7 @@ static int __init amd_postcore_init(void)
 		return 0;
 
 	early_fill_mp_bus_info();
-	enable_pci_io_ecs();
+	pci_io_ecs_init();
 
 	return 0;
 }
-- 
cgit v1.1


From 8735728ef8dc935c4fb351f913758fdbb62c308d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 22 Aug 2008 22:23:09 +0200
Subject: x86 MCE: Fix CPU hotplug problem with multiple multicore AMD CPUs

During CPU hot-remove the sysfs directory created by
threshold_create_bank(), defined in
arch/x86/kernel/cpu/mcheck/mce_amd_64.c, has to be removed before
its parent directory, created by mce_create_device(), defined in
arch/x86/kernel/cpu/mcheck/mce_64.c .  Moreover, when the CPU in
question is hotplugged again, obviously the latter has to be created
before the former.  At present, the right ordering is not enforced,
because all of these operations are carried out by CPU hotplug
notifiers which are not appropriately ordered with respect to each
other.  This leads to serious problems on systems with two or more
multicore AMD CPUs, among other things during suspend and hibernation.

Fix the problem by placing threshold bank CPU hotplug callbacks in
mce_cpu_callback(), so that they are invoked at the right places,
if defined.  Additionally, use kobject_del() to remove the sysfs
directory associated with the kobject created by
kobject_create_and_add() in threshold_create_bank(), to prevent the
kernel from crashing during CPU hotplug operations on systems with
two or more multicore AMD CPUs.

This patch fixes bug #11337.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Andi Kleen <andi@firstfloor.org>
Tested-by: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/mcheck/mce_64.c     |  5 +++++
 arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 18 +++++-------------
 2 files changed, 10 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 65a3396..726a5fc 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = {
 };
 
 DEFINE_PER_CPU(struct sys_device, device_mce);
+void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata;
 
 /* Why are there no generic functions for this? */
 #define ACCESSOR(name, var, start) \
@@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		mce_create_device(cpu);
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
+		if (threshold_cpu_callback)
+			threshold_cpu_callback(action, cpu);
 		mce_remove_device(cpu);
 		break;
 	}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 88736ca..5eb390a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
 	deallocate_threshold_block(cpu, bank);
 
 free_out:
+	kobject_del(b->kobj);
 	kobject_put(b->kobj);
 	kfree(b);
 	per_cpu(threshold_banks, cpu)[bank] = NULL;
@@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu)
 }
 
 /* get notified when a cpu comes on/off */
-static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
-					    unsigned long action, void *hcpu)
+static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action,
+						     unsigned int cpu)
 {
-	/* cpu was unsigned int to begin with */
-	unsigned int cpu = (unsigned long)hcpu;
-
 	if (cpu >= NR_CPUS)
-		goto out;
+		return;
 
 	switch (action) {
 	case CPU_ONLINE:
@@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
 	default:
 		break;
 	}
-      out:
-	return NOTIFY_OK;
 }
 
-static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
-	.notifier_call = threshold_cpu_callback,
-};
-
 static __init int threshold_init_device(void)
 {
 	unsigned lcpu = 0;
@@ -684,7 +676,7 @@ static __init int threshold_init_device(void)
 		if (err)
 			return err;
 	}
-	register_hotcpu_notifier(&threshold_cpu_notifier);
+	threshold_cpu_callback = amd_64_threshold_cpu_callback;
 	return 0;
 }
 
-- 
cgit v1.1


From 060700b571717c997a2ea5e2049b848fa248ee13 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 24 Aug 2008 11:52:06 -0700
Subject: x86: do not enable TSC notifier if we don't need it

Impact: crash on non-TSC-equipped CPUs

Don't enable the TSC notifier if we *either*:

1. don't have a CPU, or
2. have a CPU with constant TSC.

In either of those cases, the notifier is either damaging (1) or useless(2).

From: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/tsc.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 46af716..9bed5ca 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -325,6 +325,10 @@ static struct notifier_block time_cpufreq_notifier_block = {
 
 static int __init cpufreq_tsc(void)
 {
+	if (!cpu_has_tsc)
+		return 0;
+	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		return 0;
 	cpufreq_register_notifier(&time_cpufreq_notifier_block,
 				CPUFREQ_TRANSITION_NOTIFIER);
 	return 0;
-- 
cgit v1.1


From a2bd7274b47124d2fc4dfdb8c0591f545ba749dd Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yhlu.kernel@gmail.com>
Date: Mon, 25 Aug 2008 00:56:08 -0700
Subject: x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet against
 BAR, v3

David Witbrodt tracked down (and bisected) a hpet bootup hang on his
system to the following problem: a BIOS bug made the hpet device
visible as a generic PCI device. If e820 reserved entries happen to
be registered first in the resource tree [which v2.6.26 started doing],
then the PCI code will reallocate that device's BAR to some other
address - breaking timer IRQs and hanging the system.

( Normally hpet devices are hidden by the BIOS from the OS's PCI
  discovery via chipset magic. Sometimes the hpet is not a PCI device
  at all. )

Solve this fundamental fragility by making non-PCI platform drivers
insert resources into the resource tree even if it overlaps the e820
reserved entry, to keep the resource manager from updating the BAR.

Also do these checks for the ioapic and mmconfig addresses, and emit
a warning if this happens.

Bisected-by: David Witbrodt <dawitbro@sbcglobal.net>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Tested-by: David Witbrodt <dawitbro@sbcglobal.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/i386.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 5807d1b..d765da9 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -31,8 +31,11 @@
 #include <linux/ioport.h>
 #include <linux/errno.h>
 #include <linux/bootmem.h>
+#include <linux/acpi.h>
 
 #include <asm/pat.h>
+#include <asm/hpet.h>
+#include <asm/io_apic.h>
 
 #include "pci.h"
 
@@ -77,6 +80,77 @@ pcibios_align_resource(void *data, struct resource *res,
 }
 EXPORT_SYMBOL(pcibios_align_resource);
 
+static int check_res_with_valid(struct pci_dev *dev, struct resource *res)
+{
+	unsigned long base;
+	unsigned long size;
+	int i;
+
+	base = res->start;
+	size = (res->start == 0 && res->end == res->start) ? 0 :
+		 (res->end - res->start + 1);
+
+	if (!base || !size)
+		return 0;
+
+#ifdef CONFIG_HPET_TIMER
+	/* for hpet */
+	if (base == hpet_address && (res->flags & IORESOURCE_MEM)) {
+		dev_info(&dev->dev, "BAR has HPET at %08lx-%08lx\n",
+				 base, base + size - 1);
+		return 1;
+	}
+#endif
+
+#ifdef CONFIG_X86_IO_APIC
+	for (i = 0; i < nr_ioapics; i++) {
+		unsigned long ioapic_phys = mp_ioapics[i].mp_apicaddr;
+
+		if (base == ioapic_phys && (res->flags & IORESOURCE_MEM)) {
+			dev_info(&dev->dev, "BAR has ioapic at %08lx-%08lx\n",
+					 base, base + size - 1);
+			return 1;
+		}
+	}
+#endif
+
+#ifdef CONFIG_PCI_MMCONFIG
+	for (i = 0; i < pci_mmcfg_config_num; i++) {
+		unsigned long addr;
+
+		addr = pci_mmcfg_config[i].address;
+		if (base == addr && (res->flags & IORESOURCE_MEM)) {
+			dev_info(&dev->dev, "BAR has MMCONFIG at %08lx-%08lx\n",
+					 base, base + size - 1);
+			return 1;
+		}
+	}
+#endif
+
+	return 0;
+}
+
+static int check_platform(struct pci_dev *dev, struct resource *res)
+{
+	struct resource *root = NULL;
+
+	/*
+	 * forcibly insert it into the
+	 * resource tree
+	 */
+	if (res->flags & IORESOURCE_MEM)
+		root = &iomem_resource;
+	else if (res->flags & IORESOURCE_IO)
+		root = &ioport_resource;
+
+	if (root && check_res_with_valid(dev, res)) {
+		insert_resource(root, res);
+
+		return 1;
+	}
+
+	return 0;
+}
 /*
  *  Handle resources of PCI devices.  If the world were perfect, we could
  *  just allocate all the resource regions and do nothing more.  It isn't.
@@ -128,6 +202,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
 				pr = pci_find_parent_resource(dev, r);
 				if (!r->start || !pr ||
 				    request_resource(pr, r) < 0) {
+					if (check_platform(dev, r))
+						continue;
 					dev_err(&dev->dev, "BAR %d: can't "
 						"allocate resource\n", idx);
 					/*
@@ -171,6 +247,8 @@ static void __init pcibios_allocate_resources(int pass)
 					r->flags, disabled, pass);
 				pr = pci_find_parent_resource(dev, r);
 				if (!pr || request_resource(pr, r) < 0) {
+					if (check_platform(dev, r))
+						continue;
 					dev_err(&dev->dev, "BAR %d: can't "
 						"allocate resource\n", idx);
 					/* We'll assign a new address later */
-- 
cgit v1.1


From 52a8968ce95da8469ba0a9b3e4010fe31caf77a3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 25 Aug 2008 13:35:06 +0200
Subject: x86: fix cpufreq + sched_clock() regression

I noticed that my sched_clock() was slow on a number of machine, so I
started looking at cpufreq.

The below seems to fix the problem for me.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9bed5ca..8e786b0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -314,7 +314,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 			mark_tsc_unstable("cpufreq changes");
 	}
 
-	set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
+	set_cyc2ns_scale(tsc_khz, freq->cpu);
 
 	return 0;
 }
-- 
cgit v1.1