summaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorAndi Kleen <ak@suse.de>2005-09-12 18:49:24 +0200
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-12 10:49:56 -0700
commit3f098c2605bdf50176b26f4fa724e9b9c99e5242 (patch)
treed4632bc555c52afed6ae4c0444659dca49e3e406 /arch/x86_64
parentb91691164be174b780f5c1bb145a7ab5d33fce2f (diff)
downloadop-kernel-dev-3f098c2605bdf50176b26f4fa724e9b9c99e5242.zip
op-kernel-dev-3f098c2605bdf50176b26f4fa724e9b9c99e5242.tar.gz
[PATCH] x86-64: Support dualcore and 8 socket systems in k8 fallback node parsing
In particular on systems where the local APIC space and node space is very different from the Linux CPU number space. Previously the older NUMA setup code directly parsing the K8 northbridge registers had some issues on 8 socket or dual core systems. This patch fixes them. This is mainly done by fixing some confusion between Linux CPU numbers and local APIC ids. We now pass the local APIC IDs to later code, which avoids mismatches. Also add some heuristics to detect cases where the Hypertransport nodeids and the local APIC IDs don't match, but are shifted by a constant offset. This is still all quite hackish, hopefully BIOS writers fill in correct SRATs instead. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/kernel/setup.c64
-rw-r--r--arch/x86_64/mm/k8topology.c13
-rw-r--r--arch/x86_64/mm/numa.c10
3 files changed, 62 insertions, 25 deletions
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 976ebcf..df55a63 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -755,6 +755,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
}
}
+#ifdef CONFIG_NUMA
+static int nearby_node(int apicid)
+{
+ int i;
+ for (i = apicid - 1; i >= 0; i--) {
+ int node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
+ int node = apicid_to_node[i];
+ if (node != NUMA_NO_NODE && node_online(node))
+ return node;
+ }
+ return first_node(node_online_map); /* Shouldn't happen */
+}
+#endif
+
/*
* On a AMD dual core setup the lower bits of the APIC id distingush the cores.
* Assumes number of cores is a power of two.
@@ -763,9 +781,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
int cpu = smp_processor_id();
- int node = 0;
unsigned bits;
+#ifdef CONFIG_NUMA
+ int node = 0;
unsigned apicid = phys_proc_id[cpu];
+#endif
bits = 0;
while ((1 << bits) < c->x86_num_cores)
@@ -777,24 +797,32 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
phys_proc_id[cpu] >>= bits;
#ifdef CONFIG_NUMA
- /* When an ACPI SRAT table is available use the mappings from SRAT
- instead. */
- node = phys_proc_id[cpu];
- if (acpi_numa > 0) {
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
- node = apicid_to_node[apicid];
- else
- printk(KERN_ERR
- "SRAT: Didn't specify node for CPU %d(%d)\n",
- cpu, apicid);
- }
- if (!node_online(node))
- node = first_node(node_online_map);
- cpu_to_node[cpu] = node;
+ node = phys_proc_id[cpu];
+ if (apicid_to_node[apicid] != NUMA_NO_NODE)
+ node = apicid_to_node[apicid];
+ if (!node_online(node)) {
+ /* Two possibilities here:
+ - The CPU is missing memory and no node was created.
+ In that case try picking one from a nearby CPU
+ - The APIC IDs differ from the HyperTransport node IDs
+ which the K8 northbridge parsing fills in.
+ Assume they are all increased by a constant offset,
+ but in the same order as the HT nodeids.
+ If that doesn't result in a usable node fall back to the
+ path for the previous case. */
+ int ht_nodeid = apicid - (phys_proc_id[0] << bits);
+ if (ht_nodeid >= 0 &&
+ apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
+ node = apicid_to_node[ht_nodeid];
+ /* Pick a nearby node */
+ if (!node_online(node))
+ node = nearby_node(apicid);
+ }
+ cpu_to_node[cpu] = node;
+
+ printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
+ cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
#endif
-
- printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
- cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
#endif
}
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index ec35747..65417b0 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
unsigned long prevbase;
struct node nodes[8];
int nodeid, i, nb;
+ unsigned char nodeids[8];
int found = 0;
u32 reg;
unsigned numnodes;
nodemask_t nodes_parsed;
+ unsigned dualcore = 0;
nodes_clear(nodes_parsed);
@@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
prevbase = 0;
for (i = 0; i < 8; i++) {
unsigned long base,limit;
-
+ u32 nodeid;
+
+ /* Undefined before E stepping, but hopefully 0 */
+ dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 7;
+ nodeids[i] = nodeid;
if ((base & 3) == 0) {
if (i < numnodes)
printk("Skipping disabled node %d\n", i);
@@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < 8; i++) {
if (nodes[i].start != nodes[i].end) {
- /* assume 1:1 NODE:CPU */
- cpu_to_node[i] = i;
+ nodeid = nodeids[i];
+ apicid_to_node[nodeid << dualcore] = i;
+ apicid_to_node[(nodeid << dualcore) + dualcore] = i;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
}
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 5b15186..80a49d9 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -28,11 +28,13 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
int memnode_shift;
u8 memnodemap[NODEMAPSIZE];
-unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
-unsigned char apicid_to_node[256] __cpuinitdata = {
- [0 ... NR_CPUS-1] = NUMA_NO_NODE
+unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
+ [0 ... NR_CPUS-1] = NUMA_NO_NODE
};
-cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
+unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+};
+cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
int numa_off __initdata;
OpenPOWER on IntegriCloud