diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/kernel/setup.c | 64 | ||||
-rw-r--r-- | arch/x86_64/mm/k8topology.c | 13 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 10 |
3 files changed, 62 insertions, 25 deletions
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 976ebcf..df55a63 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -755,6 +755,24 @@ static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) } } +#ifdef CONFIG_NUMA +static int nearby_node(int apicid) +{ + int i; + for (i = apicid - 1; i >= 0; i--) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { + int node = apicid_to_node[i]; + if (node != NUMA_NO_NODE && node_online(node)) + return node; + } + return first_node(node_online_map); /* Shouldn't happen */ +} +#endif + /* * On a AMD dual core setup the lower bits of the APIC id distingush the cores. * Assumes number of cores is a power of two. @@ -763,9 +781,11 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - int node = 0; unsigned bits; +#ifdef CONFIG_NUMA + int node = 0; unsigned apicid = phys_proc_id[cpu]; +#endif bits = 0; while ((1 << bits) < c->x86_num_cores) @@ -777,24 +797,32 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) phys_proc_id[cpu] >>= bits; #ifdef CONFIG_NUMA - /* When an ACPI SRAT table is available use the mappings from SRAT - instead. */ - node = phys_proc_id[cpu]; - if (acpi_numa > 0) { - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; - else - printk(KERN_ERR - "SRAT: Didn't specify node for CPU %d(%d)\n", - cpu, apicid); - } - if (!node_online(node)) - node = first_node(node_online_map); - cpu_to_node[cpu] = node; + node = phys_proc_id[cpu]; + if (apicid_to_node[apicid] != NUMA_NO_NODE) + node = apicid_to_node[apicid]; + if (!node_online(node)) { + /* Two possibilities here: + - The CPU is missing memory and no node was created. + In that case try picking one from a nearby CPU + - The APIC IDs differ from the HyperTransport node IDs + which the K8 northbridge parsing fills in. + Assume they are all increased by a constant offset, + but in the same order as the HT nodeids. + If that doesn't result in a usable node fall back to the + path for the previous case. */ + int ht_nodeid = apicid - (phys_proc_id[0] << bits); + if (ht_nodeid >= 0 && + apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = apicid_to_node[ht_nodeid]; + /* Pick a nearby node */ + if (!node_online(node)) + node = nearby_node(apicid); + } + cpu_to_node[cpu] = node; + + printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", + cpu, c->x86_num_cores, node, cpu_core_id[cpu]); #endif - - printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_num_cores, node, cpu_core_id[cpu]); #endif } diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index ec35747..65417b0 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -45,10 +45,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) unsigned long prevbase; struct node nodes[8]; int nodeid, i, nb; + unsigned char nodeids[8]; int found = 0; u32 reg; unsigned numnodes; nodemask_t nodes_parsed; + unsigned dualcore = 0; nodes_clear(nodes_parsed); @@ -67,11 +69,15 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) prevbase = 0; for (i = 0; i < 8; i++) { unsigned long base,limit; - + u32 nodeid; + + /* Undefined before E stepping, but hopefully 0 */ + dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1; base = read_pci_config(0, nb, 1, 0x40 + i*8); limit = read_pci_config(0, nb, 1, 0x44 + i*8); nodeid = limit & 7; + nodeids[i] = nodeid; if ((base & 3) == 0) { if (i < numnodes) printk("Skipping disabled node %d\n", i); @@ -157,8 +163,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) for (i = 0; i < 8; i++) { if (nodes[i].start != nodes[i].end) { - /* assume 1:1 NODE:CPU */ - cpu_to_node[i] = i; + nodeid = nodeids[i]; + apicid_to_node[nodeid << dualcore] = i; + apicid_to_node[(nodeid << dualcore) + dualcore] = i; setup_node_bootmem(i, nodes[i].start, nodes[i].end); } } diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 5b15186..80a49d9 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -28,11 +28,13 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES]; int memnode_shift; u8 memnodemap[NODEMAPSIZE]; -unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -unsigned char apicid_to_node[256] __cpuinitdata = { - [0 ... NR_CPUS-1] = NUMA_NO_NODE +unsigned char cpu_to_node[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; +unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { + [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE +}; +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; |