diff options
author | Tejun Heo <tj@kernel.org> | 2011-01-23 14:37:39 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-01-28 14:54:09 +0100 |
commit | bbc9e2f452d9c4b166d1f9a78d941d80173312fe (patch) | |
tree | d75d41187b296235f833e942ed8c1dd938a7bae4 /arch/x86/kernel | |
parent | 89e5dc218e084e13a3996db6693b01478912f4ee (diff) | |
download | op-kernel-dev-bbc9e2f452d9c4b166d1f9a78d941d80173312fe.zip op-kernel-dev-bbc9e2f452d9c4b166d1f9a78d941d80173312fe.tar.gz |
x86: Unify cpu/apicid <-> NUMA node mapping between 32 and 64bit
The mapping between cpu/apicid and node is done via
apicid_to_node[] on 64bit and apicid_2_node[] +
apic->x86_32_numa_cpu_node() on 32bit. This difference makes it
difficult to further unify 32 and 64bit NUMA handling.
This patch unifies it by replacing both apicid_to_node[] and
apicid_2_node[] with __apicid_to_node[] array, which is accessed
by two accessors - set_apicid_to_node() and numa_cpu_node(). On
64bit, numa_cpu_node() always consults __apicid_to_node[]
directly while 32bit goes through apic->numa_cpu_node() method
to allow apic implementations to override it.
srat_detect_node() for amd cpus contains workaround for broken
NUMA configuration which assumes relationship between APIC ID,
HT node ID and NUMA topology. Leave it to access
__apicid_to_node[] directly as mapping through CPU might result
in undesirable behavior change. The comment is reformatted and
updated to note the ugliness.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Cc: eric.dumazet@gmail.com
Cc: yinghai@kernel.org
Cc: brgerst@gmail.com
Cc: gorcunov@gmail.com
Cc: shaohui.zheng@intel.com
Cc: rientjes@google.com
LKML-Reference: <1295789862-25482-14-git-send-email-tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: David Rientjes <rientjes@google.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 47 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 6 |
5 files changed, 34 insertions, 27 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b3a7113..a7bca59 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -589,11 +589,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) nid = acpi_get_node(handle); if (nid == -1 || !node_online(nid)) return; + set_apicid_to_node(physid, nid); #ifdef CONFIG_X86_64 - apicid_to_node[physid] = nid; numa_set_node(cpu, nid); #else /* CONFIG_X86_32 */ - apicid_2_node[physid] = nid; cpu_to_node_map[cpu] = nid; #endif diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0f4f3c1..4686ea5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2026,7 +2026,7 @@ int default_x86_32_numa_cpu_node(int cpu) int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); if (apicid != BAD_APICID) - return apicid_2_node[apicid]; + return __apicid_to_node[apicid]; return NUMA_NO_NODE; #else return 0; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7c7bedb..3cce8f2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -234,17 +234,21 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) #endif #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +/* + * To workaround broken NUMA config. Read the comment in + * srat_detect_node(). + */ static int __cpuinit nearby_node(int apicid) { int i, node; for (i = apicid - 1; i >= 0; i--) { - node = apicid_to_node[i]; + node = __apicid_to_node[i]; if (node != NUMA_NO_NODE && node_online(node)) return node; } for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { - node = apicid_to_node[i]; + node = __apicid_to_node[i]; if (node != NUMA_NO_NODE && node_online(node)) return node; } @@ -339,26 +343,35 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) int node; unsigned apicid = c->apicid; - node = per_cpu(cpu_llc_id, cpu); + node = numa_cpu_node(cpu); + if (node == NUMA_NO_NODE) + node = per_cpu(cpu_llc_id, cpu); - if (apicid_to_node[apicid] != NUMA_NO_NODE) - node = apicid_to_node[apicid]; if (!node_online(node)) { - /* Two possibilities here: - - The CPU is missing memory and no node was created. - In that case try picking one from a nearby CPU - - The APIC IDs differ from the HyperTransport node IDs - which the K8 northbridge parsing fills in. - Assume they are all increased by a constant offset, - but in the same order as the HT nodeids. - If that doesn't result in a usable node fall back to the - path for the previous case. */ - + /* + * Two possibilities here: + * + * - The CPU is missing memory and no node was created. In + * that case try picking one from a nearby CPU. + * + * - The APIC IDs differ from the HyperTransport node IDs + * which the K8 northbridge parsing fills in. Assume + * they are all increased by a constant offset, but in + * the same order as the HT nodeids. If that doesn't + * result in a usable node fall back to the path for the + * previous case. + * + * This workaround operates directly on the mapping between + * APIC ID and NUMA node, assuming certain relationship + * between APIC ID, HT node ID and NUMA topology. As going + * through CPU mapping may alter the outcome, directly + * access __apicid_to_node[]. + */ int ht_nodeid = c->initial_apicid; if (ht_nodeid >= 0 && - apicid_to_node[ht_nodeid] != NUMA_NO_NODE) - node = apicid_to_node[ht_nodeid]; + __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) + node = __apicid_to_node[ht_nodeid]; /* Pick a nearby node */ if (!node_online(node)) node = nearby_node(apicid); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d16c2c5..6052004 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -279,11 +279,10 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) unsigned node; int cpu = smp_processor_id(); - int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; /* Don't do the funky fallback heuristics the AMD version employs for now. */ - node = apicid_to_node[apicid]; + node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE || !node_online(node)) { /* reuse the value from init_cpu_to_node() */ node = cpu_to_node(cpu); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5319cdd..b7cfce5 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -71,10 +71,6 @@ #include <asm/smpboot_hooks.h> #include <asm/i8259.h> -#ifdef CONFIG_X86_32 -u8 apicid_2_node[MAX_LOCAL_APIC]; -#endif - /* State of each CPU */ DEFINE_PER_CPU(int, cpu_state) = { 0 }; @@ -170,7 +166,7 @@ static void map_cpu_to_logical_apicid(void) int cpu = smp_processor_id(); int node; - node = apic->x86_32_numa_cpu_node(cpu); + node = numa_cpu_node(cpu); if (!node_online(node)) node = first_online_node; |