From 22adb358e816ce6aa0afb231ae9d826b0bddc8b0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 May 2007 01:14:43 -0700 Subject: [SPARC64]: Eliminate NR_CPUS limitations. Cheetah systems can have cpuids as large as 1023, although physical systems don't have that many cpus. Only three limitations existed in the kernel preventing arbitrary NR_CPUS values: 1) dcache dirty cpu state stored in page->flags on D-cache aliasing platforms. With some build time calculations and some build-time BUG checks on page->flags layout, this one was easily solved. 2) The cheetah XCALL delivery code could only handle a cpumask with up to 32 cpus set. Some simple looping logic clears that up too. 3) thread_info->cpu was a u8, easily changed to a u16. There are a few spots in the kernel that still put NR_CPUS sized arrays on the kernel stack, but that's not a sparc64 specific problem. Signed-off-by: David S. Miller --- arch/sparc64/Kconfig | 6 +++--- arch/sparc64/kernel/head.S | 2 +- arch/sparc64/kernel/smp.c | 19 ++++++++++++++++++- arch/sparc64/mm/init.c | 22 ++++++++++++++++------ 4 files changed, 38 insertions(+), 11 deletions(-) (limited to 'arch/sparc64') diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 831781c..bd00f89 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -147,10 +147,10 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-64)" - range 2 64 + int "Maximum number of CPUs (2-1024)" + range 2 1024 depends on SMP - default "32" + default "64" source "drivers/cpufreq/Kconfig" diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index baea10a..5c11529 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -523,7 +523,7 @@ tlb_fixup_done: #else mov 0, %o0 #endif - stb %o0, [%g6 + TI_CPU] + sth %o0, [%g6 + TI_CPU] /* Off we go.... */ call start_kernel diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index f7fa873..c550bba 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { u64 pstate, ver; - int nack_busy_id, is_jbus; + int nack_busy_id, is_jbus, need_more; if (cpus_empty(mask)) return; @@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); retry: + need_more = 0; __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" : : "r" (pstate), "i" (PSTATE_IE)); @@ -444,6 +445,10 @@ retry: : /* no outputs */ : "r" (target), "i" (ASI_INTR_W)); nack_busy_id++; + if (nack_busy_id == 32) { + need_more = 1; + break; + } } } @@ -460,6 +465,16 @@ retry: if (dispatch_stat == 0UL) { __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); + if (unlikely(need_more)) { + int i, cnt = 0; + for_each_cpu_mask(i, mask) { + cpu_clear(i, mask); + cnt++; + if (cnt == 32) + break; + } + goto retry; + } return; } if (!--stuck) @@ -497,6 +512,8 @@ retry: if ((dispatch_stat & check_mask) == 0) cpu_clear(i, mask); this_busy_nack += 2; + if (this_busy_nack == 64) + break; } goto retry; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 9776982..087cbf0 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page) } #define PG_dcache_dirty PG_arch_1 -#define PG_dcache_cpu_shift 24UL -#define PG_dcache_cpu_mask (256UL - 1UL) - -#if NR_CPUS > 256 -#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus -#endif +#define PG_dcache_cpu_shift 32UL +#define PG_dcache_cpu_mask \ + ((1UL<flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) @@ -1349,6 +1346,19 @@ void __init paging_init(void) unsigned long end_pfn, pages_avail, shift, phys_base; unsigned long real_end, i; + /* These build time checkes make sure that the dcache_dirty_cpu() + * page->flags usage will work. + * + * When a page gets marked as dcache-dirty, we store the + * cpu number starting at bit 32 in the page->flags. Also, + * functions like clear_dcache_dirty_cpu use the cpu mask + * in 13-bit signed-immediate instruction fields. + */ + BUILD_BUG_ON(FLAGS_RESERVED != 32); + BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + + ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED); + BUILD_BUG_ON(NR_CPUS > 4096); + kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; -- cgit v1.1