diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2007-05-26 01:14:43 -0700 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-05-29 02:49:49 -0700 |
commit | 22adb358e816ce6aa0afb231ae9d826b0bddc8b0 (patch) | |
tree | 6f9886bf5b4e5c916c72d8d5733211813873c5fc | |
parent | 5cbc30737398b49f62ae8603129ce43ac7db1a41 (diff) | |
download | op-kernel-dev-22adb358e816ce6aa0afb231ae9d826b0bddc8b0.zip op-kernel-dev-22adb358e816ce6aa0afb231ae9d826b0bddc8b0.tar.gz |
[SPARC64]: Eliminate NR_CPUS limitations.
Cheetah systems can have cpuids as large as 1023, although physical
systems don't have that many cpus.
Only three limitations existed in the kernel preventing arbitrary
NR_CPUS values:
1) dcache dirty cpu state stored in page->flags on
D-cache aliasing platforms. With some build time
calculations and some build-time BUG checks on
page->flags layout, this one was easily solved.
2) The cheetah XCALL delivery code could only handle
a cpumask with up to 32 cpus set. Some simple looping
logic clears that up too.
3) thread_info->cpu was a u8, easily changed to a u16.
There are a few spots in the kernel that still put NR_CPUS
sized arrays on the kernel stack, but that's not a sparc64
specific problem.
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc64/Kconfig | 6 | ||||
-rw-r--r-- | arch/sparc64/kernel/head.S | 2 | ||||
-rw-r--r-- | arch/sparc64/kernel/smp.c | 19 | ||||
-rw-r--r-- | arch/sparc64/mm/init.c | 22 | ||||
-rw-r--r-- | include/asm-sparc64/cpudata.h | 2 | ||||
-rw-r--r-- | include/asm-sparc64/thread_info.h | 8 |
6 files changed, 43 insertions, 16 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 831781c..bd00f89 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -147,10 +147,10 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-64)" - range 2 64 + int "Maximum number of CPUs (2-1024)" + range 2 1024 depends on SMP - default "32" + default "64" source "drivers/cpufreq/Kconfig" diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index baea10a..5c11529 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -523,7 +523,7 @@ tlb_fixup_done: #else mov 0, %o0 #endif - stb %o0, [%g6 + TI_CPU] + sth %o0, [%g6 + TI_CPU] /* Off we go.... */ call start_kernel diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index f7fa873..c550bba 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) { u64 pstate, ver; - int nack_busy_id, is_jbus; + int nack_busy_id, is_jbus, need_more; if (cpus_empty(mask)) return; @@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); retry: + need_more = 0; __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" : : "r" (pstate), "i" (PSTATE_IE)); @@ -444,6 +445,10 @@ retry: : /* no outputs */ : "r" (target), "i" (ASI_INTR_W)); nack_busy_id++; + if (nack_busy_id == 32) { + need_more = 1; + break; + } } } @@ -460,6 +465,16 @@ retry: if (dispatch_stat == 0UL) { __asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate)); + if (unlikely(need_more)) { + int i, cnt = 0; + for_each_cpu_mask(i, mask) { + cpu_clear(i, mask); + cnt++; + if (cnt == 32) + break; + } + goto retry; + } return; } if (!--stuck) @@ -497,6 +512,8 @@ retry: if ((dispatch_stat & check_mask) == 0) cpu_clear(i, mask); this_busy_nack += 2; + if (this_busy_nack == 64) + break; } goto retry; diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 9776982..087cbf0 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c @@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page) } #define PG_dcache_dirty PG_arch_1 -#define PG_dcache_cpu_shift 24UL -#define PG_dcache_cpu_mask (256UL - 1UL) - -#if NR_CPUS > 256 -#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus -#endif +#define PG_dcache_cpu_shift 32UL +#define PG_dcache_cpu_mask \ + ((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL) #define dcache_dirty_cpu(page) \ (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) @@ -1349,6 +1346,19 @@ void __init paging_init(void) unsigned long end_pfn, pages_avail, shift, phys_base; unsigned long real_end, i; + /* These build time checkes make sure that the dcache_dirty_cpu() + * page->flags usage will work. + * + * When a page gets marked as dcache-dirty, we store the + * cpu number starting at bit 32 in the page->flags. Also, + * functions like clear_dcache_dirty_cpu use the cpu mask + * in 13-bit signed-immediate instruction fields. + */ + BUILD_BUG_ON(FLAGS_RESERVED != 32); + BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH + + ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED); + BUILD_BUG_ON(NR_CPUS > 4096); + kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index f321b1d..03c385d 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -202,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, * the calculations done by the macro mid-stream. */ #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ - ldub [THR + TI_CPU], REG1; \ + lduh [THR + TI_CPU], REG1; \ sethi %hi(__per_cpu_shift), REG3; \ sethi %hi(__per_cpu_base), REG2; \ ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ diff --git a/include/asm-sparc64/thread_info.h b/include/asm-sparc64/thread_info.h index 2ebf7f2..98252cd 100644 --- a/include/asm-sparc64/thread_info.h +++ b/include/asm-sparc64/thread_info.h @@ -38,8 +38,8 @@ struct thread_info { /* D$ line 1 */ struct task_struct *task; unsigned long flags; - __u8 cpu; __u8 fpsaved[7]; + __u8 pad; unsigned long ksp; /* D$ line 2 */ @@ -49,7 +49,7 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ __u8 new_child; __u8 syscall_noerror; - __u16 __pad; + __u16 cpu; unsigned long *utraps; @@ -83,8 +83,7 @@ struct thread_info { #define TI_CURRENT_DS (TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS) #define TI_FPDEPTH (TI_FLAGS + TI_FLAG_BYTE_FPDEPTH) #define TI_WSAVED (TI_FLAGS + TI_FLAG_BYTE_WSAVED) -#define TI_CPU 0x00000010 -#define TI_FPSAVED 0x00000011 +#define TI_FPSAVED 0x00000010 #define TI_KSP 0x00000018 #define TI_FAULT_ADDR 0x00000020 #define TI_KREGS 0x00000028 @@ -92,6 +91,7 @@ struct thread_info { #define TI_PRE_COUNT 0x00000038 #define TI_NEW_CHILD 0x0000003c #define TI_SYS_NOERROR 0x0000003d +#define TI_CPU 0x0000003e #define TI_UTRAPS 0x00000040 #define TI_REG_WINDOW 0x00000048 #define TI_RWIN_SPTRS 0x000003c8 |