From 6b0f6beaa5b6fb8f33de23b54d4f15ff2db26715 Mon Sep 17 00:00:00 2001
From: marius
Date: Tue, 2 Sep 2008 21:13:54 +0000
Subject: - USIII-based machines can consist of CPUs having different cache
 sizes (and running at different frequencies) so move the cacheinfo to the
 PCPU data. While at it, remove some redundant and/or unused members from
 struct cacheinfo.
 - In sparc64_init don't assume the first CPU node we find in the OFW device
 tree is the BSP.

---
 sys/sparc64/include/cache.h        | 16 +++---------
 sys/sparc64/include/pcpu.h         |  2 ++
 sys/sparc64/sparc64/cache.c        | 49 ++++++++++++++++++++---------------
 sys/sparc64/sparc64/cheetah.c      |  2 +-
 sys/sparc64/sparc64/genassym.c     |  3 ++-
 sys/sparc64/sparc64/machdep.c      | 53 ++++++++++++++++++++++--------------
 sys/sparc64/sparc64/mp_exception.S | 13 ++++------
 sys/sparc64/sparc64/mp_machdep.c   |  2 ++
 sys/sparc64/sparc64/spitfire.c     | 12 ++++++---
 9 files changed, 85 insertions(+), 67 deletions(-)

diff --git a/sys/sparc64/include/cache.h b/sys/sparc64/include/cache.h
index c2c2b23..9dd7191 100644
--- a/sys/sparc64/include/cache.h
+++ b/sys/sparc64/include/cache.h
@@ -45,10 +45,6 @@
 #ifndef _MACHINE_CACHE_H_
 #define _MACHINE_CACHE_H_
 
-#ifndef LOCORE
-#include
-#endif
-
 #define DCACHE_COLOR_BITS       (1)
 #define DCACHE_COLORS           (1 << DCACHE_COLOR_BITS)
 #define DCACHE_COLOR_MASK       (DCACHE_COLORS - 1)
@@ -80,31 +76,27 @@
  * Cache control information
  */
 struct cacheinfo {
-        u_int   c_enabled;              /* true => cache is enabled */
         u_int   ic_size;                /* instruction cache */
-        u_int   ic_set;
-        u_int   ic_l2set;
         u_int   ic_assoc;
         u_int   ic_linesize;
         u_int   dc_size;                /* data cache */
-        u_int   dc_l2size;
         u_int   dc_assoc;
         u_int   dc_linesize;
         u_int   ec_size;                /* external cache info */
         u_int   ec_assoc;
-        u_int   ec_l2set;
         u_int   ec_linesize;
-        u_int   ec_l2linesize;
 };
 
 #ifdef _KERNEL
 
+struct pcpu;
+
 typedef void cache_enable_t(void);
 typedef void cache_flush_t(void);
 typedef void dcache_page_inval_t(vm_paddr_t pa);
 typedef void icache_page_inval_t(vm_paddr_t pa);
 
-void cache_init(phandle_t node);
+void cache_init(struct pcpu *pcpu);
 
 cache_enable_t cheetah_cache_enable;
 cache_flush_t cheetah_cache_flush;
@@ -121,8 +113,6 @@ extern cache_flush_t *cache_flush;
 extern dcache_page_inval_t *dcache_page_inval;
 extern icache_page_inval_t *icache_page_inval;
 
-extern struct cacheinfo cache;
-
 #endif /* KERNEL */
 
 #endif /* !LOCORE */
diff --git a/sys/sparc64/include/pcpu.h b/sys/sparc64/include/pcpu.h
index 91c1e5d..2ccdbd9 100644
--- a/sys/sparc64/include/pcpu.h
+++ b/sys/sparc64/include/pcpu.h
@@ -31,6 +31,7 @@
 #define _MACHINE_PCPU_H_
 
 #include
+#include
 #include
 #include
 
@@ -43,6 +44,7 @@ struct pmap;
  * point at the globaldata structure.
  */
 #define PCPU_MD_FIELDS \
+        struct cacheinfo pc_cache; \
         struct intr_request pc_irpool[IR_FREE]; \
         struct intr_request *pc_irhead; \
         struct intr_request **pc_irtail; \
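For orientation only, not part of the patch itself: once struct cacheinfo is embedded in the per-CPU data as pc_cache (see the pcpu.h hunk above and the genassym.c hunk further down), code reads the cache geometry of the CPU it is currently running on through the regular per-CPU accessors instead of the former global `cache`. A minimal sketch follows; the helper name dcache_line_size() is hypothetical, while PCPU_GET(cache.dc_linesize) is exactly the access pattern the cheetah.c and spitfire.c hunks below use.

/*
 * Illustrative sketch (not part of the patch): with pc_cache in struct
 * pcpu, the current CPU's cache parameters are read via PCPU_GET().
 */
#include <sys/param.h>
#include <sys/pcpu.h>

/* Hypothetical helper; equivalent to the former global cache.dc_linesize. */
static __inline u_int
dcache_line_size(void)
{

        return (PCPU_GET(cache.dc_linesize));
}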
diff --git a/sys/sparc64/sparc64/cache.c b/sys/sparc64/sparc64/cache.c
index 9e1309a..8dd933e 100644
--- a/sys/sparc64/sparc64/cache.c
+++ b/sys/sparc64/sparc64/cache.c
@@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
 
 #include
 #include
+#include
 #include
 
@@ -81,8 +82,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 
-struct cacheinfo cache;
-
 cache_enable_t *cache_enable;
 cache_flush_t *cache_flush;
 dcache_page_inval_t *dcache_page_inval;
@@ -94,33 +93,41 @@ icache_page_inval_t *icache_page_inval;
  * Fill in the cache parameters using the cpu node.
  */
 void
-cache_init(phandle_t node)
+cache_init(struct pcpu *pcpu)
 {
         u_long set;
 
-        if (OF_GET(node, "icache-size", cache.ic_size) == -1 ||
-            OF_GET(node, "icache-line-size", cache.ic_linesize) == -1 ||
-            OF_GET(node, "icache-associativity", cache.ic_assoc) == -1 ||
-            OF_GET(node, "dcache-size", cache.dc_size) == -1 ||
-            OF_GET(node, "dcache-line-size", cache.dc_linesize) == -1 ||
-            OF_GET(node, "dcache-associativity", cache.dc_assoc) == -1 ||
-            OF_GET(node, "ecache-size", cache.ec_size) == -1 ||
-            OF_GET(node, "ecache-line-size", cache.ec_linesize) == -1 ||
-            OF_GET(node, "ecache-associativity", cache.ec_assoc) == -1)
+        if (OF_GET(pcpu->pc_node, "icache-size",
+            pcpu->pc_cache.ic_size) == -1 ||
+            OF_GET(pcpu->pc_node, "icache-line-size",
+            pcpu->pc_cache.ic_linesize) == -1 ||
+            OF_GET(pcpu->pc_node, "icache-associativity",
+            pcpu->pc_cache.ic_assoc) == -1 ||
+            OF_GET(pcpu->pc_node, "dcache-size",
+            pcpu->pc_cache.dc_size) == -1 ||
+            OF_GET(pcpu->pc_node, "dcache-line-size",
+            pcpu->pc_cache.dc_linesize) == -1 ||
+            OF_GET(pcpu->pc_node, "dcache-associativity",
+            pcpu->pc_cache.dc_assoc) == -1 ||
+            OF_GET(pcpu->pc_node, "ecache-size",
+            pcpu->pc_cache.ec_size) == -1 ||
+            OF_GET(pcpu->pc_node, "ecache-line-size",
+            pcpu->pc_cache.ec_linesize) == -1 ||
+            OF_GET(pcpu->pc_node, "ecache-associativity",
+            pcpu->pc_cache.ec_assoc) == -1)
                 panic("cache_init: could not retrieve cache parameters");
 
-        cache.ic_set = cache.ic_size / cache.ic_assoc;
-        cache.ic_l2set = ffs(cache.ic_set) - 1;
-        if ((cache.ic_set & ~(1UL << cache.ic_l2set)) != 0)
+        set = pcpu->pc_cache.ic_size / pcpu->pc_cache.ic_assoc;
+        if ((set & ~(1UL << (ffs(set) - 1))) != 0)
                 panic("cache_init: I$ set size not a power of 2");
-        cache.dc_l2size = ffs(cache.dc_size) - 1;
-        if ((cache.dc_size & ~(1UL << cache.dc_l2size)) != 0)
+        if ((pcpu->pc_cache.dc_size &
+            ~(1UL << (ffs(pcpu->pc_cache.dc_size) - 1))) != 0)
                 panic("cache_init: D$ size not a power of 2");
-        if (((cache.dc_size / cache.dc_assoc) / PAGE_SIZE) != DCACHE_COLORS)
+        if (((pcpu->pc_cache.dc_size / pcpu->pc_cache.dc_assoc) /
+            PAGE_SIZE) != DCACHE_COLORS)
                 panic("cache_init: too many D$ colors");
-        set = cache.ec_size / cache.ec_assoc;
-        cache.ec_l2set = ffs(set) - 1;
-        if ((set & ~(1UL << cache.ec_l2set)) != 0)
+        set = pcpu->pc_cache.ec_size / pcpu->pc_cache.ec_assoc;
+        if ((set & ~(1UL << (ffs(set) - 1))) != 0)
                 panic("cache_init: E$ set size not a power of 2");
         if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) {
diff --git a/sys/sparc64/sparc64/cheetah.c b/sys/sparc64/sparc64/cheetah.c
index 3ed2a72..f9e2332 100644
--- a/sys/sparc64/sparc64/cheetah.c
+++ b/sys/sparc64/sparc64/cheetah.c
@@ -72,7 +72,7 @@ cheetah_dcache_page_inval(vm_paddr_t spa)
         KASSERT((spa & PAGE_MASK) == 0, ("%s: pa not page aligned", __func__));
         cookie = ipi_dcache_page_inval(tl_ipi_cheetah_dcache_page_inval, spa);
-        for (pa = spa; pa < spa + PAGE_SIZE; pa += cache.dc_linesize)
+        for (pa = spa; pa < spa + PAGE_SIZE; pa += PCPU_GET(cache.dc_linesize))
                 stxa_sync(pa, ASI_DCACHE_INVALIDATE, 0);
         ipi_wait(cookie);
 }
 
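A standalone illustration, not part of the patch, of the sanity check cache_init() now performs inline instead of caching ffs()-derived log2 values: masking away the lowest set bit of the set size must leave zero, i.e. the size must be a power of two. This is a userland sketch; the explicit x != 0 guard is added only for the demo.

#include <stdio.h>
#include <strings.h>    /* ffs() */

/*
 * Same bit trick as in cache_init(): 1U << (ffs(x) - 1) is the lowest set
 * bit of x, so clearing it leaves zero exactly when x is a power of 2.
 */
static int
is_power_of_two(unsigned int x)
{

        return (x != 0 && (x & ~(1U << (ffs(x) - 1))) == 0);
}

int
main(void)
{
        unsigned int v[] = { 8192, 16384, 24576 };      /* 24576 = 3 * 8192 */
        size_t i;

        for (i = 0; i < sizeof(v) / sizeof(v[0]); i++)
                printf("%u: %s a power of 2\n", v[i],
                    is_power_of_two(v[i]) ? "is" : "is not");
        return (0);
}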
diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c
index 8ef97c5..d31c84d 100644
--- a/sys/sparc64/sparc64/genassym.c
+++ b/sys/sparc64/sparc64/genassym.c
@@ -188,11 +188,12 @@ ASSYM(PM_TSB_MISS_COUNT, offsetof(struct pmap, pm_tsb_miss_count));
 ASSYM(PM_TSB_CAP_MISS_COUNT, offsetof(struct pmap, pm_tsb_cap_miss_count));
 #endif
 #ifdef SUN4U
+ASSYM(PC_CACHE, offsetof(struct pcpu, pc_cache));
 ASSYM(PC_MID, offsetof(struct pcpu, pc_mid));
+ASSYM(PC_PMAP, offsetof(struct pcpu, pc_pmap));
 ASSYM(PC_TLB_CTX, offsetof(struct pcpu, pc_tlb_ctx));
 ASSYM(PC_TLB_CTX_MAX, offsetof(struct pcpu, pc_tlb_ctx_max));
 ASSYM(PC_TLB_CTX_MIN, offsetof(struct pcpu, pc_tlb_ctx_min));
-ASSYM(PC_PMAP, offsetof(struct pcpu, pc_pmap));
 #endif
 
 ASSYM(IR_NEXT, offsetof(struct intr_request, ir_next));
diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c
index 94ba403..f5d63cd 100644
--- a/sys/sparc64/sparc64/machdep.c
+++ b/sys/sparc64/sparc64/machdep.c
@@ -279,6 +279,7 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
         phandle_t child;
         phandle_t root;
         u_int clock;
+        uint32_t portid;
 
         end = 0;
         kmdp = NULL;
@@ -314,12 +315,40 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
 
         init_param1();
 
+        /*
+         * Prime our per-CPU data page for use. Note, we are using it for
+         * our stack, so don't pass the real size (PAGE_SIZE) to pcpu_init
+         * or it'll zero it out from under us.
+         */
+        pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1;
+        pcpu_init(pc, 0, sizeof(struct pcpu));
+        pc->pc_addr = (vm_offset_t)pcpu0;
+        pc->pc_mid = UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG));
+        pc->pc_tlb_ctx = TLB_CTX_USER_MIN;
+        pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN;
+        pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX;
+
+        /*
+         * Determine the OFW node (and ensure the
+         * BSP is in the device tree in the first place).
+         */
+        pc->pc_node = 0;
         root = OF_peer(0);
         for (child = OF_child(root); child != 0; child = OF_peer(child)) {
-                OF_getprop(child, "device_type", type, sizeof(type));
-                if (strcmp(type, "cpu") == 0)
+                if (OF_getprop(child, "device_type", type, sizeof(type)) <= 0)
+                        continue;
+                if (strcmp(type, "cpu") != 0)
+                        continue;
+                if (OF_getprop(child, cpu_impl < CPU_IMPL_ULTRASPARCIII ?
+                    "upa-portid" : "portid", &portid, sizeof(portid)) <= 0)
+                        continue;
+                if (portid == pc->pc_mid) {
+                        pc->pc_node = child;
                         break;
+                }
         }
+        if (pc->pc_node == 0)
+                OF_exit();
 
         /*
          * Initialize the tick counter. Must be before the console is inited
@@ -353,8 +382,8 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
                 end = (vm_offset_t)_end;
         }
 
-        cache_init(child);
-        uma_set_align(cache.dc_linesize - 1);
+        cache_init(pc);
+        uma_set_align(pc->pc_cache.dc_linesize - 1);
 
         cpu_block_copy = bcopy;
         cpu_block_zero = bzero;
@@ -397,7 +426,7 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
         intr_init1();
 
         /*
-         * Initialize proc0 stuff (p_contested needs to be done early).
+         * Initialize proc0, set kstack0, frame0, curthread and curpcb.
          */
         proc_linkup0(&proc0, &thread0);
         proc0.p_md.md_sigtramp = NULL;
@@ -407,22 +436,8 @@ sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec)
             (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
         frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV;
         thread0.td_frame = &frame0;
-
-        /*
-         * Prime our per-cpu data page for use. Note, we are using it for our
-         * stack, so don't pass the real size (PAGE_SIZE) to pcpu_init or
-         * it'll zero it out from under us.
-         */
-        pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1;
-        pcpu_init(pc, 0, sizeof(struct pcpu));
         pc->pc_curthread = &thread0;
         pc->pc_curpcb = thread0.td_pcb;
-        pc->pc_mid = UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG));
-        pc->pc_addr = (vm_offset_t)pcpu0;
-        pc->pc_node = child;
-        pc->pc_tlb_ctx = TLB_CTX_USER_MIN;
-        pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN;
-        pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX;
 
         /*
          * Initialize global registers.
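Also for illustration, not part of the patch: the BSP-detection loop added to sparc64_init() above can be read as a small lookup routine that returns the OFW "cpu" node whose port ID matches a given MID, and 0 when no such node exists. The helper name and the <machine/ver.h> include location are assumptions; cpu_impl, CPU_IMPL_ULTRASPARCIII and the OF_* calls are taken straight from the patch.

#include <sys/param.h>
#include <sys/systm.h>

#include <dev/ofw/openfirm.h>

#include <machine/ver.h>        /* assumed home of cpu_impl, CPU_IMPL_ULTRASPARCIII */

/* Hypothetical helper mirroring the loop added to sparc64_init(). */
static phandle_t
cpu_node_by_mid(u_int mid)
{
        char type[32];
        phandle_t child;
        uint32_t portid;

        for (child = OF_child(OF_peer(0)); child != 0;
            child = OF_peer(child)) {
                if (OF_getprop(child, "device_type", type, sizeof(type)) <= 0)
                        continue;
                if (strcmp(type, "cpu") != 0)
                        continue;
                /* Pre-USIII CPUs expose the ID as "upa-portid", later ones as "portid". */
                if (OF_getprop(child, cpu_impl < CPU_IMPL_ULTRASPARCIII ?
                    "upa-portid" : "portid", &portid, sizeof(portid)) <= 0)
                        continue;
                if (portid == mid)
                        return (child);
        }
        return (0);     /* Not found. */
}

In the patch itself a failed lookup ends in OF_exit(), presumably because this point in sparc64_init() is too early in boot for a regular panic.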
diff --git a/sys/sparc64/sparc64/mp_exception.S b/sys/sparc64/sparc64/mp_exception.S
index ca8c90c..fbb1c25 100644
--- a/sys/sparc64/sparc64/mp_exception.S
+++ b/sys/sparc64/sparc64/mp_exception.S
@@ -57,9 +57,8 @@ ENTRY(tl_ipi_spitfire_dcache_page_inval)
         ldx     [%g5 + ICA_PA], %g6
         srlx    %g6, PAGE_SHIFT - DC_TAG_SHIFT, %g6
 
-        SET(cache, %g3, %g2)
-        lduw    [%g2 + DC_SIZE], %g3
-        lduw    [%g2 + DC_LINESIZE], %g4
+        lduw    [PCPU(CACHE) + DC_SIZE], %g3
+        lduw    [PCPU(CACHE) + DC_LINESIZE], %g4
         sub     %g3, %g4, %g2
 
 1:      ldxa    [%g2] ASI_DCACHE_TAG, %g1
@@ -98,9 +97,8 @@ ENTRY(tl_ipi_spitfire_icache_page_inval)
         ldx     [%g5 + ICA_PA], %g6
         srlx    %g6, PAGE_SHIFT - IC_TAG_SHIFT, %g6
 
-        SET(cache, %g3, %g2)
-        lduw    [%g2 + IC_SIZE], %g3
-        lduw    [%g2 + IC_LINESIZE], %g4
+        lduw    [PCPU(CACHE) + IC_SIZE], %g3
+        lduw    [PCPU(CACHE) + IC_LINESIZE], %g4
         sub     %g3, %g4, %g2
 
 1:      ldda    [%g2] ASI_ICACHE_TAG, %g0 /*, %g1 */
@@ -140,8 +138,7 @@ ENTRY(tl_ipi_cheetah_dcache_page_inval)
         set     PAGE_SIZE, %g2
         add     %g1, %g2, %g3
 
-        SET(cache, %g4, %g2)
-        lduw    [%g2 + DC_LINESIZE], %g2
+        lduw    [PCPU(CACHE) + DC_LINESIZE], %g2
 
 1:      stxa    %g0, [%g1] ASI_DCACHE_INVALIDATE
         membar  #Sync
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index d3572f2..214143a 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -301,6 +301,8 @@ cpu_mp_start(void)
                 pc->pc_mid = mid;
                 pc->pc_node = child;
 
+                cache_init(pc);
+
                 all_cpus |= 1 << cpuid;
                 intr_add_cpu(cpuid);
         }
diff --git a/sys/sparc64/sparc64/spitfire.c b/sys/sparc64/sparc64/spitfire.c
index eb0ff00..5a8c4ec 100644
--- a/sys/sparc64/sparc64/spitfire.c
+++ b/sys/sparc64/sparc64/spitfire.c
@@ -72,9 +72,11 @@ spitfire_cache_flush(void)
 {
         u_long addr;
 
-        for (addr = 0; addr < cache.dc_size; addr += cache.dc_linesize)
+        for (addr = 0; addr < PCPU_GET(cache.dc_size);
+            addr += PCPU_GET(cache.dc_linesize))
                 stxa_sync(addr, ASI_DCACHE_TAG, 0);
-        for (addr = 0; addr < cache.ic_size; addr += cache.ic_linesize)
+        for (addr = 0; addr < PCPU_GET(cache.ic_size);
+            addr += PCPU_GET(cache.ic_linesize))
                 stxa_sync(addr, ASI_ICACHE_TAG, 0);
 }
 
@@ -93,7 +95,8 @@ spitfire_dcache_page_inval(vm_paddr_t pa)
         PMAP_STATS_INC(spitfire_dcache_npage_inval);
         target = pa >> (PAGE_SHIFT - DC_TAG_SHIFT);
         cookie = ipi_dcache_page_inval(tl_ipi_spitfire_dcache_page_inval, pa);
-        for (addr = 0; addr < cache.dc_size; addr += cache.dc_linesize) {
+        for (addr = 0; addr < PCPU_GET(cache.dc_size);
+            addr += PCPU_GET(cache.dc_linesize)) {
                 tag = ldxa(addr, ASI_DCACHE_TAG);
                 if (((tag >> DC_VALID_SHIFT) & DC_VALID_MASK) == 0)
                         continue;
@@ -121,7 +124,8 @@ spitfire_icache_page_inval(vm_paddr_t pa)
         PMAP_STATS_INC(spitfire_icache_npage_inval);
         target = pa >> (PAGE_SHIFT - IC_TAG_SHIFT);
         cookie = ipi_icache_page_inval(tl_ipi_spitfire_icache_page_inval, pa);
-        for (addr = 0; addr < cache.ic_size; addr += cache.ic_linesize) {
+        for (addr = 0; addr < PCPU_GET(cache.ic_size);
+            addr += PCPU_GET(cache.ic_linesize)) {
                 __asm __volatile("ldda [%1] %2, %%g0" /*, %g1 */
                     : "=r" (tag) : "r" (addr), "n" (ASI_ICACHE_TAG));
                 if (((tag >> IC_VALID_SHIFT) & IC_VALID_MASK) == 0)
--
cgit v1.1