From b484b97e7766f8d643ff205558ce5176240c6765 Mon Sep 17 00:00:00 2001 From: marius Date: Mon, 8 Sep 2008 21:24:25 +0000 Subject: For cheetah-class CPUs ensure that the dt512_0 is set to hold 8k pages for all three contexts and configure the dt512_1 to hold 4MB pages for them (e.g. for direct mappings). This might allow for additional optimization by using the faulting page sizes provided by AA_DMMU_TAG_ACCESS_EXT for bypassing the page size walker for the dt512 in the superpage support code. Submitted by: nwhitehorn (initial patch) --- sys/sparc64/include/asi.h | 1 + sys/sparc64/include/tlb.h | 31 +++++++++++++++++++++++++++++-- sys/sparc64/sparc64/cheetah.c | 20 ++++++++++++++++++++ sys/sparc64/sparc64/genassym.c | 1 + sys/sparc64/sparc64/pmap.c | 7 ++++--- sys/sparc64/sparc64/swtch.S | 4 ++++ 6 files changed, 59 insertions(+), 5 deletions(-) diff --git a/sys/sparc64/include/asi.h b/sys/sparc64/include/asi.h index 7829de4..9371378 100644 --- a/sys/sparc64/include/asi.h +++ b/sys/sparc64/include/asi.h @@ -140,6 +140,7 @@ #define AA_DMMU_TSB_PEXT_REG 0x48 #define AA_DMMU_TSB_SEXT_REG 0x50 #define AA_DMMU_TSB_NEXT_REG 0x58 +#define AA_DMMU_TAG_ACCESS_EXT 0x60 /* US-III family */ #define ASI_DMMU_TSB_8KB_PTR_REG 0x59 #define ASI_DMMU_TSB_64KB_PTR_REG 0x5a diff --git a/sys/sparc64/include/tlb.h b/sys/sparc64/include/tlb.h index 3d54ef8..06a8296 100644 --- a/sys/sparc64/include/tlb.h +++ b/sys/sparc64/include/tlb.h @@ -51,6 +51,34 @@ #define TLB_TAR_VA(va) ((va) & ~TAR_CTX_MASK) #define TLB_TAR_CTX(ctx) ((ctx) & TAR_CTX_MASK) +#define TLB_CXR_CTX_BITS (13) +#define TLB_CXR_CTX_MASK \ + (((1UL << TLB_CXR_CTX_BITS) - 1) << TLB_CXR_CTX_SHIFT) +#define TLB_CXR_CTX_SHIFT (0) +#define TLB_CXR_PGSZ_BITS (3) +#define TLB_PCXR_PGSZ_MASK \ + ((((1UL << TLB_CXR_PGSZ_BITS) - 1) << TLB_PCXR_N_PGSZ0_SHIFT) | \ + (((1UL << TLB_CXR_PGSZ_BITS) - 1) << TLB_PCXR_N_PGSZ1_SHIFT) | \ + (((1UL << TLB_CXR_PGSZ_BITS) - 1) << TLB_PCXR_P_PGSZ0_SHIFT) | \ + (((1UL << TLB_CXR_PGSZ_BITS) - 1) << 
TLB_PCXR_P_PGSZ1_SHIFT)) +#define TLB_PCXR_N_PGSZ0_SHIFT (61) +#define TLB_PCXR_N_PGSZ1_SHIFT (58) +#define TLB_PCXR_P_PGSZ0_SHIFT (16) +#define TLB_PCXR_P_PGSZ1_SHIFT (19) +#define TLB_SCXR_PGSZ_MASK \ + ((((1UL << TLB_CXR_PGSZ_BITS) - 1) << TLB_SCXR_S_PGSZ0_SHIFT) | \ + (((1UL << TLB_CXR_PGSZ_BITS) - 1) << TLB_SCXR_S_PGSZ1_SHIFT)) +#define TLB_SCXR_S_PGSZ1_SHIFT (19) +#define TLB_SCXR_S_PGSZ0_SHIFT (16) + +#define TLB_TAE_PGSZ_BITS (3) +#define TLB_TAE_PGSZ0_MASK \ + (((1UL << TLB_TAE_PGSZ_BITS) - 1) << TLB_TAE_PGSZ0_SHIFT) +#define TLB_TAE_PGSZ1_MASK \ + (((1UL << TLB_TAE_PGSZ_BITS) - 1) << TLB_TAE_PGSZ1_SHIFT) +#define TLB_TAE_PGSZ0_SHIFT (16) +#define TLB_TAE_PGSZ1_SHIFT (19) + #define TLB_DEMAP_ID_SHIFT (4) #define TLB_DEMAP_ID_PRIMARY (0) #define TLB_DEMAP_ID_SECONDARY (1) @@ -59,8 +87,7 @@ #define TLB_DEMAP_TYPE_SHIFT (6) #define TLB_DEMAP_TYPE_PAGE (0) #define TLB_DEMAP_TYPE_CONTEXT (1) -/* US-III and greater only */ -#define TLB_DEMAP_TYPE_ALL (2) +#define TLB_DEMAP_TYPE_ALL (2) /* USIII and beyond only */ #define TLB_DEMAP_VA(va) ((va) & ~PAGE_MASK) #define TLB_DEMAP_ID(id) ((id) << TLB_DEMAP_ID_SHIFT) diff --git a/sys/sparc64/sparc64/cheetah.c b/sys/sparc64/sparc64/cheetah.c index bf42a44..e2dc714 100644 --- a/sys/sparc64/sparc64/cheetah.c +++ b/sys/sparc64/sparc64/cheetah.c @@ -92,6 +92,26 @@ cheetah_init(void) stxa(AA_IMMU_TSB_NEXT_REG, ASI_IMMU, 0); membar(Sync); + /* + * Ensure that the dt512_0 is set to hold 8k pages for all three + * contexts and configure the dt512_1 to hold 4MB pages for them + * (e.g. for direct mappings). + * NB: according to documentation, this requires a context demap + * _before_ changing the corresponding page size, but we hardly + * can flush our locked pages here, so we use a demap all instead. 
+ */ + stxa(TLB_DEMAP_ALL, ASI_DMMU_DEMAP, 0); + membar(Sync); + stxa(AA_DMMU_PCXR, ASI_DMMU, + (TS_8K << TLB_PCXR_N_PGSZ0_SHIFT) | + (TS_4M << TLB_PCXR_N_PGSZ1_SHIFT) | + (TS_8K << TLB_PCXR_P_PGSZ0_SHIFT) | + (TS_4M << TLB_PCXR_P_PGSZ1_SHIFT)); + stxa(AA_DMMU_SCXR, ASI_DMMU, + (TS_8K << TLB_SCXR_S_PGSZ0_SHIFT) | + (TS_4M << TLB_SCXR_S_PGSZ1_SHIFT)); + flush(KERNBASE); + intr_restore(s); } diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c index afd0bdb..d3dac32 100644 --- a/sys/sparc64/sparc64/genassym.c +++ b/sys/sparc64/sparc64/genassym.c @@ -132,6 +132,7 @@ ASSYM(TD_W, TD_W); ASSYM(TS_MIN, TS_MIN); ASSYM(TS_MAX, TS_MAX); +ASSYM(TLB_PCXR_PGSZ_MASK, TLB_PCXR_PGSZ_MASK); ASSYM(TLB_DIRECT_TO_TTE_MASK, TLB_DIRECT_TO_TTE_MASK); ASSYM(TV_SIZE_BITS, TV_SIZE_BITS); #endif diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index be3226a..c083bbb 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -545,7 +545,6 @@ pmap_map_tsb(void) pa = tsb_kernel_phys + i; data = TD_V | TD_4M | TD_PA(pa) | TD_L | TD_CP | TD_CV | TD_P | TD_W; - /* XXX - cheetah */ stxa(AA_DMMU_TAR, ASI_DMMU, TLB_TAR_VA(va) | TLB_TAR_CTX(TLB_CTX_KERNEL)); stxa_sync(0, ASI_DTLB_DATA_IN_REG, data); @@ -555,7 +554,8 @@ pmap_map_tsb(void) * Set the secondary context to be the kernel context (needed for * FP block operations in the kernel). 
*/ - stxa(AA_DMMU_SCXR, ASI_DMMU, TLB_CTX_KERNEL); + stxa(AA_DMMU_SCXR, ASI_DMMU, (ldxa(AA_DMMU_SCXR, ASI_DMMU) & + TLB_SCXR_PGSZ_MASK) | TLB_CTX_KERNEL); flush(KERNBASE); intr_restore(s); @@ -1979,7 +1979,8 @@ pmap_activate(struct thread *td) stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb); stxa(AA_IMMU_TSB, ASI_IMMU, pm->pm_tsb); - stxa(AA_DMMU_PCXR, ASI_DMMU, context); + stxa(AA_DMMU_PCXR, ASI_DMMU, (ldxa(AA_DMMU_PCXR, ASI_DMMU) & + TLB_PCXR_PGSZ_MASK) | context); flush(KERNBASE); mtx_unlock_spin(&sched_lock); diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S index 552233f..d9e86d2 100644 --- a/sys/sparc64/sparc64/swtch.S +++ b/sys/sparc64/sparc64/swtch.S @@ -236,7 +236,11 @@ ENTRY(cpu_switch) stxa %i4, [%i5] ASI_DMMU mov AA_IMMU_TSB, %i5 stxa %i4, [%i5] ASI_IMMU + setx TLB_PCXR_PGSZ_MASK, %i5, %i4 mov AA_DMMU_PCXR, %i5 + ldxa [%i5] ASI_DMMU, %i2 + and %i2, %i4, %i2 + or %i3, %i2, %i3 sethi %hi(KERNBASE), %i4 stxa %i3, [%i5] ASI_DMMU flush %i4 -- cgit v1.1