diff options
author | nwhitehorn <nwhitehorn@FreeBSD.org> | 2012-01-15 00:08:14 +0000 |
---|---|---|
committer | nwhitehorn <nwhitehorn@FreeBSD.org> | 2012-01-15 00:08:14 +0000 |
commit | 19c997ffb187fa972ee676f70314969915c35584 (patch) | |
tree | 1fedd675c12139ce0991a5a3ad286a5f769b575e | |
parent | e19a997d4a9c059fd70e3096ca9be23cf2f7a7c3 (diff) | |
download | FreeBSD-src-19c997ffb187fa972ee676f70314969915c35584.zip FreeBSD-src-19c997ffb187fa972ee676f70314969915c35584.tar.gz |
Rework SLB trap handling so that double faults into an SLB trap handler are
possible, and double faults within an SLB trap handler are not. The result
is that it is possible to take an SLB fault at any time, on any address, for
any reason, at any point in the kernel.
This lets us do two important things. First, it removes the (soft) 16 GB RAM
ceiling on PPC64 as well as any architectural limitations on KVA space.
Second, it lets the kernel tolerate poorly designed hypervisors that
have a tendency to fail to restore the SLB properly after a hypervisor
context switch.
MFC after: 6 weeks
-rw-r--r-- | sys/powerpc/aim/machdep.c | 5 | ||||
-rw-r--r-- | sys/powerpc/aim/slb.c | 18 | ||||
-rw-r--r-- | sys/powerpc/aim/trap.c | 84 | ||||
-rw-r--r-- | sys/powerpc/aim/trap_subr64.S | 179 | ||||
-rw-r--r-- | sys/powerpc/include/pcpu.h | 4 | ||||
-rw-r--r-- | sys/powerpc/powerpc/genassym.c | 3 |
6 files changed, 235 insertions, 58 deletions
diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c index a3c36c5..b31f15c 100644 --- a/sys/powerpc/aim/machdep.c +++ b/sys/powerpc/aim/machdep.c @@ -238,6 +238,7 @@ extern void *trapcode64; extern void *rstcode, *rstsize; #endif extern void *trapcode, *trapsize; +extern void *slbtrap, *slbtrapsize; extern void *alitrap, *alisize; extern void *dsitrap, *dsisize; extern void *decrint, *decrsize; @@ -490,8 +491,8 @@ powerpc_init(vm_offset_t startkernel, vm_offset_t endkernel, bcopy(&dsitrap, (void *)(EXC_DSI + trap_offset), (size_t)&dsisize); bcopy(generictrap, (void *)EXC_ISI, (size_t)&trapsize); #ifdef __powerpc64__ - bcopy(generictrap, (void *)EXC_DSE, (size_t)&trapsize); - bcopy(generictrap, (void *)EXC_ISE, (size_t)&trapsize); + bcopy(&slbtrap, (void *)EXC_DSE, (size_t)&slbtrapsize); + bcopy(&slbtrap, (void *)EXC_ISE, (size_t)&slbtrapsize); #endif bcopy(generictrap, (void *)EXC_EXI, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_FPU, (size_t)&trapsize); diff --git a/sys/powerpc/aim/slb.c b/sys/powerpc/aim/slb.c index 1a5ce65..7f4b2ef 100644 --- a/sys/powerpc/aim/slb.c +++ b/sys/powerpc/aim/slb.c @@ -409,15 +409,11 @@ slb_alloc_tree(void) /* Lock entries mapping kernel text and stacks */ -#define SLB_SPILLABLE(slbe) \ - (((slbe & SLBE_ESID_MASK) < VM_MIN_KERNEL_ADDRESS && \ - (slbe & SLBE_ESID_MASK) > 16*SEGMENT_LENGTH) || \ - (slbe & SLBE_ESID_MASK) > VM_MAX_KERNEL_ADDRESS) void slb_insert_kernel(uint64_t slbe, uint64_t slbv) { struct slb *slbcache; - int i, j; + int i; /* We don't want to be preempted while modifying the kernel map */ critical_enter(); @@ -437,15 +433,9 @@ slb_insert_kernel(uint64_t slbe, uint64_t slbv) slbcache[USER_SLB_SLOT].slbe = 1; } - for (i = mftb() % n_slbs, j = 0; j < n_slbs; j++, i = (i+1) % n_slbs) { - if (i == USER_SLB_SLOT) - continue; - - if (SLB_SPILLABLE(slbcache[i].slbe)) - break; - } - - KASSERT(j < n_slbs, ("All kernel SLB slots locked!")); + i = mftb() % n_slbs; + if (i == USER_SLB_SLOT) + i = (i+1) % 
n_slbs; fillkernslb: KASSERT(i != USER_SLB_SLOT, diff --git a/sys/powerpc/aim/trap.c b/sys/powerpc/aim/trap.c index 91f478c..d419e43 100644 --- a/sys/powerpc/aim/trap.c +++ b/sys/powerpc/aim/trap.c @@ -88,7 +88,9 @@ static int handle_onfault(struct trapframe *frame); static void syscall(struct trapframe *frame); #ifdef __powerpc64__ -static int handle_slb_spill(pmap_t pm, vm_offset_t addr); + void handle_kernel_slb_spill(int, register_t, register_t); +static int handle_user_slb_spill(pmap_t pm, vm_offset_t addr); +extern int n_slbs; #endif int setfault(faultbuf); /* defined in locore.S */ @@ -191,7 +193,7 @@ trap(struct trapframe *frame) #ifdef __powerpc64__ case EXC_ISE: case EXC_DSE: - if (handle_slb_spill(&p->p_vmspace->vm_pmap, + if (handle_user_slb_spill(&p->p_vmspace->vm_pmap, (type == EXC_ISE) ? frame->srr0 : frame->cpu.aim.dar) != 0) sig = SIGSEGV; @@ -259,27 +261,20 @@ trap(struct trapframe *frame) KASSERT(cold || td->td_ucred != NULL, ("kernel trap doesn't have ucred")); switch (type) { - case EXC_DSI: - if (trap_pfault(frame, 0) == 0) - return; - break; #ifdef __powerpc64__ case EXC_DSE: if ((frame->cpu.aim.dar & SEGMENT_MASK) == USER_ADDR) { __asm __volatile ("slbmte %0, %1" :: - "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), - "r"(USER_SLB_SLBE)); + "r"(td->td_pcb->pcb_cpu.aim.usr_vsid), + "r"(USER_SLB_SLBE)); return; } - - /* FALLTHROUGH */ - case EXC_ISE: - if (handle_slb_spill(kernel_pmap, - (type == EXC_ISE) ? frame->srr0 : - frame->cpu.aim.dar) != 0) - panic("Fault handling kernel SLB miss"); - return; + break; #endif + case EXC_DSI: + if (trap_pfault(frame, 0) == 0) + return; + break; case EXC_MCHK: if (handle_onfault(frame)) return; @@ -326,8 +321,7 @@ printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) printf("%s %s trap:\n", isfatal ? "fatal" : "handled", user ? 
"user" : "kernel"); printf("\n"); - printf(" exception = 0x%x (%s)\n", vector >> 8, - trapname(vector)); + printf(" exception = 0x%x (%s)\n", vector, trapname(vector)); switch (vector) { case EXC_DSE: case EXC_DSI: @@ -486,8 +480,54 @@ syscall(struct trapframe *frame) } #ifdef __powerpc64__ +/* Handle kernel SLB faults -- runs in real mode, all seat belts off */ +void +handle_kernel_slb_spill(int type, register_t dar, register_t srr0) +{ + struct slb *slbcache; + uint64_t slbe, slbv; + uint64_t esid, addr; + int i; + + addr = (type == EXC_ISE) ? srr0 : dar; + slbcache = PCPU_GET(slb); + esid = (uintptr_t)addr >> ADDR_SR_SHFT; + slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID; + + /* See if the hardware flushed this somehow (can happen in LPARs) */ + for (i = 0; i < n_slbs; i++) + if (slbcache[i].slbe == (slbe | (uint64_t)i)) + return; + + /* Not in the map, needs to actually be added */ + slbv = kernel_va_to_slbv(addr); + if (slbcache[USER_SLB_SLOT].slbe == 0) { + for (i = 0; i < n_slbs; i++) { + if (i == USER_SLB_SLOT) + continue; + if (!(slbcache[i].slbe & SLBE_VALID)) + goto fillkernslb; + } + + if (i == n_slbs) + slbcache[USER_SLB_SLOT].slbe = 1; + } + + /* Sacrifice a random SLB entry that is not the user entry */ + i = mftb() % n_slbs; + if (i == USER_SLB_SLOT) + i = (i+1) % n_slbs; + +fillkernslb: + /* Write new entry */ + slbcache[i].slbv = slbv; + slbcache[i].slbe = slbe | (uint64_t)i; + + /* Trap handler will restore from cache on exit */ +} + static int -handle_slb_spill(pmap_t pm, vm_offset_t addr) +handle_user_slb_spill(pmap_t pm, vm_offset_t addr) { struct slb *user_entry; uint64_t esid; @@ -495,12 +535,6 @@ handle_slb_spill(pmap_t pm, vm_offset_t addr) esid = (uintptr_t)addr >> ADDR_SR_SHFT; - if (pm == kernel_pmap) { - slb_insert_kernel((esid << SLBE_ESID_SHIFT) | SLBE_VALID, - kernel_va_to_slbv(addr)); - return (0); - } - PMAP_LOCK(pm); user_entry = user_va_to_slb_entry(pm, addr); diff --git a/sys/powerpc/aim/trap_subr64.S 
b/sys/powerpc/aim/trap_subr64.S index 6f3a2de..82935e7 100644 --- a/sys/powerpc/aim/trap_subr64.S +++ b/sys/powerpc/aim/trap_subr64.S @@ -112,6 +112,9 @@ restore_kernsrs: * r31 scratch * r1 kernel stack * SRR0/1 as at start of trap + * + * NOTE: SPRG1 is never used while the MMU is on, making it safe to reuse + * in any real-mode fault handler, including those handling double faults. */ #define FRAME_SETUP(savearea) \ /* Have to enable translation to allow access of kernel stack: */ \ @@ -120,11 +123,11 @@ restore_kernsrs: std %r30,(savearea+CPUSAVE_SRR0)(%r31); /* save SRR0 */ \ mfsrr1 %r30; \ std %r30,(savearea+CPUSAVE_SRR1)(%r31); /* save SRR1 */ \ + mfsprg1 %r31; /* get saved SP (clears SPRG1) */ \ mfmsr %r30; \ ori %r30,%r30,(PSL_DR|PSL_IR|PSL_RI)@l; /* relocation on */ \ mtmsr %r30; /* stack can now be accessed */ \ isync; \ - mfsprg1 %r31; /* get saved SP */ \ stdu %r31,-(FRAMELEN+288)(%r1); /* save it in the callframe */ \ std %r0, FRAME_0+48(%r1); /* save r0 in the trapframe */ \ std %r31,FRAME_1+48(%r1); /* save SP " " */ \ @@ -201,7 +204,7 @@ restore_kernsrs: mtctr %r4; \ mtxer %r5; \ mtlr %r6; \ - mtsprg1 %r7; /* save cr */ \ + mtsprg2 %r7; /* save cr */ \ ld %r31,FRAME_31+48(%r1); /* restore r0-31 */ \ ld %r30,FRAME_30+48(%r1); \ ld %r29,FRAME_29+48(%r1); \ @@ -235,16 +238,15 @@ restore_kernsrs: ld %r0, FRAME_0+48(%r1); \ ld %r1, FRAME_1+48(%r1); \ /* Can't touch %r1 from here on */ \ - mtsprg2 %r2; /* save r2 & r3 */ \ - mtsprg3 %r3; \ + mtsprg3 %r3; /* save r3 */ \ /* Disable translation, machine check and recoverability: */ \ - mfmsr %r2; \ - andi. %r2,%r2,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l; \ - mtmsr %r2; \ + mfmsr %r3; \ + andi. 
%r3,%r3,~(PSL_DR|PSL_IR|PSL_ME|PSL_RI)@l; \ + mtmsr %r3; \ isync; \ /* Decide whether we return to user mode: */ \ - GET_CPUINFO(%r2); \ - ld %r3,(savearea+CPUSAVE_SRR1)(%r2); \ + GET_CPUINFO(%r3); \ + ld %r3,(savearea+CPUSAVE_SRR1)(%r3); \ mtcr %r3; \ bf 17,1f; /* branch if PSL_PR is false */ \ /* Restore user SRs */ \ @@ -262,15 +264,15 @@ restore_kernsrs: ld %r29,(savearea+CPUSAVE_R29)(%r3); \ ld %r28,(savearea+CPUSAVE_R28)(%r3); \ ld %r27,(savearea+CPUSAVE_R27)(%r3); \ -1: mfsprg1 %r2; /* restore cr */ \ - mtcr %r2; \ - GET_CPUINFO(%r2); \ - ld %r3,(savearea+CPUSAVE_SRR0)(%r2); /* restore srr0 */ \ +1: mfsprg2 %r3; /* restore cr */ \ + mtcr %r3; \ + GET_CPUINFO(%r3); \ + ld %r3,(savearea+CPUSAVE_SRR0)(%r3); /* restore srr0 */ \ mtsrr0 %r3; \ - ld %r3,(savearea+CPUSAVE_SRR1)(%r2); /* restore srr1 */ \ + GET_CPUINFO(%r3); \ + ld %r3,(savearea+CPUSAVE_SRR1)(%r3); /* restore srr1 */ \ mtsrr1 %r3; \ - mfsprg2 %r2; /* restore r2 & r3 */ \ - mfsprg3 %r3 + mfsprg3 %r3 /* restore r3 */ #ifdef SMP /* @@ -330,6 +332,151 @@ CNAME(trapcode): CNAME(trapsize) = .-CNAME(trapcode) /* + * For SLB misses: do special things for the kernel + * + * Note: SPRG1 is always safe to overwrite any time the MMU is on, which is + * the only time this can be called. 
+ */ + .globl CNAME(slbtrap),CNAME(slbtrapsize) +CNAME(slbtrap): + mtsprg1 %r1 /* save SP */ + GET_CPUINFO(%r1) + std %r2,(PC_SLBSAVE+16)(%r1) + mfcr %r2 /* save CR */ + std %r2,(PC_SLBSAVE+104)(%r1) + mfsrr1 %r2 /* test kernel mode */ + mtcr %r2 + bf 17,1f /* branch if PSL_PR is false */ + /* User mode */ + ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ + mtcr %r2 + ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2 */ + mflr %r1 /* Save the old LR in r1 */ + mtsprg2 %r1 /* And then in SPRG2 */ + li %r1, 0x80 /* How to get the vector from LR */ + bla generictrap /* LR & SPRG3 is exception # */ +1: mflr %r2 /* Save the old LR in r2 */ + bla kern_slbtrap +CNAME(slbtrapsize) = .-CNAME(slbtrap) + +kern_slbtrap: + std %r2,(PC_SLBSAVE+136)(%r1) /* old LR */ + std %r3,(PC_SLBSAVE+24)(%r1) /* save R3 */ + + /* Check if this needs to be handled as a regular trap (userseg miss) */ + mflr %r2 + andi. %r2,%r2,0xff80 + cmpwi %r2,0x380 + bne 1f + mfdar %r2 + b 2f +1: mfsrr0 %r2 +2: /* r2 now contains the fault address */ + lis %r3,SEGMENT_MASK@highesta + ori %r3,%r3,SEGMENT_MASK@highera + sldi %r3,%r3,32 + oris %r3,%r3,SEGMENT_MASK@ha + ori %r3,%r3,SEGMENT_MASK@l + and %r2,%r2,%r3 /* R2 = segment base address */ + lis %r3,USER_ADDR@highesta + ori %r3,%r3,USER_ADDR@highera + sldi %r3,%r3,32 + oris %r3,%r3,USER_ADDR@ha + ori %r3,%r3,USER_ADDR@l + cmpd %r2,%r3 /* Compare fault base to USER_ADDR */ + bne 3f + + /* User seg miss, handle as a regular trap */ + ld %r2,(PC_SLBSAVE+104)(%r1) /* Restore CR */ + mtcr %r2 + ld %r2,(PC_SLBSAVE+16)(%r1) /* Restore R2,R3 */ + ld %r3,(PC_SLBSAVE+24)(%r1) + ld %r1,(PC_SLBSAVE+136)(%r1) /* Save the old LR in r1 */ + mtsprg2 %r1 /* And then in SPRG2 */ + li %r1, 0x80 /* How to get the vector from LR */ + b generictrap /* Retain old LR using b */ + +3: /* Real kernel SLB miss */ + std %r0,(PC_SLBSAVE+0)(%r1) /* free all volatile regs */ + mfsprg1 %r2 /* Old R1 */ + std %r2,(PC_SLBSAVE+8)(%r1) + /* R2,R3 already saved */ + std %r4,(PC_SLBSAVE+32)(%r1) + std 
%r5,(PC_SLBSAVE+40)(%r1) + std %r6,(PC_SLBSAVE+48)(%r1) + std %r7,(PC_SLBSAVE+56)(%r1) + std %r8,(PC_SLBSAVE+64)(%r1) + std %r9,(PC_SLBSAVE+72)(%r1) + std %r10,(PC_SLBSAVE+80)(%r1) + std %r11,(PC_SLBSAVE+88)(%r1) + std %r12,(PC_SLBSAVE+96)(%r1) + /* CR already saved */ + mfxer %r2 /* save XER */ + std %r2,(PC_SLBSAVE+112)(%r1) + mflr %r2 /* save LR (SP already saved) */ + std %r2,(PC_SLBSAVE+120)(%r1) + mfctr %r2 /* save CTR */ + std %r2,(PC_SLBSAVE+128)(%r1) + + /* Call handler */ + addi %r1,%r1,PC_SLBSTACK-48+1024 + li %r2,~15 + and %r1,%r1,%r2 + lis %r3,tocbase@ha + ld %r2,tocbase@l(%r3) + mflr %r3 + andi. %r3,%r3,0xff80 + mfdar %r4 + mfsrr0 %r5 + bl handle_kernel_slb_spill + nop + + /* Save r28-31, restore r4-r12 */ + GET_CPUINFO(%r1) + ld %r4,(PC_SLBSAVE+32)(%r1) + ld %r5,(PC_SLBSAVE+40)(%r1) + ld %r6,(PC_SLBSAVE+48)(%r1) + ld %r7,(PC_SLBSAVE+56)(%r1) + ld %r8,(PC_SLBSAVE+64)(%r1) + ld %r9,(PC_SLBSAVE+72)(%r1) + ld %r10,(PC_SLBSAVE+80)(%r1) + ld %r11,(PC_SLBSAVE+88)(%r1) + ld %r12,(PC_SLBSAVE+96)(%r1) + std %r28,(PC_SLBSAVE+64)(%r1) + std %r29,(PC_SLBSAVE+72)(%r1) + std %r30,(PC_SLBSAVE+80)(%r1) + std %r31,(PC_SLBSAVE+88)(%r1) + + /* Restore kernel mapping */ + bl restore_kernsrs + + /* Restore remaining registers */ + ld %r28,(PC_SLBSAVE+64)(%r1) + ld %r29,(PC_SLBSAVE+72)(%r1) + ld %r30,(PC_SLBSAVE+80)(%r1) + ld %r31,(PC_SLBSAVE+88)(%r1) + + ld %r2,(PC_SLBSAVE+104)(%r1) + mtcr %r2 + ld %r2,(PC_SLBSAVE+112)(%r1) + mtxer %r2 + ld %r2,(PC_SLBSAVE+120)(%r1) + mtlr %r2 + ld %r2,(PC_SLBSAVE+128)(%r1) + mtctr %r2 + ld %r2,(PC_SLBSAVE+136)(%r1) + mtlr %r2 + + /* Restore r0-r3 */ + ld %r0,(PC_SLBSAVE+0)(%r1) + ld %r2,(PC_SLBSAVE+16)(%r1) + ld %r3,(PC_SLBSAVE+24)(%r1) + mfsprg1 %r1 + + /* Back to whatever we were doing */ + rfid + +/* * For ALI: has to save DSISR and DAR */ .globl CNAME(alitrap),CNAME(alisize) diff --git a/sys/powerpc/include/pcpu.h b/sys/powerpc/include/pcpu.h index 2dac1b4..62094f8 100644 --- a/sys/powerpc/include/pcpu.h +++ 
b/sys/powerpc/include/pcpu.h @@ -55,7 +55,9 @@ struct pmap; #define PCPU_MD_AIM64_FIELDS \ struct slb pc_slb[64]; \ - struct slb **pc_userslb; + struct slb **pc_userslb; \ + register_t pc_slbsave[18]; \ + uint8_t pc_slbstack[1024]; #ifdef __powerpc64__ #define PCPU_MD_AIM_FIELDS PCPU_MD_AIM64_FIELDS diff --git a/sys/powerpc/powerpc/genassym.c b/sys/powerpc/powerpc/genassym.c index 9f59498..e228b10 100644 --- a/sys/powerpc/powerpc/genassym.c +++ b/sys/powerpc/powerpc/genassym.c @@ -107,8 +107,11 @@ ASSYM(USER_ADDR, USER_ADDR); #ifdef __powerpc64__ ASSYM(PC_KERNSLB, offsetof(struct pcpu, pc_slb)); ASSYM(PC_USERSLB, offsetof(struct pcpu, pc_userslb)); +ASSYM(PC_SLBSAVE, offsetof(struct pcpu, pc_slbsave)); +ASSYM(PC_SLBSTACK, offsetof(struct pcpu, pc_slbstack)); ASSYM(USER_SLB_SLOT, USER_SLB_SLOT); ASSYM(USER_SLB_SLBE, USER_SLB_SLBE); +ASSYM(SEGMENT_MASK, SEGMENT_MASK); #else ASSYM(PM_SR, offsetof(struct pmap, pm_sr)); ASSYM(USER_SR, USER_SR); |