| author | raj <raj@FreeBSD.org> | 2008-03-03 17:17:00 +0000 |
| committer | raj <raj@FreeBSD.org> | 2008-03-03 17:17:00 +0000 |
| commit | 0757a4afb5d18c5b874cc918eb56d7264456bd20 (patch) |
| tree | b0d8321058cccbf59aa2e7cd69b9283a0663316e /sys/powerpc/booke |
| parent | 05437e53d55e216714c2e1f4a0aa97d4598090b8 (diff) |
| download | FreeBSD-src-0757a4afb5d18c5b874cc918eb56d7264456bd20.zip, FreeBSD-src-0757a4afb5d18c5b874cc918eb56d7264456bd20.tar.gz |
Initial support for Freescale PowerQUICC III MPC85xx system-on-chip family.
The PQ3 is a high performance integrated communications processing system
based on the e500 core, which is an embedded RISC processor that implements
the 32-bit Book E definition of the PowerPC architecture. For details refer
to: http://www.freescale.com/webapp/sps/site/prod_summary.jsp?code=MPC8555E
This port was tested and successfully run on the following members of the PQ3
family: MPC8533, MPC8541, MPC8548, MPC8555.
The following major integrated peripherals are supported:
* On-chip peripherals bus
* OpenPIC interrupt controller
* UART
* Ethernet (TSEC)
* Host/PCI bridge
* QUICC engine (SCC functionality)
This commit brings the main functionality and will be followed by individual
drivers that are logically separate from this base.
Approved by: cognet (mentor)
Obtained from: Juniper, Semihalf
MFp4: e500
Diffstat (limited to 'sys/powerpc/booke')
| -rw-r--r-- | sys/powerpc/booke/clock.c | 276 |
| -rw-r--r-- | sys/powerpc/booke/copyinout.c | 304 |
| -rw-r--r-- | sys/powerpc/booke/interrupt.c | 139 |
| -rw-r--r-- | sys/powerpc/booke/locore.S | 497 |
| -rw-r--r-- | sys/powerpc/booke/machdep.c | 963 |
| -rw-r--r-- | sys/powerpc/booke/pmap.c | 3107 |
| -rw-r--r-- | sys/powerpc/booke/support.S | 106 |
| -rw-r--r-- | sys/powerpc/booke/swtch.S | 145 |
| -rw-r--r-- | sys/powerpc/booke/trap.c | 679 |
| -rw-r--r-- | sys/powerpc/booke/trap_subr.S | 842 |
| -rw-r--r-- | sys/powerpc/booke/uio_machdep.c | 135 |
| -rw-r--r-- | sys/powerpc/booke/vm_machdep.c | 517 |
12 files changed, 7710 insertions, 0 deletions
diff --git a/sys/powerpc/booke/clock.c b/sys/powerpc/booke/clock.c new file mode 100644 index 0000000..a54c85a --- /dev/null +++ b/sys/powerpc/booke/clock.c @@ -0,0 +1,276 @@ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: clock.c,v 1.9 2000/01/19 02:52:19 msaitoh Exp $ + */ +/* + * Copyright (C) 2001 Benno Rice. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/bus.h> +#include <sys/clock.h> +#include <sys/timetc.h> +#include <sys/interrupt.h> + +#include <machine/clock.h> +#include <machine/psl.h> +#include <machine/spr.h> +#include <machine/cpu.h> +#include <machine/intr.h> +#include <machine/md_var.h> + +/* + * Initially we assume a processor with a bus frequency of 12.5 MHz. + */ +u_int tickspending; +u_long ns_per_tick = 80; +static u_long ticks_per_sec = 12500000; +static long ticks_per_intr; + +#define DIFF19041970 2082844800 + +static int clockinitted = 0; +static timecounter_get_t decr_get_timecount; + +static struct timecounter decr_timecounter = { + decr_get_timecount, /* get_timecount */ + 0, /* no poll_pps */ + ~0u, /* counter_mask */ + 0, /* frequency */ + "decrementer" /* name */ +}; + +void +inittodr(time_t base) +{ + time_t deltat; + u_int rtc_time; + struct timespec ts; + + /* + * If we have an RTC device use it, otherwise use the fs time. + */ + { + ts.tv_sec = base; + ts.tv_nsec = 0; + tc_setclock(&ts); + return; + } + clockinitted = 1; + ts.tv_sec = rtc_time - DIFF19041970; + + deltat = ts.tv_sec - base; + if (deltat < 0) { + deltat = -deltat; + } + if (deltat < 2 * SECDAY) { + tc_setclock(&ts); + return; + } + + printf("WARNING: clock %s %d days", + ts.tv_sec < base ? "lost" : "gained", (int)(deltat / SECDAY)); + + printf(" -- CHECK AND RESET THE DATE!\n"); +} + +/* + * Similar to the above + */ +void +resettodr() +{ + +} + +void +decr_intr(struct trapframe *frame) +{ + u_long msr; + + /* + * Check whether we are initialized. + */ + if (!ticks_per_intr) + return; + + /* + * Interrupt handler must reset DIS to avoid getting another + * interrupt once EE is enabled. + */ + mtspr(SPR_TSR, TSR_DIS); + + /* + * Reenable interrupts + */ + msr = mfmsr(); + mtmsr(msr | PSL_EE); + + hardclock(TRAPF_USERMODE(frame), TRAPF_PC(frame)); +} + +void +cpu_initclocks(void) +{ + + return; +} + +void +decr_config (unsigned long freq) +{ + ticks_per_sec = freq; + decr_timecounter.tc_frequency = freq; +} + +void +decr_init (void) +{ + unsigned int msr; + + /* + * Should check for correct CPU here? XXX + */ + msr = mfmsr(); + mtmsr(msr & ~(PSL_EE)); + + tc_init(&decr_timecounter); + + ns_per_tick = 1000000000 / ticks_per_sec; + ticks_per_intr = ticks_per_sec / hz; + + mtdec(ticks_per_intr); + + mtspr(SPR_DECAR, ticks_per_intr); + mtspr(SPR_TCR, mfspr(SPR_TCR) | TCR_DIE | TCR_ARE); + + mtmsr(msr); +} + +static __inline u_quad_t +mftb (void) +{ + u_long scratch; + u_quad_t tb; + + __asm__ __volatile__( + "1: mftbu %0;" + " mftb %0+1;" + " mftbu %1;" + " cmpw 0,%0,%1;" + " bne 1b" + : "=r"(tb), "=r"(scratch)); + return tb; +} + +static unsigned +decr_get_timecount(struct timecounter *tc) +{ + quad_t tb; + + tb = mftb(); + return tb; +} + +/* + * Wait for about n microseconds (at least!). + */ +void +DELAY(int n) +{ + u_quad_t start, end, now; + +#define USECS_IN_SEC 1000000ULL + + if (n > USECS_IN_SEC) { + printf("WARNING: %s(%d) called from %p", __func__, n, + __builtin_return_address(0)); + } + + start = mftb(); + end = start + (u_quad_t)ticks_per_sec / ( USECS_IN_SEC / n); + do { + now = mftb(); + } while (now < end || (now > start && end < start)); +} + +/* + * Nothing to do. 
+ */ +void +cpu_startprofclock(void) +{ + + /* Do nothing */ +} + +void +cpu_stopprofclock(void) +{ + +} + +/* + * XXX Needed by syscons + */ +int +sysbeep(int pitch, int period) +{ + + return (0); +} diff --git a/sys/powerpc/booke/copyinout.c b/sys/powerpc/booke/copyinout.c new file mode 100644 index 0000000..fd3bdf3 --- /dev/null +++ b/sys/powerpc/booke/copyinout.c @@ -0,0 +1,304 @@ +/*- + * Copyright (C) 2002 Benno Rice + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +/*- + * Copyright (C) 1993 Wolfgang Solfrank. + * Copyright (C) 1993 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> + +#include <machine/pcb.h> +#include <machine/vmparam.h> + + +int setfault(faultbuf); /* defined in locore.S */ + +static int +is_uaddr(const void *addr) +{ + int rv = ((vm_offset_t)addr <= VM_MAXUSER_ADDRESS) ? 1 : 0; + + return rv; +} + +int +copyout(const void *kaddr, void *udaddr, size_t len) +{ + struct thread *td; + faultbuf env; + + if (!is_uaddr(udaddr)) + return (EFAULT); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + bcopy(kaddr, udaddr, len); + + td->td_pcb->pcb_onfault = NULL; + return (0); +} + +int +copyin(const void *udaddr, void *kaddr, size_t len) +{ + struct thread *td; + faultbuf env; + + if (!is_uaddr(udaddr) || is_uaddr(kaddr)) + return (EFAULT); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + bcopy(udaddr, kaddr, len); + + td->td_pcb->pcb_onfault = NULL; + return (0); +} + +int +copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done) +{ + struct thread *td; + faultbuf env; + const char *up; + char *kp; + size_t l; + int rv, c; + + if (!is_uaddr(udaddr) || is_uaddr(kaddr)) + return (EFAULT); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + kp = kaddr; + up = udaddr; + + rv = ENAMETOOLONG; + + for (l = 0; len-- > 0; l++) { + + c = *up++; + + if (!(*kp++ = c)) { + l++; + rv = 0; + break; + } + } + + if (done != NULL) { + *done = l; + } + + td->td_pcb->pcb_onfault = NULL; + return (rv); +} + +int +subyte(void *addr, int byte) +{ + struct thread *td; + faultbuf env; + + if (!is_uaddr(addr)) + return (EFAULT); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + *(char *)addr = (char)byte; + + td->td_pcb->pcb_onfault = NULL; + return (0); +} + +int +suword(void *addr, long word) +{ + struct thread *td; + faultbuf env; + + if (!is_uaddr(addr)) + return (EFAULT); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + *(long *)addr = word; + + td->td_pcb->pcb_onfault = NULL; + return (0); +} + +int +suword32(void *addr, int32_t word) +{ + + return (suword(addr, (long)word)); +} + + +int +fubyte(const void *addr) +{ + struct thread *td; + faultbuf env; + int val; + + if (!is_uaddr(addr)) + return (EFAULT); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + val = *(const u_char *)addr; + + td->td_pcb->pcb_onfault = NULL; + return (val); +} + +long +fuword(const void *addr) +{ + struct thread *td; + faultbuf env; + long val; + + if (!is_uaddr(addr)) + return (EFAULT); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + val = *(const long *)addr; + + td->td_pcb->pcb_onfault = NULL; + return (val); +} + +int32_t +fuword32(const void *addr) +{ + + return ((int32_t)fuword(addr)); +} + +uint32_t +casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval) +{ + + return (casuword((volatile u_long *)base, oldval, newval)); +} + +u_long +casuword(volatile u_long *addr, u_long old, u_long new) +{ + struct thread *td; + faultbuf env; + u_long val; + + if (!((vm_offset_t)addr <= VM_MAXUSER_ADDRESS)) + return (EFAULT); + + td = 
PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = NULL; + return (EFAULT); + } + + val = *addr; + (void) atomic_cmpset_32((volatile uint32_t *)addr, old, new); + + td->td_pcb->pcb_onfault = NULL; + + return (val); +} diff --git a/sys/powerpc/booke/interrupt.c b/sys/powerpc/booke/interrupt.c new file mode 100644 index 0000000..1d20772 --- /dev/null +++ b/sys/powerpc/booke/interrupt.c @@ -0,0 +1,139 @@ +/*- + * Copyright (C) 2006 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright 2002 by Peter Grehan. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +/* + * Interrupts are dispatched to here from locore asm + */ + +#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/interrupt.h> +#include <sys/kernel.h> +#include <sys/kthread.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/smp.h> +#include <sys/unistd.h> +#include <sys/vmmeter.h> + +#include <machine/cpu.h> +#include <machine/db_machdep.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/md_var.h> +#include <machine/pcb.h> +#include <machine/psl.h> +#include <machine/trap.h> + +#include "pic_if.h" + +extern void decr_intr(struct trapframe *); + +void powerpc_decr_interrupt(struct trapframe *); +void powerpc_extr_interrupt(struct trapframe *); +void powerpc_crit_interrupt(struct trapframe *); +void powerpc_mchk_interrupt(struct trapframe *); + +static void dump_frame(struct trapframe *framep); + +static void +dump_frame(struct trapframe *frame) +{ + int i; + + printf("\n*** *** STACK FRAME DUMP *** ***\n"); + printf(" exc = 0x%x\n", frame->exc); + printf(" srr0 = 0x%08x\n", frame->srr0); + printf(" srr1 = 0x%08x\n", frame->srr1); + printf(" dear = 0x%08x\n", frame->cpu.booke.dear); + printf(" esr = 0x%08x\n", frame->cpu.booke.esr); + printf(" lr = 0x%08x\n", frame->lr); + printf(" cr = 0x%08x\n", frame->cr); + printf(" sp = 0x%08x\n", frame->fixreg[1]); + + for (i = 0; i < 32; i++) { + printf(" R%02d = 0x%08x", i, frame->fixreg[i]); + if ((i & 0x3) == 3) + printf("\n"); + } + printf("\n"); +} + + +void powerpc_crit_interrupt(struct trapframe *framep) +{ + + printf("powerpc_crit_interrupt: critical interrupt!\n"); + dump_frame(framep); + trap(framep); +} + +void powerpc_mchk_interrupt(struct trapframe *framep) +{ + + printf("powerpc_mchk_interrupt: machine check interrupt!\n"); + dump_frame(framep); + trap(framep); +} + +/* + * Decrementer interrupt routine + */ +void +powerpc_decr_interrupt(struct trapframe *framep) +{ + struct thread *td; + + td = PCPU_GET(curthread); + atomic_add_int(&td->td_intr_nesting_level, 1); + decr_intr(framep); + atomic_subtract_int(&td->td_intr_nesting_level, 1); +} + +/* + * External input interrupt routine + */ +void +powerpc_extr_interrupt(struct trapframe *framep) +{ + struct thread *td; + + td = PCPU_GET(curthread); + atomic_add_int(&td->td_intr_nesting_level, 1); + PIC_DISPATCH(pic, framep); + atomic_subtract_int(&td->td_intr_nesting_level, 1); +} diff --git a/sys/powerpc/booke/locore.S b/sys/powerpc/booke/locore.S new file mode 100644 index 0000000..5cf3f6f --- /dev/null +++ b/sys/powerpc/booke/locore.S @@ -0,0 +1,497 @@ +/*- + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "assym.s" + +#include <machine/param.h> +#include <machine/asm.h> +#include <machine/spr.h> +#include <machine/psl.h> +#include <machine/pte.h> +#include <machine/trap.h> +#include <machine/vmparam.h> +#include <machine/tlb.h> +#include <machine/bootinfo.h> + +/* + * This symbol is here for the benefit of kvm_mkdb, and is supposed to + * mark the start of kernel text. + */ + .text + .globl kernel_text +kernel_text: + +/* + * Startup entry. Note, this must be the first thing in the text segment! + */ + .text + .globl __start +__start: + +/* + * Assumption on a boot loader: + * - system memory starts from physical address 0 + * - kernel is loaded at 16MB boundary + * - it's mapped by a single TBL1 entry + * - TLB1 mapping is 1:1 pa to va + * - all PID registers are set to the same value + * + * Loader register use: + * r1 : stack pointer + * r3 : metadata pointer + * + * We rearrange the TLB1 layout as follows: + * - find AS and entry kernel started in + * - make sure it's protected, ivalidate other entries + * - create temp entry in the second AS (make sure it's not TLB[15]) + * - switch to temp mapping + * - map 16MB of RAM in TLB1[15] + * - use AS=1, set EPN to KERNBASE and RPN to kernel load address + * - switch to to TLB1[15] mapping + * - invalidate temp mapping + * + * locore register use: + * r1 : stack pointer + * r2 : unused + * r3 : kernel_text + * r4 : _end + * r5 : metadata pointer + * r6-r9 : unused + * r10 : entry we started in + * r11 : temp entry + * r12 : AS we started in + * r13-r31 : auxiliary registers + */ + +/* + * Move metadata ptr to r5 + */ + mr %r5, %r3 + +/* + * Initial cleanup + */ + li %r16, 0x200 /* Keep debug exceptions for CodeWarrior. */ + mtmsr %r16 + isync +#if 0 + mtspr SPR_HID0, %r16 + isync + msync + mtspr SPR_HID1, %r16 + isync +#endif + + /* Issue INV_ALL Invalidate on TLB0 */ + li %r16, 0x04 + tlbivax 0, %r16 + isync + msync + +/* + * Use tblsx to locate the TLB1 entry that maps kernel code + */ + bl 1f /* Current address */ +1: mflr %r15 + + /* Find entry that maps current address */ + mfspr %r17, SPR_PID0 + slwi %r17, %r17, MAS6_SPID0_SHIFT + mtspr SPR_MAS6, %r17 + isync + tlbsx 0, %r15 + + /* Copy entry number to r10 */ + mfspr %r17, SPR_MAS0 + rlwinm %r10, %r17, 16, 28, 31 + + /* Invalidate TLB1, skipping our entry. */ + mfspr %r17, SPR_TLB1CFG /* Get number of entries */ + andi. 
%r17, %r17, TLBCFG_NENTRY_MASK@l + li %r16, 0 /* Start from Entry 0 */ + +2: lis %r15, MAS0_TLBSEL1@h /* Select TLB1 */ + rlwimi %r15, %r16, 16, 12, 15 + mtspr SPR_MAS0, %r15 + isync + tlbre + mfspr %r15, SPR_MAS1 + cmpw %r16, %r10 + beq 3f + /* Clear VALID and IPROT bits for other entries */ + rlwinm %r15, %r15, 0, 2, 31 + mtspr SPR_MAS1, %r15 + isync + tlbwe + isync + msync +3: addi %r16, %r16, 1 + cmpw %r16, %r17 /* Check if this is the last entry */ + bne 2b + +/* + * Create temporary mapping in the other Address Space + */ + lis %r17, MAS0_TLBSEL1@h /* Select TLB1 */ + rlwimi %r17, %r10, 16, 12, 15 /* Select our entry */ + mtspr SPR_MAS0, %r17 + isync + tlbre /* Read it in */ + + /* Prepare and write temp entry */ + lis %r17, MAS0_TLBSEL1@h /* Select TLB1 */ + addi %r11, %r10, 0x1 /* Use next entry. */ + rlwimi %r17, %r11, 16, 12, 15 /* Select temp entry */ + mtspr SPR_MAS0, %r17 + isync + + mfspr %r16, SPR_MAS1 + li %r15, 1 /* AS 1 */ + rlwimi %r16, %r15, 12, 19, 19 + mtspr SPR_MAS1, %r16 + li %r17, 0 + rlwimi %r16, %r17, 0, 8, 15 /* Global mapping, TID=0 */ + isync + + tlbwe + isync + msync + + mfmsr %r16 + ori %r16, %r16, 0x30 /* Switch to AS 1. */ + + bl 4f /* Find current execution address */ +4: mflr %r15 + addi %r15, %r15, 20 /* Increment to instruction after rfi */ + mtspr SPR_SRR0, %r15 + mtspr SPR_SRR1, %r16 + rfi /* Switch context */ + +/* + * Invalidate initial entry + */ + mr %r22, %r10 + bl tlb1_inval_entry + +/* + * Setup final mapping in TLB1[1] and switch to it + */ + /* Final kernel mapping, map in 16 MB of RAM */ + lis %r16, MAS0_TLBSEL1@h /* Select TLB1 */ + li %r17, 1 /* Entry 1 */ + rlwimi %r16, %r17, 16, 12, 15 + mtspr SPR_MAS0, %r16 + isync + + li %r16, (TLB_SIZE_16M << MAS1_TSIZE_SHIFT)@l + oris %r16, %r16, (MAS1_VALID | MAS1_IPROT)@h + mtspr SPR_MAS1, %r16 + isync + + lis %r19, KERNBASE@h + ori %r19, %r19, KERNBASE@l + mtspr SPR_MAS2, %r19 /* Set final EPN, clear WIMG */ + isync + + bl 5f +5: mflr %r16 /* Use current address */ + lis %r18, 0xff00 /* 16MB alignment mask */ + and %r16, %r16, %r18 + mr %r25, %r16 /* Copy kernel load address */ + ori %r16, %r16, (MAS3_SX | MAS3_SW | MAS3_SR)@l + mtspr SPR_MAS3, %r16 /* Set RPN and protection */ + isync + tlbwe + isync + msync + + /* Switch to the above TLB1[1] mapping */ + lis %r18, 0x00ff /* 16MB offset mask */ + ori %r18, %r18, 0xffff + bl 6f +6: mflr %r20 /* Use current address */ + and %r20, %r20, %r18 /* Offset from kernel load address */ + add %r20, %r20, %r19 /* Move to kernel virtual address */ + addi %r20, %r20, 32 /* Increment to instr. after rfi */ + li %r21, 0x200 + mtspr SPR_SRR0, %r20 + mtspr SPR_SRR1, %r21 + rfi + + /* Save kernel load address for later use */ + lis %r24, kernload@ha + addi %r24, %r24, kernload@l + stw %r25, 0(%r24) + +/* + * Invalidate temp mapping + */ + mr %r22, %r11 + bl tlb1_inval_entry + +/* + * Setup a temporary stack + */ + lis %r1, kstack0_space@ha + addi %r1, %r1, kstack0_space@l + addi %r1, %r1, (16384 - 512) + +/* + * Intialise exception vector offsets + */ + bl ivor_setup + +/* + * Jump to system initialization code + * + * Setup first two arguments for e500_init, metadata (r5) is already in place. 
+ */ + lis %r3, kernel_text@ha + addi %r3, %r3, kernel_text@l + lis %r4, _end@ha + addi %r4, %r4, _end@l + + bl e500_init /* Prepare e500 core */ + bl mi_startup /* Machine independet part, does not return */ + +/************************************************************************/ +/* locore subroutines */ +/************************************************************************/ + +tlb1_inval_entry: + lis %r17, MAS0_TLBSEL1@h /* Select TLB1 */ + rlwimi %r17, %r22, 16, 12, 15 /* Select our entry */ + mtspr SPR_MAS0, %r17 + isync + tlbre /* Read it in */ + + li %r16, 0 + mtspr SPR_MAS1, %r16 + isync + tlbwe + isync + msync + blr + +ivor_setup: + /* Set base address of interrupt handler routines */ + lis %r21, interrupt_vector_base@h + mtspr SPR_IVPR, %r21 + + /* Assign interrupt handler routines offsets */ + li %r21, int_critical_input@l + mtspr SPR_IVOR0, %r21 + li %r21, int_machine_check@l + mtspr SPR_IVOR1, %r21 + li %r21, int_data_storage@l + mtspr SPR_IVOR2, %r21 + li %r21, int_instr_storage@l + mtspr SPR_IVOR3, %r21 + li %r21, int_external_input@l + mtspr SPR_IVOR4, %r21 + li %r21, int_alignment@l + mtspr SPR_IVOR5, %r21 + li %r21, int_program@l + mtspr SPR_IVOR6, %r21 + li %r21, int_syscall@l + mtspr SPR_IVOR8, %r21 + li %r21, int_decrementer@l + mtspr SPR_IVOR10, %r21 + li %r21, int_fixed_interval_timer@l + mtspr SPR_IVOR11, %r21 + li %r21, int_watchdog@l + mtspr SPR_IVOR12, %r21 + li %r21, int_data_tlb_error@l + mtspr SPR_IVOR13, %r21 + li %r21, int_inst_tlb_error@l + mtspr SPR_IVOR14, %r21 + li %r21, int_debug@l + mtspr SPR_IVOR15, %r21 + blr + +/* + * void tlb1_inval_va(vm_offset_t va) + * + * r3 - va to invalidate + */ +ENTRY(tlb1_inval_va) + /* EA mask */ + lis %r6, 0xffff + ori %r6, %r6, 0xf000 + and %r3, %r3, %r6 + + /* Select TLB1 */ + ori %r3, %r3, 0x08 + + isync + tlbivax 0, %r3 + isync + msync + blr + +/* + * void tlb0_inval_va(vm_offset_t va) + * + * r3 - va to invalidate + */ +ENTRY(tlb0_inval_va) + /* EA mask, this also clears TLBSEL, selecting TLB0 */ + lis %r6, 0xffff + ori %r6, %r6, 0xf000 + and %r3, %r3, %r6 + + isync + tlbivax 0, %r3 + isync + msync + blr + +/* + * Cache disable/enable/inval sequences according + * to section 2.16 of E500CORE RM. + */ +ENTRY(dcache_inval) + /* Invalidate d-cache */ + mfspr %r3, SPR_L1CSR0 + ori %r3, %r3, (L1CSR0_DCFI | L1CSR0_DCLFR)@l + msync + isync + mtspr SPR_L1CSR0, %r3 + isync + blr + +ENTRY(dcache_disable) + /* Disable d-cache */ + mfspr %r3, SPR_L1CSR0 + li %r4, L1CSR0_DCE@l + not %r4, %r4 + and %r3, %r3, %r4 + msync + isync + mtspr SPR_L1CSR0, %r3 + isync + blr + +ENTRY(dcache_enable) + /* Enable d-cache */ + mfspr %r3, SPR_L1CSR0 + oris %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@h + ori %r3, %r3, (L1CSR0_DCPE | L1CSR0_DCE)@l + msync + isync + mtspr SPR_L1CSR0, %r3 + isync + blr + +ENTRY(icache_inval) + /* Invalidate i-cache */ + mfspr %r3, SPR_L1CSR1 + ori %r3, %r3, (L1CSR1_ICFI | L1CSR1_ICLFR)@l + isync + mtspr SPR_L1CSR1, %r3 + isync + blr + +ENTRY(icache_disable) + /* Disable i-cache */ + mfspr %r3, SPR_L1CSR1 + li %r4, L1CSR1_ICE@l + not %r4, %r4 + and %r3, %r3, %r4 + isync + mtspr SPR_L1CSR1, %r3 + isync + blr + +ENTRY(icache_enable) + /* Enable i-cache */ + mfspr %r3, SPR_L1CSR1 + oris %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@h + ori %r3, %r3, (L1CSR1_ICPE | L1CSR1_ICE)@l + isync + mtspr SPR_L1CSR1, %r3 + isync + blr + +/* + * int setfault() + * + * Similar to setjmp to setup for handling faults on accesses to user memory. 
+ * Any routine using this may only call bcopy, either the form below, + * or the (currently used) C code optimized, so it doesn't use any non-volatile + * registers. + */ + .globl setfault +setfault: + mflr %r0 + mfsprg0 %r4 + lwz %r4, PC_CURTHREAD(%r4) + lwz %r4, TD_PCB(%r4) + stw %r3, PCB_ONFAULT(%r4) + mfcr %r10 + mfctr %r11 + mfxer %r12 + stw %r0, 0(%r3) + stw %r1, 4(%r3) + stw %r2, 8(%r3) + stmw %r10, 12(%r3) /* store CR, CTR, XER, [r13 .. r31] */ + li %r3, 0 /* return FALSE */ + blr + +/************************************************************************/ +/* Data section */ +/************************************************************************/ + .data + .align 4 +GLOBAL(kstack0_space) + .space 16384 + +/* + * Compiled KERNBASE locations + */ + .globl kernbase + .set kernbase, KERNBASE + +/* + * Globals + */ +#define INTSTK 16384 /* 16K interrupt stack */ +#define INTRCNT_COUNT 256 /* max(HROWPIC_IRQMAX,OPENPIC_IRQMAX) */ + +GLOBAL(kernload) + .long +GLOBAL(intrnames) + .space INTRCNT_COUNT * (MAXCOMLEN + 1) * 2 +GLOBAL(eintrnames) + .align 4 +GLOBAL(intrcnt) + .space INTRCNT_COUNT * 4 * 2 +GLOBAL(eintrcnt) + +#include <powerpc/booke/trap_subr.S> diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c new file mode 100644 index 0000000..a7a11d9 --- /dev/null +++ b/sys/powerpc/booke/machdep.c @@ -0,0 +1,963 @@ +/*- + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (C) 2001 Benno Rice + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ + */ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_compat.h" +#include "opt_kstack_pages.h" + +#include <sys/cdefs.h> +#include <sys/types.h> + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/bus.h> +#include <sys/cons.h> +#include <sys/cpu.h> +#include <sys/kdb.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/exec.h> +#include <sys/ktr.h> +#include <sys/sysproto.h> +#include <sys/signalvar.h> +#include <sys/sysent.h> +#include <sys/imgact.h> +#include <sys/msgbuf.h> +#include <sys/ptrace.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_page.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> + +#include <machine/cpu.h> +#include <machine/kdb.h> +#include <machine/reg.h> +#include <machine/vmparam.h> +#include <machine/spr.h> +#include <machine/hid.h> +#include <machine/psl.h> +#include <machine/trap.h> +#include <machine/md_var.h> +#include <machine/mmuvar.h> +#include <machine/pmap.h> +#include <machine/sigframe.h> +#include <machine/metadata.h> +#include <machine/bootinfo.h> +#include <machine/powerpc.h> + +#include <sys/linker.h> +#include <sys/reboot.h> + +#ifdef DEBUG +#define debugf(fmt, args...) printf(fmt, ##args) +#else +#define debugf(fmt, args...) +#endif + +extern unsigned char kernel_text[]; +extern unsigned char _etext[]; +extern unsigned char _edata[]; +extern unsigned char __bss_start[]; +extern unsigned char __sbss_start[]; +extern unsigned char __sbss_end[]; +extern unsigned char _end[]; + +extern struct mem_region availmem_regions[]; +extern int availmem_regions_sz; + +extern void *trapcode, *trapsize; + +extern unsigned char kstack0_space[]; + +struct kva_md_info kmi; +struct pcpu __pcpu[MAXCPU]; +struct trapframe frame0; +int cold = 1; +long realmem = 0; +long Maxmem = 0; + +struct bootinfo *bootinfo; + +char machine[] = "powerpc"; +SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); + +static char model[128]; +SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, model, 0, ""); + +static int cacheline_size = CACHELINESIZE; +SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size, + CTLFLAG_RD, &cacheline_size, 0, ""); + +static void cpu_e500_startup(void *); +SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_e500_startup, NULL) + +void print_kernel_section_addr(void); +void dump_bootinfo(void); +void dump_kenv(void); +void e500_init(u_int32_t, u_int32_t, void *); +void setPQL2(int *const size, int *const ways); + +void +setPQL2(int *const size, int *const ways) +{ + + return; +} + +static void +cpu_e500_startup(void *dummy) +{ + + /* Initialise the decrementer-based clock. */ + decr_init(); + + /* Good {morning,afternoon,evening,night}. */ + cpu_setup(PCPU_GET(cpuid)); + + printf("real memory = %ld (%ld MB)\n", ptoa(physmem), + ptoa(physmem) / 1048576); + realmem = physmem; + + /* Display any holes after the first chunk of extended memory. */ + if (bootverbose) { + int indx; + + printf("Physical memory chunk(s):\n"); + for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { + int size1 = phys_avail[indx + 1] - phys_avail[indx]; + + printf("0x%08x - 0x%08x, %d bytes (%d pages)\n", + phys_avail[indx], phys_avail[indx + 1] - 1, size1, + size1 / PAGE_SIZE); + } + } + + vm_ksubmap_init(&kmi); + + printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), + ptoa(cnt.v_free_count) / 1048576); + + /* Set up buffers, so they can be used to read disk labels. 
*/ + bufinit(); + vm_pager_bufferinit(); +} + +static char * +kenv_next(char *cp) +{ + + if (cp != NULL) { + while (*cp != 0) + cp++; + cp++; + if (*cp == 0) + cp = NULL; + } + return (cp); +} + +void +dump_kenv(void) +{ + int len; + char *cp; + + debugf("loader passed (static) kenv:\n"); + if (kern_envp == NULL) { + debugf(" no env, null ptr\n"); + return; + } + debugf(" kern_envp = 0x%08x\n", (u_int32_t)kern_envp); + + len = 0; + for (cp = kern_envp; cp != NULL; cp = kenv_next(cp)) + debugf(" %x %s\n", (u_int32_t)cp, cp); +} + +void +dump_bootinfo(void) +{ + struct bi_mem_region *mr; + struct bi_eth_addr *eth; + int i, j; + + debugf("bootinfo:\n"); + if (bootinfo == NULL) { + debugf(" no bootinfo, null ptr\n"); + return; + } + + debugf(" version = 0x%08x\n", bootinfo->bi_version); + debugf(" ccsrbar = 0x%08x\n", bootinfo->bi_bar_base); + debugf(" cpu_clk = 0x%08x\n", bootinfo->bi_cpu_clk); + debugf(" bus_clk = 0x%08x\n", bootinfo->bi_bus_clk); + + debugf(" mem regions:\n"); + mr = (struct bi_mem_region *)bootinfo->bi_data; + for (i = 0; i < bootinfo->bi_mem_reg_no; i++, mr++) + debugf(" #%d, base = 0x%08x, size = 0x%08x\n", i, + mr->mem_base, mr->mem_size); + + debugf(" eth addresses:\n"); + eth = (struct bi_eth_addr *)mr; + for (i = 0; i < bootinfo->bi_eth_addr_no; i++, eth++) { + debugf(" #%d, addr = ", i); + for (j = 0; j < 6; j++) + debugf("%02x ", eth->mac_addr[j]); + debugf("\n"); + } +} + +void +print_kernel_section_addr(void) +{ + + debugf("kernel image addresses:\n"); + debugf(" kernel_text = 0x%08x\n", (u_int32_t)kernel_text); + debugf(" _etext (sdata) = 0x%08x\n", (u_int32_t)_etext); + debugf(" _edata = 0x%08x\n", (u_int32_t)_edata); + debugf(" __sbss_start = 0x%08x\n", (u_int32_t)__sbss_start); + debugf(" __sbss_end = 0x%08x\n", (u_int32_t)__sbss_end); + debugf(" __sbss_start = 0x%08x\n", (u_int32_t)__bss_start); + debugf(" _end = 0x%08x\n", (u_int32_t)_end); +} + +void +e500_init(u_int32_t startkernel, u_int32_t endkernel, void *mdp) +{ + struct pcpu *pc; + void *kmdp; + vm_offset_t end; + struct bi_mem_region *mr; + int i; + + kmdp = NULL; + + end = endkernel; + + /* + * Parse metadata and fetch parameters. This must be done as the first + * step as we need bootinfo data to at least init the console + */ + if (mdp != NULL) { + preload_metadata = mdp; + kmdp = preload_search_by_type("elf kernel"); + if (kmdp != NULL) { + bootinfo = (struct bootinfo *)preload_search_info(kmdp, + MODINFO_METADATA|MODINFOMD_BOOTINFO); + + boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); + kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); + end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); + } + } else { + /* + * We should scream but how? - without CCSR bar (in bootinfo) + * cannot even output anything... + */ + + /* + * FIXME add return value and handle in the locore so we can + * return to the loader maybe? (this seems not very easy to + * restore everything as the TLB have all been reprogrammed + * in the locore etc...) + */ + while(1); + } + + /* Initialize memory regions table */ + mr = (struct bi_mem_region *)bootinfo->bi_data; + for (i = 0; i < bootinfo->bi_mem_reg_no; i++, mr++) { + if (i == MEM_REGIONS) + break; + availmem_regions[i].mr_start = mr->mem_base; + availmem_regions[i].mr_size = mr->mem_size; + } + availmem_regions_sz = i; + + /* Initialize TLB1 handling */ + tlb1_init(bootinfo->bi_bar_base); + + /* + * Time Base and Decrementer are updated every 8 CCB bus clocks. 
+ * HID0[SEL_TBCLK] = 0 + */ + decr_config(bootinfo->bi_bus_clk/8); + + /* Init params/tunables that can be overridden by the loader. */ + init_param1(); + + /* Start initializing proc0 and thread0. */ + proc_linkup(&proc0, &thread0); + thread0.td_frame = &frame0; + + /* Set up per-cpu data and store the pointer in SPR general 0. */ + pc = &__pcpu[0]; + pcpu_init(pc, 0, sizeof(struct pcpu)); + pc->pc_curthread = &thread0; + pc->pc_cpuid = 0; + __asm __volatile("mtsprg 0, %0" :: "r"(pc)); + + /* Initialize system mutexes. */ + mutex_init(); + + /* Initialize the console before printing anything. */ + cninit(); + + /* Print out some debug info... */ + debugf("e500_init: console initialized\n"); + debugf(" arg1 startkernel = 0x%08x\n", startkernel); + debugf(" arg2 endkernel = 0x%08x\n", endkernel); + debugf(" arg3 midp = 0x%08x\n", (u_int32_t)mdp); + debugf(" end = 0x%08x\n", (u_int32_t)end); + debugf(" boothowto = 0x%08x\n", boothowto); + debugf(" kernel ccsrbar = 0x%08x\n", CCSRBAR_VA); + debugf(" MSR = 0x%08x\n", mfmsr()); + dump_bootinfo(); + print_kernel_section_addr(); + dump_kenv(); + //tlb1_print_entries(); + //tlb1_print_tlbentries(); + + kdb_init(); + +#ifdef KDB + if (boothowto & RB_KDB) + kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); +#endif + kobj_machdep_init(); + + /* Initialise virtual memory. */ + pmap_mmu_install(MMU_TYPE_BOOKE, 0); + pmap_bootstrap(startkernel, end); + debugf("MSR = 0x%08x\n", mfmsr()); + //tlb1_print_entries(); + //tlb1_print_tlbentries(); + + /* Initialize params/tunables that are derived from memsize. */ + init_param2(physmem); + + /* Finish setting up thread0. */ + thread0.td_kstack = (vm_offset_t)kstack0_space; + thread0.td_pcb = (struct pcb *) + (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + bzero((void *)thread0.td_pcb, sizeof(struct pcb)); + pc->pc_curpcb = thread0.td_pcb; + + /* Initialise the message buffer. */ + msgbufinit(msgbufp, MSGBUF_SIZE); + + /* Enable Machine Check interrupt. */ + mtmsr(mfmsr() | PSL_ME); + isync(); + + debugf("e500_init: e\n"); +} + +/* Initialise a struct pcpu. */ +void +cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) +{ + +} + +/* Set set up registers on exec. */ +void +exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) +{ + struct trapframe *tf; + struct ps_strings arginfo; + + tf = trapframe(td); + bzero(tf, sizeof *tf); + tf->fixreg[1] = -roundup(-stack + 8, 16); + + /* + * XXX Machine-independent code has already copied arguments and + * XXX environment to userland. Get them back here. + */ + (void)copyin((char *)PS_STRINGS, &arginfo, sizeof(arginfo)); + + /* + * Set up arguments for _start(): + * _start(argc, argv, envp, obj, cleanup, ps_strings); + * + * Notes: + * - obj and cleanup are the auxilliary and termination + * vectors. They are fixed up by ld.elf_so. + * - ps_strings is a NetBSD extention, and will be + * ignored by executables which are strictly + * compliant with the SVR4 ABI. + * + * XXX We have to set both regs and retval here due to different + * XXX calling convention in trap.c and init_main.c. + */ + /* + * XXX PG: these get overwritten in the syscall return code. + * execve() should return EJUSTRETURN, like it does on NetBSD. + * Emulate by setting the syscall return value cells. The + * registers still have to be set for init's fork trampoline. 
+ */ + td->td_retval[0] = arginfo.ps_nargvstr; + td->td_retval[1] = (register_t)arginfo.ps_argvstr; + tf->fixreg[3] = arginfo.ps_nargvstr; + tf->fixreg[4] = (register_t)arginfo.ps_argvstr; + tf->fixreg[5] = (register_t)arginfo.ps_envstr; + tf->fixreg[6] = 0; /* auxillary vector */ + tf->fixreg[7] = 0; /* termination vector */ + tf->fixreg[8] = (register_t)PS_STRINGS; /* NetBSD extension */ + + tf->srr0 = entry; + tf->srr1 = PSL_USERSET; + td->td_pcb->pcb_flags = 0; +} + +int +fill_regs(struct thread *td, struct reg *regs) +{ + struct trapframe *tf; + + tf = td->td_frame; + memcpy(regs, tf, sizeof(struct reg)); + + return (0); +} + +int +fill_fpregs(struct thread *td, struct fpreg *fpregs) +{ + + return (0); +} + +/* Get current clock frequency for the given cpu id. */ +int +cpu_est_clockrate(int cpu_id, uint64_t *rate) +{ + + return (ENXIO); +} + +/* + * Construct a PCB from a trapframe. This is called from kdb_trap() where + * we want to start a backtrace from the function that caused us to enter + * the debugger. We have the context in the trapframe, but base the trace + * on the PCB. The PCB doesn't have to be perfect, as long as it contains + * enough for a backtrace. + */ +void +makectx(struct trapframe *tf, struct pcb *pcb) +{ + + pcb->pcb_lr = tf->srr0; + pcb->pcb_sp = tf->fixreg[1]; +} + +/* + * get_mcontext/sendsig helper routine that doesn't touch the + * proc lock. + */ +static int +grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + memset(mcp, 0, sizeof(mcontext_t)); + + mcp->mc_vers = _MC_VERSION; + mcp->mc_flags = 0; + memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe)); + if (flags & GET_MC_CLEAR_RET) { + mcp->mc_gpr[3] = 0; + mcp->mc_gpr[4] = 0; + } + + /* XXX Altivec context ? */ + + mcp->mc_len = sizeof(*mcp); + return (0); +} + +int +get_mcontext(struct thread *td, mcontext_t *mcp, int flags) +{ + int error; + + error = grab_mcontext(td, mcp, flags); + if (error == 0) { + PROC_LOCK(curthread->td_proc); + mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); + PROC_UNLOCK(curthread->td_proc); + } + + return (error); +} + +int +set_mcontext(struct thread *td, const mcontext_t *mcp) +{ + struct pcb *pcb; + struct trapframe *tf; + + pcb = td->td_pcb; + tf = td->td_frame; + + if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp)) + return (EINVAL); + + memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame)); + + /* XXX Altivec context? */ + + return (0); +} + +int +sigreturn(struct thread *td, struct sigreturn_args *uap) +{ + struct proc *p; + ucontext_t uc; + int error; + + CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); + + if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { + CTR1(KTR_SIG, "sigreturn: efault td=%p", td); + return (EFAULT); + } + + error = set_mcontext(td, &uc.uc_mcontext); + if (error != 0) + return (error); + + p = td->td_proc; + PROC_LOCK(p); + td->td_sigmask = uc.uc_sigmask; + SIG_CANTMASK(td->td_sigmask); + signotify(td); + PROC_UNLOCK(p); + + CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", + td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); + + return (EJUSTRETURN); +} + +#ifdef COMPAT_FREEBSD4 +int +freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) +{ + + return sigreturn(td, (struct sigreturn_args *)uap); +} +#endif + +/* + * cpu_idle + * + * Set Wait state enable. 
+ */ +void +cpu_idle (void) +{ + register_t msr; + + msr = mfmsr(); +#ifdef INVARIANTS + if ((msr & PSL_EE) != PSL_EE) { + struct thread *td = curthread; + printf("td msr %x\n", td->td_md.md_saved_msr); + panic("ints disabled in idleproc!"); + } +#endif +#if 0 + /* + * Freescale E500 core RM section 6.4.1 + */ + msr = msr | PSL_WE; + + __asm__(" msync;" + " mtmsr %0;" + " isync;" + "loop: b loop" : + /* no output */ : + "r" (msr)); +#endif +} + +void +spinlock_enter(void) +{ + struct thread *td; + + td = curthread; + if (td->td_md.md_spinlock_count == 0) + td->td_md.md_saved_msr = intr_disable(); + td->td_md.md_spinlock_count++; + critical_enter(); +} + +void +spinlock_exit(void) +{ + struct thread *td; + + td = curthread; + critical_exit(); + td->td_md.md_spinlock_count--; + if (td->td_md.md_spinlock_count == 0) + intr_restore(td->td_md.md_saved_msr); +} + +/* Shutdown the CPU as much as possible. */ +void +cpu_halt(void) +{ + + mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE)); + while (1); +} + +int +set_regs(struct thread *td, struct reg *regs) +{ + struct trapframe *tf; + + tf = td->td_frame; + memcpy(tf, regs, sizeof(struct reg)); + return (0); +} + +int +fill_dbregs(struct thread *td, struct dbreg *dbregs) +{ + + /* No debug registers on PowerPC */ + return (ENOSYS); +} + +int +set_dbregs(struct thread *td, struct dbreg *dbregs) +{ + + /* No debug registers on PowerPC */ + return (ENOSYS); +} + +int +set_fpregs(struct thread *td, struct fpreg *fpregs) +{ + + return (0); +} + +int +ptrace_set_pc(struct thread *td, unsigned long addr) +{ + struct trapframe *tf; + + tf = td->td_frame; + tf->srr0 = (register_t)addr; + + return (0); +} + +int +ptrace_single_step(struct thread *td) +{ + struct trapframe *tf; + u_int reg; + + reg = mfspr(SPR_DBCR0); + reg |= DBCR0_IC | DBCR0_IDM; + mtspr(SPR_DBCR0, reg); + + tf = td->td_frame; + tf->srr1 |= PSL_DE; + return (0); +} + +int +ptrace_clear_single_step(struct thread *td) +{ + struct trapframe *tf; + + tf = td->td_frame; + tf->srr1 &= ~PSL_DE; + return (0); +} + +void +kdb_cpu_clear_singlestep(void) +{ + register_t r; + + r = mfspr(SPR_DBCR0); + mtspr(SPR_DBCR0, r & ~DBCR0_IC); + kdb_frame->srr1 &= ~PSL_DE; +} + +void +kdb_cpu_set_singlestep(void) +{ + register_t r; + + r = mfspr(SPR_DBCR0); + mtspr(SPR_DBCR0, r | DBCR0_IC | DBCR0_IDM); + kdb_frame->srr1 |= PSL_DE; +} + +void +sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) +{ + struct trapframe *tf; + struct sigframe *sfp; + struct sigacts *psp; + struct sigframe sf; + struct thread *td; + struct proc *p; + int oonstack, rndfsize; + int sig, code; + + td = curthread; + p = td->td_proc; + PROC_LOCK_ASSERT(p, MA_OWNED); + sig = ksi->ksi_signo; + code = ksi->ksi_code; + psp = p->p_sigacts; + mtx_assert(&psp->ps_mtx, MA_OWNED); + tf = td->td_frame; + oonstack = sigonstack(tf->fixreg[1]); + + rndfsize = ((sizeof(sf) + 15) / 16) * 16; + + CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, + catcher, sig); + + /* + * Save user context + */ + memset(&sf, 0, sizeof(sf)); + grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0); + sf.sf_uc.uc_sigmask = *mask; + sf.sf_uc.uc_stack = td->td_sigstk; + sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) + ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; + + sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; + + /* + * Allocate and validate space for the signal handler context. 
+ */ + if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && + SIGISMEMBER(psp->ps_sigonstack, sig)) { + sfp = (struct sigframe *)((caddr_t)td->td_sigstk.ss_sp + + td->td_sigstk.ss_size - rndfsize); + } else { + sfp = (struct sigframe *)(tf->fixreg[1] - rndfsize); + } + + /* + * Translate the signal if appropriate (Linux emu ?) + */ + if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) + sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; + + /* + * Save the floating-point state, if necessary, then copy it. + */ + /* XXX */ + + /* + * Set up the registers to return to sigcode. + * + * r1/sp - sigframe ptr + * lr - sig function, dispatched to by blrl in trampoline + * r3 - sig number + * r4 - SIGINFO ? &siginfo : exception code + * r5 - user context + * srr0 - trampoline function addr + */ + tf->lr = (register_t)catcher; + tf->fixreg[1] = (register_t)sfp; + tf->fixreg[FIRSTARG] = sig; + tf->fixreg[FIRSTARG+2] = (register_t)&sfp->sf_uc; + if (SIGISMEMBER(psp->ps_siginfo, sig)) { + /* + * Signal handler installed with SA_SIGINFO. + */ + tf->fixreg[FIRSTARG+1] = (register_t)&sfp->sf_si; + + /* + * Fill siginfo structure. + */ + sf.sf_si = ksi->ksi_info; + sf.sf_si.si_signo = sig; + sf.sf_si.si_addr = (void *) ((tf->exc == EXC_DSI) ? + tf->cpu.booke.dear : tf->srr0); + } else { + /* Old FreeBSD-style arguments. */ + tf->fixreg[FIRSTARG+1] = code; + tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ? + tf->cpu.booke.dear : tf->srr0; + } + mtx_unlock(&psp->ps_mtx); + PROC_UNLOCK(p); + + tf->srr0 = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode)); + + /* + * copy the frame out to userland. + */ + if (copyout((caddr_t)&sf, (caddr_t)sfp, sizeof(sf)) != 0) { + /* + * Process has trashed its stack. Kill it. + */ + CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); + PROC_LOCK(p); + sigexit(td, SIGILL); + } + + CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, + tf->srr0, tf->fixreg[1]); + + PROC_LOCK(p); + mtx_lock(&psp->ps_mtx); +} + +void +bzero(void *buf, size_t len) +{ + caddr_t p; + + p = buf; + + while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { + *p++ = 0; + len--; + } + + while (len >= sizeof(u_long) * 8) { + *(u_long*) p = 0; + *((u_long*) p + 1) = 0; + *((u_long*) p + 2) = 0; + *((u_long*) p + 3) = 0; + len -= sizeof(u_long) * 8; + *((u_long*) p + 4) = 0; + *((u_long*) p + 5) = 0; + *((u_long*) p + 6) = 0; + *((u_long*) p + 7) = 0; + p += sizeof(u_long) * 8; + } + + while (len >= sizeof(u_long)) { + *(u_long*) p = 0; + len -= sizeof(u_long); + p += sizeof(u_long); + } + + while (len) { + *p++ = 0; + len--; + } +} + +/* + * XXX what is the better/proper place for this routine? + */ +int +mem_valid(vm_offset_t addr, int len) +{ + + return (1); +} diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c new file mode 100644 index 0000000..53aa8e0 --- /dev/null +++ b/sys/powerpc/booke/pmap.c @@ -0,0 +1,3107 @@ +/*- + * Copyright (C) 2007 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Some hw specific parts of this pmap were derived or influenced + * by NetBSD's ibm4xx pmap module. More generic code is shared with + * a few other pmap modules from the FreeBSD tree. + */ + + /* + * VM layout notes: + * + * Kernel and user threads run within one common virtual address space + * defined by AS=0. + * + * Virtual address space layout: + * ----------------------------- + * 0x0000_0000 - 0xbfff_efff : user process + * 0xc000_0000 - 0xc1ff_ffff : kerel reserved + * 0xc000_0000 - kernelend : kernel code &data + * 0xc1ff_c000 - 0xc200_0000 : kstack0 + * 0xc200_0000 - 0xffef_ffff : KVA + * 0xc200_0000 - 0xc200_3fff : reserved for page zero/copy + * 0xc200_4000 - ptbl buf end: reserved for ptbl bufs + * ptbl buf end- 0xffef_ffff : actual free KVA space + * 0xfff0_0000 - 0xffff_ffff : I/O devices region + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/user.h> +#include <sys/queue.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/msgbuf.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_param.h> +#include <vm/vm_map.h> +#include <vm/vm_pager.h> +#include <vm/uma.h> + +#include <machine/cpu.h> +#include <machine/pcb.h> +#include <machine/powerpc.h> + +#include <machine/tlb.h> +#include <machine/spr.h> +#include <machine/vmparam.h> +#include <machine/md_var.h> +#include <machine/mmuvar.h> +#include <machine/pmap.h> +#include <machine/pte.h> + +#include "mmu_if.h" + +#define DEBUG +#undef DEBUG + +#ifdef DEBUG +#define debugf(fmt, args...) printf(fmt, ##args) +#else +#define debugf(fmt, args...) +#endif + +#define TODO panic("%s: not implemented", __func__); +#define memmove(d, s, l) bcopy(s, d, l) + +#include "opt_sched.h" +#ifndef SCHED_4BSD +#error "e500 only works with SCHED_4BSD which uses a global scheduler lock." +#endif +extern struct mtx sched_lock; + +/* Kernel physical load address. */ +extern uint32_t kernload; + +struct mem_region availmem_regions[MEM_REGIONS]; +int availmem_regions_sz; + +/* Reserved KVA space and mutex for mmu_booke_zero_page. 
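+ * A single page of KVA is set aside during bootstrap; the mutex + * serializes its use, since only one page at a time can be mapped + * at zero_page_va.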
*/ +static vm_offset_t zero_page_va; +static struct mtx zero_page_mutex; + +/* + * Reserved KVA space for mmu_booke_zero_page_idle. This is used + * by idle thred only, no lock required. + */ +static vm_offset_t zero_page_idle_va; + +/* Reserved KVA space and mutex for mmu_booke_copy_page. */ +static vm_offset_t copy_page_src_va; +static vm_offset_t copy_page_dst_va; +static struct mtx copy_page_mutex; + +/**************************************************************************/ +/* PMAP */ +/**************************************************************************/ + +static void mmu_booke_enter_locked(mmu_t, pmap_t, vm_offset_t, vm_page_t, + vm_prot_t, boolean_t); + +unsigned int kptbl_min; /* Index of the first kernel ptbl. */ +unsigned int kernel_ptbls; /* Number of KVA ptbls. */ + +static int pagedaemon_waken; + +/* + * If user pmap is processed with mmu_booke_remove and the resident count + * drops to 0, there are no more pages to remove, so we need not continue. + */ +#define PMAP_REMOVE_DONE(pmap) \ + ((pmap) != kernel_pmap && (pmap)->pm_stats.resident_count == 0) + +extern void load_pid0(tlbtid_t); + +/**************************************************************************/ +/* TLB and TID handling */ +/**************************************************************************/ + +/* Translation ID busy table */ +static volatile pmap_t tidbusy[TID_MAX + 1]; + +/* + * Actual maximum number of TLB0 entries. + * This number differs between e500 core revisions. + */ +u_int32_t tlb0_size; +u_int32_t tlb0_nways; +u_int32_t tlb0_nentries_per_way; + +#define TLB0_SIZE (tlb0_size) +#define TLB0_NWAYS (tlb0_nways) +#define TLB0_ENTRIES_PER_WAY (tlb0_nentries_per_way) + +/* Pointer to kernel tlb0 table, allocated in mmu_booke_bootstrap() */ +tlb_entry_t *tlb0; + +/* + * Spinlock to assure proper locking between threads and + * between tlb miss handler and kernel. 
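+ * It is set up as a recursive spin mutex in mmu_booke_bootstrap(), + * as the TLB miss path cannot block.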
+ */ +static struct mtx tlb0_mutex; + +#define TLB1_SIZE 16 + +/* In-ram copy of the TLB1 */ +static tlb_entry_t tlb1[TLB1_SIZE]; + +/* Next free entry in the TLB1 */ +static unsigned int tlb1_idx; + +static tlbtid_t tid_alloc(struct pmap *); +static void tid_flush(tlbtid_t); + +extern void tlb1_inval_va(vm_offset_t); +extern void tlb0_inval_va(vm_offset_t); + +static void tlb_print_entry(int, u_int32_t, u_int32_t, u_int32_t, u_int32_t); + +static int tlb1_set_entry(vm_offset_t, vm_offset_t, vm_size_t, u_int32_t); +static void __tlb1_set_entry(unsigned int, vm_offset_t, vm_offset_t, + vm_size_t, u_int32_t, unsigned int, unsigned int); +static void tlb1_write_entry(unsigned int); +static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); +static vm_size_t tlb1_mapin_region(vm_offset_t, vm_offset_t, vm_size_t); + +static vm_size_t tsize2size(unsigned int); +static unsigned int size2tsize(vm_size_t); +static unsigned int ilog2(unsigned int); + +static void set_mas4_defaults(void); + +static void tlb0_inval_entry(vm_offset_t, unsigned int); +static inline unsigned int tlb0_tableidx(vm_offset_t, unsigned int); +static void tlb0_write_entry(unsigned int, unsigned int); +static void tlb0_flush_entry(pmap_t, vm_offset_t); +static void tlb0_init(void); + +/**************************************************************************/ +/* Page table management */ +/**************************************************************************/ + +/* Data for the pv entry allocation mechanism */ +static uma_zone_t pvzone; +static struct vm_object pvzone_obj; +static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; + +#define PV_ENTRY_ZONE_MIN 2048 /* min pv entries in uma zone */ + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +static void ptbl_init(void); +static struct ptbl_buf *ptbl_buf_alloc(void); +static void ptbl_buf_free(struct ptbl_buf *); +static void ptbl_free_pmap_ptbl(pmap_t, pte_t *); + +static void ptbl_alloc(mmu_t, pmap_t, unsigned int); +static void ptbl_free(mmu_t, pmap_t, unsigned int); +static void ptbl_hold(mmu_t, pmap_t, unsigned int); +static int ptbl_unhold(mmu_t, pmap_t, unsigned int); + +static vm_paddr_t pte_vatopa(mmu_t, pmap_t, vm_offset_t); +static pte_t *pte_find(mmu_t, pmap_t, vm_offset_t); +void pte_enter(mmu_t, pmap_t, vm_page_t, vm_offset_t, u_int32_t); +static int pte_remove(mmu_t, pmap_t, vm_offset_t, u_int8_t); + +pv_entry_t pv_alloc(void); +static void pv_free(pv_entry_t); +static void pv_insert(pmap_t, vm_offset_t, vm_page_t); +static void pv_remove(pmap_t, vm_offset_t, vm_page_t); + +/* Number of kva ptbl buffers, each covering one ptbl (PTBL_PAGES). */ +#define PTBL_BUFS (128 * 16) + +struct ptbl_buf { + TAILQ_ENTRY(ptbl_buf) link; /* list link */ + vm_offset_t kva; /* va of mapping */ +}; + +/* ptbl free list and a lock used for access synchronization. */ +static TAILQ_HEAD(, ptbl_buf) ptbl_buf_freelist; +static struct mtx ptbl_buf_freelist_lock; + +/* Base address of kva space allocated fot ptbl bufs. */ +static vm_offset_t ptbl_buf_pool_vabase; + +/* Pointer to ptbl_buf structures. 
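+ * The array itself is carved out of the memory directly following + * the kernel image in mmu_booke_bootstrap() and hooked onto the + * free list by ptbl_init().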
*/ +static struct ptbl_buf *ptbl_bufs; + +/* + * Kernel MMU interface + */ +static vm_offset_t mmu_booke_addr_hint(mmu_t, vm_object_t, vm_offset_t, vm_size_t); +static void mmu_booke_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); +static void mmu_booke_clear_modify(mmu_t, vm_page_t); +static void mmu_booke_clear_reference(mmu_t, vm_page_t); +static void mmu_booke_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t, + vm_offset_t); +static void mmu_booke_copy_page(mmu_t, vm_page_t, vm_page_t); +static void mmu_booke_enter(mmu_t, pmap_t, vm_offset_t, vm_page_t, + vm_prot_t, boolean_t); +static void mmu_booke_enter_object(mmu_t, pmap_t, vm_offset_t, vm_offset_t, + vm_page_t, vm_prot_t); +static void mmu_booke_enter_quick(mmu_t, pmap_t, vm_offset_t, vm_page_t, + vm_prot_t); +static vm_paddr_t mmu_booke_extract(mmu_t, pmap_t, vm_offset_t); +static vm_page_t mmu_booke_extract_and_hold(mmu_t, pmap_t, vm_offset_t, + vm_prot_t); +static void mmu_booke_init(mmu_t); +static boolean_t mmu_booke_is_modified(mmu_t, vm_page_t); +static boolean_t mmu_booke_is_prefaultable(mmu_t, pmap_t, vm_offset_t); +static boolean_t mmu_booke_ts_referenced(mmu_t, vm_page_t); +static vm_offset_t mmu_booke_map(mmu_t, vm_offset_t *, vm_offset_t, vm_offset_t, + int); +static int mmu_booke_mincore(mmu_t, pmap_t, vm_offset_t); +static void mmu_booke_object_init_pt(mmu_t, pmap_t, vm_offset_t, + vm_object_t, vm_pindex_t, vm_size_t); +static boolean_t mmu_booke_page_exists_quick(mmu_t, pmap_t, vm_page_t); +static void mmu_booke_page_init(mmu_t, vm_page_t); +static int mmu_booke_page_wired_mappings(mmu_t, vm_page_t); +static void mmu_booke_pinit(mmu_t, pmap_t); +static void mmu_booke_pinit0(mmu_t, pmap_t); +static void mmu_booke_protect(mmu_t, pmap_t, vm_offset_t, vm_offset_t, + vm_prot_t); +static void mmu_booke_qenter(mmu_t, vm_offset_t, vm_page_t *, int); +static void mmu_booke_qremove(mmu_t, vm_offset_t, int); +static void mmu_booke_release(mmu_t, pmap_t); +static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); +static void mmu_booke_remove_all(mmu_t, vm_page_t); +static void mmu_booke_remove_write(mmu_t, vm_page_t); +static void mmu_booke_zero_page(mmu_t, vm_page_t); +static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); +static void mmu_booke_zero_page_idle(mmu_t, vm_page_t); +static void mmu_booke_activate(mmu_t, struct thread *); +static void mmu_booke_deactivate(mmu_t, struct thread *); +static void mmu_booke_bootstrap(mmu_t, vm_offset_t, vm_offset_t); +static void *mmu_booke_mapdev(mmu_t, vm_offset_t, vm_size_t); +static void mmu_booke_unmapdev(mmu_t, vm_offset_t, vm_size_t); +static vm_offset_t mmu_booke_kextract(mmu_t, vm_offset_t); +static void mmu_booke_kenter(mmu_t, vm_offset_t, vm_offset_t); +static void mmu_booke_kremove(mmu_t, vm_offset_t); +static boolean_t mmu_booke_dev_direct_mapped(mmu_t, vm_offset_t, vm_size_t); +static boolean_t mmu_booke_page_executable(mmu_t, vm_page_t); + +static mmu_method_t mmu_booke_methods[] = { + /* pmap dispatcher interface */ + MMUMETHOD(mmu_addr_hint, mmu_booke_addr_hint), + MMUMETHOD(mmu_change_wiring, mmu_booke_change_wiring), + MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), + MMUMETHOD(mmu_clear_reference, mmu_booke_clear_reference), + MMUMETHOD(mmu_copy, mmu_booke_copy), + MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), + MMUMETHOD(mmu_enter, mmu_booke_enter), + MMUMETHOD(mmu_enter_object, mmu_booke_enter_object), + MMUMETHOD(mmu_enter_quick, mmu_booke_enter_quick), + MMUMETHOD(mmu_extract, mmu_booke_extract), + MMUMETHOD(mmu_extract_and_hold, 
mmu_booke_extract_and_hold), + MMUMETHOD(mmu_init, mmu_booke_init), + MMUMETHOD(mmu_is_modified, mmu_booke_is_modified), + MMUMETHOD(mmu_is_prefaultable, mmu_booke_is_prefaultable), + MMUMETHOD(mmu_ts_referenced, mmu_booke_ts_referenced), + MMUMETHOD(mmu_map, mmu_booke_map), + MMUMETHOD(mmu_mincore, mmu_booke_mincore), + MMUMETHOD(mmu_object_init_pt, mmu_booke_object_init_pt), + MMUMETHOD(mmu_page_exists_quick,mmu_booke_page_exists_quick), + MMUMETHOD(mmu_page_init, mmu_booke_page_init), + MMUMETHOD(mmu_page_wired_mappings, mmu_booke_page_wired_mappings), + MMUMETHOD(mmu_pinit, mmu_booke_pinit), + MMUMETHOD(mmu_pinit0, mmu_booke_pinit0), + MMUMETHOD(mmu_protect, mmu_booke_protect), + MMUMETHOD(mmu_qenter, mmu_booke_qenter), + MMUMETHOD(mmu_qremove, mmu_booke_qremove), + MMUMETHOD(mmu_release, mmu_booke_release), + MMUMETHOD(mmu_remove, mmu_booke_remove), + MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), + MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), + MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), + MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), + MMUMETHOD(mmu_zero_page_idle, mmu_booke_zero_page_idle), + MMUMETHOD(mmu_activate, mmu_booke_activate), + MMUMETHOD(mmu_deactivate, mmu_booke_deactivate), + + /* Internal interfaces */ + MMUMETHOD(mmu_bootstrap, mmu_booke_bootstrap), + MMUMETHOD(mmu_dev_direct_mapped,mmu_booke_dev_direct_mapped), + MMUMETHOD(mmu_mapdev, mmu_booke_mapdev), + MMUMETHOD(mmu_kenter, mmu_booke_kenter), + MMUMETHOD(mmu_kextract, mmu_booke_kextract), +/* MMUMETHOD(mmu_kremove, mmu_booke_kremove), */ + MMUMETHOD(mmu_page_executable, mmu_booke_page_executable), + MMUMETHOD(mmu_unmapdev, mmu_booke_unmapdev), + + { 0, 0 } +}; + +static mmu_def_t booke_mmu = { + MMU_TYPE_BOOKE, + mmu_booke_methods, + 0 +}; +MMU_DEF(booke_mmu); + +/* + * This routine defines the region(s) of memory that should + * not be tested for the modified bit. + */ +static __inline int +track_modified_needed(pmap_t pmap, vm_offset_t va) +{ + + if (pmap == kernel_pmap) + return ((va < kmi.clean_sva) || (va >= kmi.clean_eva)); + else + return (1); +} + +/* Return number of entries in TLB0. */ +static __inline void +tlb0_get_tlbconf(void) +{ + uint32_t tlb0_cfg; + + tlb0_cfg = mfspr(SPR_TLB0CFG); + tlb0_size = tlb0_cfg & TLBCFG_NENTRY_MASK; + tlb0_nways = (tlb0_cfg & TLBCFG_ASSOC_MASK) >> TLBCFG_ASSOC_SHIFT; + tlb0_nentries_per_way = tlb0_size/tlb0_nways; +} + +/* Initialize pool of kva ptbl buffers. */ +static void +ptbl_init(void) +{ + int i; + + //debugf("ptbl_init: s (ptbl_bufs = 0x%08x size 0x%08x)\n", + // (u_int32_t)ptbl_bufs, sizeof(struct ptbl_buf) * PTBL_BUFS); + //debugf("ptbl_init: s (ptbl_buf_pool_vabase = 0x%08x size = 0x%08x)\n", + // ptbl_buf_pool_vabase, PTBL_BUFS * PTBL_PAGES * PAGE_SIZE); + + mtx_init(&ptbl_buf_freelist_lock, "ptbl bufs lock", NULL, MTX_DEF); + TAILQ_INIT(&ptbl_buf_freelist); + + for (i = 0; i < PTBL_BUFS; i++) { + ptbl_bufs[i].kva = ptbl_buf_pool_vabase + i * PTBL_PAGES * PAGE_SIZE; + TAILQ_INSERT_TAIL(&ptbl_buf_freelist, &ptbl_bufs[i], link); + } + + //debugf("ptbl_init: e\n"); +} + +/* Get an sf_buf from the freelist. */ +static struct ptbl_buf * +ptbl_buf_alloc(void) +{ + struct ptbl_buf *buf; + + //debugf("ptbl_buf_alloc: s\n"); + + mtx_lock(&ptbl_buf_freelist_lock); + buf = TAILQ_FIRST(&ptbl_buf_freelist); + if (buf != NULL) + TAILQ_REMOVE(&ptbl_buf_freelist, buf, link); + mtx_unlock(&ptbl_buf_freelist_lock); + + //debugf("ptbl_buf_alloc: e (buf = 0x%08x)\n", (u_int32_t)buf); + return (buf); +} + +/* Return ptbl buff to free pool. 
*/ +static void +ptbl_buf_free(struct ptbl_buf *buf) +{ + + //debugf("ptbl_buf_free: s (buf = 0x%08x)\n", (u_int32_t)buf); + + mtx_lock(&ptbl_buf_freelist_lock); + TAILQ_INSERT_TAIL(&ptbl_buf_freelist, buf, link); + mtx_unlock(&ptbl_buf_freelist_lock); + + //debugf("ptbl_buf_free: e\n"); +} + +/* + * Search the list of allocated ptbl bufs and find + * on list of allocated ptbls + */ +static void +ptbl_free_pmap_ptbl(pmap_t pmap, pte_t *ptbl) +{ + struct ptbl_buf *pbuf; + + //debugf("ptbl_free_pmap_ptbl: s (pmap = 0x%08x ptbl = 0x%08x)\n", + // (u_int32_t)pmap, (u_int32_t)ptbl); + + TAILQ_FOREACH(pbuf, &pmap->ptbl_list, link) { + if (pbuf->kva == (vm_offset_t)ptbl) { + /* Remove from pmap ptbl buf list. */ + TAILQ_REMOVE(&pmap->ptbl_list, pbuf, link); + + /* Free correspondig ptbl buf. */ + ptbl_buf_free(pbuf); + + break; + } + } + + //debugf("ptbl_free_pmap_ptbl: e\n"); +} + +/* Allocate page table. */ +static void +ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) +{ + vm_page_t mtbl[PTBL_PAGES]; + vm_page_t m; + struct ptbl_buf *pbuf; + unsigned int pidx; + int i; + + //int su = (pmap == kernel_pmap); + //debugf("ptbl_alloc: s (pmap = 0x%08x su = %d pdir_idx = %d)\n", (u_int32_t)pmap, su, pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_alloc: invalid pdir_idx")); + KASSERT((pmap->pm_pdir[pdir_idx] == NULL), + ("pte_alloc: valid ptbl entry exists!")); + + pbuf = ptbl_buf_alloc(); + if (pbuf == NULL) + panic("pte_alloc: couldn't alloc kernel virtual memory"); + pmap->pm_pdir[pdir_idx] = (pte_t *)pbuf->kva; + //debugf("ptbl_alloc: kva = 0x%08x\n", (u_int32_t)pmap->pm_pdir[pdir_idx]); + + /* Allocate ptbl pages, this will sleep! */ + for (i = 0; i < PTBL_PAGES; i++) { + pidx = (PTBL_PAGES * pdir_idx) + i; + while ((m = vm_page_alloc(NULL, pidx, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + VM_WAIT; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + } + mtbl[i] = m; + } + + /* Map in allocated pages into kernel_pmap. */ + mmu_booke_qenter(mmu, (vm_offset_t)pmap->pm_pdir[pdir_idx], mtbl, PTBL_PAGES); + + /* Zero whole ptbl. */ + bzero((caddr_t)pmap->pm_pdir[pdir_idx], PTBL_PAGES * PAGE_SIZE); + + /* Add pbuf to the pmap ptbl bufs list. */ + TAILQ_INSERT_TAIL(&pmap->ptbl_list, pbuf, link); + + //debugf("ptbl_alloc: e\n"); +} + +/* Free ptbl pages and invalidate pdir entry. */ +static void +ptbl_free(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) +{ + pte_t *ptbl; + vm_paddr_t pa; + vm_offset_t va; + vm_page_t m; + int i; + + //int su = (pmap == kernel_pmap); + //debugf("ptbl_free: s (pmap = 0x%08x su = %d pdir_idx = %d)\n", (u_int32_t)pmap, su, pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_free: invalid pdir_idx")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + //debugf("ptbl_free: ptbl = 0x%08x\n", (u_int32_t)ptbl); + KASSERT((ptbl != NULL), ("ptbl_free: null ptbl")); + + for (i = 0; i < PTBL_PAGES; i++) { + va = ((vm_offset_t)ptbl + (i * PAGE_SIZE)); + pa = pte_vatopa(mmu, kernel_pmap, va); + m = PHYS_TO_VM_PAGE(pa); + vm_page_free_zero(m); + atomic_subtract_int(&cnt.v_wire_count, 1); + mmu_booke_kremove(mmu, va); + } + + ptbl_free_pmap_ptbl(pmap, ptbl); + pmap->pm_pdir[pdir_idx] = NULL; + + //debugf("ptbl_free: e\n"); +} + +/* + * Decrement ptbl pages hold count and attempt to free ptbl pages. + * Called when removing pte entry from ptbl. + * + * Return 1 if ptbl pages were freed. 
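+ * Return 0 otherwise. The wire_count of the ptbl pages doubles as + * the hold count, so only the last page needs to be inspected.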
+ */ +static int +ptbl_unhold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) +{ + pte_t *ptbl; + vm_paddr_t pa; + vm_page_t m; + int i; + + //int su = (pmap == kernel_pmap); + //debugf("ptbl_unhold: s (pmap = %08x su = %d pdir_idx = %d)\n", + // (u_int32_t)pmap, su, pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_unhold: invalid pdir_idx")); + KASSERT((pmap != kernel_pmap), + ("ptbl_unhold: unholding kernel ptbl!")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + //debugf("ptbl_unhold: ptbl = 0x%08x\n", (u_int32_t)ptbl); + KASSERT(((vm_offset_t)ptbl >= VM_MIN_KERNEL_ADDRESS), + ("ptbl_unhold: non kva ptbl")); + + /* decrement hold count */ + for (i = 0; i < PTBL_PAGES; i++) { + pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); + m = PHYS_TO_VM_PAGE(pa); + m->wire_count--; + } + + /* + * Free ptbl pages if there are no pte etries in this ptbl. + * wire_count has the same value for all ptbl pages, so check + * the last page. + */ + if (m->wire_count == 0) { + ptbl_free(mmu, pmap, pdir_idx); + + //debugf("ptbl_unhold: e (freed ptbl)\n"); + return (1); + } + + //debugf("ptbl_unhold: e\n"); + return (0); +} + +/* + * Increment hold count for ptbl pages. This routine is used when + * new pte entry is being inserted into ptbl. + */ +static void +ptbl_hold(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx) +{ + vm_paddr_t pa; + pte_t *ptbl; + vm_page_t m; + int i; + + //debugf("ptbl_hold: s (pmap = 0x%08x pdir_idx = %d)\n", (u_int32_t)pmap, pdir_idx); + + KASSERT((pdir_idx <= (VM_MAXUSER_ADDRESS / PDIR_SIZE)), + ("ptbl_hold: invalid pdir_idx")); + KASSERT((pmap != kernel_pmap), + ("ptbl_hold: holding kernel ptbl!")); + + ptbl = pmap->pm_pdir[pdir_idx]; + + KASSERT((ptbl != NULL), ("ptbl_hold: null ptbl")); + + for (i = 0; i < PTBL_PAGES; i++) { + pa = pte_vatopa(mmu, kernel_pmap, (vm_offset_t)ptbl + (i * PAGE_SIZE)); + m = PHYS_TO_VM_PAGE(pa); + m->wire_count++; + } + + //debugf("ptbl_hold: e\n"); +} + +/* Allocate pv_entry structure. */ +pv_entry_t +pv_alloc(void) +{ + pv_entry_t pv; + + debugf("pv_alloc: s\n"); + + pv_entry_count++; + if ((pv_entry_count > pv_entry_high_water) && (pagedaemon_waken == 0)) { + pagedaemon_waken = 1; + wakeup (&vm_pages_needed); + } + pv = uma_zalloc(pvzone, M_NOWAIT); + + debugf("pv_alloc: e\n"); + return (pv); +} + +/* Free pv_entry structure. */ +static __inline void +pv_free(pv_entry_t pve) +{ + //debugf("pv_free: s\n"); + + pv_entry_count--; + uma_zfree(pvzone, pve); + + //debugf("pv_free: e\n"); +} + + +/* Allocate and initialize pv_entry structure. */ +static void +pv_insert(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pve; + + //int su = (pmap == kernel_pmap); + //debugf("pv_insert: s (su = %d pmap = 0x%08x va = 0x%08x m = 0x%08x)\n", su, + // (u_int32_t)pmap, va, (u_int32_t)m); + + pve = pv_alloc(); + if (pve == NULL) + panic("pv_insert: no pv entries!"); + + pve->pv_pmap = pmap; + pve->pv_va = va; + + /* add to pv_list */ + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + TAILQ_INSERT_TAIL(&m->md.pv_list, pve, pv_link); + + //debugf("pv_insert: e\n"); +} + +/* Destroy pv entry. 
*/ +static void +pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pve; + + //int su = (pmap == kernel_pmap); + //debugf("pv_remove: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + /* find pv entry */ + TAILQ_FOREACH(pve, &m->md.pv_list, pv_link) { + if ((pmap == pve->pv_pmap) && (va == pve->pv_va)) { + /* remove from pv_list */ + TAILQ_REMOVE(&m->md.pv_list, pve, pv_link); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); + + /* free pv entry struct */ + pv_free(pve); + + break; + } + } + + //debugf("pv_remove: e\n"); +} + +/* + * Clean pte entry, try to free page table page if requested. + * + * Return 1 if ptbl pages were freed, otherwise return 0. + */ +static int +pte_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, u_int8_t flags) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + vm_page_t m; + pte_t *ptbl; + pte_t *pte; + + //int su = (pmap == kernel_pmap); + //debugf("pte_remove: s (su = %d pmap = 0x%08x va = 0x%08x flags = %d)\n", + // su, (u_int32_t)pmap, va, flags); + + ptbl = pmap->pm_pdir[pdir_idx]; + KASSERT(ptbl, ("pte_remove: null ptbl")); + + pte = &ptbl[ptbl_idx]; + + if (pte == NULL || !PTE_ISVALID(pte)) + return (0); + + /* Get vm_page_t for mapped pte. */ + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + if (PTE_ISWIRED(pte)) + pmap->pm_stats.wired_count--; + + if (!PTE_ISFAKE(pte)) { + /* Handle managed entry. */ + if (PTE_ISMANAGED(pte)) { + + /* Handle modified pages. */ + if (PTE_ISMODIFIED(pte)) { + if (track_modified_needed(pmap, va)) + vm_page_dirty(m); + } + + /* Referenced pages. */ + if (PTE_ISREFERENCED(pte)) + vm_page_flag_set(m, PG_REFERENCED); + + /* Remove pv_entry from pv_list. */ + pv_remove(pmap, va, m); + } + } + + pte->flags = 0; + pte->rpn = 0; + pmap->pm_stats.resident_count--; + + if (flags & PTBL_UNHOLD) { + //debugf("pte_remove: e (unhold)\n"); + return (ptbl_unhold(mmu, pmap, pdir_idx)); + } + + //debugf("pte_remove: e\n"); + return (0); +} + +/* + * Insert PTE for a given page and virtual address. + */ +void +pte_enter(mmu_t mmu, pmap_t pmap, vm_page_t m, vm_offset_t va, u_int32_t flags) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + pte_t *ptbl; + pte_t *pte; + + //int su = (pmap == kernel_pmap); + //debugf("pte_enter: s (su = %d pmap = 0x%08x va = 0x%08x)\n", su, (u_int32_t)pmap, va); + + /* Get the page table pointer. */ + ptbl = pmap->pm_pdir[pdir_idx]; + + if (ptbl) { + /* + * Check if there is valid mapping for requested + * va, if there is, remove it. + */ + pte = &pmap->pm_pdir[pdir_idx][ptbl_idx]; + if (PTE_ISVALID(pte)) { + pte_remove(mmu, pmap, va, PTBL_HOLD); + } else { + /* + * pte is not used, increment hold count + * for ptbl pages. + */ + if (pmap != kernel_pmap) + ptbl_hold(mmu, pmap, pdir_idx); + } + } else { + /* Allocate page table pages. */ + ptbl_alloc(mmu, pmap, pdir_idx); + } + + /* Flush entry from TLB. */ + tlb0_flush_entry(pmap, va); + + pte = &(pmap->pm_pdir[pdir_idx][ptbl_idx]); + + /* + * Insert pv_entry into pv_list for mapped page + * if part of managed memory. + */ + if ((m->flags & PG_FICTITIOUS) == 0) { + if ((m->flags & PG_UNMANAGED) == 0) { + pte->flags |= PTE_MANAGED; + + /* Create and insert pv entry. 
*/ + pv_insert(pmap, va, m); + } + } else { + pte->flags |= PTE_FAKE; + } + + pmap->pm_stats.resident_count++; + pte->rpn = VM_PAGE_TO_PHYS(m) & ~PTE_PA_MASK; + pte->flags |= (PTE_VALID | flags); + + //debugf("pte_enter: e\n"); +} + +/* Return the pa for the given pmap/va. */ +static vm_paddr_t +pte_vatopa(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa = 0; + pte_t *pte; + + pte = pte_find(mmu, pmap, va); + if ((pte != NULL) && PTE_ISVALID(pte)) + pa = (PTE_PA(pte) | (va & PTE_PA_MASK)); + return (pa); +} + +/* Get a pointer to a PTE in a page table. */ +static pte_t * +pte_find(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + + KASSERT((pmap != NULL), ("pte_find: invalid pmap")); + + if (pmap->pm_pdir[pdir_idx]) + return (&(pmap->pm_pdir[pdir_idx][ptbl_idx])); + + return (NULL); +} + +/**************************************************************************/ +/* PMAP related */ +/**************************************************************************/ + +/* + * This is called during e500_init, before the system is really initialized. + */ +static void +mmu_booke_bootstrap(mmu_t mmu, vm_offset_t kernelstart, vm_offset_t kernelend) +{ + vm_offset_t phys_kernelend; + struct mem_region *mp, *mp1; + int cnt, i, j; + u_int s, e, sz; + u_int phys_avail_count; + vm_size_t physsz, hwphyssz; + vm_offset_t kernel_pdir; + + debugf("mmu_booke_bootstrap: entered\n"); + + /* Align kernel start and end address (kernel image). */ + kernelstart = trunc_page(kernelstart); + kernelend = round_page(kernelend); + + /* Allocate space for the message buffer. */ + msgbufp = (struct msgbuf *)kernelend; + kernelend += MSGBUF_SIZE; + debugf(" msgbufp at 0x%08x end = 0x%08x\n", (u_int32_t)msgbufp, + kernelend); + + kernelend = round_page(kernelend); + + /* Allocate space for tlb0 table. */ + tlb0_get_tlbconf(); /* Read TLB0 size and associativity. */ + tlb0 = (tlb_entry_t *)kernelend; + kernelend += sizeof(tlb_entry_t) * tlb0_size; + memset((void *)tlb0, 0, sizeof(tlb_entry_t) * tlb0_size); + debugf(" tlb0 at 0x%08x end = 0x%08x\n", (u_int32_t)tlb0, kernelend); + + kernelend = round_page(kernelend); + + /* Allocate space for ptbl_bufs. */ + ptbl_bufs = (struct ptbl_buf *)kernelend; + kernelend += sizeof(struct ptbl_buf) * PTBL_BUFS; + memset((void *)ptbl_bufs, 0, sizeof(struct ptbl_buf) * PTBL_SIZE); + debugf(" ptbl_bufs at 0x%08x end = 0x%08x\n", (u_int32_t)ptbl_bufs, + kernelend); + + kernelend = round_page(kernelend); + + /* Allocate PTE tables for kernel KVA. */ + kernel_pdir = kernelend; + kernel_ptbls = (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS + + PDIR_SIZE - 1) / PDIR_SIZE; + kernelend += kernel_ptbls * PTBL_PAGES * PAGE_SIZE; + memset((void *)kernel_pdir, 0, kernel_ptbls * PTBL_PAGES * PAGE_SIZE); + debugf(" kernel ptbls: %d\n", kernel_ptbls); + debugf(" kernel pdir at 0x%08x\n", kernel_pdir); + + if (kernelend - kernelstart > 0x1000000) { + kernelend = (kernelend + 0x3fffff) & ~0x3fffff; + tlb1_mapin_region(kernelstart + 0x1000000, + kernload + 0x1000000, kernelend - kernelstart - 0x1000000); + } else + kernelend = (kernelend + 0xffffff) & ~0xffffff; + + /*******************************************************/ + /* Set the start and end of kva. */ + /*******************************************************/ + virtual_avail = kernelend; + virtual_end = VM_MAX_KERNEL_ADDRESS; + + /* Allocate KVA space for page zero/copy operations. 
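+ * Four single-page mappings are reserved below: zero_page_va, + * zero_page_idle_va and the copy_page source/destination pair.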
*/ + zero_page_va = virtual_avail; + virtual_avail += PAGE_SIZE; + zero_page_idle_va = virtual_avail; + virtual_avail += PAGE_SIZE; + copy_page_src_va = virtual_avail; + virtual_avail += PAGE_SIZE; + copy_page_dst_va = virtual_avail; + virtual_avail += PAGE_SIZE; + + /* Initialize page zero/copy mutexes. */ + mtx_init(&zero_page_mutex, "mmu_booke_zero_page", NULL, MTX_DEF); + mtx_init(©_page_mutex, "mmu_booke_copy_page", NULL, MTX_DEF); + + /* Initialize tlb0 table mutex. */ + mtx_init(&tlb0_mutex, "tlb0", NULL, MTX_SPIN | MTX_RECURSE); + + /* Allocate KVA space for ptbl bufs. */ + ptbl_buf_pool_vabase = virtual_avail; + virtual_avail += PTBL_BUFS * PTBL_PAGES * PAGE_SIZE; + + debugf("ptbl_buf_pool_vabase = 0x%08x\n", ptbl_buf_pool_vabase); + debugf("virtual_avail = %08x\n", virtual_avail); + debugf("virtual_end = %08x\n", virtual_end); + + /* Calculate corresponding physical addresses for the kernel region. */ + phys_kernelend = kernload + (kernelend - kernelstart); + + debugf("kernel image and allocated data:\n"); + debugf(" kernload = 0x%08x\n", kernload); + debugf(" kernelstart = 0x%08x\n", kernelstart); + debugf(" kernelend = 0x%08x\n", kernelend); + debugf(" kernel size = 0x%08x\n", kernelend - kernelstart); + + if (sizeof(phys_avail) / sizeof(phys_avail[0]) < availmem_regions_sz) + panic("mmu_booke_bootstrap: phys_avail too small"); + + /* + * Removed kernel physical address range from avail + * regions list. Page align all regions. + * Non-page aligned memory isn't very interesting to us. + * Also, sort the entries for ascending addresses. + */ + sz = 0; + cnt = availmem_regions_sz; + debugf("processing avail regions:\n"); + for (mp = availmem_regions; mp->mr_size; mp++) { + s = mp->mr_start; + e = mp->mr_start + mp->mr_size; + debugf(" %08x-%08x -> ", s, e); + /* Check whether this region holds all of the kernel. */ + if (s < kernload && e > phys_kernelend) { + availmem_regions[cnt].mr_start = phys_kernelend; + availmem_regions[cnt++].mr_size = e - phys_kernelend; + e = kernload; + } + /* Look whether this regions starts within the kernel. */ + if (s >= kernload && s < phys_kernelend) { + if (e <= phys_kernelend) + goto empty; + s = phys_kernelend; + } + /* Now look whether this region ends within the kernel. */ + if (e > kernload && e <= phys_kernelend) { + if (s >= kernload) + goto empty; + e = kernload; + } + /* Now page align the start and size of the region. */ + s = round_page(s); + e = trunc_page(e); + if (e < s) + e = s; + sz = e - s; + debugf("%08x-%08x = %x\n", s, e, sz); + + /* Check whether some memory is left here. */ + if (sz == 0) { + empty: + memmove(mp, mp + 1, + (cnt - (mp - availmem_regions)) * sizeof(*mp)); + cnt--; + mp--; + continue; + } + + /* Do an insertion sort. 
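+ * The sort keeps availmem_regions ordered by ascending start + * address so that phys_avail can be filled in sequentially below.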
*/ + for (mp1 = availmem_regions; mp1 < mp; mp1++) + if (s < mp1->mr_start) + break; + if (mp1 < mp) { + memmove(mp1 + 1, mp1, (char *)mp - (char *)mp1); + mp1->mr_start = s; + mp1->mr_size = sz; + } else { + mp->mr_start = s; + mp->mr_size = sz; + } + } + availmem_regions_sz = cnt; + + /*******************************************************/ + /* Fill in phys_avail table, based on availmem_regions */ + /*******************************************************/ + phys_avail_count = 0; + physsz = 0; + hwphyssz = 0; + TUNABLE_ULONG_FETCH("hw.physmem", (u_long *) &hwphyssz); + + debugf("fill in phys_avail:\n"); + for (i = 0, j = 0; i < availmem_regions_sz; i++, j += 2) { + + debugf(" region: 0x%08x - 0x%08x (0x%08x)\n", + availmem_regions[i].mr_start, + availmem_regions[i].mr_start + availmem_regions[i].mr_size, + availmem_regions[i].mr_size); + + if (hwphyssz != 0 && (physsz + availmem_regions[i].mr_size) >= hwphyssz) { + debugf(" hw.physmem adjust\n"); + if (physsz < hwphyssz) { + phys_avail[j] = availmem_regions[i].mr_start; + phys_avail[j + 1] = availmem_regions[i].mr_start + + hwphyssz - physsz; + physsz = hwphyssz; + phys_avail_count++; + } + break; + } + + phys_avail[j] = availmem_regions[i].mr_start; + phys_avail[j + 1] = availmem_regions[i].mr_start + + availmem_regions[i].mr_size; + phys_avail_count++; + physsz += availmem_regions[i].mr_size; + } + physmem = btoc(physsz); + + /* Calculate the last available physical address. */ + for (i = 0; phys_avail[i + 2] != 0; i += 2) + ; + Maxmem = powerpc_btop(phys_avail[i + 1]); + + debugf("Maxmem = 0x%08lx\n", Maxmem); + debugf("phys_avail_count = %d\n", phys_avail_count); + debugf("physsz = 0x%08x physmem = %ld (0x%08lx)\n", physsz, physmem, physmem); + + /*******************************************************/ + /* Initialize (statically allocated) kernel pmap. */ + /*******************************************************/ + PMAP_LOCK_INIT(kernel_pmap); + kptbl_min = VM_MIN_KERNEL_ADDRESS / PDIR_SIZE; + + debugf("kernel_pmap = 0x%08x\n", (u_int32_t)kernel_pmap); + debugf("kptbl_min = %d, kernel_kptbls = %d\n", kptbl_min, kernel_ptbls); + debugf("kernel pdir range: 0x%08x - 0x%08x\n", + kptbl_min * PDIR_SIZE, (kptbl_min + kernel_ptbls) * PDIR_SIZE - 1); + + /* Initialize kernel pdir */ + for (i = 0; i < kernel_ptbls; i++) + kernel_pmap->pm_pdir[kptbl_min + i] = + (pte_t *)(kernel_pdir + (i * PAGE_SIZE * PTBL_PAGES)); + + kernel_pmap->pm_tid = KERNEL_TID; + kernel_pmap->pm_active = ~0; + + /* Initialize tidbusy with kenel_pmap entry. */ + tidbusy[0] = kernel_pmap; + + /*******************************************************/ + /* Final setup */ + /*******************************************************/ + /* Initialize TLB0 handling. */ + tlb0_init(); + + debugf("mmu_booke_bootstrap: exit\n"); +} + +/* + * Get the physical page address for the given pmap/virtual address. + */ +static vm_paddr_t +mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t pa; + + PMAP_LOCK(pmap); + pa = pte_vatopa(mmu, pmap, va); + PMAP_UNLOCK(pmap); + + return (pa); +} + +/* + * Extract the physical page address associated with the given + * kernel virtual address. + */ +static vm_paddr_t +mmu_booke_kextract(mmu_t mmu, vm_offset_t va) +{ + + return (pte_vatopa(mmu, kernel_pmap, va)); +} + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. 
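+ * Here this means creating the pv entry zone, deriving its high + * water mark from the vm.pmap.shpgperproc and vm.pmap.pv_entries + * tunables, and setting up the ptbl buffer allocator.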
+ */ +static void +mmu_booke_init(mmu_t mmu) +{ + int shpgperproc = PMAP_SHPGPERPROC; + + //debugf("mmu_booke_init: s\n"); + + /* + * Initialize the address space (zone) for the pv entries. Set a + * high water mark so that the system can recover from excessive + * numbers of pv entries. + */ + pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + + TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); + pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; + + TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); + pv_entry_high_water = 9 * (pv_entry_max / 10); + + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); + + /* Pre-fill pvzone with initial number of pv entries. */ + uma_prealloc(pvzone, PV_ENTRY_ZONE_MIN); + + /* Initialize ptbl allocation. */ + ptbl_init(); + + //debugf("mmu_booke_init: e\n"); +} + +/* + * Map a list of wired pages into kernel virtual address space. This is + * intended for temporary mappings which do not need page modification or + * references recorded. Existing mappings in the region are overwritten. + */ +static void +mmu_booke_qenter(mmu_t mmu, vm_offset_t sva, vm_page_t *m, int count) +{ + vm_offset_t va; + + //debugf("mmu_booke_qenter: s (sva = 0x%08x count = %d)\n", sva, count); + + va = sva; + while (count-- > 0) { + mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(*m)); + va += PAGE_SIZE; + m++; + } + + //debugf("mmu_booke_qenter: e\n"); +} + +/* + * Remove page mappings from kernel virtual address space. Intended for + * temporary mappings entered by mmu_booke_qenter. + */ +static void +mmu_booke_qremove(mmu_t mmu, vm_offset_t sva, int count) +{ + vm_offset_t va; + + //debugf("mmu_booke_qremove: s (sva = 0x%08x count = %d)\n", sva, count); + + va = sva; + while (count-- > 0) { + mmu_booke_kremove(mmu, va); + va += PAGE_SIZE; + } + + //debugf("mmu_booke_qremove: e\n"); +} + +/* + * Map a wired page into kernel virtual address space. + */ +static void +mmu_booke_kenter(mmu_t mmu, vm_offset_t va, vm_offset_t pa) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + u_int32_t flags; + pte_t *pte; + + //debugf("mmu_booke_kenter: s (pdir_idx = %d ptbl_idx = %d va=0x%08x pa=0x%08x)\n", + // pdir_idx, ptbl_idx, va, pa); + + KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), + ("mmu_booke_kenter: invalid va")); + +#if 0 + /* assume IO mapping, set I, G bits */ + flags = (PTE_G | PTE_I | PTE_FAKE); + + /* if mapping is within system memory, do not set I, G bits */ + for (i = 0; i < totalmem_regions_sz; i++) { + if ((pa >= totalmem_regions[i].mr_start) && + (pa < (totalmem_regions[i].mr_start + + totalmem_regions[i].mr_size))) { + flags &= ~(PTE_I | PTE_G | PTE_FAKE); + break; + } + } +#else + flags = 0; +#endif + + flags |= (PTE_SR | PTE_SW | PTE_SX | PTE_WIRED | PTE_VALID); + + pte = &(kernel_pmap->pm_pdir[pdir_idx][ptbl_idx]); + + if (PTE_ISVALID(pte)) { + //debugf("mmu_booke_kenter: replacing entry!\n"); + + /* Flush entry from TLB0 */ + tlb0_flush_entry(kernel_pmap, va); + } + + pte->rpn = pa & ~PTE_PA_MASK; + pte->flags = flags; + + //debugf("mmu_booke_kenter: pdir_idx = %d ptbl_idx = %d va=0x%08x " + // "pa=0x%08x rpn=0x%08x flags=0x%08x\n", + // pdir_idx, ptbl_idx, va, pa, pte->rpn, pte->flags); + + /* Flush the real memory from the instruction cache. 
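+ * This is only done for cacheable mappings, i.e. when neither + * PTE_I nor PTE_G is set.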
*/ + if ((flags & (PTE_I | PTE_G)) == 0) { + __syncicache((void *)va, PAGE_SIZE); + } + + //debugf("mmu_booke_kenter: e\n"); +} + +/* + * Remove a page from kernel page table. + */ +static void +mmu_booke_kremove(mmu_t mmu, vm_offset_t va) +{ + unsigned int pdir_idx = PDIR_IDX(va); + unsigned int ptbl_idx = PTBL_IDX(va); + pte_t *pte; + + //debugf("mmu_booke_kremove: s (va = 0x%08x)\n", va); + + KASSERT(((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)), + ("mmu_booke_kremove: invalid va")); + + pte = &(kernel_pmap->pm_pdir[pdir_idx][ptbl_idx]); + + if (!PTE_ISVALID(pte)) { + //debugf("mmu_booke_kremove: e (invalid pte)\n"); + return; + } + + /* Invalidate entry in TLB0. */ + tlb0_flush_entry(kernel_pmap, va); + + pte->flags = 0; + pte->rpn = 0; + + //debugf("mmu_booke_kremove: e\n"); +} + +/* + * Initialize pmap associated with process 0. + */ +static void +mmu_booke_pinit0(mmu_t mmu, pmap_t pmap) +{ + //debugf("mmu_booke_pinit0: s (pmap = 0x%08x)\n", (u_int32_t)pmap); + mmu_booke_pinit(mmu, pmap); + PCPU_SET(curpmap, pmap); + //debugf("mmu_booke_pinit0: e\n"); +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ +static void +mmu_booke_pinit(mmu_t mmu, pmap_t pmap) +{ + + //struct thread *td; + //struct proc *p; + + //td = PCPU_GET(curthread); + //p = td->td_proc; + //debugf("mmu_booke_pinit: s (pmap = 0x%08x)\n", (u_int32_t)pmap); + //printf("mmu_booke_pinit: proc %d '%s'\n", p->p_pid, p->p_comm); + + KASSERT((pmap != kernel_pmap), ("mmu_booke_pinit: initializing kernel_pmap")); + + PMAP_LOCK_INIT(pmap); + pmap->pm_tid = 0; + pmap->pm_active = 0; + bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); + bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); + + TAILQ_INIT(&pmap->ptbl_list); + + //debugf("mmu_booke_pinit: e\n"); +} + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by mmu_booke_pinit is being released. + * Should only be called if the map contains no valid mappings. + */ +static void +mmu_booke_release(mmu_t mmu, pmap_t pmap) +{ + + //debugf("mmu_booke_release: s\n"); + + PMAP_LOCK_DESTROY(pmap); + + //debugf("mmu_booke_release: e\n"); +} + +#if 0 +/* Not needed, kernel page tables are statically allocated. */ +void +mmu_booke_growkernel(vm_offset_t maxkvaddr) +{ +} +#endif + +/* + * Insert the given physical page at the specified virtual address in the + * target physical map with the protection requested. If specified the page + * will be wired down. 
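+ * The real work is done by mmu_booke_enter_locked() with the page + * queues and pmap locks held.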
+ */ +static void +mmu_booke_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, boolean_t wired) +{ + vm_page_lock_queues(); + PMAP_LOCK(pmap); + mmu_booke_enter_locked(mmu, pmap, va, m, prot, wired); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +static void +mmu_booke_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, boolean_t wired) +{ + pte_t *pte; + vm_paddr_t pa; + u_int32_t flags; + int su, sync; + + pa = VM_PAGE_TO_PHYS(m); + su = (pmap == kernel_pmap); + sync = 0; + + //debugf("mmu_booke_enter_locked: s (pmap=0x%08x su=%d tid=%d m=0x%08x va=0x%08x " + // "pa=0x%08x prot=0x%08x wired=%d)\n", + // (u_int32_t)pmap, su, pmap->pm_tid, + // (u_int32_t)m, va, pa, prot, wired); + + if (su) { + KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), + ("mmu_booke_enter_locked: kernel pmap, non kernel va")); + } else { + KASSERT((va <= VM_MAXUSER_ADDRESS), + ("mmu_booke_enter_locked: user pmap, non user va")); + } + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * If there is an existing mapping, and the physical address has not + * changed, must be protection or wiring change. + */ + if (((pte = pte_find(mmu, pmap, va)) != NULL) && + (PTE_ISVALID(pte)) && (PTE_PA(pte) == pa)) { + + //debugf("mmu_booke_enter_locked: update\n"); + + /* Wiring change, just update stats. */ + if (wired) { + if (!PTE_ISWIRED(pte)) { + pte->flags |= PTE_WIRED; + pmap->pm_stats.wired_count++; + } + } else { + if (PTE_ISWIRED(pte)) { + pte->flags &= ~PTE_WIRED; + pmap->pm_stats.wired_count--; + } + } + + /* Save the old bits and clear the ones we're interested in. */ + flags = pte->flags; + pte->flags &= ~(PTE_UW | PTE_UX | PTE_SW | PTE_SX | PTE_MODIFIED); + + if (prot & VM_PROT_WRITE) { + /* Add write permissions. */ + pte->flags |= PTE_SW; + if (!su) + pte->flags |= PTE_UW; + } else { + /* Handle modified pages, sense modify status. */ + if (PTE_ISMODIFIED(pte)) { + if (track_modified_needed(pmap, va)) + vm_page_dirty(m); + } + } + + /* If we're turning on execute permissions, flush the icache. */ + if (prot & VM_PROT_EXECUTE) { + pte->flags |= PTE_SX; + if (!su) + pte->flags |= PTE_UX; + + if ((flags & (PTE_UX | PTE_SX)) == 0) + sync++; + } + + /* Flush the old mapping from TLB0. */ + pte->flags &= ~PTE_REFERENCED; + tlb0_flush_entry(pmap, va); + } else { + /* + * If there is an existing mapping, but its for a different + * physical address, pte_enter() will delete the old mapping. + */ + //if ((pte != NULL) && PTE_ISVALID(pte)) + // debugf("mmu_booke_enter_locked: replace\n"); + //else + // debugf("mmu_booke_enter_locked: new\n"); + + /* Now set up the flags and install the new mapping. */ + flags = (PTE_SR | PTE_VALID); + + if (!su) + flags |= PTE_UR; + + if (prot & VM_PROT_WRITE) { + flags |= PTE_SW; + if (!su) + flags |= PTE_UW; + } + + if (prot & VM_PROT_EXECUTE) { + flags |= PTE_SX; + if (!su) + flags |= PTE_UX; + } + + /* If its wired update stats. */ + if (wired) { + pmap->pm_stats.wired_count++; + flags |= PTE_WIRED; + } + + pte_enter(mmu, pmap, m, va, flags); + + /* Flush the real memory from the instruction cache. */ + if (prot & VM_PROT_EXECUTE) + sync++; + } + + if (sync && (su || pmap == PCPU_GET(curpmap))) { + __syncicache((void *)va, PAGE_SIZE); + sync = 0; + } + + if (sync) { + /* Create a temporary mapping. 
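+ * at va 0 in the current pmap, so that __syncicache() can push the + * new page contents into the instruction cache; the mapping is + * removed again right afterwards.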
*/ + pmap = PCPU_GET(curpmap); + + va = 0; + pte = pte_find(mmu, pmap, va); + KASSERT(pte == NULL, ("%s:%d", __func__, __LINE__)); + + flags = PTE_SR | PTE_VALID | PTE_UR; + pte_enter(mmu, pmap, m, va, flags); + __syncicache((void *)va, PAGE_SIZE); + pte_remove(mmu, pmap, va, PTBL_UNHOLD); + } + + //debugf("mmu_booke_enter_locked: e\n"); +} + +/* + * Maps a sequence of resident pages belonging to the same object. + * The sequence begins with the given page m_start. This page is + * mapped at the given virtual address start. Each subsequent page is + * mapped at a virtual address that is offset from start by the same + * amount as the page is offset from m_start within the object. The + * last page in the sequence is the page with the largest offset from + * m_start that can be mapped at a virtual address less than the given + * virtual address end. Not every virtual page between start and end + * is mapped; only those for which a resident page exists with the + * corresponding offset from m_start are mapped. + */ +static void +mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, + vm_offset_t end, vm_page_t m_start, vm_prot_t prot) +{ + vm_page_t m; + vm_pindex_t diff, psize; + + psize = atop(end - start); + m = m_start; + PMAP_LOCK(pmap); + while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { + mmu_booke_enter_locked(mmu, pmap, start + ptoa(diff), m, prot & + (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); + m = TAILQ_NEXT(m, listq); + } + PMAP_UNLOCK(pmap); +} + +static void +mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot) +{ + + //debugf("mmu_booke_enter_quick: s\n"); + + PMAP_LOCK(pmap); + mmu_booke_enter_locked(mmu, pmap, va, m, + prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); + PMAP_UNLOCK(pmap); + + //debugf("mmu_booke_enter_quick e\n"); +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly rounded to the page size. + */ +static void +mmu_booke_remove(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t endva) +{ + pte_t *pte; + u_int8_t hold_flag; + + int su = (pmap == kernel_pmap); + + //debugf("mmu_booke_remove: s (su = %d pmap=0x%08x tid=%d va=0x%08x endva=0x%08x)\n", + // su, (u_int32_t)pmap, pmap->pm_tid, va, endva); + + if (su) { + KASSERT(((va >= virtual_avail) && (va <= VM_MAX_KERNEL_ADDRESS)), + ("mmu_booke_enter: kernel pmap, non kernel va")); + } else { + KASSERT((va <= VM_MAXUSER_ADDRESS), + ("mmu_booke_enter: user pmap, non user va")); + } + + if (PMAP_REMOVE_DONE(pmap)) { + //debugf("mmu_booke_remove: e (empty)\n"); + return; + } + + hold_flag = PTBL_HOLD_FLAG(pmap); + //debugf("mmu_booke_remove: hold_flag = %d\n", hold_flag); + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + for (; va < endva; va += PAGE_SIZE) { + pte = pte_find(mmu, pmap, va); + if ((pte != NULL) && PTE_ISVALID(pte)) { + pte_remove(mmu, pmap, va, hold_flag); + + /* Flush mapping from TLB0. */ + tlb0_flush_entry(pmap, va); + } + } + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + + //debugf("mmu_booke_remove: e\n"); +} + +/* + * Remove physical page from all pmaps in which it resides. 
+ */ +static void +mmu_booke_remove_all(mmu_t mmu, vm_page_t m) +{ + pv_entry_t pv, pvn; + u_int8_t hold_flag; + + //debugf("mmu_booke_remove_all: s\n"); + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + for (pv = TAILQ_FIRST(&m->md.pv_list); pv != NULL; pv = pvn) { + pvn = TAILQ_NEXT(pv, pv_link); + + PMAP_LOCK(pv->pv_pmap); + hold_flag = PTBL_HOLD_FLAG(pv->pv_pmap); + pte_remove(mmu, pv->pv_pmap, pv->pv_va, hold_flag); + + /* Flush mapping from TLB0. */ + tlb0_flush_entry(pv->pv_pmap, pv->pv_va); + PMAP_UNLOCK(pv->pv_pmap); + } + vm_page_flag_clear(m, PG_WRITEABLE); + + //debugf("mmu_booke_remove_all: e\n"); +} + +/* + * Map a range of physical addresses into kernel virtual address space. + * + * The value passed in *virt is a suggested virtual address for the mapping. + * Architectures which can support a direct-mapped physical to virtual region + * can return the appropriate address within that region, leaving '*virt' + * unchanged. We cannot and therefore do not; *virt is updated with the + * first usable address after the mapped region. + */ +static vm_offset_t +mmu_booke_map(mmu_t mmu, vm_offset_t *virt, vm_offset_t pa_start, + vm_offset_t pa_end, int prot) +{ + vm_offset_t sva = *virt; + vm_offset_t va = sva; + + //debugf("mmu_booke_map: s (sva = 0x%08x pa_start = 0x%08x pa_end = 0x%08x)\n", + // sva, pa_start, pa_end); + + while (pa_start < pa_end) { + mmu_booke_kenter(mmu, va, pa_start); + va += PAGE_SIZE; + pa_start += PAGE_SIZE; + } + *virt = va; + + //debugf("mmu_booke_map: e (va = 0x%08x)\n", va); + return (sva); +} + +/* + * The pmap must be activated before it's address space can be accessed in any + * way. + */ +static void +mmu_booke_activate(mmu_t mmu, struct thread *td) +{ + pmap_t pmap; + + pmap = &td->td_proc->p_vmspace->vm_pmap; + + //debugf("mmu_booke_activate: s (proc = '%s', id = %d, pmap = 0x%08x)\n", + // td->td_proc->p_comm, td->td_proc->p_pid, pmap); + + KASSERT((pmap != kernel_pmap), ("mmu_booke_activate: kernel_pmap!")); + + mtx_lock_spin(&sched_lock); + + pmap->pm_active |= PCPU_GET(cpumask); + PCPU_SET(curpmap, pmap); + + if (!pmap->pm_tid) + tid_alloc(pmap); + + /* Load PID0 register with pmap tid value. */ + load_pid0(pmap->pm_tid); + + mtx_unlock_spin(&sched_lock); + + //debugf("mmu_booke_activate: e (tid = %d for '%s')\n", pmap->pm_tid, + // td->td_proc->p_comm); +} + +/* + * Deactivate the specified process's address space. + */ +static void +mmu_booke_deactivate(mmu_t mmu, struct thread *td) +{ + pmap_t pmap; + + pmap = &td->td_proc->p_vmspace->vm_pmap; + pmap->pm_active &= ~(PCPU_GET(cpumask)); + PCPU_SET(curpmap, NULL); +} + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ +static void +mmu_booke_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, + vm_size_t len, vm_offset_t src_addr) +{ + +} + +/* + * Set the physical protection on the specified range of this map as requested. 
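+ * Access can only be reduced here: removing read access removes + * the mappings outright, while requests that include write + * permission leave the range untouched.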
+ */ +static void +mmu_booke_protect(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva, + vm_prot_t prot) +{ + vm_offset_t va; + vm_page_t m; + pte_t *pte; + + if ((prot & VM_PROT_READ) == VM_PROT_NONE) { + mmu_booke_remove(mmu, pmap, sva, eva); + return; + } + + if (prot & VM_PROT_WRITE) + return; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + for (va = sva; va < eva; va += PAGE_SIZE) { + if ((pte = pte_find(mmu, pmap, va)) != NULL) { + if (PTE_ISVALID(pte)) { + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + /* Handle modified pages. */ + if (PTE_ISMODIFIED(pte)) { + if (track_modified_needed(pmap, va)) + vm_page_dirty(m); + } + + /* Referenced pages. */ + if (PTE_ISREFERENCED(pte)) + vm_page_flag_set(m, PG_REFERENCED); + + /* Flush mapping from TLB0. */ + pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | + PTE_REFERENCED); + tlb0_flush_entry(pmap, va); + } + } + } + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); +} + +/* + * Clear the write and modified bits in each of the given page's mappings. + */ +static void +mmu_booke_remove_write(mmu_t mmu, vm_page_t m) +{ + pv_entry_t pv; + pte_t *pte; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0 || + (m->flags & PG_WRITEABLE) == 0) + return; + + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { + if (PTE_ISVALID(pte)) { + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + + /* Handle modified pages. */ + if (PTE_ISMODIFIED(pte)) { + if (track_modified_needed(pv->pv_pmap, + pv->pv_va)) + vm_page_dirty(m); + } + + /* Referenced pages. */ + if (PTE_ISREFERENCED(pte)) + vm_page_flag_set(m, PG_REFERENCED); + + /* Flush mapping from TLB0. */ + pte->flags &= ~(PTE_UW | PTE_SW | PTE_MODIFIED | + PTE_REFERENCED); + tlb0_flush_entry(pv->pv_pmap, pv->pv_va); + } + } + PMAP_UNLOCK(pv->pv_pmap); + } + vm_page_flag_clear(m, PG_WRITEABLE); +} + +static boolean_t +mmu_booke_page_executable(mmu_t mmu, vm_page_t m) +{ + pv_entry_t pv; + pte_t *pte; + boolean_t executable; + + executable = FALSE; + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + pte = pte_find(mmu, pv->pv_pmap, pv->pv_va); + if (pte != NULL && PTE_ISVALID(pte) && (pte->flags & PTE_UX)) + executable = TRUE; + PMAP_UNLOCK(pv->pv_pmap); + if (executable) + break; + } + + return (executable); +} + +/* + * Atomically extract and hold the physical page with the given + * pmap and virtual address pair if that mapping permits the given + * protection. + */ +static vm_page_t +mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, + vm_prot_t prot) +{ + pte_t *pte; + vm_page_t m; + u_int32_t pte_wbit; + + m = NULL; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + pte = pte_find(mmu, pmap, va); + + if ((pte != NULL) && PTE_ISVALID(pte)) { + if (pmap == kernel_pmap) + pte_wbit = PTE_SW; + else + pte_wbit = PTE_UW; + + if ((pte->flags & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { + m = PHYS_TO_VM_PAGE(PTE_PA(pte)); + vm_page_hold(m); + } + } + + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); + return (m); +} + +/* + * Initialize a vm_page's machine-dependent fields. + */ +static void +mmu_booke_page_init(mmu_t mmu, vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); +} + +/* + * mmu_booke_zero_page_area zeros the specified hardware page by + * mapping it into virtual memory and using bzero to clear + * its contents. + * + * off and size must reside within a single page. 
+ */ +static void +mmu_booke_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) +{ + vm_offset_t va; + + //debugf("mmu_booke_zero_page_area: s\n"); + + mtx_lock(&zero_page_mutex); + va = zero_page_va; + + mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va + off, size); + mmu_booke_kremove(mmu, va); + + mtx_unlock(&zero_page_mutex); + + //debugf("mmu_booke_zero_page_area: e\n"); +} + +/* + * mmu_booke_zero_page zeros the specified hardware page. + */ +static void +mmu_booke_zero_page(mmu_t mmu, vm_page_t m) +{ + + //debugf("mmu_booke_zero_page: s\n"); + mmu_booke_zero_page_area(mmu, m, 0, PAGE_SIZE); + //debugf("mmu_booke_zero_page: e\n"); +} + +/* + * mmu_booke_copy_page copies the specified (machine independent) page by + * mapping the page into virtual memory and using memcopy to copy the page, + * one machine dependent page at a time. + */ +static void +mmu_booke_copy_page(mmu_t mmu, vm_page_t sm, vm_page_t dm) +{ + vm_offset_t sva, dva; + + //debugf("mmu_booke_copy_page: s\n"); + + mtx_lock(©_page_mutex); + sva = copy_page_src_va; + dva = copy_page_dst_va; + + mmu_booke_kenter(mmu, sva, VM_PAGE_TO_PHYS(sm)); + mmu_booke_kenter(mmu, dva, VM_PAGE_TO_PHYS(dm)); + memcpy((caddr_t)dva, (caddr_t)sva, PAGE_SIZE); + mmu_booke_kremove(mmu, dva); + mmu_booke_kremove(mmu, sva); + + mtx_unlock(©_page_mutex); + + //debugf("mmu_booke_copy_page: e\n"); +} + +#if 0 +/* + * Remove all pages from specified address space, this aids process exit + * speeds. This is much faster than mmu_booke_remove in the case of running + * down an entire address space. Only works for the current pmap. + */ +void +mmu_booke_remove_pages(pmap_t pmap) +{ +} +#endif + +/* + * mmu_booke_zero_page_idle zeros the specified hardware page by mapping it + * into virtual memory and using bzero to clear its contents. This is intended + * to be called from the vm_pagezero process only and outside of Giant. No + * lock is required. + */ +static void +mmu_booke_zero_page_idle(mmu_t mmu, vm_page_t m) +{ + vm_offset_t va; + + //debugf("mmu_booke_zero_page_idle: s\n"); + + va = zero_page_idle_va; + mmu_booke_kenter(mmu, va, VM_PAGE_TO_PHYS(m)); + bzero((caddr_t)va, PAGE_SIZE); + mmu_booke_kremove(mmu, va); + + //debugf("mmu_booke_zero_page_idle: e\n"); +} + +/* + * Return whether or not the specified physical page was modified + * in any of physical maps. + */ +static boolean_t +mmu_booke_is_modified(mmu_t mmu, vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return (FALSE); + + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { + if (!PTE_ISVALID(pte)) + goto make_sure_to_unlock; + + if (!track_modified_needed(pv->pv_pmap, pv->pv_va)) + goto make_sure_to_unlock; + + if (PTE_ISMODIFIED(pte)) { + PMAP_UNLOCK(pv->pv_pmap); + return (TRUE); + } + } +make_sure_to_unlock: + PMAP_UNLOCK(pv->pv_pmap); + } + return (FALSE); +} + +/* + * Return whether or not the specified virtual address is elgible + * for prefault. + */ +static boolean_t +mmu_booke_is_prefaultable(mmu_t mmu, pmap_t pmap, vm_offset_t addr) +{ + + return (FALSE); +} + +/* + * Clear the modify bits on the specified physical page. 
+ */ +static void +mmu_booke_clear_modify(mmu_t mmu, vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return; + + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { + if (!PTE_ISVALID(pte)) + goto make_sure_to_unlock; + + if (pte->flags & (PTE_SW | PTE_UW | PTE_MODIFIED)) { + pte->flags &= ~(PTE_SW | PTE_UW | PTE_MODIFIED | + PTE_REFERENCED); + tlb0_flush_entry(pv->pv_pmap, pv->pv_va); + } + } +make_sure_to_unlock: + PMAP_UNLOCK(pv->pv_pmap); + } +} + +/* + * Return a count of reference bits for a page, clearing those bits. + * It is not necessary for every reference bit to be cleared, but it + * is necessary that 0 only be returned when there are truly no + * reference bits set. + * + * XXX: The exact number of bits to check and clear is a matter that + * should be tested and standardized at some point in the future for + * optimal aging of shared pages. + */ +static int +mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + int count; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return (0); + + count = 0; + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { + if (!PTE_ISVALID(pte)) + goto make_sure_to_unlock; + + if (!track_modified_needed(pv->pv_pmap, pv->pv_va)) + goto make_sure_to_unlock; + + if (PTE_ISREFERENCED(pte)) { + pte->flags &= ~PTE_REFERENCED; + tlb0_flush_entry(pv->pv_pmap, pv->pv_va); + + if (++count > 4) { + PMAP_UNLOCK(pv->pv_pmap); + break; + } + } + } +make_sure_to_unlock: + PMAP_UNLOCK(pv->pv_pmap); + } + return (count); +} + +/* + * Clear the reference bit on the specified physical page. + */ +static void +mmu_booke_clear_reference(mmu_t mmu, vm_page_t m) +{ + pte_t *pte; + pv_entry_t pv; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return; + + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) { + if (!PTE_ISVALID(pte)) + goto make_sure_to_unlock; + + if (PTE_ISREFERENCED(pte)) { + pte->flags &= ~PTE_REFERENCED; + tlb0_flush_entry(pv->pv_pmap, pv->pv_va); + } + } +make_sure_to_unlock: + PMAP_UNLOCK(pv->pv_pmap); + } +} + +/* + * Change wiring attribute for a map/virtual-address pair. + */ +static void +mmu_booke_change_wiring(mmu_t mmu, pmap_t pmap, vm_offset_t va, boolean_t wired) +{ + pte_t *pte;; + + PMAP_LOCK(pmap); + if ((pte = pte_find(mmu, pmap, va)) != NULL) { + if (wired) { + if (!PTE_ISWIRED(pte)) { + pte->flags |= PTE_WIRED; + pmap->pm_stats.wired_count++; + } + } else { + if (PTE_ISWIRED(pte)) { + pte->flags &= ~PTE_WIRED; + pmap->pm_stats.wired_count--; + } + } + } + PMAP_UNLOCK(pmap); +} + +/* + * Return true if the pmap's pv is one of the first 16 pvs linked to from this + * page. This count may be changed upwards or downwards in the future; it is + * only necessary that true be returned for a small subset of pmaps for proper + * page aging. 
+ */ +static boolean_t +mmu_booke_page_exists_quick(mmu_t mmu, pmap_t pmap, vm_page_t m) +{ + pv_entry_t pv; + int loops; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) + return (FALSE); + + loops = 0; + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + + if (pv->pv_pmap == pmap) + return (TRUE); + + if (++loops >= 16) + break; + } + return (FALSE); +} + +/* + * Return the number of managed mappings to the given physical page that are + * wired. + */ +static int +mmu_booke_page_wired_mappings(mmu_t mmu, vm_page_t m) +{ + pv_entry_t pv; + pte_t *pte; + int count = 0; + + if ((m->flags & PG_FICTITIOUS) != 0) + return (count); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + + TAILQ_FOREACH(pv, &m->md.pv_list, pv_link) { + PMAP_LOCK(pv->pv_pmap); + if ((pte = pte_find(mmu, pv->pv_pmap, pv->pv_va)) != NULL) + if (PTE_ISVALID(pte) && PTE_ISWIRED(pte)) + count++; + PMAP_UNLOCK(pv->pv_pmap); + } + + return (count); +} + +static int +mmu_booke_dev_direct_mapped(mmu_t mmu, vm_offset_t pa, vm_size_t size) +{ + int i; + vm_offset_t va; + + /* + * This currently does not work for entries that + * overlap TLB1 entries. + */ + for (i = 0; i < tlb1_idx; i ++) { + if (tlb1_iomapped(i, pa, size, &va) == 0) + return (0); + } + + return (EFAULT); +} + +/* + * Map a set of physical memory pages into the kernel virtual address space. + * Return a pointer to where it is mapped. This routine is intended to be used + * for mapping device memory, NOT real memory. + */ +static void * +mmu_booke_mapdev(mmu_t mmu, vm_offset_t pa, vm_size_t size) +{ + uintptr_t va; + + va = (pa >= 0x80000000) ? pa : (0xe2000000 + pa); + if (bootverbose) + printf("Wiring VA=%x to PA=%x (size=%x), using TLB1[%d]\n", + va, pa, size, tlb1_idx); + tlb1_set_entry(va, pa, size, _TLB_ENTRY_IO); + return ((void *)va); +} + +/* + * 'Unmap' a range mapped by mmu_booke_mapdev(). + */ +static void +mmu_booke_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) +{ + vm_offset_t base, offset; + + //debugf("mmu_booke_unmapdev: s (va = 0x%08x)\n", va); + + /* + * Unmap only if this is inside kernel virtual space. + */ + if ((va >= VM_MIN_KERNEL_ADDRESS) && (va <= VM_MAX_KERNEL_ADDRESS)) { + base = trunc_page(va); + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + kmem_free(kernel_map, base, size); + } + + //debugf("mmu_booke_unmapdev: e\n"); +} + +/* + * mmu_booke_object_init_pt preloads the ptes for a given object + * into the specified pmap. This eliminates the blast of soft + * faults on process startup and immediately after an mmap. + */ +static void +mmu_booke_object_init_pt(mmu_t mmu, pmap_t pmap, vm_offset_t addr, + vm_object_t object, vm_pindex_t pindex, vm_size_t size) +{ + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT(object->type == OBJT_DEVICE, + ("mmu_booke_object_init_pt: non-device object")); +} + +/* + * Perform the pmap work for mincore. + */ +static int +mmu_booke_mincore(mmu_t mmu, pmap_t pmap, vm_offset_t addr) +{ + + TODO; + return (0); +} + +static vm_offset_t +mmu_booke_addr_hint(mmu_t mmu, vm_object_t object, vm_offset_t va, + vm_size_t size) +{ + + return (va); +} + +/**************************************************************************/ +/* TID handling */ +/**************************************************************************/ +/* + * Flush all entries from TLB0 matching given tid. 
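+ *
+ * This is required when tid_alloc() recycles a TID: stale TLB0 entries
+ * tagged with the previous owner must be invalidated before the TID is
+ * handed to a new pmap.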
+ */ +static void +tid_flush(tlbtid_t tid) +{ + int i, entryidx, way; + + //debugf("tid_flush: s (tid = %d)\n", tid); + + mtx_lock_spin(&tlb0_mutex); + + for (i = 0; i < TLB0_SIZE; i++) { + if (MAS1_GETTID(tlb0[i].mas1) == tid) { + way = i / TLB0_ENTRIES_PER_WAY; + entryidx = i - (way * TLB0_ENTRIES_PER_WAY); + + //debugf("tid_flush: inval tlb0 entry %d\n", i); + tlb0_inval_entry(entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT, way); + } + } + + mtx_unlock_spin(&tlb0_mutex); + + //debugf("tid_flush: e\n"); +} + +/* + * Allocate a TID. If necessary, steal one from someone else. + * The new TID is flushed from the TLB before returning. + */ +static tlbtid_t +tid_alloc(pmap_t pmap) +{ + tlbtid_t tid; + static tlbtid_t next_tid = TID_MIN; + + //struct thread *td; + //struct proc *p; + + //td = PCPU_GET(curthread); + //p = td->td_proc; + //debugf("tid_alloc: s (pmap = 0x%08x)\n", (u_int32_t)pmap); + //printf("tid_alloc: proc %d '%s'\n", p->p_pid, p->p_comm); + + KASSERT((pmap != kernel_pmap), ("tid_alloc: kernel pmap")); + + /* + * Find a likely TID, allocate unused if possible, + * skip reserved entries. + */ + tid = next_tid; + while (tidbusy[tid] != NULL) { + if (tid == next_tid) + break; + + if (tid == TID_MAX) + tid = TID_MIN; + else + tid++; + + } + + /* Now clean it out */ + tid_flush(tid); + + /* If we are stealing pmap then clear its tid */ + if (tidbusy[tid]) { + //debugf("warning: stealing tid %d\n", tid); + tidbusy[tid]->pm_tid = 0; + } + + /* Calculate next tid */ + if (tid == TID_MAX) + next_tid = TID_MIN; + else + next_tid = tid + 1; + + tidbusy[tid] = pmap; + pmap->pm_tid = tid; + + //debugf("tid_alloc: e (%02d next = %02d)\n", tid, next_tid); + return (tid); +} + +#if 0 +/* + * Free this pmap's TID. + */ +static void +tid_free(pmap_t pmap) +{ + tlbtid_t oldtid; + + oldtid = pmap->pm_tid; + + if (oldtid == 0) { + panic("tid_free: freeing kernel tid"); + } + +#ifdef DEBUG + if (tidbusy[oldtid] == 0) + debugf("tid_free: freeing free tid %d\n", oldtid); + if (tidbusy[oldtid] != pmap) { + debugf("tid_free: freeing someone esle's tid\n " + "tidbusy[%d] = 0x%08x pmap = 0x%08x\n", + oldtid, (u_int32_t)tidbusy[oldtid], (u_int32_t)pmap); + } +#endif + + tidbusy[oldtid] = NULL; + tid_flush(oldtid); +} +#endif + +#if 0 +#if DEBUG +static void +tid_print_busy(void) +{ + int i; + + for (i = 0; i < TID_MAX; i++) { + debugf("tid %d = pmap 0x%08x", i, (u_int32_t)tidbusy[i]); + if (tidbusy[i]) + debugf(" pmap->tid = %d", tidbusy[i]->pm_tid); + debugf("\n"); + } + +} +#endif /* DEBUG */ +#endif + +/**************************************************************************/ +/* TLB0 handling */ +/**************************************************************************/ + +static void +tlb_print_entry(int i, u_int32_t mas1, u_int32_t mas2, u_int32_t mas3, u_int32_t mas7) +{ + int as; + char desc[3]; + tlbtid_t tid; + vm_size_t size; + unsigned int tsize; + + desc[2] = '\0'; + if (mas1 & MAS1_VALID) + desc[0] = 'V'; + else + desc[0] = ' '; + + if (mas1 & MAS1_IPROT) + desc[1] = 'P'; + else + desc[1] = ' '; + + as = (mas1 & MAS1_TS) ? 1 : 0; + tid = MAS1_GETTID(mas1); + + tsize = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; + size = 0; + if (tsize) + size = tsize2size(tsize); + + debugf("%3d: (%s) [AS=%d] " + "sz = 0x%08x tsz = %d tid = %d mas1 = 0x%08x " + "mas2(va) = 0x%08x mas3(pa) = 0x%08x mas7 = 0x%08x\n", + i, desc, as, size, tsize, tid, mas1, mas2, mas3, mas7); +} + +/* Convert TLB0 va and way number to tlb0[] table index. 
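+ * The set index comes from the EPN bits of va selected by
+ * MAS2_TLB0_ENTRY_IDX_MASK; e.g. with two ways of 128 entries each
+ * (illustrative numbers), way 1 and set 5 give idx = 1 * 128 + 5 = 133.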
*/ +static inline unsigned int +tlb0_tableidx(vm_offset_t va, unsigned int way) +{ + unsigned int idx; + + idx = (way * TLB0_ENTRIES_PER_WAY); + idx += (va & MAS2_TLB0_ENTRY_IDX_MASK) >> MAS2_TLB0_ENTRY_IDX_SHIFT; + return (idx); +} + +/* + * Write given entry to TLB0 hardware. + * Use 32 bit pa, clear 4 high-order bits of RPN (mas7). + */ +static void +tlb0_write_entry(unsigned int idx, unsigned int way) +{ + u_int32_t mas0, mas7, nv; + + /* Clear high order RPN bits. */ + mas7 = 0; + + /* Preserve NV. */ + mas0 = mfspr(SPR_MAS0); + nv = mas0 & (TLB0_NWAYS - 1); + + /* Select entry. */ + mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way) | nv; + + //debugf("tlb0_write_entry: s (idx=%d way=%d mas0=0x%08x " + // "mas1=0x%08x mas2=0x%08x mas3=0x%08x)\n", + // idx, way, mas0, tlb0[idx].mas1, + // tlb0[idx].mas2, tlb0[idx].mas3); + + mtspr(SPR_MAS0, mas0); + __asm volatile("isync"); + mtspr(SPR_MAS1, tlb0[idx].mas1); + __asm volatile("isync"); + mtspr(SPR_MAS2, tlb0[idx].mas2); + __asm volatile("isync"); + mtspr(SPR_MAS3, tlb0[idx].mas3); + __asm volatile("isync"); + mtspr(SPR_MAS7, mas7); + __asm volatile("isync; tlbwe; isync; msync"); + + //debugf("tlb0_write_entry: e\n"); +} + +/* + * Invalidate TLB0 entry, clear correspondig tlb0 table element. + */ +static void +tlb0_inval_entry(vm_offset_t va, unsigned int way) +{ + int idx = tlb0_tableidx(va, way); + + //debugf("tlb0_inval_entry: s (va=0x%08x way=%d idx=%d)\n", + // va, way, idx); + + tlb0[idx].mas1 = 1 << MAS1_TSIZE_SHIFT; /* !MAS1_VALID */ + tlb0[idx].mas2 = va & MAS2_EPN; + tlb0[idx].mas3 = 0; + + tlb0_write_entry(idx, way); + + //debugf("tlb0_inval_entry: e\n"); +} + +/* + * Invalidate TLB0 entry that corresponds to pmap/va. + */ +static void +tlb0_flush_entry(pmap_t pmap, vm_offset_t va) +{ + int idx, way; + + //debugf("tlb0_flush_entry: s (pmap=0x%08x va=0x%08x)\n", + // (u_int32_t)pmap, va); + + mtx_lock_spin(&tlb0_mutex); + + /* Check all TLB0 ways. */ + for (way = 0; way < TLB0_NWAYS; way ++) { + idx = tlb0_tableidx(va, way); + + /* Invalidate only if entry matches va and pmap tid. */ + if (((MAS1_GETTID(tlb0[idx].mas1) == pmap->pm_tid) && + ((tlb0[idx].mas2 & MAS2_EPN) == va))) { + tlb0_inval_entry(va, way); + } + } + + mtx_unlock_spin(&tlb0_mutex); + + //debugf("tlb0_flush_entry: e\n"); +} + +/* Clean TLB0 hardware and tlb0[] table. */ +static void +tlb0_init(void) +{ + int entryidx, way; + + debugf("tlb0_init: TLB0_SIZE = %d TLB0_NWAYS = %d\n", + TLB0_SIZE, TLB0_NWAYS); + + mtx_lock_spin(&tlb0_mutex); + + for (way = 0; way < TLB0_NWAYS; way ++) { + for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { + tlb0_inval_entry(entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT, way); + } + } + + mtx_unlock_spin(&tlb0_mutex); +} + +#if 0 +#if DEBUG +/* Print out tlb0 entries for given va. 
*/ +static void +tlb0_print_tlbentries_va(vm_offset_t va) +{ + u_int32_t mas0, mas1, mas2, mas3, mas7; + int way, idx; + + debugf("TLB0 entries for va = 0x%08x:\n", va); + for (way = 0; way < TLB0_NWAYS; way ++) { + mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); + mtspr(SPR_MAS0, mas0); + __asm volatile("isync"); + + mas2 = va & MAS2_EPN; + mtspr(SPR_MAS2, mas2); + __asm volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + mas2 = mfspr(SPR_MAS2); + mas3 = mfspr(SPR_MAS3); + mas7 = mfspr(SPR_MAS7); + + idx = tlb0_tableidx(va, way); + tlb_print_entry(idx, mas1, mas2, mas3, mas7); + } +} + +/* Print out contents of the MAS registers for each TLB0 entry */ +static void +tlb0_print_tlbentries(void) +{ + u_int32_t mas0, mas1, mas2, mas3, mas7; + int entryidx, way, idx; + + debugf("TLB0 entries:\n"); + for (way = 0; way < TLB0_NWAYS; way ++) { + for (entryidx = 0; entryidx < TLB0_ENTRIES_PER_WAY; entryidx++) { + + mas0 = MAS0_TLBSEL(0) | MAS0_ESEL(way); + mtspr(SPR_MAS0, mas0); + __asm volatile("isync"); + + mas2 = entryidx << MAS2_TLB0_ENTRY_IDX_SHIFT; + mtspr(SPR_MAS2, mas2); + + __asm volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + mas2 = mfspr(SPR_MAS2); + mas3 = mfspr(SPR_MAS3); + mas7 = mfspr(SPR_MAS7); + + idx = tlb0_tableidx(mas2, way); + tlb_print_entry(idx, mas1, mas2, mas3, mas7); + } + } +} + +/* Print out kernel tlb0[] table. */ +static void +tlb0_print_entries(void) +{ + int i; + + debugf("tlb0[] table entries:\n"); + for (i = 0; i < TLB0_SIZE; i++) { + tlb_print_entry(i, tlb0[i].mas1, + tlb0[i].mas2, tlb0[i].mas3, 0); + } +} +#endif /* DEBUG */ +#endif + +/**************************************************************************/ +/* TLB1 handling */ +/**************************************************************************/ +/* + * Write given entry to TLB1 hardware. + * Use 32 bit pa, clear 4 high-order bits of RPN (mas7). + */ +static void +tlb1_write_entry(unsigned int idx) +{ + u_int32_t mas0, mas7; + + //debugf("tlb1_write_entry: s\n"); + + /* Clear high order RPN bits */ + mas7 = 0; + + /* Select entry */ + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(idx); + //debugf("tlb1_write_entry: mas0 = 0x%08x\n", mas0); + + mtspr(SPR_MAS0, mas0); + __asm volatile("isync"); + mtspr(SPR_MAS1, tlb1[idx].mas1); + __asm volatile("isync"); + mtspr(SPR_MAS2, tlb1[idx].mas2); + __asm volatile("isync"); + mtspr(SPR_MAS3, tlb1[idx].mas3); + __asm volatile("isync"); + mtspr(SPR_MAS7, mas7); + __asm volatile("isync; tlbwe; isync; msync"); + + //debugf("tlb1_write_entry: e\n");; +} + +/* + * Return the largest uint value log such that 2^log <= num. + */ +static unsigned int +ilog2(unsigned int num) +{ + int lz; + + __asm ("cntlzw %0, %1" : "=r" (lz) : "r" (num)); + return (31 - lz); +} + +/* + * Convert TLB TSIZE value to mapped region size. + */ +static vm_size_t +tsize2size(unsigned int tsize) +{ + + /* + * size = 4^tsize KB + * size = 4^tsize * 2^10 = 2^(2 * tsize - 10) + */ + + return ((1 << (2 * tsize)) * 1024); +} + +/* + * Convert region size (must be power of 4) to TLB TSIZE value. + */ +static unsigned int +size2tsize(vm_size_t size) +{ + + /* + * tsize = log2(size) / 2 - 5 + */ + + return (ilog2(size) / 2 - 5); +} + +/* + * Setup entry in a sw tlb1 table, write entry to TLB1 hardware. + * This routine is used for low level operations on the TLB1, + * for creating temporaray as well as permanent mappings (tlb_set_entry). + * + * We assume kernel mappings only, thus all entries created have supervisor + * permission bits set nad user permission bits cleared. 
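+ * (MAS3_SR, MAS3_SW and MAS3_SX are set, the user permission bits are
+ * left clear).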
+ * + * Provided mapping size must be a power of 4. + * Mapping flags must be a combination of MAS2_[WIMG]. + * Entry TID is set to _tid which must not exceed 8 bit value. + * Entry TS is set to either 0 or MAS1_TS based on provided _ts. + */ +static void +__tlb1_set_entry(unsigned int idx, vm_offset_t va, vm_offset_t pa, + vm_size_t size, u_int32_t flags, unsigned int _tid, unsigned int _ts) +{ + int tsize; + u_int32_t ts, tid; + + //debugf("__tlb1_set_entry: s (idx = %d va = 0x%08x pa = 0x%08x " + // "size = 0x%08x flags = 0x%08x _tid = %d _ts = %d\n", + // idx, va, pa, size, flags, _tid, _ts); + + /* Convert size to TSIZE */ + tsize = size2tsize(size); + //debugf("__tlb1_set_entry: tsize = %d\n", tsize); + + tid = (_tid << MAS1_TID_SHIFT) & MAS1_TID_MASK; + ts = (_ts) ? MAS1_TS : 0; + tlb1[idx].mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; + tlb1[idx].mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); + + tlb1[idx].mas2 = (va & MAS2_EPN) | flags; + + /* Set supervisor rwx permission bits */ + tlb1[idx].mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; + + //debugf("__tlb1_set_entry: mas1 = %08x mas2 = %08x mas3 = 0x%08x\n", + // tlb1[idx].mas1, tlb1[idx].mas2, tlb1[idx].mas3); + + tlb1_write_entry(idx); + //debugf("__tlb1_set_entry: e\n"); +} + +/* + * Register permanent kernel mapping in TLB1. + * + * Entries are created starting from index 0 (current free entry is + * kept in tlb1_idx) and are not supposed to be invalidated. + */ +static int +tlb1_set_entry(vm_offset_t va, vm_offset_t pa, vm_size_t size, u_int32_t flags) +{ + //debugf("tlb1_set_entry: s (tlb1_idx = %d va = 0x%08x pa = 0x%08x " + // "size = 0x%08x flags = 0x%08x\n", + // tlb1_idx, va, pa, size, flags); + + if (tlb1_idx >= TLB1_SIZE) { + //debugf("tlb1_set_entry: e (tlb1 full!)\n"); + return (-1); + } + + /* TS = 0, TID = 0 */ + __tlb1_set_entry(tlb1_idx++, va, pa, size, flags, KERNEL_TID, 0); + //debugf("tlb1_set_entry: e\n"); + return (0); +} + +/* + * Invalidate TLB1 entry, clear correspondig tlb1 table element. + * This routine is used to clear temporary entries created + * early in a locore.S or through the use of __tlb1_set_entry(). + */ +void +tlb1_inval_entry(unsigned int idx) +{ + vm_offset_t va; + + va = tlb1[idx].mas2 & MAS2_EPN; + + tlb1[idx].mas1 = 0; /* !MAS1_VALID */ + tlb1[idx].mas2 = 0; + tlb1[idx].mas3 = 0; + + tlb1_write_entry(idx); +} + +static int +tlb1_entry_size_cmp(const void *a, const void *b) +{ + const vm_size_t *sza; + const vm_size_t *szb; + + sza = a; + szb = b; + if (*sza > *szb) + return (-1); + else if (*sza < *szb) + return (1); + else + return (0); +} + +/* + * Mapin contiguous RAM region into the TLB1 using maximum of + * KERNEL_REGION_MAX_TLB_ENTRIES entries. + * + * If necessarry round up last entry size and return total size + * used by all allocated entries. + */ +vm_size_t +tlb1_mapin_region(vm_offset_t va, vm_offset_t pa, vm_size_t size) +{ + vm_size_t entry_size[KERNEL_REGION_MAX_TLB_ENTRIES]; + vm_size_t mapped_size, sz, esz; + unsigned int log; + int i; + + debugf("tlb1_mapin_region:\n"); + debugf(" region size = 0x%08x va = 0x%08x pa = 0x%08x\n", size, va, pa); + + mapped_size = 0; + sz = size; + memset(entry_size, 0, sizeof(entry_size)); + + /* Calculate entry sizes. 
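+	 * Example: a 96 MB region is covered by 64 MB + 16 MB + 16 MB
+	 * entries (each the largest power of 4 that fits the remaining
+	 * size), i.e. three TLB1 entries and no wasted space.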
*/ + for (i = 0; i < KERNEL_REGION_MAX_TLB_ENTRIES && sz > 0; i++) { + + /* Largest region that is power of 4 and fits within size */ + log = ilog2(sz)/2; + esz = 1 << (2 * log); + + /* Minimum region size is 4KB */ + if (esz < (1 << 12)) + esz = 1 << 12; + + /* If this is last entry cover remaining size. */ + if (i == KERNEL_REGION_MAX_TLB_ENTRIES - 1) { + while (esz < sz) + esz = esz << 2; + } + + entry_size[i] = esz; + mapped_size += esz; + if (esz < sz) + sz -= esz; + else + sz = 0; + } + + /* Sort entry sizes, required to get proper entry address alignment. */ + qsort(entry_size, KERNEL_REGION_MAX_TLB_ENTRIES, + sizeof(vm_size_t), tlb1_entry_size_cmp); + + /* Load TLB1 entries. */ + for (i = 0; i < KERNEL_REGION_MAX_TLB_ENTRIES; i++) { + esz = entry_size[i]; + if (!esz) + break; + debugf(" entry %d: sz = 0x%08x (va = 0x%08x pa = 0x%08x)\n", + tlb1_idx, esz, va, pa); + tlb1_set_entry(va, pa, esz, _TLB_ENTRY_MEM); + + va += esz; + pa += esz; + } + + debugf(" mapped size 0x%08x (wasted space 0x%08x)\n", + mapped_size, mapped_size - size); + + return (mapped_size); +} + +/* + * TLB1 initialization routine, to be called after the very first + * assembler level setup done in locore.S. + */ +void +tlb1_init(vm_offset_t ccsrbar) +{ + uint32_t mas0; + + /* TBL1[1] is used to map the kernel. Save that entry. */ + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(1); + mtspr(SPR_MAS0, mas0); + __asm __volatile("isync; tlbre"); + + tlb1[1].mas1 = mfspr(SPR_MAS1); + tlb1[1].mas2 = mfspr(SPR_MAS2); + tlb1[1].mas3 = mfspr(SPR_MAS3); + + /* Mapin CCSRBAR in TLB1[0] */ + __tlb1_set_entry(0, CCSRBAR_VA, ccsrbar, CCSRBAR_SIZE, + _TLB_ENTRY_IO, KERNEL_TID, 0); + + /* Setup TLB miss defaults */ + set_mas4_defaults(); + + /* Reset next available TLB1 entry index. */ + tlb1_idx = 2; +} + +/* + * Setup MAS4 defaults. + * These values are loaded to MAS0-2 on a TLB miss. + */ +static void +set_mas4_defaults(void) +{ + u_int32_t mas4; + + /* Defaults: TLB0, PID0, TSIZED=4K */ + mas4 = MAS4_TLBSELD0; + mas4 |= (TLB_SIZE_4K << MAS4_TSIZED_SHIFT) & MAS4_TSIZED_MASK; + + mtspr(SPR_MAS4, mas4); + __asm volatile("isync"); +} + +/* + * Print out contents of the MAS registers for each TLB1 entry + */ +void +tlb1_print_tlbentries(void) +{ + u_int32_t mas0, mas1, mas2, mas3, mas7; + int i; + + debugf("TLB1 entries:\n"); + for (i = 0; i < TLB1_SIZE; i++) { + + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(i); + mtspr(SPR_MAS0, mas0); + + __asm volatile("isync; tlbre"); + + mas1 = mfspr(SPR_MAS1); + mas2 = mfspr(SPR_MAS2); + mas3 = mfspr(SPR_MAS3); + mas7 = mfspr(SPR_MAS7); + + tlb_print_entry(i, mas1, mas2, mas3, mas7); + } +} + +/* + * Print out contents of the in-ram tlb1 table. + */ +void +tlb1_print_entries(void) +{ + int i; + + debugf("tlb1[] table entries:\n"); + for (i = 0; i < TLB1_SIZE; i++) + tlb_print_entry(i, tlb1[i].mas1, tlb1[i].mas2, tlb1[i].mas3, 0); +} + +/* + * Return 0 if the physical IO range is encompassed by one of the + * the TLB1 entries, otherwise return related error code. 
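+ * On success *va is set to the virtual address the range is mapped at;
+ * the errors are EINVAL (entry not valid), EPERM (entry is not a
+ * cache-inhibited, guarded, read/write i/o mapping) and ERANGE (range
+ * not covered by the entry).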
+ */ +static int +tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) +{ + u_int32_t prot; + vm_paddr_t pa_start; + vm_paddr_t pa_end; + unsigned int entry_tsize; + vm_size_t entry_size; + + *va = (vm_offset_t)NULL; + + /* Skip invalid entries */ + if (!(tlb1[i].mas1 & MAS1_VALID)) + return (EINVAL); + + /* + * The entry must be cache-inhibited, guarded, and r/w + * so it can function as an i/o page + */ + prot = tlb1[i].mas2 & (MAS2_I | MAS2_G); + if (prot != (MAS2_I | MAS2_G)) + return (EPERM); + + prot = tlb1[i].mas3 & (MAS3_SR | MAS3_SW); + if (prot != (MAS3_SR | MAS3_SW)) + return (EPERM); + + /* The address should be within the entry range. */ + entry_tsize = (tlb1[i].mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; + KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); + + entry_size = tsize2size(entry_tsize); + pa_start = tlb1[i].mas3 & MAS3_RPN; + pa_end = pa_start + entry_size - 1; + + if ((pa < pa_start) || ((pa + size) > pa_end)) + return (ERANGE); + + /* Return virtual address of this mapping. */ + *va = (tlb1[i].mas2 & MAS2_EPN) + (pa - pa_start); + return (0); +} diff --git a/sys/powerpc/booke/support.S b/sys/powerpc/booke/support.S new file mode 100644 index 0000000..b21e79c --- /dev/null +++ b/sys/powerpc/booke/support.S @@ -0,0 +1,106 @@ +/*- + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "assym.s" + +#include <machine/param.h> +#include <machine/asm.h> +#include <machine/spr.h> +#include <machine/psl.h> +#include <machine/pte.h> +#include <machine/trap.h> +#include <machine/vmparam.h> +#include <machine/tlb.h> + + .text +/* + * void remap_ccsrbar(vm_offset_t old_ccsrbar_va, vm_offset_t new_ccsrbar_va, + * vm_offset_t new_ccsrbar_pa) + * + * r3 - old_ccsrbar_va + * r4 - new_ccsrbar_va + * r5 - new_ccsrbar_pa + */ +ENTRY(remap_ccsrbar) + /* + * CCSRBAR updating sequence according + * to section 4.3.1.1.1 of MPC8555E RM. 
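+	 * The sequence is: read the current CCSRBAR value, write the new
+	 * one, perform a load from outside the CCSR space to force the
+	 * update, then read CCSRBAR back through its new location.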
+ */ + + /* Read current value of CCSRBAR */ + lwz %r6, 0(%r3) + isync + + /* Write new value */ + rlwinm %r6, %r5, 20, 12, 23 + stw %r6, 0(%r3) + + /* + * Read from address that is outside of CCSRBAR space. + * We have RAM locations available at KERNBASE. + */ + lis %r7, KERNBASE@ha + addi %r7, %r7, KERNBASE@l + lwz %r6, 0(%r7) + isync + + /* Read value of CCSRBAR from new location */ + lwz %r6, 0(%r4) + isync + blr + +/* + * void switch_to_as0(void) + */ +ENTRY(switch_to_as0) + mflr %r5 /* Save LR */ + + mfmsr %r3 + lis %r6, (PSL_IS | PSL_DS)@ha + ori %r6, %r6, (PSL_IS | PSL_DS)@l + not %r6, %r6 + and %r3, %r3, %r6 /* Clear IS/DS bits */ + + bl 1f +1: mflr %r4 /* Use current address */ + addi %r4, %r4, 20 /* Increment to instruction after rfi */ + mtspr SPR_SRR0, %r4 + mtspr SPR_SRR1, %r3 + rfi + + mtlr %r5 /* Restore LR */ + blr + +/* + * void load_pid0(tlbtid_t) + */ +ENTRY(load_pid0) + mtspr SPR_PID0, %r3 + isync + blr diff --git a/sys/powerpc/booke/swtch.S b/sys/powerpc/booke/swtch.S new file mode 100644 index 0000000..80bf2b3 --- /dev/null +++ b/sys/powerpc/booke/swtch.S @@ -0,0 +1,145 @@ +/*- + * Copyright (C) 2001 Benno Rice + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: $NetBSD: locore.S,v 1.24 2000/05/31 05:09:17 thorpej Exp $ + * from: $FreeBSD$ + * + * $FreeBSD$ + */ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "assym.s" + +#include <sys/syscall.h> + +#include <machine/trap.h> +#include <machine/param.h> +#include <machine/psl.h> +#include <machine/asm.h> + +/* + * void cpu_switch(struct thread *old, struct thread *new) + * + * Switch to a new thread saving the current state in the old thread. + */ +ENTRY(cpu_switch) + lwz %r5, TD_PCB(%r3) /* Get the old thread's PCB ptr */ + + mr %r12, %r2 + stmw %r12, PCB_CONTEXT(%r5) /* Save the non-volatile GP regs. + These can now be used for scratch */ + + mfcr %r16 /* Save the condition register */ + stw %r16, PCB_CR(%r5) + mflr %r16 /* Save the link register */ + stw %r16, PCB_LR(%r5) + mfctr %r16 + stw %r16, PCB_BOOKE_CTR(%r5) + mfxer %r16 + stw %r16, PCB_BOOKE_XER(%r5) + + stw %r1, PCB_SP(%r5) /* Save the stack pointer */ + + mr %r14, %r3 /* Copy the old thread ptr... */ + mr %r15, %r4 /* and the new thread ptr in scratch */ + + bl pmap_deactivate /* Deactivate the current pmap */ + + mr %r3, %r15 /* Get new thread ptr */ + bl pmap_activate /* Activate the new address space */ + + mfsprg %r7, 0 /* Get the pcpu pointer */ + stw %r15, PC_CURTHREAD(%r7) /* Store new current thread */ + lwz %r17, TD_PCB(%r15) /* Store new current PCB */ + stw %r17, PC_CURPCB(%r7) + + mr %r3, %r17 /* Recover PCB ptr */ + lmw %r12, PCB_CONTEXT(%r3) /* Load the non-volatile GP regs */ + mr %r2, %r12 + lwz %r5, PCB_CR(%r3) /* Load the condition register */ + mtcr %r5 + lwz %r5, PCB_LR(%r3) /* Load the link register */ + mtlr %r5 + lwz %r5, PCB_BOOKE_CTR(%r3) + mtctr %r5 + lwz %r5, PCB_BOOKE_XER(%r3) + mtxer %r5 + + lwz %r1, PCB_SP(%r3) /* Load the stack pointer */ + blr + +/* + * savectx(pcb) + * Update pcb, saving current processor state + */ +ENTRY(savectx) + mr %r12,%r2 + stmw %r12,PCB_CONTEXT(%r3) /* Save the non-volatile GP regs */ + mfcr %r4 /* Save the condition register */ + stw %r4,PCB_CONTEXT(%r3) + blr + +/* + * fork_trampoline() + * Set up the return from cpu_fork() + */ +ENTRY(fork_trampoline) + lwz %r3,CF_FUNC(%r1) + lwz %r4,CF_ARG0(%r1) + lwz %r5,CF_ARG1(%r1) + bl fork_exit + addi %r1,%r1,CF_SIZE-FSP /* Allow 8 bytes in front of + trapframe to simulate FRAME_SETUP + does when allocating space for + a frame pointer/saved LR */ + b trapexit diff --git a/sys/powerpc/booke/trap.c b/sys/powerpc/booke/trap.c new file mode 100644 index 0000000..edd49f5 --- /dev/null +++ b/sys/powerpc/booke/trap.c @@ -0,0 +1,679 @@ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: trap.c,v 1.58 2002/03/04 04:07:35 dbj Exp $ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_fpu_emu.h" +#include "opt_ktrace.h" + +#include <sys/param.h> +#include <sys/kdb.h> +#include <sys/proc.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/pioctl.h> +#include <sys/ptrace.h> +#include <sys/reboot.h> +#include <sys/syscall.h> +#include <sys/sysent.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/signalvar.h> +#ifdef KTRACE +#include <sys/ktrace.h> +#endif +#include <sys/vmmeter.h> + +#include <security/audit/audit.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_page.h> + +#include <machine/cpu.h> +#include <machine/db_machdep.h> +#include <machine/frame.h> +#include <machine/pcb.h> +#include <machine/pmap.h> +#include <machine/psl.h> +#include <machine/trap.h> +#include <machine/spr.h> + +#ifdef FPU_EMU +#include <powerpc/fpu/fpu_extern.h> +#endif + +#define FAULTBUF_LR 0 +#define FAULTBUF_R1 1 +#define FAULTBUF_R2 2 +#define FAULTBUF_CR 3 +#define FAULTBUF_CTR 4 +#define FAULTBUF_XER 5 +#define FAULTBUF_R13 6 + +static void trap_fatal(struct trapframe *frame); +static void printtrap(u_int vector, struct trapframe *frame, int isfatal, + int user); +static int trap_pfault(struct trapframe *frame, int user); +static int fix_unaligned(struct thread *td, struct trapframe *frame); +static int handle_onfault(struct trapframe *frame); +static void syscall(struct trapframe *frame); + +int setfault(faultbuf); /* defined in locore.S */ + +/* Why are these not defined in a header? 
*/ +int badaddr(void *, size_t); +int badaddr_read(void *, size_t, int *); + +extern char *syscallnames[]; + +extern char interrupt_vector_base[]; +extern char interrupt_vector_top[]; + +struct powerpc_exception { + u_int vector; + char *name; +}; + +static struct powerpc_exception powerpc_exceptions[] = { + { EXC_CRIT, "critical input" }, + { EXC_MCHK, "machine check" }, + { EXC_DSI, "data storage interrupt" }, + { EXC_ISI, "instruction storage interrupt" }, + { EXC_EXI, "external interrupt" }, + { EXC_ALI, "alignment" }, + { EXC_PGM, "program" }, + { EXC_SC, "system call" }, + { EXC_APU, "auxiliary proc unavailable" }, + { EXC_DECR, "decrementer" }, + { EXC_FIT, "fixed-interval timer" }, + { EXC_WDOG, "watchdog timer" }, + { EXC_DTMISS, "data tlb miss" }, + { EXC_ITMISS, "instruction tlb miss" }, + { EXC_DEBUG, "debug" }, + { EXC_PERF, "performance monitoring" }, + { EXC_LAST, NULL } +}; + +static const char * +trapname(u_int vector) +{ + struct powerpc_exception *pe; + + for (pe = powerpc_exceptions; pe->vector != EXC_LAST; pe++) { + if (pe->vector == vector) + return (pe->name); + } + + return ("unknown"); +} + +void +trap(struct trapframe *frame) +{ + struct thread *td; + struct proc *p; + int sig, type, user; + ksiginfo_t ksi; + + PCPU_INC(cnt.v_trap); + + td = PCPU_GET(curthread); + p = td->td_proc; + + type = frame->exc; + sig = 0; + user = (frame->srr1 & PSL_PR) ? 1 : 0; + + CTR3(KTR_TRAP, "trap: %s type=%s (%s)", p->p_comm, + trapname(type), user ? "user" : "kernel"); + + if (user) { + td->td_frame = frame; + if (td->td_ucred != p->p_ucred) + cred_update_thread(td); + + /* User Mode Traps */ + switch (type) { + case EXC_DSI: + case EXC_ISI: + sig = trap_pfault(frame, 1); + break; + + case EXC_SC: + syscall(frame); + break; + + case EXC_ALI: + if (fix_unaligned(td, frame) != 0) + sig = SIGBUS; + else + frame->srr0 += 4; + break; + + case EXC_DEBUG: /* Single stepping */ + mtspr(SPR_DBSR, mfspr(SPR_DBSR)); + frame->srr1 &= ~PSL_DE; + sig = SIGTRAP; + break; + + case EXC_PGM: /* Program exception */ +#ifdef FPU_EMU + sig = fpu_emulate(frame, + (struct fpreg *)&td->td_pcb->pcb_fpu); +#else + /* XXX SIGILL for non-trap instructions. */ + sig = SIGTRAP; +#endif + break; + + default: + trap_fatal(frame); + } + } else { + /* Kernel Mode Traps */ + KASSERT(cold || td->td_ucred != NULL, + ("kernel trap doesn't have ucred")); + + switch (type) { + case EXC_DEBUG: + mtspr(SPR_DBSR, mfspr(SPR_DBSR)); + kdb_trap(frame->exc, 0, frame); + return; + + case EXC_DSI: + if (trap_pfault(frame, 0) == 0) + return; + break; + + case EXC_MCHK: + if (handle_onfault(frame)) + return; + break; +#ifdef KDB + case EXC_PGM: + if (frame->cpu.booke.esr & ESR_PTR) + kdb_trap(EXC_PGM, 0, frame); + return; +#endif + default: + break; + } + trap_fatal(frame); + } + + if (sig != 0) { + if (p->p_sysent->sv_transtrap != NULL) + sig = (p->p_sysent->sv_transtrap)(sig, type); + ksiginfo_init_trap(&ksi); + ksi.ksi_signo = sig; + ksi.ksi_code = type; /* XXX, not POSIX */ + /* ksi.ksi_addr = ? 
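+		 * For data faults the faulting address is available in
+		 * frame->cpu.booke.dear, for instruction faults in srr0.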
*/ + ksi.ksi_trapno = type; + trapsignal(td, &ksi); + } + + userret(td, frame); + mtx_assert(&Giant, MA_NOTOWNED); +} + +static void +trap_fatal(struct trapframe *frame) +{ + + printtrap(frame->exc, frame, 1, (frame->srr1 & PSL_PR)); +#ifdef KDB + if ((debugger_on_panic || kdb_active) && + kdb_trap(frame->exc, 0, frame)) + return; +#endif + panic("%s trap", trapname(frame->exc)); +} + +static void +printtrap(u_int vector, struct trapframe *frame, int isfatal, int user) +{ + register_t va = 0; + + printf("\n"); + printf("%s %s trap:\n", isfatal ? "fatal" : "handled", + user ? "user" : "kernel"); + printf("\n"); + printf(" exception = 0x%x (%s)\n", vector, trapname(vector)); + + switch (vector) { + case EXC_DTMISS: + case EXC_DSI: + va = frame->cpu.booke.dear; + break; + + case EXC_ITMISS: + case EXC_ISI: + va = frame->srr0; + break; + } + + printf(" virtual address = 0x%08x\n", va); + printf(" srr0 = 0x%08x\n", frame->srr0); + printf(" srr1 = 0x%08x\n", frame->srr1); + printf(" curthread = %p\n", curthread); + if (curthread != NULL) + printf(" pid = %d, comm = %s\n", + curthread->td_proc->p_pid, curthread->td_proc->p_comm); + printf("\n"); +} + +/* + * Handles a fatal fault when we have onfault state to recover. Returns + * non-zero if there was onfault recovery state available. + */ +static int +handle_onfault(struct trapframe *frame) +{ + struct thread *td; + faultbuf *fb; + + td = curthread; + fb = td->td_pcb->pcb_onfault; + if (fb != NULL) { + frame->srr0 = (*fb)[FAULTBUF_LR]; + frame->fixreg[1] = (*fb)[FAULTBUF_R1]; + frame->fixreg[2] = (*fb)[FAULTBUF_R2]; + frame->fixreg[3] = 1; + frame->cr = (*fb)[FAULTBUF_CR]; + frame->ctr = (*fb)[FAULTBUF_CTR]; + frame->xer = (*fb)[FAULTBUF_XER]; + bcopy(&(*fb)[FAULTBUF_R13], &frame->fixreg[13], + 19 * sizeof(register_t)); + return (1); + } + return (0); +} + +void +syscall(struct trapframe *frame) +{ + caddr_t params; + struct sysent *callp; + struct thread *td; + struct proc *p; + int error, n; + size_t narg; + register_t args[10]; + u_int code; + + td = PCPU_GET(curthread); + p = td->td_proc; + + PCPU_INC(cnt.v_syscall); + +#if KSE + if (p->p_flag & P_SA) + thread_user_enter(td); +#endif + + code = frame->fixreg[0]; + params = (caddr_t)(frame->fixreg + FIRSTARG); + n = NARGREG; + + if (p->p_sysent->sv_prepsyscall) { + /* + * The prep code is MP aware. + */ + (*p->p_sysent->sv_prepsyscall)(frame, args, &code, ¶ms); + } else if (code == SYS_syscall) { + /* + * code is first argument, + * followed by actual args. + */ + code = *(u_int *) params; + params += sizeof(register_t); + n -= 1; + } else if (code == SYS___syscall) { + /* + * Like syscall, but code is a quad, + * so as to maintain quad alignment + * for the rest of the args. 
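+			 * The quad occupies two registers; the syscall
+			 * number is taken from its low word, i.e. the second
+			 * register on this big-endian CPU.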
+ */ + params += sizeof(register_t); + code = *(u_int *) params; + params += sizeof(register_t); + n -= 2; + } + + if (p->p_sysent->sv_mask) + code &= p->p_sysent->sv_mask; + + if (code >= p->p_sysent->sv_size) + callp = &p->p_sysent->sv_table[0]; + else + callp = &p->p_sysent->sv_table[code]; + + narg = callp->sy_narg; + + if (narg > n) { + bcopy(params, args, n * sizeof(register_t)); + error = copyin(MOREARGS(frame->fixreg[1]), args + n, + (narg - n) * sizeof(register_t)); + params = (caddr_t)args; + } else + error = 0; + + CTR5(KTR_SYSC, "syscall: p=%s %s(%x %x %x)", p->p_comm, + syscallnames[code], + frame->fixreg[FIRSTARG], + frame->fixreg[FIRSTARG+1], + frame->fixreg[FIRSTARG+2]); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSCALL)) + ktrsyscall(code, narg, (register_t *)params); +#endif + + td->td_syscalls++; + + if (error == 0) { + td->td_retval[0] = 0; + td->td_retval[1] = frame->fixreg[FIRSTARG + 1]; + + STOPEVENT(p, S_SCE, narg); + + PTRACESTOP_SC(p, td, S_PT_SCE); + + AUDIT_SYSCALL_ENTER(code, td); + error = (*callp->sy_call)(td, params); + AUDIT_SYSCALL_EXIT(error, td); + + CTR3(KTR_SYSC, "syscall: p=%s %s ret=%x", p->p_comm, + syscallnames[code], td->td_retval[0]); + } + + switch (error) { + case 0: + if (frame->fixreg[0] == SYS___syscall && SYS_lseek) { + /* + * 64-bit return, 32-bit syscall. Fixup byte order + */ + frame->fixreg[FIRSTARG] = 0; + frame->fixreg[FIRSTARG + 1] = td->td_retval[0]; + } else { + frame->fixreg[FIRSTARG] = td->td_retval[0]; + frame->fixreg[FIRSTARG + 1] = td->td_retval[1]; + } + /* XXX: Magic number */ + frame->cr &= ~0x10000000; + break; + case ERESTART: + /* + * Set user's pc back to redo the system call. + */ + frame->srr0 -= 4; + break; + case EJUSTRETURN: + /* nothing to do */ + break; + default: + if (p->p_sysent->sv_errsize) { + if (error >= p->p_sysent->sv_errsize) + error = -1; /* XXX */ + else + error = p->p_sysent->sv_errtbl[error]; + } + frame->fixreg[FIRSTARG] = error; + /* XXX: Magic number: Carry Flag Equivalent? */ + frame->cr |= 0x10000000; + break; + } + + /* + * Check for misbehavior. + */ + WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", + (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); + KASSERT(td->td_critnest == 0, + ("System call %s returning in a critical section", + (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???")); + KASSERT(td->td_locks == 0, + ("System call %s returning with %d locks held", + (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???", + td->td_locks)); + +#ifdef KTRACE + if (KTRPOINT(td, KTR_SYSRET)) + ktrsysret(code, error, td->td_retval[0]); +#endif + + /* + * Does the comment in the i386 code about errno apply here? 
+ */ + STOPEVENT(p, S_SCX, code); + + PTRACESTOP_SC(p, td, S_PT_SCX); +} + +static int +trap_pfault(struct trapframe *frame, int user) +{ + vm_offset_t eva, va; + struct thread *td; + struct proc *p; + vm_map_t map; + vm_prot_t ftype; + int rv; + + td = curthread; + p = td->td_proc; + + if (frame->exc == EXC_ISI) { + eva = frame->srr0; + ftype = VM_PROT_READ | VM_PROT_EXECUTE; + + } else { + eva = frame->cpu.booke.dear; + if (frame->cpu.booke.esr & ESR_ST) + ftype = VM_PROT_WRITE; + else + ftype = VM_PROT_READ; + } + + if (user) { + KASSERT(p->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); + map = &p->p_vmspace->vm_map; + } else { + if (eva < VM_MAXUSER_ADDRESS) { + + if (p->p_vmspace == NULL) + return (SIGSEGV); + + map = &p->p_vmspace->vm_map; + + } else { + map = kernel_map; + } + } + va = trunc_page(eva); + + if (map != kernel_map) { + /* + * Keep swapout from messing with us during this + * critical time. + */ + PROC_LOCK(p); + ++p->p_lock; + PROC_UNLOCK(p); + + /* Fault in the user page: */ + rv = vm_fault(map, va, ftype, + (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : VM_FAULT_NORMAL); + + PROC_LOCK(p); + --p->p_lock; + PROC_UNLOCK(p); + } else { + /* + * Don't have to worry about process locking or stacks in the + * kernel. + */ + rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + } + + if (rv == KERN_SUCCESS) + return (0); + + if (!user && handle_onfault(frame)) + return (0); + + return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); +} + +int +badaddr(void *addr, size_t size) +{ + + return (badaddr_read(addr, size, NULL)); +} + +int +badaddr_read(void *addr, size_t size, int *rptr) +{ + struct thread *td; + faultbuf env; + int x; + + /* Get rid of any stale machine checks that have been waiting. */ + __asm __volatile ("sync; isync"); + + td = PCPU_GET(curthread); + + if (setfault(env)) { + td->td_pcb->pcb_onfault = 0; + __asm __volatile ("sync"); + return (1); + } + + __asm __volatile ("sync"); + + switch (size) { + case 1: + x = *(volatile int8_t *)addr; + break; + case 2: + x = *(volatile int16_t *)addr; + break; + case 4: + x = *(volatile int32_t *)addr; + break; + default: + panic("badaddr: invalid size (%d)", size); + } + + /* Make sure we took the machine check, if we caused one. */ + __asm __volatile ("sync; isync"); + + td->td_pcb->pcb_onfault = 0; + __asm __volatile ("sync"); /* To be sure. */ + + /* Use the value to avoid reorder. */ + if (rptr) + *rptr = x; + + return (0); +} + +/* + * For now, this only deals with the particular unaligned access case + * that gcc tends to generate. Eventually it should handle all of the + * possibilities that can happen on a 32-bit PowerPC in big-endian mode. + */ + +static int +fix_unaligned(struct thread *td, struct trapframe *frame) +{ +#if 0 + struct thread *fputhread; + int indicator, reg; + double *fpr; + + indicator = EXC_ALI_OPCODE_INDICATOR(frame->dsisr); + + switch (indicator) { + case EXC_ALI_LFD: + case EXC_ALI_STFD: + reg = EXC_ALI_RST(frame->dsisr); + fpr = &td->td_pcb->pcb_fpu.fpr[reg]; + fputhread = PCPU_GET(fputhread); + /* Juggle the FPU to ensure that we've initialized + * the FPRs, and that their current state is in + * the PCB. 
+ */ + if (fputhread != td) { + if (fputhread) + save_fpu(fputhread); + enable_fpu(td); + } + save_fpu(td); + + if (indicator == EXC_ALI_LFD) { + if (copyin((void *)frame->dar, fpr, + sizeof(double)) != 0) + return -1; + enable_fpu(td); + } else { + if (copyout(fpr, (void *)frame->dar, + sizeof(double)) != 0) + return -1; + } + return 0; + break; + } + +#endif + return (-1); +} + +#ifdef KDB +int db_trap_glue(struct trapframe *); +int +db_trap_glue(struct trapframe *tf) +{ + if (!(tf->srr1 & PSL_PR)) + return (kdb_trap(tf->exc, 0, tf)); + return (0); +} +#endif diff --git a/sys/powerpc/booke/trap_subr.S b/sys/powerpc/booke/trap_subr.S new file mode 100644 index 0000000..f9da569 --- /dev/null +++ b/sys/powerpc/booke/trap_subr.S @@ -0,0 +1,842 @@ +/*- + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * Copyright (C) 2006 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * Copyright (C) 2006 Juniper Networks, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: $NetBSD: trap_subr.S,v 1.20 2002/04/22 23:20:08 kleink Exp $ + */ + +/* + * NOTICE: This is not a standalone file. to use it, #include it in + * your port's locore.S, like so: + * + * #include <powerpc/booke/trap_subr.S> + */ + +/* + * SPRG usage notes + * + * SPRG0 - pcpu pointer + * SPRG1 - all interrupts except TLB miss, critical, machine check + * SPRG2 - critical + * SPRG3 - machine check + * + */ + +/* Get the per-CPU data structure */ +#define GET_CPUINFO(r) mfsprg0 r + +/* + * Standard interrupt prolog + * + * sprg_sp - SPRG{1-3} reg used to temporarily store the SP + * savearea - temp save area (pc_{tempsave, disisave, critsave, mchksave}) + * isrr0-1 - save restore registers with CPU state at interrupt time (may be + * SRR0-1, CSRR0-1, MCSRR0-1 + * + * 1. saves in the given savearea: + * - R30-31 + * - DEAR, ESR + * - xSRR0-1 + * + * 2. saves CR -> R30 + * + * 3. switches to kstack if needed + * + * 4. notes: + * - R31 can be used as scratch register until a new frame is layed on + * the stack with FRAME_SETUP + * + * - potential TLB miss: NO. 
Saveareas are always acessible via TLB1 + * permanent entries, and within this prolog we do not dereference any + * locations potentially not in the TLB + */ +#define STANDARD_PROLOG(sprg_sp, savearea, isrr0, isrr1) \ + mtspr sprg_sp, %r1; /* Save SP */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + stw %r30, (savearea+CPUSAVE_R30)(%r1); \ + stw %r31, (savearea+CPUSAVE_R31)(%r1); \ + mfdear %r30; \ + mfesr %r31; \ + stw %r30, (savearea+CPUSAVE_BOOKE_DEAR)(%r1); \ + stw %r31, (savearea+CPUSAVE_BOOKE_ESR)(%r1); \ + mfspr %r30, isrr0; \ + mfspr %r31, isrr1; /* MSR at interrupt time */ \ + stw %r30, (savearea+CPUSAVE_SRR0)(%r1); \ + stw %r31, (savearea+CPUSAVE_SRR1)(%r1); \ + isync; \ + mfspr %r1, sprg_sp; /* Restore SP */ \ + mfcr %r30; /* Save CR */ \ + /* switch to per-thread kstack if intr taken in user mode */ \ + mtcr %r31; /* MSR at interrupt time */ \ + bf 17, 1f; \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + lwz %r1, PC_CURPCB(%r1); /* Per-thread kernel stack */ \ +1: + +#define STANDARD_CRIT_PROLOG(sprg_sp, savearea, isrr0, isrr1) \ + mtspr sprg_sp, %r1; /* Save SP */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + stw %r30, (savearea+CPUSAVE_R30)(%r1); \ + stw %r31, (savearea+CPUSAVE_R31)(%r1); \ + mfdear %r30; \ + mfesr %r31; \ + stw %r30, (savearea+CPUSAVE_BOOKE_DEAR)(%r1); \ + stw %r31, (savearea+CPUSAVE_BOOKE_ESR)(%r1); \ + mfspr %r30, isrr0; \ + mfspr %r31, isrr1; /* MSR at interrupt time */ \ + stw %r30, (savearea+CPUSAVE_SRR0)(%r1); \ + stw %r31, (savearea+CPUSAVE_SRR1)(%r1); \ + mfspr %r30, SPR_SRR0; \ + mfspr %r31, SPR_SRR1; /* MSR at interrupt time */ \ + stw %r30, (savearea+CPUSAVE_SRR0+8)(%r1); \ + stw %r31, (savearea+CPUSAVE_SRR1+8)(%r1); \ + isync; \ + mfspr %r1, sprg_sp; /* Restore SP */ \ + mfcr %r30; /* Save CR */ \ + /* switch to per-thread kstack if intr taken in user mode */ \ + mtcr %r31; /* MSR at interrupt time */ \ + bf 17, 1f; \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + lwz %r1, PC_CURPCB(%r1); /* Per-thread kernel stack */ \ +1: + +/* + * FRAME_SETUP assumes: + * SPRG{1-3} SP at the time interrupt occured + * savearea r30-r31, DEAR, ESR, xSRR0-1 + * r30 CR + * r31 scratch + * r1 kernel stack + * + * sprg_sp - SPRG reg containing SP at the time interrupt occured + * savearea - temp save + * exc - exception number (EXC_xxx) + * + * 1. sets a new frame + * 2. saves in the frame: + * - R0, R1 (SP at the time of interrupt), R2, LR, CR + * - R3-31 (R30-31 first restored from savearea) + * - XER, CTR, DEAR, ESR (from savearea), xSRR0-1 + * + * Notes: + * - potential TLB miss: YES, since we make dereferences to kstack, which + * can happen not covered (we can have up to two DTLB misses if fortunate + * enough i.e. 
when kstack crosses page boundary and both pages are + * untranslated) + */ +#define FRAME_SETUP(sprg_sp, savearea, exc) \ + mfspr %r31, sprg_sp; /* get saved SP */ \ + /* establish a new stack frame and put everything on it */ \ + stwu %r31, -FRAMELEN(%r1); \ + stw %r0, FRAME_0+8(%r1); /* save r0 in the trapframe */ \ + stw %r31, FRAME_1+8(%r1); /* save SP " " */ \ + stw %r2, FRAME_2+8(%r1); /* save r2 " " */ \ + mflr %r31; \ + stw %r31, FRAME_LR+8(%r1); /* save LR " " */ \ + stw %r30, FRAME_CR+8(%r1); /* save CR " " */ \ + GET_CPUINFO(%r2); \ + lwz %r30, (savearea+CPUSAVE_R30)(%r2); /* get saved r30 */ \ + lwz %r31, (savearea+CPUSAVE_R31)(%r2); /* get saved r31 */ \ + /* save R3-31 */ \ + stmw %r3, FRAME_3+8(%r1) ; \ + /* save DEAR, ESR */ \ + lwz %r28, (savearea+CPUSAVE_BOOKE_DEAR)(%r2); \ + lwz %r29, (savearea+CPUSAVE_BOOKE_ESR)(%r2); \ + stw %r28, FRAME_BOOKE_DEAR+8(1); \ + stw %r29, FRAME_BOOKE_ESR+8(1); \ + /* save XER, CTR, exc number */ \ + mfxer %r3; \ + mfctr %r4; \ + stw %r3, FRAME_XER+8(1); \ + stw %r4, FRAME_CTR+8(1); \ + li %r5, exc; \ + stw %r5, FRAME_EXC+8(1); \ + /* save xSSR0-1 */ \ + lwz %r30, (savearea+CPUSAVE_SRR0)(%r2); \ + lwz %r31, (savearea+CPUSAVE_SRR1)(%r2); \ + stw %r30, FRAME_SRR0+8(1); \ + stw %r31, FRAME_SRR1+8(1) + + +/* + * + * isrr0-1 - save restore registers to restore CPU state to (may be + * SRR0-1, CSRR0-1, MCSRR0-1 + * + * Notes: + * - potential TLB miss: YES. The deref'd kstack may be not covered + */ +#define FRAME_LEAVE(isrr0, isrr1) \ + /* restore CTR, XER, LR, CR */ \ + lwz %r4, FRAME_CTR+8(%r1); \ + lwz %r5, FRAME_XER+8(%r1); \ + lwz %r6, FRAME_LR+8(%r1); \ + lwz %r7, FRAME_CR+8(%r1); \ + mtctr %r4; \ + mtxer %r5; \ + mtlr %r6; \ + mtcr %r7; \ + /* restore xSRR0-1 */ \ + lwz %r30, FRAME_SRR0+8(%r1); \ + lwz %r31, FRAME_SRR1+8(%r1); \ + mtspr isrr0, %r30; \ + mtspr isrr1, %r31; \ + /* restore R2-31, SP */ \ + lmw %r2, FRAME_2+8(%r1) ; \ + lwz %r0, FRAME_0+8(%r1); \ + lwz %r1, FRAME_1+8(%r1); \ + isync + +/* + * TLB miss prolog + * + * saves LR, CR, SRR0-1, R20-31 in the TLBSAVE area + * + * Notes: + * - potential TLB miss: NO. It is crucial that we do not generate a TLB + * miss withing the TLB prolog itself! 
+ * - TLBSAVE is always translated + */ +#define TLB_PROLOG \ + mtsprg4 %r1; /* Save SP */ \ + mtsprg5 %r28; \ + mtsprg6 %r29; \ + /* calculate TLB nesting level and TLBSAVE instance address */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + lwz %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + rlwinm %r29, %r28, 6, 24, 25; /* 4 x TLBSAVE_LEN */ \ + addi %r28, %r28, 1; \ + stw %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + addi %r29, %r29, PC_BOOKE_TLBSAVE@l; \ + add %r1, %r1, %r29; /* current TLBSAVE ptr */ \ + \ + /* save R20-31 */ \ + mfsprg5 %r28; \ + mfsprg6 %r29; \ + stmw %r20, (TLBSAVE_BOOKE_R20)(%r1); \ + /* save LR, CR */ \ + mflr %r30; \ + mfcr %r31; \ + stw %r30, (TLBSAVE_BOOKE_LR)(%r1); \ + stw %r31, (TLBSAVE_BOOKE_CR)(%r1); \ + /* save SRR0-1 */ \ + mfsrr0 %r30; /* execution addr at interrupt time */ \ + mfsrr1 %r31; /* MSR at interrupt time*/ \ + stw %r30, (TLBSAVE_BOOKE_SRR0)(%r1); /* save SRR0 */ \ + stw %r31, (TLBSAVE_BOOKE_SRR1)(%r1); /* save SRR1 */ \ + isync; \ + mfsprg4 %r1 + +/* + * restores LR, CR, SRR0-1, R20-31 from the TLBSAVE area + * + * same notes as for the TLB_PROLOG + */ +#define TLB_RESTORE \ + mtsprg4 %r1; /* Save SP */ \ + GET_CPUINFO(%r1); /* Per-cpu structure */ \ + /* calculate TLB nesting level and TLBSAVE instance addr */ \ + lwz %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + subi %r28, %r28, 1; \ + stw %r28, PC_BOOKE_TLB_LEVEL(%r1); \ + rlwinm %r29, %r28, 6, 24, 25; /* 4 x TLBSAVE_LEN */ \ + addi %r29, %r29, PC_BOOKE_TLBSAVE@l; \ + add %r1, %r1, %r29; \ + \ + /* restore LR, CR */ \ + lwz %r30, (TLBSAVE_BOOKE_LR)(%r1); \ + lwz %r31, (TLBSAVE_BOOKE_CR)(%r1); \ + mtlr %r30; \ + mtcr %r31; \ + /* restore SRR0-1 */ \ + lwz %r30, (TLBSAVE_BOOKE_SRR0)(%r1); \ + lwz %r31, (TLBSAVE_BOOKE_SRR1)(%r1); \ + mtsrr0 %r30; \ + mtsrr1 %r31; \ + /* restore R20-31 */ \ + lmw %r20, (TLBSAVE_BOOKE_R20)(%r1); \ + mfsprg4 %r1 + + +#define INTERRUPT(label) \ + .globl label; \ + .align 5; \ + CNAME(label): + +/* + * Interrupt handling routines in BookE can be flexibly placed and do not have + * to live in pre-defined vectors location. Note they need to be TLB-mapped at + * all times in order to be able to handle exceptions. We thus arrange for + * them to be part of kernel text which is always TLB-accessible. + * + * The interrupt handling routines have to be 16 bytes aligned: we align them + * to 32 bytes (cache line length) which supposedly performs better. 
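[Editor's note] TLB_PROLOG and TLB_RESTORE above keep a per-CPU nesting level and use a single rlwinm to turn it into the byte offset of the TLBSAVE slot for the current nesting depth. The rotate-by-6 with a two-bit mask implies one slot is 64 bytes and that four slots exist; both numbers are assumptions read off the instruction, and the userland sketch below only checks that arithmetic.

#include <stdint.h>
#include <stdio.h>

/* Assumed from the shift/mask in the assembly: four slots of 64 bytes each. */
#define TLBSAVE_LEN	64
#define TLB_SLOTS	4

/* Equivalent of: rlwinm %r29, %r28, 6, 24, 25  (rotate left 6, keep bits 24-25). */
static uint32_t
tlbsave_offset(uint32_t level)
{
	return ((level << 6) & 0xc0);	/* (level % TLB_SLOTS) * TLBSAVE_LEN */
}

int
main(void)
{
	for (uint32_t level = 0; level < TLB_SLOTS; level++)
		printf("nesting level %u -> TLBSAVE offset %u\n",
		    level, tlbsave_offset(level));
	return (0);
}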
+ * + */ + .text + .globl CNAME(interrupt_vector_base) + .align 5 +interrupt_vector_base: + +/**************************************** + * Critical input interrupt + ****************************************/ +INTERRUPT(int_critical_input) + STANDARD_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_CSRR0, SPR_CSRR1) + FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_CRIT) + addi %r3, %r1, 8 + bl CNAME(powerpc_crit_interrupt) + FRAME_LEAVE(SPR_CSRR0, SPR_CSRR1) + rfci + + +/**************************************** + * Machine check interrupt + ****************************************/ +INTERRUPT(int_machine_check) + STANDARD_PROLOG(SPR_SPRG3, PC_BOOKE_MCHKSAVE, SPR_MCSRR0, SPR_MCSRR1) + FRAME_SETUP(SPR_SPRG3, PC_BOOKE_MCHKSAVE, EXC_MCHK) + addi %r3, %r1, 8 + bl CNAME(powerpc_mchk_interrupt) + FRAME_LEAVE(SPR_MCSRR0, SPR_MCSRR1) + rfmci + + +/**************************************** + * Data storage interrupt + ****************************************/ +INTERRUPT(int_data_storage) + STANDARD_PROLOG(SPR_SPRG1, PC_DISISAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_DISISAVE, EXC_DSI) + b trap_common + + +/**************************************** + * Instruction storage interrupt + ****************************************/ +INTERRUPT(int_instr_storage) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_ISI) + b trap_common + + +/**************************************** + * External input interrupt + ****************************************/ +INTERRUPT(int_external_input) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_EXI) + bl CNAME(powerpc_extr_interrupt) + b trapexit + + +INTERRUPT(int_alignment) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_ALI) + b trap_common + + +INTERRUPT(int_program) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_PGM) + b trap_common + + +/**************************************** + * System call + ****************************************/ +INTERRUPT(int_syscall) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_SC) + b trap_common + + +/**************************************** + * Decrementer interrupt + ****************************************/ +INTERRUPT(int_decrementer) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_DECR) + addi %r3, %r1, 8 + bl CNAME(powerpc_decr_interrupt) + b trapexit + + +/**************************************** + * Fixed interval timer + ****************************************/ +INTERRUPT(int_fixed_interval_timer) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_FIT) + b trap_common + + +/**************************************** + * Watchdog interrupt + ****************************************/ +INTERRUPT(int_watchdog) + STANDARD_PROLOG(SPR_SPRG1, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG1, PC_TEMPSAVE, EXC_WDOG) + b trap_common + + +/**************************************** + * Data TLB miss interrupt + * + * There can be nested TLB misses - while + * handling a TLB miss we dereference data + * structures that may be not covered by + * translations. We support up to + * TLB_NESTED_MAX-1 nested misses. 
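[Editor's note] Before the register map that follows, it may help to see the handler's decision tree written out in plain C. This is only a rendering of the control flow of int_data_tlb_error below; the helpers stand in for pte_lookup and the TLB-fill paths, and their C signatures, the stub bodies, and the VM_MAXUSER_ADDRESS value are invented for the example.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define VM_MAXUSER_ADDRESS	0x7fffffffU	/* illustrative value only */

struct pmap { int dummy; };
static struct pmap kernel_pmap_store;
static struct pmap user_pmap;			/* stands in for PC_CURPMAP */

/* Stand-ins for the assembly routines below; signatures are invented. */
static uint32_t *pte_lookup(struct pmap *pm, uint32_t va) { (void)pm; (void)va; return (NULL); }
static void tlb_fill_entry(uint32_t *pte) { (void)pte; }
static void tlb_fill_dummy_entry(uint32_t va) { printf("dummy entry for %#x, DSI/ISI follows\n", va); }

/*
 * Control flow of int_data_tlb_error, minus the MAS save/restore and the
 * nesting bookkeeping done by TLB_PROLOG/TLB_RESTORE.
 */
static void
data_tlb_miss(uint32_t dear, int user_mode)
{
	struct pmap *pm;
	uint32_t *pte;

	if (dear < VM_MAXUSER_ADDRESS) {
		pm = &user_pmap;		/* user VA: current process pmap */
	} else if (user_mode) {
		tlb_fill_dummy_entry(dear);	/* user touching a kernel VA */
		return;
	} else {
		pm = &kernel_pmap_store;	/* kernel VA: kernel pmap, TID 0 */
	}

	pte = pte_lookup(pm, dear);		/* may itself take a nested miss */
	if (pte == NULL)
		tlb_fill_dummy_entry(dear);	/* fake RPN, no rights: force DSI */
	else
		tlb_fill_entry(pte);		/* program MAS1-MAS3 and tlbwe */
}

int
main(void)
{
	data_tlb_miss(0x00001000, 1);		/* user VA, lookup fails in stub */
	data_tlb_miss(0xc0001000, 1);		/* user mode touching kernel VA */
	return (0);
}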
+ * + * Registers use: + * r31 - dear + * r30 - tlb0 entry address + * r29 - saved mas0 + * r28 - saved mas1 + * r27 - saved mas2 + * r26 - pmap address + * r25 - pte address + * + * r20:r23 - scratch registers + ****************************************/ +INTERRUPT(int_data_tlb_error) + TLB_PROLOG + + mfdear %r31 + + /* + * Save MAS0-MAS2 registers. There might be another tlb miss during pte + * lookup overwriting current contents (which was hw filled). + */ + mfspr %r29, SPR_MAS0 + mfspr %r28, SPR_MAS1 + mfspr %r27, SPR_MAS2 + + /* return tlb0 entry address in r30 */ + bl get_tlb0table_entry + + /* Check faulting address. */ + lis %r21, VM_MAXUSER_ADDRESS@h + ori %r21, %r21, VM_MAXUSER_ADDRESS@l + cmplw cr0, %r31, %r21 + blt search_user_pmap + + /* If it's kernel address, allow only supervisor mode misses. */ + mfsrr1 %r21 + mtcr %r21 + bt 17, search_failed /* check MSR[PR] */ + +search_kernel_pmap: + /* Load r26 with kernel_pmap address */ + lis %r26, kernel_pmap_store@h + ori %r26, %r26, kernel_pmap_store@l + + /* Force kernel tid, set TID to 0 in MAS1. */ + li %r21, 0 + rlwimi %r28, %r21, 0, 8, 15 /* clear TID bits */ + +tlb_miss_handle: + /* This may result in nested tlb miss. */ + bl pte_lookup /* returns PTE address in R25 */ + + cmpwi %r25, 0 /* pte found? */ + beq search_failed + + /* Finish up, write TLB entry. */ + bl tlb_fill_entry + +tlb_miss_return: + TLB_RESTORE + rfi + +search_user_pmap: + /* Load r26 with current user space process pmap */ + GET_CPUINFO(%r26) + lwz %r26, PC_CURPMAP(%r26) + + b tlb_miss_handle + +search_failed: + /* + * Whenever we don't find a TLB mapping in PT, set a TLB0 entry with + * the faulting virtual address anyway, but put a fake RPN and no + * access rights. This should cause a following {D,I}SI exception. + */ + lis %r23, 0xffff0000@h /* revoke all permissions */ + + /* Save MAS registers to tlb0[] table. */ + stw %r28, TLB0TABLE_MAS1(%r30) /* write tlb0[idx].mas1 */ + stw %r27, TLB0TABLE_MAS2(%r30) /* write tlb0[idx].mas2 */ + stw %r23, TLB0TABLE_MAS3(%r30) /* write tlb0[idx].mas3 */ + + /* Load MAS registers. */ + mtspr SPR_MAS0, %r29 + isync + mtspr SPR_MAS1, %r28 + isync + mtspr SPR_MAS2, %r27 + isync + mtspr SPR_MAS3, %r23 + isync + + tlbwe + msync + isync + b tlb_miss_return + +/******************************************************/ +/* + * Calculate address of tlb0[tlb0table_idx], save it in r30 + * + * tlb0table_idx = (way * entries_per_way) + entry_number + * entries_per_way = 128 + * entry_number is defined by EPN[45:51] + * + * input: r31 - faulting address + * input: r29 - MAS0 + * output: r30 - address of corresponding tlb0[] entry + * + * scratch regs used: r21-r23 + */ +/******************************************************/ +get_tlb0table_entry: + lis %r21, 0 /* keeps tlb0table_idx */ + + /* Add entry number, use DEAR from r31 (faulting va) */ + rlwinm %r22, %r31, 20, 25, 31 /* get EPN[45:51] */ + add %r21, %r21, %r22 + + /* Select way */ + rlwinm %r22, %r29, 16, 30, 31 /* get way# = ESEL[0:1] */ + + /* Get number of entries per tlb0 way. 
*/ + lis %r23, tlb0_nentries_per_way@h + ori %r23, %r23, tlb0_nentries_per_way@l + lwz %r23, 0(%r23) + + mullw %r22, %r22, %r23 /* multiply by #entries per way */ + add %r21, %r21, %r22 + + mulli %r21, %r21, TLB0_ENTRY_SIZE /* multipy by tlb0 entry size */ + + /* Get tlb0[tlb0tble_idx] address, save it in r30 */ + lis %r30, tlb0@h + ori %r30, %r30, tlb0@l + lwz %r30, 0(%r30) + add %r30, %r30, %r21 + blr + + +/******************************************************/ +/* + * Return pte address that corresponds to given pmap/va. + * If there is no valid entry return 0. + * + * input: r26 - pmap + * input: r31 - dear + * output: r25 - pte address + * + * scratch regs used: r21 + */ +/******************************************************/ +pte_lookup: + cmpwi %r26, 0 + beq 1f /* fail quickly if pmap is invalid */ + + srwi %r21, %r31, PDIR_SHIFT /* pdir offset */ + slwi %r21, %r21, PDIR_ENTRY_SHIFT /* multiply by pdir entry size */ + + addi %r25, %r26, PM_PDIR /* pmap pm_dir[] address */ + add %r25, %r25, %r21 /* offset within pm_pdir[] table */ + lwz %r25, 0(%r25) /* get ptbl address, i.e. pmap->pm_pdir[pdir_idx] */ + + cmpwi %r25, 0 + beq 2f + + lis %r21, PTBL_MASK@h + ori %r21, %r21, PTBL_MASK@l + and %r21, %r21, %r31 + + /* ptbl offset, multiply by ptbl entry size */ + srwi %r21, %r21, (PTBL_SHIFT - PTBL_ENTRY_SHIFT) + + add %r25, %r25, %r21 /* address of pte entry */ + lwz %r21, PTE_FLAGS(%r25) /* get pte->flags */ + andis. %r21, %r21, PTE_VALID@h + bne 2f +1: + li %r25, 0 +2: + blr + +/******************************************************/ +/* + * Save MAS1-MAS3 registers to tlb0[] table, write TLB entry + * + * input: + * r29 - mas0 + * r28 - mas1 + * r27 - mas2 + * r25 - pte + * r30 - tlb0 entry address + * + * output: none + * + * scratch regs: r21-r23 + */ +/******************************************************/ +tlb_fill_entry: + /* Handle pte flags. */ + lwz %r21, PTE_FLAGS(%r25) /* get pte->flags */ + oris %r21, %r21, PTE_REFERENCED@h /* set referenced bit */ + + andi. %r22, %r21, (PTE_UW | PTE_UW)@l /* check if writable */ + beq 1f + oris %r21, %r21, PTE_MODIFIED@h /* set modified bit */ +1: + stw %r21, PTE_FLAGS(%r25) /* write it back */ + + /* Update MAS2. */ + rlwimi %r27, %r21, 0, 27, 30 /* insert WIMG bits from pte */ + + /* Setup MAS3 value in r23. */ + lwz %r23, PTE_RPN(%r25) /* get pte->rpn */ + + rlwimi %r23, %r21, 24, 26, 31 /* insert protection bits from pte */ + + /* Save MAS registers to tlb0[] table. */ + stw %r28, TLB0TABLE_MAS1(%r30) /* write tlb0[idx].mas1 */ + stw %r27, TLB0TABLE_MAS2(%r30) /* write tlb0[idx].mas2 */ + stw %r23, TLB0TABLE_MAS3(%r30) /* write tlb0[idx].mas3 */ + + /* Load MAS registers. */ + mtspr SPR_MAS0, %r29 + isync + mtspr SPR_MAS1, %r28 + isync + mtspr SPR_MAS2, %r27 + isync + mtspr SPR_MAS3, %r23 + isync + + tlbwe + isync + msync + blr + +/**************************************** + * Instruction TLB miss interrupt + * + * Same notes as for the Data TLB miss + * + ****************************************/ +INTERRUPT(int_inst_tlb_error) + TLB_PROLOG + + mfsrr0 %r31 /* faulting address */ + + /* + * Save MAS0-MAS2 registers. There might be another tlb miss during pte + * lookup overwriting current contents (which was hw filled). 
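[Editor's note] pte_lookup above is a straightforward two-level walk: the upper VA bits index the per-pmap page directory (pm_pdir) to find a page table, the middle bits index that table to find the pte, and a pte without PTE_VALID counts as a miss. The same walk in standalone C; the shift/mask constants and the struct layouts are chosen only so the example runs, the real ones live in the pmap/pte headers.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12			/* assumptions for the example */
#define PDIR_SHIFT	22			/* VA bits selecting the pdir slot */
#define PTBL_MASK	0x003ff000U		/* VA bits selecting the pte slot */
#define PTE_VALID	0x80000000U

struct pte {
	uint32_t flags;
	uint32_t rpn;
};

struct pmap {
	struct pte *pm_pdir[1 << (32 - PDIR_SHIFT)];	/* pdir: ptbl pointers */
};

/* C rendering of the pte_lookup assembly routine: two-level table walk. */
static struct pte *
pte_lookup(struct pmap *pm, uint32_t va)
{
	struct pte *ptbl, *pte;

	if (pm == NULL)
		return (NULL);
	ptbl = pm->pm_pdir[va >> PDIR_SHIFT];		/* first level */
	if (ptbl == NULL)
		return (NULL);
	pte = &ptbl[(va & PTBL_MASK) >> PAGE_SHIFT];	/* second level */
	if ((pte->flags & PTE_VALID) == 0)
		return (NULL);				/* invalid: report a miss */
	return (pte);
}

int
main(void)
{
	static struct pmap pm;
	static struct pte ptbl[1024];
	uint32_t va = 0x00403000;

	pm.pm_pdir[va >> PDIR_SHIFT] = ptbl;
	ptbl[(va & PTBL_MASK) >> PAGE_SHIFT].flags = PTE_VALID;
	printf("pte for %#x: %p\n", va, (void *)pte_lookup(&pm, va));
	return (0);
}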
+ */ + mfspr %r29, SPR_MAS0 + mfspr %r28, SPR_MAS1 + mfspr %r27, SPR_MAS2 + + /* return tlb0 entry address in r30 */ + bl get_tlb0table_entry + + mfsrr1 %r21 + mtcr %r21 + + /* check MSR[PR] */ + bt 17, search_user_pmap + b search_kernel_pmap + + + .globl interrupt_vector_top +interrupt_vector_top: + +/**************************************** + * Debug interrupt + ****************************************/ +INTERRUPT(int_debug) + STANDARD_CRIT_PROLOG(SPR_SPRG2, PC_BOOKE_CRITSAVE, SPR_CSRR0, SPR_CSRR1) + FRAME_SETUP(SPR_SPRG2, PC_BOOKE_CRITSAVE, EXC_DEBUG) + lwz %r3, (PC_BOOKE_CRITSAVE+CPUSAVE_SRR0)(%r2); + lis %r4, interrupt_vector_base@ha + addi %r4, %r4, interrupt_vector_base@l + cmplw cr0, %r3, %r4 + blt 1f + lis %r4, interrupt_vector_top@ha + addi %r4, %r4, interrupt_vector_top@l + cmplw cr0, %r3, %r4 + bge 1f + /* Disable single-stepping for the interrupt handlers. */ + lwz %r3, FRAME_SRR1+8(%r1); + rlwinm %r3,%r3,0,23,21 + stw %r3, FRAME_SRR1+8(%r1); + /* Restore srr0 and srr1 as they could have been clobbered. */ + lwz %r3, (PC_BOOKE_CRITSAVE+CPUSAVE_SRR0+8)(%r2); + mtspr SPR_SRR0, %r3 + lwz %r4, (PC_BOOKE_CRITSAVE+CPUSAVE_SRR1+8)(%r2); + mtspr SPR_SRR1, %r4 + b 9f +1: + addi %r3, %r1, 8 + bl CNAME(trap) + /* + * Handle ASTs, needed for proper support of single-stepping. + * We actually need to return to the process with an rfi. + */ + b trapexit +9: + FRAME_LEAVE(SPR_CSRR0, SPR_CSRR1) + rfci + + +/******************************** + * Common trap code + ********************************/ +trap_common: + /* Call C trap dispatcher */ + addi %r3, %r1, 8 + bl CNAME(trap) + + .globl CNAME(trapexit) /* exported for db_backtrace use */ +CNAME(trapexit): + /* disable interrupts */ + wrteei 0 + + /* Test AST pending - makes sense for user process only */ + lwz %r5, FRAME_SRR1+8(%r1) + mtcr %r5 + bf 17, 1f + + GET_CPUINFO(%r3) + lwz %r4, PC_CURTHREAD(%r3) + lwz %r4, TD_FLAGS(%r4) + lis %r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@h + ori %r5, %r5, (TDF_ASTPENDING|TDF_NEEDRESCHED)@l + and. %r4, %r4, %r5 + beq 1f + + /* re-enable interrupts before calling ast() */ + wrteei 1 + + addi %r3, %r1, 8 + bl CNAME(ast) + .globl CNAME(asttrapexit) /* db_backtrace code sentinel #2 */ +CNAME(asttrapexit): + b trapexit /* test ast ret value ? */ +1: + FRAME_LEAVE(SPR_SRR0, SPR_SRR1) + rfi + + +#if defined(KDB) +/* + * Deliberate entry to dbtrap + */ + .globl CNAME(ppc_db_trap) +CNAME(ppc_db_trap): + mtsprg1 %r1 + mfmsr %r3 + mtsrr1 %r3 + andi. %r3,%r3,~(PSL_EE|PSL_ME)@l + mtmsr %r3 /* disable interrupts */ + isync + GET_CPUINFO(%r3) + stw %r30,(PC_DBSAVE+CPUSAVE_R30)(%r3) + stw %r31,(PC_DBSAVE+CPUSAVE_R31)(%r3) + + mflr %r31 + mtsrr0 %r31 + + mfdear %r30 + mfesr %r31 + stw %r30, (PC_DBSAVE+CPUSAVE_BOOKE_DEAR)(%r3) + stw %r31, (PC_DBSAVE+CPUSAVE_BOOKE_ESR)(%r3) + + mfsrr0 %r30 + mfsrr1 %r31 + stw %r30, (PC_DBSAVE+CPUSAVE_SRR0)(%r3) + stw %r31, (PC_DBSAVE+CPUSAVE_SRR1)(%r3) + isync + + mfcr %r30 + +/* + * Now the kdb trap catching code. + */ +dbtrap: + FRAME_SETUP(SPR_SPRG1, PC_DBSAVE, EXC_DEBUG) +/* Call C trap code: */ + addi %r3,%r1,8 + bl CNAME(db_trap_glue) + or. %r3,%r3,%r3 + bne dbleave +/* This wasn't for KDB, so switch to real trap: */ + b trap_common + +dbleave: + FRAME_LEAVE(SPR_SRR0, SPR_SRR1) + rfi +#endif /* KDB */ diff --git a/sys/powerpc/booke/uio_machdep.c b/sys/powerpc/booke/uio_machdep.c new file mode 100644 index 0000000..2a88fd2 --- /dev/null +++ b/sys/powerpc/booke/uio_machdep.c @@ -0,0 +1,135 @@ +/*- + * Copyright (c) 2004 Alan L. 
Cox <alc@cs.rice.edu> + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/uio.h> +#include <sys/sf_buf.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> + +#include <machine/cpu.h> +#include <machine/vmparam.h> +#include <machine/md_var.h> + +/* + * Implement uiomove(9) from physical memory using sf_bufs to + * avoid the creation and destruction of ephemeral mappings. 
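[Editor's note] The routine that follows sizes each iteration of its copy loop three ways: by the current iovec, by the remaining request, and by the distance to the next page boundary, because each sf_buf mapping covers exactly one page of ma[]. A standalone sketch of that clamping and of how offset selects the page; the PAGE_SIZE value and function names are local to the demo.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096
#define PAGE_MASK	(PAGE_SIZE - 1)
#define PAGE_SHIFT	12

/* How uiomove_fromphys() limits one iteration of its copy loop. */
static size_t
chunk_len(size_t iov_len, size_t n, uint64_t offset)
{
	size_t cnt = iov_len;
	size_t page_offset = offset & PAGE_MASK;

	if (cnt > n)
		cnt = n;			/* don't exceed the request */
	if (cnt > PAGE_SIZE - page_offset)
		cnt = PAGE_SIZE - page_offset;	/* stay within one sf_buf page */
	return (cnt);
}

int
main(void)
{
	uint64_t offset = 4000;			/* 96 bytes left in page ma[0] */
	size_t n = 6000, cnt;

	while (n > 0) {
		cnt = chunk_len(8192, n, offset);
		printf("copy %zu bytes from page %llu at offset %llu\n",
		    cnt, (unsigned long long)(offset >> PAGE_SHIFT),
		    (unsigned long long)(offset & PAGE_MASK));
		offset += cnt;
		n -= cnt;
	}
	return (0);
}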
+ */ +int +uiomove_fromphys(vm_page_t ma[], vm_offset_t offset, int n, struct uio *uio) +{ + struct thread *td = curthread; + struct iovec *iov; + void *cp; + vm_offset_t page_offset; + vm_page_t m; + size_t cnt; + int error = 0; + int save = 0; + struct sf_buf *sf; + + KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, + ("uiomove_fromphys: mode")); + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, + ("uiomove_fromphys proc")); + + save = td->td_pflags & TDP_DEADLKTREAT; + td->td_pflags |= TDP_DEADLKTREAT; + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + page_offset = offset & PAGE_MASK; + cnt = min(cnt, PAGE_SIZE - page_offset); + + m = ma[offset >> PAGE_SHIFT]; + sf = sf_buf_alloc(m, 0); + cp = (char*)sf_buf_kva(sf) + page_offset; + + switch (uio->uio_segflg) { + case UIO_USERSPACE: + if (ticks - PCPU_GET(switchticks) >= hogticks) + uio_yield(); + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) { + sf_buf_free(sf); + goto out; + } + if (uio->uio_rw == UIO_WRITE && + pmap_page_executable(m)) + __syncicache(cp, cnt); + break; + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy(cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, cp, cnt); + break; + case UIO_NOCOPY: + break; + } + sf_buf_free(sf); + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + offset += cnt; + n -= cnt; + } +out: + if (save == 0) + td->td_pflags &= ~TDP_DEADLKTREAT; + return (error); +} diff --git a/sys/powerpc/booke/vm_machdep.c b/sys/powerpc/booke/vm_machdep.c new file mode 100644 index 0000000..fb7f2db --- /dev/null +++ b/sys/powerpc/booke/vm_machdep.c @@ -0,0 +1,517 @@ +/*- + * Copyright (C) 2006 Semihalf, Marian Balakowicz <m8@semihalf.com> + * Copyright (C) 2006 Semihalf, Rafal Jaworowski <raj@semihalf.com> + * All rights reserved. + * + * Adapted for Freescale's e500 core CPUs. + * sf_buf implementation was derived from sys/arm/arm/vm_machdep. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: $FreeBSD$ + */ +/*- + * Copyright (c) 1982, 1986 The Regents of the University of California. + * Copyright (c) 1989, 1990 William Jolitz + * Copyright (c) 1994 John Dyson + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department, and William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 + * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ + */ +/*- + * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University. + * All rights reserved. + * + * Author: Chris G. Demetriou + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 
+ * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/vnode.h> +#include <sys/vmmeter.h> +#include <sys/kernel.h> +#include <sys/mbuf.h> +#include <sys/sf_buf.h> +#include <sys/sysctl.h> +#include <sys/unistd.h> + +#include <machine/clock.h> +#include <machine/cpu.h> +#include <machine/frame.h> +#include <machine/md_var.h> +#include <machine/pcb.h> +#include <machine/spr.h> +#include <machine/powerpc.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_extern.h> + +#ifndef NSFBUFS +#define NSFBUFS (512 + maxusers * 16) +#endif + +static void sf_buf_init(void *arg); +SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) + +LIST_HEAD(sf_head, sf_buf); + +/* A hash table of active sendfile(2) buffers */ +static struct sf_head *sf_buf_active; +static u_long sf_buf_hashmask; + +#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) + +static TAILQ_HEAD(, sf_buf) sf_buf_freelist; +static u_int sf_buf_alloc_want; + +/* + * A lock used to synchronize access to the hash table and free list + */ +static struct mtx sf_buf_lock; + +/* + * Finish a fork operation, with process p2 nearly set up. + * Copy and update the pcb, set up the stack so that the child + * ready to run and return to user mode. + */ +void +cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) +{ + struct proc *p1; + struct trapframe *tf; + struct callframe *cf; + struct pcb *pcb; + + KASSERT(td1 == curthread || td1 == &thread0, + ("cpu_fork: p1 not curproc and not proc0")); + CTR3(KTR_PROC, "cpu_fork: called td1=%08x p2=%08x flags=%x", (u_int)td1, + (u_int)p2, flags); + + if ((flags & RFPROC) == 0) + return; + + p1 = td1->td_proc; + + pcb = (struct pcb *)((td2->td_kstack + + td2->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~0x2fU); + td2->td_pcb = pcb; + + /* Copy the pcb */ + bcopy(td1->td_pcb, pcb, sizeof(struct pcb)); + + /* + * Create a fresh stack for the new process. + * Copy the trap frame for the return to user mode as if from a + * syscall. This copies most of the user mode register values. + */ + tf = (struct trapframe *)pcb - 1; + bcopy(td1->td_frame, tf, sizeof(*tf)); + + /* Set up trap frame. */ + tf->fixreg[FIRSTARG] = 0; + tf->fixreg[FIRSTARG + 1] = 0; + tf->cr &= ~0x10000000; + + td2->td_frame = tf; + + cf = (struct callframe *)tf - 1; + memset(cf, 0, sizeof(struct callframe)); + cf->cf_func = (register_t)fork_return; + cf->cf_arg0 = (register_t)td2; + cf->cf_arg1 = (register_t)tf; + + pcb->pcb_sp = (register_t)cf; + pcb->pcb_lr = (register_t)fork_trampoline; + + /* Setup to release sched_lock in fork_exit(). */ + td2->td_md.md_spinlock_count = 1; + td2->td_md.md_saved_msr = PSL_KERNSET; + + /* + * Now cpu_switch() can schedule the new process. + */ +} + +/* + * Intercept the return address from a freshly forked process that has NOT + * been scheduled yet. 
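[Editor's note] cpu_fork() above lays the child's kernel stack out from the top down: the pcb is placed at the top of the kstack (rounded down by the ~0x2f mask), the trapframe sits immediately below it, and a callframe below that; pcb_sp is pointed at the callframe and pcb_lr at fork_trampoline, so the first cpu_switch() into the child ends up in fork_return(td2, tf). The sketch below only illustrates that address arithmetic; the struct sizes and kstack size are made up for the example.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096
#define KSTACK_PAGES	2		/* assumption for the example */

/* Stand-in sizes; the real structs live in machine/pcb.h and machine/frame.h. */
#define PCB_SIZE	192
#define TRAPFRAME_SIZE	168
#define CALLFRAME_SIZE	32

/*
 * Top-down layout cpu_fork()/cpu_thread_alloc() build on the kernel stack:
 *   [ ...free stack... | callframe | trapframe | pcb ]  <- stack top
 */
int
main(void)
{
	uintptr_t kstack = 0xc1000000;	/* illustrative kstack base */
	uintptr_t pcb, tf, cf;

	pcb = (kstack + KSTACK_PAGES * PAGE_SIZE - PCB_SIZE) & ~(uintptr_t)0x2f;
	tf  = pcb - TRAPFRAME_SIZE;	/* td_frame = (struct trapframe *)pcb - 1 */
	cf  = tf - CALLFRAME_SIZE;	/* cf = (struct callframe *)tf - 1 */

	printf("pcb       at %#lx\n", (unsigned long)pcb);
	printf("trapframe at %#lx\n", (unsigned long)tf);
	printf("callframe at %#lx  (becomes pcb_sp)\n", (unsigned long)cf);
	return (0);
}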
+ * + * This is needed to make kernel threads stay in kernel mode. + */ +void +cpu_set_fork_handler(td, func, arg) + struct thread *td; + void (*func)(void *); + void *arg; +{ + struct callframe *cf; + + CTR3(KTR_PROC, "cpu_set_fork_handler: called with td=%08x func=%08x arg=%08x", + (u_int)td, (u_int)func, (u_int)arg); + + cf = (struct callframe *)td->td_pcb->pcb_sp; + + cf->cf_func = (register_t)func; + cf->cf_arg0 = (register_t)arg; +} + +void +cpu_exit(struct thread *td) +{ + +} + +/* Temporary helper */ +void +cpu_throw(struct thread *old, struct thread *new) +{ + + cpu_switch(old, new, NULL); + panic("cpu_throw() didn't"); +} + +/* Reset back to firmware. */ +void +cpu_reset() +{ + + /* Clear DBCR0, disables debug interrupts and events. */ + mtspr(SPR_DBCR0, 0); + __asm volatile("isync"); + + /* Enable Debug Interrupts in MSR. */ + mtmsr(mfmsr() | PSL_DE); + + /* Enable debug interrupts and issue reset. */ + mtspr(SPR_DBCR0, mfspr(SPR_DBCR0) | DBCR0_IDM | DBCR0_RST_SYSTEM); +} + +/* + * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) + */ +static void +sf_buf_init(void *arg) +{ + struct sf_buf *sf_bufs; + vm_offset_t sf_base; + int i; + + nsfbufs = NSFBUFS; + TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); + + sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); + TAILQ_INIT(&sf_buf_freelist); + sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); + sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT | M_ZERO); + + for (i = 0; i < nsfbufs; i++) { + sf_bufs[i].kva = sf_base + i * PAGE_SIZE; + TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry); + } + sf_buf_alloc_want = 0; + mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF); +} + +/* + * Get an sf_buf from the freelist. Will block if none are available. + */ +struct sf_buf * +sf_buf_alloc(struct vm_page *m, int flags) +{ + struct sf_head *hash_list; + struct sf_buf *sf; + int error; + + hash_list = &sf_buf_active[SF_BUF_HASH(m)]; + mtx_lock(&sf_buf_lock); + LIST_FOREACH(sf, hash_list, list_entry) { + if (sf->m == m) { + sf->ref_count++; + if (sf->ref_count == 1) { + TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); + nsfbufsused++; + nsfbufspeak = imax(nsfbufspeak, nsfbufsused); + } + goto done; + } + } + + while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) { + if (flags & SFB_NOWAIT) + goto done; + + sf_buf_alloc_want++; + mbstat.sf_allocwait++; + error = msleep(&sf_buf_freelist, &sf_buf_lock, + (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); + sf_buf_alloc_want--; + + /* + * If we got a signal, don't risk going back to sleep. + */ + if (error) + goto done; + } + + TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); + if (sf->m != NULL) + LIST_REMOVE(sf, list_entry); + + LIST_INSERT_HEAD(hash_list, sf, list_entry); + sf->ref_count = 1; + sf->m = m; + nsfbufsused++; + nsfbufspeak = imax(nsfbufspeak, nsfbufsused); + pmap_qenter(sf->kva, &sf->m, 1); +done: + mtx_unlock(&sf_buf_lock); + return (sf); +} + +/* + * Detatch mapped page and release resources back to the system. + * + * Remove a reference from the given sf_buf, adding it to the free + * list when its reference count reaches zero. A freed sf_buf still, + * however, retains its virtual-to-physical mapping until it is + * recycled or reactivated by sf_buf_alloc(9). 
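[Editor's note] sf_buf_alloc() above keys its hash on the vm_page itself: SF_BUF_HASH(m) is the page's index within vm_page_array masked by sf_buf_hashmask, and a buffer whose ref_count rises from 0 to 1 is pulled off the free list, while sf_buf_free() below pushes it back at 0 but keeps the mapping so a later allocation of the same page can reuse it. A reduced sketch of just the bucket selection; the array size and bucket count are arbitrary here.

#include <stddef.h>
#include <stdio.h>

struct vm_page { int dummy; };

/* Assume a power-of-two bucket count, as hashinit() provides. */
#define SF_BUF_BUCKETS	64
static struct vm_page vm_page_array[1024];
static unsigned long sf_buf_hashmask = SF_BUF_BUCKETS - 1;

/* Same idea as SF_BUF_HASH(m) in the file above. */
static unsigned long
sf_buf_hash(struct vm_page *m)
{
	return ((unsigned long)(m - vm_page_array) & sf_buf_hashmask);
}

int
main(void)
{
	printf("page 5   -> bucket %lu\n", sf_buf_hash(&vm_page_array[5]));
	printf("page 69  -> bucket %lu\n", sf_buf_hash(&vm_page_array[69]));
	printf("page 133 -> bucket %lu\n", sf_buf_hash(&vm_page_array[133]));
	return (0);
}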
+ */ +void +sf_buf_free(struct sf_buf *sf) +{ + mtx_lock(&sf_buf_lock); + sf->ref_count--; + if (sf->ref_count == 0) { + TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); + nsfbufsused--; + + if (sf_buf_alloc_want > 0) + wakeup_one(&sf_buf_freelist); + } + mtx_unlock(&sf_buf_lock); +} + +/* + * Software interrupt handler for queued VM system processing. + */ +void +swi_vm(void *dummy) +{ +#if 0 /* XXX: Don't have busdma stuff yet */ + if (busdma_swi_pending != 0) + busdma_swi(); +#endif +} + +/* + * Tell whether this address is in some physical memory region. + * Currently used by the kernel coredump code in order to avoid + * dumping the ``ISA memory hole'' which could cause indefinite hangs, + * or other unpredictable behaviour. + */ +int +is_physical_memory(vm_offset_t addr) +{ + + /* + * stuff other tests for known memory-mapped devices (PCI?) + * here + */ + return 1; +} + +/* + * KSE functions + */ +void +cpu_thread_exit(struct thread *td) +{ + +} + +void +cpu_thread_clean(struct thread *td) +{ + +} + +void +cpu_thread_alloc(struct thread *td) +{ + struct pcb *pcb; + + pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE - + sizeof(struct pcb)) & ~0x2fU); + td->td_pcb = pcb; + td->td_frame = (struct trapframe *)pcb - 1; +} + +void +cpu_thread_free(struct thread *td) +{ + +} + +void +cpu_thread_swapin(struct thread *td) +{ + +} + +void +cpu_thread_swapout(struct thread *td) +{ + +} + +void +cpu_set_upcall(struct thread *td, struct thread *td0) +{ + struct pcb *pcb2; + struct trapframe *tf; + struct callframe *cf; + + pcb2 = td->td_pcb; + + /* Copy the upcall pcb */ + bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); + + /* Create a stack for the new thread */ + tf = td->td_frame; + bcopy(td0->td_frame, tf, sizeof(struct trapframe)); + tf->fixreg[FIRSTARG] = 0; + tf->fixreg[FIRSTARG + 1] = 0; + tf->cr &= ~0x10000000; + + /* Set registers for trampoline to user mode. */ + cf = (struct callframe *)tf - 1; + memset(cf, 0, sizeof(struct callframe)); + cf->cf_func = (register_t)fork_return; + cf->cf_arg0 = (register_t)td; + cf->cf_arg1 = (register_t)tf; + + pcb2->pcb_sp = (register_t)cf; + pcb2->pcb_lr = (register_t)fork_trampoline; + + /* Setup to release sched_lock in fork_exit(). */ + td->td_md.md_spinlock_count = 1; + td->td_md.md_saved_msr = PSL_KERNSET; +} + +void +cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, + stack_t *stack) +{ + struct trapframe *tf; + uint32_t sp; + + tf = td->td_frame; + /* align stack and alloc space for frame ptr and saved LR */ + sp = ((uint32_t)stack->ss_sp + stack->ss_size - 2 * sizeof(u_int32_t)) & ~0x1f; + bzero(tf, sizeof(struct trapframe)); + + tf->fixreg[1] = (register_t)sp; + tf->fixreg[3] = (register_t)arg; + tf->srr0 = (register_t)entry; + + tf->srr1 = PSL_USERSET; + td->td_pcb->pcb_flags = 0; + + td->td_retval[0] = (register_t)entry; + td->td_retval[1] = 0; +} + +int +cpu_set_user_tls(struct thread *td, void *tls_base) +{ + + td->td_frame->fixreg[2] = (register_t)tls_base + 0x7008; + return (0); +} |
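[Editor's note] One detail of cpu_set_upcall_kse() above worth calling out is how the initial user SP is derived: take the top of the supplied stack, reserve two 32-bit words for the frame pointer/back chain and the saved LR, and round down to a 32-byte boundary (the & ~0x1f). A quick standalone check of that arithmetic; the stack base and size are arbitrary.

#include <stdint.h>
#include <stdio.h>

/* The SP computation from cpu_set_upcall_kse(), isolated for inspection. */
static uint32_t
initial_sp(uint32_t ss_sp, uint32_t ss_size)
{
	return ((ss_sp + ss_size - 2 * (uint32_t)sizeof(uint32_t)) & ~0x1fU);
}

int
main(void)
{
	uint32_t base = 0x7ffe0000, size = 0x10000;

	printf("stack [%#x, %#x) -> initial SP %#x\n",
	    base, base + size, initial_sp(base, size));
	return (0);
}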