Diffstat (limited to 'sys/kern/subr_trap.c')
-rw-r--r--	sys/kern/subr_trap.c	1227
1 file changed, 1227 insertions, 0 deletions
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
new file mode 100644
index 0000000..83c83b4
--- /dev/null
+++ b/sys/kern/subr_trap.c
@@ -0,0 +1,1227 @@
+/*-
+ * Copyright (C) 1994, David Greenman
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the University of Utah, and William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)trap.c	7.4 (Berkeley) 5/13/91
+ * $FreeBSD$
+ */
+
+/*
+ * 386 Trap and System call handling
+ */
+
+#include "opt_cpu.h"
+#include "opt_ddb.h"
+#include "opt_ktrace.h"
+#include "opt_clock.h"
+#include "opt_trap.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/pioctl.h>
+#include <sys/kernel.h>
+#include <sys/resourcevar.h>
+#include <sys/signalvar.h>
+#include <sys/syscall.h>
+#include <sys/sysent.h>
+#include <sys/uio.h>
+#include <sys/vmmeter.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <sys/lock.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+
+#include <machine/cpu.h>
+#include <machine/ipl.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+#include <machine/tss.h>
+
+#include <i386/isa/intr_machdep.h>
+
+#ifdef POWERFAIL_NMI
+#include <sys/syslog.h>
+#include <machine/clock.h>
+#endif
+
+#include <machine/vm86.h>
+
+#include <ddb/ddb.h>
+
+#include "isa.h"
+#include "npx.h"
+
+int (*pmath_emulate) __P((struct trapframe *));
+
+extern void trap __P((struct trapframe frame));
+extern int trapwrite __P((unsigned addr));
+extern void syscall2 __P((struct trapframe frame));
+
+static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
+static void trap_fatal __P((struct trapframe *, vm_offset_t));
+void dblfault_handler __P((void));
+
+extern inthand_t IDTVEC(syscall);
+
+#define MAX_TRAP_MSG		28
+static char *trap_msg[] = {
+	"",					/*  0 unused */
+	"privileged instruction fault",		/*  1 T_PRIVINFLT */
+	"",					/*  2 unused */
+	"breakpoint instruction fault",		/*  3 T_BPTFLT */
+	"",					/*  4 unused */
+	"",					/*  5 unused */
+	"arithmetic trap",			/*  6 T_ARITHTRAP */
+	"system forced exception",		/*  7 T_ASTFLT */
+	"",					/*  8 unused */
+	"general protection fault",		/*  9 T_PROTFLT */
+	"trace trap",				/* 10 T_TRCTRAP */
+	"",					/* 11 unused */
+	"page fault",				/* 12 T_PAGEFLT */
+	"",					/* 13 unused */
+	"alignment fault",			/* 14 T_ALIGNFLT */
+	"",					/* 15 unused */
+	"",					/* 16 unused */
+	"",					/* 17 unused */
+	"integer divide fault",			/* 18 T_DIVIDE */
+	"non-maskable interrupt trap",		/* 19 T_NMI */
+	"overflow trap",			/* 20 T_OFLOW */
+	"FPU bounds check fault",		/* 21 T_BOUND */
+	"FPU device not available",		/* 22 T_DNA */
+	"double fault",				/* 23 T_DOUBLEFLT */
+	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
+	"invalid TSS fault",			/* 25 T_TSSFLT */
+	"segment not present fault",		/* 26 T_SEGNPFLT */
+	"stack fault",				/* 27 T_STKFLT */
+	"machine check trap",			/* 28 T_MCHK */
+};
+
+static __inline int userret __P((struct proc *p, struct trapframe *frame,
+				  u_quad_t oticks, int have_mplock));
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+extern int has_f00f_bug;
+#endif
+
+static __inline int
+userret(p, frame, oticks, have_mplock)
+	struct proc *p;
+	struct trapframe *frame;
+	u_quad_t oticks;
+	int have_mplock;
+{
+	int sig, s;
+
+	while ((sig = CURSIG(p)) != 0) {
+		if (have_mplock == 0) {
+			get_mplock();
+			have_mplock = 1;
+		}
+		postsig(sig);
+	}
+
+	p->p_priority = p->p_usrpri;
+	if (resched_wanted()) {
+		/*
+		 * Since we are curproc, clock will normally just change
+		 * our priority without moving us from one queue to another
+		 * (since the running process is not on a queue.)
+		 * If that happened after we setrunqueue ourselves but
+		 * before we mi_switch()'ed, we might not be on the queue
+		 * indicated by our priority.
+		 */
+		if (have_mplock == 0) {
+			get_mplock();
+			have_mplock = 1;
+		}
+		s = splhigh();
+		setrunqueue(p);
+		p->p_stats->p_ru.ru_nivcsw++;
+		mi_switch();
+		splx(s);
+		while ((sig = CURSIG(p)) != 0)
+			postsig(sig);
+	}
+	/*
+	 * Charge system time if profiling.
+	 */
+	if (p->p_flag & P_PROFIL) {
+		if (have_mplock == 0) {
+			get_mplock();
+			have_mplock = 1;
+		}
+		addupc_task(p, frame->tf_eip,
+			    (u_int)(p->p_sticks - oticks) * psratio);
+	}
+	curpriority = p->p_priority;
+	return(have_mplock);
+}
+
+/*
+ * Exception, fault, and trap interface to the FreeBSD kernel.
+ * This common code is called from assembly language IDT gate entry
+ * routines that prepare a suitable stack frame, and restore this
+ * frame after the exception has been processed.
+ */
+
+void
+trap(frame)
+	struct trapframe frame;
+{
+	struct proc *p = curproc;
+	u_quad_t sticks = 0;
+	int i = 0, ucode = 0, type, code;
+	vm_offset_t eva;
+
+	if (!(frame.tf_eflags & PSL_I)) {
+		/*
+		 * Buggy application or kernel code has disabled interrupts
+		 * and then trapped.  Enabling interrupts now is wrong, but
+		 * it is better than running with interrupts disabled until
+		 * they are accidentally enabled later.
+		 */
+		type = frame.tf_trapno;
+		if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
+			printf(
+			    "pid %ld (%s): trap %d with interrupts disabled\n",
+			    (long)curproc->p_pid, curproc->p_comm, type);
+		else if (type != T_BPTFLT && type != T_TRCTRAP)
+			/*
+			 * XXX not quite right, since this may be for a
+			 * multiple fault in user mode.
+			 */
+			printf("kernel trap %d with interrupts disabled\n",
+			    type);
+		enable_intr();
+	}
+
+	eva = 0;
+	if (frame.tf_trapno == T_PAGEFLT) {
+		/*
+		 * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
+		 * This problem is worked around by using an interrupt
+		 * gate for the pagefault handler.  We are finally ready
+		 * to read %cr2 and then must reenable interrupts.
+		 *
+		 * XXX this should be in the switch statement, but the
+		 * NO_F00F_HACK and VM86 goto and ifdefs obfuscate the
+		 * flow of control too much for this to be obviously
+		 * correct.
+		 */
+		eva = rcr2();
+		enable_intr();
+	}
+
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+restart:
+#endif
+	type = frame.tf_trapno;
+	code = frame.tf_err;
+
+	if (in_vm86call) {
+		if (frame.tf_eflags & PSL_VM &&
+		    (type == T_PROTFLT || type == T_STKFLT)) {
+			i = vm86_emulate((struct vm86frame *)&frame);
+			if (i != 0)
+				/*
+				 * returns to original process
+				 */
+				vm86_trap((struct vm86frame *)&frame);
+			return;
+		}
+		switch (type) {
+			/*
+			 * these traps want either a process context, or
+			 * assume a normal userspace trap.
+			 */
+		case T_PROTFLT:
+		case T_SEGNPFLT:
+			trap_fatal(&frame, eva);
+			return;
+		case T_TRCTRAP:
+			type = T_BPTFLT;	/* kernel breakpoint */
+			/* FALL THROUGH */
+		}
+		goto kernel_trap;	/* normal kernel trap handling */
+	}
+
+	if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
+		/* user trap */
+
+		sticks = p->p_sticks;
+		p->p_md.md_regs = &frame;
+
+		switch (type) {
+		case T_PRIVINFLT:	/* privileged instruction fault */
+			ucode = type;
+			i = SIGILL;
+			break;
+
+		case T_BPTFLT:		/* bpt instruction fault */
+		case T_TRCTRAP:		/* trace trap */
+			frame.tf_eflags &= ~PSL_T;
+			i = SIGTRAP;
+			break;
+
+		case T_ARITHTRAP:	/* arithmetic trap */
+			ucode = code;
+			i = SIGFPE;
+			break;
+
+		case T_ASTFLT:		/* Allow process switch */
+			astoff();
+			cnt.v_soft++;
+			if (p->p_flag & P_OWEUPC) {
+				p->p_flag &= ~P_OWEUPC;
+				addupc_task(p, p->p_stats->p_prof.pr_addr,
+					    p->p_stats->p_prof.pr_ticks);
+			}
+			goto out;
+
+			/*
+			 * The following two traps can happen in
+			 * vm86 mode, and, if so, we want to handle
+			 * them specially.
+			 */
+		case T_PROTFLT:		/* general protection fault */
+		case T_STKFLT:		/* stack fault */
+			if (frame.tf_eflags & PSL_VM) {
+				i = vm86_emulate((struct vm86frame *)&frame);
+				if (i == 0)
+					goto out;
+				break;
+			}
+			/* FALL THROUGH */
+
+		case T_SEGNPFLT:	/* segment not present fault */
+		case T_TSSFLT:		/* invalid TSS fault */
+		case T_DOUBLEFLT:	/* double fault */
+		default:
+			ucode = code + BUS_SEGM_FAULT;
+			i = SIGBUS;
+			break;
+
+		case T_PAGEFLT:		/* page fault */
+			i = trap_pfault(&frame, TRUE, eva);
+			if (i == -1)
+				return;
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+			if (i == -2)
+				goto restart;
+#endif
+			if (i == 0)
+				goto out;
+
+			ucode = T_PAGEFLT;
+			break;
+
+		case T_DIVIDE:		/* integer divide fault */
+			ucode = FPE_INTDIV;
+			i = SIGFPE;
+			break;
+
+#if NISA > 0
+		case T_NMI:
+#ifdef POWERFAIL_NMI
+			goto handle_powerfail;
+#else /* !POWERFAIL_NMI */
+#ifdef DDB
+			/* NMI can be hooked up to a pushbutton for debugging */
+			printf ("NMI ... going to debugger\n");
+			if (kdb_trap (type, 0, &frame))
+				return;
+#endif /* DDB */
+			/* machine/parity/power fail/"kitchen sink" faults */
+			if (isa_nmi(code) == 0) return;
+			panic("NMI indicates hardware failure");
+#endif /* POWERFAIL_NMI */
+#endif /* NISA > 0 */
+
+		case T_OFLOW:		/* integer overflow fault */
+			ucode = FPE_INTOVF;
+			i = SIGFPE;
+			break;
+
+		case T_BOUND:		/* bounds check fault */
+			ucode = FPE_FLTSUB;
+			i = SIGFPE;
+			break;
+
+		case T_DNA:
+#if NNPX > 0
+			/* if a transparent fault (due to context switch "late") */
+			if (npxdna())
+				return;
+#endif
+			if (!pmath_emulate) {
+				i = SIGFPE;
+				ucode = FPE_FPU_NP_TRAP;
+				break;
+			}
+			i = (*pmath_emulate)(&frame);
+			if (i == 0) {
+				if (!(frame.tf_eflags & PSL_T))
+					return;
+				frame.tf_eflags &= ~PSL_T;
+				i = SIGTRAP;
+			}
+			/* else ucode = emulator_only_knows() XXX */
+			break;
+
+		case T_FPOPFLT:		/* FPU operand fetch fault */
+			ucode = T_FPOPFLT;
+			i = SIGILL;
+			break;
+		}
+	} else {
+kernel_trap:
+		/* kernel trap */
+
+		switch (type) {
+		case T_PAGEFLT:			/* page fault */
+			(void) trap_pfault(&frame, FALSE, eva);
+			return;
+
+		case T_DNA:
+#if NNPX > 0
+			/*
+			 * The kernel is apparently using npx for copying.
+			 * XXX this should be fatal unless the kernel has
+			 * registered such use.
+			 */
+			if (npxdna())
+				return;
+#endif
+			break;
+
+		case T_PROTFLT:		/* general protection fault */
+		case T_SEGNPFLT:	/* segment not present fault */
+			/*
+			 * Invalid segment selectors and out of bounds
+			 * %eip's and %esp's can be set up in user mode.
+			 * This causes a fault in kernel mode when the
+			 * kernel tries to return to user mode.  We want
+			 * to get this fault so that we can fix the
+			 * problem here and not have to check all the
+			 * selectors and pointers when the user changes
+			 * them.
+			 */
+#define	MAYBE_DORETI_FAULT(where, whereto)				\
+	do {								\
+		if (frame.tf_eip == (int)where) {			\
+			frame.tf_eip = (int)whereto;			\
+			return;						\
+		}							\
+	} while (0)
+
+			if (intr_nesting_level == 0) {
+				/*
+				 * Invalid %fs's and %gs's can be created using
+				 * procfs or PT_SETREGS or by invalidating the
+				 * underlying LDT entry.  This causes a fault
+				 * in kernel mode when the kernel attempts to
+				 * switch contexts.  Lose the bad context
+				 * (XXX) so that we can continue, and generate
+				 * a signal.
+				 */
+				if (frame.tf_eip == (int)cpu_switch_load_gs) {
+					curpcb->pcb_gs = 0;
+					psignal(p, SIGBUS);
+					return;
+				}
+				MAYBE_DORETI_FAULT(doreti_iret,
+						   doreti_iret_fault);
+				MAYBE_DORETI_FAULT(doreti_popl_ds,
+						   doreti_popl_ds_fault);
+				MAYBE_DORETI_FAULT(doreti_popl_es,
+						   doreti_popl_es_fault);
+				MAYBE_DORETI_FAULT(doreti_popl_fs,
+						   doreti_popl_fs_fault);
+				if (curpcb && curpcb->pcb_onfault) {
+					frame.tf_eip = (int)curpcb->pcb_onfault;
+					return;
+				}
+			}
+			break;
+
+		case T_TSSFLT:
+			/*
+			 * PSL_NT can be set in user mode and isn't cleared
+			 * automatically when the kernel is entered.  This
+			 * causes a TSS fault when the kernel attempts to
+			 * `iret' because the TSS link is uninitialized.  We
+			 * want to get this fault so that we can fix the
+			 * problem here and not every time the kernel is
+			 * entered.
+			 */
+			if (frame.tf_eflags & PSL_NT) {
+				frame.tf_eflags &= ~PSL_NT;
+				return;
+			}
+			break;
+
+		case T_TRCTRAP:	 /* trace trap */
+			if (frame.tf_eip == (int)IDTVEC(syscall)) {
+				/*
+				 * We've just entered system mode via the
+				 * syscall lcall.  Continue single stepping
+				 * silently until the syscall handler has
+				 * saved the flags.
+				 */
+				return;
+			}
+			if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
+				/*
+				 * The syscall handler has now saved the
+				 * flags.  Stop single stepping it.
+				 */
+				frame.tf_eflags &= ~PSL_T;
+				return;
+			}
+			/*
+			 * Ignore debug register trace traps due to
+			 * accesses in the user's address space, which
+			 * can happen under several conditions such as
+			 * if a user sets a watchpoint on a buffer and
+			 * then passes that buffer to a system call.
+			 * We still want to get TRCTRAPS for addresses
+			 * in kernel space because that is useful when
+			 * debugging the kernel.
+			 */
+			if (user_dbreg_trap()) {
+				/*
+				 * Reset breakpoint bits because the
+				 * processor doesn't
+				 */
+				load_dr6(rdr6() & 0xfffffff0);
+				return;
+			}
+			/*
+			 * Fall through (TRCTRAP kernel mode, kernel address)
+			 */
+		case T_BPTFLT:
+			/*
+			 * If DDB is enabled, let it handle the debugger trap.
+			 * Otherwise, debugger traps "can't happen".
+			 */
+#ifdef DDB
+			if (kdb_trap (type, 0, &frame))
+				return;
+#endif
+			break;
+
+#if NISA > 0
+		case T_NMI:
+#ifdef POWERFAIL_NMI
+#ifndef TIMER_FREQ
+#define TIMER_FREQ	1193182
+#endif
+	handle_powerfail:
+		{
+			static unsigned lastalert = 0;
+
+			if (time_second - lastalert > 10) {
+				log(LOG_WARNING, "NMI: power fail\n");
+				sysbeep(TIMER_FREQ/880, hz);
+				lastalert = time_second;
+			}
+			return;
+		}
+#else /* !POWERFAIL_NMI */
+#ifdef DDB
+			/* NMI can be hooked up to a pushbutton for debugging */
+			printf ("NMI ... going to debugger\n");
+			if (kdb_trap (type, 0, &frame))
+				return;
+#endif /* DDB */
+			/* machine/parity/power fail/"kitchen sink" faults */
+			if (isa_nmi(code) == 0) return;
+			/* FALL THROUGH */
+#endif /* POWERFAIL_NMI */
+#endif /* NISA > 0 */
+		}
+
+		trap_fatal(&frame, eva);
+		return;
+	}
+
+	/* Translate fault for emulators (e.g. Linux) */
+	if (*p->p_sysent->sv_transtrap)
+		i = (*p->p_sysent->sv_transtrap)(i, type);
+
+	trapsignal(p, i, ucode);
+
+#ifdef DEBUG
+	if (type <= MAX_TRAP_MSG) {
+		uprintf("fatal process exception: %s",
+			trap_msg[type]);
+		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
+			uprintf(", fault VA = 0x%lx", (u_long)eva);
+		uprintf("\n");
+	}
+#endif
+
+out:
+	userret(p, &frame, sticks, 1);
+}
+
+#ifdef notyet
+/*
+ * This version doesn't allow a page fault to user space while
+ * in the kernel. The rest of the kernel needs to be made "safe"
+ * before this can be used. I think the only things remaining
+ * to be made safe are the iBCS2 code and the process tracing/
+ * debugging code.
+ */
+static int
+trap_pfault(frame, usermode, eva)
+	struct trapframe *frame;
+	int usermode;
+	vm_offset_t eva;
+{
+	vm_offset_t va;
+	struct vmspace *vm = NULL;
+	vm_map_t map = 0;
+	int rv = 0;
+	vm_prot_t ftype;
+	struct proc *p = curproc;
+
+	if (frame->tf_err & PGEX_W)
+		ftype = VM_PROT_READ | VM_PROT_WRITE;
+	else
+		ftype = VM_PROT_READ;
+
+	va = trunc_page(eva);
+	if (va < VM_MIN_KERNEL_ADDRESS) {
+		vm_offset_t v;
+		vm_page_t mpte;
+
+		if (p == NULL ||
+		    (!usermode && va < VM_MAXUSER_ADDRESS &&
+		     (intr_nesting_level != 0 || curpcb == NULL ||
+		      curpcb->pcb_onfault == NULL))) {
+			trap_fatal(frame, eva);
+			return (-1);
+		}
+
+		/*
+		 * This is a fault on non-kernel virtual memory.
+		 * vm is initialized above to NULL. If curproc is NULL
+		 * or curproc->p_vmspace is NULL the fault is fatal.
+		 */
+		vm = p->p_vmspace;
+		if (vm == NULL)
+			goto nogo;
+
+		map = &vm->vm_map;
+
+		/*
+		 * Keep swapout from messing with us during this
+		 * critical time.
+		 */
+		++p->p_lock;
+
+		/*
+		 * Grow the stack if necessary
+		 */
+		/* grow_stack returns false only if va falls into
+		 * a growable stack region and the stack growth
+		 * fails.  It returns true if va was not within
+		 * a growable stack region, or if the stack
+		 * growth succeeded.
+		 */
+		if (!grow_stack (p, va)) {
+			rv = KERN_FAILURE;
+			--p->p_lock;
+			goto nogo;
+		}
+
+		/* Fault in the user page: */
+		rv = vm_fault(map, va, ftype,
+			(ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
+						: VM_FAULT_NORMAL);
+
+		--p->p_lock;
+	} else {
+		/*
+		 * Don't allow user-mode faults in kernel address space.
+		 */
+		if (usermode)
+			goto nogo;
+
+		/*
+		 * Since we know that kernel virtual address addresses
+		 * always have pte pages mapped, we just have to fault
+		 * the page.
+		 */
+		rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL);
+	}
+
+	if (rv == KERN_SUCCESS)
+		return (0);
+nogo:
+	if (!usermode) {
+		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
+			frame->tf_eip = (int)curpcb->pcb_onfault;
+			return (0);
+		}
+		trap_fatal(frame, eva);
+		return (-1);
+	}
+
+	/* kludge to pass faulting virtual address to sendsig */
+	frame->tf_err = eva;
+
+	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+}
+#endif
+
+int
+trap_pfault(frame, usermode, eva)
+	struct trapframe *frame;
+	int usermode;
+	vm_offset_t eva;
+{
+	vm_offset_t va;
+	struct vmspace *vm = NULL;
+	vm_map_t map = 0;
+	int rv = 0;
+	vm_prot_t ftype;
+	struct proc *p = curproc;
+
+	va = trunc_page(eva);
+	if (va >= KERNBASE) {
+		/*
+		 * Don't allow user-mode faults in kernel address space.
+		 * An exception: if the faulting address is the invalid
+		 * instruction entry in the IDT, then the Intel Pentium
+		 * F00F bug workaround was triggered, and we need to
+		 * treat it as an illegal instruction, and not a page
+		 * fault.
+		 */
+#if defined(I586_CPU) && !defined(NO_F00F_HACK)
+		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
+			frame->tf_trapno = T_PRIVINFLT;
+			return -2;
+		}
+#endif
+		if (usermode)
+			goto nogo;
+
+		map = kernel_map;
+	} else {
+		/*
+		 * This is a fault on non-kernel virtual memory.
+		 * vm is initialized above to NULL. If curproc is NULL
+		 * or curproc->p_vmspace is NULL the fault is fatal.
+		 */
+		if (p != NULL)
+			vm = p->p_vmspace;
+
+		if (vm == NULL)
+			goto nogo;
+
+		map = &vm->vm_map;
+	}
+
+	if (frame->tf_err & PGEX_W)
+		ftype = VM_PROT_READ | VM_PROT_WRITE;
+	else
+		ftype = VM_PROT_READ;
+
+	if (map != kernel_map) {
+		/*
+		 * Keep swapout from messing with us during this
+		 * critical time.
+		 */
+		++p->p_lock;
+
+		/*
+		 * Grow the stack if necessary
+		 */
+		/* grow_stack returns false only if va falls into
+		 * a growable stack region and the stack growth
+		 * fails.  It returns true if va was not within
+		 * a growable stack region, or if the stack
+		 * growth succeeded.
+		 */
+		if (!grow_stack (p, va)) {
+			rv = KERN_FAILURE;
+			--p->p_lock;
+			goto nogo;
+		}
+
+		/* Fault in the user page: */
+		rv = vm_fault(map, va, ftype,
+			(ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY
+						: VM_FAULT_NORMAL);
+
+		--p->p_lock;
+	} else {
+		/*
+		 * Don't have to worry about process locking or stacks
+		 * in the kernel.
+		 */
+		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	}
+
+	if (rv == KERN_SUCCESS)
+		return (0);
+nogo:
+	if (!usermode) {
+		if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) {
+			frame->tf_eip = (int)curpcb->pcb_onfault;
+			return (0);
+		}
+		trap_fatal(frame, eva);
+		return (-1);
+	}
+
+	/* kludge to pass faulting virtual address to sendsig */
+	frame->tf_err = eva;
+
+	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+}
+
+static void
+trap_fatal(frame, eva)
+	struct trapframe *frame;
+	vm_offset_t eva;
+{
+	int code, type, ss, esp;
+	struct soft_segment_descriptor softseg;
+
+	code = frame->tf_err;
+	type = frame->tf_trapno;
+	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
+
+	if (type <= MAX_TRAP_MSG)
+		printf("\n\nFatal trap %d: %s while in %s mode\n",
+			type, trap_msg[type],
+			frame->tf_eflags & PSL_VM ? "vm86" :
+			ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
+#ifdef SMP
+	/* three separate prints in case of a trap on an unmapped page */
+	printf("mp_lock = %08x; ", mp_lock);
+	printf("cpuid = %d; ", cpuid);
+	printf("lapic.id = %08x\n", lapic.id);
+#endif
+	if (type == T_PAGEFLT) {
+		printf("fault virtual address	= 0x%x\n", eva);
+		printf("fault code		= %s %s, %s\n",
+			code & PGEX_U ? "user" : "supervisor",
+			code & PGEX_W ? "write" : "read",
+			code & PGEX_P ? "protection violation" : "page not present");
+	}
+	printf("instruction pointer	= 0x%x:0x%x\n",
+	       frame->tf_cs & 0xffff, frame->tf_eip);
+	if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) {
+		ss = frame->tf_ss & 0xffff;
+		esp = frame->tf_esp;
+	} else {
+		ss = GSEL(GDATA_SEL, SEL_KPL);
+		esp = (int)&frame->tf_esp;
+	}
+	printf("stack pointer		= 0x%x:0x%x\n", ss, esp);
+	printf("frame pointer		= 0x%x:0x%x\n", ss, frame->tf_ebp);
+	printf("code segment		= base 0x%x, limit 0x%x, type 0x%x\n",
+	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
+	printf("			= DPL %d, pres %d, def32 %d, gran %d\n",
+	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32,
+	       softseg.ssd_gran);
+	printf("processor eflags	= ");
+	if (frame->tf_eflags & PSL_T)
+		printf("trace trap, ");
+	if (frame->tf_eflags & PSL_I)
+		printf("interrupt enabled, ");
+	if (frame->tf_eflags & PSL_NT)
+		printf("nested task, ");
+	if (frame->tf_eflags & PSL_RF)
+		printf("resume, ");
+	if (frame->tf_eflags & PSL_VM)
+		printf("vm86, ");
+	printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12);
+	printf("current process		= ");
+	if (curproc) {
+		printf("%lu (%s)\n",
+		    (u_long)curproc->p_pid, curproc->p_comm ?
+		    curproc->p_comm : "");
+	} else {
+		printf("Idle\n");
+	}
+	printf("interrupt mask		= ");
+	if ((cpl & net_imask) == net_imask)
+		printf("net ");
+	if ((cpl & tty_imask) == tty_imask)
+		printf("tty ");
+	if ((cpl & bio_imask) == bio_imask)
+		printf("bio ");
+	if ((cpl & cam_imask) == cam_imask)
+		printf("cam ");
+	if (cpl == 0)
+		printf("none");
+#ifdef SMP
+/**
+ *  XXX FIXME:
+ *	we probably SHOULD have stopped the other CPUs before now!
+ *	another CPU COULD have been touching cpl at this moment...
+ */
+	printf(" <- SMP: XXX");
+#endif
+	printf("\n");
+
+#ifdef KDB
+	if (kdb_trap(&psl))
+		return;
+#endif
+#ifdef DDB
+	if ((debugger_on_panic || db_active) && kdb_trap(type, 0, frame))
+		return;
+#endif
+	printf("trap number		= %d\n", type);
+	if (type <= MAX_TRAP_MSG)
+		panic(trap_msg[type]);
+	else
+		panic("unknown/reserved trap");
+}
+
+/*
+ * Double fault handler. Called when a fault occurs while writing
+ * a frame for a trap/exception onto the stack. This usually occurs
+ * when the stack overflows (such is the case with infinite recursion,
+ * for example).
+ *
+ * XXX Note that the current PTD gets replaced by IdlePTD when the
+ * task switch occurs. This means that the stack that was active at
+ * the time of the double fault is not available at <kstack> unless
+ * the machine was idle when the double fault occurred. The downside
+ * of this is that "trace <ebp>" in ddb won't work.
+ */
+void
+dblfault_handler()
+{
+	printf("\nFatal double fault:\n");
+	printf("eip = 0x%x\n", common_tss.tss_eip);
+	printf("esp = 0x%x\n", common_tss.tss_esp);
+	printf("ebp = 0x%x\n", common_tss.tss_ebp);
+#ifdef SMP
+	/* three separate prints in case of a trap on an unmapped page */
+	printf("mp_lock = %08x; ", mp_lock);
+	printf("cpuid = %d; ", cpuid);
+	printf("lapic.id = %08x\n", lapic.id);
+#endif
+	panic("double fault");
+}
+
+/*
+ * Compensate for 386 brain damage (missing URKR).
+ * This is a little simpler than the pagefault handler in trap() because
+ * the page tables have already been faulted in and high addresses
+ * are thrown out early for other reasons.
+ */
+int trapwrite(addr)
+	unsigned addr;
+{
+	struct proc *p;
+	vm_offset_t va;
+	struct vmspace *vm;
+	int rv;
+
+	va = trunc_page((vm_offset_t)addr);
+	/*
+	 * XXX - MAX is END.  Changed > to >= for temp. fix.
+	 */
+	if (va >= VM_MAXUSER_ADDRESS)
+		return (1);
+
+	p = curproc;
+	vm = p->p_vmspace;
+
+	++p->p_lock;
+
+	if (!grow_stack (p, va)) {
+		--p->p_lock;
+		return (1);
+	}
+
+	/*
+	 * fault the data page
+	 */
+	rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY);
+
+	--p->p_lock;
+
+	if (rv != KERN_SUCCESS)
+		return 1;
+
+	return (0);
+}
+
+/*
+ *	syscall2 -	MP aware system call request C handler
+ *
+ *	A system call is essentially treated as a trap except that the
+ *	MP lock is not held on entry or return.  We are responsible for
+ *	obtaining the MP lock if necessary and for handling ASTs
+ *	(e.g. a task switch) prior to return.
+ *
+ *	In general, only simple access and manipulation of curproc and
+ *	the current stack is allowed without having to hold MP lock.
+ */
+void
+syscall2(frame)
+	struct trapframe frame;
+{
+	caddr_t params;
+	int i;
+	struct sysent *callp;
+	struct proc *p = curproc;
+	u_quad_t sticks;
+	int error;
+	int narg;
+	int args[8];
+	int have_mplock = 0;
+	u_int code;
+
+#ifdef DIAGNOSTIC
+	if (ISPL(frame.tf_cs) != SEL_UPL) {
+		get_mplock();
+		panic("syscall");
+		/* NOT REACHED */
+	}
+#endif
+
+	/*
+	 * handle atomicity by looping since interrupts are enabled and the
+	 * MP lock is not held.
+	 */
+	sticks = ((volatile struct proc *)p)->p_sticks;
+	while (sticks != ((volatile struct proc *)p)->p_sticks)
+		sticks = ((volatile struct proc *)p)->p_sticks;
+
+	p->p_md.md_regs = &frame;
+	params = (caddr_t)frame.tf_esp + sizeof(int);
+	code = frame.tf_eax;
+
+	if (p->p_sysent->sv_prepsyscall) {
+		/*
+		 * The prep code is not MP aware.
+		 */
+		get_mplock();
+		(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+		rel_mplock();
+	} else {
+		/*
+		 * Need to check if this is a 32 bit or 64 bit syscall.
+		 * fuword is MP aware.
+		 */
+		if (code == SYS_syscall) {
+			/*
+			 * Code is first argument, followed by actual args.
+			 */
+			code = fuword(params);
+			params += sizeof(int);
+		} else if (code == SYS___syscall) {
+			/*
+			 * Like syscall, but code is a quad, so as to maintain
+			 * quad alignment for the rest of the arguments.
+			 */
+			code = fuword(params);
+			params += sizeof(quad_t);
+		}
+	}
+
+	if (p->p_sysent->sv_mask)
+		code &= p->p_sysent->sv_mask;
+
+	if (code >= p->p_sysent->sv_size)
+		callp = &p->p_sysent->sv_table[0];
+	else
+		callp = &p->p_sysent->sv_table[code];
+
+	narg = callp->sy_narg & SYF_ARGMASK;
+
+	/*
+	 * copyin is MP aware, but the tracing code is not
+	 */
+	if (params && (i = narg * sizeof(int)) &&
+	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
+		get_mplock();
+		have_mplock = 1;
+#ifdef KTRACE
+		if (KTRPOINT(p, KTR_SYSCALL))
+			ktrsyscall(p->p_tracep, code, narg, args);
+#endif
+		goto bad;
+	}
+
+	/*
+	 * Try to run the syscall without the MP lock if the syscall
+	 * is MP safe.  We have to obtain the MP lock no matter what if
+	 * we are ktracing
+	 */
+	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
+		get_mplock();
+		have_mplock = 1;
+	}
+
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_SYSCALL)) {
+		if (have_mplock == 0) {
+			get_mplock();
+			have_mplock = 1;
+		}
+		ktrsyscall(p->p_tracep, code, narg, args);
+	}
+#endif
+	p->p_retval[0] = 0;
+	p->p_retval[1] = frame.tf_edx;
+
+	STOPEVENT(p, S_SCE, narg);	/* MP aware */
+
+	error = (*callp->sy_call)(p, args);
+
+	/*
+	 * MP SAFE (we may or may not have the MP lock at this point)
+	 */
+	switch (error) {
+	case 0:
+		/*
+		 * Reinitialize proc pointer `p' as it may be different
+		 * if this is a child returning from fork syscall.
+		 */
+		p = curproc;
+		frame.tf_eax = p->p_retval[0];
+		frame.tf_edx = p->p_retval[1];
+		frame.tf_eflags &= ~PSL_C;
+		break;
+
+	case ERESTART:
+		/*
+		 * Reconstruct pc, assuming lcall $X,y is 7 bytes,
+		 * int 0x80 is 2 bytes.  We saved this in tf_err.
+		 */
+		frame.tf_eip -= frame.tf_err;
+		break;
+
+	case EJUSTRETURN:
+		break;
+
+	default:
+bad:
+		if (p->p_sysent->sv_errsize) {
+			if (error >= p->p_sysent->sv_errsize)
+				error = -1;	/* XXX */
+			else
+				error = p->p_sysent->sv_errtbl[error];
+		}
+		frame.tf_eax = error;
+		frame.tf_eflags |= PSL_C;
+		break;
+	}
+
+	/*
+	 * Traced syscall.  trapsignal() is not MP aware.
+	 */
+	if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
+		if (have_mplock == 0) {
+			get_mplock();
+			have_mplock = 1;
+		}
+		frame.tf_eflags &= ~PSL_T;
+		trapsignal(p, SIGTRAP, 0);
+	}
+
+	/*
+	 * Handle reschedule and other end-of-syscall issues
+	 */
+	have_mplock = userret(p, &frame, sticks, have_mplock);
+
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_SYSRET)) {
+		if (have_mplock == 0) {
+			get_mplock();
+			have_mplock = 1;
+		}
+		ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
+	}
+#endif
+
+	/*
+	 * This works because errno is findable through the
+	 * register set.  If we ever support an emulation where this
+	 * is not the case, this code will need to be revisited.
+	 */
+	STOPEVENT(p, S_SCX, code);
+
+	/*
+	 * Release the MP lock if we had to get it
+	 */
+	if (have_mplock)
+		rel_mplock();
+}
+
+/*
+ * Simplified back end of syscall(), used when returning from fork()
+ * directly into user mode.  MP lock is held on entry and should be
+ * held on return.
+ */
+void
+fork_return(p, frame)
+	struct proc *p;
+	struct trapframe frame;
+{
+	frame.tf_eax = 0;		/* Child returns zero */
+	frame.tf_eflags &= ~PSL_C;	/* success */
+	frame.tf_edx = 1;
+
+	userret(p, &frame, 0, 1);
+#ifdef KTRACE
+	if (KTRPOINT(p, KTR_SYSRET))
+		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
+#endif
+}
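
The control flow of userret() above is easier to see in isolation: drain pending signals, yield the CPU if a reschedule was requested while in the kernel, then re-check for signals that may have arrived while the process was off-CPU. Below is a minimal user-space model of that ordering; every name in it (pending_signal, deliver_signal, reschedule_wanted, yield_cpu) is a hypothetical stub standing in for the kernel primitives (CURSIG, postsig, resched_wanted, mi_switch), not a real interface.

/*
 * Minimal user-space model of the userret() loop in subr_trap.c.
 * All names here are illustrative stubs, not kernel interfaces.
 */
#include <stdio.h>
#include <stdbool.h>

static int  pending = 2;		/* pretend two signals are queued */
static bool want_resched = true;

static int  pending_signal(void)    { return pending; }
static void deliver_signal(int sig) { printf("deliver signal %d\n", sig); pending--; }
static bool reschedule_wanted(void) { bool w = want_resched; want_resched = false; return w; }
static void yield_cpu(void)         { printf("switch to another process\n"); pending = 1; }

static void model_userret(void)
{
	int sig;

	/* 1. Post any signals that became pending while in the kernel. */
	while ((sig = pending_signal()) != 0)
		deliver_signal(sig);

	/* 2. If the clock asked for a reschedule, go off-CPU; new
	 *    signals may queue up while we are switched out, so check
	 *    again afterwards, exactly as userret() does.
	 */
	if (reschedule_wanted()) {
		yield_cpu();
		while ((sig = pending_signal()) != 0)
			deliver_signal(sig);
	}
}

int main(void)
{
	model_userret();
	return 0;
}

The second signal check is the point of the pattern: mi_switch() can leave the process off-CPU long enough for new signals to arrive, and they must be posted before returning to user mode.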
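The F00F handling in trap_pfault() is similarly self-contained. As the comment in the function describes, the Pentium F00F workaround leaves the IDT entry for the invalid-instruction exception unreadable, so the lockup sequence surfaces as a page fault whose fault address is &idt[6]; trap_pfault() rewrites the trap into T_PRIVINFLT and returns -2 so that trap() restarts classification. A condensed model of just that check follows; idt_base is an illustrative value, not the kernel's.

/* Condensed model of the F00F check in trap_pfault(); idt_base and
 * the layout constants are illustrative, not taken from the kernel.
 */
#include <stdio.h>
#include <stdint.h>

#define T_PAGEFLT	12
#define T_PRIVINFLT	1
#define GATE_SIZE	8		/* bytes per i386 IDT descriptor */

static uintptr_t idt_base = 0xc0100000;	/* hypothetical read-protected IDT */
static int has_f00f_bug = 1;

/* Return the trap number the fault should be reported as. */
static int classify_fault(uintptr_t eva)
{
	/* A fault on IDT slot 6 (the #UD gate) means the workaround
	 * fired: report an illegal instruction, not a page fault.
	 */
	if (has_f00f_bug && eva == idt_base + 6 * GATE_SIZE)
		return T_PRIVINFLT;
	return T_PAGEFLT;
}

int main(void)
{
	printf("trapno = %d\n", classify_fault(idt_base + 6 * GATE_SIZE));
	printf("trapno = %d\n", classify_fault(0x08048000));
	return 0;
}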
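Finally, the indirect-syscall cases in syscall2() reduce to pointer arithmetic: for SYS_syscall the real syscall number is the first int on the user stack and params then advances past an int, while for SYS___syscall it advances past a quad_t so the remaining arguments stay 64-bit aligned. The sketch below simulates that fetch with a local buffer in place of fuword() and the user stack; the numbers 0, 198, and 4 are sample values in the style of FreeBSD's i386 syscall table, used only for illustration.

/* Model of the indirect syscall-number fetch in syscall2(); the
 * "user stack" is a local array and fuword() becomes memcpy().
 */
#include <stdio.h>
#include <string.h>

#define SYS_syscall	0	/* indirect syscall, int-sized number */
#define SYS___syscall	198	/* indirect syscall, quad-aligned number */

typedef long long quad_t;

int main(void)
{
	/* Fake user argument area: real syscall number first, then
	 * its actual arguments (here: write(1, ...)).
	 */
	int ustack[3] = { 4 /* SYS_write */, 1 /* fd */, 0 };
	char *params = (char *)ustack;
	unsigned code = SYS_syscall;	/* value that arrived in %eax */

	if (code == SYS_syscall) {
		memcpy(&code, params, sizeof(int));	/* fuword(params) */
		params += sizeof(int);			/* skip the number */
	} else if (code == SYS___syscall) {
		memcpy(&code, params, sizeof(int));	/* low word of the quad */
		params += sizeof(quad_t);		/* keep quad alignment */
	}

	printf("real syscall = %u, first arg = %d\n", code, *(int *)params);
	return 0;
}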