author      dillon <dillon@FreeBSD.org>    2002-03-27 05:39:23 +0000
committer   dillon <dillon@FreeBSD.org>    2002-03-27 05:39:23 +0000
commit      dc5aafeb94ddee4f835e390dffaecbb0eec5d5e2 (patch)
tree        8233f61cf29e01829b91c6a5cf27defe60e6b8d8
parent      9b5143f94f573dc8954cb0913f3edb055e6caf0f (diff)
download    FreeBSD-src-dc5aafeb94ddee4f835e390dffaecbb0eec5d5e2.zip
            FreeBSD-src-dc5aafeb94ddee4f835e390dffaecbb0eec5d5e2.tar.gz
Compromise for the critical*()/cpu_critical*() recommit. Clean up the interrupt
disablement assumptions in kern_fork.c by adding another API call,
cpu_critical_fork_exit(). Clean up the td_savecrit field by moving it
from MI to MD. Temporarily move cpu_critical*() from <arch>/include/cpufunc.h
to <arch>/<arch>/critical.c (stage-2 will clean this up).
Implement interrupt deferral for i386 so that interrupts can remain
enabled inside critical sections. This also fixes an IPI interlock bug
and requires that uses of icu_lock be enclosed in a true interrupt disablement.
This is the stage-1 commit. Stage-2 will occur after stage-1 has stabilized,
and will move cpu_critical*() into its own header file(s) + other things.
This commit may break non-i386 architectures in trivial ways. This should
be temporary.
Reviewed by: core
Approved by: core
78 files changed, 2583 insertions, 547 deletions
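Two recurring changes in the diff below are worth calling out before reading the hunks.

First, every place that takes icu_lock now hard-disables interrupts around the spin lock, because with deferral enabled an interrupt could otherwise fire inside a critical section and itself try to acquire icu_lock. The pattern, as it appears in the fpu.c and clock.c hunks (irq_num stands in for whichever interrupt is being enabled or disabled at that call site), is:

	register_t crit;

	crit = intr_disable();		/* true hardware disablement */
	mtx_lock_spin(&icu_lock);
	INTREN(1 << irq_num);		/* program the ICU/APIC mask */
	mtx_unlock_spin(&icu_lock);
	intr_restore(crit);

Second, the heart of the deferral scheme is the cpu_critical_exit()/unpend() pair added in the new x86 critical.c (visible in the sys/amd64/amd64/critical.c hunk below). The following is a condensed, slightly paraphrased sketch of that function, not a drop-in copy of the committed code; it only shows the control flow for the two modes selected by the debug.critical_mode sysctl:

	void
	cpu_critical_exit(void)
	{
		struct thread *td = curthread;

		if (td->td_md.md_savecrit != (register_t)-1) {
			/* old mode: interrupts were hard-disabled on entry */
			intr_restore(td->td_md.md_savecrit);
			td->td_md.md_savecrit = (register_t)-1;
		} else if (PCPU_GET(int_pending)) {
			/*
			 * new mode: interrupts stayed enabled, but any that
			 * arrived while td_critnest != 0 only set per-CPU
			 * pending bits; replay them now with interrupts
			 * hard-disabled.
			 */
			register_t eflags = intr_disable();

			if (PCPU_GET(int_pending)) {
				td->td_intr_nesting_level++;
				unpend();	/* drains fpending, ipending, spending */
				td->td_intr_nesting_level--;
			}
			intr_restore(eflags);
		}
	}
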
diff --git a/sys/alpha/alpha/critical.c b/sys/alpha/alpha/critical.c new file mode 100644 index 0000000..03e5215 --- /dev/null +++ b/sys/alpha/alpha/critical.c @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2001 Matthew Dillon. This code is distributed under + * the BSD copyright, /usr/src/COPYRIGHT. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/pcpu.h> +#include <sys/eventhandler.h> /* XX */ +#include <sys/ktr.h> /* XX */ +#include <sys/signalvar.h> +#include <sys/sysproto.h> /* XX */ +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> + +void +cpu_critical_enter(void) +{ + struct thread *td = curthread; + + td->td_md.md_savecrit = intr_disable(); +} + +void +cpu_critical_exit(void) +{ + struct thread *td = curthread; + + intr_restore(td->td_md.md_savecrit); +} + +/* + * cpu_critical_fork_exit() - cleanup after fork + * + */ +void +cpu_critical_fork_exit(void) +{ + struct thread *td = curthread; + + td->td_critnest = 1; + td->td_md.md_savecrit = ALPHA_PSL_IPL_0; +} + +/* + * cpu_thread_link() - thread linkup, initialize machine-dependant fields + */ +void +cpu_thread_link(struct thread *td) +{ + td->td_md.md_savecrit = 0; +} + diff --git a/sys/alpha/include/cpufunc.h b/sys/alpha/include/cpufunc.h index 577cb5f..95d7e10 100644 --- a/sys/alpha/include/cpufunc.h +++ b/sys/alpha/include/cpufunc.h @@ -35,7 +35,7 @@ #include <machine/chipset.h> #include <machine/alpha_cpu.h> -#define CRITICAL_FORK (ALPHA_PSL_IPL_0) +struct thread; #ifdef __GNUC__ @@ -47,18 +47,6 @@ breakpoint(void) #endif -static __inline critical_t -cpu_critical_enter(void) -{ - return (alpha_pal_swpipl(ALPHA_PSL_IPL_MCES)); -} - -static __inline void -cpu_critical_exit(critical_t ipl) -{ - alpha_pal_swpipl(ipl); -} - static __inline register_t intr_disable(void) { @@ -71,6 +59,10 @@ intr_restore(register_t ipl) alpha_pal_swpipl(ipl); } +void cpu_critical_enter(void); +void cpu_critical_exit(void); +void cpu_critical_fork_exit(void); +void cpu_thread_link(struct thread *td); #endif /* _KERNEL */ diff --git a/sys/alpha/include/proc.h b/sys/alpha/include/proc.h index eb1772a..b0689f9 100644 --- a/sys/alpha/include/proc.h +++ b/sys/alpha/include/proc.h @@ -47,6 +47,7 @@ struct mdthread { u_int64_t md_hae; /* user HAE register value */ void *osf_sigtramp; /* user-level signal trampoline */ u_int md_kernnest; /* nesting level in the kernel */ + register_t md_savecrit; /* save PSL for critical section */ }; #define MDP_FPUSED 0x0001 /* Process used the FPU */ diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 95c9133..f0e6497 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -19,11 +19,19 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - pushal ; \ + pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ pushl %fs +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; + #define POP_FRAME \ popl %fs ; \ popl %es ; \ @@ -31,37 +39,8 @@ popal ; \ addl $4+4,%esp -/* - * Macros for interrupt entry, call to handler, and exit. 
- */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic+LA_EOI ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ - jmp doreti +#define POP_DUMMY \ + addl $16*4,%esp #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 @@ -114,9 +93,9 @@ IDTVEC(vec_name) ; \ */ #define UNMASK_IRQ(irq_num) \ ICU_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), _apic_imen ; \ + testl $IRQ_BIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), _apic_imen ;/* clear mask bit */ \ + andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -126,6 +105,95 @@ IDTVEC(vec_name) ; \ 7: ; /* already unmasked */ \ ICU_UNLOCK +/* + * Test to see whether we are handling an edge or level triggered INT. + * Level-triggered INTs have to be unmasked. + */ +#define UNMASK_LEVEL_IRQ(irq_num) \ + testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + jz 9f ; /* edge, don't unmask */ \ + UNMASK_IRQ(irq_num) ; \ +9: + +/* + * Macros for interrupt entry, call to handler, and exit. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + movl $KDSEL,%eax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + movl $KPSEL,%eax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(FPENDING) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + movl $0, lapic+LA_EOI ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + movl $0, lapic+LA_EOI ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti + +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. 
+ */ +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + UNMASK_LEVEL_IRQ(irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret ; \ + + /* * Slow, threaded interrupts. * @@ -151,16 +219,27 @@ IDTVEC(vec_name) ; \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti @@ -226,9 +305,16 @@ Xhardclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $1,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_hardclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -250,10 +336,18 @@ Xstatclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) + movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $2,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_statclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -416,6 +510,39 @@ MCOUNT_LABEL(bintr) INTR(29,intr29,) INTR(30,intr30,) INTR(31,intr31,) + + FAST_UNPEND(0,fastunpend0) + FAST_UNPEND(1,fastunpend1) + FAST_UNPEND(2,fastunpend2) + FAST_UNPEND(3,fastunpend3) + FAST_UNPEND(4,fastunpend4) + FAST_UNPEND(5,fastunpend5) + FAST_UNPEND(6,fastunpend6) + FAST_UNPEND(7,fastunpend7) + FAST_UNPEND(8,fastunpend8) + FAST_UNPEND(9,fastunpend9) + FAST_UNPEND(10,fastunpend10) + FAST_UNPEND(11,fastunpend11) + FAST_UNPEND(12,fastunpend12) + FAST_UNPEND(13,fastunpend13) + FAST_UNPEND(14,fastunpend14) + FAST_UNPEND(15,fastunpend15) + FAST_UNPEND(16,fastunpend16) + FAST_UNPEND(17,fastunpend17) + FAST_UNPEND(18,fastunpend18) + FAST_UNPEND(19,fastunpend19) + FAST_UNPEND(20,fastunpend20) + FAST_UNPEND(21,fastunpend21) + FAST_UNPEND(22,fastunpend22) + FAST_UNPEND(23,fastunpend23) + FAST_UNPEND(24,fastunpend24) + FAST_UNPEND(25,fastunpend25) + FAST_UNPEND(26,fastunpend26) + FAST_UNPEND(27,fastunpend27) + FAST_UNPEND(28,fastunpend28) + FAST_UNPEND(29,fastunpend29) + FAST_UNPEND(30,fastunpend30) + FAST_UNPEND(31,fastunpend31) MCOUNT_LABEL(eintr) /* diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 4f2bdcb..e0f9bcd 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -96,6 +96,8 @@ ENTRY(cpu_switch) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) + pushfl /* PSL */ + popl PCB_PSL(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -233,6 +235,8 @@ sw1b: movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) + pushl PCB_PSL(%edx) + popfl #if defined(SMP) && defined(GRAB_LOPRIO) /* Hold LOPRIO for interrupts. 
*/ @@ -339,6 +343,8 @@ ENTRY(savectx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) movl %gs,PCB_GS(%ecx) + pushfl + popl PCB_PSL(%ecx) #ifdef DEV_NPX /* diff --git a/sys/amd64/amd64/critical.c b/sys/amd64/amd64/critical.c new file mode 100644 index 0000000..6ac696c --- /dev/null +++ b/sys/amd64/amd64/critical.c @@ -0,0 +1,220 @@ +/*- + * Copyright (c) 2001 Matthew Dillon. This code is distributed under + * the BSD copyright, /usr/src/COPYRIGHT. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/signalvar.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> + +#ifdef SMP +#include <machine/privatespace.h> +#include <machine/smp.h> +#else +/* + * XXX this mess to get sched_ithd() and call_fast_unpend() + */ +#include <sys/bus.h> +#include <machine/apic.h> +#include <machine/frame.h> +#include <i386/isa/icu.h> +#include <i386/isa/intr_machdep.h> +#endif + +void unpend(void); /* note: not static (called from assembly) */ + +/* + * Instrument our ability to run critical sections with interrupts + * enabled. Default is 1 (enabled). The value can be changed on the + * fly, at any time. If set to 0 the original interrupt disablement + * will be used for critical sections. + */ +int critical_mode = 1; +SYSCTL_INT(_debug, OID_AUTO, critical_mode, + CTLFLAG_RW, &critical_mode, 0, ""); + +/* + * cpu_critical_enter: + * + * This routine is called from critical_enter() on the 0->1 transition + * of td_critnest, prior to it being incremented to 1. + * + * If old-style critical section handling (critical_mode == 0), we + * disable interrupts. + * + * If new-style critical section handling (criticla_mode != 0), we + * do not have to do anything. However, as a side effect any + * interrupts occuring while td_critnest is non-zero will be + * deferred. + */ +void +cpu_critical_enter(void) +{ + if (critical_mode == 0) { + struct thread *td = curthread; + td->td_md.md_savecrit = intr_disable(); + } +} + +/* + * cpu_critical_exit: + * + * This routine is called from critical_exit() on a 1->0 transition + * of td_critnest, after it has been decremented to 0. We are + * exiting the last critical section. + * + * If td_critnest is -1 this is the 'new' critical_enter()/exit() + * code (the default critical_mode=1) and we do not have to do + * anything unless PCPU_GET(int_pending) is non-zero. + * + * Note that the td->critnest (1->0) transition interrupt race against + * our int_pending/unpend() check below is handled by the interrupt + * code for us, so we do not have to do anything fancy. + * + * Otherwise td_critnest contains the saved hardware interrupt state + * and will be restored. Since interrupts were hard-disabled there + * will be no pending interrupts to dispatch (the 'original' code). + */ +void +cpu_critical_exit(void) +{ + struct thread *td = curthread; + + if (td->td_md.md_savecrit != (register_t)-1) { + intr_restore(td->td_md.md_savecrit); + td->td_md.md_savecrit = (register_t)-1; + } else { + /* + * We may have to schedule pending interrupts. Create + * conditions similar to an interrupt context and call + * unpend(). + * + * note: we do this even if we are in an interrupt + * nesting level. Deep nesting is protected by + * critical_*() and if we conditionalized it then we + * would have to check int_pending again whenever + * we decrement td_intr_nesting_level to 0. 
+ */ + if (PCPU_GET(int_pending)) { + register_t eflags; + + eflags = intr_disable(); + if (PCPU_GET(int_pending)) { + ++td->td_intr_nesting_level; + unpend(); + --td->td_intr_nesting_level; + } + intr_restore(eflags); + } + } +} + +/* + * cpu_critical_fork_exit() - cleanup after fork + * + * For i386 we do not have to do anything, td_critnest and + * td_savecrit are handled by the fork trampoline code. + */ +void +cpu_critical_fork_exit(void) +{ +} + +/* + * cpu_thread_link() - thread linkup, initialize machine-dependant fields + * + * (copy code originally in kern/kern_proc.c). XXX we actually + * don't have to initialize this field but it's probably a good + * idea for the moment for debugging's sake. The field is only + * valid when td_critnest is non-zero. + */ +void +cpu_thread_link(struct thread *td) +{ + td->td_md.md_savecrit = 0; +} + +/* + * Called from cpu_critical_exit() or called from the assembly vector code + * to process any interrupts which may have occured while we were in + * a critical section. + * + * - interrupts must be disabled + * - td_critnest must be 0 + * - td_intr_nesting_level must be incremented by the caller + */ +void +unpend(void) +{ + KASSERT(curthread->td_critnest == 0, ("unpend critnest != 0")); + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled1")); + curthread->td_critnest = 1; + for (;;) { + u_int32_t mask; + + /* + * Fast interrupts have priority + */ + if ((mask = PCPU_GET(fpending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(fpending, mask & ~(1 << irq)); + call_fast_unpend(irq); + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled2 %d", irq)); + continue; + } + + /* + * Threaded interrupts come next + */ + if ((mask = PCPU_GET(ipending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(ipending, mask & ~(1 << irq)); + sched_ithd((void *)irq); + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled3 %d", irq)); + continue; + } + + /* + * Software interrupts and delayed IPIs are last + * + * XXX give the bits #defined names. see also + * isa/xxx_vector.s + */ + if ((mask = PCPU_GET(spending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(spending, mask & ~(1 << irq)); + switch(irq) { + case 0: /* bit 0 - hardclock */ + mtx_lock_spin(&sched_lock); + hardclock_process(curthread, 0); + mtx_unlock_spin(&sched_lock); + break; + case 1: /* bit 1 - statclock */ + mtx_lock_spin(&sched_lock); + statclock_process(curthread->td_kse, (register_t)unpend, 0); + mtx_unlock_spin(&sched_lock); + break; + } + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled4 %d", irq)); + continue; + } + break; + } + /* + * Interrupts are still disabled, we can safely clear int_pending + * and td_critnest. + */ + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled5")); + PCPU_SET(int_pending, 0); + curthread->td_critnest = 0; +} + diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 0b395e2..2d54a5f 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -222,6 +222,25 @@ ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ + movl PCPU(CURTHREAD),%ebx /* setup critnest */ + movl $1,TD_CRITNEST(%ebx) + /* + * Initialize md_savecrit based on critical_mode. If critical_mode + * is enabled (new/1) savecrit is basically not used but must + * be initialized to -1 so we know it isn't used in + * cpu_critical_exit(). If critical_mode is disabled (old/0) + * the eflags to restore must be saved in md_savecrit. 
+ */ + cmpl $0,critical_mode + jne 1f + pushfl + popl TD_MD+MD_SAVECRIT(%ebx) + orl $PSL_I,TD_MD+MD_SAVECRIT(%ebx) + jmp 2f +1: + movl $-1,TD_MD+MD_SAVECRIT(%ebx) + sti /* enable interrupts */ +2: call fork_exit addl $12,%esp /* cut from syscall */ diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s index 0b395e2..2d54a5f 100644 --- a/sys/amd64/amd64/exception.s +++ b/sys/amd64/amd64/exception.s @@ -222,6 +222,25 @@ ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ + movl PCPU(CURTHREAD),%ebx /* setup critnest */ + movl $1,TD_CRITNEST(%ebx) + /* + * Initialize md_savecrit based on critical_mode. If critical_mode + * is enabled (new/1) savecrit is basically not used but must + * be initialized to -1 so we know it isn't used in + * cpu_critical_exit(). If critical_mode is disabled (old/0) + * the eflags to restore must be saved in md_savecrit. + */ + cmpl $0,critical_mode + jne 1f + pushfl + popl TD_MD+MD_SAVECRIT(%ebx) + orl $PSL_I,TD_MD+MD_SAVECRIT(%ebx) + jmp 2f +1: + movl $-1,TD_MD+MD_SAVECRIT(%ebx) + sti /* enable interrupts */ +2: call fork_exit addl $12,%esp /* cut from syscall */ diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 43b012c..22a0282 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -429,9 +429,15 @@ no_irq13: * XXX hack around brokenness of bus_teardown_intr(). If we left the * irq active then we would get it instead of exception 16. */ - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); + { + register_t crit; + + crit = intr_disable(); + mtx_lock_spin(&icu_lock); + INTRDIS(1 << irq_num); + mtx_unlock_spin(&icu_lock); + intr_restore(crit); + } bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 1fe8a2e..f3e9f04 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -89,9 +89,12 @@ ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_KSE, offsetof(struct thread, td_kse)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level)); +ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest)); +ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); +ASSYM(MD_SAVECRIT, offsetof(struct mdthread, md_savecrit)); ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags)); @@ -134,6 +137,7 @@ ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); +ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); @@ -176,6 +180,10 @@ ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); +ASSYM(PC_INT_PENDING, offsetof(struct pcpu, pc_int_pending)); +ASSYM(PC_IPENDING, offsetof(struct pcpu, pc_ipending)); +ASSYM(PC_FPENDING, offsetof(struct pcpu, pc_fpending)); +ASSYM(PC_SPENDING, offsetof(struct pcpu, pc_spending)); ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); diff --git 
a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index ff8f10c..8b6cc82 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -802,11 +802,15 @@ cpu_idle(void) #ifndef SMP if (cpu_idle_hlt) { disable_intr(); - if (procrunnable()) + if (procrunnable()) { enable_intr(); - else { - enable_intr(); - __asm __volatile("hlt"); + } else { + /* + * we must absolutely guarentee that hlt is the + * absolute next instruction after sti or we + * introduce a timing window. + */ + __asm __volatile("sti; hlt"); } } #endif @@ -1693,12 +1697,17 @@ init386(first) /* * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. */ mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE); mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE); mtx_init(&proc0.p_mtx, "process lock", MTX_DEF); mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE); - mtx_init(&icu_lock, "icu", MTX_SPIN); + mtx_init(&icu_lock, "icu", MTX_SPIN | MTX_NOWITNESS); mtx_lock(&Giant); /* make ldt memory segments */ diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 4f891db..50c91c7 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -2310,6 +2310,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2341,6 +2344,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 4f891db..50c91c7 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -2310,6 +2310,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2341,6 +2344,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index 4f2bdcb..e0f9bcd 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -96,6 +96,8 @@ ENTRY(cpu_switch) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) + pushfl /* PSL */ + popl PCB_PSL(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -233,6 +235,8 @@ sw1b: movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) + pushl PCB_PSL(%edx) + popfl #if defined(SMP) && defined(GRAB_LOPRIO) /* Hold LOPRIO for interrupts. 
*/ @@ -339,6 +343,8 @@ ENTRY(savectx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) movl %gs,PCB_GS(%ecx) + pushfl + popl PCB_PSL(%ecx) #ifdef DEV_NPX /* diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index 49516028..810fbe7 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,12 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1125,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 59f3f89..a7e852a 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -193,6 +193,7 @@ cpu_fork(td1, p2, td2, flags) pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. 
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index ed1da94..2b0abcc 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -45,6 +45,8 @@ #include <sys/cdefs.h> #include <machine/psl.h> +struct thread; + __BEGIN_DECLS #define readb(va) (*(volatile u_int8_t *) (va)) #define readw(va) (*(volatile u_int16_t *) (va)) @@ -54,8 +56,6 @@ __BEGIN_DECLS #define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) #define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) -#define CRITICAL_FORK (read_eflags() | PSL_I) - #ifdef __GNUC__ #ifdef SWTCH_OPTIM_STATS @@ -562,22 +562,6 @@ load_dr7(u_int sel) __asm __volatile("movl %0,%%dr7" : : "r" (sel)); } -static __inline critical_t -cpu_critical_enter(void) -{ - critical_t eflags; - - eflags = read_eflags(); - disable_intr(); - return (eflags); -} - -static __inline void -cpu_critical_exit(critical_t eflags) -{ - write_eflags(eflags); -} - static __inline register_t intr_disable(void) { @@ -629,8 +613,6 @@ u_int rfs(void); u_int rgs(void); void load_fs(u_int sel); void load_gs(u_int sel); -critical_t cpu_critical_enter(void); -void cpu_critical_exit(critical_t eflags); #endif /* __GNUC__ */ @@ -642,6 +624,11 @@ u_int rcr0(void); u_int rcr3(void); u_int rcr4(void); void reset_dbregs(void); +void cpu_critical_enter(void); +void cpu_critical_exit(void); +void cpu_critical_fork_exit(void); +void cpu_thread_link(struct thread *td); + __END_DECLS #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index 4f891db..50c91c7 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -2310,6 +2310,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2341,6 +2344,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index b2f1bc4..860b3b5 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -69,7 +69,8 @@ struct pcb { caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; struct pcb_ext *pcb_ext; /* optional pcb extension */ - u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ + int pcb_psl; /* process status long */ + u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */ }; /* diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 5fecb98..eba526c 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -50,7 +50,11 @@ struct i386tss pc_common_tss; \ struct segment_descriptor pc_common_tssd; \ struct segment_descriptor *pc_tss_gdt; \ - int pc_currentldt + int pc_currentldt; \ + u_int32_t pc_int_pending; /* master int pending flag */ \ + u_int32_t pc_ipending; /* pending slow interrupts */ \ + u_int32_t pc_fpending; /* pending fast interrupts */ \ + u_int32_t pc_spending /* pending soft interrupts */ /* * Evaluates to the byte offset of the per-cpu variable name. 
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h index f011b5c..d7d5ef7 100644 --- a/sys/amd64/include/proc.h +++ b/sys/amd64/include/proc.h @@ -51,6 +51,7 @@ struct proc_ldt { * Machine-dependent part of the proc structure for i386. */ struct mdthread { + register_t md_savecrit; }; struct mdproc { diff --git a/sys/amd64/isa/atpic_vector.S b/sys/amd64/isa/atpic_vector.S index 4e10cc2..01a804b 100644 --- a/sys/amd64/isa/atpic_vector.S +++ b/sys/amd64/isa/atpic_vector.S @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else + #define ENABLE_ICU1 \ movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ + #define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,127 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else + #define ENABLE_ICU1_AND_2 \ movb $ICU_EOI,%al ; /* as above */ \ outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) 
*/ \ + call *intr_handler + (irq_num) * 4 ; \ addl $4,%esp ; \ + enable_icus ; \ incl cnt+V_INTR ; /* book-keeping can wait */ \ movl intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +170,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. */ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ +; \ maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ + MASK_IRQ(icu, irq_num) ; \ enable_icus ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ +; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. 
*/ \ - jmp doreti /* and catch up inside doreti */ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + 
FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c index 49516028..810fbe7 100644 --- a/sys/amd64/isa/clock.c +++ b/sys/amd64/isa/clock.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,12 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1125,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S index 4e10cc2..01a804b 100644 --- a/sys/amd64/isa/icu_vector.S +++ b/sys/amd64/isa/icu_vector.S @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else + #define ENABLE_ICU1 \ movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ + #define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,127 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else + #define ENABLE_ICU1_AND_2 \ movb $ICU_EOI,%al ; /* as above */ \ outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... 
*/ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ + call *intr_handler + (irq_num) * 4 ; \ addl $4,%esp ; \ + enable_icus ; \ incl cnt+V_INTR ; /* book-keeping can wait */ \ movl intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +170,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. 
*/ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ +; \ maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ + MASK_IRQ(icu, irq_num) ; \ enable_icus ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ +; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - 
INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/amd64/isa/icu_vector.s b/sys/amd64/isa/icu_vector.s index 4e10cc2..01a804b 100644 --- a/sys/amd64/isa/icu_vector.s +++ b/sys/amd64/isa/icu_vector.s @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else + #define ENABLE_ICU1 \ movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ + #define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,127 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else + #define ENABLE_ICU1_AND_2 \ movb $ICU_EOI,%al ; /* as above */ \ outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... 
*/ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ + call *intr_handler + (irq_num) * 4 ; \ addl $4,%esp ; \ + enable_icus ; \ incl cnt+V_INTR ; /* book-keeping can wait */ \ movl intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +170,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. 
*/ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ +; \ maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ + MASK_IRQ(icu, irq_num) ; \ enable_icus ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ +; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - 
INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c index cfc162b..59c739e 100644 --- a/sys/amd64/isa/intr_machdep.c +++ b/sys/amd64/isa/intr_machdep.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + register_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window 
register is 32 bits */ #endif /* FAST_HI */ + register_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return (EBUSY); #endif + crit = intr_disable(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -530,6 +559,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -543,10 +573,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + register_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -564,6 +596,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -578,19 +611,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } static void ithread_disable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } int @@ -672,3 +711,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h index d674630..8e1a828 100644 --- a/sys/amd64/isa/intr_machdep.h +++ b/sys/amd64/isa/intr_machdep.h @@ -140,6 +140,7 @@ * Type of the first (asm) part of an interrupt handler. 
*/ typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); +typedef void unpendhand_t __P((void)); #define IDTVEC(name) __CONCAT(X,name) @@ -163,6 +164,18 @@ inthand_t IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15); +unpendhand_t + IDTVEC(fastunpend0), IDTVEC(fastunpend1), IDTVEC(fastunpend2), + IDTVEC(fastunpend3), IDTVEC(fastunpend4), IDTVEC(fastunpend5), + IDTVEC(fastunpend6), IDTVEC(fastunpend7), IDTVEC(fastunpend8), + IDTVEC(fastunpend9), IDTVEC(fastunpend10), IDTVEC(fastunpend11), + IDTVEC(fastunpend12), IDTVEC(fastunpend13), IDTVEC(fastunpend14), + IDTVEC(fastunpend15), IDTVEC(fastunpend16), IDTVEC(fastunpend17), + IDTVEC(fastunpend18), IDTVEC(fastunpend19), IDTVEC(fastunpend20), + IDTVEC(fastunpend21), IDTVEC(fastunpend22), IDTVEC(fastunpend23), + IDTVEC(fastunpend24), IDTVEC(fastunpend25), IDTVEC(fastunpend26), + IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29), + IDTVEC(fastunpend30), IDTVEC(fastunpend31); #if defined(SMP) || defined(APIC_IO) inthand_t @@ -227,6 +240,7 @@ int inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); int inthand_remove(void *cookie); void sched_ithd(void *dummy); +void call_fast_unpend(int irq); #endif /* LOCORE */ diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c index cfc162b..59c739e 100644 --- a/sys/amd64/isa/nmi.c +++ b/sys/amd64/isa/nmi.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + register_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ + register_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return 
(EBUSY); #endif + crit = intr_disable(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -530,6 +559,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -543,10 +573,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + register_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -564,6 +596,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -578,19 +611,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } static void ithread_disable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } int @@ -672,3 +711,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c index 43b012c..22a0282 100644 --- a/sys/amd64/isa/npx.c +++ b/sys/amd64/isa/npx.c @@ -429,9 +429,15 @@ no_irq13: * XXX hack around brokenness of bus_teardown_intr(). If we left the * irq active then we would get it instead of exception 16. */ - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); + { + register_t crit; + + crit = intr_disable(); + mtx_lock_spin(&icu_lock); + INTRDIS(1 << irq_num); + mtx_unlock_spin(&icu_lock); + intr_restore(crit); + } bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); diff --git a/sys/conf/files.alpha b/sys/conf/files.alpha index 9854c5d..9733f65 100644 --- a/sys/conf/files.alpha +++ b/sys/conf/files.alpha @@ -39,6 +39,7 @@ alpha/alpha/busspace.c standard alpha/alpha/clock.c standard alpha/alpha/clock_if.m standard alpha/alpha/cpuconf.c standard +alpha/alpha/critical.c standard alpha/alpha/db_disasm.c optional ddb alpha/alpha/db_interface.c optional ddb alpha/alpha/db_trace.c optional ddb diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index ecae824..5243412 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -199,6 +199,7 @@ i386/i386/autoconf.c standard i386/i386/bios.c standard i386/i386/bioscall.s standard i386/i386/busdma_machdep.c standard +i386/i386/critical.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb diff --git a/sys/conf/files.ia64 b/sys/conf/files.ia64 index 9af5112..9850f3b 100644 --- a/sys/conf/files.ia64 +++ b/sys/conf/files.ia64 @@ -27,6 +27,7 @@ ia64/ia64/autoconf.c standard ia64/ia64/busdma_machdep.c standard ia64/ia64/clock.c standard ia64/ia64/clock_if.m standard +ia64/ia64/critical.c standard ia64/ia64/db_disasm.c optional ddb ia64/ia64/db_interface.c optional ddb ia64/ia64/db_trace.c optional ddb diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index 7a44e84..c7d9d7f 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -183,6 +183,7 @@ i386/i386/autoconf.c standard i386/i386/bios.c standard i386/i386/bioscall.s standard 
i386/i386/busdma_machdep.c standard +i386/i386/critical.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 4d6fa6f..98307c8 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -16,6 +16,7 @@ powerpc/powerpc/bcopy.c standard powerpc/powerpc/clock.c standard powerpc/powerpc/copyinout.c standard powerpc/powerpc/copystr.c standard +powerpc/powerpc/critical.c standard powerpc/powerpc/elf_machdep.c standard powerpc/powerpc/extintr.c standard powerpc/powerpc/fuswintr.c standard diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64 index bb2bdee..ba67ba9 100644 --- a/sys/conf/files.sparc64 +++ b/sys/conf/files.sparc64 @@ -28,6 +28,7 @@ sparc64/sparc64/bus_machdep.c standard sparc64/sparc64/cache.c standard sparc64/sparc64/clock.c standard sparc64/sparc64/counter.c standard +sparc64/sparc64/critical.c standard sparc64/sparc64/db_disasm.c optional ddb sparc64/sparc64/db_interface.c optional ddb sparc64/sparc64/db_trace.c optional ddb diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 95c9133..f0e6497 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -19,11 +19,19 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - pushal ; \ + pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ pushl %fs +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; + #define POP_FRAME \ popl %fs ; \ popl %es ; \ @@ -31,37 +39,8 @@ popal ; \ addl $4+4,%esp -/* - * Macros for interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic+LA_EOI ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ - jmp doreti +#define POP_DUMMY \ + addl $16*4,%esp #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 @@ -114,9 +93,9 @@ IDTVEC(vec_name) ; \ */ #define UNMASK_IRQ(irq_num) \ ICU_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), _apic_imen ; \ + testl $IRQ_BIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), _apic_imen ;/* clear mask bit */ \ + andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -126,6 +105,95 @@ IDTVEC(vec_name) ; \ 7: ; /* already unmasked */ \ ICU_UNLOCK +/* + * Test to see whether we are handling an edge or level triggered INT. + * Level-triggered INTs have to be unmasked. 
+ */ +#define UNMASK_LEVEL_IRQ(irq_num) \ + testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + jz 9f ; /* edge, don't unmask */ \ + UNMASK_IRQ(irq_num) ; \ +9: + +/* + * Macros for interrupt entry, call to handler, and exit. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + movl $KDSEL,%eax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + movl $KPSEL,%eax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(FPENDING) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + movl $0, lapic+LA_EOI ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + movl $0, lapic+LA_EOI ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti + +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + UNMASK_LEVEL_IRQ(irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret ; \ + + /* * Slow, threaded interrupts. 
* @@ -151,16 +219,27 @@ IDTVEC(vec_name) ; \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti @@ -226,9 +305,16 @@ Xhardclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $1,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_hardclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -250,10 +336,18 @@ Xstatclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) + movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $2,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_statclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -416,6 +510,39 @@ MCOUNT_LABEL(bintr) INTR(29,intr29,) INTR(30,intr30,) INTR(31,intr31,) + + FAST_UNPEND(0,fastunpend0) + FAST_UNPEND(1,fastunpend1) + FAST_UNPEND(2,fastunpend2) + FAST_UNPEND(3,fastunpend3) + FAST_UNPEND(4,fastunpend4) + FAST_UNPEND(5,fastunpend5) + FAST_UNPEND(6,fastunpend6) + FAST_UNPEND(7,fastunpend7) + FAST_UNPEND(8,fastunpend8) + FAST_UNPEND(9,fastunpend9) + FAST_UNPEND(10,fastunpend10) + FAST_UNPEND(11,fastunpend11) + FAST_UNPEND(12,fastunpend12) + FAST_UNPEND(13,fastunpend13) + FAST_UNPEND(14,fastunpend14) + FAST_UNPEND(15,fastunpend15) + FAST_UNPEND(16,fastunpend16) + FAST_UNPEND(17,fastunpend17) + FAST_UNPEND(18,fastunpend18) + FAST_UNPEND(19,fastunpend19) + FAST_UNPEND(20,fastunpend20) + FAST_UNPEND(21,fastunpend21) + FAST_UNPEND(22,fastunpend22) + FAST_UNPEND(23,fastunpend23) + FAST_UNPEND(24,fastunpend24) + FAST_UNPEND(25,fastunpend25) + FAST_UNPEND(26,fastunpend26) + FAST_UNPEND(27,fastunpend27) + FAST_UNPEND(28,fastunpend28) + FAST_UNPEND(29,fastunpend29) + FAST_UNPEND(30,fastunpend30) + FAST_UNPEND(31,fastunpend31) MCOUNT_LABEL(eintr) /* diff --git a/sys/i386/i386/critical.c b/sys/i386/i386/critical.c new file mode 100644 index 0000000..6ac696c --- /dev/null +++ b/sys/i386/i386/critical.c @@ -0,0 +1,220 @@ +/*- + * Copyright (c) 2001 Matthew Dillon. This code is distributed under + * the BSD copyright, /usr/src/COPYRIGHT. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/signalvar.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> + +#ifdef SMP +#include <machine/privatespace.h> +#include <machine/smp.h> +#else +/* + * XXX this mess to get sched_ithd() and call_fast_unpend() + */ +#include <sys/bus.h> +#include <machine/apic.h> +#include <machine/frame.h> +#include <i386/isa/icu.h> +#include <i386/isa/intr_machdep.h> +#endif + +void unpend(void); /* note: not static (called from assembly) */ + +/* + * Instrument our ability to run critical sections with interrupts + * enabled. Default is 1 (enabled). The value can be changed on the + * fly, at any time. 
If set to 0 the original interrupt disablement + * will be used for critical sections. + */ +int critical_mode = 1; +SYSCTL_INT(_debug, OID_AUTO, critical_mode, + CTLFLAG_RW, &critical_mode, 0, ""); + +/* + * cpu_critical_enter: + * + * This routine is called from critical_enter() on the 0->1 transition + * of td_critnest, prior to it being incremented to 1. + * + * If old-style critical section handling (critical_mode == 0), we + * disable interrupts. + * + * If new-style critical section handling (criticla_mode != 0), we + * do not have to do anything. However, as a side effect any + * interrupts occuring while td_critnest is non-zero will be + * deferred. + */ +void +cpu_critical_enter(void) +{ + if (critical_mode == 0) { + struct thread *td = curthread; + td->td_md.md_savecrit = intr_disable(); + } +} + +/* + * cpu_critical_exit: + * + * This routine is called from critical_exit() on a 1->0 transition + * of td_critnest, after it has been decremented to 0. We are + * exiting the last critical section. + * + * If td_critnest is -1 this is the 'new' critical_enter()/exit() + * code (the default critical_mode=1) and we do not have to do + * anything unless PCPU_GET(int_pending) is non-zero. + * + * Note that the td->critnest (1->0) transition interrupt race against + * our int_pending/unpend() check below is handled by the interrupt + * code for us, so we do not have to do anything fancy. + * + * Otherwise td_critnest contains the saved hardware interrupt state + * and will be restored. Since interrupts were hard-disabled there + * will be no pending interrupts to dispatch (the 'original' code). + */ +void +cpu_critical_exit(void) +{ + struct thread *td = curthread; + + if (td->td_md.md_savecrit != (register_t)-1) { + intr_restore(td->td_md.md_savecrit); + td->td_md.md_savecrit = (register_t)-1; + } else { + /* + * We may have to schedule pending interrupts. Create + * conditions similar to an interrupt context and call + * unpend(). + * + * note: we do this even if we are in an interrupt + * nesting level. Deep nesting is protected by + * critical_*() and if we conditionalized it then we + * would have to check int_pending again whenever + * we decrement td_intr_nesting_level to 0. + */ + if (PCPU_GET(int_pending)) { + register_t eflags; + + eflags = intr_disable(); + if (PCPU_GET(int_pending)) { + ++td->td_intr_nesting_level; + unpend(); + --td->td_intr_nesting_level; + } + intr_restore(eflags); + } + } +} + +/* + * cpu_critical_fork_exit() - cleanup after fork + * + * For i386 we do not have to do anything, td_critnest and + * td_savecrit are handled by the fork trampoline code. + */ +void +cpu_critical_fork_exit(void) +{ +} + +/* + * cpu_thread_link() - thread linkup, initialize machine-dependant fields + * + * (copy code originally in kern/kern_proc.c). XXX we actually + * don't have to initialize this field but it's probably a good + * idea for the moment for debugging's sake. The field is only + * valid when td_critnest is non-zero. + */ +void +cpu_thread_link(struct thread *td) +{ + td->td_md.md_savecrit = 0; +} + +/* + * Called from cpu_critical_exit() or called from the assembly vector code + * to process any interrupts which may have occured while we were in + * a critical section. 
+ * + * - interrupts must be disabled + * - td_critnest must be 0 + * - td_intr_nesting_level must be incremented by the caller + */ +void +unpend(void) +{ + KASSERT(curthread->td_critnest == 0, ("unpend critnest != 0")); + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled1")); + curthread->td_critnest = 1; + for (;;) { + u_int32_t mask; + + /* + * Fast interrupts have priority + */ + if ((mask = PCPU_GET(fpending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(fpending, mask & ~(1 << irq)); + call_fast_unpend(irq); + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled2 %d", irq)); + continue; + } + + /* + * Threaded interrupts come next + */ + if ((mask = PCPU_GET(ipending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(ipending, mask & ~(1 << irq)); + sched_ithd((void *)irq); + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled3 %d", irq)); + continue; + } + + /* + * Software interrupts and delayed IPIs are last + * + * XXX give the bits #defined names. see also + * isa/xxx_vector.s + */ + if ((mask = PCPU_GET(spending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(spending, mask & ~(1 << irq)); + switch(irq) { + case 0: /* bit 0 - hardclock */ + mtx_lock_spin(&sched_lock); + hardclock_process(curthread, 0); + mtx_unlock_spin(&sched_lock); + break; + case 1: /* bit 1 - statclock */ + mtx_lock_spin(&sched_lock); + statclock_process(curthread->td_kse, (register_t)unpend, 0); + mtx_unlock_spin(&sched_lock); + break; + } + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled4 %d", irq)); + continue; + } + break; + } + /* + * Interrupts are still disabled, we can safely clear int_pending + * and td_critnest. + */ + KASSERT((read_eflags() & PSL_I) == 0, ("unpend interrupts enabled5")); + PCPU_SET(int_pending, 0); + curthread->td_critnest = 0; +} + diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index 0b395e2..2d54a5f 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -222,6 +222,25 @@ ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ + movl PCPU(CURTHREAD),%ebx /* setup critnest */ + movl $1,TD_CRITNEST(%ebx) + /* + * Initialize md_savecrit based on critical_mode. If critical_mode + * is enabled (new/1) savecrit is basically not used but must + * be initialized to -1 so we know it isn't used in + * cpu_critical_exit(). If critical_mode is disabled (old/0) + * the eflags to restore must be saved in md_savecrit. 
+ */ + cmpl $0,critical_mode + jne 1f + pushfl + popl TD_MD+MD_SAVECRIT(%ebx) + orl $PSL_I,TD_MD+MD_SAVECRIT(%ebx) + jmp 2f +1: + movl $-1,TD_MD+MD_SAVECRIT(%ebx) + sti /* enable interrupts */ +2: call fork_exit addl $12,%esp /* cut from syscall */ diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index 1fe8a2e..f3e9f04 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -89,9 +89,12 @@ ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_KSE, offsetof(struct thread, td_kse)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level)); +ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest)); +ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); +ASSYM(MD_SAVECRIT, offsetof(struct mdthread, md_savecrit)); ASSYM(KE_FLAGS, offsetof(struct kse, ke_flags)); @@ -134,6 +137,7 @@ ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); +ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); @@ -176,6 +180,10 @@ ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); +ASSYM(PC_INT_PENDING, offsetof(struct pcpu, pc_int_pending)); +ASSYM(PC_IPENDING, offsetof(struct pcpu, pc_ipending)); +ASSYM(PC_FPENDING, offsetof(struct pcpu, pc_fpending)); +ASSYM(PC_SPENDING, offsetof(struct pcpu, pc_spending)); ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index ff8f10c..8b6cc82 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -802,11 +802,15 @@ cpu_idle(void) #ifndef SMP if (cpu_idle_hlt) { disable_intr(); - if (procrunnable()) + if (procrunnable()) { enable_intr(); - else { - enable_intr(); - __asm __volatile("hlt"); + } else { + /* + * we must absolutely guarentee that hlt is the + * absolute next instruction after sti or we + * introduce a timing window. + */ + __asm __volatile("sti; hlt"); } } #endif @@ -1693,12 +1697,17 @@ init386(first) /* * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. */ mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE); mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE); mtx_init(&proc0.p_mtx, "process lock", MTX_DEF); mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE); - mtx_init(&icu_lock, "icu", MTX_SPIN); + mtx_init(&icu_lock, "icu", MTX_SPIN | MTX_NOWITNESS); mtx_lock(&Giant); /* make ldt memory segments */ diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 4f891db..50c91c7 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -2310,6 +2310,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. 
*/ void forwarded_statclock(struct trapframe frame) @@ -2341,6 +2344,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c index 769415e..c42373b 100644 --- a/sys/i386/i386/mpapic.c +++ b/sys/i386/i386/mpapic.c @@ -190,6 +190,7 @@ io_apic_setup_intpin(int apic, int pin) u_int32_t target; /* the window register is 32 bits */ u_int32_t vector; /* the window register is 32 bits */ int level; + register_t crit; target = IOART_DEST; @@ -210,11 +211,13 @@ io_apic_setup_intpin(int apic, int pin) * shouldn't and stop the carnage. */ vector = NRSVIDT + pin; /* IDT vec */ + crit = intr_disable(); mtx_lock_spin(&icu_lock); io_apic_write(apic, select, (io_apic_read(apic, select) & ~IOART_INTMASK & ~0xff)|IOART_INTMSET|vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); /* we only deal with vectored INTs here */ if (apic_int_type(apic, pin) != 0) @@ -258,10 +261,12 @@ io_apic_setup_intpin(int apic, int pin) printf("IOAPIC #%d intpin %d -> irq %d\n", apic, pin, irq); vector = NRSVIDT + irq; /* IDT vec */ + crit = intr_disable(); mtx_lock_spin(&icu_lock); io_apic_write(apic, select, flags | vector); io_apic_write(apic, select + 1, target); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } int diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index 4f891db..50c91c7 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -2310,6 +2310,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2341,6 +2344,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 4f2bdcb..e0f9bcd 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -96,6 +96,8 @@ ENTRY(cpu_switch) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) + pushfl /* PSL */ + popl PCB_PSL(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -233,6 +235,8 @@ sw1b: movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) + pushl PCB_PSL(%edx) + popfl #if defined(SMP) && defined(GRAB_LOPRIO) /* Hold LOPRIO for interrupts. 
*/ @@ -339,6 +343,8 @@ ENTRY(savectx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) movl %gs,PCB_GS(%ecx) + pushfl + popl PCB_PSL(%ecx) #ifdef DEV_NPX /* diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c index 49516028..810fbe7 100644 --- a/sys/i386/i386/tsc.c +++ b/sys/i386/i386/tsc.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,12 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1125,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 59f3f89..a7e852a 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -193,6 +193,7 @@ cpu_fork(td1, p2, td2, flags) pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. 
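[Editor's note — illustrative sketch, not part of the patch.] The WARNING comments above record that unpend() bypasses forwarded_hardclock()/forwarded_statclock() and invokes hardclock_process()/statclock_process() directly, and the unpend() loop in i386/i386/critical.c drains deferred work in a fixed priority order: fast interrupts first, then threaded interrupts, then the soft (clock/IPI) bits. The following stand-alone user-space C model shows only that drain order; every identifier in it is invented for the sketch and none of it is kernel code.

	/*
	 * User-space model of the unpend() drain order: fast, then
	 * threaded, then soft.  Names are invented for illustration.
	 */
	#include <stdio.h>
	#include <strings.h>		/* ffs() */

	static unsigned fpending, ipending, spending;
	static int int_pending;

	static void run_fast(int irq)     { printf("fast irq %d\n", irq); }
	static void run_threaded(int irq) { printf("sched_ithd irq %d\n", irq); }
	static void run_soft(int bit)     { printf(bit ? "statclock\n" : "hardclock\n"); }

	static void
	drain_pending(void)
	{
		int irq;

		for (;;) {
			if (fpending != 0) {		/* fast interrupts have priority */
				irq = ffs(fpending) - 1;
				fpending &= ~(1U << irq);
				run_fast(irq);
				continue;
			}
			if (ipending != 0) {		/* threaded interrupts next */
				irq = ffs(ipending) - 1;
				ipending &= ~(1U << irq);
				run_threaded(irq);
				continue;
			}
			if (spending != 0) {		/* soft bits: 0 hardclock, 1 statclock */
				irq = ffs(spending) - 1;
				spending &= ~(1U << irq);
				run_soft(irq);
				continue;
			}
			break;
		}
		int_pending = 0;			/* nothing left deferred */
	}

	int
	main(void)
	{
		int_pending = 1;
		fpending = 1 << 3;	/* pretend fast IRQ 3 was deferred */
		ipending = 1 << 14;	/* and threaded IRQ 14 */
		spending = 1 << 0;	/* and a forwarded hardclock */
		drain_pending();
		printf("int_pending now %d\n", int_pending);
		return (0);
	}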
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index ed1da94..2b0abcc 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -45,6 +45,8 @@ #include <sys/cdefs.h> #include <machine/psl.h> +struct thread; + __BEGIN_DECLS #define readb(va) (*(volatile u_int8_t *) (va)) #define readw(va) (*(volatile u_int16_t *) (va)) @@ -54,8 +56,6 @@ __BEGIN_DECLS #define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) #define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) -#define CRITICAL_FORK (read_eflags() | PSL_I) - #ifdef __GNUC__ #ifdef SWTCH_OPTIM_STATS @@ -562,22 +562,6 @@ load_dr7(u_int sel) __asm __volatile("movl %0,%%dr7" : : "r" (sel)); } -static __inline critical_t -cpu_critical_enter(void) -{ - critical_t eflags; - - eflags = read_eflags(); - disable_intr(); - return (eflags); -} - -static __inline void -cpu_critical_exit(critical_t eflags) -{ - write_eflags(eflags); -} - static __inline register_t intr_disable(void) { @@ -629,8 +613,6 @@ u_int rfs(void); u_int rgs(void); void load_fs(u_int sel); void load_gs(u_int sel); -critical_t cpu_critical_enter(void); -void cpu_critical_exit(critical_t eflags); #endif /* __GNUC__ */ @@ -642,6 +624,11 @@ u_int rcr0(void); u_int rcr3(void); u_int rcr4(void); void reset_dbregs(void); +void cpu_critical_enter(void); +void cpu_critical_exit(void); +void cpu_critical_fork_exit(void); +void cpu_thread_link(struct thread *td); + __END_DECLS #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index 4f891db..50c91c7 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h @@ -2310,6 +2310,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2341,6 +2344,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h index b2f1bc4..860b3b5 100644 --- a/sys/i386/include/pcb.h +++ b/sys/i386/include/pcb.h @@ -69,7 +69,8 @@ struct pcb { caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; struct pcb_ext *pcb_ext; /* optional pcb extension */ - u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ + int pcb_psl; /* process status long */ + u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */ }; /* diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index 5fecb98..eba526c 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -50,7 +50,11 @@ struct i386tss pc_common_tss; \ struct segment_descriptor pc_common_tssd; \ struct segment_descriptor *pc_tss_gdt; \ - int pc_currentldt + int pc_currentldt; \ + u_int32_t pc_int_pending; /* master int pending flag */ \ + u_int32_t pc_ipending; /* pending slow interrupts */ \ + u_int32_t pc_fpending; /* pending fast interrupts */ \ + u_int32_t pc_spending /* pending soft interrupts */ /* * Evaluates to the byte offset of the per-cpu variable name. 
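[Editor's note — illustrative sketch, not part of the patch.] The pc_int_pending, pc_fpending, pc_ipending and pc_spending fields added to struct pcpu just above are what the rewritten FAST_INTR/INTR macros set when TD_CRITNEST is non-zero: the interrupt is masked and recorded rather than serviced, and unpend() replays it later. A minimal user-space C model of that entry-time decision follows; it is illustrative only and every identifier is invented for the sketch.

	/*
	 * Model of the deferral test at interrupt entry: inside a
	 * critical section the interrupt is recorded, not run.
	 */
	#include <stdio.h>

	struct pcpu_model {
		unsigned int_pending;	/* master "something deferred" flag */
		unsigned fpending;	/* deferred fast interrupts */
	};

	struct thread_model {
		int td_critnest;	/* critical section nesting count */
	};

	static struct pcpu_model pc;
	static struct thread_model td;

	static void
	fast_intr(int irq)
	{
		if (td.td_critnest != 0) {
			/* Defer: mark pending (the real code also masks the IRQ). */
			pc.int_pending = 1;
			pc.fpending |= 1U << irq;
			printf("irq %d deferred\n", irq);
			return;
		}
		td.td_critnest++;		/* handler runs as a critical section */
		printf("irq %d handled\n", irq);
		td.td_critnest--;
	}

	int
	main(void)
	{
		td.td_critnest = 1;		/* as if inside critical_enter() */
		fast_intr(5);			/* deferred */
		td.td_critnest = 0;
		fast_intr(5);			/* handled directly */
		return (0);
	}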
diff --git a/sys/i386/include/proc.h b/sys/i386/include/proc.h index f011b5c..d7d5ef7 100644 --- a/sys/i386/include/proc.h +++ b/sys/i386/include/proc.h @@ -51,6 +51,7 @@ struct proc_ldt { * Machine-dependent part of the proc structure for i386. */ struct mdthread { + register_t md_savecrit; }; struct mdproc { diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s index 95c9133..f0e6497 100644 --- a/sys/i386/isa/apic_vector.s +++ b/sys/i386/isa/apic_vector.s @@ -19,11 +19,19 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - pushal ; \ + pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ pushl %fs +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; + #define POP_FRAME \ popl %fs ; \ popl %es ; \ @@ -31,37 +39,8 @@ popal ; \ addl $4+4,%esp -/* - * Macros for interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic+LA_EOI ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ - jmp doreti +#define POP_DUMMY \ + addl $16*4,%esp #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 @@ -114,9 +93,9 @@ IDTVEC(vec_name) ; \ */ #define UNMASK_IRQ(irq_num) \ ICU_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), _apic_imen ; \ + testl $IRQ_BIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), _apic_imen ;/* clear mask bit */ \ + andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -126,6 +105,95 @@ IDTVEC(vec_name) ; \ 7: ; /* already unmasked */ \ ICU_UNLOCK +/* + * Test to see whether we are handling an edge or level triggered INT. + * Level-triggered INTs have to be unmasked. + */ +#define UNMASK_LEVEL_IRQ(irq_num) \ + testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + jz 9f ; /* edge, don't unmask */ \ + UNMASK_IRQ(irq_num) ; \ +9: + +/* + * Macros for interrupt entry, call to handler, and exit. 
+ */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + movl $KDSEL,%eax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + movl $KPSEL,%eax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(FPENDING) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + movl $0, lapic+LA_EOI ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + movl $0, lapic+LA_EOI ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti + +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + UNMASK_LEVEL_IRQ(irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret ; \ + + /* * Slow, threaded interrupts. 
* @@ -151,16 +219,27 @@ IDTVEC(vec_name) ; \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti @@ -226,9 +305,16 @@ Xhardclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $1,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_hardclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -250,10 +336,18 @@ Xstatclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) + movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $2,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_statclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -416,6 +510,39 @@ MCOUNT_LABEL(bintr) INTR(29,intr29,) INTR(30,intr30,) INTR(31,intr31,) + + FAST_UNPEND(0,fastunpend0) + FAST_UNPEND(1,fastunpend1) + FAST_UNPEND(2,fastunpend2) + FAST_UNPEND(3,fastunpend3) + FAST_UNPEND(4,fastunpend4) + FAST_UNPEND(5,fastunpend5) + FAST_UNPEND(6,fastunpend6) + FAST_UNPEND(7,fastunpend7) + FAST_UNPEND(8,fastunpend8) + FAST_UNPEND(9,fastunpend9) + FAST_UNPEND(10,fastunpend10) + FAST_UNPEND(11,fastunpend11) + FAST_UNPEND(12,fastunpend12) + FAST_UNPEND(13,fastunpend13) + FAST_UNPEND(14,fastunpend14) + FAST_UNPEND(15,fastunpend15) + FAST_UNPEND(16,fastunpend16) + FAST_UNPEND(17,fastunpend17) + FAST_UNPEND(18,fastunpend18) + FAST_UNPEND(19,fastunpend19) + FAST_UNPEND(20,fastunpend20) + FAST_UNPEND(21,fastunpend21) + FAST_UNPEND(22,fastunpend22) + FAST_UNPEND(23,fastunpend23) + FAST_UNPEND(24,fastunpend24) + FAST_UNPEND(25,fastunpend25) + FAST_UNPEND(26,fastunpend26) + FAST_UNPEND(27,fastunpend27) + FAST_UNPEND(28,fastunpend28) + FAST_UNPEND(29,fastunpend29) + FAST_UNPEND(30,fastunpend30) + FAST_UNPEND(31,fastunpend31) MCOUNT_LABEL(eintr) /* diff --git a/sys/i386/isa/atpic_vector.s b/sys/i386/isa/atpic_vector.s index 4e10cc2..01a804b 100644 --- a/sys/i386/isa/atpic_vector.s +++ b/sys/i386/isa/atpic_vector.s @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else + #define ENABLE_ICU1 \ movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ + #define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,127 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else + #define ENABLE_ICU1_AND_2 \ movb $ICU_EOI,%al ; /* as above */ \ outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... 
then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ + call *intr_handler + (irq_num) * 4 ; \ addl $4,%esp ; \ + enable_icus ; \ incl cnt+V_INTR ; /* book-keeping can wait */ \ movl intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +170,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. 
*/ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ +; \ maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ + MASK_IRQ(icu, irq_num) ; \ enable_icus ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ +; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - 
INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c index 49516028..810fbe7 100644 --- a/sys/i386/isa/clock.c +++ b/sys/i386/isa/clock.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,12 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. 
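The clock.c hunks above illustrate the new locking discipline for icu_lock: because a critical section no longer turns interrupts off on i386, any code that touches the interrupt mask registers must bracket the spin lock with a true intr_disable()/intr_restore() pair. The helper below merely collects that recurring pattern in one place; the function name is hypothetical, the commit open-codes the sequence at each call site.

/*
 * The icu_lock idiom used throughout this commit, shown as a
 * hypothetical helper for illustration.
 */
static void
icu_intren_locked(u_int mask)
{
	register_t crit;

	crit = intr_disable();		/* true hardware disablement */
	mtx_lock_spin(&icu_lock);
	INTREN(mask);			/* unmask at the 8259/APIC */
	mtx_unlock_spin(&icu_lock);
	intr_restore(crit);
}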
*/ + + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1125,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/i386/isa/icu_vector.s b/sys/i386/isa/icu_vector.s index 4e10cc2..01a804b 100644 --- a/sys/i386/isa/icu_vector.s +++ b/sys/i386/isa/icu_vector.s @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else + #define ENABLE_ICU1 \ movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ + #define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,127 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else + #define ENABLE_ICU1_AND_2 \ movb $ICU_EOI,%al ; /* as above */ \ outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl 12(%esp) ; /* original caller eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) 
*/ \ + call *intr_handler + (irq_num) * 4 ; \ addl $4,%esp ; \ + enable_icus ; \ incl cnt+V_INTR ; /* book-keeping can wait */ \ movl intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + pushl %ebp ; \ + movl %esp, %ebp ; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + popl %ebp ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +170,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. */ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ + PUSH_FRAME ; \ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ mov %ax,%ds ; \ mov %ax,%es ; \ mov $KPSEL,%ax ; \ mov %ax,%fs ; \ +; \ maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ + MASK_IRQ(icu, irq_num) ; \ enable_icus ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ +; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. 
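The FAST_UNPEND comment above notes that the replay is driven by unpend(), which runs inside a critical section and manages the nesting level itself. unpend() itself lives in i386/i386/critical.c and is not shown in this excerpt; the fragment below is only a guess at its shape, assuming it walks the per-CPU fpending and ipending masks set by the macros above, handing fast IRQs to call_fast_unpend() and threaded ones to sched_ithd().

/*
 * Assumed model of an unpend()-style replay loop; the commit's real
 * routine may differ in detail.
 */
static void
unpend_model(void)
{
	u_int mask;
	int irq;

	PCPU_SET(int_pending, 0);
	while ((mask = PCPU_GET(fpending)) != 0) {
		irq = ffs(mask) - 1;
		PCPU_SET(fpending, mask & ~(1 << irq));
		call_fast_unpend(irq);		/* runs FAST_UNPEND(irq) */
	}
	while ((mask = PCPU_GET(ipending)) != 0) {
		irq = ffs(mask) - 1;
		PCPU_SET(ipending, mask & ~(1 << irq));
		sched_ithd((void *)(uintptr_t)irq);	/* schedule the ithread */
	}
}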
*/ \ - jmp doreti /* and catch up inside doreti */ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + 
FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c index cfc162b..59c739e 100644 --- a/sys/i386/isa/intr_machdep.c +++ b/sys/i386/isa/intr_machdep.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + register_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ + register_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return (EBUSY); #endif + crit = intr_disable(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -530,6 +559,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -543,10 +573,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + register_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -564,6 +596,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -578,19 +611,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } static void ithread_disable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); 
INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } int @@ -672,3 +711,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h index d674630..8e1a828 100644 --- a/sys/i386/isa/intr_machdep.h +++ b/sys/i386/isa/intr_machdep.h @@ -140,6 +140,7 @@ * Type of the first (asm) part of an interrupt handler. */ typedef void inthand_t(u_int cs, u_int ef, u_int esp, u_int ss); +typedef void unpendhand_t __P((void)); #define IDTVEC(name) __CONCAT(X,name) @@ -163,6 +164,18 @@ inthand_t IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15); +unpendhand_t + IDTVEC(fastunpend0), IDTVEC(fastunpend1), IDTVEC(fastunpend2), + IDTVEC(fastunpend3), IDTVEC(fastunpend4), IDTVEC(fastunpend5), + IDTVEC(fastunpend6), IDTVEC(fastunpend7), IDTVEC(fastunpend8), + IDTVEC(fastunpend9), IDTVEC(fastunpend10), IDTVEC(fastunpend11), + IDTVEC(fastunpend12), IDTVEC(fastunpend13), IDTVEC(fastunpend14), + IDTVEC(fastunpend15), IDTVEC(fastunpend16), IDTVEC(fastunpend17), + IDTVEC(fastunpend18), IDTVEC(fastunpend19), IDTVEC(fastunpend20), + IDTVEC(fastunpend21), IDTVEC(fastunpend22), IDTVEC(fastunpend23), + IDTVEC(fastunpend24), IDTVEC(fastunpend25), IDTVEC(fastunpend26), + IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29), + IDTVEC(fastunpend30), IDTVEC(fastunpend31); #if defined(SMP) || defined(APIC_IO) inthand_t @@ -227,6 +240,7 @@ int inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); int inthand_remove(void *cookie); void sched_ithd(void *dummy); +void call_fast_unpend(int irq); #endif /* LOCORE */ diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c index cfc162b..59c739e 100644 --- a/sys/i386/isa/nmi.c +++ b/sys/i386/isa/nmi.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + register_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = 
intr_disable(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ + register_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return (EBUSY); #endif + crit = intr_disable(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -530,6 +559,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -543,10 +573,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + register_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -564,6 +596,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); return (0); } @@ -578,19 +611,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } static void ithread_disable(int vector) { + register_t crit; + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } int @@ -672,3 +711,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 43b012c..22a0282 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -429,9 +429,15 @@ no_irq13: * XXX hack around brokenness of bus_teardown_intr(). If we left the * irq active then we would get it instead of exception 16. */ - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); + { + register_t crit; + + crit = intr_disable(); + mtx_lock_spin(&icu_lock); + INTRDIS(1 << irq_num); + mtx_unlock_spin(&icu_lock); + intr_restore(crit); + } bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); diff --git a/sys/ia64/ia64/critical.c b/sys/ia64/ia64/critical.c new file mode 100644 index 0000000..77a6c83 --- /dev/null +++ b/sys/ia64/ia64/critical.c @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2001 Matthew Dillon. This code is distributed under + * the BSD copyright, /usr/src/COPYRIGHT. 
+ * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/pcpu.h> +#include <sys/eventhandler.h> /* XX */ +#include <sys/ktr.h> /* XX */ +#include <sys/signalvar.h> +#include <sys/sysproto.h> /* XX */ +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> + +void +cpu_critical_enter(void) +{ + struct thread *td = curthread; + + td->td_md.md_savecrit = intr_disable(); +} + +void +cpu_critical_exit(void) +{ + struct thread *td = curthread; + + intr_restore(td->td_md.md_savecrit); +} + +/* + * cpu_critical_fork_exit() - cleanup after fork + */ +void +cpu_critical_fork_exit(void) +{ + struct thread *td = curthread; + + td->td_critnest = 1; + td->td_md.md_savecrit = (ia64_get_psr() | IA64_PSR_I); +} + +/* + * cpu_thread_link() - thread linkup, initialize machine-dependant fields + */ +void +cpu_thread_link(struct thread *td) +{ + td->td_md.md_savecrit = 0; +} + diff --git a/sys/ia64/include/cpufunc.h b/sys/ia64/include/cpufunc.h index 713ecf4..e7cf818 100644 --- a/sys/ia64/include/cpufunc.h +++ b/sys/ia64/include/cpufunc.h @@ -34,7 +34,7 @@ #include <sys/types.h> #include <machine/ia64_cpu.h> -#define CRITICAL_FORK (ia64_get_psr() | IA64_PSR_I) +struct thread; #ifdef __GNUC__ @@ -300,17 +300,10 @@ intr_restore(critical_t psr) __asm __volatile ("mov psr.l=%0;; srlz.d" :: "r" (psr)); } -static __inline critical_t -cpu_critical_enter(void) -{ - return (intr_disable()); -} - -static __inline void -cpu_critical_exit(critical_t psr) -{ - intr_restore(psr); -} +void cpu_critical_enter(void); +void cpu_critical_exit(void); +void cpu_critical_fork_exit(void); +void cpu_thread_link(struct thread *td); #endif /* _KERNEL */ diff --git a/sys/ia64/include/proc.h b/sys/ia64/include/proc.h index c9ae251..4daead6 100644 --- a/sys/ia64/include/proc.h +++ b/sys/ia64/include/proc.h @@ -39,6 +39,7 @@ struct mdthread { u_long md_flags; void *md_kstackvirt; /* virtual address of td_kstack */ vm_offset_t md_bspstore; /* initial ar.bspstore */ + register_t md_savecrit; }; #define MDP_FPUSED 0x0001 /* Process used the FPU */ diff --git a/sys/isa/atrtc.c b/sys/isa/atrtc.c index 49516028..810fbe7 100644 --- a/sys/isa/atrtc.c +++ b/sys/isa/atrtc.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,12 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. 
*/ + + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1125,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index d508cbf..723fd62 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -774,12 +774,16 @@ fork_exit(callout, arg, frame) td->td_kse->ke_oncpu = PCPU_GET(cpuid); /* - * Setup the sched_lock state so that we can release it. + * Finish setting up thread glue. We need to initialize + * the thread into a td_critnest=1 state. Some platforms + * may have already partially or fully initialized td_critnest + * and/or td_md.md_savecrit (when applciable). + * + * see <arch>/<arch>/critical.c */ sched_lock.mtx_lock = (uintptr_t)td; sched_lock.mtx_recurse = 0; - td->td_critnest = 1; - td->td_savecrit = CRITICAL_FORK; + cpu_critical_fork_exit(); CTR3(KTR_PROC, "fork_exit: new proc %p (pid %d, %s)", p, p->p_pid, p->p_comm); if (PCPU_GET(switchtime.sec) == 0) diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c index 3d24bed..6b263e0 100644 --- a/sys/kern/kern_prot.c +++ b/sys/kern/kern_prot.c @@ -338,10 +338,9 @@ getgroups(td, uap) ngrp = cred->cr_ngroups; error = copyout((caddr_t)cred->cr_groups, (caddr_t)uap->gidset, ngrp * sizeof(gid_t)); - if (error) - return (error); - td->td_retval[0] = ngrp; - return (0); + if (error == 0) + td->td_retval[0] = ngrp; + return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -1112,11 +1111,9 @@ getresuid(td, uap) struct getresuid_args *uap; { struct ucred *cred; - struct proc *p = td->td_proc; int error1 = 0, error2 = 0, error3 = 0; - mtx_lock(&Giant); - cred = p->p_ucred; + cred = td->td_ucred; if (uap->ruid) error1 = copyout((caddr_t)&cred->cr_ruid, (caddr_t)uap->ruid, sizeof(cred->cr_ruid)); @@ -1126,7 +1123,6 @@ getresuid(td, uap) if (uap->suid) error3 = copyout((caddr_t)&cred->cr_svuid, (caddr_t)uap->suid, sizeof(cred->cr_svuid)); - mtx_unlock(&Giant); return (error1 ? error1 : error2 ? error2 : error3); } @@ -1147,11 +1143,9 @@ getresgid(td, uap) struct getresgid_args *uap; { struct ucred *cred; - struct proc *p = td->td_proc; int error1 = 0, error2 = 0, error3 = 0; - mtx_lock(&Giant); - cred = p->p_ucred; + cred = td->td_ucred; if (uap->rgid) error1 = copyout((caddr_t)&cred->cr_rgid, (caddr_t)uap->rgid, sizeof(cred->cr_rgid)); @@ -1161,7 +1155,6 @@ getresgid(td, uap) if (uap->sgid) error3 = copyout((caddr_t)&cred->cr_svgid, (caddr_t)uap->sgid, sizeof(cred->cr_svgid)); - mtx_unlock(&Giant); return (error1 ? error1 : error2 ? error2 : error3); } @@ -1233,6 +1226,8 @@ __setugid(td, uap) /* * Check if gid is a member of the group set. + * + * MPSAFE (cred must be held) */ int groupmember(gid, cred) @@ -1289,6 +1284,8 @@ suser_td(td) /* * wrapper to use if you have the thread on hand but not the proc. + * + * MPSAFE (cred must be held) */ int suser_xxx_td(cred, td, flag) @@ -1330,6 +1327,8 @@ suser_xxx(cred, proc, flag) * existing securelevel checks that occurred without a process/credential * context. In the future this will be disallowed, so a kernel message * is displayed. 
+ * + * MPSAFE */ int securelevel_gt(struct ucred *cr, int level) diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index 78505a3..ccfb114 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -77,7 +77,7 @@ critical_enter(void) td = curthread; if (td->td_critnest == 0) - td->td_savecrit = cpu_critical_enter(); + cpu_critical_enter(); td->td_critnest++; } @@ -89,9 +89,10 @@ critical_exit(void) td = curthread; if (td->td_critnest == 1) { td->td_critnest = 0; - cpu_critical_exit(td->td_savecrit); - } else + cpu_critical_exit(); + } else { td->td_critnest--; + } } /* diff --git a/sys/pc98/cbus/clock.c b/sys/pc98/cbus/clock.c index b8b9e3f..b70465a 100644 --- a/sys/pc98/cbus/clock.c +++ b/sys/pc98/cbus/clock.c @@ -1331,6 +1331,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; #ifndef PC98 if (statclock_disable) { @@ -1367,9 +1368,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1380,9 +1383,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1408,6 +1413,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1415,6 +1421,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); #endif /* PC98 */ @@ -1432,9 +1439,11 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. 
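For reference, the kern_switch.c hunk above leaves the MI wrappers looking roughly like this after the patch: only the outermost nesting level calls into the machine-dependent layer, and the saved state is now kept by that layer in td_md.md_savecrit rather than in struct thread.

void
critical_enter(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 0)
		cpu_critical_enter();	/* MD: save state, disable or defer */
	td->td_critnest++;
}

void
critical_exit(void)
{
	struct thread *td;

	td = curthread;
	if (td->td_critnest == 1) {
		td->td_critnest = 0;
		cpu_critical_exit();	/* MD: restore state, replay deferred IRQs */
	} else {
		td->td_critnest--;
	}
}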
*/ + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1457,9 +1466,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/pc98/cbus/pcrtc.c b/sys/pc98/cbus/pcrtc.c index b8b9e3f..b70465a 100644 --- a/sys/pc98/cbus/pcrtc.c +++ b/sys/pc98/cbus/pcrtc.c @@ -1331,6 +1331,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; #ifndef PC98 if (statclock_disable) { @@ -1367,9 +1368,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1380,9 +1383,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1408,6 +1413,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1415,6 +1421,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); #endif /* PC98 */ @@ -1432,9 +1439,11 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1457,9 +1466,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/pc98/i386/machdep.c b/sys/pc98/i386/machdep.c index 21a538f..e41899f 100644 --- a/sys/pc98/i386/machdep.c +++ b/sys/pc98/i386/machdep.c @@ -815,11 +815,10 @@ cpu_idle(void) #ifndef SMP if (cpu_idle_hlt) { disable_intr(); - if (procrunnable()) + if (procrunnable()) { enable_intr(); - else { - enable_intr(); - __asm __volatile("hlt"); + } else { + __asm __volatile("sti; hlt"); } } #endif @@ -1757,12 +1756,17 @@ init386(first) /* * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be under + * witness. 
*/ mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE); mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE); mtx_init(&proc0.p_mtx, "process lock", MTX_DEF); mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE); - mtx_init(&icu_lock, "icu", MTX_SPIN); + mtx_init(&icu_lock, "icu", MTX_SPIN | MTX_NOWITNESS); mtx_lock(&Giant); /* make ldt memory segments */ diff --git a/sys/pc98/pc98/clock.c b/sys/pc98/pc98/clock.c index b8b9e3f..b70465a 100644 --- a/sys/pc98/pc98/clock.c +++ b/sys/pc98/pc98/clock.c @@ -1331,6 +1331,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + register_t crit; #ifndef PC98 if (statclock_disable) { @@ -1367,9 +1368,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #else /* APIC_IO */ @@ -1380,9 +1383,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + intr_restore(crit); #endif /* APIC_IO */ @@ -1408,6 +1413,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1415,6 +1421,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + intr_restore(crit); writertc(RTC_STATUSB, rtc_statusb); #endif /* PC98 */ @@ -1432,9 +1439,11 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1457,9 +1466,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = intr_disable(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + intr_restore(crit); } } diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 21a538f..e41899f 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -815,11 +815,10 @@ cpu_idle(void) #ifndef SMP if (cpu_idle_hlt) { disable_intr(); - if (procrunnable()) + if (procrunnable()) { enable_intr(); - else { - enable_intr(); - __asm __volatile("hlt"); + } else { + __asm __volatile("sti; hlt"); } } #endif @@ -1757,12 +1756,17 @@ init386(first) /* * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be under + * witness. 
*/ mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE); mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE); mtx_init(&proc0.p_mtx, "process lock", MTX_DEF); mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE); - mtx_init(&icu_lock, "icu", MTX_SPIN); + mtx_init(&icu_lock, "icu", MTX_SPIN | MTX_NOWITNESS); mtx_lock(&Giant); /* make ldt memory segments */ diff --git a/sys/powerpc/include/cpufunc.h b/sys/powerpc/include/cpufunc.h index 7ea27b0..b788aa6 100644 --- a/sys/powerpc/include/cpufunc.h +++ b/sys/powerpc/include/cpufunc.h @@ -35,7 +35,7 @@ #include <machine/psl.h> -#define CRITICAL_FORK (mfmsr() | PSL_EE | PSL_RI) +struct thread; #ifdef __GNUC__ @@ -108,20 +108,6 @@ intr_restore(register_t msr) mtmsr(msr); } -static __inline critical_t -cpu_critical_enter(void) -{ - u_int msr; - critical_t crit; - - msr = mfmsr(); - crit = (critical_t)msr; - msr &= ~(PSL_EE | PSL_RI); - mtmsr(msr); - - return (crit); -} - static __inline void restore_intr(unsigned int msr) { @@ -130,13 +116,6 @@ restore_intr(unsigned int msr) } static __inline void -cpu_critical_exit(critical_t msr) -{ - - mtmsr(msr); -} - -static __inline void powerpc_mb(void) { @@ -153,6 +132,12 @@ powerpc_get_pcpup(void) return(ret); } +void cpu_critical_enter(void); +void cpu_critical_exit(void); +void cpu_critical_fork_exit(void); +void cpu_thread_link(struct thread *td); + + #endif /* _KERNEL */ #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/powerpc/include/proc.h b/sys/powerpc/include/proc.h index 82aa068..e307b24 100644 --- a/sys/powerpc/include/proc.h +++ b/sys/powerpc/include/proc.h @@ -39,6 +39,7 @@ * Machine-dependent part of the proc structure */ struct mdthread { + register_t md_savecrit; }; struct mdproc { diff --git a/sys/powerpc/powerpc/critical.c b/sys/powerpc/powerpc/critical.c new file mode 100644 index 0000000..cb80062 --- /dev/null +++ b/sys/powerpc/powerpc/critical.c @@ -0,0 +1,62 @@ +/*- + * Copyright (c) 2001 Matthew Dillon. This code is distributed under + * the BSD copyright, /usr/src/COPYRIGHT. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/pcpu.h> +#include <sys/eventhandler.h> /* XX */ +#include <sys/ktr.h> /* XX */ +#include <sys/signalvar.h> +#include <sys/sysproto.h> /* XX */ +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> + +void +cpu_critical_enter(void) +{ + u_int msr; + struct thread *td = curthread; + + msr = mfmsr(); + td->td_md.md_savecrit = msr; + msr &= ~(PSL_EE | PSL_RI); + mtmsr(msr); +} + +void +cpu_critical_exit(void) +{ + struct thread *td = curthread; + + mtmsr(td->td_md.md_savecrit); +} + +/* + * cpu_critical_fork_exit() - cleanup after fork + */ +void +cpu_critical_fork_exit(void) +{ + struct thread *td = curthread; + + td->td_critnest = 1; + td->td_md.md_savecrit = (mfmsr() | PSL_EE | PSL_RI); +} + +/* + * cpu_thread_link() - thread linkup, initialize machine-dependant fields + */ +void +cpu_thread_link(struct thread *td) +{ + td->td_md.md_savecrit = 0; +} + diff --git a/sys/sparc64/include/cpufunc.h b/sys/sparc64/include/cpufunc.h index 4e59bb3..4a3dd7a 100644 --- a/sys/sparc64/include/cpufunc.h +++ b/sys/sparc64/include/cpufunc.h @@ -32,6 +32,8 @@ #include <machine/asi.h> #include <machine/pstate.h> +struct thread; + /* * membar operand macros for use in other macros when # is a special * character. Keep these in sync with what the hardware expects. 
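A note on the cpu_idle() hunks in the machdep.c files above: the old code re-enabled interrupts and then executed hlt as two separate steps, so a wakeup interrupt could be taken in between and the CPU would halt anyway until the next unrelated interrupt arrived. Folding the two into a single "sti; hlt" sequence closes that window, because sti holds off interrupt recognition until after the following instruction. The resulting idle body (inside the !SMP branch), per the diff:

	if (cpu_idle_hlt) {
		disable_intr();
		if (procrunnable()) {
			enable_intr();
		} else {
			/*
			 * "sti" keeps interrupts held off for one more
			 * instruction, so no wakeup can slip in between
			 * the runnable check and the "hlt".
			 */
			__asm __volatile("sti; hlt");
		}
	}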
@@ -160,30 +162,12 @@ STNC_GEN(u_long, stxa); : : "r" (val), "rI" (xor)); \ } while (0) -#define CRITICAL_FORK (0) - static __inline void breakpoint(void) { __asm __volatile("ta %%xcc, 1" : :); } -static __inline critical_t -cpu_critical_enter(void) -{ - critical_t pil; - - pil = rdpr(pil); - wrpr(pil, 0, 14); - return (pil); -} - -static __inline void -cpu_critical_exit(critical_t pil) -{ - wrpr(pil, pil, 0); -} - static __inline register_t intr_disable(void) { @@ -240,4 +224,9 @@ ffs(int mask) #undef LDNC_GEN #undef STNC_GEN +void cpu_critical_enter(void); +void cpu_critical_exit(void); +void cpu_critical_fork_exit(void); +void cpu_thread_link(struct thread *td); + #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/sparc64/include/proc.h b/sys/sparc64/include/proc.h index e261b24..108bc42 100644 --- a/sys/sparc64/include/proc.h +++ b/sys/sparc64/include/proc.h @@ -47,6 +47,7 @@ struct md_utrap { }; struct mdthread { + register_t md_savecrit; }; struct mdproc { diff --git a/sys/sparc64/sparc64/critical.c b/sys/sparc64/sparc64/critical.c new file mode 100644 index 0000000..8c2df1c --- /dev/null +++ b/sys/sparc64/sparc64/critical.c @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 2001 Matthew Dillon. This code is distributed under + * the BSD copyright, /usr/src/COPYRIGHT. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/pcpu.h> +#include <sys/eventhandler.h> /* XX */ +#include <sys/ktr.h> /* XX */ +#include <sys/signalvar.h> +#include <sys/sysproto.h> /* XX */ +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sysctl.h> +#include <sys/ucontext.h> + +void +cpu_critical_enter(void) +{ + critical_t pil; + struct thread *td = curthread; + + pil = rdpr(pil); + wrpr(pil, 0, 14); + td->td_md.md_savecrit = pil; +} + +void +cpu_critical_exit(void) +{ + struct thread *td = curthread; + wrpr(td->td_md.md_savecrit, td->td_md.md_savecrit, 0); +} + +/* + * cpu_critical_fork_exit() - cleanup after fork + */ +void +cpu_critical_fork_exit(void) +{ + struct thread *td = curthread; + + td->td_critnest = 1; + td->td_md.md_savecrit = 0; +} + +/* + * cpu_thread_link() - thread linkup, initialize machine-dependant fields + */ +void +cpu_thread_link(struct thread *td) +{ + td->td_md.md_savecrit = 0; +} + diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 72656e8..e3e0cf8 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -295,7 +295,6 @@ struct thread { struct vm_object *td_kstack_obj;/* (a) Kstack object. */ vm_offset_t td_kstack; /* Kernel VA of kstack. */ u_int td_critnest; /* (k) Critical section nest level. */ - critical_t td_savecrit; /* (k) Saved critical section state. */ }; /* |
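Taken together, the header hunks above define the interface each architecture now provides in place of the old inline cpu_critical_enter()/cpu_critical_exit() pair and the CRITICAL_FORK constant; the saved state lives in the machine-dependent struct mdthread (md_savecrit), which is why td_savecrit disappears from struct thread here. The declarations added to each <machine>/cpufunc.h in this commit are:

struct thread;

void	cpu_critical_enter(void);	/* outermost critical_enter() */
void	cpu_critical_exit(void);	/* outermost critical_exit() */
void	cpu_critical_fork_exit(void);	/* reset td_critnest/md_savecrit in fork_exit() */
void	cpu_thread_link(struct thread *td);	/* initialize md_savecrit for a new thread */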