diff options
48 files changed, 2179 insertions, 611 deletions
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index e3a37e1..0325fbe 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -19,11 +19,19 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - pushal ; \ + pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ pushl %fs +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; + #define POP_FRAME \ popl %fs ; \ popl %es ; \ @@ -31,37 +39,8 @@ popal ; \ addl $4+4,%esp -/* - * Macros for interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic+LA_EOI ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ - jmp doreti +#define POP_DUMMY \ + addl $16*4,%esp #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 @@ -114,9 +93,9 @@ IDTVEC(vec_name) ; \ */ #define UNMASK_IRQ(irq_num) \ ICU_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), _apic_imen ; \ + testl $IRQ_BIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), _apic_imen ;/* clear mask bit */ \ + andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -126,6 +105,92 @@ IDTVEC(vec_name) ; \ 7: ; /* already unmasked */ \ ICU_UNLOCK +/* + * Test to see whether we are handling an edge or level triggered INT. + * Level-triggered INTs have to be unmasked. + */ +#define UNMASK_LEVEL_IRQ(irq_num) \ + testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + jz 9f ; /* edge, don't unmask */ \ + UNMASK_IRQ(irq_num) ; \ +9: + +/* + * Macros for interrupt entry, call to handler, and exit. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + movl $KDSEL,%eax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + movl $KPSEL,%eax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(FPENDING) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + movl $0, lapic+LA_EOI ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + movl $0, lapic+LA_EOI ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti + +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + UNMASK_LEVEL_IRQ(irq_num) ; \ + POP_DUMMY ; \ + ret ; \ + + /* * Slow, threaded interrupts. * @@ -151,16 +216,27 @@ IDTVEC(vec_name) ; \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti @@ -304,9 +380,16 @@ Xhardclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $1,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_hardclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -328,10 +411,18 @@ Xstatclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) + movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $2,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_statclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -494,6 +585,39 @@ MCOUNT_LABEL(bintr) INTR(29,intr29,) INTR(30,intr30,) INTR(31,intr31,) + + FAST_UNPEND(0,fastunpend0) + FAST_UNPEND(1,fastunpend1) + FAST_UNPEND(2,fastunpend2) + FAST_UNPEND(3,fastunpend3) + FAST_UNPEND(4,fastunpend4) + FAST_UNPEND(5,fastunpend5) + FAST_UNPEND(6,fastunpend6) + FAST_UNPEND(7,fastunpend7) + FAST_UNPEND(8,fastunpend8) + FAST_UNPEND(9,fastunpend9) + FAST_UNPEND(10,fastunpend10) + FAST_UNPEND(11,fastunpend11) + FAST_UNPEND(12,fastunpend12) + FAST_UNPEND(13,fastunpend13) + FAST_UNPEND(14,fastunpend14) + FAST_UNPEND(15,fastunpend15) + FAST_UNPEND(16,fastunpend16) + FAST_UNPEND(17,fastunpend17) + FAST_UNPEND(18,fastunpend18) + FAST_UNPEND(19,fastunpend19) + FAST_UNPEND(20,fastunpend20) + FAST_UNPEND(21,fastunpend21) + FAST_UNPEND(22,fastunpend22) + FAST_UNPEND(23,fastunpend23) + FAST_UNPEND(24,fastunpend24) + FAST_UNPEND(25,fastunpend25) + FAST_UNPEND(26,fastunpend26) + FAST_UNPEND(27,fastunpend27) + FAST_UNPEND(28,fastunpend28) + FAST_UNPEND(29,fastunpend29) + FAST_UNPEND(30,fastunpend30) + FAST_UNPEND(31,fastunpend31) MCOUNT_LABEL(eintr) /* diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index 4f2bdcb..e0f9bcd 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -96,6 +96,8 @@ ENTRY(cpu_switch) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) + pushfl /* PSL */ + popl PCB_PSL(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -233,6 +235,8 @@ sw1b: movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) + pushl PCB_PSL(%edx) + popfl #if defined(SMP) && defined(GRAB_LOPRIO) /* Hold LOPRIO for interrupts. */ @@ -339,6 +343,8 @@ ENTRY(savectx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) movl %gs,PCB_GS(%ecx) + pushfl + popl PCB_PSL(%ecx) #ifdef DEV_NPX /* diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 0b395e2..feb8742 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -222,6 +222,18 @@ ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ + movl PCPU(CURTHREAD),%ebx /* setup critnest */ + movl $1,TD_CRITNEST(%ebx) + cmpl $0,critical_mode + jne 1f + pushfl + popl TD_SAVECRIT(%ebx) + orl $PSL_I,TD_SAVECRIT(%ebx) + jmp 2f +1: + movl $-1,TD_SAVECRIT(%ebx) + sti /* enable interrupts */ +2: call fork_exit addl $12,%esp /* cut from syscall */ diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s index 0b395e2..feb8742 100644 --- a/sys/amd64/amd64/exception.s +++ b/sys/amd64/amd64/exception.s @@ -222,6 +222,18 @@ ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ + movl PCPU(CURTHREAD),%ebx /* setup critnest */ + movl $1,TD_CRITNEST(%ebx) + cmpl $0,critical_mode + jne 1f + pushfl + popl TD_SAVECRIT(%ebx) + orl $PSL_I,TD_SAVECRIT(%ebx) + jmp 2f +1: + movl $-1,TD_SAVECRIT(%ebx) + sti /* enable interrupts */ +2: call fork_exit addl $12,%esp /* cut from syscall */ diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 07bd2f5..abccf06 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -429,9 +429,15 @@ no_irq13: * XXX hack around brokenness of bus_teardown_intr(). If we left the * irq active then we would get it instead of exception 16. */ - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); + { + critical_t crit; + + crit = cpu_critical_enter(); + mtx_lock_spin(&icu_lock); + INTRDIS(1 << irq_num); + mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); + } bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 1fe8a2e..aedbe53 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -89,6 +89,8 @@ ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_KSE, offsetof(struct thread, td_kse)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level)); +ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest)); +ASSYM(TD_SAVECRIT, offsetof(struct thread, td_savecrit)); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); @@ -134,6 +136,7 @@ ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); +ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); @@ -176,6 +179,10 @@ ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); +ASSYM(PC_INT_PENDING, offsetof(struct pcpu, pc_int_pending)); +ASSYM(PC_IPENDING, offsetof(struct pcpu, pc_ipending)); +ASSYM(PC_FPENDING, offsetof(struct pcpu, pc_fpending)); +ASSYM(PC_SPENDING, offsetof(struct pcpu, pc_spending)); ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index bb6d420..be3c20e 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -138,6 +138,8 @@ static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) +void unpend(void); /* note: not static */ + int _udatasel, _ucodesel; u_int atdevbase; @@ -148,6 +150,9 @@ SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif +int critical_mode = 1; +SYSCTL_INT(_debug, OID_AUTO, critical_mode, + CTLFLAG_RW, &critical_mode, 0, ""); #ifdef PC98 static int ispc98 = 1; @@ -270,6 +275,121 @@ cpu_startup(dummy) } /* + * Critical section handling. + * + * Note that our interrupt code handles any interrupt race that occurs + * after we decrement td_critnest. + */ +void +critical_enter(void) +{ + struct thread *td = curthread; + + if (critical_mode == 0) { + if (td->td_critnest == 0) + td->td_savecrit = cpu_critical_enter(); + td->td_critnest++; + } else { + ++td->td_critnest; + } +} + +void +critical_exit(void) +{ + struct thread *td = curthread; + KASSERT(td->td_critnest > 0, ("bad td_critnest value!")); + if (--td->td_critnest == 0) { + if (td->td_savecrit != (critical_t)-1) { + cpu_critical_exit(td->td_savecrit); + td->td_savecrit = (critical_t)-1; + } else { + /* + * We may have to schedule pending interrupts. Create + * conditions similar to an interrupt context and call + * unpend(). + */ + if (PCPU_GET(int_pending) && td->td_intr_nesting_level == 0) { + critical_t eflags; + + eflags = cpu_critical_enter(); + if (PCPU_GET(int_pending)) { + ++td->td_intr_nesting_level; + unpend(); + --td->td_intr_nesting_level; + } + cpu_critical_exit(eflags); + } + } + } +} + +/* + * Called from critical_exit() or called from the assembly vector code + * to process any interrupts which may have occured while we were in + * a critical section. + * + * - interrupts must be disabled + * - td_intr_nesting_level may not be 0 + * - td_critnest must be 0 + */ +void +unpend(void) +{ + curthread->td_critnest = 1; + for (;;) { + u_int32_t mask; + + /* + * Fast interrupts have priority + */ + if ((mask = PCPU_GET(fpending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(fpending, mask & ~(1 << irq)); + call_fast_unpend(irq); + continue; + } + + /* + * Threaded interrupts come next + */ + if ((mask = PCPU_GET(ipending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(ipending, mask & ~(1 << irq)); + sched_ithd((void *)irq); + continue; + } + + /* + * Software interrupts and delayed IPIs are last + * + * XXX give the bits #defined names. see also + * isa/xxx_vector.s + */ + if ((mask = PCPU_GET(spending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(spending, mask & ~(1 << irq)); + switch(irq) { + case 0: /* bit 0 - hardclock */ + mtx_lock_spin(&sched_lock); + hardclock_process(curthread, 0); + mtx_unlock_spin(&sched_lock); + break; + case 1: /* bit 1 - statclock */ + mtx_lock_spin(&sched_lock); + statclock_process(curthread->td_kse, (register_t)unpend, 0); + mtx_unlock_spin(&sched_lock); + break; + } + continue; + } + break; + } + PCPU_SET(int_pending, 0); + curthread->td_critnest = 0; +} + +/* * Send an interrupt to process. * * Stack is set up to allow sigcode stored @@ -1732,12 +1852,17 @@ init386(first) /* * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. */ mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE); mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE); mtx_init(&proc0.p_mtx, "process lock", MTX_DEF); mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE); - mtx_init(&icu_lock, "icu", MTX_SPIN); + mtx_init(&icu_lock, "icu", MTX_SPIN | MTX_NOWITNESS); mtx_lock(&Giant); /* make ldt memory segments */ diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 008dfc5..76f0d1b 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -2480,6 +2480,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2512,6 +2515,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 008dfc5..76f0d1b 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -2480,6 +2480,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2512,6 +2515,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index 4f2bdcb..e0f9bcd 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -96,6 +96,8 @@ ENTRY(cpu_switch) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) + pushfl /* PSL */ + popl PCB_PSL(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -233,6 +235,8 @@ sw1b: movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) + pushl PCB_PSL(%edx) + popfl #if defined(SMP) && defined(GRAB_LOPRIO) /* Hold LOPRIO for interrupts. */ @@ -339,6 +343,8 @@ ENTRY(savectx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) movl %gs,PCB_GS(%ecx) + pushfl + popl PCB_PSL(%ecx) #ifdef DEV_NPX /* diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index ae56051..ee776af 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + critical_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,13 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + critical_t crit; + + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1126,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } } diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 01e4b30..9af6e8f 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -193,6 +193,7 @@ cpu_fork(td1, p2, td2, flags) pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 94d5c3a..b3ced94 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -52,7 +52,7 @@ __BEGIN_DECLS #define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) #define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) -#define CRITICAL_FORK (read_eflags() | PSL_I) +#define MACHINE_CRITICAL_ENTER /* MD code defines critical_enter/exit/fork */ #ifdef __GNUC__ diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index 008dfc5..76f0d1b 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -2480,6 +2480,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2512,6 +2515,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index d4a375a..9a1f338 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -69,7 +69,8 @@ struct pcb { caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; struct pcb_ext *pcb_ext; /* optional pcb extension */ - u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ + int pcb_psl; /* process status long */ + u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */ }; /* diff --git a/sys/amd64/isa/atpic_vector.S b/sys/amd64/isa/atpic_vector.S index 4e10cc2..3411c06 100644 --- a/sys/amd64/isa/atpic_vector.S +++ b/sys/amd64/isa/atpic_vector.S @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else -#define ENABLE_ICU1 \ - movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ + +#define ENABLE_ICU1 \ + movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ -#define OUTB_ICU1 \ + +#define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,124 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else -#define ENABLE_ICU1_AND_2 \ - movb $ICU_EOI,%al ; /* as above */ \ - outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ + +#define ENABLE_ICU1_AND_2 \ + movb $ICU_EOI,%al ; /* as above */ \ + outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ - addl $4,%esp ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; \ + addl $4,%esp ; \ + enable_icus ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +167,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. */ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - enable_icus ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +; \ + maybe_extra_ipending ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ +; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - pushl $irq_num; /* pass the IRQ */ \ - call sched_ithd ; \ - addl $4, %esp ; /* discard the parameter */ \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ + pushl $irq_num; /* pass the IRQ */ \ + call sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ +; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c index ae56051..ee776af 100644 --- a/sys/amd64/isa/clock.c +++ b/sys/amd64/isa/clock.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + critical_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,13 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + critical_t crit; + + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1126,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } } diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S index 4e10cc2..3411c06 100644 --- a/sys/amd64/isa/icu_vector.S +++ b/sys/amd64/isa/icu_vector.S @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else -#define ENABLE_ICU1 \ - movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ + +#define ENABLE_ICU1 \ + movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ -#define OUTB_ICU1 \ + +#define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,124 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else -#define ENABLE_ICU1_AND_2 \ - movb $ICU_EOI,%al ; /* as above */ \ - outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ + +#define ENABLE_ICU1_AND_2 \ + movb $ICU_EOI,%al ; /* as above */ \ + outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ - addl $4,%esp ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; \ + addl $4,%esp ; \ + enable_icus ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +167,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. */ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - enable_icus ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +; \ + maybe_extra_ipending ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ +; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - pushl $irq_num; /* pass the IRQ */ \ - call sched_ithd ; \ - addl $4, %esp ; /* discard the parameter */ \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ + pushl $irq_num; /* pass the IRQ */ \ + call sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ +; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/amd64/isa/icu_vector.s b/sys/amd64/isa/icu_vector.s index 4e10cc2..3411c06 100644 --- a/sys/amd64/isa/icu_vector.s +++ b/sys/amd64/isa/icu_vector.s @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else -#define ENABLE_ICU1 \ - movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ + +#define ENABLE_ICU1 \ + movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ -#define OUTB_ICU1 \ + +#define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,124 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else -#define ENABLE_ICU1_AND_2 \ - movb $ICU_EOI,%al ; /* as above */ \ - outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ + +#define ENABLE_ICU1_AND_2 \ + movb $ICU_EOI,%al ; /* as above */ \ + outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ - addl $4,%esp ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; \ + addl $4,%esp ; \ + enable_icus ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +167,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. */ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - enable_icus ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +; \ + maybe_extra_ipending ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ +; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - pushl $irq_num; /* pass the IRQ */ \ - call sched_ithd ; \ - addl $4, %esp ; /* discard the parameter */ \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ + pushl $irq_num; /* pass the IRQ */ \ + call sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ +; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c index 92bf581..616e8c3 100644 --- a/sys/amd64/isa/intr_machdep.c +++ b/sys/amd64/isa/intr_machdep.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + critical_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ + critical_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return (EBUSY); #endif + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -522,6 +551,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -535,10 +565,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + critical_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -556,6 +588,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -570,19 +603,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } static void ithread_disable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } int @@ -664,3 +703,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h index 789b02b..21d5a93 100644 --- a/sys/amd64/isa/intr_machdep.h +++ b/sys/amd64/isa/intr_machdep.h @@ -144,6 +144,7 @@ * Type of the first (asm) part of an interrupt handler. */ typedef void inthand_t __P((u_int cs, u_int ef, u_int esp, u_int ss)); +typedef void unpendhand_t __P((void)); #define IDTVEC(name) __CONCAT(X,name) @@ -167,6 +168,18 @@ inthand_t IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15); +unpendhand_t + IDTVEC(fastunpend0), IDTVEC(fastunpend1), IDTVEC(fastunpend2), + IDTVEC(fastunpend3), IDTVEC(fastunpend4), IDTVEC(fastunpend5), + IDTVEC(fastunpend6), IDTVEC(fastunpend7), IDTVEC(fastunpend8), + IDTVEC(fastunpend9), IDTVEC(fastunpend10), IDTVEC(fastunpend11), + IDTVEC(fastunpend12), IDTVEC(fastunpend13), IDTVEC(fastunpend14), + IDTVEC(fastunpend15), IDTVEC(fastunpend16), IDTVEC(fastunpend17), + IDTVEC(fastunpend18), IDTVEC(fastunpend19), IDTVEC(fastunpend20), + IDTVEC(fastunpend21), IDTVEC(fastunpend22), IDTVEC(fastunpend23), + IDTVEC(fastunpend24), IDTVEC(fastunpend25), IDTVEC(fastunpend26), + IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29), + IDTVEC(fastunpend30), IDTVEC(fastunpend31); #if defined(SMP) || defined(APIC_IO) inthand_t @@ -234,6 +247,7 @@ int inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); int inthand_remove(void *cookie); void sched_ithd(void *dummy); +void call_fast_unpend(int irq); #endif /* LOCORE */ diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c index 92bf581..616e8c3 100644 --- a/sys/amd64/isa/nmi.c +++ b/sys/amd64/isa/nmi.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + critical_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ + critical_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return (EBUSY); #endif + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -522,6 +551,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -535,10 +565,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + critical_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -556,6 +588,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -570,19 +603,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } static void ithread_disable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } int @@ -664,3 +703,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c index 07bd2f5..abccf06 100644 --- a/sys/amd64/isa/npx.c +++ b/sys/amd64/isa/npx.c @@ -429,9 +429,15 @@ no_irq13: * XXX hack around brokenness of bus_teardown_intr(). If we left the * irq active then we would get it instead of exception 16. */ - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); + { + critical_t crit; + + crit = cpu_critical_enter(); + mtx_lock_spin(&icu_lock); + INTRDIS(1 << irq_num); + mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); + } bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index e3a37e1..0325fbe 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -19,11 +19,19 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - pushal ; \ + pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ pushl %fs +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; + #define POP_FRAME \ popl %fs ; \ popl %es ; \ @@ -31,37 +39,8 @@ popal ; \ addl $4+4,%esp -/* - * Macros for interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic+LA_EOI ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ - jmp doreti +#define POP_DUMMY \ + addl $16*4,%esp #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 @@ -114,9 +93,9 @@ IDTVEC(vec_name) ; \ */ #define UNMASK_IRQ(irq_num) \ ICU_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), _apic_imen ; \ + testl $IRQ_BIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), _apic_imen ;/* clear mask bit */ \ + andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -126,6 +105,92 @@ IDTVEC(vec_name) ; \ 7: ; /* already unmasked */ \ ICU_UNLOCK +/* + * Test to see whether we are handling an edge or level triggered INT. + * Level-triggered INTs have to be unmasked. + */ +#define UNMASK_LEVEL_IRQ(irq_num) \ + testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + jz 9f ; /* edge, don't unmask */ \ + UNMASK_IRQ(irq_num) ; \ +9: + +/* + * Macros for interrupt entry, call to handler, and exit. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + movl $KDSEL,%eax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + movl $KPSEL,%eax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(FPENDING) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + movl $0, lapic+LA_EOI ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + movl $0, lapic+LA_EOI ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti + +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + UNMASK_LEVEL_IRQ(irq_num) ; \ + POP_DUMMY ; \ + ret ; \ + + /* * Slow, threaded interrupts. * @@ -151,16 +216,27 @@ IDTVEC(vec_name) ; \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti @@ -304,9 +380,16 @@ Xhardclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $1,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_hardclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -328,10 +411,18 @@ Xstatclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) + movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $2,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_statclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -494,6 +585,39 @@ MCOUNT_LABEL(bintr) INTR(29,intr29,) INTR(30,intr30,) INTR(31,intr31,) + + FAST_UNPEND(0,fastunpend0) + FAST_UNPEND(1,fastunpend1) + FAST_UNPEND(2,fastunpend2) + FAST_UNPEND(3,fastunpend3) + FAST_UNPEND(4,fastunpend4) + FAST_UNPEND(5,fastunpend5) + FAST_UNPEND(6,fastunpend6) + FAST_UNPEND(7,fastunpend7) + FAST_UNPEND(8,fastunpend8) + FAST_UNPEND(9,fastunpend9) + FAST_UNPEND(10,fastunpend10) + FAST_UNPEND(11,fastunpend11) + FAST_UNPEND(12,fastunpend12) + FAST_UNPEND(13,fastunpend13) + FAST_UNPEND(14,fastunpend14) + FAST_UNPEND(15,fastunpend15) + FAST_UNPEND(16,fastunpend16) + FAST_UNPEND(17,fastunpend17) + FAST_UNPEND(18,fastunpend18) + FAST_UNPEND(19,fastunpend19) + FAST_UNPEND(20,fastunpend20) + FAST_UNPEND(21,fastunpend21) + FAST_UNPEND(22,fastunpend22) + FAST_UNPEND(23,fastunpend23) + FAST_UNPEND(24,fastunpend24) + FAST_UNPEND(25,fastunpend25) + FAST_UNPEND(26,fastunpend26) + FAST_UNPEND(27,fastunpend27) + FAST_UNPEND(28,fastunpend28) + FAST_UNPEND(29,fastunpend29) + FAST_UNPEND(30,fastunpend30) + FAST_UNPEND(31,fastunpend31) MCOUNT_LABEL(eintr) /* diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index 0b395e2..feb8742 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -222,6 +222,18 @@ ENTRY(fork_trampoline) pushl %esp /* trapframe pointer */ pushl %ebx /* arg1 */ pushl %esi /* function */ + movl PCPU(CURTHREAD),%ebx /* setup critnest */ + movl $1,TD_CRITNEST(%ebx) + cmpl $0,critical_mode + jne 1f + pushfl + popl TD_SAVECRIT(%ebx) + orl $PSL_I,TD_SAVECRIT(%ebx) + jmp 2f +1: + movl $-1,TD_SAVECRIT(%ebx) + sti /* enable interrupts */ +2: call fork_exit addl $12,%esp /* cut from syscall */ diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index 1fe8a2e..aedbe53 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -89,6 +89,8 @@ ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_KSE, offsetof(struct thread, td_kse)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level)); +ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest)); +ASSYM(TD_SAVECRIT, offsetof(struct thread, td_savecrit)); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); @@ -134,6 +136,7 @@ ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); +ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); @@ -176,6 +179,10 @@ ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(PC_SIZEOF, sizeof(struct pcpu)); ASSYM(PC_PRVSPACE, offsetof(struct pcpu, pc_prvspace)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); +ASSYM(PC_INT_PENDING, offsetof(struct pcpu, pc_int_pending)); +ASSYM(PC_IPENDING, offsetof(struct pcpu, pc_ipending)); +ASSYM(PC_FPENDING, offsetof(struct pcpu, pc_fpending)); +ASSYM(PC_SPENDING, offsetof(struct pcpu, pc_spending)); ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread)); ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index bb6d420..be3c20e 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -138,6 +138,8 @@ static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) +void unpend(void); /* note: not static */ + int _udatasel, _ucodesel; u_int atdevbase; @@ -148,6 +150,9 @@ SYSCTL_INT(_debug, OID_AUTO, swtch_optim_stats, SYSCTL_INT(_debug, OID_AUTO, tlb_flush_count, CTLFLAG_RD, &tlb_flush_count, 0, ""); #endif +int critical_mode = 1; +SYSCTL_INT(_debug, OID_AUTO, critical_mode, + CTLFLAG_RW, &critical_mode, 0, ""); #ifdef PC98 static int ispc98 = 1; @@ -270,6 +275,121 @@ cpu_startup(dummy) } /* + * Critical section handling. + * + * Note that our interrupt code handles any interrupt race that occurs + * after we decrement td_critnest. + */ +void +critical_enter(void) +{ + struct thread *td = curthread; + + if (critical_mode == 0) { + if (td->td_critnest == 0) + td->td_savecrit = cpu_critical_enter(); + td->td_critnest++; + } else { + ++td->td_critnest; + } +} + +void +critical_exit(void) +{ + struct thread *td = curthread; + KASSERT(td->td_critnest > 0, ("bad td_critnest value!")); + if (--td->td_critnest == 0) { + if (td->td_savecrit != (critical_t)-1) { + cpu_critical_exit(td->td_savecrit); + td->td_savecrit = (critical_t)-1; + } else { + /* + * We may have to schedule pending interrupts. Create + * conditions similar to an interrupt context and call + * unpend(). + */ + if (PCPU_GET(int_pending) && td->td_intr_nesting_level == 0) { + critical_t eflags; + + eflags = cpu_critical_enter(); + if (PCPU_GET(int_pending)) { + ++td->td_intr_nesting_level; + unpend(); + --td->td_intr_nesting_level; + } + cpu_critical_exit(eflags); + } + } + } +} + +/* + * Called from critical_exit() or called from the assembly vector code + * to process any interrupts which may have occured while we were in + * a critical section. + * + * - interrupts must be disabled + * - td_intr_nesting_level may not be 0 + * - td_critnest must be 0 + */ +void +unpend(void) +{ + curthread->td_critnest = 1; + for (;;) { + u_int32_t mask; + + /* + * Fast interrupts have priority + */ + if ((mask = PCPU_GET(fpending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(fpending, mask & ~(1 << irq)); + call_fast_unpend(irq); + continue; + } + + /* + * Threaded interrupts come next + */ + if ((mask = PCPU_GET(ipending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(ipending, mask & ~(1 << irq)); + sched_ithd((void *)irq); + continue; + } + + /* + * Software interrupts and delayed IPIs are last + * + * XXX give the bits #defined names. see also + * isa/xxx_vector.s + */ + if ((mask = PCPU_GET(spending)) != 0) { + int irq = bsfl(mask); + PCPU_SET(spending, mask & ~(1 << irq)); + switch(irq) { + case 0: /* bit 0 - hardclock */ + mtx_lock_spin(&sched_lock); + hardclock_process(curthread, 0); + mtx_unlock_spin(&sched_lock); + break; + case 1: /* bit 1 - statclock */ + mtx_lock_spin(&sched_lock); + statclock_process(curthread->td_kse, (register_t)unpend, 0); + mtx_unlock_spin(&sched_lock); + break; + } + continue; + } + break; + } + PCPU_SET(int_pending, 0); + curthread->td_critnest = 0; +} + +/* * Send an interrupt to process. * * Stack is set up to allow sigcode stored @@ -1732,12 +1852,17 @@ init386(first) /* * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. */ mtx_init(&Giant, "Giant", MTX_DEF | MTX_RECURSE); mtx_init(&sched_lock, "sched lock", MTX_SPIN | MTX_RECURSE); mtx_init(&proc0.p_mtx, "process lock", MTX_DEF); mtx_init(&clock_lock, "clk", MTX_SPIN | MTX_RECURSE); - mtx_init(&icu_lock, "icu", MTX_SPIN); + mtx_init(&icu_lock, "icu", MTX_SPIN | MTX_NOWITNESS); mtx_lock(&Giant); /* make ldt memory segments */ diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 008dfc5..76f0d1b 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -2480,6 +2480,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2512,6 +2515,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c index d3f4d3d..9a13fb3 100644 --- a/sys/i386/i386/mpapic.c +++ b/sys/i386/i386/mpapic.c @@ -187,6 +187,7 @@ io_apic_setup_intpin(int apic, int pin) u_int32_t target; /* the window register is 32 bits */ u_int32_t vector; /* the window register is 32 bits */ int level; + critical_t crit; target = IOART_DEST; @@ -207,11 +208,13 @@ io_apic_setup_intpin(int apic, int pin) * shouldn't and stop the carnage. */ vector = NRSVIDT + pin; /* IDT vec */ + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); io_apic_write(apic, select, (io_apic_read(apic, select) & ~IOART_INTMASK & ~0xff)|IOART_INTMSET|vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); /* we only deal with vectored INTs here */ if (apic_int_type(apic, pin) != 0) @@ -255,10 +258,12 @@ io_apic_setup_intpin(int apic, int pin) printf("IOAPIC #%d intpin %d -> irq %d\n", apic, pin, irq); vector = NRSVIDT + irq; /* IDT vec */ + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); io_apic_write(apic, select, flags | vector); io_apic_write(apic, select + 1, target); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } int diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index 008dfc5..76f0d1b 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -2480,6 +2480,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2512,6 +2515,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 4f2bdcb..e0f9bcd 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -96,6 +96,8 @@ ENTRY(cpu_switch) movl %esi,PCB_ESI(%edx) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) + pushfl /* PSL */ + popl PCB_PSL(%edx) /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -233,6 +235,8 @@ sw1b: movl PCB_EDI(%edx),%edi movl PCB_EIP(%edx),%eax movl %eax,(%esp) + pushl PCB_PSL(%edx) + popfl #if defined(SMP) && defined(GRAB_LOPRIO) /* Hold LOPRIO for interrupts. */ @@ -339,6 +343,8 @@ ENTRY(savectx) movl %esi,PCB_ESI(%ecx) movl %edi,PCB_EDI(%ecx) movl %gs,PCB_GS(%ecx) + pushfl + popl PCB_PSL(%ecx) #ifdef DEV_NPX /* diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c index ae56051..ee776af 100644 --- a/sys/i386/i386/tsc.c +++ b/sys/i386/i386/tsc.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + critical_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,13 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + critical_t crit; + + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1126,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } } diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 01e4b30..9af6e8f 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -193,6 +193,7 @@ cpu_fork(td1, p2, td2, flags) pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *); pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */ pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_psl = td2->td_frame->tf_eflags & ~PSL_I; /* ints disabled */ /*- * pcb2->pcb_dr*: cloned above. * pcb2->pcb_savefpu: cloned above. diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 94d5c3a..b3ced94 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -52,7 +52,7 @@ __BEGIN_DECLS #define writew(va, d) (*(volatile u_int16_t *) (va) = (d)) #define writel(va, d) (*(volatile u_int32_t *) (va) = (d)) -#define CRITICAL_FORK (read_eflags() | PSL_I) +#define MACHINE_CRITICAL_ENTER /* MD code defines critical_enter/exit/fork */ #ifdef __GNUC__ diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index 008dfc5..76f0d1b 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h @@ -2480,6 +2480,9 @@ ap_init(void) /* * For statclock, we send an IPI to all CPU's to have them call this * function. + * + * WARNING! unpend() will call statclock_process() directly and skip this + * routine. */ void forwarded_statclock(struct trapframe frame) @@ -2512,6 +2515,9 @@ forward_statclock(void) * sched_lock if we could simply peek at the CPU to determine the user/kernel * state and call hardclock_process() on the CPU receiving the clock interrupt * and then just use a simple IPI to handle any ast's if needed. + * + * WARNING! unpend() will call hardclock_process() directly and skip this + * routine. */ void forwarded_hardclock(struct trapframe frame) diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h index d4a375a..9a1f338 100644 --- a/sys/i386/include/pcb.h +++ b/sys/i386/include/pcb.h @@ -69,7 +69,8 @@ struct pcb { caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; struct pcb_ext *pcb_ext; /* optional pcb extension */ - u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ + int pcb_psl; /* process status long */ + u_long __pcb_spare[2]; /* adjust to avoid core dump size changes */ }; /* diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s index e3a37e1..0325fbe 100644 --- a/sys/i386/isa/apic_vector.s +++ b/sys/i386/isa/apic_vector.s @@ -19,11 +19,19 @@ #define PUSH_FRAME \ pushl $0 ; /* dummy error code */ \ pushl $0 ; /* dummy trap type */ \ - pushal ; \ + pushal ; /* 8 ints */ \ pushl %ds ; /* save data and extra segments ... */ \ pushl %es ; \ pushl %fs +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp ; + #define POP_FRAME \ popl %fs ; \ popl %es ; \ @@ -31,37 +39,8 @@ popal ; \ addl $4+4,%esp -/* - * Macros for interrupt entry, call to handler, and exit. - */ - -#define FAST_INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - movl $KDSEL,%eax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - movl $KPSEL,%eax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT(13*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - addl $4, %esp ; \ - movl $0, lapic+LA_EOI ; \ - lock ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4, %eax ; \ - lock ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ - jmp doreti +#define POP_DUMMY \ + addl $16*4,%esp #define IOAPICADDR(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 8 #define REDIRIDX(irq_num) CNAME(int_to_apicintpin) + 16 * (irq_num) + 12 @@ -114,9 +93,9 @@ IDTVEC(vec_name) ; \ */ #define UNMASK_IRQ(irq_num) \ ICU_LOCK ; /* into critical reg */ \ - testl $IRQ_BIT(irq_num), _apic_imen ; \ + testl $IRQ_BIT(irq_num), apic_imen ; \ je 7f ; /* bit clear, not masked */ \ - andl $~IRQ_BIT(irq_num), _apic_imen ;/* clear mask bit */ \ + andl $~IRQ_BIT(irq_num), apic_imen ;/* clear mask bit */ \ movl IOAPICADDR(irq_num), %ecx ; /* ioapic addr */ \ movl REDIRIDX(irq_num), %eax ; /* get the index */ \ movl %eax, (%ecx) ; /* write the index */ \ @@ -126,6 +105,92 @@ IDTVEC(vec_name) ; \ 7: ; /* already unmasked */ \ ICU_UNLOCK +/* + * Test to see whether we are handling an edge or level triggered INT. + * Level-triggered INTs have to be unmasked. + */ +#define UNMASK_LEVEL_IRQ(irq_num) \ + testl $IRQ_BIT(irq_num), apic_pin_trigger ; \ + jz 9f ; /* edge, don't unmask */ \ + UNMASK_IRQ(irq_num) ; \ +9: + +/* + * Macros for interrupt entry, call to handler, and exit. + */ + +#define FAST_INTR(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + movl $KDSEL,%eax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + movl $KPSEL,%eax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT(13*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(FPENDING) ; \ + MASK_LEVEL_IRQ(irq_num) ; \ + movl $0, lapic+LA_EOI ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + movl $0, lapic+LA_EOI ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti + +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + lock ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4, %eax ; \ + lock ; \ + incl (%eax) ; \ + UNMASK_LEVEL_IRQ(irq_num) ; \ + POP_DUMMY ; \ + ret ; \ + + /* * Slow, threaded interrupts. * @@ -151,16 +216,27 @@ IDTVEC(vec_name) ; \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ -0: ; \ +; \ movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_BIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ incl TD_INTR_NESTING_LEVEL(%ebx) ; \ ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ pushl $irq_num; /* pass the IRQ */ \ call sched_ithd ; \ addl $4, %esp ; /* discard the parameter */ \ ; \ decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ MEXITCOUNT ; \ jmp doreti @@ -304,9 +380,16 @@ Xhardclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $1,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_hardclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -328,10 +411,18 @@ Xstatclock: movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ FAKE_MCOUNT(13*4(%esp)) + movl PCPU(CURTHREAD),%ebx + cmpl $0,TD_CRITNEST(%ebx) + je 1f + movl $1,PCPU(INT_PENDING) + orl $2,PCPU(SPENDING); + jmp 10f +1: incl TD_INTR_NESTING_LEVEL(%ebx) call forwarded_statclock decl TD_INTR_NESTING_LEVEL(%ebx) +10: MEXITCOUNT jmp doreti @@ -494,6 +585,39 @@ MCOUNT_LABEL(bintr) INTR(29,intr29,) INTR(30,intr30,) INTR(31,intr31,) + + FAST_UNPEND(0,fastunpend0) + FAST_UNPEND(1,fastunpend1) + FAST_UNPEND(2,fastunpend2) + FAST_UNPEND(3,fastunpend3) + FAST_UNPEND(4,fastunpend4) + FAST_UNPEND(5,fastunpend5) + FAST_UNPEND(6,fastunpend6) + FAST_UNPEND(7,fastunpend7) + FAST_UNPEND(8,fastunpend8) + FAST_UNPEND(9,fastunpend9) + FAST_UNPEND(10,fastunpend10) + FAST_UNPEND(11,fastunpend11) + FAST_UNPEND(12,fastunpend12) + FAST_UNPEND(13,fastunpend13) + FAST_UNPEND(14,fastunpend14) + FAST_UNPEND(15,fastunpend15) + FAST_UNPEND(16,fastunpend16) + FAST_UNPEND(17,fastunpend17) + FAST_UNPEND(18,fastunpend18) + FAST_UNPEND(19,fastunpend19) + FAST_UNPEND(20,fastunpend20) + FAST_UNPEND(21,fastunpend21) + FAST_UNPEND(22,fastunpend22) + FAST_UNPEND(23,fastunpend23) + FAST_UNPEND(24,fastunpend24) + FAST_UNPEND(25,fastunpend25) + FAST_UNPEND(26,fastunpend26) + FAST_UNPEND(27,fastunpend27) + FAST_UNPEND(28,fastunpend28) + FAST_UNPEND(29,fastunpend29) + FAST_UNPEND(30,fastunpend30) + FAST_UNPEND(31,fastunpend31) MCOUNT_LABEL(eintr) /* diff --git a/sys/i386/isa/atpic_vector.s b/sys/i386/isa/atpic_vector.s index 4e10cc2..3411c06 100644 --- a/sys/i386/isa/atpic_vector.s +++ b/sys/i386/isa/atpic_vector.s @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else -#define ENABLE_ICU1 \ - movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ + +#define ENABLE_ICU1 \ + movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ -#define OUTB_ICU1 \ + +#define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,124 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else -#define ENABLE_ICU1_AND_2 \ - movb $ICU_EOI,%al ; /* as above */ \ - outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ + +#define ENABLE_ICU1_AND_2 \ + movb $ICU_EOI,%al ; /* as above */ \ + outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ - addl $4,%esp ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; \ + addl $4,%esp ; \ + enable_icus ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +167,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. */ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - enable_icus ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +; \ + maybe_extra_ipending ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ +; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - pushl $irq_num; /* pass the IRQ */ \ - call sched_ithd ; \ - addl $4, %esp ; /* discard the parameter */ \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ + pushl $irq_num; /* pass the IRQ */ \ + call sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ +; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c index ae56051..ee776af 100644 --- a/sys/i386/isa/clock.c +++ b/sys/i386/isa/clock.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + critical_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,13 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + critical_t crit; + + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1126,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } } diff --git a/sys/i386/isa/icu_vector.s b/sys/i386/isa/icu_vector.s index 4e10cc2..3411c06 100644 --- a/sys/i386/isa/icu_vector.s +++ b/sys/i386/isa/icu_vector.s @@ -16,17 +16,23 @@ #define ICU_EOI 0x20 /* XXX - define elsewhere */ #define IRQ_BIT(irq_num) (1 << ((irq_num) % 8)) +#define IRQ_LBIT(irq_num) (1 << (irq_num)) #define IRQ_BYTE(irq_num) ((irq_num) >> 3) #ifdef AUTO_EOI_1 + #define ENABLE_ICU1 /* use auto-EOI to reduce i/o */ #define OUTB_ICU1 + #else -#define ENABLE_ICU1 \ - movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ + +#define ENABLE_ICU1 \ + movb $ICU_EOI,%al ; /* as soon as possible send EOI ... */ \ OUTB_ICU1 /* ... to clear in service bit */ -#define OUTB_ICU1 \ + +#define OUTB_ICU1 \ outb %al,$IO_ICU1 + #endif #ifdef AUTO_EOI_2 @@ -34,48 +40,124 @@ * The data sheet says no auto-EOI on slave, but it sometimes works. */ #define ENABLE_ICU1_AND_2 ENABLE_ICU1 + #else -#define ENABLE_ICU1_AND_2 \ - movb $ICU_EOI,%al ; /* as above */ \ - outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ + +#define ENABLE_ICU1_AND_2 \ + movb $ICU_EOI,%al ; /* as above */ \ + outb %al,$IO_ICU2 ; /* but do second icu first ... */ \ OUTB_ICU1 /* ... then first icu (if !AUTO_EOI_1) */ + #endif +#define PUSH_FRAME \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + pushal ; /* 8 ints */ \ + pushl %ds ; /* save data and extra segments ... */ \ + pushl %es ; \ + pushl %fs + +#define PUSH_DUMMY \ + pushfl ; /* eflags */ \ + pushl %cs ; /* cs */ \ + pushl $0 ; /* dummy eip */ \ + pushl $0 ; /* dummy error code */ \ + pushl $0 ; /* dummy trap type */ \ + subl $11*4,%esp + +#define POP_FRAME \ + popl %fs ; \ + popl %es ; \ + popl %ds ; \ + popal ; \ + addl $4+4,%esp + +#define POP_DUMMY \ + addl $16*4,%esp + +#define MASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + orb $IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET + +#define UNMASK_IRQ(icu, irq_num) \ + movb imen + IRQ_BYTE(irq_num),%al ; \ + andb $~IRQ_BIT(irq_num),%al ; \ + movb %al,imen + IRQ_BYTE(irq_num) ; \ + outb %al,$icu+ICU_IMR_OFFSET /* * Macros for interrupt interrupt entry, call to handler, and exit. */ -#define FAST_INTR(irq_num, vec_name, enable_icus) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ - call critical_enter ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ - pushl intr_unit + (irq_num) * 4 ; \ - call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ - enable_icus ; /* (re)enable ASAP (helps edge trigger?) */ \ - addl $4,%esp ; \ - incl cnt+V_INTR ; /* book-keeping can wait */ \ - movl intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - call critical_exit ; \ - MEXITCOUNT ; \ +#define FAST_INTR(irq_num, vec_name, icu, enable_icus) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ + FAKE_MCOUNT((12+ACTUALLY_PUSHED)*4(%esp)) ; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ +; \ + movl $1,PCPU(INT_PENDING) ; \ + orl $IRQ_LBIT(irq_num),PCPU(FPENDING) ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ + jmp 10f ; \ +1: ; \ + incl TD_CRITNEST(%ebx) ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; \ + addl $4,%esp ; \ + enable_icus ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + decl TD_CRITNEST(%ebx) ; \ + cmpl $0,PCPU(INT_PENDING) ; \ + je 2f ; \ +; \ + call unpend ; \ +2: ; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ jmp doreti +/* + * Restart a fast interrupt that was held up by a critical section. + * This routine is called from unpend(). unpend() ensures we are + * in a critical section and deals with the interrupt nesting level + * for us. If we previously masked the irq, we have to unmask it. + * + * We have a choice. We can regenerate the irq using the 'int' + * instruction or we can create a dummy frame and call the interrupt + * handler directly. I've chosen to use the dummy-frame method. + */ +#define FAST_UNPEND(irq_num, vec_name, icu) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ +; \ + PUSH_DUMMY ; \ + pushl intr_unit + (irq_num) * 4 ; \ + call *intr_handler + (irq_num) * 4 ; /* do the work ASAP */ \ + addl $4, %esp ; \ + incl cnt+V_INTR ; /* book-keeping can wait */ \ + movl intr_countp + (irq_num) * 4,%eax ; \ + incl (%eax) ; \ + UNMASK_IRQ(icu, irq_num) ; \ + POP_DUMMY ; \ + ret + /* * Slow, threaded interrupts. * @@ -85,74 +167,96 @@ IDTVEC(vec_name) ; \ * interrupt handler and don't run anything. We could just do an * iret. FIXME. */ -#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name) ; \ - pushl $0 ; /* dummy error code */ \ - pushl $0 ; /* dummy trap type */ \ - pushal ; \ - pushl %ds ; /* save our data and extra segments ... */ \ - pushl %es ; \ - pushl %fs ; \ - mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ - mov %ax,%ds ; \ - mov %ax,%es ; \ - mov $KPSEL,%ax ; \ - mov %ax,%fs ; \ - maybe_extra_ipending ; \ - movb imen + IRQ_BYTE(irq_num),%al ; \ - orb $IRQ_BIT(irq_num),%al ; \ - movb %al,imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - enable_icus ; \ - movl PCPU(CURTHREAD),%ebx ; \ - incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +#define INTR(irq_num, vec_name, icu, enable_icus, maybe_extra_ipending) \ + .text ; \ + SUPERALIGN_TEXT ; \ +IDTVEC(vec_name) ; \ + PUSH_FRAME ; \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ + mov %ax,%es ; \ + mov $KPSEL,%ax ; \ + mov %ax,%fs ; \ +; \ + maybe_extra_ipending ; \ + MASK_IRQ(icu, irq_num) ; \ + enable_icus ; \ +; \ + movl PCPU(CURTHREAD),%ebx ; \ + cmpl $0,TD_CRITNEST(%ebx) ; \ + je 1f ; \ + movl $1,PCPU(INT_PENDING); \ + orl $IRQ_LBIT(irq_num),PCPU(IPENDING) ; \ + jmp 10f ; \ +1: ; \ + incl TD_INTR_NESTING_LEVEL(%ebx) ; \ +; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - pushl $irq_num; /* pass the IRQ */ \ - call sched_ithd ; \ - addl $4, %esp ; /* discard the parameter */ \ - decl TD_INTR_NESTING_LEVEL(%ebx) ; \ - MEXITCOUNT ; \ - /* We could usually avoid the following jmp by inlining some of */ \ - /* doreti, but it's probably better to use less cache. */ \ - jmp doreti /* and catch up inside doreti */ + cmpl $0,PCPU(INT_PENDING) ; \ + je 9f ; \ + call unpend ; \ +9: ; \ + pushl $irq_num; /* pass the IRQ */ \ + call sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ +; \ + decl TD_INTR_NESTING_LEVEL(%ebx) ; \ +10: ; \ + MEXITCOUNT ; \ + jmp doreti MCOUNT_LABEL(bintr) - FAST_INTR(0,fastintr0, ENABLE_ICU1) - FAST_INTR(1,fastintr1, ENABLE_ICU1) - FAST_INTR(2,fastintr2, ENABLE_ICU1) - FAST_INTR(3,fastintr3, ENABLE_ICU1) - FAST_INTR(4,fastintr4, ENABLE_ICU1) - FAST_INTR(5,fastintr5, ENABLE_ICU1) - FAST_INTR(6,fastintr6, ENABLE_ICU1) - FAST_INTR(7,fastintr7, ENABLE_ICU1) - FAST_INTR(8,fastintr8, ENABLE_ICU1_AND_2) - FAST_INTR(9,fastintr9, ENABLE_ICU1_AND_2) - FAST_INTR(10,fastintr10, ENABLE_ICU1_AND_2) - FAST_INTR(11,fastintr11, ENABLE_ICU1_AND_2) - FAST_INTR(12,fastintr12, ENABLE_ICU1_AND_2) - FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) - FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) - FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + FAST_INTR(0,fastintr0, IO_ICU1, ENABLE_ICU1) + FAST_INTR(1,fastintr1, IO_ICU1, ENABLE_ICU1) + FAST_INTR(2,fastintr2, IO_ICU1, ENABLE_ICU1) + FAST_INTR(3,fastintr3, IO_ICU1, ENABLE_ICU1) + FAST_INTR(4,fastintr4, IO_ICU1, ENABLE_ICU1) + FAST_INTR(5,fastintr5, IO_ICU1, ENABLE_ICU1) + FAST_INTR(6,fastintr6, IO_ICU1, ENABLE_ICU1) + FAST_INTR(7,fastintr7, IO_ICU1, ENABLE_ICU1) + FAST_INTR(8,fastintr8, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(9,fastintr9, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(10,fastintr10, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(11,fastintr11, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(12,fastintr12, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(13,fastintr13, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(14,fastintr14, IO_ICU2, ENABLE_ICU1_AND_2) + FAST_INTR(15,fastintr15, IO_ICU2, ENABLE_ICU1_AND_2) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) /* Threaded interrupts */ - INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) - INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) - INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) - INTR(3,intr3, IO_ICU1, ENABLE_ICU1, al,) - INTR(4,intr4, IO_ICU1, ENABLE_ICU1, al,) - INTR(5,intr5, IO_ICU1, ENABLE_ICU1, al,) - INTR(6,intr6, IO_ICU1, ENABLE_ICU1, al,) - INTR(7,intr7, IO_ICU1, ENABLE_ICU1, al,) - INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) - INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + INTR(0,intr0, IO_ICU1, ENABLE_ICU1, CLKINTR_PENDING) + INTR(1,intr1, IO_ICU1, ENABLE_ICU1,) + INTR(2,intr2, IO_ICU1, ENABLE_ICU1,) + INTR(3,intr3, IO_ICU1, ENABLE_ICU1,) + INTR(4,intr4, IO_ICU1, ENABLE_ICU1,) + INTR(5,intr5, IO_ICU1, ENABLE_ICU1,) + INTR(6,intr6, IO_ICU1, ENABLE_ICU1,) + INTR(7,intr7, IO_ICU1, ENABLE_ICU1,) + INTR(8,intr8, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(9,intr9, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(10,intr10, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(11,intr11, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(12,intr12, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2,) + INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2,) + FAST_UNPEND(0,fastunpend0, IO_ICU1) + FAST_UNPEND(1,fastunpend1, IO_ICU1) + FAST_UNPEND(2,fastunpend2, IO_ICU1) + FAST_UNPEND(3,fastunpend3, IO_ICU1) + FAST_UNPEND(4,fastunpend4, IO_ICU1) + FAST_UNPEND(5,fastunpend5, IO_ICU1) + FAST_UNPEND(6,fastunpend6, IO_ICU1) + FAST_UNPEND(7,fastunpend7, IO_ICU1) + FAST_UNPEND(8,fastunpend8, IO_ICU2) + FAST_UNPEND(9,fastunpend9, IO_ICU2) + FAST_UNPEND(10,fastunpend10, IO_ICU2) + FAST_UNPEND(11,fastunpend11, IO_ICU2) + FAST_UNPEND(12,fastunpend12, IO_ICU2) + FAST_UNPEND(13,fastunpend13, IO_ICU2) + FAST_UNPEND(14,fastunpend14, IO_ICU2) + FAST_UNPEND(15,fastunpend15, IO_ICU2) MCOUNT_LABEL(eintr) + diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c index 92bf581..616e8c3 100644 --- a/sys/i386/isa/intr_machdep.c +++ b/sys/i386/isa/intr_machdep.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + critical_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ + critical_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return (EBUSY); #endif + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -522,6 +551,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -535,10 +565,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + critical_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -556,6 +588,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -570,19 +603,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } static void ithread_disable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } int @@ -664,3 +703,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h index 789b02b..21d5a93 100644 --- a/sys/i386/isa/intr_machdep.h +++ b/sys/i386/isa/intr_machdep.h @@ -144,6 +144,7 @@ * Type of the first (asm) part of an interrupt handler. */ typedef void inthand_t __P((u_int cs, u_int ef, u_int esp, u_int ss)); +typedef void unpendhand_t __P((void)); #define IDTVEC(name) __CONCAT(X,name) @@ -167,6 +168,18 @@ inthand_t IDTVEC(intr4), IDTVEC(intr5), IDTVEC(intr6), IDTVEC(intr7), IDTVEC(intr8), IDTVEC(intr9), IDTVEC(intr10), IDTVEC(intr11), IDTVEC(intr12), IDTVEC(intr13), IDTVEC(intr14), IDTVEC(intr15); +unpendhand_t + IDTVEC(fastunpend0), IDTVEC(fastunpend1), IDTVEC(fastunpend2), + IDTVEC(fastunpend3), IDTVEC(fastunpend4), IDTVEC(fastunpend5), + IDTVEC(fastunpend6), IDTVEC(fastunpend7), IDTVEC(fastunpend8), + IDTVEC(fastunpend9), IDTVEC(fastunpend10), IDTVEC(fastunpend11), + IDTVEC(fastunpend12), IDTVEC(fastunpend13), IDTVEC(fastunpend14), + IDTVEC(fastunpend15), IDTVEC(fastunpend16), IDTVEC(fastunpend17), + IDTVEC(fastunpend18), IDTVEC(fastunpend19), IDTVEC(fastunpend20), + IDTVEC(fastunpend21), IDTVEC(fastunpend22), IDTVEC(fastunpend23), + IDTVEC(fastunpend24), IDTVEC(fastunpend25), IDTVEC(fastunpend26), + IDTVEC(fastunpend27), IDTVEC(fastunpend28), IDTVEC(fastunpend29), + IDTVEC(fastunpend30), IDTVEC(fastunpend31); #if defined(SMP) || defined(APIC_IO) inthand_t @@ -234,6 +247,7 @@ int inthand_add(const char *name, int irq, driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep); int inthand_remove(void *cookie); void sched_ithd(void *dummy); +void call_fast_unpend(int irq); #endif /* LOCORE */ diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c index 92bf581..616e8c3 100644 --- a/sys/i386/isa/nmi.c +++ b/sys/i386/isa/nmi.c @@ -117,6 +117,27 @@ static inthand_t *fastintr[ICU_LEN] = { #endif /* APIC_IO */ }; +static unpendhand_t *fastunpend[ICU_LEN] = { + &IDTVEC(fastunpend0), &IDTVEC(fastunpend1), + &IDTVEC(fastunpend2), &IDTVEC(fastunpend3), + &IDTVEC(fastunpend4), &IDTVEC(fastunpend5), + &IDTVEC(fastunpend6), &IDTVEC(fastunpend7), + &IDTVEC(fastunpend8), &IDTVEC(fastunpend9), + &IDTVEC(fastunpend10), &IDTVEC(fastunpend11), + &IDTVEC(fastunpend12), &IDTVEC(fastunpend13), + &IDTVEC(fastunpend14), &IDTVEC(fastunpend15), +#if defined(APIC_IO) + &IDTVEC(fastunpend16), &IDTVEC(fastunpend17), + &IDTVEC(fastunpend18), &IDTVEC(fastunpend19), + &IDTVEC(fastunpend20), &IDTVEC(fastunpend21), + &IDTVEC(fastunpend22), &IDTVEC(fastunpend23), + &IDTVEC(fastunpend24), &IDTVEC(fastunpend25), + &IDTVEC(fastunpend26), &IDTVEC(fastunpend27), + &IDTVEC(fastunpend28), &IDTVEC(fastunpend29), + &IDTVEC(fastunpend30), &IDTVEC(fastunpend31), +#endif /* APIC_IO */ +}; + static inthand_t *slowintr[ICU_LEN] = { &IDTVEC(intr0), &IDTVEC(intr1), &IDTVEC(intr2), &IDTVEC(intr3), &IDTVEC(intr4), &IDTVEC(intr5), &IDTVEC(intr6), &IDTVEC(intr7), @@ -291,13 +312,16 @@ isa_nmi(cd) void icu_reinit() { int i; + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); for(i=0;i<ICU_LEN;i++) if(intr_handler[i] != isa_strayintr) INTREN(1<<i); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } /* @@ -309,13 +333,16 @@ void isa_defaultirq() { int i; + critical_t crit; /* icu vectors */ for (i = 0; i < ICU_LEN; i++) icu_unset(i, (driver_intr_t *)NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); init_i8259(); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } @@ -476,6 +503,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) int vector; u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ + critical_t crit; #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -488,6 +516,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) return (EBUSY); #endif + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); intr_handler[intr] = handler; intr_unit[intr] = arg; @@ -522,6 +551,7 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags) #endif /* FAST_HI */ INTREN(1 << intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -535,10 +565,12 @@ icu_unset(intr, handler) int intr; driver_intr_t *handler; { + critical_t crit; if ((u_int)intr >= ICU_LEN || handler != intr_handler[intr]) return (EINVAL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << intr); intr_countp[intr] = &intrcnt[1 + intr]; @@ -556,6 +588,7 @@ icu_unset(intr, handler) GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); return (0); } @@ -570,19 +603,25 @@ SYSINIT(ithds_init, SI_SUB_INTR, SI_ORDER_SECOND, ithds_init, NULL); static void ithread_enable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } static void ithread_disable(int vector) { + critical_t crit; + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << vector); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } int @@ -664,3 +703,10 @@ inthand_remove(void *cookie) return (ithread_remove_handler(cookie)); } + +void +call_fast_unpend(int irq) +{ + fastunpend[irq](); +} + diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index 07bd2f5..abccf06 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -429,9 +429,15 @@ no_irq13: * XXX hack around brokenness of bus_teardown_intr(). If we left the * irq active then we would get it instead of exception 16. */ - mtx_lock_spin(&icu_lock); - INTRDIS(1 << irq_num); - mtx_unlock_spin(&icu_lock); + { + critical_t crit; + + crit = cpu_critical_enter(); + mtx_lock_spin(&icu_lock); + INTRDIS(1 << irq_num); + mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); + } bus_release_resource(dev, SYS_RES_IRQ, irq_rid, irq_res); bus_release_resource(dev, SYS_RES_IOPORT, ioport_rid, ioport_res); diff --git a/sys/isa/atrtc.c b/sys/isa/atrtc.c index ae56051..ee776af 100644 --- a/sys/isa/atrtc.c +++ b/sys/isa/atrtc.c @@ -995,6 +995,7 @@ cpu_initclocks() int apic_8254_trial; void *clkdesc; #endif /* APIC_IO */ + critical_t crit; if (statclock_disable) { /* @@ -1029,9 +1030,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, &clkdesc); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #else /* APIC_IO */ @@ -1042,9 +1045,11 @@ cpu_initclocks() */ inthand_add("clk", 0, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(IRQ0); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); #endif /* APIC_IO */ @@ -1067,6 +1072,7 @@ cpu_initclocks() inthand_add("rtc", 8, (driver_intr_t *)rtcintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); #ifdef APIC_IO INTREN(APIC_IRQ8); @@ -1074,6 +1080,7 @@ cpu_initclocks() INTREN(IRQ8); #endif /* APIC_IO */ mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); writertc(RTC_STATUSB, rtc_statusb); @@ -1090,9 +1097,13 @@ cpu_initclocks() * on the IO APIC. * Workaround: Limited variant of mixed mode. */ + critical_t crit; + + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTRDIS(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " @@ -1115,9 +1126,11 @@ cpu_initclocks() inthand_add("clk", apic_8254_intr, (driver_intr_t *)clkintr, NULL, INTR_TYPE_CLK | INTR_FAST, NULL); + crit = cpu_critical_enter(); mtx_lock_spin(&icu_lock); INTREN(1 << apic_8254_intr); mtx_unlock_spin(&icu_lock); + cpu_critical_exit(crit); } } diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 69071a1..d13442c 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -777,12 +777,19 @@ fork_exit(callout, arg, frame) td->td_kse->ke_oncpu = PCPU_GET(cpuid); /* - * Setup the sched_lock state so that we can release it. + * Setup the sched_lock state so that we can release it. If + * MACHINE_CRITICAL_ENTER is set by the MD architecture, the + * trampoline returns with the critical section pre-set. + * XXX note: all architectures should do this, because this code + * improperly assumes that a critical section == hard interrupt + * disablement on entry, which is not necessarily true. */ sched_lock.mtx_lock = (uintptr_t)td; sched_lock.mtx_recurse = 0; +#ifndef MACHINE_CRITICAL_ENTER td->td_critnest = 1; td->td_savecrit = CRITICAL_FORK; +#endif CTR3(KTR_PROC, "fork_exit: new proc %p (pid %d, %s)", p, p->p_pid, p->p_comm); if (PCPU_GET(switchtime.sec) == 0) diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c index bd9c4bf..44c9e5d 100644 --- a/sys/kern/kern_switch.c +++ b/sys/kern/kern_switch.c @@ -69,6 +69,11 @@ setrunqueue(struct thread *td) runq_add(&runq, td->td_kse); } +/* + * XXX temporary until these routines are moved fully into MD areas + */ +#ifndef MACHINE_CRITICAL_ENTER + /* Critical sections that prevent preemption. */ void critical_enter(void) @@ -94,6 +99,8 @@ critical_exit(void) td->td_critnest--; } +#endif + /* * Clear the status bit of the queue corresponding to priority level pri, * indicating that it is empty. diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h index ed72487..dda4b8a 100644 --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -57,6 +57,10 @@ struct pcpu { u_int pc_cpuid; /* This cpu number */ u_int pc_cpumask; /* This cpu mask */ u_int pc_other_cpus; /* Mask of all other cpus */ + u_int32_t pc_int_pending; /* master int pending flag */ + u_int32_t pc_ipending; /* pending slow interrupts */ + u_int32_t pc_fpending; /* pending fast interrupts */ + u_int32_t pc_spending; /* pending soft interrupts */ SLIST_ENTRY(pcpu) pc_allcpu; struct lock_list_entry *pc_spinlocks; #ifdef KTR_PERCPU |