From 769e0f974d8929599ba599ac496510fffc90ff34 Mon Sep 17 00:00:00 2001 From: jasone Date: Thu, 7 Sep 2000 01:33:02 +0000 Subject: Major update to the way synchronization is done in the kernel. Highlights include: * Mutual exclusion is used instead of spl*(). See mutex(9). (Note: The alpha port is still in transition and currently uses both.) * Per-CPU idle processes. * Interrupts are run in their own separate kernel threads and can be preempted (i386 only). Partially contributed by: BSDi (BSD/OS) Submissions by (at least): cp, dfr, dillon, grog, jake, jhb, sheldonh --- sys/amd64/amd64/amd64-gdbstub.c | 10 +- sys/amd64/amd64/apic_vector.S | 132 ++----- sys/amd64/amd64/autoconf.c | 8 - sys/amd64/amd64/cpu_switch.S | 269 ++++---------- sys/amd64/amd64/exception.S | 41 +-- sys/amd64/amd64/exception.s | 41 +-- sys/amd64/amd64/fpu.c | 18 +- sys/amd64/amd64/genassym.c | 27 +- sys/amd64/amd64/identcpu.c | 3 + sys/amd64/amd64/initcpu.c | 6 +- sys/amd64/amd64/legacy.c | 32 +- sys/amd64/amd64/locore.S | 3 - sys/amd64/amd64/locore.s | 3 - sys/amd64/amd64/machdep.c | 37 +- sys/amd64/amd64/mp_machdep.c | 88 +++-- sys/amd64/amd64/mpboot.S | 36 +- sys/amd64/amd64/mptable.c | 88 +++-- sys/amd64/amd64/nexus.c | 32 +- sys/amd64/amd64/pmap.c | 2 +- sys/amd64/amd64/swtch.s | 269 ++++---------- sys/amd64/amd64/trap.c | 391 ++++++++++---------- sys/amd64/amd64/tsc.c | 155 +++++--- sys/amd64/amd64/vm_machdep.c | 51 +-- sys/amd64/include/cpu.h | 12 +- sys/amd64/include/cpufunc.h | 21 +- sys/amd64/include/mptable.h | 88 +++-- sys/amd64/include/mutex.h | 786 ++++++++++++++++++++++++++++++++++++++++ sys/amd64/include/pcb.h | 6 +- sys/amd64/include/pcpu.h | 33 ++ sys/amd64/include/smp.h | 38 +- sys/amd64/isa/atpic_vector.S | 92 ++--- sys/amd64/isa/clock.c | 155 +++++--- sys/amd64/isa/icu_ipl.S | 57 --- sys/amd64/isa/icu_ipl.s | 57 --- sys/amd64/isa/icu_vector.S | 92 ++--- sys/amd64/isa/icu_vector.s | 92 ++--- sys/amd64/isa/intr_machdep.c | 524 ++++++++------------------- sys/amd64/isa/intr_machdep.h | 50 ++- sys/amd64/isa/ithread.c | 353 ++++++++++++++++++ sys/amd64/isa/nmi.c | 524 ++++++++------------------- sys/amd64/isa/npx.c | 18 +- sys/amd64/isa/vector.S | 9 +- sys/amd64/isa/vector.s | 9 +- 43 files changed, 2597 insertions(+), 2161 deletions(-) create mode 100644 sys/amd64/include/mutex.h create mode 100644 sys/amd64/isa/ithread.c (limited to 'sys/amd64') diff --git a/sys/amd64/amd64/amd64-gdbstub.c b/sys/amd64/amd64/amd64-gdbstub.c index 986b8d4..b442a37 100644 --- a/sys/amd64/amd64/amd64-gdbstub.c +++ b/sys/amd64/amd64/amd64-gdbstub.c @@ -188,7 +188,8 @@ getpacket (char *buffer) unsigned char ch; int s; - s = spltty (); + s = read_eflags(); + disable_intr(); do { /* wait around for the start character, ignore all other characters */ @@ -239,7 +240,7 @@ getpacket (char *buffer) } } while (checksum != xmitcsum); - splx (s); + write_eflags(s); } /* send the packet in buffer. */ @@ -253,7 +254,8 @@ putpacket (char *buffer) int s; /* $#. */ - s = spltty (); + s = read_eflags(); + disable_intr(); do { /* @@ -285,7 +287,7 @@ putpacket (char *buffer) putDebugChar (hexchars[checksum & 0xf]); } while ((getDebugChar () & 0x7f) != '+'); - splx (s); + write_eflags(s); } static char remcomInBuffer[BUFMAX]; diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S index 2a7559d..54bf003 100644 --- a/sys/amd64/amd64/apic_vector.S +++ b/sys/amd64/amd64/apic_vector.S @@ -17,7 +17,7 @@ /* - * Macros for interrupt interrupt entry, call to handler, and exit. 
+ * Macros for interrupt entry, call to handler, and exit. */ #define FAST_INTR(irq_num, vec_name) \ @@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \ /* - * Test to see if the source is currntly masked, clear if so. + * Test to see if the source is currently masked, clear if so. */ #define UNMASK_IRQ(irq_num) \ IMASK_LOCK ; /* into critical reg */ \ @@ -200,7 +200,16 @@ log_intr_event: #else #define APIC_ITRACE(name, irq_num, id) #endif - + +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. + */ #define INTR(irq_num, vec_name, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \ maybe_extra_ipending ; \ ; \ APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \ - lock ; /* MP-safe */ \ - btsl $(irq_num), iactive ; /* lazy masking */ \ - jc 1f ; /* already active */ \ ; \ MASK_LEVEL_IRQ(irq_num) ; \ EOI_IRQ(irq_num) ; \ 0: ; \ - APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\ - MP_TRYLOCK ; /* XXX this is going away... */ \ - testl %eax, %eax ; /* did we get it? */ \ - jz 3f ; /* no */ \ -; \ - APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 2f ; /* this INT masked */ \ -; \ incb _intr_nesting_level ; \ ; \ /* entry point used by doreti_unpend for HWIs. */ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \ - lock ; incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4, %eax ; \ - lock ; incl (%eax) ; \ -; \ - movl _cpl, %eax ; \ - pushl %eax ; \ - orl _intr_mask + (irq_num) * 4, %eax ; \ - movl %eax, _cpl ; \ - lock ; \ - andl $~IRQ_BIT(irq_num), _ipending ; \ -; \ - pushl _intr_unit + (irq_num) * 4 ; \ + pushl $irq_num; /* pass the IRQ */ \ APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \ ; \ - lock ; andl $~IRQ_BIT(irq_num), iactive ; \ - UNMASK_IRQ(irq_num) ; \ - APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \ - sti ; /* doreti repeats cli/sti */ \ MEXITCOUNT ; \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -1: ; /* active */ \ - APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \ - MASK_IRQ(irq_num) ; \ - EOI_IRQ(irq_num) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - lock ; \ - btsl $(irq_num), iactive ; /* still active */ \ - jnc 0b ; /* retry */ \ - POP_FRAME ; \ - iret ; /* XXX: iactive bit might be 0 now */ \ - ALIGN_TEXT ; \ -2: ; /* masked by cpl, leave iactive set */ \ - APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - MP_RELLOCK ; \ - POP_FRAME ; \ - iret ; \ - ALIGN_TEXT ; \ -3: ; /* other cpu has isr lock */ \ - APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\ - lock ; \ - orl $IRQ_BIT(irq_num), _ipending ; \ - testl $IRQ_BIT(irq_num), _cpl ; \ - jne 4f ; /* this INT masked */ \ - call forward_irq ; /* forward irq to lock holder */ \ - POP_FRAME ; /* and return */ \ - iret ; \ - ALIGN_TEXT ; \ -4: ; /* blocked */ \ - APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\ - POP_FRAME ; /* and return */ \ - iret + jmp doreti_next /* 
* Handle "spurious INTerrupts". @@ -434,20 +380,10 @@ _Xcpuast: FAKE_MCOUNT(13*4(%esp)) - /* - * Giant locks do not come cheap. - * A lot of cycles are going to be wasted here. - */ - call _get_mplock - - movl _cpl, %eax - pushl %eax orl $AST_PENDING, _astpending /* XXX */ incb _intr_nesting_level sti - pushl $0 - movl _cpuid, %eax lock btrl %eax, _checkstate_pending_ast @@ -461,7 +397,7 @@ _Xcpuast: lock incl CNAME(cpuast_cnt) MEXITCOUNT - jmp _doreti + jmp doreti_next 1: /* We are already in the process of delivering an ast for this CPU */ POP_FRAME @@ -487,40 +423,24 @@ _Xforward_irq: FAKE_MCOUNT(13*4(%esp)) - MP_TRYLOCK - testl %eax,%eax /* Did we get the lock ? */ - jz 1f /* No */ - lock incl CNAME(forward_irq_hitcnt) cmpb $4, _intr_nesting_level - jae 2f + jae 1f - movl _cpl, %eax - pushl %eax incb _intr_nesting_level sti - pushl $0 - MEXITCOUNT - jmp _doreti /* Handle forwarded interrupt */ + jmp doreti_next /* Handle forwarded interrupt */ 1: lock - incl CNAME(forward_irq_misscnt) - call forward_irq /* Oops, we've lost the isr lock */ - MEXITCOUNT - POP_FRAME - iret -2: - lock incl CNAME(forward_irq_toodeepcnt) -3: - MP_RELLOCK MEXITCOUNT POP_FRAME iret +#if 0 /* * */ @@ -532,9 +452,11 @@ forward_irq: cmpl $0, CNAME(forward_irq_enabled) jz 4f +/* XXX - this is broken now, because mp_lock doesn't exist movl _mp_lock,%eax cmpl $FREE_LOCK,%eax jne 1f + */ movl $0, %eax /* Pick CPU #0 if noone has lock */ 1: shrl $24,%eax @@ -559,6 +481,7 @@ forward_irq: jnz 3b 4: ret +#endif /* * Executed by a CPU when it receives an Xcpustop IPI from another CPU, @@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr) FAST_INTR(22,fastintr22) FAST_INTR(23,fastintr23) #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, CLKINTR_PENDING) INTR(1,intr1,) INTR(2,intr2,) @@ -728,15 +652,11 @@ _ihandlers: .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - +#if 0 /* active flag for lazy masking */ iactive: .long 0 +#endif #ifdef COUNT_XINVLTLB_HITS .globl _xhits diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c index b209065..4edda4b 100644 --- a/sys/amd64/amd64/autoconf.c +++ b/sys/amd64/amd64/autoconf.c @@ -163,14 +163,6 @@ configure(dummy) * XXX this is slightly misplaced. */ spl0(); - - /* - * Allow lowering of the ipl to the lowest kernel level if we - * panic (or call tsleep() before clearing `cold'). No level is - * completely safe (since a panic may occur in a critical region - * at splhigh()), but we want at least bio interrupts to work. - */ - safepri = cpl; } static void diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index c895fef..db56a1b 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -73,189 +73,6 @@ _tlb_flush_count: .long 0 .text -/* - * When no processes are on the runq, cpu_switch() branches to _idle - * to wait for something to come ready. 
- */ - ALIGN_TEXT - .type _idle,@function -_idle: - xorl %ebp,%ebp - movl %ebp,_switchtime - -#ifdef SMP - - /* when called, we have the mplock, intr disabled */ - /* use our idleproc's "context" */ - movl _IdlePTD, %ecx - movl %cr3, %eax - cmpl %ecx, %eax - je 2f -#if defined(SWTCH_OPTIM_STATS) - decl _swtch_optim_stats - incl _tlb_flush_count -#endif - movl %ecx, %cr3 -2: - /* Keep space for nonexisting return addr, or profiling bombs */ - movl $gd_idlestack_top-4, %ecx - addl %fs:0, %ecx - movl %ecx, %esp - - /* update common_tss.tss_esp0 pointer */ - movl %ecx, _common_tss + TSS_ESP0 - - movl _cpuid, %esi - btrl %esi, _private_tss - jae 1f - - movl $gd_common_tssd, %edi - addl %fs:0, %edi - - /* move correct tss descriptor into GDT slot, then reload tr */ - movl _tss_gdt, %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -1: - - sti - - /* - * XXX callers of cpu_switch() do a bogus splclock(). Locking should - * be left to cpu_switch(). - * - * NOTE: spl*() may only be called while we hold the MP lock (which - * we do). - */ - call _spl0 - - cli - - /* - * _REALLY_ free the lock, no matter how deep the prior nesting. - * We will recover the nesting on the way out when we have a new - * proc to load. - * - * XXX: we had damn well better be sure we had it before doing this! - */ - movl $FREE_LOCK, %eax - movl %eax, _mp_lock - - /* do NOT have lock, intrs disabled */ - .globl idle_loop -idle_loop: - - cmpl $0,_smp_active - jne 1f - cmpl $0,_cpuid - je 1f - jmp 2f - -1: - call _procrunnable - testl %eax,%eax - jnz 3f - - /* - * Handle page-zeroing in the idle loop. Called with interrupts - * disabled and the MP lock released. Inside vm_page_zero_idle - * we enable interrupts and grab the mplock as required. - */ - cmpl $0,_do_page_zero_idle - je 2f - - call _vm_page_zero_idle /* internal locking */ - testl %eax, %eax - jnz idle_loop -2: - - /* enable intrs for a halt */ - movl $0, lapic_tpr /* 1st candidate for an INT */ - call *_hlt_vector /* wait for interrupt */ - cli - jmp idle_loop - - /* - * Note that interrupts must be enabled while obtaining the MP lock - * in order to be able to take IPI's while blocked. - */ -3: - movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */ - sti - call _get_mplock - cli - call _procrunnable - testl %eax,%eax - CROSSJUMP(jnz, sw1a, jz) - call _rel_mplock - jmp idle_loop - -#else /* !SMP */ - - movl $HIDENAME(tmpstk),%esp -#if defined(OVERLY_CONSERVATIVE_PTD_MGMT) -#if defined(SWTCH_OPTIM_STATS) - incl _swtch_optim_stats -#endif - movl _IdlePTD, %ecx - movl %cr3, %eax - cmpl %ecx, %eax - je 2f -#if defined(SWTCH_OPTIM_STATS) - decl _swtch_optim_stats - incl _tlb_flush_count -#endif - movl %ecx, %cr3 -2: -#endif - - /* update common_tss.tss_esp0 pointer */ - movl %esp, _common_tss + TSS_ESP0 - - movl $0, %esi - btrl %esi, _private_tss - jae 1f - - movl $_common_tssd, %edi - - /* move correct tss descriptor into GDT slot, then reload tr */ - movl _tss_gdt, %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -1: - - sti - - /* - * XXX callers of cpu_switch() do a bogus splclock(). Locking should - * be left to cpu_switch(). 
- */ - call _spl0 - - ALIGN_TEXT -idle_loop: - cli - call _procrunnable - testl %eax,%eax - CROSSJUMP(jnz, sw1a, jz) - call _vm_page_zero_idle - testl %eax, %eax - jnz idle_loop - call *_hlt_vector /* wait for interrupt */ - jmp idle_loop - -#endif /* SMP */ - -CROSSJUMPTARGET(_idle) - ENTRY(default_halt) sti #ifndef SMP @@ -264,16 +81,23 @@ ENTRY(default_halt) ret /* + * cpu_throw() + */ +ENTRY(cpu_throw) + jmp sw1 + +/* * cpu_switch() */ ENTRY(cpu_switch) /* switch to new process. first, save context as needed */ movl _curproc,%ecx + movl %ecx,_prevproc /* if no process to save, don't bother */ testl %ecx,%ecx - je sw1 + jz sw1 #ifdef SMP movb P_ONCPU(%ecx), %al /* save "last" cpu */ @@ -299,7 +123,7 @@ ENTRY(cpu_switch) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) - /* test if debug regisers should be saved */ + /* test if debug registers should be saved */ movb PCB_FLAGS(%edx),%al andb $PCB_DBREGS,%al jz 1f /* no, skip over */ @@ -319,15 +143,12 @@ ENTRY(cpu_switch) movl %eax,PCB_DR0(%edx) 1: + /* save sched_lock recursion count */ + movl _sched_lock+MTX_RECURSE,%eax + movl %eax,PCB_SCHEDNEST(%edx) + #ifdef SMP - movl _mp_lock, %eax /* XXX FIXME: we should be saving the local APIC TPR */ -#ifdef DIAGNOSTIC - cmpl $FREE_LOCK, %eax /* is it free? */ - je badsw4 /* yes, bad medicine! */ -#endif /* DIAGNOSTIC */ - andl $COUNT_FIELD, %eax /* clear CPU portion */ - movl %eax, PCB_MPNEST(%edx) /* store it */ #endif /* SMP */ #if NNPX > 0 @@ -341,25 +162,33 @@ ENTRY(cpu_switch) 1: #endif /* NNPX > 0 */ - movl $0,_curproc /* out of process */ - - /* save is done, now choose a new process or idle */ + /* save is done, now choose a new process */ sw1: - cli #ifdef SMP /* Stop scheduling if smp_active goes zero and we are not BSP */ cmpl $0,_smp_active jne 1f cmpl $0,_cpuid - CROSSJUMP(je, _idle, jne) /* wind down */ + je 1f + + movl _idleproc, %eax + jmp sw1b 1: #endif + /* + * Choose a new process to schedule. chooseproc() returns idleproc + * if it cannot find another process to run. + */ sw1a: call _chooseproc /* trash ecx, edx, ret eax*/ - testl %eax,%eax - CROSSJUMP(je, _idle, jne) /* if no proc, idle */ + +#ifdef DIAGNOSTIC + testl %eax,%eax /* no process? */ + jz badsw3 /* no, panic */ +#endif +sw1b: movl %eax,%ecx xorl %eax,%eax @@ -456,9 +285,6 @@ sw1a: movl %ecx, _curproc /* into next process */ #ifdef SMP - movl _cpu_lockid, %eax - orl PCB_MPNEST(%edx), %eax /* add next count from PROC */ - movl %eax, _mp_lock /* load the mp_lock */ /* XXX FIXME: we should be restoring the local APIC TPR */ #endif /* SMP */ @@ -500,7 +326,22 @@ cpu_switch_load_gs: movl %eax,%dr7 1: - sti + /* + * restore sched_lock recursion count and transfer ownership to + * new process + */ + movl PCB_SCHEDNEST(%edx),%eax + movl %eax,_sched_lock+MTX_RECURSE + + movl _curproc,%eax + movl %eax,_sched_lock+MTX_LOCK + +#ifdef DIAGNOSTIC + pushfl + popl %ecx + testl $0x200, %ecx /* interrupts enabled? 
*/ + jnz badsw6 /* that way madness lies */ +#endif ret CROSSJUMPTARGET(sw1a) @@ -517,15 +358,27 @@ badsw2: call _panic sw0_2: .asciz "cpu_switch: not SRUN" + +badsw3: + pushl $sw0_3 + call _panic + +sw0_3: .asciz "cpu_switch: chooseproc returned NULL" + #endif -#if defined(SMP) && defined(DIAGNOSTIC) -badsw4: - pushl $sw0_4 +#ifdef DIAGNOSTIC +badsw5: + pushl $sw0_5 + call _panic + +sw0_5: .asciz "cpu_switch: interrupts enabled (again)" +badsw6: + pushl $sw0_6 call _panic -sw0_4: .asciz "cpu_switch: do not have lock" -#endif /* SMP && DIAGNOSTIC */ +sw0_6: .asciz "cpu_switch: interrupts enabled" +#endif /* * savectx(pcb) diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index acb8b40..9e77114 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #ifdef SMP @@ -175,20 +176,12 @@ IDTVEC(fpu) mov %ax,%fs FAKE_MCOUNT(13*4(%esp)) -#ifdef SMP MPLOCKED incl _cnt+V_TRAP - MP_LOCK - movl _cpl,%eax - pushl %eax /* save original cpl */ - pushl $0 /* dummy unit to finish intr frame */ -#else /* SMP */ - movl _cpl,%eax - pushl %eax pushl $0 /* dummy unit to finish intr frame */ - incl _cnt+V_TRAP -#endif /* SMP */ + call __mtx_enter_giant_def call _npx_intr + call __mtx_exit_giant_def incb _intr_nesting_level MEXITCOUNT @@ -205,9 +198,6 @@ IDTVEC(align) * gate (TGT), else disabled if this was an interrupt gate (IGT). * Note that int0x80_syscall is a trap gate. Only page faults * use an interrupt gate. - * - * Note that all calls to MP_LOCK must occur with interrupts enabled - * in order to be able to take IPI's while waiting for the lock. */ SUPERALIGN_TEXT @@ -227,16 +217,12 @@ alltraps_with_regs_pushed: FAKE_MCOUNT(13*4(%esp)) calltrap: FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */ - MPLOCKED incl _cnt+V_TRAP - MP_LOCK - movl _cpl,%ebx /* keep orig. cpl here during trap() */ call _trap /* * Return via _doreti to handle ASTs. Have to change trap frame * to interrupt frame. */ - pushl %ebx /* cpl to restore */ subl $4,%esp /* dummy unit to finish intr frame */ incb _intr_nesting_level MEXITCOUNT @@ -274,16 +260,11 @@ IDTVEC(syscall) movl %eax,TF_EFLAGS(%esp) movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */ FAKE_MCOUNT(13*4(%esp)) - MPLOCKED incl _cnt+V_SYSCALL call _syscall2 MEXITCOUNT cli /* atomic astpending access */ - cmpl $0,_astpending - je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif - pushl $0 /* cpl to restore */ + cmpl $0,_astpending /* AST pending? */ + je doreti_syscall_ret /* no, get out of here */ subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level jmp _doreti @@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall) mov %ax,%fs movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */ FAKE_MCOUNT(13*4(%esp)) - MPLOCKED incl _cnt+V_SYSCALL call _syscall2 MEXITCOUNT cli /* atomic astpending access */ - cmpl $0,_astpending - je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif - pushl $0 /* cpl to restore */ + cmpl $0,_astpending /* AST pending? */ + je doreti_syscall_ret /* no, get out of here */ subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level jmp _doreti ENTRY(fork_trampoline) + MTX_EXIT(_sched_lock, %ecx) + sti call _spl0 #ifdef SMP @@ -355,7 +333,6 @@ ENTRY(fork_trampoline) /* * Return via _doreti to handle ASTs. 
*/ - pushl $0 /* cpl to restore */ subl $4,%esp /* dummy unit to finish intr frame */ movb $1,_intr_nesting_level MEXITCOUNT diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s index acb8b40..9e77114 100644 --- a/sys/amd64/amd64/exception.s +++ b/sys/amd64/amd64/exception.s @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #ifdef SMP @@ -175,20 +176,12 @@ IDTVEC(fpu) mov %ax,%fs FAKE_MCOUNT(13*4(%esp)) -#ifdef SMP MPLOCKED incl _cnt+V_TRAP - MP_LOCK - movl _cpl,%eax - pushl %eax /* save original cpl */ - pushl $0 /* dummy unit to finish intr frame */ -#else /* SMP */ - movl _cpl,%eax - pushl %eax pushl $0 /* dummy unit to finish intr frame */ - incl _cnt+V_TRAP -#endif /* SMP */ + call __mtx_enter_giant_def call _npx_intr + call __mtx_exit_giant_def incb _intr_nesting_level MEXITCOUNT @@ -205,9 +198,6 @@ IDTVEC(align) * gate (TGT), else disabled if this was an interrupt gate (IGT). * Note that int0x80_syscall is a trap gate. Only page faults * use an interrupt gate. - * - * Note that all calls to MP_LOCK must occur with interrupts enabled - * in order to be able to take IPI's while waiting for the lock. */ SUPERALIGN_TEXT @@ -227,16 +217,12 @@ alltraps_with_regs_pushed: FAKE_MCOUNT(13*4(%esp)) calltrap: FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */ - MPLOCKED incl _cnt+V_TRAP - MP_LOCK - movl _cpl,%ebx /* keep orig. cpl here during trap() */ call _trap /* * Return via _doreti to handle ASTs. Have to change trap frame * to interrupt frame. */ - pushl %ebx /* cpl to restore */ subl $4,%esp /* dummy unit to finish intr frame */ incb _intr_nesting_level MEXITCOUNT @@ -274,16 +260,11 @@ IDTVEC(syscall) movl %eax,TF_EFLAGS(%esp) movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */ FAKE_MCOUNT(13*4(%esp)) - MPLOCKED incl _cnt+V_SYSCALL call _syscall2 MEXITCOUNT cli /* atomic astpending access */ - cmpl $0,_astpending - je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif - pushl $0 /* cpl to restore */ + cmpl $0,_astpending /* AST pending? */ + je doreti_syscall_ret /* no, get out of here */ subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level jmp _doreti @@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall) mov %ax,%fs movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */ FAKE_MCOUNT(13*4(%esp)) - MPLOCKED incl _cnt+V_SYSCALL call _syscall2 MEXITCOUNT cli /* atomic astpending access */ - cmpl $0,_astpending - je doreti_syscall_ret -#ifdef SMP - MP_LOCK -#endif - pushl $0 /* cpl to restore */ + cmpl $0,_astpending /* AST pending? */ + je doreti_syscall_ret /* no, get out of here */ subl $4,%esp /* dummy unit for interrupt frame */ movb $1,_intr_nesting_level jmp _doreti ENTRY(fork_trampoline) + MTX_EXIT(_sched_lock, %ecx) + sti call _spl0 #ifdef SMP @@ -355,7 +333,6 @@ ENTRY(fork_trampoline) /* * Return via _doreti to handle ASTs. */ - pushl $0 /* cpl to restore */ subl $4,%esp /* dummy unit to finish intr frame */ movb $1,_intr_nesting_level MEXITCOUNT diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c index 637853e..8610e35 100644 --- a/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -245,6 +245,12 @@ npx_probe(dev) setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); npx_idt_probeintr = idt[npx_intrno]; + + /* + * XXX This looks highly bogus, but it appears that npc_probe1 + * needs interrupts enabled. Does this make any difference + * here? 
+ */ enable_intr(); result = npx_probe1(dev); disable_intr(); @@ -797,7 +803,7 @@ npxdna() /* * Record new context early in case frstor causes an IRQ13. */ - npxproc = curproc; + PCPU_SET(npxproc, CURPROC); curpcb->pcb_savefpu.sv_ex_sw = 0; /* * The following frstor may cause an IRQ13 when the state being @@ -834,16 +840,18 @@ npxsave(addr) fnsave(addr); /* fnop(); */ start_emulating(); - npxproc = NULL; + PCPU_SET(npxproc, NULL); #else /* SMP */ + int intrstate; u_char icu1_mask; u_char icu2_mask; u_char old_icu1_mask; u_char old_icu2_mask; struct gate_descriptor save_idt_npxintr; + intrstate = save_intr(); disable_intr(); old_icu1_mask = inb(IO_ICU1 + 1); old_icu2_mask = inb(IO_ICU2 + 1); @@ -851,12 +859,12 @@ npxsave(addr) outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); idt[npx_intrno] = npx_idt_probeintr; - enable_intr(); + write_eflags(intrstate); stop_emulating(); fnsave(addr); fnop(); start_emulating(); - npxproc = NULL; + PCPU_SET(npxproc, NULL); disable_intr(); icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ icu2_mask = inb(IO_ICU2 + 1); @@ -866,7 +874,7 @@ npxsave(addr) (icu2_mask & ~(npx0_imask >> 8)) | (old_icu2_mask & (npx0_imask >> 8))); idt[npx_intrno] = save_idt_npxintr; - enable_intr(); /* back to usual state */ + restore_intr(intrstate); /* back to previous state */ #endif /* SMP */ } diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 60accd1..78c6075 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -51,6 +51,10 @@ #include #include #include +/* XXX */ +#ifdef KTR_PERCPU +#include +#endif #include #include #include @@ -73,6 +77,7 @@ #include #include #include +#include ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); @@ -127,9 +132,7 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); -#ifdef SMP -ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest)); -#endif +ASSYM(PCB_SCHEDNEST, offsetof(struct pcb, pcb_schednest)); ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); @@ -170,7 +173,9 @@ ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab)); ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend)); ASSYM(GD_SIZEOF, sizeof(struct globaldata)); ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc)); +ASSYM(GD_PREVPROC, offsetof(struct globaldata, gd_prevproc)); ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc)); +ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc)); ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb)); ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss)); ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime)); @@ -178,11 +183,21 @@ ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks)); ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd)); ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt)); ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending)); +ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct globaldata, gd_intr_nesting_level)); #ifdef USER_LDT ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt)); #endif +ASSYM(GD_WITNESS_SPIN_CHECK, offsetof(struct globaldata, gd_witness_spin_check)); + +/* XXX */ +#ifdef KTR_PERCPU +ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx)); +ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf)); 
+ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data)); +#endif + #ifdef SMP ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid)); ASSYM(GD_CPU_LOCKID, offsetof(struct globaldata, gd_cpu_lockid)); @@ -211,3 +226,9 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL)); ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL)); ASSYM(GPROC0_SEL, GPROC0_SEL); ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame)); + +ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock)); +ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse)); +ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl)); + +ASSYM(MTX_UNOWNED, MTX_UNOWNED); diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index 0e11e2b..71ecd63 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -42,6 +42,7 @@ #include "opt_cpu.h" #include +#include #include #include #include @@ -53,6 +54,8 @@ #include #include +#include +#include #include #define IDENTBLUE_CYRIX486 0 diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c index be86c65..b9395bf 100644 --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -607,12 +607,14 @@ void enable_K5_wt_alloc(void) { u_int64_t msr; + int intrstate; /* * Write allocate is supported only on models 1, 2, and 3, with * a stepping of 4 or greater. */ if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) { + intrstate = save_intr(); disable_intr(); msr = rdmsr(0x83); /* HWCR */ wrmsr(0x83, msr & !(0x10)); @@ -645,7 +647,7 @@ enable_K5_wt_alloc(void) msr=rdmsr(0x83); wrmsr(0x83, msr|0x10); /* enable write allocate */ - enable_intr(); + restore_intr(intrstate); } } @@ -708,7 +710,6 @@ enable_K6_wt_alloc(void) wrmsr(0x0c0000082, whcr); write_eflags(eflags); - enable_intr(); } void @@ -770,7 +771,6 @@ enable_K6_2_wt_alloc(void) wrmsr(0x0c0000082, whcr); write_eflags(eflags); - enable_intr(); } #endif /* I585_CPU && CPU_WT_ALLOC */ diff --git a/sys/amd64/amd64/legacy.c b/sys/amd64/amd64/legacy.c index 8a30770..5b6cdbc 100644 --- a/sys/amd64/amd64/legacy.c +++ b/sys/amd64/amd64/legacy.c @@ -68,7 +68,10 @@ #else #include #endif +#include +#include #include +#include static struct rman irq_rman, drq_rman, port_rman, mem_rman; @@ -397,9 +400,9 @@ static int nexus_setup_intr(device_t bus, device_t child, struct resource *irq, int flags, void (*ihand)(void *), void *arg, void **cookiep) { - intrmask_t *mask; driver_t *driver; - int error, icflags; + int error, icflags; + int pri; /* interrupt thread priority */ /* somebody tried to setup an irq that failed to allocate! */ if (irq == NULL) @@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, driver = device_get_driver(child); switch (flags) { - case INTR_TYPE_TTY: - mask = &tty_imask; + case INTR_TYPE_TTY: /* keyboard or parallel port */ + pri = PI_TTYLOW; break; - case (INTR_TYPE_TTY | INTR_TYPE_FAST): - mask = &tty_imask; + case (INTR_TYPE_TTY | INTR_FAST): /* sio */ + pri = PI_TTYHIGH; icflags |= INTR_FAST; break; case INTR_TYPE_BIO: - mask = &bio_imask; + /* + * XXX We need to refine this. BSD/OS distinguishes + * between tape and disk priorities. + */ + pri = PI_DISK; break; case INTR_TYPE_NET: - mask = &net_imask; + pri = PI_NET; break; case INTR_TYPE_CAM: - mask = &cam_imask; + pri = PI_DISK; /* XXX or PI_CAM? */ break; case INTR_TYPE_MISC: - mask = 0; + pri = PI_DULL; /* don't care */ break; + /* We didn't specify an interrupt level. 
*/ default: - panic("still using grody create_intr interface"); + panic("nexus_setup_intr: no interrupt type in flags"); } /* @@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, return (error); *cookiep = inthand_add(device_get_nameunit(child), irq->r_start, - ihand, arg, mask, icflags); + ihand, arg, pri, icflags); if (*cookiep == NULL) error = EINVAL; /* XXX ??? */ diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S index bddd7d5..fa95fb0 100644 --- a/sys/amd64/amd64/locore.S +++ b/sys/amd64/amd64/locore.S @@ -862,9 +862,6 @@ map_read_write: movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) - -/* Initialize mp lock to allow early traps */ - movl $1, R(_mp_lock) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s index bddd7d5..fa95fb0 100644 --- a/sys/amd64/amd64/locore.s +++ b/sys/amd64/amd64/locore.s @@ -862,9 +862,6 @@ map_read_write: movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */ movl $1, %ecx /* one private pt coming right up */ fillkpt(R(SMPptpa), $PG_RW) - -/* Initialize mp lock to allow early traps */ - movl $1, R(_mp_lock) #endif /* SMP */ /* install a pde for temporary double map of bottom of VA */ diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 6edecf0..875c9d5 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include @@ -98,10 +99,12 @@ #include #include #include +#include #include /* pcb.h included via sys/user.h */ +#include +#include #ifdef SMP #include -#include #endif #ifdef PERFMON #include @@ -110,6 +113,7 @@ #ifdef OLD_BUS_ARCH #include #endif +#include #include #include #include @@ -247,6 +251,11 @@ vm_offset_t clean_sva, clean_eva; static vm_offset_t pager_sva, pager_eva; static struct trapframe proc0_tf; +struct cpuhead cpuhead; + +mtx_t sched_lock; +mtx_t Giant; + #define offsetof(type, member) ((size_t)(&((type *)0)->member)) static void @@ -431,6 +440,11 @@ again: bufinit(); vm_pager_bufferinit(); + SLIST_INIT(&cpuhead); + SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu); + + mtx_init(&sched_lock, "sched lock", MTX_SPIN); + #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! @@ -1817,11 +1831,6 @@ init386(first) #endif int off; - /* - * Prevent lowering of the ipl if we call tsleep() early. - */ - safepri = cpl; - proc0.p_addr = proc0paddr; atdevbase = ISA_HOLE_START + KERNBASE; @@ -1871,6 +1880,10 @@ init386(first) r_gdt.rd_base = (int) gdt; lgdt(&r_gdt); + /* setup curproc so that mutexes work */ + PCPU_SET(curproc, &proc0); + PCPU_SET(prevproc, &proc0); + /* make ldt memory segments */ /* * The data segment limit must not cover the user area because we @@ -1953,7 +1966,7 @@ init386(first) /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16; - common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; + common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); private_tss = 0; tss_gdt = &gdt[GPROC0_SEL].sd; @@ -1974,6 +1987,12 @@ init386(first) dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); + /* + * We grab Giant during the vm86bios routines, so we need to ensure + * that it is up and running before we use vm86. 
+ */ + mtx_init(&Giant, "Giant", MTX_DEF); + vm86_initialize(); getmemsize(first); @@ -2009,9 +2028,7 @@ init386(first) /* setup proc 0's pcb */ proc0.p_addr->u_pcb.pcb_flags = 0; proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD; -#ifdef SMP - proc0.p_addr->u_pcb.pcb_mpnest = 1; -#endif + proc0.p_addr->u_pcb.pcb_schednest = 0; proc0.p_addr->u_pcb.pcb_ext = 0; proc0.p_md.md_regs = &proc0_tf; } diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 61c5ecf..95b5759 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -36,6 +36,7 @@ #endif #include +#include #include #include #include @@ -65,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY { #define MP_ANNOUNCE_POST 0x19 +/* used to hold the AP's until we are ready to release them */ +struct simplelock ap_boot_lock; /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ int current_postcode; @@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +static void release_aps(void *dummy); /* * Calculate usable address in base memory for AP trampoline code. @@ -403,7 +408,7 @@ found: /* - * Startup the SMP processors. + * Initialize the SMP hardware and the APIC and start up the AP's. */ void mp_start(void) @@ -619,6 +624,9 @@ mp_enable(u_int boot_addr) /* initialize all SMP locks */ init_locks(); + /* obtain the ap_boot_lock */ + s_lock(&ap_boot_lock); + /* start each Application Processor */ start_all_aps(boot_addr); } @@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock; /* critical region around INTR() routines */ struct simplelock intr_lock; -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - /* lock region used by kernel profiling */ struct simplelock mcount_lock; @@ -1885,26 +1890,16 @@ struct simplelock clock_lock; /* lock around the MP rendezvous */ static struct simplelock smp_rv_lock; +/* only 1 CPU can panic at a time :) */ +struct simplelock panic_lock; + static void init_locks(void) { - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. 
- */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); #endif - s_lock_init((struct simplelock*)&mpintr_lock); - s_lock_init((struct simplelock*)&mcount_lock); s_lock_init((struct simplelock*)&fast_intr_lock); @@ -1912,6 +1907,7 @@ init_locks(void) s_lock_init((struct simplelock*)&imen_lock); s_lock_init((struct simplelock*)&cpl_lock); s_lock_init(&smp_rv_lock); + s_lock_init(&panic_lock); #ifdef USE_COMLOCK s_lock_init((struct simplelock*)&com_lock); @@ -1919,11 +1915,9 @@ init_locks(void) #ifdef USE_CLOCKLOCK s_lock_init((struct simplelock*)&clock_lock); #endif /* USE_CLOCKLOCK */ -} - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); + s_lock_init(&ap_boot_lock); +} /* * start each AP in our list @@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr) SMPpt[pg + 4] = 0; /* *prv_PMAP1 */ /* prime data page for it to use */ + SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu); gd->gd_cpuid = x; gd->gd_cpu_lockid = x << 24; gd->gd_prv_CMAP1 = &SMPpt[pg + 1]; @@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } - /* * Flush the TLB on all other CPU's * @@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* lock against other AP's that are waking up */ + s_lock(&ap_boot_lock); + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); @@ -2397,6 +2394,30 @@ ap_init() smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ smp_active = 1; /* historic */ } + + /* let other AP's wake up now */ + s_unlock(&ap_boot_lock); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ; /* nothing */ + + /* + * Set curproc to our per-cpu idleproc so that mutexes have + * something unique to lock with. + */ + PCPU_SET(curproc,idleproc); + PCPU_SET(prevproc,idleproc); + + microuptime(&switchtime); + switchticks = ticks; + + /* ok, now grab sched_lock and enter the scheduler */ + enable_intr(); + mtx_enter(&sched_lock, MTX_SPIN); + cpu_throw(); /* doesn't return */ + + panic("scheduler returned us to ap_init"); } #ifdef BETTER_CLOCK @@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap) p = checkstate_curproc[id]; cpustate = checkstate_cpustate[id]; + /* XXX */ + if (p->p_ithd) + cpustate = CHECKSTATE_INTR; + else if (p == idleproc) + cpustate = CHECKSTATE_SYS; + switch (cpustate) { case CHECKSTATE_USER: if (p->p_flag & P_PROFIL) @@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap) if (pscnt > 1) return; - if (!p) + if (p == idleproc) { + p->p_sticks++; cp_time[CP_IDLE]++; - else { + } else { p->p_sticks++; cp_time[CP_SYS]++; } @@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap) p->p_iticks++; cp_time[CP_INTR]++; } - if (p != NULL) { + if (p != idleproc) { schedclock(p); /* Update resource usage integrals and maximums. 
*/ @@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *), /* release lock */ s_unlock(&smp_rv_lock); } + +void +release_aps(void *dummy __unused) +{ + s_unlock(&ap_boot_lock); +} + +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S index d3602d2..9ede02c 100644 --- a/sys/amd64/amd64/mpboot.S +++ b/sys/amd64/amd64/mpboot.S @@ -114,43 +114,9 @@ mp_begin: /* now running relocated at KERNBASE */ CHECKPOINT(0x39, 6) - /* wait till we can get into the kernel */ - call _boot_get_mplock - - /* Now, let's prepare for some REAL WORK :-) */ + /* Now, let's prepare for some REAL WORK :-) This doesn't return. */ call _ap_init - call _rel_mplock - lock /* Avoid livelock (PIII Errata 39) */ - addl $0,-4(%esp) -2: - cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */ - jz 2b - call _get_mplock - - /* let her rip! (loads new stack) */ - jmp _cpu_switch - -NON_GPROF_ENTRY(wait_ap) - pushl %ebp - movl %esp, %ebp - call _rel_mplock - lock /* Avoid livelock (PIII Errata 39) */ - addl $0,0(%esp) - movl %eax, 8(%ebp) -1: - cmpl $0, CNAME(smp_started) - jnz 2f - decl %eax - cmpl $0, %eax - jge 1b -2: - call _get_mplock - movl %ebp, %esp - popl %ebp - ret - - /* * This is the embedded trampoline or bootstrap that is * copied into 'real-mode' low memory, it is where the diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index 61c5ecf..95b5759 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -36,6 +36,7 @@ #endif #include +#include #include #include #include @@ -65,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY { #define MP_ANNOUNCE_POST 0x19 +/* used to hold the AP's until we are ready to release them */ +struct simplelock ap_boot_lock; /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ int current_postcode; @@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +static void release_aps(void *dummy); /* * Calculate usable address in base memory for AP trampoline code. @@ -403,7 +408,7 @@ found: /* - * Startup the SMP processors. + * Initialize the SMP hardware and the APIC and start up the AP's. */ void mp_start(void) @@ -619,6 +624,9 @@ mp_enable(u_int boot_addr) /* initialize all SMP locks */ init_locks(); + /* obtain the ap_boot_lock */ + s_lock(&ap_boot_lock); + /* start each Application Processor */ start_all_aps(boot_addr); } @@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock; /* critical region around INTR() routines */ struct simplelock intr_lock; -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - /* lock region used by kernel profiling */ struct simplelock mcount_lock; @@ -1885,26 +1890,16 @@ struct simplelock clock_lock; /* lock around the MP rendezvous */ static struct simplelock smp_rv_lock; +/* only 1 CPU can panic at a time :) */ +struct simplelock panic_lock; + static void init_locks(void) { - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. 
- */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); #endif - s_lock_init((struct simplelock*)&mpintr_lock); - s_lock_init((struct simplelock*)&mcount_lock); s_lock_init((struct simplelock*)&fast_intr_lock); @@ -1912,6 +1907,7 @@ init_locks(void) s_lock_init((struct simplelock*)&imen_lock); s_lock_init((struct simplelock*)&cpl_lock); s_lock_init(&smp_rv_lock); + s_lock_init(&panic_lock); #ifdef USE_COMLOCK s_lock_init((struct simplelock*)&com_lock); @@ -1919,11 +1915,9 @@ init_locks(void) #ifdef USE_CLOCKLOCK s_lock_init((struct simplelock*)&clock_lock); #endif /* USE_CLOCKLOCK */ -} - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); + s_lock_init(&ap_boot_lock); +} /* * start each AP in our list @@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr) SMPpt[pg + 4] = 0; /* *prv_PMAP1 */ /* prime data page for it to use */ + SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu); gd->gd_cpuid = x; gd->gd_cpu_lockid = x << 24; gd->gd_prv_CMAP1 = &SMPpt[pg + 1]; @@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } - /* * Flush the TLB on all other CPU's * @@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* lock against other AP's that are waking up */ + s_lock(&ap_boot_lock); + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); @@ -2397,6 +2394,30 @@ ap_init() smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ smp_active = 1; /* historic */ } + + /* let other AP's wake up now */ + s_unlock(&ap_boot_lock); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ; /* nothing */ + + /* + * Set curproc to our per-cpu idleproc so that mutexes have + * something unique to lock with. + */ + PCPU_SET(curproc,idleproc); + PCPU_SET(prevproc,idleproc); + + microuptime(&switchtime); + switchticks = ticks; + + /* ok, now grab sched_lock and enter the scheduler */ + enable_intr(); + mtx_enter(&sched_lock, MTX_SPIN); + cpu_throw(); /* doesn't return */ + + panic("scheduler returned us to ap_init"); } #ifdef BETTER_CLOCK @@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap) p = checkstate_curproc[id]; cpustate = checkstate_cpustate[id]; + /* XXX */ + if (p->p_ithd) + cpustate = CHECKSTATE_INTR; + else if (p == idleproc) + cpustate = CHECKSTATE_SYS; + switch (cpustate) { case CHECKSTATE_USER: if (p->p_flag & P_PROFIL) @@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap) if (pscnt > 1) return; - if (!p) + if (p == idleproc) { + p->p_sticks++; cp_time[CP_IDLE]++; - else { + } else { p->p_sticks++; cp_time[CP_SYS]++; } @@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap) p->p_iticks++; cp_time[CP_INTR]++; } - if (p != NULL) { + if (p != idleproc) { schedclock(p); /* Update resource usage integrals and maximums. 
*/ @@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *), /* release lock */ s_unlock(&smp_rv_lock); } + +void +release_aps(void *dummy __unused) +{ + s_unlock(&ap_boot_lock); +} + +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c index 8a30770..5b6cdbc 100644 --- a/sys/amd64/amd64/nexus.c +++ b/sys/amd64/amd64/nexus.c @@ -68,7 +68,10 @@ #else #include #endif +#include +#include #include +#include static struct rman irq_rman, drq_rman, port_rman, mem_rman; @@ -397,9 +400,9 @@ static int nexus_setup_intr(device_t bus, device_t child, struct resource *irq, int flags, void (*ihand)(void *), void *arg, void **cookiep) { - intrmask_t *mask; driver_t *driver; - int error, icflags; + int error, icflags; + int pri; /* interrupt thread priority */ /* somebody tried to setup an irq that failed to allocate! */ if (irq == NULL) @@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, driver = device_get_driver(child); switch (flags) { - case INTR_TYPE_TTY: - mask = &tty_imask; + case INTR_TYPE_TTY: /* keyboard or parallel port */ + pri = PI_TTYLOW; break; - case (INTR_TYPE_TTY | INTR_TYPE_FAST): - mask = &tty_imask; + case (INTR_TYPE_TTY | INTR_FAST): /* sio */ + pri = PI_TTYHIGH; icflags |= INTR_FAST; break; case INTR_TYPE_BIO: - mask = &bio_imask; + /* + * XXX We need to refine this. BSD/OS distinguishes + * between tape and disk priorities. + */ + pri = PI_DISK; break; case INTR_TYPE_NET: - mask = &net_imask; + pri = PI_NET; break; case INTR_TYPE_CAM: - mask = &cam_imask; + pri = PI_DISK; /* XXX or PI_CAM? */ break; case INTR_TYPE_MISC: - mask = 0; + pri = PI_DULL; /* don't care */ break; + /* We didn't specify an interrupt level. */ default: - panic("still using grody create_intr interface"); + panic("nexus_setup_intr: no interrupt type in flags"); } /* @@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq, return (error); *cookiep = inthand_add(device_get_nameunit(child), irq->r_start, - ihand, arg, mask, icflags); + ihand, arg, pri, icflags); if (*cookiep == NULL) error = EINVAL; /* XXX ??? */ diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index edae292..7ce9120 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -668,7 +668,7 @@ pmap_pte_quick(pmap, va) * (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V; cpu_invlpg(prv_PADDR1); } - return prv_PADDR1 + ((unsigned) index & (NPTEPG - 1)); + return (unsigned *)(prv_PADDR1 + (index & (NPTEPG - 1))); #else if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s index c895fef..db56a1b 100644 --- a/sys/amd64/amd64/swtch.s +++ b/sys/amd64/amd64/swtch.s @@ -73,189 +73,6 @@ _tlb_flush_count: .long 0 .text -/* - * When no processes are on the runq, cpu_switch() branches to _idle - * to wait for something to come ready. 
- */ - ALIGN_TEXT - .type _idle,@function -_idle: - xorl %ebp,%ebp - movl %ebp,_switchtime - -#ifdef SMP - - /* when called, we have the mplock, intr disabled */ - /* use our idleproc's "context" */ - movl _IdlePTD, %ecx - movl %cr3, %eax - cmpl %ecx, %eax - je 2f -#if defined(SWTCH_OPTIM_STATS) - decl _swtch_optim_stats - incl _tlb_flush_count -#endif - movl %ecx, %cr3 -2: - /* Keep space for nonexisting return addr, or profiling bombs */ - movl $gd_idlestack_top-4, %ecx - addl %fs:0, %ecx - movl %ecx, %esp - - /* update common_tss.tss_esp0 pointer */ - movl %ecx, _common_tss + TSS_ESP0 - - movl _cpuid, %esi - btrl %esi, _private_tss - jae 1f - - movl $gd_common_tssd, %edi - addl %fs:0, %edi - - /* move correct tss descriptor into GDT slot, then reload tr */ - movl _tss_gdt, %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -1: - - sti - - /* - * XXX callers of cpu_switch() do a bogus splclock(). Locking should - * be left to cpu_switch(). - * - * NOTE: spl*() may only be called while we hold the MP lock (which - * we do). - */ - call _spl0 - - cli - - /* - * _REALLY_ free the lock, no matter how deep the prior nesting. - * We will recover the nesting on the way out when we have a new - * proc to load. - * - * XXX: we had damn well better be sure we had it before doing this! - */ - movl $FREE_LOCK, %eax - movl %eax, _mp_lock - - /* do NOT have lock, intrs disabled */ - .globl idle_loop -idle_loop: - - cmpl $0,_smp_active - jne 1f - cmpl $0,_cpuid - je 1f - jmp 2f - -1: - call _procrunnable - testl %eax,%eax - jnz 3f - - /* - * Handle page-zeroing in the idle loop. Called with interrupts - * disabled and the MP lock released. Inside vm_page_zero_idle - * we enable interrupts and grab the mplock as required. - */ - cmpl $0,_do_page_zero_idle - je 2f - - call _vm_page_zero_idle /* internal locking */ - testl %eax, %eax - jnz idle_loop -2: - - /* enable intrs for a halt */ - movl $0, lapic_tpr /* 1st candidate for an INT */ - call *_hlt_vector /* wait for interrupt */ - cli - jmp idle_loop - - /* - * Note that interrupts must be enabled while obtaining the MP lock - * in order to be able to take IPI's while blocked. - */ -3: - movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */ - sti - call _get_mplock - cli - call _procrunnable - testl %eax,%eax - CROSSJUMP(jnz, sw1a, jz) - call _rel_mplock - jmp idle_loop - -#else /* !SMP */ - - movl $HIDENAME(tmpstk),%esp -#if defined(OVERLY_CONSERVATIVE_PTD_MGMT) -#if defined(SWTCH_OPTIM_STATS) - incl _swtch_optim_stats -#endif - movl _IdlePTD, %ecx - movl %cr3, %eax - cmpl %ecx, %eax - je 2f -#if defined(SWTCH_OPTIM_STATS) - decl _swtch_optim_stats - incl _tlb_flush_count -#endif - movl %ecx, %cr3 -2: -#endif - - /* update common_tss.tss_esp0 pointer */ - movl %esp, _common_tss + TSS_ESP0 - - movl $0, %esi - btrl %esi, _private_tss - jae 1f - - movl $_common_tssd, %edi - - /* move correct tss descriptor into GDT slot, then reload tr */ - movl _tss_gdt, %ebx /* entry in GDT */ - movl 0(%edi), %eax - movl %eax, 0(%ebx) - movl 4(%edi), %eax - movl %eax, 4(%ebx) - movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */ - ltr %si -1: - - sti - - /* - * XXX callers of cpu_switch() do a bogus splclock(). Locking should - * be left to cpu_switch(). 
- */ - call _spl0 - - ALIGN_TEXT -idle_loop: - cli - call _procrunnable - testl %eax,%eax - CROSSJUMP(jnz, sw1a, jz) - call _vm_page_zero_idle - testl %eax, %eax - jnz idle_loop - call *_hlt_vector /* wait for interrupt */ - jmp idle_loop - -#endif /* SMP */ - -CROSSJUMPTARGET(_idle) - ENTRY(default_halt) sti #ifndef SMP @@ -264,16 +81,23 @@ ENTRY(default_halt) ret /* + * cpu_throw() + */ +ENTRY(cpu_throw) + jmp sw1 + +/* * cpu_switch() */ ENTRY(cpu_switch) /* switch to new process. first, save context as needed */ movl _curproc,%ecx + movl %ecx,_prevproc /* if no process to save, don't bother */ testl %ecx,%ecx - je sw1 + jz sw1 #ifdef SMP movb P_ONCPU(%ecx), %al /* save "last" cpu */ @@ -299,7 +123,7 @@ ENTRY(cpu_switch) movl %edi,PCB_EDI(%edx) movl %gs,PCB_GS(%edx) - /* test if debug regisers should be saved */ + /* test if debug registers should be saved */ movb PCB_FLAGS(%edx),%al andb $PCB_DBREGS,%al jz 1f /* no, skip over */ @@ -319,15 +143,12 @@ ENTRY(cpu_switch) movl %eax,PCB_DR0(%edx) 1: + /* save sched_lock recursion count */ + movl _sched_lock+MTX_RECURSE,%eax + movl %eax,PCB_SCHEDNEST(%edx) + #ifdef SMP - movl _mp_lock, %eax /* XXX FIXME: we should be saving the local APIC TPR */ -#ifdef DIAGNOSTIC - cmpl $FREE_LOCK, %eax /* is it free? */ - je badsw4 /* yes, bad medicine! */ -#endif /* DIAGNOSTIC */ - andl $COUNT_FIELD, %eax /* clear CPU portion */ - movl %eax, PCB_MPNEST(%edx) /* store it */ #endif /* SMP */ #if NNPX > 0 @@ -341,25 +162,33 @@ ENTRY(cpu_switch) 1: #endif /* NNPX > 0 */ - movl $0,_curproc /* out of process */ - - /* save is done, now choose a new process or idle */ + /* save is done, now choose a new process */ sw1: - cli #ifdef SMP /* Stop scheduling if smp_active goes zero and we are not BSP */ cmpl $0,_smp_active jne 1f cmpl $0,_cpuid - CROSSJUMP(je, _idle, jne) /* wind down */ + je 1f + + movl _idleproc, %eax + jmp sw1b 1: #endif + /* + * Choose a new process to schedule. chooseproc() returns idleproc + * if it cannot find another process to run. + */ sw1a: call _chooseproc /* trash ecx, edx, ret eax*/ - testl %eax,%eax - CROSSJUMP(je, _idle, jne) /* if no proc, idle */ + +#ifdef DIAGNOSTIC + testl %eax,%eax /* no process? */ + jz badsw3 /* no, panic */ +#endif +sw1b: movl %eax,%ecx xorl %eax,%eax @@ -456,9 +285,6 @@ sw1a: movl %ecx, _curproc /* into next process */ #ifdef SMP - movl _cpu_lockid, %eax - orl PCB_MPNEST(%edx), %eax /* add next count from PROC */ - movl %eax, _mp_lock /* load the mp_lock */ /* XXX FIXME: we should be restoring the local APIC TPR */ #endif /* SMP */ @@ -500,7 +326,22 @@ cpu_switch_load_gs: movl %eax,%dr7 1: - sti + /* + * restore sched_lock recursion count and transfer ownership to + * new process + */ + movl PCB_SCHEDNEST(%edx),%eax + movl %eax,_sched_lock+MTX_RECURSE + + movl _curproc,%eax + movl %eax,_sched_lock+MTX_LOCK + +#ifdef DIAGNOSTIC + pushfl + popl %ecx + testl $0x200, %ecx /* interrupts enabled? 
*/ + jnz badsw6 /* that way madness lies */ +#endif ret CROSSJUMPTARGET(sw1a) @@ -517,15 +358,27 @@ badsw2: call _panic sw0_2: .asciz "cpu_switch: not SRUN" + +badsw3: + pushl $sw0_3 + call _panic + +sw0_3: .asciz "cpu_switch: chooseproc returned NULL" + #endif -#if defined(SMP) && defined(DIAGNOSTIC) -badsw4: - pushl $sw0_4 +#ifdef DIAGNOSTIC +badsw5: + pushl $sw0_5 + call _panic + +sw0_5: .asciz "cpu_switch: interrupts enabled (again)" +badsw6: + pushl $sw0_6 call _panic -sw0_4: .asciz "cpu_switch: do not have lock" -#endif /* SMP && DIAGNOSTIC */ +sw0_6: .asciz "cpu_switch: interrupts enabled" +#endif /* * savectx(pcb) diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 51de1ac..f32dfae 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -49,10 +49,12 @@ #include "opt_trap.h" #include +#include #include #include #include #include +#include #include #include #include @@ -76,12 +78,14 @@ #include #include #include +#include #include #ifdef SMP #include #endif #include +#include #include #ifdef POWERFAIL_NMI @@ -96,11 +100,14 @@ #include "isa.h" #include "npx.h" +#include + int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); extern void syscall2 __P((struct trapframe frame)); +extern void ast __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int, vm_offset_t)); static void trap_fatal __P((struct trapframe *, vm_offset_t)); @@ -142,7 +149,7 @@ static char *trap_msg[] = { }; static __inline int userret __P((struct proc *p, struct trapframe *frame, - u_quad_t oticks, int have_mplock)); + u_quad_t oticks, int have_giant)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; @@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW, &panic_on_nmi, 0, "Panic on NMI"); static __inline int -userret(p, frame, oticks, have_mplock) +userret(p, frame, oticks, have_giant) struct proc *p; struct trapframe *frame; u_quad_t oticks; - int have_mplock; + int have_giant; { int sig, s; while ((sig = CURSIG(p)) != 0) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } postsig(sig); } @@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock) * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; - } s = splhigh(); + mtx_enter(&sched_lock, MTX_SPIN); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; mi_switch(); + mtx_exit(&sched_lock, MTX_SPIN); splx(s); - while ((sig = CURSIG(p)) != 0) + while ((sig = CURSIG(p)) != 0) { + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; + } postsig(sig); + } } /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); } curpriority = p->p_priority; - return(have_mplock); + return(have_giant); } /* @@ -226,13 +236,20 @@ trap(frame) u_quad_t sticks = 0; int i = 0, ucode = 0, type, code; vm_offset_t eva; +#ifdef POWERFAIL_NMI + static int lastalert = 0; +#endif - if (!(frame.tf_eflags & PSL_I)) { + atomic_add_int(&cnt.v_trap, 1); + + if ((frame.tf_eflags & PSL_I) == 0) { /* - * Buggy application or kernel code has disabled interrupts - * and then trapped. 
Enabling interrupts now is wrong, but - * it is better than running with interrupts disabled until - * they are accidentally enabled later. + * Buggy application or kernel code has disabled + * interrupts and then trapped. Enabling interrupts + * now is wrong, but it is better than running with + * interrupts disabled until they are accidentally + * enabled later. XXX Consider whether is this still + * correct. */ type = frame.tf_trapno; if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM)) @@ -252,54 +269,27 @@ trap(frame) eva = 0; if (frame.tf_trapno == T_PAGEFLT) { /* - * For some Cyrix CPUs, %cr2 is clobbered by interrupts. - * This problem is worked around by using an interrupt - * gate for the pagefault handler. We are finally ready - * to read %cr2 and then must reenable interrupts. - * - * XXX this should be in the switch statement, but the - * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the - * flow of control too much for this to be obviously - * correct. + * For some Cyrix CPUs, %cr2 is clobbered by + * interrupts. This problem is worked around by using + * an interrupt gate for the pagefault handler. We + * are finally ready to read %cr2 and then must + * reenable interrupts. */ eva = rcr2(); enable_intr(); - } + } + + mtx_enter(&Giant, MTX_DEF); #if defined(I586_CPU) && !defined(NO_F00F_HACK) restart: #endif + type = frame.tf_trapno; code = frame.tf_err; - if (in_vm86call) { - if (frame.tf_eflags & PSL_VM && - (type == T_PROTFLT || type == T_STKFLT)) { - i = vm86_emulate((struct vm86frame *)&frame); - if (i != 0) - /* - * returns to original process - */ - vm86_trap((struct vm86frame *)&frame); - return; - } - switch (type) { - /* - * these traps want either a process context, or - * assume a normal userspace trap. - */ - case T_PROTFLT: - case T_SEGNPFLT: - trap_fatal(&frame, eva); - return; - case T_TRCTRAP: - type = T_BPTFLT; /* kernel breakpoint */ - /* FALL THROUGH */ - } - goto kernel_trap; /* normal kernel trap handling */ - } - - if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) { + if ((ISPL(frame.tf_cs) == SEL_UPL) || + ((frame.tf_eflags & PSL_VM) && !in_vm86call)) { /* user trap */ sticks = p->p_sticks; @@ -322,16 +312,6 @@ restart: i = SIGFPE; break; - case T_ASTFLT: /* Allow process switch */ - astoff(); - cnt.v_soft++; - if (p->p_flag & P_OWEUPC) { - p->p_flag &= ~P_OWEUPC; - addupc_task(p, p->p_stats->p_prof.pr_addr, - p->p_stats->p_prof.pr_ticks); - } - goto out; - /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle @@ -342,7 +322,7 @@ restart: if (frame.tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)&frame); if (i == 0) - goto out; + goto user; break; } /* FALL THROUGH */ @@ -357,14 +337,20 @@ restart: case T_PAGEFLT: /* page fault */ i = trap_pfault(&frame, TRUE, eva); - if (i == -1) - return; #if defined(I586_CPU) && !defined(NO_F00F_HACK) - if (i == -2) + if (i == -2) { + /* + * f00f hack workaround has triggered, treat + * as illegal instruction not page fault. 
+ */ + frame.tf_trapno = T_PRIVINFLT; goto restart; + } #endif - if (i == 0) + if (i == -1) goto out; + if (i == 0) + goto user; ucode = T_PAGEFLT; break; @@ -377,7 +363,15 @@ restart: #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI - goto handle_powerfail; +#ifndef TIMER_FREQ +# define TIMER_FREQ 1193182 +#endif + if (time_second - lastalert > 10) { + log(LOG_WARNING, "NMI: power fail\n"); + sysbeep(TIMER_FREQ/880, hz); + lastalert = time_second; + } + goto out; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { @@ -391,7 +385,7 @@ restart: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out; } else if (panic_on_nmi) panic("NMI indicates hardware failure"); break; @@ -410,9 +404,9 @@ restart: case T_DNA: #if NNPX > 0 - /* if a transparent fault (due to context switch "late") */ + /* transparent fault (due to context switch "late") */ if (npxdna()) - return; + goto out; #endif if (!pmath_emulate) { i = SIGFPE; @@ -422,7 +416,7 @@ restart: i = (*pmath_emulate)(&frame); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) - return; + goto out; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } @@ -435,13 +429,12 @@ restart: break; } } else { -kernel_trap: /* kernel trap */ switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(&frame, FALSE, eva); - return; + goto out; case T_DNA: #if NNPX > 0 @@ -451,31 +444,35 @@ kernel_trap: * registered such use. */ if (npxdna()) - return; + goto out; #endif break; - case T_PROTFLT: /* general protection fault */ - case T_SEGNPFLT: /* segment not present fault */ /* - * Invalid segment selectors and out of bounds - * %eip's and %esp's can be set up in user mode. - * This causes a fault in kernel mode when the - * kernel tries to return to user mode. We want - * to get this fault so that we can fix the - * problem here and not have to check all the - * selectors and pointers when the user changes - * them. + * The following two traps can happen in + * vm86 mode, and, if so, we want to handle + * them specially. */ -#define MAYBE_DORETI_FAULT(where, whereto) \ - do { \ - if (frame.tf_eip == (int)where) { \ - frame.tf_eip = (int)whereto; \ - return; \ - } \ - } while (0) - - if (intr_nesting_level == 0) { + case T_PROTFLT: /* general protection fault */ + case T_STKFLT: /* stack fault */ + if (frame.tf_eflags & PSL_VM) { + i = vm86_emulate((struct vm86frame *)&frame); + if (i != 0) + /* + * returns to original process + */ + vm86_trap((struct vm86frame *)&frame); + goto out; + } + /* FALL THROUGH */ + + case T_SEGNPFLT: /* segment not present fault */ + if (in_vm86call) + break; + + if (intr_nesting_level != 0) + break; + /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the @@ -488,20 +485,38 @@ kernel_trap: if (frame.tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; psignal(p, SIGBUS); - return; + goto out; + } + + /* + * Invalid segment selectors and out of bounds + * %eip's and %esp's can be set up in user mode. + * This causes a fault in kernel mode when the + * kernel tries to return to user mode. We want + * to get this fault so that we can fix the + * problem here and not have to check all the + * selectors and pointers when the user changes + * them. 
+ */ + if (frame.tf_eip == (int)doreti_iret) { + frame.tf_eip = (int)doreti_iret_fault; + goto out; + } + if (frame.tf_eip == (int)doreti_popl_ds) { + frame.tf_eip = (int)doreti_popl_ds_fault; + goto out; + } + if (frame.tf_eip == (int)doreti_popl_es) { + frame.tf_eip = (int)doreti_popl_es_fault; + goto out; } - MAYBE_DORETI_FAULT(doreti_iret, - doreti_iret_fault); - MAYBE_DORETI_FAULT(doreti_popl_ds, - doreti_popl_ds_fault); - MAYBE_DORETI_FAULT(doreti_popl_es, - doreti_popl_es_fault); - MAYBE_DORETI_FAULT(doreti_popl_fs, - doreti_popl_fs_fault); + if (frame.tf_eip == (int)doreti_popl_fs) { + frame.tf_eip = (int)doreti_popl_fs_fault; + goto out; + } if (curpcb && curpcb->pcb_onfault) { frame.tf_eip = (int)curpcb->pcb_onfault; - return; - } + goto out; } break; @@ -517,7 +532,7 @@ kernel_trap: */ if (frame.tf_eflags & PSL_NT) { frame.tf_eflags &= ~PSL_NT; - return; + goto out; } break; @@ -529,7 +544,7 @@ kernel_trap: * silently until the syscall handler has * saved the flags. */ - return; + goto out; } if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { /* @@ -537,7 +552,7 @@ kernel_trap: * flags. Stop single stepping it. */ frame.tf_eflags &= ~PSL_T; - return; + goto out; } /* * Ignore debug register trace traps due to @@ -549,13 +564,13 @@ kernel_trap: * in kernel space because that is useful when * debugging the kernel. */ - if (user_dbreg_trap()) { + if (user_dbreg_trap() && !in_vm86call) { /* * Reset breakpoint bits because the * processor doesn't */ load_dr6(rdr6() & 0xfffffff0); - return; + goto out; } /* * Fall through (TRCTRAP kernel mode, kernel address) @@ -567,28 +582,19 @@ kernel_trap: */ #ifdef DDB if (kdb_trap (type, 0, &frame)) - return; + goto out; #endif break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI -#ifndef TIMER_FREQ -# define TIMER_FREQ 1193182 -#endif - handle_powerfail: - { - static unsigned lastalert = 0; - - if(time_second - lastalert > 10) - { + if (time_second - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(TIMER_FREQ/880, hz); lastalert = time_second; - } - return; } + goto out; #else /* !POWERFAIL_NMI */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) { @@ -602,16 +608,16 @@ kernel_trap: kdb_trap (type, 0, &frame); } #endif /* DDB */ - return; + goto out; } else if (panic_on_nmi == 0) - return; + goto out; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } trap_fatal(&frame, eva); - return; + goto out; } /* Translate fault for emulators (e.g. Linux) */ @@ -630,8 +636,10 @@ kernel_trap: } #endif -out: +user: userret(p, &frame, sticks, 1); +out: + mtx_exit(&Giant, MTX_DEF); } #ifdef notyet @@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva) * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) - if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) { - frame->tf_trapno = T_PRIVINFLT; + if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) return -2; - } #endif if (usermode) goto nogo; @@ -869,8 +875,7 @@ trap_fatal(frame, eva) frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? 
"user" : "kernel"); #ifdef SMP - /* three seperate prints in case of a trap on an unmapped page */ - printf("mp_lock = %08x; ", mp_lock); + /* two seperate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif @@ -917,26 +922,6 @@ trap_fatal(frame, eva) } else { printf("Idle\n"); } - printf("interrupt mask = "); - if ((cpl & net_imask) == net_imask) - printf("net "); - if ((cpl & tty_imask) == tty_imask) - printf("tty "); - if ((cpl & bio_imask) == bio_imask) - printf("bio "); - if ((cpl & cam_imask) == cam_imask) - printf("cam "); - if (cpl == 0) - printf("none"); -#ifdef SMP -/** - * XXX FIXME: - * we probably SHOULD have stopped the other CPUs before now! - * another CPU COULD have been touching cpl at this moment... - */ - printf(" <- SMP: XXX"); -#endif - printf("\n"); #ifdef KDB if (kdb_trap(&psl)) @@ -973,8 +958,7 @@ dblfault_handler() printf("esp = 0x%x\n", common_tss.tss_esp); printf("ebp = 0x%x\n", common_tss.tss_ebp); #ifdef SMP - /* three seperate prints in case of a trap on an unmapped page */ - printf("mp_lock = %08x; ", mp_lock); + /* two seperate prints in case of a trap on an unmapped page */ printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif @@ -1048,12 +1032,14 @@ syscall2(frame) int error; int narg; int args[8]; - int have_mplock = 0; + int have_giant = 0; u_int code; + atomic_add_int(&cnt.v_syscall, 1); + #ifdef DIAGNOSTIC if (ISPL(frame.tf_cs) != SEL_UPL) { - get_mplock(); + mtx_enter(&Giant, MTX_DEF); panic("syscall"); /* NOT REACHED */ } @@ -1075,9 +1061,9 @@ syscall2(frame) /* * The prep code is not MP aware. */ - get_mplock(); + mtx_enter(&Giant, MTX_DEF); (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); - rel_mplock(); + mtx_exit(&Giant, MTX_DEF); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. @@ -1114,8 +1100,8 @@ syscall2(frame) */ if (params && (i = narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { - get_mplock(); - have_mplock = 1; + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, narg, args); @@ -1129,15 +1115,15 @@ syscall2(frame) * we are ktracing */ if ((callp->sy_narg & SYF_MPSAFE) == 0) { - get_mplock(); - have_mplock = 1; + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } ktrsyscall(p->p_tracep, code, narg, args); } @@ -1192,9 +1178,9 @@ bad: * Traced syscall. trapsignal() is not MP aware. 
*/ if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); @@ -1203,13 +1189,13 @@ bad: /* * Handle reschedule and other end-of-syscall issues */ - have_mplock = userret(p, &frame, sticks, have_mplock); + have_giant = userret(p, &frame, sticks, have_giant); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) { - if (have_mplock == 0) { - get_mplock(); - have_mplock = 1; + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; } ktrsysret(p->p_tracep, code, error, p->p_retval[0]); } @@ -1225,27 +1211,66 @@ bad: /* * Release the MP lock if we had to get it */ - if (have_mplock) - rel_mplock(); + if (have_giant) + mtx_exit(&Giant, MTX_DEF); + + mtx_assert(&sched_lock, MA_NOTOWNED); + mtx_assert(&Giant, MA_NOTOWNED); +} + +void +ast(frame) + struct trapframe frame; +{ + struct proc *p = CURPROC; + u_quad_t sticks; + + /* + * handle atomicy by looping since interrupts are enabled and the + * MP lock is not held. + */ + sticks = ((volatile struct proc *)p)->p_sticks; + while (sticks != ((volatile struct proc *)p)->p_sticks) + sticks = ((volatile struct proc *)p)->p_sticks; + + astoff(); + atomic_add_int(&cnt.v_soft, 1); + if (p->p_flag & P_OWEUPC) { + mtx_enter(&Giant, MTX_DEF); + p->p_flag &= ~P_OWEUPC; + addupc_task(p, p->p_stats->p_prof.pr_addr, + p->p_stats->p_prof.pr_ticks); +} + if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0) + mtx_exit(&Giant, MTX_DEF); } /* * Simplified back end of syscall(), used when returning from fork() - * directly into user mode. MP lock is held on entry and should be - * held on return. + * directly into user mode. Giant is not held on entry, and must not + * be held on return. 
*/ void fork_return(p, frame) struct proc *p; struct trapframe frame; { + int have_giant; + frame.tf_eax = 0; /* Child returns zero */ frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; - userret(p, &frame, 0, 1); + have_giant = userret(p, &frame, 0, mtx_owned(&Giant)); #ifdef KTRACE - if (KTRPOINT(p, KTR_SYSRET)) + if (KTRPOINT(p, KTR_SYSRET)) { + if (have_giant == 0) { + mtx_enter(&Giant, MTX_DEF); + have_giant = 1; + } ktrsysret(p->p_tracep, SYS_fork, 0, 0); + } #endif + if (have_giant) + mtx_exit(&Giant, MTX_DEF); } diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index 15044ab..724f3c2 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -93,10 +94,6 @@ #include #endif -#ifdef SMP -#define disable_intr() CLOCK_DISABLE_INTR() -#define enable_intr() CLOCK_ENABLE_INTR() - #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ @@ -104,7 +101,6 @@ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif -#endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -147,7 +143,9 @@ int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; +#if 0 static u_int clk_imask = HWI_MASK | SWI_MASK; +#endif static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; @@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, static void clkintr(struct clockframe frame) { + int intrsave; + if (timecounter->tc_get_timecount == i8254_get_timecount) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); if (i8254_ticked) i8254_ticked = 0; else { @@ -214,7 +216,8 @@ clkintr(struct clockframe frame) i8254_lastcount = 0; } clkintr_pending = 0; - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); } timer_func(&frame); switch (timer0_state) { @@ -233,14 +236,17 @@ clkintr(struct clockframe frame) break; case ACQUIRE_PENDING: + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); @@ -249,7 +255,9 @@ clkintr(struct clockframe frame) case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = hardclock_max_count; @@ -257,7 +265,8 @@ clkintr(struct clockframe frame) TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; @@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc) static int getit(void) { - u_long ef; - int high, low; + int high, low, intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); /* Select timer0 and latch counter value. 
*/ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -417,7 +426,7 @@ getit(void) high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return ((high << 8) | low); } @@ -523,6 +532,7 @@ sysbeepstop(void *chan) int sysbeep(int pitch, int period) { + int intrsave; int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) @@ -531,10 +541,13 @@ sysbeep(int pitch, int period) splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); @@ -683,11 +696,12 @@ fail: static void set_timer_freq(u_int freq, int intr_freq) { - u_long ef; + int intrsave; int new_timer0_max_count; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { @@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq) outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq) void i8254_restore(void) { - u_long ef; + int intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -979,8 +994,8 @@ cpu_initclocks() { int diag; #ifdef APIC_IO - int apic_8254_trial; - struct intrec *clkdesc; + int apic_8254_trial, num_8254_ticks; + struct intrec *clkdesc, *rtcdesc; #endif /* APIC_IO */ if (statclock_disable) { @@ -1014,14 +1029,15 @@ cpu_initclocks() } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } - - clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); - #else /* APIC_IO */ - inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, + /* + * XXX Check the priority of this interrupt handler. I + * couldn't find anything suitable in the BSD/OS code (grog, + * 19 July 2000). + */ + /* Setup the PIC clk handler. The APIC handler is setup later */ + inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_EXCL); INTREN(IRQ0); @@ -1032,8 +1048,18 @@ cpu_initclocks() writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ - if (statclock_disable) + if (statclock_disable) { +#ifdef APIC_IO + /* + * XXX - if statclock is disabled, don't attempt the APIC + * trial. Not sure this is sane for APIC_IO. 
+ */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif /* APIC_IO */ return; + } diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); @@ -1041,34 +1067,44 @@ cpu_initclocks() #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); -#endif /* APIC_IO */ - inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, - INTR_EXCL); - -#ifdef APIC_IO - INTREN(APIC_IRQ8); -#else - INTREN(IRQ8); -#endif /* APIC_IO */ + if (apic_8254_trial) { + /* + * XXX - We use fast interrupts for clk and rtc long enough to + * perform the APIC probe and then revert to exclusive + * interrupts. + */ + clkdesc = inthand_add("clk", apic_8254_intr, + (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST); + INTREN(1 << apic_8254_intr); - writertc(RTC_STATUSB, rtc_statusb); + rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, + PI_REALTIME, INTR_FAST); /* XXX */ + INTREN(APIC_IRQ8); + writertc(RTC_STATUSB, rtc_statusb); -#ifdef APIC_IO - if (apic_8254_trial) { - printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ - if (read_intr_count(apic_8254_intr) < 3) { + num_8254_ticks = read_intr_count(apic_8254_intr); + + /* disable and remove our fake handlers */ + INTRDIS(1 << apic_8254_intr); + inthand_remove(clkdesc); + + writertc(RTC_STATUSA, rtc_statusa); + writertc(RTC_STATUSB, RTCSB_24HR); + + INTRDIS(APIC_IRQ8); + inthand_remove(rtcdesc); + + if (num_8254_ticks < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ - INTRDIS(1 << apic_8254_intr); - inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", @@ -1087,13 +1123,27 @@ cpu_initclocks() } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); - inthand_add("clk", apic_8254_intr, - (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); } } + + /* Finally, setup the real clock handlers */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif + + inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME, + INTR_EXCL); +#ifdef APIC_IO + INTREN(APIC_IRQ8); +#else + INTREN(IRQ8); +#endif + + writertc(RTC_STATUSB, rtc_statusb); + +#ifdef APIC_IO if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) @@ -1198,11 +1248,12 @@ static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; - u_long ef; + int intrsave; u_int high, low; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc) count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || - ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && + ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. 
*/ @@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc) i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return (count); } diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index cfb6cee..831ab3b 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -57,12 +57,14 @@ #include #include #include +#include #include #include #include #include #include +#include #ifdef SMP #include #endif @@ -177,9 +179,8 @@ cpu_fork(p1, p2, flags) * pcb2->pcb_onfault: cloned above (always NULL here?). */ -#ifdef SMP - pcb2->pcb_mpnest = 1; -#endif + pcb2->pcb_schednest = 0; + /* * XXX don't copy the i/o pages. this should probably be fixed. */ @@ -256,8 +257,11 @@ cpu_exit(p) reset_dbregs(); pcb->pcb_flags &= ~PCB_DBREGS; } + mtx_enter(&sched_lock, MTX_SPIN); + mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH); + mtx_assert(&Giant, MA_NOTOWNED); cnt.v_swtch++; - cpu_switch(p); + cpu_switch(); panic("cpu_exit"); } @@ -406,17 +410,10 @@ vunmapbuf(bp) static void cpu_reset_proxy() { - u_int saved_mp_lock; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) - ; /* Wait for other cpu to disable interupts */ - saved_mp_lock = mp_lock; - mp_lock = 1; - printf("cpu_reset_proxy: Grabbed mp lock for BSP\n"); - cpu_reset_proxy_active = 3; - while (cpu_reset_proxy_active == 3) - ; /* Wait for other cpu to enable interrupts */ + ; /* Wait for other cpu to see that we've started */ stop_cpus((1<= ZIDLE_HI(cnt.v_free_count)) return(0); -#ifdef SMP - if (try_mplock()) { -#endif + if (mtx_try_enter(&Giant, MTX_DEF)) { s = splvm(); - __asm __volatile("sti" : : : "memory"); + intrsave = save_intr(); + enable_intr(); zero_state = 0; m = vm_page_list_find(PQ_FREE, free_rover, FALSE); if (m != NULL && (m->flags & PG_ZERO) == 0) { @@ -595,14 +584,10 @@ vm_page_zero_idle() } free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK; splx(s); - __asm __volatile("cli" : : : "memory"); -#ifdef SMP - rel_mplock(); -#endif + restore_intr(intrsave); + mtx_exit(&Giant, MTX_DEF); return (1); -#ifdef SMP } -#endif /* * We have to enable interrupts for a moment if the try_mplock fails * in order to potentially take an IPI. XXX this should be in diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h index ffabf7f..18822b8 100644 --- a/sys/amd64/include/cpu.h +++ b/sys/amd64/include/cpu.h @@ -46,6 +46,7 @@ #include #include #include +#include /* * definitions of cpu-dependent requirements @@ -86,7 +87,9 @@ * added, we will have an atomicy problem. The type of atomicy we need is * a non-locked orl. */ -#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0) +#define need_resched() do { \ + PCPU_SET(astpending, AST_RESCHED|AST_PENDING); \ +} while (0) #define resched_wanted() (astpending & AST_RESCHED) /* @@ -109,8 +112,9 @@ * it off (asynchronous need_resched() conflicts are not critical). 
*/ #define signotify(p) aston() - -#define aston() do { astpending |= AST_PENDING; } while (0) +#define aston() do { \ + PCPU_SET(astpending, astpending | AST_PENDING); \ +} while (0) #define astoff() /* @@ -135,7 +139,9 @@ #ifdef _KERNEL extern char btext[]; extern char etext[]; +#ifndef intr_nesting_level extern u_char intr_nesting_level; +#endif void fork_trampoline __P((void)); void fork_return __P((struct proc *, struct trapframe)); diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 9a4052f..39868df 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -86,20 +86,29 @@ static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); -#ifdef SMP - MPINTR_LOCK(); -#endif } static __inline void enable_intr(void) { -#ifdef SMP - MPINTR_UNLOCK(); -#endif __asm __volatile("sti"); } +static __inline u_int +save_intr(void) +{ + u_int ef; + + __asm __volatile("pushfl; popl %0" : "=r" (ef)); + return (ef); +} + +static __inline void +restore_intr(u_int ef) +{ + __asm __volatile("pushl %0; popfl" : : "r" (ef) : "memory" ); +} + #define HAVE_INLINE_FFS static __inline int diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index 61c5ecf..95b5759 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -36,6 +36,7 @@ #endif #include +#include #include #include #include @@ -65,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY { #define MP_ANNOUNCE_POST 0x19 +/* used to hold the AP's until we are ready to release them */ +struct simplelock ap_boot_lock; /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ int current_postcode; @@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr); static void install_ap_tramp(u_int boot_addr); static int start_ap(int logicalCpu, u_int boot_addr); static int apic_int_is_bus_type(int intr, int bus_type); +static void release_aps(void *dummy); /* * Calculate usable address in base memory for AP trampoline code. @@ -403,7 +408,7 @@ found: /* - * Startup the SMP processors. + * Initialize the SMP hardware and the APIC and start up the AP's. */ void mp_start(void) @@ -619,6 +624,9 @@ mp_enable(u_int boot_addr) /* initialize all SMP locks */ init_locks(); + /* obtain the ap_boot_lock */ + s_lock(&ap_boot_lock); + /* start each Application Processor */ start_all_aps(boot_addr); } @@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock; /* critical region around INTR() routines */ struct simplelock intr_lock; -/* lock regions protected in UP kernel via cli/sti */ -struct simplelock mpintr_lock; - /* lock region used by kernel profiling */ struct simplelock mcount_lock; @@ -1885,26 +1890,16 @@ struct simplelock clock_lock; /* lock around the MP rendezvous */ static struct simplelock smp_rv_lock; +/* only 1 CPU can panic at a time :) */ +struct simplelock panic_lock; + static void init_locks(void) { - /* - * Get the initial mp_lock with a count of 1 for the BSP. - * This uses a LOGICAL cpu ID, ie BSP == 0. 
- */ - mp_lock = 0x00000001; - -#if 0 - /* ISR uses its own "giant lock" */ - isr_lock = FREE_LOCK; -#endif - #if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ) s_lock_init((struct simplelock*)&apic_itrace_debuglock); #endif - s_lock_init((struct simplelock*)&mpintr_lock); - s_lock_init((struct simplelock*)&mcount_lock); s_lock_init((struct simplelock*)&fast_intr_lock); @@ -1912,6 +1907,7 @@ init_locks(void) s_lock_init((struct simplelock*)&imen_lock); s_lock_init((struct simplelock*)&cpl_lock); s_lock_init(&smp_rv_lock); + s_lock_init(&panic_lock); #ifdef USE_COMLOCK s_lock_init((struct simplelock*)&com_lock); @@ -1919,11 +1915,9 @@ init_locks(void) #ifdef USE_CLOCKLOCK s_lock_init((struct simplelock*)&clock_lock); #endif /* USE_CLOCKLOCK */ -} - -/* Wait for all APs to be fully initialized */ -extern int wait_ap(unsigned int); + s_lock_init(&ap_boot_lock); +} /* * start each AP in our list @@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr) SMPpt[pg + 4] = 0; /* *prv_PMAP1 */ /* prime data page for it to use */ + SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu); gd->gd_cpuid = x; gd->gd_cpu_lockid = x << 24; gd->gd_prv_CMAP1 = &SMPpt[pg + 1]; @@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr) return 0; /* return FAILURE */ } - /* * Flush the TLB on all other CPU's * @@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW, void ap_init(void); void -ap_init() +ap_init(void) { u_int apic_id; + /* lock against other AP's that are waking up */ + s_lock(&ap_boot_lock); + /* BSP may have changed PTD while we're waiting for the lock */ cpu_invltlb(); @@ -2397,6 +2394,30 @@ ap_init() smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */ smp_active = 1; /* historic */ } + + /* let other AP's wake up now */ + s_unlock(&ap_boot_lock); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ; /* nothing */ + + /* + * Set curproc to our per-cpu idleproc so that mutexes have + * something unique to lock with. + */ + PCPU_SET(curproc,idleproc); + PCPU_SET(prevproc,idleproc); + + microuptime(&switchtime); + switchticks = ticks; + + /* ok, now grab sched_lock and enter the scheduler */ + enable_intr(); + mtx_enter(&sched_lock, MTX_SPIN); + cpu_throw(); /* doesn't return */ + + panic("scheduler returned us to ap_init"); } #ifdef BETTER_CLOCK @@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap) p = checkstate_curproc[id]; cpustate = checkstate_cpustate[id]; + /* XXX */ + if (p->p_ithd) + cpustate = CHECKSTATE_INTR; + else if (p == idleproc) + cpustate = CHECKSTATE_SYS; + switch (cpustate) { case CHECKSTATE_USER: if (p->p_flag & P_PROFIL) @@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap) if (pscnt > 1) return; - if (!p) + if (p == idleproc) { + p->p_sticks++; cp_time[CP_IDLE]++; - else { + } else { p->p_sticks++; cp_time[CP_SYS]++; } @@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap) p->p_iticks++; cp_time[CP_INTR]++; } - if (p != NULL) { + if (p != idleproc) { schedclock(p); /* Update resource usage integrals and maximums. 
*/ @@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *), /* release lock */ s_unlock(&smp_rv_lock); } + +void +release_aps(void *dummy __unused) +{ + s_unlock(&ap_boot_lock); +} + +SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); diff --git a/sys/amd64/include/mutex.h b/sys/amd64/include/mutex.h new file mode 100644 index 0000000..ef0c963 --- /dev/null +++ b/sys/amd64/include/mutex.h @@ -0,0 +1,786 @@ +/*- + * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Berkeley Software Design Inc's name may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $ + * $FreeBSD$ + */ + +#ifndef _MACHINE_MUTEX_H_ +#define _MACHINE_MUTEX_H_ + +#ifndef LOCORE + +#include +#include +#include +#include +#include + +/* + * If kern_mutex.c is being built, compile non-inlined versions of various + * functions so that kernel modules can use them. 
+ */ +#ifndef _KERN_MUTEX_C_ +#define _MTX_INLINE static __inline +#else +#define _MTX_INLINE +#endif + +/* + * Mutex flags + * + * Types + */ +#define MTX_DEF 0x0 /* Default (spin/sleep) */ +#define MTX_SPIN 0x1 /* Spin only lock */ + +/* Options */ +#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */ +#define MTX_NORECURSE 0x8 /* No recursion possible */ +#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */ +#define MTX_NOSWITCH 0x20 /* Do not switch on release */ +#define MTX_FIRST 0x40 /* First spin lock holder */ +#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */ + +/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */ +#define MTX_HARDOPTS (MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH) + +/* Flags/value used in mtx_lock */ +#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */ +#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */ +#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED) +#define MTX_UNOWNED 0x8 /* Cookie for free mutex */ + +struct proc; /* XXX */ + +/* + * Sleep/spin mutex + */ +struct mtx { + volatile u_int mtx_lock; /* lock owner/gate/flags */ + volatile u_short mtx_recurse; /* number of recursive holds */ + u_short mtx_f1; + u_int mtx_savefl; /* saved flags (for spin locks) */ + char *mtx_description; + TAILQ_HEAD(, proc) mtx_blocked; + LIST_ENTRY(mtx) mtx_contested; + struct mtx *mtx_next; /* all locks in system */ + struct mtx *mtx_prev; +#ifdef SMP_DEBUG + /* If you add anything here, adjust the mtxf_t definition below */ + struct witness *mtx_witness; + LIST_ENTRY(mtx) mtx_held; + char *mtx_file; + int mtx_line; +#endif /* SMP_DEBUG */ +}; + +typedef struct mtx mtx_t; + +/* + * Filler for structs which need to remain the same size + * whether or not SMP_DEBUG is turned on. + */ +typedef struct mtxf { +#ifdef SMP_DEBUG + char mtxf_data[0]; +#else + char mtxf_data[4*sizeof(void *) + sizeof(int)]; +#endif +} mtxf_t; + +#define mp_fixme(string) + +#ifdef _KERNEL +/* Misc */ +#define CURTHD ((u_int)CURPROC) /* Current thread ID */ + +/* Prototypes */ +void mtx_init(mtx_t *m, char *description, int flag); +void mtx_enter_hard(mtx_t *, int type, int flags); +void mtx_exit_hard(mtx_t *, int type); +void mtx_destroy(mtx_t *m); + +#if (defined(KLD_MODULE) || defined(_KERN_MUTEX_C_)) +void mtx_enter(mtx_t *mtxp, int type); +int mtx_try_enter(mtx_t *mtxp, int type); +void mtx_exit(mtx_t *mtxp, int type); +#endif + +/* Global locks */ +extern mtx_t sched_lock; +extern mtx_t Giant; + +/* + * Used to replace return with an exit Giant and return. 
+ */ + +#define EGAR(a) \ +do { \ + mtx_exit(&Giant, MTX_DEF); \ + return (a); \ +} while (0) + +#define VEGAR \ +do { \ + mtx_exit(&Giant, MTX_DEF); \ + return; \ +} while (0) + +#define DROP_GIANT() \ +do { \ + int _giantcnt; \ + WITNESS_SAVE_DECL(Giant); \ + \ + WITNESS_SAVE(&Giant, Giant); \ + for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \ + mtx_exit(&Giant, MTX_DEF) + +#define PICKUP_GIANT() \ + mtx_assert(&Giant, MA_NOTOWNED); \ + while (_giantcnt--) \ + mtx_enter(&Giant, MTX_DEF); \ + WITNESS_RESTORE(&Giant, Giant); \ +} while (0) + +#define PARTIAL_PICKUP_GIANT() \ + mtx_assert(&Giant, MA_NOTOWNED); \ + while (_giantcnt--) \ + mtx_enter(&Giant, MTX_DEF); \ + WITNESS_RESTORE(&Giant, Giant) + + +/* + * Debugging + */ +#ifndef SMP_DEBUG +#define mtx_assert(m, what) +#else /* SMP_DEBUG */ + +#define MA_OWNED 1 +#define MA_NOTOWNED 2 +#define mtx_assert(m, what) { \ + switch ((what)) { \ + case MA_OWNED: \ + ASS(mtx_owned((m))); \ + break; \ + case MA_NOTOWNED: \ + ASS(!mtx_owned((m))); \ + break; \ + default: \ + panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \ + } \ +} + +#ifdef INVARIANTS +#define ASS(ex) MPASS(ex) +#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \ + #ex, __FILE__, __LINE__) +#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \ + what, __FILE__, __LINE__) + +#ifdef MTX_STRS +char STR_IEN[] = "fl & 0x200"; +char STR_IDIS[] = "!(fl & 0x200)"; +#else /* MTX_STRS */ +extern char STR_IEN[]; +extern char STR_IDIS[]; +#endif /* MTX_STRS */ +#define ASS_IEN MPASS2(read_eflags() & 0x200, STR_IEN) +#define ASS_IDIS MPASS2((read_eflags() & 0x200) == 0, STR_IDIS) +#endif /* INVARIANTS */ + +#endif /* SMP_DEBUG */ + +#if !defined(SMP_DEBUG) || !defined(INVARIANTS) +#define ASS(ex) +#define MPASS(ex) +#define MPASS2(ex, where) +#define ASS_IEN +#define ASS_IDIS +#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */ + +#ifdef WITNESS +#ifndef SMP_DEBUG +#error WITNESS requires SMP_DEBUG +#endif /* SMP_DEBUG */ +#define WITNESS_ENTER(m, f) \ + if ((m)->mtx_witness != NULL) \ + witness_enter((m), (f), __FILE__, __LINE__) +#define WITNESS_EXIT(m, f) \ + if ((m)->mtx_witness != NULL) \ + witness_exit((m), (f), __FILE__, __LINE__) + +#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__) +#define WITNESS_SAVE_DECL(n) \ + char * __CONCAT(n, __wf); \ + int __CONCAT(n, __wl) + +#define WITNESS_SAVE(m, n) \ +do { \ + if ((m)->mtx_witness != NULL) \ + witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \ +} while (0) + +#define WITNESS_RESTORE(m, n) \ +do { \ + if ((m)->mtx_witness != NULL) \ + witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \ +} while (0) + +void witness_init(mtx_t *, int flag); +void witness_destroy(mtx_t *); +void witness_enter(mtx_t *, int, char *, int); +void witness_try_enter(mtx_t *, int, char *, int); +void witness_exit(mtx_t *, int, char *, int); +void witness_display(void(*)(const char *fmt, ...)); +void witness_list(struct proc *); +int witness_sleep(int, mtx_t *, char *, int); +void witness_save(mtx_t *, char **, int *); +void witness_restore(mtx_t *, char *, int); +#else /* WITNESS */ +#define WITNESS_ENTER(m, flag) +#define WITNESS_EXIT(m, flag) +#define WITNESS_SLEEP(check, m) +#define WITNESS_SAVE_DECL(n) +#define WITNESS_SAVE(m, n) +#define WITNESS_RESTORE(m, n) + +/* + * flag++ is slezoid way of shutting up unused parameter warning + * in mtx_init() + */ +#define witness_init(m, flag) flag++ +#define witness_destroy(m) +#define witness_enter(m, flag, f, l) 
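As a usage illustration for the DROP_GIANT()/PICKUP_GIANT() pair defined above: DROP_GIANT() opens a brace-unbalanced block that records how many times Giant is currently held and releases it, and PICKUP_GIANT() re-acquires Giant to the saved depth and closes that block, so the two must appear as a lexical pair in the same scope. A minimal sketch, not part of this change (the function name and the tsleep() call site are assumed purely for illustration):

static void
example_wait(void *ident)
{
	DROP_GIANT();			/* save recursion depth, release Giant */
	tsleep(ident, PVM, "xmpl", 0);	/* block without holding Giant */
	PICKUP_GIANT();			/* re-acquire Giant to the saved depth */
}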
+#define witness_try_enter(m, flag, f, l ) +#define witness_exit(m, flag, f, l) +#endif /* WITNESS */ + +/* + * Assembly macros (for internal use only) + *------------------------------------------------------------------------------ + */ + +#define _V(x) __STRING(x) + +#ifndef I386_CPU + +/* + * For 486 and newer processors. + */ + +/* Get a sleep lock, deal with recursion inline. */ +#define _getlock_sleep(mtxp, tid, type) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \ +" " MPLOCKED "" \ +" cmpxchgl %3,%1;" /* Try */ \ +" jz 1f;" /* Got it */ \ +" andl $" _V(MTX_FLAGMASK) ",%%eax;" /* turn off spec bits */ \ +" cmpl %%eax,%3;" /* already have it? */ \ +" je 2f;" /* yes, recurse */ \ +" pushl %4;" \ +" pushl %5;" \ +" call mtx_enter_hard;" \ +" addl $8,%%esp;" \ +" jmp 1f;" \ +"2: lock; orl $" _V(MTX_RECURSE) ",%1;" \ +" incw %2;" \ +"1:" \ +"# getlock_sleep" \ + : "=&a" (_res), /* 0 (dummy output) */ \ + "+m" (mtxp->mtx_lock), /* 1 */ \ + "+m" (mtxp->mtx_recurse) /* 2 */ \ + : "r" (tid), /* 3 (input) */ \ + "gi" (type), /* 4 */ \ + "g" (mtxp) /* 5 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* Get a spin lock, handle recursion inline (as the less common case) */ +#define _getlock_spin_block(mtxp, tid, type) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" pushfl;" \ +" cli;" \ +" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \ +" " MPLOCKED "" \ +" cmpxchgl %3,%1;" /* Try */ \ +" jz 2f;" /* got it */ \ +" pushl %4;" \ +" pushl %5;" \ +" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type, oflags) */ \ +" addl $0xc,%%esp;" \ +" jmp 1f;" \ +"2: popl %2;" /* save flags */ \ +"1:" \ +"# getlock_spin_block" \ + : "=&a" (_res), /* 0 (dummy output) */ \ + "+m" (mtxp->mtx_lock), /* 1 */ \ + "=m" (mtxp->mtx_savefl) /* 2 */ \ + : "r" (tid), /* 3 (input) */ \ + "gi" (type), /* 4 */ \ + "g" (mtxp) /* 5 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Get a lock without any recursion handling. Calls the hard enter function if + * we can't get it inline. + */ +#define _getlock_norecurse(mtxp, tid, type) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \ +" " MPLOCKED "" \ +" cmpxchgl %2,%1;" /* Try */ \ +" jz 1f;" /* got it */ \ +" pushl %3;" \ +" pushl %4;" \ +" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type) */ \ +" addl $8,%%esp;" \ +"1:" \ +"# getlock_norecurse" \ + : "=&a" (_res), /* 0 (dummy output) */ \ + "+m" (mtxp->mtx_lock) /* 1 */ \ + : "r" (tid), /* 2 (input) */ \ + "gi" (type), /* 3 */ \ + "g" (mtxp) /* 4 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Release a sleep lock assuming we haven't recursed on it, recursion is handled + * in the hard function. + */ +#define _exitlock_norecurse(mtxp, tid, type) ({ \ + int _tid = (int)(tid); \ + \ + __asm __volatile ( \ +" " MPLOCKED "" \ +" cmpxchgl %4,%0;" /* try easy rel */ \ +" jz 1f;" /* released! */ \ +" pushl %2;" \ +" pushl %3;" \ +" call mtx_exit_hard;" \ +" addl $8,%%esp;" \ +"1:" \ +"# exitlock_norecurse" \ + : "+m" (mtxp->mtx_lock), /* 0 */ \ + "+a" (_tid) /* 1 */ \ + : "gi" (type), /* 2 (input) */ \ + "g" (mtxp), /* 3 */ \ + "r" (MTX_UNOWNED) /* 4 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Release a sleep lock when its likely we recursed (the code to + * deal with simple recursion is inline). 
+ */ +#define _exitlock(mtxp, tid, type) ({ \ + int _tid = (int)(tid); \ + \ + __asm __volatile ( \ +" " MPLOCKED "" \ +" cmpxchgl %5,%0;" /* try easy rel */ \ +" jz 1f;" /* released! */ \ +" testl $" _V(MTX_RECURSE) ",%%eax;" /* recursed? */ \ +" jnz 3f;" /* handle recursion */ \ + /* Lock not recursed and contested: do the hard way */ \ +" pushl %3;" \ +" pushl %4;" \ +" call mtx_exit_hard;" /* mtx_exit_hard(mtxp,type) */ \ +" addl $8,%%esp;" \ +" jmp 1f;" \ + /* lock recursed, lower recursion level */ \ +"3: decw %1;" /* one less level */ \ +" jnz 1f;" /* still recursed, done */ \ +" lock; andl $~" _V(MTX_RECURSE) ",%0;" /* turn off recurse flag */ \ +"1:" \ +"# exitlock" \ + : "+m" (mtxp->mtx_lock), /* 0 */ \ + "+m" (mtxp->mtx_recurse), /* 1 */ \ + "+a" (_tid) /* 2 */ \ + : "gi" (type), /* 3 (input) */ \ + "g" (mtxp), /* 4 */ \ + "r" (MTX_UNOWNED) /* 5 */ \ + : "memory", "ecx", "edx" /* used */ ); \ +}) + +/* + * Release a spin lock (with possible recursion). + * + * We use cmpxchgl to clear lock (instead of simple store) to flush posting + * buffers and make the change visible to other CPU's. + */ +#define _exitlock_spin(mtxp, inten1, inten2) ({ \ + int _res; \ + \ + __asm __volatile ( \ +" movw %1,%%ax;" \ +" decw %%ax;" \ +" js 1f;" \ +" movw %%ax,%1;" \ +" jmp 2f;" \ +"1: movl %0,%%eax;" \ +" movl $ " _V(MTX_UNOWNED) ",%%ecx;" \ +" " inten1 ";" \ +" " MPLOCKED "" \ +" cmpxchgl %%ecx,%0;" \ +" " inten2 ";" \ +"2:" \ +"# exitlock_spin" \ + : "+m" (mtxp->mtx_lock), /* 0 */ \ + "+m" (mtxp->mtx_recurse), /* 1 */ \ + "=&a" (_res) /* 2 */ \ + : "g" (mtxp->mtx_savefl) /* 3 (used in 'inten') */ \ + : "memory", "ecx" /* used */ ); \ +}) + +#else /* I386_CPU */ + +/* + * For 386 processors only. + */ + +/* Get a sleep lock, deal with recursion inline. */ +#define _getlock_sleep(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \ + if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \ + mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \ + else { \ + atomic_set_int(&(mp)->mtx_lock, MTX_RECURSE); \ + (mp)->mtx_recurse++; \ + } \ + } \ +} while (0) + +/* Get a spin lock, handle recursion inline (as the less common case) */ +#define _getlock_spin_block(mp, tid, type) do { \ + u_int _mtx_fl = read_eflags(); \ + disable_intr(); \ + if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \ + mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_fl); \ + else \ + (mp)->mtx_savefl = _mtx_fl; \ +} while (0) + +/* + * Get a lock without any recursion handling. Calls the hard enter function if + * we can't get it inline. + */ +#define _getlock_norecurse(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \ + mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \ +} while (0) + +/* + * Release a sleep lock assuming we haven't recursed on it, recursion is handled + * in the hard function. + */ +#define _exitlock_norecurse(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \ + mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \ +} while (0) + +/* + * Release a sleep lock when its likely we recursed (the code to + * deal with simple recursion is inline). 
+ */ +#define _exitlock(mp, tid, type) do { \ + if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) { \ + if ((mp)->mtx_lock & MTX_RECURSE) { \ + if (--((mp)->mtx_recurse) == 0) \ + atomic_clear_int(&(mp)->mtx_lock, \ + MTX_RECURSE); \ + } else { \ + mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \ + } \ + } \ +} while (0) + +/* Release a spin lock (with possible recursion). */ +#define _exitlock_spin(mp, inten1, inten2) do { \ + if ((mp)->mtx_recurse == 0) { \ + atomic_cmpset_int(&(mp)->mtx_lock, (mp)->mtx_lock, \ + MTX_UNOWNED); \ + write_eflags((mp)->mtx_savefl); \ + } else { \ + (mp)->mtx_recurse--; \ + } \ +} while (0) + +#endif /* I386_CPU */ + +/* + * Externally visible mutex functions. + *------------------------------------------------------------------------------ + */ + +/* + * Return non-zero if a mutex is already owned by the current thread. + */ +#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD) + +/* Common strings */ +#ifdef MTX_STRS +#ifdef KTR_EXTEND + +/* + * KTR_EXTEND saves file name and line for all entries, so we don't need them + * here. Theoretically we should also change the entries which refer to them + * (from CTR5 to CTR3), but since they're just passed to snprinf as the last + * parameters, it doesn't do any harm to leave them. + */ +char STR_mtx_enter_fmt[] = "GOT %s [%x] r=%d"; +char STR_mtx_exit_fmt[] = "REL %s [%x] r=%d"; +char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] result=%d"; +#else +char STR_mtx_enter_fmt[] = "GOT %s [%x] at %s:%d r=%d"; +char STR_mtx_exit_fmt[] = "REL %s [%x] at %s:%d r=%d"; +char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] at %s:%d result=%d"; +#endif +char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0"; +char STR_mtx_owned[] = "mtx_owned(_mpp)"; +char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0"; +#else /* MTX_STRS */ +extern char STR_mtx_enter_fmt[]; +extern char STR_mtx_bad_type[]; +extern char STR_mtx_exit_fmt[]; +extern char STR_mtx_owned[]; +extern char STR_mtx_recurse[]; +extern char STR_mtx_try_enter_fmt[]; +#endif /* MTX_STRS */ + +#ifndef KLD_MODULE +/* + * Get lock 'm', the macro handles the easy (and most common cases) and leaves + * the slow stuff to the mtx_enter_hard() function. + * + * Note: since type is usually a constant much of this code is optimized out. + */ +_MTX_INLINE void +mtx_enter(mtx_t *mtxp, int type) +{ + mtx_t *_mpp = mtxp; + + /* bits only valid on mtx_exit() */ + MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0, + STR_mtx_bad_type); + + do { + if ((type) & MTX_SPIN) { + /* + * Easy cases of spin locks: + * + * 1) We already own the lock and will simply + * recurse on it (if RLIKELY) + * + * 2) The lock is free, we just get it + */ + if ((type) & MTX_RLIKELY) { + /* + * Check for recursion, if we already + * have this lock we just bump the + * recursion count. + */ + if (_mpp->mtx_lock == CURTHD) { + _mpp->mtx_recurse++; + break; /* Done */ + } + } + + if (((type) & MTX_TOPHALF) == 0) { + /* + * If an interrupt thread uses this + * we must block interrupts here. 
+ */ + if ((type) & MTX_FIRST) { + ASS_IEN; + disable_intr(); + _getlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } else { + _getlock_spin_block(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } + } else + _getlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } else { + /* Sleep locks */ + if ((type) & MTX_RLIKELY) + _getlock_sleep(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + else + _getlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } + } while (0); + WITNESS_ENTER(_mpp, type); + CTR5(KTR_LOCK, STR_mtx_enter_fmt, + (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, + (_mpp)->mtx_recurse); +} + +/* + * Attempt to get MTX_DEF lock, return non-zero if lock acquired. + * + * XXX DOES NOT HANDLE RECURSION + */ +_MTX_INLINE int +mtx_try_enter(mtx_t *mtxp, int type) +{ + mtx_t *const _mpp = mtxp; + int _rval; + + _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD); +#ifdef SMP_DEBUG + if (_rval && (_mpp)->mtx_witness != NULL) { + ASS((_mpp)->mtx_recurse == 0); + witness_try_enter(_mpp, type, __FILE__, __LINE__); + } +#endif + CTR5(KTR_LOCK, STR_mtx_try_enter_fmt, + (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, _rval); + + return _rval; +} + +#define mtx_legal2block() (read_eflags() & 0x200) + +/* + * Release lock m. + */ +_MTX_INLINE void +mtx_exit(mtx_t *mtxp, int type) +{ + mtx_t *const _mpp = mtxp; + + MPASS2(mtx_owned(_mpp), STR_mtx_owned); + WITNESS_EXIT(_mpp, type); + CTR5(KTR_LOCK, STR_mtx_exit_fmt, + (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, + (_mpp)->mtx_recurse); + if ((type) & MTX_SPIN) { + if ((type) & MTX_NORECURSE) { + MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse); + atomic_cmpset_int(&_mpp->mtx_lock, _mpp->mtx_lock, + MTX_UNOWNED); + if (((type) & MTX_TOPHALF) == 0) { + if ((type) & MTX_FIRST) { + ASS_IDIS; + enable_intr(); + } else + write_eflags(_mpp->mtx_savefl); + } + } else { + if ((type) & MTX_TOPHALF) + _exitlock_spin(_mpp,,); + else { + if ((type) & MTX_FIRST) { + ASS_IDIS; + _exitlock_spin(_mpp,, "sti"); + } else { + _exitlock_spin(_mpp, + "pushl %3", "popfl"); + } + } + } + } else { + /* Handle sleep locks */ + if ((type) & MTX_RLIKELY) + _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS); + else { + _exitlock_norecurse(_mpp, CURTHD, + (type) & MTX_HARDOPTS); + } + } +} + +#endif /* KLD_MODULE */ +#endif /* _KERNEL */ + +#else /* !LOCORE */ + +/* + * Simple assembly macros to get and release non-recursive spin locks + */ + +#if defined(I386_CPU) + +#define MTX_EXIT(lck, reg) \ + movl $ MTX_UNOWNED,lck+MTX_LOCK; + +#else /* I386_CPU */ + +#define MTX_ENTER(reg, lck) \ +9: movl $ MTX_UNOWNED,%eax; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ + jnz 9b + +/* Must use locked bus op (cmpxchg) when setting to unowned (barrier) */ +#define MTX_EXIT(lck,reg) \ + movl lck+MTX_LOCK,%eax; \ + movl $ MTX_UNOWNED,reg; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ + +#define MTX_ENTER_WITH_RECURSION(reg, lck) \ + movl lck+MTX_LOCK,%eax; \ + cmpl PCPU_CURPROC,%eax; \ + jne 9f; \ + incw lck+MTX_RECURSECNT; \ + jmp 8f; \ +9: movl $ MTX_UNOWNED,%eax; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ + jnz 9b; \ +8: + +#define MTX_EXIT_WITH_RECURSION(lck,reg) \ + movw lck+MTX_RECURSECNT,%ax; \ + decw %ax; \ + js 9f; \ + movw %ax,lck+MTX_RECURSECNT; \ + jmp 8f; \ +9: movl lck+MTX_LOCK,%eax; \ + movl $ MTX_UNOWNED,reg; \ + MPLOCKED \ + cmpxchgl reg,lck+MTX_LOCK; \ +8: + +#endif /* I386_CPU */ +#endif /* !LOCORE */ +#endif /* __MACHINE_MUTEX_H */ diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index 08beb5a..1c7af85 100644 --- 
a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -72,11 +72,7 @@ struct pcb { #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ #define PCB_DBREGS 0x02 /* process using debug registers */ caddr_t pcb_onfault; /* copyin/out fault recovery */ -#ifdef SMP - u_long pcb_mpnest; -#else - u_long pcb_mpnest_dontuse; -#endif + int pcb_schednest; int pcb_gs; struct pcb_ext *pcb_ext; /* optional pcb extension */ u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */ diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 58bd9cf..440da60 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -26,6 +26,20 @@ * $FreeBSD$ */ +#ifndef _MACHINE_GLOBALDATA_H_ +#define _MACHINE_GLOBALDATA_H_ + +#include +#include +#include +#include +#include + +/* XXX */ +#ifdef KTR_PERCPU +#include +#endif + /* * This structure maps out the global data that needs to be kept on a * per-cpu basis. genassym uses this to generate offsets for the assembler @@ -41,11 +55,14 @@ struct globaldata { struct privatespace *gd_prvspace; /* self-reference */ struct proc *gd_curproc; + struct proc *gd_prevproc; struct proc *gd_npxproc; struct pcb *gd_curpcb; + struct proc *gd_idleproc; struct timeval gd_switchtime; struct i386tss gd_common_tss; int gd_switchticks; + int gd_intr_nesting_level; struct segment_descriptor gd_common_tssd; struct segment_descriptor *gd_tss_gdt; #ifdef USER_LDT @@ -67,8 +84,22 @@ struct globaldata { unsigned *gd_prv_PADDR1; #endif u_int gd_astpending; + SLIST_ENTRY(globaldata) gd_allcpu; + int gd_witness_spin_check; +#ifdef KTR_PERCPU +#ifdef KTR + volatile int gd_ktr_idx; + char *gd_ktr_buf; + char gd_ktr_buf_data[KTR_SIZE]; +#endif +#endif }; +extern struct globaldata globaldata; + +SLIST_HEAD(cpuhead, globaldata); +extern struct cpuhead cpuhead; + #ifdef SMP /* * This is the upper (0xff800000) address space layout that is per-cpu. @@ -93,3 +124,5 @@ struct privatespace { extern struct privatespace SMP_prvspace[]; #endif + +#endif /* ! _MACHINE_GLOBALDATA_H_ */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 69b716b..20d4fa3 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -15,6 +15,9 @@ #ifdef _KERNEL +#ifdef I386_CPU +#error SMP not supported with I386_CPU +#endif #if defined(SMP) && !defined(APIC_IO) # error APIC_IO required for SMP, add "options APIC_IO" to your config file. 
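The gd_allcpu link and the cpuhead list head added to the per-CPU globaldata above make every CPU's private data reachable from one list. A minimal sketch of walking it with the standard sys/queue.h macros (illustrative only; this function is not part of the change and only touches fields shown in the diff):

static void
globaldata_dump(void)
{
	struct globaldata *gd;

	SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
		printf("gd %p: curproc %p, idleproc %p\n",
		    gd, gd->gd_curproc, gd->gd_idleproc);
}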
#endif /* SMP && !APIC_IO */ @@ -57,23 +60,6 @@ extern int bootMP_size; /* functions in mpboot.s */ void bootMP __P((void)); -/* global data in mplock.s */ -extern u_int mp_lock; -extern u_int isr_lock; -#ifdef RECURSIVE_MPINTRLOCK -extern u_int mpintr_lock; -#endif /* RECURSIVE_MPINTRLOCK */ - -/* functions in mplock.s */ -void get_mplock __P((void)); -void rel_mplock __P((void)); -int try_mplock __P((void)); -#ifdef RECURSIVE_MPINTRLOCK -void get_mpintrlock __P((void)); -void rel_mpintrlock __P((void)); -int try_mpintrlock __P((void)); -#endif /* RECURSIVE_MPINTRLOCK */ - /* global data in apic_vector.s */ extern volatile u_int stopped_cpus; extern volatile u_int started_cpus; @@ -185,23 +171,7 @@ extern int smp_started; extern volatile int smp_idle_loops; #endif /* !LOCORE */ -#else /* !SMP && !APIC_IO */ - -/* - * Create dummy MP lock empties - */ - -static __inline void -get_mplock(void) -{ -} - -static __inline void -rel_mplock(void) -{ -} - -#endif +#endif /* SMP && !APIC_IO */ #endif /* _KERNEL */ #endif /* _MACHINE_SMP_H_ */ diff --git a/sys/amd64/isa/atpic_vector.S b/sys/amd64/isa/atpic_vector.S index e427351..d2b88bf 100644 --- a/sys/amd64/isa/atpic_vector.S +++ b/sys/amd64/isa/atpic_vector.S @@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \ pushl %ecx ; \ pushl %edx ; \ pushl %ds ; \ + pushl %fs ; \ MAYBE_PUSHL_ES ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ + mov %ax,%fs ; \ MAYBE_MOVW_AX_ES ; \ FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \ pushl _intr_unit + (irq_num) * 4 ; \ @@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \ incl _cnt+V_INTR ; /* book-keeping can wait */ \ movl _intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ - movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \ +/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \ notl %eax ; \ - andl _ipending,%eax ; \ - jne 2f ; /* yes, maybe handle them */ \ + andl _spending,$SWI_MASK ; \ + jne 2f ; // yes, maybe handle them */ \ 1: ; \ MEXITCOUNT ; \ MAYBE_POPL_ES ; \ + popl %fs ; \ popl %ds ; \ popl %edx ; \ popl %ecx ; \ popl %eax ; \ iret ; \ + +#if 0 ; \ ALIGN_TEXT ; \ 2: ; \ @@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \ incb _intr_nesting_level ; /* ... really limit it ... */ \ sti ; /* ... to do this as early as possible */ \ MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ + popl %fs ; \ popl %ecx ; /* ... original %ds ... */ \ popl %edx ; \ xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \ @@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \ movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \ movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \ movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ subl $4,%esp ; /* junk for unit number */ \ MEXITCOUNT ; \ jmp _doreti +#endif +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. + */ #define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \ pushl %ds ; /* save our data and extra segments ... */ \ pushl %es ; \ pushl %fs ; \ - mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \ - mov %ax,%ds ; /* ... 
early for obsolete reasons */ \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ mov %ax,%es ; \ mov %ax,%fs ; \ maybe_extra_ipending ; \ @@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \ movb %al,_imen + IRQ_BYTE(irq_num) ; \ outb %al,$icu+ICU_IMR_OFFSET ; \ enable_icus ; \ - movl _cpl,%eax ; \ - testb $IRQ_BIT(irq_num),%reg ; \ - jne 2f ; \ - incb _intr_nesting_level ; \ + incb _intr_nesting_level ; /* XXX do we need this? */ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - movl _cpl,%eax ; \ - pushl %eax ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - orl _intr_mask + (irq_num) * 4,%eax ; \ - movl %eax,_cpl ; \ + pushl $irq_num; /* pass the IRQ */ \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; /* must unmask _imen and icu atomically */ \ - movb _imen + IRQ_BYTE(irq_num),%al ; \ - andb $~IRQ_BIT(irq_num),%al ; \ - movb %al,_imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - sti ; /* XXX _doreti repeats the cli/sti */ \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ MEXITCOUNT ; \ /* We could usually avoid the following jmp by inlining some of */ \ /* _doreti, but it's probably better to use less cache. */ \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - /* XXX skip mcounting here to avoid double count */ \ - orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp ; \ - iret + jmp doreti_next /* and catch up inside doreti */ + +/* + * Reenable the interrupt mask after completing an interrupt. Called + * from ithd_loop. There are two separate functions, one for each + * ICU. 
+ */ + .globl setimask0, setimask1 +setimask0: + cli + movb _imen,%al + outb %al,$IO_ICU1 + ICU_IMR_OFFSET + sti + ret + +setimask1: + cli + movb _imen + 1,%al + outb %al,$IO_ICU2 + ICU_IMR_OFFSET + sti + ret MCOUNT_LABEL(bintr) FAST_INTR(0,fastintr0, ENABLE_ICU1) @@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr) FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) @@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr) INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + MCOUNT_LABEL(eintr) .data @@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */ .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - .text diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c index 15044ab..724f3c2 100644 --- a/sys/amd64/isa/clock.c +++ b/sys/amd64/isa/clock.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -93,10 +94,6 @@ #include #endif -#ifdef SMP -#define disable_intr() CLOCK_DISABLE_INTR() -#define enable_intr() CLOCK_ENABLE_INTR() - #ifdef APIC_IO #include /* The interrupt triggered by the 8254 (timer) chip */ @@ -104,7 +101,6 @@ int apic_8254_intr; static u_long read_intr_count __P((int vec)); static void setup_8254_mixed_mode __P((void)); #endif -#endif /* SMP */ /* * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we @@ -147,7 +143,9 @@ int tsc_is_broken; int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */ static int beeping = 0; +#if 0 static u_int clk_imask = HWI_MASK | SWI_MASK; +#endif static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; static u_int hardclock_max_count; static u_int32_t i8254_lastcount; @@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, static void clkintr(struct clockframe frame) { + int intrsave; + if (timecounter->tc_get_timecount == i8254_get_timecount) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); if (i8254_ticked) i8254_ticked = 0; else { @@ -214,7 +216,8 @@ clkintr(struct clockframe frame) i8254_lastcount = 0; } clkintr_pending = 0; - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); } timer_func(&frame); switch (timer0_state) { @@ -233,14 +236,17 @@ clkintr(struct clockframe frame) break; case ACQUIRE_PENDING: + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); i8254_lastcount = 0; timer0_max_count = TIMER_DIV(new_rate); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer_func = new_function; timer0_state = ACQUIRED; setdelayed(); @@ -249,7 +255,9 @@ clkintr(struct clockframe frame) case RELEASE_PENDING: if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); i8254_offset = i8254_get_timecount(NULL); 
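/*
 * The changes above repeatedly replace the old read_eflags()/
 * write_eflags() protection with the same shape: save_intr() and
 * disable_intr() to block interrupts on the local CPU, CLOCK_LOCK()/
 * CLOCK_UNLOCK() to keep other CPUs away from the 8254 on SMP, then
 * restore_intr().  A minimal sketch of that shape, assuming the
 * declarations already included by clock.c (outb, the TIMER_* constants
 * and the CLOCK_LOCK macros); the helper name i8254_reprogram_sketch is
 * hypothetical and only illustrates the pattern.
 */
static void
i8254_reprogram_sketch(u_int max_count)
{
	int intrsave;

	intrsave = save_intr();		/* remember current PSL_I state */
	disable_intr();			/* block interrupts on this CPU */
	CLOCK_LOCK();			/* exclude other CPUs from the 8254 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
	outb(TIMER_CNTR0, max_count & 0xff);
	outb(TIMER_CNTR0, max_count >> 8);
	CLOCK_UNLOCK();
	restore_intr(intrsave);		/* re-enable only if previously enabled */
}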
i8254_lastcount = 0; timer0_max_count = hardclock_max_count; @@ -257,7 +265,8 @@ clkintr(struct clockframe frame) TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; @@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc) static int getit(void) { - u_long ef; - int high, low; + int high, low, intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -417,7 +426,7 @@ getit(void) high = inb(TIMER_CNTR0); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return ((high << 8) | low); } @@ -523,6 +532,7 @@ sysbeepstop(void *chan) int sysbeep(int pitch, int period) { + int intrsave; int x = splclock(); if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT)) @@ -531,10 +541,13 @@ sysbeep(int pitch, int period) splx(x); return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */ } + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_CNTR2, pitch); outb(TIMER_CNTR2, (pitch>>8)); - enable_intr(); + CLOCK_UNLOCK(); + restore_intr(intrsave); if (!beeping) { /* enable counter2 output to speaker */ outb(IO_PPI, inb(IO_PPI) | 3); @@ -683,11 +696,12 @@ fail: static void set_timer_freq(u_int freq, int intr_freq) { - u_long ef; + int intrsave; int new_timer0_max_count; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); timer_freq = freq; new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq); if (new_timer0_max_count != timer0_max_count) { @@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq) outb(TIMER_CNTR0, timer0_max_count >> 8); } CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq) void i8254_restore(void) { - u_long ef; + int intrsave; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT); outb(TIMER_CNTR0, timer0_max_count & 0xff); outb(TIMER_CNTR0, timer0_max_count >> 8); CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); } /* @@ -979,8 +994,8 @@ cpu_initclocks() { int diag; #ifdef APIC_IO - int apic_8254_trial; - struct intrec *clkdesc; + int apic_8254_trial, num_8254_ticks; + struct intrec *clkdesc, *rtcdesc; #endif /* APIC_IO */ if (statclock_disable) { @@ -1014,14 +1029,15 @@ cpu_initclocks() } else panic("APIC_IO: Cannot route 8254 interrupt to CPU"); } - - clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); - #else /* APIC_IO */ - inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask, + /* + * XXX Check the priority of this interrupt handler. I + * couldn't find anything suitable in the BSD/OS code (grog, + * 19 July 2000). + */ + /* Setup the PIC clk handler. The APIC handler is setup later */ + inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_EXCL); INTREN(IRQ0); @@ -1032,8 +1048,18 @@ cpu_initclocks() writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ - if (statclock_disable) + if (statclock_disable) { +#ifdef APIC_IO + /* + * XXX - if statclock is disabled, don't attempt the APIC + * trial. Not sure this is sane for APIC_IO. 
+ */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif /* APIC_IO */ return; + } diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); @@ -1041,34 +1067,44 @@ cpu_initclocks() #ifdef APIC_IO if (isa_apic_irq(8) != 8) panic("APIC RTC != 8"); -#endif /* APIC_IO */ - inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask, - INTR_EXCL); - -#ifdef APIC_IO - INTREN(APIC_IRQ8); -#else - INTREN(IRQ8); -#endif /* APIC_IO */ + if (apic_8254_trial) { + /* + * XXX - We use fast interrupts for clk and rtc long enough to + * perform the APIC probe and then revert to exclusive + * interrupts. + */ + clkdesc = inthand_add("clk", apic_8254_intr, + (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST); + INTREN(1 << apic_8254_intr); - writertc(RTC_STATUSB, rtc_statusb); + rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, + PI_REALTIME, INTR_FAST); /* XXX */ + INTREN(APIC_IRQ8); + writertc(RTC_STATUSB, rtc_statusb); -#ifdef APIC_IO - if (apic_8254_trial) { - printf("APIC_IO: Testing 8254 interrupt delivery\n"); while (read_intr_count(8) < 6) ; /* nothing */ - if (read_intr_count(apic_8254_intr) < 3) { + num_8254_ticks = read_intr_count(apic_8254_intr); + + /* disable and remove our fake handlers */ + INTRDIS(1 << apic_8254_intr); + inthand_remove(clkdesc); + + writertc(RTC_STATUSA, rtc_statusa); + writertc(RTC_STATUSB, RTCSB_24HR); + + INTRDIS(APIC_IRQ8); + inthand_remove(rtcdesc); + + if (num_8254_ticks < 3) { /* * The MP table is broken. * The 8254 was not connected to the specified pin * on the IO APIC. * Workaround: Limited variant of mixed mode. */ - INTRDIS(1 << apic_8254_intr); - inthand_remove(clkdesc); printf("APIC_IO: Broken MP table detected: " "8254 is not connected to " "IOAPIC #%d intpin %d\n", @@ -1087,13 +1123,27 @@ cpu_initclocks() } apic_8254_intr = apic_irq(0, 0); setup_8254_mixed_mode(); - inthand_add("clk", apic_8254_intr, - (inthand2_t *)clkintr, - NULL, &clk_imask, INTR_EXCL); - INTREN(1 << apic_8254_intr); } } + + /* Finally, setup the real clock handlers */ + inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL, + PI_REALTIME, INTR_EXCL); + INTREN(1 << apic_8254_intr); +#endif + + inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME, + INTR_EXCL); +#ifdef APIC_IO + INTREN(APIC_IRQ8); +#else + INTREN(IRQ8); +#endif + + writertc(RTC_STATUSB, rtc_statusb); + +#ifdef APIC_IO if (apic_int_type(0, 0) != 3 || int_to_apicintpin[apic_8254_intr].ioapic != 0 || int_to_apicintpin[apic_8254_intr].int_pin != 0) @@ -1198,11 +1248,12 @@ static unsigned i8254_get_timecount(struct timecounter *tc) { u_int count; - u_long ef; + int intrsave; u_int high, low; - ef = read_eflags(); + intrsave = save_intr(); disable_intr(); + CLOCK_LOCK(); /* Select timer0 and latch counter value. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); @@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc) count = timer0_max_count - ((high << 8) | low); if (count < i8254_lastcount || (!i8254_ticked && (clkintr_pending || - ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) && + ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) && #ifdef APIC_IO #define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */ /* XXX this assumes that apic_8254_intr is < 24. 
*/ @@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc) i8254_lastcount = count; count += i8254_offset; CLOCK_UNLOCK(); - write_eflags(ef); + restore_intr(intrsave); return (count); } diff --git a/sys/amd64/isa/icu_ipl.S b/sys/amd64/isa/icu_ipl.S index 3475358..d178d5c 100644 --- a/sys/amd64/isa/icu_ipl.S +++ b/sys/amd64/isa/icu_ipl.S @@ -55,63 +55,6 @@ _imen: .long HWI_MASK SUPERALIGN_TEXT /* - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. We have to repeat the check since if there is an - * interrupt while we're looking, _doreti processing for the - * interrupt will handle all the unmasked pending interrupts - * because we restored early. We're repeating the calculation - * of (ipending & ~cpl) anyway so that the caller doesn't have - * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx" - * is undefined when %ecx is 0 so we can't rely on the secondary - * btrl tests. - */ - movl _cpl,%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. - */ - movl %eax,%ecx - notl %ecx - andl _ipending,%ecx - jne splz_unpend - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi - /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - */ - jmp *vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax - orl imasks(,%ecx,4),%eax - movl %eax,_cpl - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,_cpl - jmp splz_next - -/* * Fake clock interrupt(s) so that they appear to come from our caller instead * of from here, so that system profiling works. * XXX do this more generally (for all vectors; look up the C entry point). diff --git a/sys/amd64/isa/icu_ipl.s b/sys/amd64/isa/icu_ipl.s index 3475358..d178d5c 100644 --- a/sys/amd64/isa/icu_ipl.s +++ b/sys/amd64/isa/icu_ipl.s @@ -55,63 +55,6 @@ _imen: .long HWI_MASK SUPERALIGN_TEXT /* - * Interrupt priority mechanism - * -- soft splXX masks with group mechanism (cpl) - * -- h/w masks for currently active or unused interrupts (imen) - * -- ipending = active interrupts currently masked by cpl - */ - -ENTRY(splz) - /* - * The caller has restored cpl and checked that (ipending & ~cpl) - * is nonzero. We have to repeat the check since if there is an - * interrupt while we're looking, _doreti processing for the - * interrupt will handle all the unmasked pending interrupts - * because we restored early. We're repeating the calculation - * of (ipending & ~cpl) anyway so that the caller doesn't have - * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx" - * is undefined when %ecx is 0 so we can't rely on the secondary - * btrl tests. - */ - movl _cpl,%eax -splz_next: - /* - * We don't need any locking here. (ipending & ~cpl) cannot grow - * while we're looking at it - any interrupt will shrink it to 0. 
- */ - movl %eax,%ecx - notl %ecx - andl _ipending,%ecx - jne splz_unpend - ret - - ALIGN_TEXT -splz_unpend: - bsfl %ecx,%ecx - btrl %ecx,_ipending - jnc splz_next - cmpl $NHWI,%ecx - jae splz_swi - /* - * We would prefer to call the intr handler directly here but that - * doesn't work for badly behaved handlers that want the interrupt - * frame. Also, there's a problem determining the unit number. - * We should change the interface so that the unit number is not - * determined at config time. - */ - jmp *vec(,%ecx,4) - - ALIGN_TEXT -splz_swi: - pushl %eax - orl imasks(,%ecx,4),%eax - movl %eax,_cpl - call *_ihandlers(,%ecx,4) - popl %eax - movl %eax,_cpl - jmp splz_next - -/* * Fake clock interrupt(s) so that they appear to come from our caller instead * of from here, so that system profiling works. * XXX do this more generally (for all vectors; look up the C entry point). diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S index e427351..d2b88bf 100644 --- a/sys/amd64/isa/icu_vector.S +++ b/sys/amd64/isa/icu_vector.S @@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \ pushl %ecx ; \ pushl %edx ; \ pushl %ds ; \ + pushl %fs ; \ MAYBE_PUSHL_ES ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ + mov %ax,%fs ; \ MAYBE_MOVW_AX_ES ; \ FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \ pushl _intr_unit + (irq_num) * 4 ; \ @@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \ incl _cnt+V_INTR ; /* book-keeping can wait */ \ movl _intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ - movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \ +/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \ notl %eax ; \ - andl _ipending,%eax ; \ - jne 2f ; /* yes, maybe handle them */ \ + andl _spending,$SWI_MASK ; \ + jne 2f ; // yes, maybe handle them */ \ 1: ; \ MEXITCOUNT ; \ MAYBE_POPL_ES ; \ + popl %fs ; \ popl %ds ; \ popl %edx ; \ popl %ecx ; \ popl %eax ; \ iret ; \ + +#if 0 ; \ ALIGN_TEXT ; \ 2: ; \ @@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \ incb _intr_nesting_level ; /* ... really limit it ... */ \ sti ; /* ... to do this as early as possible */ \ MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ + popl %fs ; \ popl %ecx ; /* ... original %ds ... */ \ popl %edx ; \ xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \ @@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \ movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \ movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \ movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ subl $4,%esp ; /* junk for unit number */ \ MEXITCOUNT ; \ jmp _doreti +#endif +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. + */ #define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \ pushl %ds ; /* save our data and extra segments ... */ \ pushl %es ; \ pushl %fs ; \ - mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \ - mov %ax,%ds ; /* ... 
early for obsolete reasons */ \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ mov %ax,%es ; \ mov %ax,%fs ; \ maybe_extra_ipending ; \ @@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \ movb %al,_imen + IRQ_BYTE(irq_num) ; \ outb %al,$icu+ICU_IMR_OFFSET ; \ enable_icus ; \ - movl _cpl,%eax ; \ - testb $IRQ_BIT(irq_num),%reg ; \ - jne 2f ; \ - incb _intr_nesting_level ; \ + incb _intr_nesting_level ; /* XXX do we need this? */ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - movl _cpl,%eax ; \ - pushl %eax ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - orl _intr_mask + (irq_num) * 4,%eax ; \ - movl %eax,_cpl ; \ + pushl $irq_num; /* pass the IRQ */ \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; /* must unmask _imen and icu atomically */ \ - movb _imen + IRQ_BYTE(irq_num),%al ; \ - andb $~IRQ_BIT(irq_num),%al ; \ - movb %al,_imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - sti ; /* XXX _doreti repeats the cli/sti */ \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ MEXITCOUNT ; \ /* We could usually avoid the following jmp by inlining some of */ \ /* _doreti, but it's probably better to use less cache. */ \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - /* XXX skip mcounting here to avoid double count */ \ - orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp ; \ - iret + jmp doreti_next /* and catch up inside doreti */ + +/* + * Reenable the interrupt mask after completing an interrupt. Called + * from ithd_loop. There are two separate functions, one for each + * ICU. 
+ */ + .globl setimask0, setimask1 +setimask0: + cli + movb _imen,%al + outb %al,$IO_ICU1 + ICU_IMR_OFFSET + sti + ret + +setimask1: + cli + movb _imen + 1,%al + outb %al,$IO_ICU2 + ICU_IMR_OFFSET + sti + ret MCOUNT_LABEL(bintr) FAST_INTR(0,fastintr0, ENABLE_ICU1) @@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr) FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) @@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr) INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + MCOUNT_LABEL(eintr) .data @@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */ .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - .text diff --git a/sys/amd64/isa/icu_vector.s b/sys/amd64/isa/icu_vector.s index e427351..d2b88bf 100644 --- a/sys/amd64/isa/icu_vector.s +++ b/sys/amd64/isa/icu_vector.s @@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \ pushl %ecx ; \ pushl %edx ; \ pushl %ds ; \ + pushl %fs ; \ MAYBE_PUSHL_ES ; \ mov $KDSEL,%ax ; \ mov %ax,%ds ; \ + mov %ax,%fs ; \ MAYBE_MOVW_AX_ES ; \ FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \ pushl _intr_unit + (irq_num) * 4 ; \ @@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \ incl _cnt+V_INTR ; /* book-keeping can wait */ \ movl _intr_countp + (irq_num) * 4,%eax ; \ incl (%eax) ; \ - movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \ +/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \ notl %eax ; \ - andl _ipending,%eax ; \ - jne 2f ; /* yes, maybe handle them */ \ + andl _spending,$SWI_MASK ; \ + jne 2f ; // yes, maybe handle them */ \ 1: ; \ MEXITCOUNT ; \ MAYBE_POPL_ES ; \ + popl %fs ; \ popl %ds ; \ popl %edx ; \ popl %ecx ; \ popl %eax ; \ iret ; \ + +#if 0 ; \ ALIGN_TEXT ; \ 2: ; \ @@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \ incb _intr_nesting_level ; /* ... really limit it ... */ \ sti ; /* ... to do this as early as possible */ \ MAYBE_POPL_ES ; /* discard most of thin frame ... */ \ + popl %fs ; \ popl %ecx ; /* ... original %ds ... */ \ popl %edx ; \ xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \ @@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \ movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \ movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \ movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \ - pushl %eax ; \ subl $4,%esp ; /* junk for unit number */ \ MEXITCOUNT ; \ jmp _doreti +#endif +/* + * Slow, threaded interrupts. + * + * XXX Most of the parameters here are obsolete. Fix this when we're + * done. + * XXX we really shouldn't return via doreti if we just schedule the + * interrupt handler and don't run anything. We could just do an + * iret. FIXME. + */ #define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \ .text ; \ SUPERALIGN_TEXT ; \ @@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \ pushl %ds ; /* save our data and extra segments ... */ \ pushl %es ; \ pushl %fs ; \ - mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \ - mov %ax,%ds ; /* ... 
early for obsolete reasons */ \ + mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \ + mov %ax,%ds ; \ mov %ax,%es ; \ mov %ax,%fs ; \ maybe_extra_ipending ; \ @@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \ movb %al,_imen + IRQ_BYTE(irq_num) ; \ outb %al,$icu+ICU_IMR_OFFSET ; \ enable_icus ; \ - movl _cpl,%eax ; \ - testb $IRQ_BIT(irq_num),%reg ; \ - jne 2f ; \ - incb _intr_nesting_level ; \ + incb _intr_nesting_level ; /* XXX do we need this? */ \ __CONCAT(Xresume,irq_num): ; \ FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \ - incl _cnt+V_INTR ; /* tally interrupts */ \ - movl _intr_countp + (irq_num) * 4,%eax ; \ - incl (%eax) ; \ - movl _cpl,%eax ; \ - pushl %eax ; \ - pushl _intr_unit + (irq_num) * 4 ; \ - orl _intr_mask + (irq_num) * 4,%eax ; \ - movl %eax,_cpl ; \ + pushl $irq_num; /* pass the IRQ */ \ sti ; \ - call *_intr_handler + (irq_num) * 4 ; \ - cli ; /* must unmask _imen and icu atomically */ \ - movb _imen + IRQ_BYTE(irq_num),%al ; \ - andb $~IRQ_BIT(irq_num),%al ; \ - movb %al,_imen + IRQ_BYTE(irq_num) ; \ - outb %al,$icu+ICU_IMR_OFFSET ; \ - sti ; /* XXX _doreti repeats the cli/sti */ \ + call _sched_ithd ; \ + addl $4, %esp ; /* discard the parameter */ \ MEXITCOUNT ; \ /* We could usually avoid the following jmp by inlining some of */ \ /* _doreti, but it's probably better to use less cache. */ \ - jmp _doreti ; \ -; \ - ALIGN_TEXT ; \ -2: ; \ - /* XXX skip mcounting here to avoid double count */ \ - orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \ - popl %fs ; \ - popl %es ; \ - popl %ds ; \ - popal ; \ - addl $4+4,%esp ; \ - iret + jmp doreti_next /* and catch up inside doreti */ + +/* + * Reenable the interrupt mask after completing an interrupt. Called + * from ithd_loop. There are two separate functions, one for each + * ICU. + */ + .globl setimask0, setimask1 +setimask0: + cli + movb _imen,%al + outb %al,$IO_ICU1 + ICU_IMR_OFFSET + sti + ret + +setimask1: + cli + movb _imen + 1,%al + outb %al,$IO_ICU2 + ICU_IMR_OFFSET + sti + ret MCOUNT_LABEL(bintr) FAST_INTR(0,fastintr0, ENABLE_ICU1) @@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr) FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2) FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2) FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2) + #define CLKINTR_PENDING movl $1,CNAME(clkintr_pending) +/* Threaded interrupts */ INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING) INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,) INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,) @@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr) INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,) INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,) + MCOUNT_LABEL(eintr) .data @@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */ .long _swi_null, swi_net, _swi_null, _swi_null .long _swi_vm, _swi_null, _softclock -imasks: /* masks for interrupt handlers */ - .space NHWI*4 /* padding; HWI masks are elsewhere */ - - .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK - .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK - .text diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c index 34a8c22..870760e 100644 --- a/sys/amd64/isa/intr_machdep.c +++ b/sys/amd64/isa/intr_machdep.c @@ -36,12 +36,6 @@ * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD$ */ -/* - * This file contains an aggregated module marked: - * Copyright (c) 1997, Stefan Esser - * All rights reserved. - * See the notice for details. 
- */ #include "opt_auto_eoi.h" @@ -51,11 +45,14 @@ #ifndef SMP #include #endif +#include #include #include #include +#include #include #include +#include #include #include #include @@ -91,30 +88,14 @@ #include #endif -/* XXX should be in suitable include files */ -#ifdef PC98 -#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */ -#define ICU_SLAVEID 7 -#else -#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ -#define ICU_SLAVEID 2 -#endif - -#ifdef APIC_IO /* - * This is to accommodate "mixed-mode" programming for - * motherboards that don't connect the 8254 to the IO APIC. + * Per-interrupt data. We consider the soft interrupt to be a special + * case, so these arrays have NHWI + NSWI entries, not ICU_LEN. */ -#define AUTO_EOI_1 1 -#endif - -#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) - -u_long *intr_countp[ICU_LEN]; -inthand2_t *intr_handler[ICU_LEN]; -u_int intr_mask[ICU_LEN]; -static u_int* intr_mptr[ICU_LEN]; -void *intr_unit[ICU_LEN]; +u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */ +inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */ +ithd *ithds[NHWI + NSWI]; /* real interrupt handler */ +void *intr_unit[NHWI + NSWI]; static inthand_t *fastintr[ICU_LEN] = { &IDTVEC(fastintr0), &IDTVEC(fastintr1), @@ -292,8 +273,9 @@ isa_nmi(cd) } /* - * Fill in default interrupt table (in case of spuruious interrupt - * during configuration of kernel, setup interrupt control unit + * Create a default interrupt table to avoid problems caused by + * spurious interrupts during configuration of kernel, then setup + * interrupt control unit. */ void isa_defaultirq() @@ -364,16 +346,6 @@ isa_strayintr(vcookiep) { int intr = (void **)vcookiep - &intr_unit[0]; - /* DON'T BOTHER FOR NOW! */ - /* for some reason, we get bursts of intr #7, even if not enabled! */ - /* - * Well the reason you got bursts of intr #7 is because someone - * raised an interrupt line and dropped it before the 8259 could - * prioritize it. This is documented in the intel data book. This - * means you have BAD hardware! I have changed this so that only - * the first 5 get logged, then it quits logging them, and puts - * out a special message. rgrimes 3/25/1993 - */ /* * XXX TODO print a different message for #7 if it is for a * glitch. Glitches can be distinguished from real #7's by @@ -405,36 +377,10 @@ isa_irq_pending() } #endif -int -update_intr_masks(void) -{ - int intr, n=0; - u_int mask,*maskptr; - - for (intr=0; intr < ICU_LEN; intr ++) { -#if defined(APIC_IO) - /* no 8259 SLAVE to ignore */ -#else - if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */ -#endif /* APIC_IO */ - maskptr = intr_mptr[intr]; - if (!maskptr) - continue; - *maskptr |= SWI_LOW_MASK | (1 << intr); - mask = *maskptr; - if (mask != intr_mask[intr]) { -#if 0 - printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n", - intr, intr_mask[intr], mask, maskptr); -#endif - intr_mask[intr]=mask; - n++; - } - - } - return (n); -} - +/* + * Update intrnames array with the specified name. This is used by + * vmstat(8) and the like. 
+ */ static void update_intrname(int intr, char *name) { @@ -485,7 +431,7 @@ found: } int -icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) +icu_setup(int intr, inthand2_t *handler, void *arg, int flags) { #ifdef FAST_HI int select; /* the select register is 8 bits */ @@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ u_long ef; - u_int mask = (maskptr ? *maskptr : 0); #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) ef = read_eflags(); disable_intr(); intr_handler[intr] = handler; - intr_mptr[intr] = maskptr; - intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr); intr_unit[intr] = arg; #ifdef FAST_HI if (flags & INTR_FAST) { @@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ INTREN(1 << intr); - MPINTR_UNLOCK(); write_eflags(ef); return (0); } +/* + * Dissociate an interrupt handler from an IRQ and set the handler to + * the stray interrupt handler. The 'handler' parameter is used only + * for consistency checking. + */ int icu_unset(intr, handler) int intr; @@ -567,8 +514,6 @@ icu_unset(intr, handler) disable_intr(); intr_countp[intr] = &intrcnt[1 + intr]; intr_handler[intr] = isa_strayintr; - intr_mptr[intr] = NULL; - intr_mask[intr] = HWI_MASK | SWI_MASK; intr_unit[intr] = &intr_unit[intr]; #ifdef FAST_HI_XXX /* XXX how do I re-create dvp here? */ @@ -581,353 +526,172 @@ icu_unset(intr, handler) setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ - MPINTR_UNLOCK(); write_eflags(ef); return (0); } -/* The following notice applies beyond this point in the file */ - -/* - * Copyright (c) 1997, Stefan Esser - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ - * - */ - -typedef struct intrec { - intrmask_t mask; - inthand2_t *handler; - void *argument; - struct intrec *next; - char *name; - int intr; - intrmask_t *maskptr; - int flags; -} intrec; - -static intrec *intreclist_head[ICU_LEN]; - -/* - * The interrupt multiplexer calls each of the handlers in turn. The - * ipl is initially quite low. It is raised as necessary for each call - * and lowered after the call. Thus out of order handling is possible - * even for interrupts of the same type. This is probably no more - * harmful than out of order handling in general (not harmful except - * for real time response which we don't support anyway). - */ -static void -intr_mux(void *arg) -{ - intrec *p; - intrmask_t oldspl; - - for (p = arg; p != NULL; p = p->next) { - oldspl = splq(p->mask); - p->handler(p->argument); - splx(oldspl); - } -} - -static intrec* -find_idesc(unsigned *maskptr, int irq) -{ - intrec *p = intreclist_head[irq]; - - while (p && p->maskptr != maskptr) - p = p->next; - - return (p); -} - -static intrec** -find_pred(intrec *idesc, int irq) +intrec * +inthand_add(const char *name, int irq, inthand2_t handler, void *arg, + int pri, int flags) { - intrec **pp = &intreclist_head[irq]; - intrec *p = *pp; - - while (p != idesc) { - if (p == NULL) - return (NULL); - pp = &p->next; - p = *pp; - } - return (pp); -} - -/* - * Both the low level handler and the shared interrupt multiplexer - * block out further interrupts as set in the handlers "mask", while - * the handler is running. In fact *maskptr should be used for this - * purpose, but since this requires one more pointer dereference on - * each interrupt, we rather bother update "mask" whenever *maskptr - * changes. The function "update_masks" should be called **after** - * all manipulation of the linked list of interrupt handlers hung - * off of intrdec_head[irq] is complete, since the chain of handlers - * will both determine the *maskptr values and the instances of mask - * that are fixed. This function should be called with the irq for - * which a new handler has been add blocked, since the masks may not - * yet know about the use of this irq for a device of a certain class. - */ + ithd *ithd = ithds[irq]; /* descriptor for the IRQ */ + intrec *head; /* chain of handlers for IRQ */ + intrec *idesc; /* descriptor for this handler */ + struct proc *p; /* interrupt thread */ + int errcode = 0; -static void -update_mux_masks(void) -{ - int irq; - for (irq = 0; irq < ICU_LEN; irq++) { - intrec *idesc = intreclist_head[irq]; - while (idesc != NULL) { - if (idesc->maskptr != NULL) { - /* our copy of *maskptr may be stale, refresh */ - idesc->mask = *idesc->maskptr; - } - idesc = idesc->next; + if (name == NULL) /* no name? */ + panic ("anonymous interrupt"); + if (ithd == NULL || ithd->it_ih == NULL) { + /* first handler for this irq. 
*/ + if (ithd == NULL) { + ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK); + if (ithd == NULL) + return (NULL); + bzero(ithd, sizeof(struct ithd)); + ithd->irq = irq; + ithds[irq] = ithd; } - } -} - -static void -update_masks(intrmask_t *maskptr, int irq) -{ - intrmask_t mask = 1 << irq; - - if (maskptr == NULL) - return; - - if (find_idesc(maskptr, irq) == NULL) { - /* no reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) == 0) - return; - /* the irq was included in the classes mask, remove it */ - *maskptr &= ~mask; - } else { - /* a reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) != 0) - return; - /* put the irq into the classes mask */ - *maskptr |= mask; - } - /* we need to update all values in the intr_mask[irq] array */ - update_intr_masks(); - /* update mask in chains of the interrupt multiplex handler as well */ - update_mux_masks(); -} - -/* - * Add interrupt handler to linked list hung off of intreclist_head[irq] - * and install shared interrupt multiplex handler, if necessary - */ - -static int -add_intrdesc(intrec *idesc) -{ - int irq = idesc->intr; - - intrec *head = intreclist_head[irq]; - - if (head == NULL) { - /* first handler for this irq, just install it */ - if (icu_setup(irq, idesc->handler, idesc->argument, - idesc->maskptr, idesc->flags) != 0) - return (-1); - - update_intrname(irq, idesc->name); - /* keep reference */ - intreclist_head[irq] = idesc; - } else { - if ((idesc->flags & INTR_EXCL) != 0 - || (head->flags & INTR_EXCL) != 0) { + /* + * If we have a fast interrupt, we need to set the + * handler address directly. Do that below. For a + * slow interrupt, we don't need to know more details, + * so do it here because it's tidier. + */ + if ((flags & INTR_FAST) == 0) { /* - * can't append new handler, if either list head or - * new handler do not allow interrupts to be shared + * Only create a kernel thread if we don't already + * have one. */ - if (bootverbose) - printf("\tdevice combination doesn't support " - "shared irq%d\n", irq); - return (-1); - } - if (head->next == NULL) { + if (ithd->it_proc == NULL) { + errcode = kthread_create(ithd_loop, NULL, &p, + RFSTOPPED | RFHIGHPID, "irq%d: %s", irq, + name); + if (errcode) + panic("inthand_add: Can't create " + "interrupt thread"); + p->p_rtprio.type = RTP_PRIO_ITHREAD; + p->p_stat = SWAIT; /* we're idle */ + + /* Put in linkages. */ + ithd->it_proc = p; + p->p_ithd = ithd; + } else + snprintf(ithd->it_proc->p_comm, MAXCOMLEN, + "irq%d: %s", irq, name); + p->p_rtprio.prio = pri; + /* - * second handler for this irq, replace device driver's - * handler by shared interrupt multiplexer function + * The interrupt process must be in place, but + * not necessarily schedulable, before we + * initialize the ICU, since it may cause an + * immediate interrupt. */ - icu_unset(irq, head->handler); - if (icu_setup(irq, intr_mux, head, 0, 0) != 0) - return (-1); - if (bootverbose) - printf("\tusing shared irq%d.\n", irq); - update_intrname(irq, "mux"); + if (icu_setup(irq, &sched_ithd, arg, flags) != 0) + panic("inthand_add: Can't initialize ICU"); } - /* just append to the end of the chain */ - while (head->next != NULL) - head = head->next; - head->next = idesc; - } - update_masks(idesc->maskptr, irq); - return (0); -} - -/* - * Create and activate an interrupt handler descriptor data structure. - * - * The dev_instance pointer is required for resource management, and will - * only be passed through to resource_claim(). 
- * - * There will be functions that derive a driver and unit name from a - * dev_instance variable, and those functions will be used to maintain the - * interrupt counter label array referenced by systat and vmstat to report - * device interrupt rates (->update_intrlabels). - * - * Add the interrupt handler descriptor data structure created by an - * earlier call of create_intr() to the linked list for its irq and - * adjust the interrupt masks if necessary. - * - * WARNING: This is an internal function and not to be used by device - * drivers. It is subject to change without notice. - */ - -intrec * -inthand_add(const char *name, int irq, inthand2_t handler, void *arg, - intrmask_t *maskptr, int flags) -{ - intrec *idesc; - int errcode = -1; - intrmask_t oldspl; - - if (ICU_LEN > 8 * sizeof *maskptr) { - printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n", - ICU_LEN, 8 * sizeof *maskptr); + } else if ((flags & INTR_EXCL) != 0 + || (ithd->it_ih->flags & INTR_EXCL) != 0) { + /* + * We can't append the new handler if either + * list ithd or new handler do not allow + * interrupts to be shared. + */ + if (bootverbose) + printf("\tdevice combination %s and %s " + "doesn't support shared irq%d\n", + ithd->it_ih->name, name, irq); + return(NULL); + } else if (flags & INTR_FAST) { + /* We can only have one fast interrupt by itself. */ + if (bootverbose) + printf("\tCan't add fast interrupt %s" + " to normal interrupt %s on irq%d", + name, ithd->it_ih->name, irq); return (NULL); + } else { /* update p_comm */ + p = ithd->it_proc; + if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) { + strcat(p->p_comm, " "); + strcat(p->p_comm, name); + } else if (strlen(p->p_comm) == MAXCOMLEN) + p->p_comm[MAXCOMLEN - 1] = '+'; + else + strcat(p->p_comm, "+"); } - if ((unsigned)irq >= ICU_LEN) { - printf("create_intr: requested irq%d too high, limit is %d\n", - irq, ICU_LEN -1); + idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK); + if (idesc == NULL) return (NULL); - } + bzero(idesc, sizeof (struct intrec)); - idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK); - if (idesc == NULL) - return NULL; - bzero(idesc, sizeof *idesc); + idesc->handler = handler; + idesc->argument = arg; + idesc->flags = flags; + idesc->ithd = ithd; - if (name == NULL) - name = "???"; idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK); if (idesc->name == NULL) { free(idesc, M_DEVBUF); - return NULL; + return (NULL); } strcpy(idesc->name, name); - idesc->handler = handler; - idesc->argument = arg; - idesc->maskptr = maskptr; - idesc->intr = irq; - idesc->flags = flags; - - /* block this irq */ - oldspl = splq(1 << irq); - - /* add irq to class selected by maskptr */ - errcode = add_intrdesc(idesc); - splx(oldspl); - - if (errcode != 0) { + /* Slow interrupts got set up above. 
*/ + if ((flags & INTR_FAST) + && (icu_setup(irq, idesc->handler, idesc->argument, + idesc->flags) != 0) ) { if (bootverbose) - printf("\tintr_connect(irq%d) failed, result=%d\n", + printf("\tinthand_add(irq%d) failed, result=%d\n", irq, errcode); free(idesc->name, M_DEVBUF); free(idesc, M_DEVBUF); - idesc = NULL; + return NULL; } - + head = ithd->it_ih; /* look at chain of handlers */ + if (head) { + while (head->next != NULL) + head = head->next; /* find the end */ + head->next = idesc; /* hook it in there */ + } else + ithd->it_ih = idesc; /* put it up front */ + update_intrname(irq, idesc->name); return (idesc); } /* - * Deactivate and remove the interrupt handler descriptor data connected - * created by an earlier call of intr_connect() from the linked list and - * adjust theinterrupt masks if necessary. + * Deactivate and remove linked list the interrupt handler descriptor + * data connected created by an earlier call of inthand_add(), then + * adjust the interrupt masks if necessary. * - * Return the memory held by the interrupt handler descriptor data structure - * to the system. Make sure, the handler is not actively used anymore, before. + * Return the memory held by the interrupt handler descriptor data + * structure to the system. First ensure the handler is not actively + * in use. */ int inthand_remove(intrec *idesc) { - intrec **hook, *head; - int irq; - int errcode = 0; - intrmask_t oldspl; + ithd *ithd; /* descriptor for the IRQ */ + intrec *ih; /* chain of handlers */ if (idesc == NULL) return (-1); + ithd = idesc->ithd; + ih = ithd->it_ih; - irq = idesc->intr; - - /* find pointer that keeps the reference to this interrupt descriptor */ - hook = find_pred(idesc, irq); - if (hook == NULL) + if (ih == idesc) /* first in the chain */ + ithd->it_ih = idesc->next; /* unhook it */ + else { + while ((ih != NULL) + && (ih->next != idesc) ) + ih = ih->next; + if (ih->next != idesc) return (-1); - - /* make copy of original list head, the line after may overwrite it */ - head = intreclist_head[irq]; - - /* unlink: make predecessor point to idesc->next instead of to idesc */ - *hook = idesc->next; - - /* now check whether the element we removed was the list head */ - if (idesc == head) { - - oldspl = splq(1 << irq); - - /* check whether the new list head is the only element on list */ - head = intreclist_head[irq]; - if (head != NULL) { - icu_unset(irq, intr_mux); - if (head->next != NULL) { - /* install the multiplex handler with new list head as argument */ - errcode = icu_setup(irq, intr_mux, head, 0, 0); - if (errcode == 0) - update_intrname(irq, NULL); - } else { - /* install the one remaining handler for this irq */ - errcode = icu_setup(irq, head->handler, - head->argument, - head->maskptr, head->flags); - if (errcode == 0) - update_intrname(irq, head->name); + ih->next = ih->next->next; } - } else { - /* revert to old handler, eg: strayintr */ - icu_unset(irq, idesc->handler); - } - splx(oldspl); - } - update_masks(idesc->maskptr, irq); + + if (ithd->it_ih == NULL) /* no handlers left, */ + icu_unset(ithd->irq, idesc->handler); free(idesc, M_DEVBUF); return (0); } diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h index 5982295..87c97a3 100644 --- a/sys/amd64/isa/intr_machdep.h +++ b/sys/amd64/isa/intr_machdep.h @@ -98,7 +98,6 @@ #define TPR_BLOCK_XCPUSTOP 0xaf /* */ #define TPR_BLOCK_ALL 0xff /* all INTs */ - #ifdef TEST_TEST1 /* put a 'fake' HWI in top of APIC prio 0x3x, 32 + 31 = 63 = 0x3f */ #define XTEST1_OFFSET (ICU_OFFSET + 31) @@ -145,8 +144,9 
@@ extern u_long intrcnt[]; /* counts for for each device and stray */ extern char intrnames[]; /* string table containing device names */ extern u_long *intr_countp[]; /* pointers into intrcnt[] */ extern inthand2_t *intr_handler[]; /* C entry points of intr handlers */ -extern u_int intr_mask[]; /* sets of intrs masked during handling of 1 */ +extern ithd *ithds[]; extern void *intr_unit[]; /* cookies to pass to intr handlers */ +extern ithd softinterrupt; /* soft interrupt thread */ inthand_t IDTVEC(fastintr0), IDTVEC(fastintr1), @@ -190,26 +190,60 @@ inthand_t #endif /** TEST_TEST1 */ #endif /* SMP || APIC_IO */ +#ifdef PC98 +#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */ +#define ICU_SLAVEID 7 +#else +#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ +#define ICU_SLAVEID 2 +#endif + +#ifdef APIC_IO +/* + * This is to accommodate "mixed-mode" programming for + * motherboards that don't connect the 8254 to the IO APIC. + */ +#define AUTO_EOI_1 1 +#endif + +#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) + void isa_defaultirq __P((void)); int isa_nmi __P((int cd)); int icu_setup __P((int intr, inthand2_t *func, void *arg, - u_int *maskptr, int flags)); + int flags)); int icu_unset __P((int intr, inthand2_t *handler)); -int update_intr_masks __P((void)); intrmask_t splq __P((intrmask_t mask)); -#define INTR_FAST 0x00000001 /* fast interrupt handler */ -#define INTR_EXCL 0x00010000 /* excl. intr, default is shared */ +/* + * Describe a hardware interrupt handler. These structures are + * accessed via the array intreclist, which contains one pointer per + * hardware interrupt. + * + * Multiple interrupt handlers for a specific IRQ can be chained + * together via the 'next' pointer. + */ +typedef struct intrec { + inthand2_t *handler; /* code address of handler */ + void *argument; /* argument to pass to handler */ + enum intr_type flags; /* flag bits (sys/bus.h) */ + char *name; /* name of handler */ + ithd *ithd; /* handler we're connected to */ + struct intrec *next; /* next handler for this irq */ +} intrec; /* * WARNING: These are internal functions and not to be used by device drivers! * They are subject to change without notice. */ struct intrec *inthand_add(const char *name, int irq, inthand2_t handler, - void *arg, intrmask_t *maskptr, int flags); - + void *arg, int pri, int flags); int inthand_remove(struct intrec *idesc); +void sched_ithd(void *); +void ithd_loop(void *); +void start_softintr(void *); +void intr_soft(void *); #endif /* LOCORE */ diff --git a/sys/amd64/isa/ithread.c b/sys/amd64/isa/ithread.c new file mode 100644 index 0000000..4ceac42 --- /dev/null +++ b/sys/amd64/isa/ithread.c @@ -0,0 +1,353 @@ +/*- + * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Berkeley Software Design Inc's name may not be used to endorse or + * promote products derived from this software without specific prior + * written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * From BSDI: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp + * $FreeBSD$ + */ + +/* Interrupt thread code. */ + +#include "opt_auto_eoi.h" + +#include "isa.h" + +#include +#include /* change this name XXX */ +#ifndef SMP +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(APIC_IO) +#include +#include /** FAST_HI */ +#include +#endif /* APIC_IO */ +#ifdef PC98 +#include +#include +#include +#else +#include +#endif +#include + +#if NISA > 0 +#include +#endif +#include +#include +#ifdef APIC_IO +#include +#endif + +#include "mca.h" +#if NMCA > 0 +#include +#endif + +#include +#include +#include +#include +#if 0 +#include +#endif + +u_long softintrcnt [NSWI]; + +SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL) + +/* + * Schedule a heavyweight interrupt process. This function is called + * from the interrupt handlers Xintr. + */ +void +sched_ithd(void *cookie) +{ + int irq = (int) cookie; /* IRQ we're handling */ + ithd *ir = ithds[irq]; /* and the process that does it */ + + /* This used to be in icu_vector.s */ + /* + * We count software interrupts when we process them. The + * code here follows previous practice, but there's an + * argument for counting hardware interrupts when they're + * processed too. + */ + if (irq < NHWI) /* real interrupt, */ + atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */ + atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */ + + CTR3(KTR_INTR, "sched_ithd pid %d(%s) need=%d", + ir->it_proc->p_pid, ir->it_proc->p_comm, ir->it_need); + +#if 0 + /* + * If we are in the debugger, we can't use interrupt threads to + * process interrupts since the threads are scheduled. Instead, + * call the interrupt handlers directly. This should be able to + * go away once we have light-weight interrupt handlers. + */ + if (db_active) { + intrec *ih; /* and our interrupt handler chain */ +#if 0 + membar_unlock(); /* push out "it_need=0" */ +#endif + for (ih = ir->it_ih; ih != NULL; ih = ih->next) { + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_enter(&Giant, MTX_DEF); + ih->handler(ih->argument); + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_exit(&Giant, MTX_DEF); + } + + INTREN (1 << ir->irq); /* reset the mask bit */ + return; + } +#endif + + /* + * Set it_need so that if the thread is already running but close + * to done, it will do another go-round. Then get the sched lock + * and see if the thread is on whichkqs yet. If not, put it on + * there. In any case, kick everyone so that if the new thread + * is higher priority than their current thread, it gets run now. 
+ */ + ir->it_need = 1; + mtx_enter(&sched_lock, MTX_SPIN); + if (ir->it_proc->p_stat == SWAIT) { /* not on run queue */ + CTR1(KTR_INTR, "sched_ithd: setrunqueue %d", + ir->it_proc->p_pid); +/* membar_lock(); */ + ir->it_proc->p_stat = SRUN; + setrunqueue(ir->it_proc); + aston(); + } + else { +if (irq < NHWI && (irq & 7) != 0) + CTR3(KTR_INTR, "sched_ithd %d: it_need %d, state %d", + ir->it_proc->p_pid, + ir->it_need, + ir->it_proc->p_stat ); + } + mtx_exit(&sched_lock, MTX_SPIN); +#if 0 + aston(); /* ??? check priorities first? */ +#else + need_resched(); +#endif +} + +/* + * This is the main code for all interrupt threads. It gets put on + * whichkqs by setrunqueue above. + */ +void +ithd_loop(void *dummy) +{ + ithd *me; /* our thread context */ + intrec *ih; /* and our interrupt handler chain */ + + me = curproc->p_ithd; /* point to myself */ + + /* + * As long as we have interrupts outstanding, go through the + * list of handlers, giving each one a go at it. + */ + for (;;) { + CTR3(KTR_INTR, "ithd_loop pid %d(%s) need=%d", + me->it_proc->p_pid, me->it_proc->p_comm, me->it_need); + while (me->it_need) { + /* + * Service interrupts. If another interrupt + * arrives while we are running, they will set + * it_need to denote that we should make + * another pass. + */ + me->it_need = 0; +#if 0 + membar_unlock(); /* push out "it_need=0" */ +#endif + for (ih = me->it_ih; ih != NULL; ih = ih->next) { + CTR5(KTR_INTR, + "ithd_loop pid %d ih=%p: %p(%p) flg=%x", + me->it_proc->p_pid, (void *)ih, + (void *)ih->handler, ih->argument, + ih->flags); + + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_enter(&Giant, MTX_DEF); + ih->handler(ih->argument); + if ((ih->flags & INTR_MPSAFE) == 0) + mtx_exit(&Giant, MTX_DEF); + } + } + + /* + * Processed all our interrupts. Now get the sched + * lock. This may take a while and it_need may get + * set again, so we have to check it again. + */ + mtx_enter(&sched_lock, MTX_SPIN); + if (!me->it_need) { + + INTREN (1 << me->irq); /* reset the mask bit */ + me->it_proc->p_stat = SWAIT; /* we're idle */ +#ifdef APIC_IO + CTR1(KTR_INTR, "ithd_loop pid %d: done", + me->it_proc->p_pid); +#else + CTR2(KTR_INTR, "ithd_loop pid %d: done, imen=%x", + me->it_proc->p_pid, imen); +#endif + mi_switch(); + CTR1(KTR_INTR, "ithd_loop pid %d: resumed", + me->it_proc->p_pid); + } + mtx_exit(&sched_lock, MTX_SPIN); + } +} + +/* + * Start soft interrupt thread. + */ +void +start_softintr(void *dummy) +{ + int error; + struct proc *p; + ithd *softintr; /* descriptor for the "IRQ" */ + intrec *idesc; /* descriptor for this handler */ + char *name = "sintr"; /* name for idesc */ + int i; + + if (ithds[SOFTINTR]) { /* we already have a thread */ + printf("start_softintr: already running"); + return; + } + /* first handler for this irq. */ + softintr = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK); + if (softintr == NULL) + panic ("Can't create soft interrupt thread"); + bzero(softintr, sizeof(struct ithd)); + softintr->irq = SOFTINTR; + ithds[SOFTINTR] = softintr; + error = kthread_create(intr_soft, NULL, &p, + RFSTOPPED | RFHIGHPID, "softinterrupt"); + if (error) + panic("start_softintr: kthread_create error %d\n", error); + + p->p_rtprio.type = RTP_PRIO_ITHREAD; + p->p_rtprio.prio = PI_SOFT; /* soft interrupt */ + p->p_stat = SWAIT; /* we're idle */ + + /* Put in linkages. 
*/ + softintr->it_proc = p; + p->p_ithd = softintr; /* reverse link */ + + idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK); + if (idesc == NULL) + panic ("Can't create soft interrupt thread"); + bzero(idesc, sizeof (struct intrec)); + + idesc->ithd = softintr; + idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK); + if (idesc->name == NULL) + panic ("Can't create soft interrupt thread"); + strcpy(idesc->name, name); + for (i = NHWI; i < NHWI + NSWI; i++) + intr_countp[i] = &softintrcnt [i - NHWI]; +} + +/* + * Software interrupt process code. + */ +void +intr_soft(void *dummy) +{ + int i; + ithd *me; /* our thread context */ + + me = curproc->p_ithd; /* point to myself */ + + /* Main loop */ + for (;;) { +#if 0 + CTR3(KTR_INTR, "intr_soft pid %d(%s) need=%d", + me->it_proc->p_pid, me->it_proc->p_comm, + me->it_need); +#endif + + /* + * Service interrupts. If another interrupt arrives + * while we are running, they will set it_need to + * denote that we should make another pass. + */ + me->it_need = 0; + while ((i = ffs(spending))) { + i--; + atomic_add_long(intr_countp[i], 1); + spending &= ~ (1 << i); + mtx_enter(&Giant, MTX_DEF); + (ihandlers[i])(); + mtx_exit(&Giant, MTX_DEF); + } + /* + * Processed all our interrupts. Now get the sched + * lock. This may take a while and it_need may get + * set again, so we have to check it again. + */ + mtx_enter(&sched_lock, MTX_SPIN); + if (!me->it_need) { +#if 0 + CTR1(KTR_INTR, "intr_soft pid %d: done", + me->it_proc->p_pid); +#endif + me->it_proc->p_stat = SWAIT; /* we're idle */ + mi_switch(); +#if 0 + CTR1(KTR_INTR, "intr_soft pid %d: resumed", + me->it_proc->p_pid); +#endif + } + mtx_exit(&sched_lock, MTX_SPIN); + } +} diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c index 34a8c22..870760e 100644 --- a/sys/amd64/isa/nmi.c +++ b/sys/amd64/isa/nmi.c @@ -36,12 +36,6 @@ * from: @(#)isa.c 7.2 (Berkeley) 5/13/91 * $FreeBSD$ */ -/* - * This file contains an aggregated module marked: - * Copyright (c) 1997, Stefan Esser - * All rights reserved. - * See the notice for details. - */ #include "opt_auto_eoi.h" @@ -51,11 +45,14 @@ #ifndef SMP #include #endif +#include #include #include #include +#include #include #include +#include #include #include #include @@ -91,30 +88,14 @@ #include #endif -/* XXX should be in suitable include files */ -#ifdef PC98 -#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */ -#define ICU_SLAVEID 7 -#else -#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */ -#define ICU_SLAVEID 2 -#endif - -#ifdef APIC_IO /* - * This is to accommodate "mixed-mode" programming for - * motherboards that don't connect the 8254 to the IO APIC. + * Per-interrupt data. We consider the soft interrupt to be a special + * case, so these arrays have NHWI + NSWI entries, not ICU_LEN. 
*/ -#define AUTO_EOI_1 1 -#endif - -#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN) - -u_long *intr_countp[ICU_LEN]; -inthand2_t *intr_handler[ICU_LEN]; -u_int intr_mask[ICU_LEN]; -static u_int* intr_mptr[ICU_LEN]; -void *intr_unit[ICU_LEN]; +u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */ +inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */ +ithd *ithds[NHWI + NSWI]; /* real interrupt handler */ +void *intr_unit[NHWI + NSWI]; static inthand_t *fastintr[ICU_LEN] = { &IDTVEC(fastintr0), &IDTVEC(fastintr1), @@ -292,8 +273,9 @@ isa_nmi(cd) } /* - * Fill in default interrupt table (in case of spuruious interrupt - * during configuration of kernel, setup interrupt control unit + * Create a default interrupt table to avoid problems caused by + * spurious interrupts during configuration of kernel, then setup + * interrupt control unit. */ void isa_defaultirq() @@ -364,16 +346,6 @@ isa_strayintr(vcookiep) { int intr = (void **)vcookiep - &intr_unit[0]; - /* DON'T BOTHER FOR NOW! */ - /* for some reason, we get bursts of intr #7, even if not enabled! */ - /* - * Well the reason you got bursts of intr #7 is because someone - * raised an interrupt line and dropped it before the 8259 could - * prioritize it. This is documented in the intel data book. This - * means you have BAD hardware! I have changed this so that only - * the first 5 get logged, then it quits logging them, and puts - * out a special message. rgrimes 3/25/1993 - */ /* * XXX TODO print a different message for #7 if it is for a * glitch. Glitches can be distinguished from real #7's by @@ -405,36 +377,10 @@ isa_irq_pending() } #endif -int -update_intr_masks(void) -{ - int intr, n=0; - u_int mask,*maskptr; - - for (intr=0; intr < ICU_LEN; intr ++) { -#if defined(APIC_IO) - /* no 8259 SLAVE to ignore */ -#else - if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */ -#endif /* APIC_IO */ - maskptr = intr_mptr[intr]; - if (!maskptr) - continue; - *maskptr |= SWI_LOW_MASK | (1 << intr); - mask = *maskptr; - if (mask != intr_mask[intr]) { -#if 0 - printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n", - intr, intr_mask[intr], mask, maskptr); -#endif - intr_mask[intr]=mask; - n++; - } - - } - return (n); -} - +/* + * Update intrnames array with the specified name. This is used by + * vmstat(8) and the like. + */ static void update_intrname(int intr, char *name) { @@ -485,7 +431,7 @@ found: } int -icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) +icu_setup(int intr, inthand2_t *handler, void *arg, int flags) { #ifdef FAST_HI int select; /* the select register is 8 bits */ @@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) u_int32_t value; /* the window register is 32 bits */ #endif /* FAST_HI */ u_long ef; - u_int mask = (maskptr ? 
*maskptr : 0); #if defined(APIC_IO) if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */ @@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) ef = read_eflags(); disable_intr(); intr_handler[intr] = handler; - intr_mptr[intr] = maskptr; - intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr); intr_unit[intr] = arg; #ifdef FAST_HI if (flags & INTR_FAST) { @@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags) SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ INTREN(1 << intr); - MPINTR_UNLOCK(); write_eflags(ef); return (0); } +/* + * Dissociate an interrupt handler from an IRQ and set the handler to + * the stray interrupt handler. The 'handler' parameter is used only + * for consistency checking. + */ int icu_unset(intr, handler) int intr; @@ -567,8 +514,6 @@ icu_unset(intr, handler) disable_intr(); intr_countp[intr] = &intrcnt[1 + intr]; intr_handler[intr] = isa_strayintr; - intr_mptr[intr] = NULL; - intr_mask[intr] = HWI_MASK | SWI_MASK; intr_unit[intr] = &intr_unit[intr]; #ifdef FAST_HI_XXX /* XXX how do I re-create dvp here? */ @@ -581,353 +526,172 @@ icu_unset(intr, handler) setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif /* FAST_HI */ - MPINTR_UNLOCK(); write_eflags(ef); return (0); } -/* The following notice applies beyond this point in the file */ - -/* - * Copyright (c) 1997, Stefan Esser - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice unmodified, this list of conditions, and the following - * disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - * - */ - -typedef struct intrec { - intrmask_t mask; - inthand2_t *handler; - void *argument; - struct intrec *next; - char *name; - int intr; - intrmask_t *maskptr; - int flags; -} intrec; - -static intrec *intreclist_head[ICU_LEN]; - -/* - * The interrupt multiplexer calls each of the handlers in turn. The - * ipl is initially quite low. It is raised as necessary for each call - * and lowered after the call. Thus out of order handling is possible - * even for interrupts of the same type. This is probably no more - * harmful than out of order handling in general (not harmful except - * for real time response which we don't support anyway). 
- */ -static void -intr_mux(void *arg) -{ - intrec *p; - intrmask_t oldspl; - - for (p = arg; p != NULL; p = p->next) { - oldspl = splq(p->mask); - p->handler(p->argument); - splx(oldspl); - } -} - -static intrec* -find_idesc(unsigned *maskptr, int irq) -{ - intrec *p = intreclist_head[irq]; - - while (p && p->maskptr != maskptr) - p = p->next; - - return (p); -} - -static intrec** -find_pred(intrec *idesc, int irq) +intrec * +inthand_add(const char *name, int irq, inthand2_t handler, void *arg, + int pri, int flags) { - intrec **pp = &intreclist_head[irq]; - intrec *p = *pp; - - while (p != idesc) { - if (p == NULL) - return (NULL); - pp = &p->next; - p = *pp; - } - return (pp); -} - -/* - * Both the low level handler and the shared interrupt multiplexer - * block out further interrupts as set in the handlers "mask", while - * the handler is running. In fact *maskptr should be used for this - * purpose, but since this requires one more pointer dereference on - * each interrupt, we rather bother update "mask" whenever *maskptr - * changes. The function "update_masks" should be called **after** - * all manipulation of the linked list of interrupt handlers hung - * off of intrdec_head[irq] is complete, since the chain of handlers - * will both determine the *maskptr values and the instances of mask - * that are fixed. This function should be called with the irq for - * which a new handler has been add blocked, since the masks may not - * yet know about the use of this irq for a device of a certain class. - */ + ithd *ithd = ithds[irq]; /* descriptor for the IRQ */ + intrec *head; /* chain of handlers for IRQ */ + intrec *idesc; /* descriptor for this handler */ + struct proc *p; /* interrupt thread */ + int errcode = 0; -static void -update_mux_masks(void) -{ - int irq; - for (irq = 0; irq < ICU_LEN; irq++) { - intrec *idesc = intreclist_head[irq]; - while (idesc != NULL) { - if (idesc->maskptr != NULL) { - /* our copy of *maskptr may be stale, refresh */ - idesc->mask = *idesc->maskptr; - } - idesc = idesc->next; + if (name == NULL) /* no name? */ + panic ("anonymous interrupt"); + if (ithd == NULL || ithd->it_ih == NULL) { + /* first handler for this irq. 
*/ + if (ithd == NULL) { + ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK); + if (ithd == NULL) + return (NULL); + bzero(ithd, sizeof(struct ithd)); + ithd->irq = irq; + ithds[irq] = ithd; } - } -} - -static void -update_masks(intrmask_t *maskptr, int irq) -{ - intrmask_t mask = 1 << irq; - - if (maskptr == NULL) - return; - - if (find_idesc(maskptr, irq) == NULL) { - /* no reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) == 0) - return; - /* the irq was included in the classes mask, remove it */ - *maskptr &= ~mask; - } else { - /* a reference to this maskptr was found in this irq's chain */ - if ((*maskptr & mask) != 0) - return; - /* put the irq into the classes mask */ - *maskptr |= mask; - } - /* we need to update all values in the intr_mask[irq] array */ - update_intr_masks(); - /* update mask in chains of the interrupt multiplex handler as well */ - update_mux_masks(); -} - -/* - * Add interrupt handler to linked list hung off of intreclist_head[irq] - * and install shared interrupt multiplex handler, if necessary - */ - -static int -add_intrdesc(intrec *idesc) -{ - int irq = idesc->intr; - - intrec *head = intreclist_head[irq]; - - if (head == NULL) { - /* first handler for this irq, just install it */ - if (icu_setup(irq, idesc->handler, idesc->argument, - idesc->maskptr, idesc->flags) != 0) - return (-1); - - update_intrname(irq, idesc->name); - /* keep reference */ - intreclist_head[irq] = idesc; - } else { - if ((idesc->flags & INTR_EXCL) != 0 - || (head->flags & INTR_EXCL) != 0) { + /* + * If we have a fast interrupt, we need to set the + * handler address directly. Do that below. For a + * slow interrupt, we don't need to know more details, + * so do it here because it's tidier. + */ + if ((flags & INTR_FAST) == 0) { /* - * can't append new handler, if either list head or - * new handler do not allow interrupts to be shared + * Only create a kernel thread if we don't already + * have one. */ - if (bootverbose) - printf("\tdevice combination doesn't support " - "shared irq%d\n", irq); - return (-1); - } - if (head->next == NULL) { + if (ithd->it_proc == NULL) { + errcode = kthread_create(ithd_loop, NULL, &p, + RFSTOPPED | RFHIGHPID, "irq%d: %s", irq, + name); + if (errcode) + panic("inthand_add: Can't create " + "interrupt thread"); + p->p_rtprio.type = RTP_PRIO_ITHREAD; + p->p_stat = SWAIT; /* we're idle */ + + /* Put in linkages. */ + ithd->it_proc = p; + p->p_ithd = ithd; + } else + snprintf(ithd->it_proc->p_comm, MAXCOMLEN, + "irq%d: %s", irq, name); + p->p_rtprio.prio = pri; + /* - * second handler for this irq, replace device driver's - * handler by shared interrupt multiplexer function + * The interrupt process must be in place, but + * not necessarily schedulable, before we + * initialize the ICU, since it may cause an + * immediate interrupt. */ - icu_unset(irq, head->handler); - if (icu_setup(irq, intr_mux, head, 0, 0) != 0) - return (-1); - if (bootverbose) - printf("\tusing shared irq%d.\n", irq); - update_intrname(irq, "mux"); + if (icu_setup(irq, &sched_ithd, arg, flags) != 0) + panic("inthand_add: Can't initialize ICU"); } - /* just append to the end of the chain */ - while (head->next != NULL) - head = head->next; - head->next = idesc; - } - update_masks(idesc->maskptr, irq); - return (0); -} - -/* - * Create and activate an interrupt handler descriptor data structure. - * - * The dev_instance pointer is required for resource management, and will - * only be passed through to resource_claim(). 
- * - * There will be functions that derive a driver and unit name from a - * dev_instance variable, and those functions will be used to maintain the - * interrupt counter label array referenced by systat and vmstat to report - * device interrupt rates (->update_intrlabels). - * - * Add the interrupt handler descriptor data structure created by an - * earlier call of create_intr() to the linked list for its irq and - * adjust the interrupt masks if necessary. - * - * WARNING: This is an internal function and not to be used by device - * drivers. It is subject to change without notice. - */ - -intrec * -inthand_add(const char *name, int irq, inthand2_t handler, void *arg, - intrmask_t *maskptr, int flags) -{ - intrec *idesc; - int errcode = -1; - intrmask_t oldspl; - - if (ICU_LEN > 8 * sizeof *maskptr) { - printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n", - ICU_LEN, 8 * sizeof *maskptr); + } else if ((flags & INTR_EXCL) != 0 + || (ithd->it_ih->flags & INTR_EXCL) != 0) { + /* + * We can't append the new handler if either + * list ithd or new handler do not allow + * interrupts to be shared. + */ + if (bootverbose) + printf("\tdevice combination %s and %s " + "doesn't support shared irq%d\n", + ithd->it_ih->name, name, irq); + return(NULL); + } else if (flags & INTR_FAST) { + /* We can only have one fast interrupt by itself. */ + if (bootverbose) + printf("\tCan't add fast interrupt %s" + " to normal interrupt %s on irq%d", + name, ithd->it_ih->name, irq); return (NULL); + } else { /* update p_comm */ + p = ithd->it_proc; + if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) { + strcat(p->p_comm, " "); + strcat(p->p_comm, name); + } else if (strlen(p->p_comm) == MAXCOMLEN) + p->p_comm[MAXCOMLEN - 1] = '+'; + else + strcat(p->p_comm, "+"); } - if ((unsigned)irq >= ICU_LEN) { - printf("create_intr: requested irq%d too high, limit is %d\n", - irq, ICU_LEN -1); + idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK); + if (idesc == NULL) return (NULL); - } + bzero(idesc, sizeof (struct intrec)); - idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK); - if (idesc == NULL) - return NULL; - bzero(idesc, sizeof *idesc); + idesc->handler = handler; + idesc->argument = arg; + idesc->flags = flags; + idesc->ithd = ithd; - if (name == NULL) - name = "???"; idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK); if (idesc->name == NULL) { free(idesc, M_DEVBUF); - return NULL; + return (NULL); } strcpy(idesc->name, name); - idesc->handler = handler; - idesc->argument = arg; - idesc->maskptr = maskptr; - idesc->intr = irq; - idesc->flags = flags; - - /* block this irq */ - oldspl = splq(1 << irq); - - /* add irq to class selected by maskptr */ - errcode = add_intrdesc(idesc); - splx(oldspl); - - if (errcode != 0) { + /* Slow interrupts got set up above. 
*/
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
 if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
 irq, errcode);
 free(idesc->name, M_DEVBUF);
 free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
 }
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
 return (idesc);
 }
 /*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate an interrupt handler descriptor created by an earlier
+ * call of inthand_add(), remove it from its ithd's linked list, and
+ * adjust the interrupt masks if necessary.
 *
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
 */
 int
 inthand_remove(intrec *idesc)
 {
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
 if (idesc == NULL)
 return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+ if (ih->next != idesc)
 return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
 }
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
 free(idesc, M_DEVBUF);
 return (0);
 }
diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c
index 637853e..8610e35 100644
--- a/sys/amd64/isa/npx.c
+++ b/sys/amd64/isa/npx.c
@@ -245,6 +245,12 @@ npx_probe(dev)
 setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
 npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+ * XXX This looks highly bogus, but it appears
that npc_probe1 + * needs interrupts enabled. Does this make any difference + * here? + */ enable_intr(); result = npx_probe1(dev); disable_intr(); @@ -797,7 +803,7 @@ npxdna() /* * Record new context early in case frstor causes an IRQ13. */ - npxproc = curproc; + PCPU_SET(npxproc, CURPROC); curpcb->pcb_savefpu.sv_ex_sw = 0; /* * The following frstor may cause an IRQ13 when the state being @@ -834,16 +840,18 @@ npxsave(addr) fnsave(addr); /* fnop(); */ start_emulating(); - npxproc = NULL; + PCPU_SET(npxproc, NULL); #else /* SMP */ + int intrstate; u_char icu1_mask; u_char icu2_mask; u_char old_icu1_mask; u_char old_icu2_mask; struct gate_descriptor save_idt_npxintr; + intrstate = save_intr(); disable_intr(); old_icu1_mask = inb(IO_ICU1 + 1); old_icu2_mask = inb(IO_ICU2 + 1); @@ -851,12 +859,12 @@ npxsave(addr) outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); idt[npx_intrno] = npx_idt_probeintr; - enable_intr(); + write_eflags(intrstate); stop_emulating(); fnsave(addr); fnop(); start_emulating(); - npxproc = NULL; + PCPU_SET(npxproc, NULL); disable_intr(); icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ icu2_mask = inb(IO_ICU2 + 1); @@ -866,7 +874,7 @@ npxsave(addr) (icu2_mask & ~(npx0_imask >> 8)) | (old_icu2_mask & (npx0_imask >> 8))); idt[npx_intrno] = save_idt_npxintr; - enable_intr(); /* back to usual state */ + restore_intr(intrstate); /* back to previous state */ #endif /* SMP */ } diff --git a/sys/amd64/isa/vector.S b/sys/amd64/isa/vector.S index 5447a90..79f2320 100644 --- a/sys/amd64/isa/vector.S +++ b/sys/amd64/isa/vector.S @@ -16,9 +16,10 @@ #include #endif +#define FAST_INTR_HANDLER_USES_ES 1 #ifdef FAST_INTR_HANDLER_USES_ES #define ACTUALLY_PUSHED 1 -#define MAYBE_MOVW_AX_ES movl %ax,%es +#define MAYBE_MOVW_AX_ES movw %ax,%es #define MAYBE_POPL_ES popl %es #define MAYBE_PUSHL_ES pushl %es #else @@ -36,11 +37,6 @@ .data ALIGN_DATA - .globl _intr_nesting_level -_intr_nesting_level: - .byte 0 - .space 3 - /* * Interrupt counters and names for export to vmstat(8) and friends. * @@ -58,7 +54,6 @@ _eintrcnt: _intrnames: .space NR_INTRNAMES * 16 _eintrnames: - .text /* diff --git a/sys/amd64/isa/vector.s b/sys/amd64/isa/vector.s index 5447a90..79f2320 100644 --- a/sys/amd64/isa/vector.s +++ b/sys/amd64/isa/vector.s @@ -16,9 +16,10 @@ #include #endif +#define FAST_INTR_HANDLER_USES_ES 1 #ifdef FAST_INTR_HANDLER_USES_ES #define ACTUALLY_PUSHED 1 -#define MAYBE_MOVW_AX_ES movl %ax,%es +#define MAYBE_MOVW_AX_ES movw %ax,%es #define MAYBE_POPL_ES popl %es #define MAYBE_PUSHL_ES pushl %es #else @@ -36,11 +37,6 @@ .data ALIGN_DATA - .globl _intr_nesting_level -_intr_nesting_level: - .byte 0 - .space 3 - /* * Interrupt counters and names for export to vmstat(8) and friends. * @@ -58,7 +54,6 @@ _eintrcnt: _intrnames: .space NR_INTRNAMES * 16 _eintrnames: - .text /* -- cgit v1.1
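
A note on the handshake in ithread.c above: sched_ithd() sets it_need and wakes the
thread, and ithd_loop() re-checks it_need while holding sched_lock before it puts
itself back into SWAIT; that re-check is what keeps an interrupt that arrives between
the handler loop and the sleep from being lost. The same shape can be modelled in
userland. The sketch below is only an analogy, assuming POSIX threads; the names
sched_request() and worker() are invented for illustration, the mutex stands in for
sched_lock, and the condition variable stands in for the SWAIT/setrunqueue pair.
Nothing in it is part of the patch.

	/* Userspace analogy of the it_need handshake (compile with -lpthread). */
	#include <pthread.h>
	#include <stdio.h>
	#include <unistd.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* ~ sched_lock */
	static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;   /* ~ wakeup */
	static int need;                                         /* ~ it_need */
	static int stop;                                         /* demo shutdown only */

	/* ~ sched_ithd(): note the pending work and kick the thread if it is idle. */
	static void
	sched_request(void)
	{
		pthread_mutex_lock(&lock);
		need = 1;
		pthread_cond_signal(&wake);
		pthread_mutex_unlock(&lock);
	}

	/*
	 * ~ ithd_loop(): service requests, re-checking 'need' under the lock
	 * before going back to sleep so nothing posted meanwhile is lost.
	 */
	static void *
	worker(void *arg)
	{
		pthread_mutex_lock(&lock);
		while (!stop) {
			if (need) {
				need = 0;
				pthread_mutex_unlock(&lock);
				printf("servicing request\n"); /* ~ run the handler chain */
				pthread_mutex_lock(&lock);
				continue;	/* re-check with the lock held */
			}
			pthread_cond_wait(&wake, &lock); /* ~ p_stat = SWAIT; mi_switch() */
		}
		pthread_mutex_unlock(&lock);
		return (arg);
	}

	int
	main(void)
	{
		pthread_t tid;
		int i;

		pthread_create(&tid, NULL, worker, NULL);
		for (i = 0; i < 3; i++) {
			sched_request();
			usleep(10000);
		}
		pthread_mutex_lock(&lock);
		stop = 1;
		pthread_cond_signal(&wake);
		pthread_mutex_unlock(&lock);
		pthread_join(tid, NULL);
		return (0);
	}

The kernel version sleeps on the run queue rather than on a condition variable, but
the decisive property is the same: the idle test and the transition to the sleeping
state happen atomically with respect to a newly posted request.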