author:    jasone <jasone@FreeBSD.org>  2000-09-07 01:33:02 +0000
committer: jasone <jasone@FreeBSD.org>  2000-09-07 01:33:02 +0000
commit:    769e0f974d8929599ba599ac496510fffc90ff34 (patch)
tree:      9387522900085835de81e7830e570ef3f6b3ea80  /sys/amd64
parent:    acf1927de02afda4855ec278b1128fd9446405ea (diff)
download:  FreeBSD-src-769e0f974d8929599ba599ac496510fffc90ff34.zip
           FreeBSD-src-769e0f974d8929599ba599ac496510fffc90ff34.tar.gz
Major update to the way synchronization is done in the kernel. Highlights
include:

 * Mutual exclusion is used instead of spl*(). See mutex(9). (Note: The
   alpha port is still in transition and currently uses both.)
 * Per-CPU idle processes.
 * Interrupts are run in their own separate kernel threads and can be
   preempted (i386 only).

Partially contributed by: BSDi (BSD/OS)
Submissions by (at least): cp, dfr, dillon, grog, jake, jhb, sheldonh
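For illustration, the core of the change is the move from interrupt-priority-level
masking to mutexes. Below is a minimal sketch of how the two styles compare, using
only the mutex(9) calls that appear in the diff itself (mtx_init(), mtx_enter(),
mtx_exit(), MTX_DEF/MTX_SPIN); the lock and the functions are hypothetical:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <machine/mutex.h>

    static mtx_t foo_lock;                  /* hypothetical lock for some shared state */

    /* Old style: block whole interrupt classes with spl*(). */
    static void
    foo_old(void)
    {
            int s;

            s = splhigh();
            /* ... touch shared state ... */
            splx(s);
    }

    /* New style: take a sleep mutex (MTX_DEF) or spin mutex (MTX_SPIN). */
    static void
    foo_new(void)
    {
            mtx_enter(&foo_lock, MTX_DEF);
            /* ... touch shared state ... */
            mtx_exit(&foo_lock, MTX_DEF);
    }

    /* Once, at initialization time: mtx_init(&foo_lock, "foo lock", MTX_DEF); */

A blocking MTX_DEF mutex can only replace spl*() because interrupt handlers now run
in their own kernel threads, which may themselves block on the same mutex.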
Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/amd64/amd64-gdbstub.c |  10
-rw-r--r--  sys/amd64/amd64/apic_vector.S   | 132
-rw-r--r--  sys/amd64/amd64/autoconf.c      |   8
-rw-r--r--  sys/amd64/amd64/cpu_switch.S    | 269
-rw-r--r--  sys/amd64/amd64/exception.S     |  41
-rw-r--r--  sys/amd64/amd64/exception.s     |  41
-rw-r--r--  sys/amd64/amd64/fpu.c           |  18
-rw-r--r--  sys/amd64/amd64/genassym.c      |  27
-rw-r--r--  sys/amd64/amd64/identcpu.c      |   3
-rw-r--r--  sys/amd64/amd64/initcpu.c       |   6
-rw-r--r--  sys/amd64/amd64/legacy.c        |  32
-rw-r--r--  sys/amd64/amd64/locore.S        |   3
-rw-r--r--  sys/amd64/amd64/locore.s        |   3
-rw-r--r--  sys/amd64/amd64/machdep.c       |  37
-rw-r--r--  sys/amd64/amd64/mp_machdep.c    |  88
-rw-r--r--  sys/amd64/amd64/mpboot.S        |  36
-rw-r--r--  sys/amd64/amd64/mptable.c       |  88
-rw-r--r--  sys/amd64/amd64/nexus.c         |  32
-rw-r--r--  sys/amd64/amd64/pmap.c          |   2
-rw-r--r--  sys/amd64/amd64/swtch.s         | 269
-rw-r--r--  sys/amd64/amd64/trap.c          | 391
-rw-r--r--  sys/amd64/amd64/tsc.c           | 155
-rw-r--r--  sys/amd64/amd64/vm_machdep.c    |  51
-rw-r--r--  sys/amd64/include/cpu.h         |  12
-rw-r--r--  sys/amd64/include/cpufunc.h     |  21
-rw-r--r--  sys/amd64/include/mptable.h     |  88
-rw-r--r--  sys/amd64/include/mutex.h       | 786
-rw-r--r--  sys/amd64/include/pcb.h         |   6
-rw-r--r--  sys/amd64/include/pcpu.h        |  33
-rw-r--r--  sys/amd64/include/smp.h         |  38
-rw-r--r--  sys/amd64/isa/atpic_vector.S    |  92
-rw-r--r--  sys/amd64/isa/clock.c           | 155
-rw-r--r--  sys/amd64/isa/icu_ipl.S         |  57
-rw-r--r--  sys/amd64/isa/icu_ipl.s         |  57
-rw-r--r--  sys/amd64/isa/icu_vector.S      |  92
-rw-r--r--  sys/amd64/isa/icu_vector.s      |  92
-rw-r--r--  sys/amd64/isa/intr_machdep.c    | 524
-rw-r--r--  sys/amd64/isa/intr_machdep.h    |  50
-rw-r--r--  sys/amd64/isa/ithread.c         | 353
-rw-r--r--  sys/amd64/isa/nmi.c             | 524
-rw-r--r--  sys/amd64/isa/npx.c             |  18
-rw-r--r--  sys/amd64/isa/vector.S          |   9
-rw-r--r--  sys/amd64/isa/vector.s          |   9
43 files changed, 2597 insertions, 2161 deletions
diff --git a/sys/amd64/amd64/amd64-gdbstub.c b/sys/amd64/amd64/amd64-gdbstub.c
index 986b8d4..b442a37 100644
--- a/sys/amd64/amd64/amd64-gdbstub.c
+++ b/sys/amd64/amd64/amd64-gdbstub.c
@@ -188,7 +188,8 @@ getpacket (char *buffer)
unsigned char ch;
int s;
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/* wait around for the start character, ignore all other characters */
@@ -239,7 +240,7 @@ getpacket (char *buffer)
}
}
while (checksum != xmitcsum);
- splx (s);
+ write_eflags(s);
}
/* send the packet in buffer. */
@@ -253,7 +254,8 @@ putpacket (char *buffer)
int s;
/* $<packet info>#<checksum>. */
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/*
@@ -285,7 +287,7 @@ putpacket (char *buffer)
putDebugChar (hexchars[checksum & 0xf]);
}
while ((getDebugChar () & 0x7f) != '+');
- splx (s);
+ write_eflags(s);
}
static char remcomInBuffer[BUFMAX];
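In the hunks above, the spltty()/splx() pair becomes: save the processor flags,
disable interrupts outright, restore the saved flags afterwards. A small sketch of
the idiom, assuming only the cpufunc.h routines the diff itself uses (read_eflags(),
disable_intr(), write_eflags()); the function is hypothetical:

    #include <machine/cpufunc.h>

    static void
    poll_debug_port(void)
    {
            int s;

            s = read_eflags();      /* remember whether interrupts were enabled */
            disable_intr();         /* cli */

            /* ... poll the device with interrupts hard-disabled ... */

            write_eflags(s);        /* put the previous interrupt state back */
    }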
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 2a7559d..54bf003 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -17,7 +17,7 @@
/*
- * Macros for interrupt interrupt entry, call to handler, and exit.
+ * Macros for interrupt entry, call to handler, and exit.
*/
#define FAST_INTR(irq_num, vec_name) \
@@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \
/*
- * Test to see if the source is currntly masked, clear if so.
+ * Test to see if the source is currently masked, clear if so.
*/
#define UNMASK_IRQ(irq_num) \
IMASK_LOCK ; /* into critical reg */ \
@@ -200,7 +200,16 @@ log_intr_event:
#else
#define APIC_ITRACE(name, irq_num, id)
#endif
-
+
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \
maybe_extra_ipending ; \
; \
APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
; \
MASK_LEVEL_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- MP_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f ; /* no */ \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
-; \
incb _intr_nesting_level ; \
; \
/* entry point used by doreti_unpend for HWIs. */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- movl _cpl, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cpl ; \
- lock ; \
- andl $~IRQ_BIT(irq_num), _ipending ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
+ pushl $irq_num; /* pass the IRQ */ \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
MEXITCOUNT ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; /* XXX: iactive bit might be 0 now */ \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- MP_RELLOCK ; \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- POP_FRAME ; /* and return */ \
- iret
+ jmp doreti_next
/*
* Handle "spurious INTerrupts".
@@ -434,20 +380,10 @@ _Xcpuast:
FAKE_MCOUNT(13*4(%esp))
- /*
- * Giant locks do not come cheap.
- * A lot of cycles are going to be wasted here.
- */
- call _get_mplock
-
- movl _cpl, %eax
- pushl %eax
orl $AST_PENDING, _astpending /* XXX */
incb _intr_nesting_level
sti
- pushl $0
-
movl _cpuid, %eax
lock
btrl %eax, _checkstate_pending_ast
@@ -461,7 +397,7 @@ _Xcpuast:
lock
incl CNAME(cpuast_cnt)
MEXITCOUNT
- jmp _doreti
+ jmp doreti_next
1:
/* We are already in the process of delivering an ast for this CPU */
POP_FRAME
@@ -487,40 +423,24 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- MP_TRYLOCK
- testl %eax,%eax /* Did we get the lock ? */
- jz 1f /* No */
-
lock
incl CNAME(forward_irq_hitcnt)
cmpb $4, _intr_nesting_level
- jae 2f
+ jae 1f
- movl _cpl, %eax
- pushl %eax
incb _intr_nesting_level
sti
- pushl $0
-
MEXITCOUNT
- jmp _doreti /* Handle forwarded interrupt */
+ jmp doreti_next /* Handle forwarded interrupt */
1:
lock
- incl CNAME(forward_irq_misscnt)
- call forward_irq /* Oops, we've lost the isr lock */
- MEXITCOUNT
- POP_FRAME
- iret
-2:
- lock
incl CNAME(forward_irq_toodeepcnt)
-3:
- MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
+#if 0
/*
*
*/
@@ -532,9 +452,11 @@ forward_irq:
cmpl $0, CNAME(forward_irq_enabled)
jz 4f
+/* XXX - this is broken now, because mp_lock doesn't exist
movl _mp_lock,%eax
cmpl $FREE_LOCK,%eax
jne 1f
+ */
movl $0, %eax /* Pick CPU #0 if noone has lock */
1:
shrl $24,%eax
@@ -559,6 +481,7 @@ forward_irq:
jnz 3b
4:
ret
+#endif
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
@@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr)
FAST_INTR(22,fastintr22)
FAST_INTR(23,fastintr23)
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, CLKINTR_PENDING)
INTR(1,intr1,)
INTR(2,intr2,)
@@ -728,15 +652,11 @@ _ihandlers:
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
+#if 0
/* active flag for lazy masking */
iactive:
.long 0
+#endif
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
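Expressed in C, the rewritten INTR() stub above now does little more than the
following. This is a conceptual sketch only: sched_ithd() is the new routine in
i386/isa/ithread.c (its prototype here is assumed from the assembler, which pushes
the IRQ number), and the masking/EOI helpers stand in for the MASK_LEVEL_IRQ and
EOI_IRQ assembler macros.

    void    mask_irq(int irq);      /* illustrative stand-in for MASK_LEVEL_IRQ */
    void    eoi_irq(int irq);       /* illustrative stand-in for EOI_IRQ */
    void    sched_ithd(int irq);    /* prototype assumed; see isa/ithread.c */

    void
    hardware_interrupt(int irq)
    {
            mask_irq(irq);          /* keep the source quiet for now */
            eoi_irq(irq);           /* acknowledge it at the APIC */
            sched_ithd(irq);        /* mark the IRQ's interrupt thread runnable */
            /* return through doreti; the handler itself runs later, in that thread */
    }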
diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c
index b209065..4edda4b 100644
--- a/sys/amd64/amd64/autoconf.c
+++ b/sys/amd64/amd64/autoconf.c
@@ -163,14 +163,6 @@ configure(dummy)
* XXX this is slightly misplaced.
*/
spl0();
-
- /*
- * Allow lowering of the ipl to the lowest kernel level if we
- * panic (or call tsleep() before clearing `cold'). No level is
- * completely safe (since a panic may occur in a critical region
- * at splhigh()), but we want at least bio interrupts to work.
- */
- safepri = cpl;
}
static void
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index c895fef..db56a1b 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
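The new sched_lock handling in cpu_switch() above is easier to follow in C. This is
a sketch only, using the pcb_schednest field and the mtx_lock/mtx_recurse fields
whose offsets genassym exports for the assembler; the field types are assumptions:

    #include <sys/param.h>
    #include <sys/proc.h>
    #include <machine/mutex.h>
    #include <machine/pcb.h>

    extern mtx_t sched_lock;

    /* Outgoing process: remember how deeply it held sched_lock. */
    static void
    save_sched_nesting(struct pcb *pcb)
    {
            pcb->pcb_schednest = sched_lock.mtx_recurse;
    }

    /* Incoming process: restore its nesting level and make it the owner. */
    static void
    restore_sched_nesting(struct pcb *pcb)
    {
            sched_lock.mtx_recurse = pcb->pcb_schednest;
            sched_lock.mtx_lock = (u_int)curproc;   /* owner == new curproc */
    }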
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index acb8b40..9e77114 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s
index acb8b40..9e77114 100644
--- a/sys/amd64/amd64/exception.s
+++ b/sys/amd64/amd64/exception.s
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 637853e..8610e35 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -245,6 +245,12 @@ npx_probe(dev)
setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+ * XXX This looks highly bogus, but it appears that npc_probe1
+ * needs interrupts enabled. Does this make any difference
+ * here?
+ */
enable_intr();
result = npx_probe1(dev);
disable_intr();
@@ -797,7 +803,7 @@ npxdna()
/*
* Record new context early in case frstor causes an IRQ13.
*/
- npxproc = curproc;
+ PCPU_SET(npxproc, CURPROC);
curpcb->pcb_savefpu.sv_ex_sw = 0;
/*
* The following frstor may cause an IRQ13 when the state being
@@ -834,16 +840,18 @@ npxsave(addr)
fnsave(addr);
/* fnop(); */
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
#else /* SMP */
+ int intrstate;
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
+ intrstate = save_intr();
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
old_icu2_mask = inb(IO_ICU2 + 1);
@@ -851,12 +859,12 @@ npxsave(addr)
outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
idt[npx_intrno] = npx_idt_probeintr;
- enable_intr();
+ write_eflags(intrstate);
stop_emulating();
fnsave(addr);
fnop();
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
disable_intr();
icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */
icu2_mask = inb(IO_ICU2 + 1);
@@ -866,7 +874,7 @@ npxsave(addr)
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
- enable_intr(); /* back to usual state */
+ restore_intr(intrstate); /* back to previous state */
#endif /* SMP */
}
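npxproc is now per-CPU state rather than a global, so it is accessed through the
PCPU_SET() accessor seen above (with PCPU_GET() assumed as its read-side
counterpart). A minimal sketch with hypothetical wrapper functions:

    #include <sys/param.h>
    #include <sys/proc.h>
    #include <machine/globals.h>

    static void
    npx_claim_fpu(void)
    {
            PCPU_SET(npxproc, CURPROC);     /* this CPU's FPU now belongs to us */
    }

    static void
    npx_release_fpu(void)
    {
            PCPU_SET(npxproc, NULL);        /* state saved; nobody owns the FPU */
    }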
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 60accd1..78c6075 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -51,6 +51,10 @@
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/resourcevar.h>
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
#include <machine/frame.h>
#include <machine/bootinfo.h>
#include <machine/tss.h>
@@ -73,6 +77,7 @@
#include <machine/sigframe.h>
#include <machine/globaldata.h>
#include <machine/vm86.h>
+#include <machine/mutex.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -127,9 +132,7 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
-#ifdef SMP
-ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest));
-#endif
+ASSYM(PCB_SCHEDNEST, offsetof(struct pcb, pcb_schednest));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
@@ -170,7 +173,9 @@ ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
ASSYM(GD_SIZEOF, sizeof(struct globaldata));
ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
+ASSYM(GD_PREVPROC, offsetof(struct globaldata, gd_prevproc));
ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc));
+ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc));
ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss));
ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
@@ -178,11 +183,21 @@ ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
+ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct globaldata, gd_intr_nesting_level));
#ifdef USER_LDT
ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
#endif
+ASSYM(GD_WITNESS_SPIN_CHECK, offsetof(struct globaldata, gd_witness_spin_check));
+
+/* XXX */
+#ifdef KTR_PERCPU
+ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx));
+ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf));
+ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data));
+#endif
+
#ifdef SMP
ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid));
ASSYM(GD_CPU_LOCKID, offsetof(struct globaldata, gd_cpu_lockid));
@@ -211,3 +226,9 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
+ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl));
+
+ASSYM(MTX_UNOWNED, MTX_UNOWNED);
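The new ASSYM() entries exist so that assembler files such as cpu_switch.S can
address struct mtx fields directly (e.g. _sched_lock+MTX_RECURSE). genassym is built
and used at compile time to turn C structure offsets into constants the assembler
can see; below is a stand-alone model of the idea, with a simplified stand-in
structure rather than the kernel's real one:

    #include <stddef.h>
    #include <stdio.h>

    struct mtx {                            /* simplified stand-in only */
            unsigned long   mtx_lock;
            unsigned int    mtx_recurse;
            unsigned int    mtx_savefl;
    };

    #define ASSYM(name, value) \
            printf("#define\t%s\t%#lx\n", #name, (unsigned long)(value))

    int
    main(void)
    {
            ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
            ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
            ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl));
            return (0);
    }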
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c
index 0e11e2b..71ecd63 100644
--- a/sys/amd64/amd64/identcpu.c
+++ b/sys/amd64/amd64/identcpu.c
@@ -42,6 +42,7 @@
#include "opt_cpu.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -53,6 +54,8 @@
#include <machine/specialreg.h>
#include <machine/md_var.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#define IDENTBLUE_CYRIX486 0
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index be86c65..b9395bf 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -607,12 +607,14 @@ void
enable_K5_wt_alloc(void)
{
u_int64_t msr;
+ int intrstate;
/*
* Write allocate is supported only on models 1, 2, and 3, with
* a stepping of 4 or greater.
*/
if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
+ intrstate = save_intr();
disable_intr();
msr = rdmsr(0x83); /* HWCR */
wrmsr(0x83, msr & !(0x10));
@@ -645,7 +647,7 @@ enable_K5_wt_alloc(void)
msr=rdmsr(0x83);
wrmsr(0x83, msr|0x10); /* enable write allocate */
- enable_intr();
+ restore_intr(intrstate);
}
}
@@ -708,7 +710,6 @@ enable_K6_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
void
@@ -770,7 +771,6 @@ enable_K6_2_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
#endif /* I585_CPU && CPU_WT_ALLOC */
diff --git a/sys/amd64/amd64/legacy.c b/sys/amd64/amd64/legacy.c
index 8a30770..5b6cdbc 100644
--- a/sys/amd64/amd64/legacy.c
+++ b/sys/amd64/amd64/legacy.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
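From a driver's point of view the interface is unchanged: it still hands
bus_setup_intr() an INTR_TYPE_* flag, but that flag now selects the priority of the
interrupt thread rather than an spl mask. A hedged example of a caller (the softc
layout and the foo_* names are hypothetical):

    #include <sys/param.h>
    #include <sys/bus.h>

    struct foo_softc {
            struct resource *irq_res;       /* allocated with bus_alloc_resource() */
            void            *intr_cookie;
    };

    static void     foo_intr(void *arg);    /* hypothetical handler, runs in an ithread */

    static int
    foo_setup_interrupt(device_t dev, struct foo_softc *sc)
    {
            /* INTR_TYPE_NET now maps to the PI_NET thread priority above. */
            return (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET,
                foo_intr, sc, &sc->intr_cookie));
    }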
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index bddd7d5..fa95fb0 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s
index bddd7d5..fa95fb0 100644
--- a/sys/amd64/amd64/locore.s
+++ b/sys/amd64/amd64/locore.s
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 6edecf0..875c9d5 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -58,6 +58,7 @@
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/proc.h>
@@ -98,10 +99,12 @@
#include <machine/bootinfo.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
+#include <machine/globaldata.h>
+#include <machine/globals.h>
#ifdef SMP
#include <machine/smp.h>
-#include <machine/globaldata.h>
#endif
#ifdef PERFMON
#include <machine/perfmon.h>
@@ -110,6 +113,7 @@
#ifdef OLD_BUS_ARCH
#include <i386/isa/isa_device.h>
#endif
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#include <isa/rtc.h>
#include <machine/vm86.h>
@@ -247,6 +251,11 @@ vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;
+struct cpuhead cpuhead;
+
+mtx_t sched_lock;
+mtx_t Giant;
+
#define offsetof(type, member) ((size_t)(&((type *)0)->member))
static void
@@ -431,6 +440,11 @@ again:
bufinit();
vm_pager_bufferinit();
+ SLIST_INIT(&cpuhead);
+ SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu);
+
+ mtx_init(&sched_lock, "sched lock", MTX_SPIN);
+
#ifdef SMP
/*
* OK, enough kmem_alloc/malloc state should be up, lets get on with it!
@@ -1817,11 +1831,6 @@ init386(first)
#endif
int off;
- /*
- * Prevent lowering of the ipl if we call tsleep() early.
- */
- safepri = cpl;
-
proc0.p_addr = proc0paddr;
atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1871,6 +1880,10 @@ init386(first)
r_gdt.rd_base = (int) gdt;
lgdt(&r_gdt);
+ /* setup curproc so that mutexes work */
+ PCPU_SET(curproc, &proc0);
+ PCPU_SET(prevproc, &proc0);
+
/* make ldt memory segments */
/*
* The data segment limit must not cover the user area because we
@@ -1953,7 +1966,7 @@ init386(first)
/* make an initial tss so cpu can get interrupt stack on syscall! */
common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16;
- common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
+ common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
private_tss = 0;
tss_gdt = &gdt[GPROC0_SEL].sd;
@@ -1974,6 +1987,12 @@ init386(first)
dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ /*
+ * We grab Giant during the vm86bios routines, so we need to ensure
+ * that it is up and running before we use vm86.
+ */
+ mtx_init(&Giant, "Giant", MTX_DEF);
+
vm86_initialize();
getmemsize(first);
@@ -2009,9 +2028,7 @@ init386(first)
/* setup proc 0's pcb */
proc0.p_addr->u_pcb.pcb_flags = 0;
proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
-#ifdef SMP
- proc0.p_addr->u_pcb.pcb_mpnest = 1;
-#endif
+ proc0.p_addr->u_pcb.pcb_schednest = 0;
proc0.p_addr->u_pcb.pcb_ext = 0;
proc0.p_md.md_regs = &proc0_tf;
}
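The new cpuhead list strings every CPU's globaldata together via the gd_allcpu link
inserted above, giving machine-independent code a way to visit all CPUs. A small
sketch, assuming the queue(3) SLIST macros the diff already relies on; the function
is hypothetical:

    #include <sys/param.h>
    #include <sys/queue.h>
    #include <machine/globaldata.h>

    extern struct cpuhead cpuhead;          /* declared in this commit */

    static void
    for_each_cpu(void (*fn)(struct globaldata *))
    {
            struct globaldata *gd;

            SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
                    fn(gd);
    }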
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 61c5ecf..95b5759 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
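release_aps() is wired into the boot sequence with SYSINIT(9): the kernel runs it
once boot reaches the SI_SUB_SMP stage, which is what finally drops ap_boot_lock and
lets the application processors past the s_lock() in ap_init(). A minimal,
hypothetical example of the same mechanism:

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/systm.h>

    static void
    smp_stage_reached(void *arg __unused)
    {
            printf("boot reached SI_SUB_SMP\n");    /* illustrative hook only */
    }
    SYSINIT(smp_stage_reached, SI_SUB_SMP, SI_ORDER_ANY, smp_stage_reached, NULL);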
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
index d3602d2..9ede02c 100644
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -114,43 +114,9 @@ mp_begin: /* now running relocated at KERNBASE */
CHECKPOINT(0x39, 6)
- /* wait till we can get into the kernel */
- call _boot_get_mplock
-
- /* Now, let's prepare for some REAL WORK :-) */
+ /* Now, let's prepare for some REAL WORK :-) This doesn't return. */
call _ap_init
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,-4(%esp)
-2:
- cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */
- jz 2b
- call _get_mplock
-
- /* let her rip! (loads new stack) */
- jmp _cpu_switch
-
-NON_GPROF_ENTRY(wait_ap)
- pushl %ebp
- movl %esp, %ebp
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,0(%esp)
- movl %eax, 8(%ebp)
-1:
- cmpl $0, CNAME(smp_started)
- jnz 2f
- decl %eax
- cmpl $0, %eax
- jge 1b
-2:
- call _get_mplock
- movl %ebp, %esp
- popl %ebp
- ret
-
-
/*
* This is the embedded trampoline or bootstrap that is
* copied into 'real-mode' low memory, it is where the
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index 61c5ecf..95b5759 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c
index 8a30770..5b6cdbc 100644
--- a/sys/amd64/amd64/nexus.c
+++ b/sys/amd64/amd64/nexus.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index edae292..7ce9120 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -668,7 +668,7 @@ pmap_pte_quick(pmap, va)
* (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V;
cpu_invlpg(prv_PADDR1);
}
- return prv_PADDR1 + ((unsigned) index & (NPTEPG - 1));
+ return (unsigned *)(prv_PADDR1 + (index & (NPTEPG - 1)));
#else
if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s
index c895fef..db56a1b 100644
--- a/sys/amd64/amd64/swtch.s
+++ b/sys/amd64/amd64/swtch.s
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
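
In rough C terms, the sched_lock handoff that the new cpu_switch() performs around the register switch looks like the sketch below. This is an illustration only: the helper name and the oldpcb/newpcb/next parameters are invented, the real work is done in the assembly above, and it assumes <sys/proc.h>, <machine/pcb.h> and <machine/mutex.h> are in scope. The fields used (pcb_schednest, mtx_recurse, mtx_lock) are the ones the diff references.

	/* Illustrative C rendering of the sched_lock handoff in cpu_switch(). */
	static void
	sched_lock_handoff(struct pcb *oldpcb, struct pcb *newpcb, struct proc *next)
	{
		/* remember how deeply the outgoing process had recursed on sched_lock */
		oldpcb->pcb_schednest = sched_lock.mtx_recurse;

		/* ... stack, registers and address space are switched here ... */

		/* restore the recursion count and hand the lock to the new curproc */
		sched_lock.mtx_recurse = newpcb->pcb_schednest;
		sched_lock.mtx_lock = (u_int)next;	/* matches CURTHD == (u_int)CURPROC */
	}

The DIAGNOSTIC check that follows (badsw6) simply asserts that interrupts are still disabled at this point, since the old sti is gone and sched_lock now protects the switch.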
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 51de1ac..f32dfae 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -49,10 +49,12 @@
#include "opt_trap.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
@@ -76,12 +78,14 @@
#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#ifdef POWERFAIL_NMI
@@ -96,11 +100,14 @@
#include "isa.h"
#include "npx.h"
+#include <sys/sysctl.h>
+
int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall2 __P((struct trapframe frame));
+extern void ast __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -142,7 +149,7 @@ static char *trap_msg[] = {
};
static __inline int userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks, int have_mplock));
+ u_quad_t oticks, int have_giant));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
@@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
static __inline int
-userret(p, frame, oticks, have_mplock)
+userret(p, frame, oticks, have_giant)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
- int have_mplock;
+ int have_giant;
{
int sig, s;
while ((sig = CURSIG(p)) != 0) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
postsig(sig);
}
@@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
- }
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
postsig(sig);
+ }
}
/*
* Charge system time if profiling.
*/
if (p->p_flag & P_PROFIL) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
}
curpriority = p->p_priority;
- return(have_mplock);
+ return(have_giant);
}
/*
@@ -226,13 +236,20 @@ trap(frame)
u_quad_t sticks = 0;
int i = 0, ucode = 0, type, code;
vm_offset_t eva;
+#ifdef POWERFAIL_NMI
+ static int lastalert = 0;
+#endif
- if (!(frame.tf_eflags & PSL_I)) {
+ atomic_add_int(&cnt.v_trap, 1);
+
+ if ((frame.tf_eflags & PSL_I) == 0) {
/*
- * Buggy application or kernel code has disabled interrupts
- * and then trapped. Enabling interrupts now is wrong, but
- * it is better than running with interrupts disabled until
- * they are accidentally enabled later.
+ * Buggy application or kernel code has disabled
+ * interrupts and then trapped. Enabling interrupts
+ * now is wrong, but it is better than running with
+ * interrupts disabled until they are accidentally
+			 * enabled later.  XXX Consider whether this is still
+ * correct.
*/
type = frame.tf_trapno;
if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
@@ -252,54 +269,27 @@ trap(frame)
eva = 0;
if (frame.tf_trapno == T_PAGEFLT) {
/*
- * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
- * This problem is worked around by using an interrupt
- * gate for the pagefault handler. We are finally ready
- * to read %cr2 and then must reenable interrupts.
- *
- * XXX this should be in the switch statement, but the
- * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
- * flow of control too much for this to be obviously
- * correct.
+ * For some Cyrix CPUs, %cr2 is clobbered by
+ * interrupts. This problem is worked around by using
+ * an interrupt gate for the pagefault handler. We
+ * are finally ready to read %cr2 and then must
+ * reenable interrupts.
*/
eva = rcr2();
enable_intr();
- }
+ }
+
+ mtx_enter(&Giant, MTX_DEF);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
+
type = frame.tf_trapno;
code = frame.tf_err;
- if (in_vm86call) {
- if (frame.tf_eflags & PSL_VM &&
- (type == T_PROTFLT || type == T_STKFLT)) {
- i = vm86_emulate((struct vm86frame *)&frame);
- if (i != 0)
- /*
- * returns to original process
- */
- vm86_trap((struct vm86frame *)&frame);
- return;
- }
- switch (type) {
- /*
- * these traps want either a process context, or
- * assume a normal userspace trap.
- */
- case T_PROTFLT:
- case T_SEGNPFLT:
- trap_fatal(&frame, eva);
- return;
- case T_TRCTRAP:
- type = T_BPTFLT; /* kernel breakpoint */
- /* FALL THROUGH */
- }
- goto kernel_trap; /* normal kernel trap handling */
- }
-
- if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
+ if ((ISPL(frame.tf_cs) == SEL_UPL) ||
+ ((frame.tf_eflags & PSL_VM) && !in_vm86call)) {
/* user trap */
sticks = p->p_sticks;
@@ -322,16 +312,6 @@ restart:
i = SIGFPE;
break;
- case T_ASTFLT: /* Allow process switch */
- astoff();
- cnt.v_soft++;
- if (p->p_flag & P_OWEUPC) {
- p->p_flag &= ~P_OWEUPC;
- addupc_task(p, p->p_stats->p_prof.pr_addr,
- p->p_stats->p_prof.pr_ticks);
- }
- goto out;
-
/*
* The following two traps can happen in
* vm86 mode, and, if so, we want to handle
@@ -342,7 +322,7 @@ restart:
if (frame.tf_eflags & PSL_VM) {
i = vm86_emulate((struct vm86frame *)&frame);
if (i == 0)
- goto out;
+ goto user;
break;
}
/* FALL THROUGH */
@@ -357,14 +337,20 @@ restart:
case T_PAGEFLT: /* page fault */
i = trap_pfault(&frame, TRUE, eva);
- if (i == -1)
- return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if (i == -2)
+ if (i == -2) {
+ /*
+ * f00f hack workaround has triggered, treat
+ * as illegal instruction not page fault.
+ */
+ frame.tf_trapno = T_PRIVINFLT;
goto restart;
+ }
#endif
- if (i == 0)
+ if (i == -1)
goto out;
+ if (i == 0)
+ goto user;
ucode = T_PAGEFLT;
break;
@@ -377,7 +363,15 @@ restart:
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
- goto handle_powerfail;
+#ifndef TIMER_FREQ
+# define TIMER_FREQ 1193182
+#endif
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -391,7 +385,7 @@ restart:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi)
panic("NMI indicates hardware failure");
break;
@@ -410,9 +404,9 @@ restart:
case T_DNA:
#if NNPX > 0
- /* if a transparent fault (due to context switch "late") */
+ /* transparent fault (due to context switch "late") */
if (npxdna())
- return;
+ goto out;
#endif
if (!pmath_emulate) {
i = SIGFPE;
@@ -422,7 +416,7 @@ restart:
i = (*pmath_emulate)(&frame);
if (i == 0) {
if (!(frame.tf_eflags & PSL_T))
- return;
+ goto out;
frame.tf_eflags &= ~PSL_T;
i = SIGTRAP;
}
@@ -435,13 +429,12 @@ restart:
break;
}
} else {
-kernel_trap:
/* kernel trap */
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(&frame, FALSE, eva);
- return;
+ goto out;
case T_DNA:
#if NNPX > 0
@@ -451,31 +444,35 @@ kernel_trap:
* registered such use.
*/
if (npxdna())
- return;
+ goto out;
#endif
break;
- case T_PROTFLT: /* general protection fault */
- case T_SEGNPFLT: /* segment not present fault */
/*
- * Invalid segment selectors and out of bounds
- * %eip's and %esp's can be set up in user mode.
- * This causes a fault in kernel mode when the
- * kernel tries to return to user mode. We want
- * to get this fault so that we can fix the
- * problem here and not have to check all the
- * selectors and pointers when the user changes
- * them.
+ * The following two traps can happen in
+ * vm86 mode, and, if so, we want to handle
+ * them specially.
*/
-#define MAYBE_DORETI_FAULT(where, whereto) \
- do { \
- if (frame.tf_eip == (int)where) { \
- frame.tf_eip = (int)whereto; \
- return; \
- } \
- } while (0)
-
- if (intr_nesting_level == 0) {
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+ if (frame.tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)&frame);
+ if (i != 0)
+ /*
+ * returns to original process
+ */
+ vm86_trap((struct vm86frame *)&frame);
+ goto out;
+ }
+ /* FALL THROUGH */
+
+ case T_SEGNPFLT: /* segment not present fault */
+ if (in_vm86call)
+ break;
+
+ if (intr_nesting_level != 0)
+ break;
+
/*
* Invalid %fs's and %gs's can be created using
* procfs or PT_SETREGS or by invalidating the
@@ -488,20 +485,38 @@ kernel_trap:
if (frame.tf_eip == (int)cpu_switch_load_gs) {
curpcb->pcb_gs = 0;
psignal(p, SIGBUS);
- return;
+ goto out;
+ }
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %eip's and %esp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame.tf_eip == (int)doreti_iret) {
+ frame.tf_eip = (int)doreti_iret_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_ds) {
+ frame.tf_eip = (int)doreti_popl_ds_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_es) {
+ frame.tf_eip = (int)doreti_popl_es_fault;
+ goto out;
}
- MAYBE_DORETI_FAULT(doreti_iret,
- doreti_iret_fault);
- MAYBE_DORETI_FAULT(doreti_popl_ds,
- doreti_popl_ds_fault);
- MAYBE_DORETI_FAULT(doreti_popl_es,
- doreti_popl_es_fault);
- MAYBE_DORETI_FAULT(doreti_popl_fs,
- doreti_popl_fs_fault);
+ if (frame.tf_eip == (int)doreti_popl_fs) {
+ frame.tf_eip = (int)doreti_popl_fs_fault;
+ goto out;
+ }
if (curpcb && curpcb->pcb_onfault) {
frame.tf_eip = (int)curpcb->pcb_onfault;
- return;
- }
+ goto out;
}
break;
@@ -517,7 +532,7 @@ kernel_trap:
*/
if (frame.tf_eflags & PSL_NT) {
frame.tf_eflags &= ~PSL_NT;
- return;
+ goto out;
}
break;
@@ -529,7 +544,7 @@ kernel_trap:
* silently until the syscall handler has
* saved the flags.
*/
- return;
+ goto out;
}
if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
/*
@@ -537,7 +552,7 @@ kernel_trap:
* flags. Stop single stepping it.
*/
frame.tf_eflags &= ~PSL_T;
- return;
+ goto out;
}
/*
* Ignore debug register trace traps due to
@@ -549,13 +564,13 @@ kernel_trap:
* in kernel space because that is useful when
* debugging the kernel.
*/
- if (user_dbreg_trap()) {
+ if (user_dbreg_trap() && !in_vm86call) {
/*
* Reset breakpoint bits because the
* processor doesn't
*/
load_dr6(rdr6() & 0xfffffff0);
- return;
+ goto out;
}
/*
* Fall through (TRCTRAP kernel mode, kernel address)
@@ -567,28 +582,19 @@ kernel_trap:
*/
#ifdef DDB
if (kdb_trap (type, 0, &frame))
- return;
+ goto out;
#endif
break;
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
-#ifndef TIMER_FREQ
-# define TIMER_FREQ 1193182
-#endif
- handle_powerfail:
- {
- static unsigned lastalert = 0;
-
- if(time_second - lastalert > 10)
- {
+ if (time_second - lastalert > 10) {
log(LOG_WARNING, "NMI: power fail\n");
sysbeep(TIMER_FREQ/880, hz);
lastalert = time_second;
- }
- return;
}
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -602,16 +608,16 @@ kernel_trap:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi == 0)
- return;
+ goto out;
/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
}
trap_fatal(&frame, eva);
- return;
+ goto out;
}
/* Translate fault for emulators (e.g. Linux) */
@@ -630,8 +636,10 @@ kernel_trap:
}
#endif
-out:
+user:
userret(p, &frame, sticks, 1);
+out:
+ mtx_exit(&Giant, MTX_DEF);
}
#ifdef notyet
@@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva)
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
- frame->tf_trapno = T_PRIVINFLT;
+ if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
return -2;
- }
#endif
if (usermode)
goto nogo;
@@ -869,8 +875,7 @@ trap_fatal(frame, eva)
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+	/* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -917,26 +922,6 @@ trap_fatal(frame, eva)
} else {
printf("Idle\n");
}
- printf("interrupt mask = ");
- if ((cpl & net_imask) == net_imask)
- printf("net ");
- if ((cpl & tty_imask) == tty_imask)
- printf("tty ");
- if ((cpl & bio_imask) == bio_imask)
- printf("bio ");
- if ((cpl & cam_imask) == cam_imask)
- printf("cam ");
- if (cpl == 0)
- printf("none");
-#ifdef SMP
-/**
- * XXX FIXME:
- * we probably SHOULD have stopped the other CPUs before now!
- * another CPU COULD have been touching cpl at this moment...
- */
- printf(" <- SMP: XXX");
-#endif
- printf("\n");
#ifdef KDB
if (kdb_trap(&psl))
@@ -973,8 +958,7 @@ dblfault_handler()
printf("esp = 0x%x\n", common_tss.tss_esp);
printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+	/* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -1048,12 +1032,14 @@ syscall2(frame)
int error;
int narg;
int args[8];
- int have_mplock = 0;
+ int have_giant = 0;
u_int code;
+ atomic_add_int(&cnt.v_syscall, 1);
+
#ifdef DIAGNOSTIC
if (ISPL(frame.tf_cs) != SEL_UPL) {
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
panic("syscall");
/* NOT REACHED */
}
@@ -1075,9 +1061,9 @@ syscall2(frame)
/*
* The prep code is not MP aware.
*/
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
- rel_mplock();
+ mtx_exit(&Giant, MTX_DEF);
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -1114,8 +1100,8 @@ syscall2(frame)
*/
if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
ktrsyscall(p->p_tracep, code, narg, args);
@@ -1129,15 +1115,15 @@ syscall2(frame)
* we are ktracing
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsyscall(p->p_tracep, code, narg, args);
}
@@ -1192,9 +1178,9 @@ bad:
* Traced syscall. trapsignal() is not MP aware.
*/
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
@@ -1203,13 +1189,13 @@ bad:
/*
* Handle reschedule and other end-of-syscall issues
*/
- have_mplock = userret(p, &frame, sticks, have_mplock);
+ have_giant = userret(p, &frame, sticks, have_giant);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
}
@@ -1225,27 +1211,66 @@ bad:
/*
* Release the MP lock if we had to get it
*/
- if (have_mplock)
- rel_mplock();
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
+
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
+
+void
+ast(frame)
+ struct trapframe frame;
+{
+ struct proc *p = CURPROC;
+ u_quad_t sticks;
+
+ /*
+	 * handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
+ astoff();
+ atomic_add_int(&cnt.v_soft, 1);
+ if (p->p_flag & P_OWEUPC) {
+ mtx_enter(&Giant, MTX_DEF);
+ p->p_flag &= ~P_OWEUPC;
+ addupc_task(p, p->p_stats->p_prof.pr_addr,
+ p->p_stats->p_prof.pr_ticks);
+	}
+ if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0)
+ mtx_exit(&Giant, MTX_DEF);
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode. MP lock is held on entry and should be
- * held on return.
+ * directly into user mode. Giant is not held on entry, and must not
+ * be held on return.
*/
void
fork_return(p, frame)
struct proc *p;
struct trapframe frame;
{
+ int have_giant;
+
frame.tf_eax = 0; /* Child returns zero */
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0, 1);
+ have_giant = userret(p, &frame, 0, mtx_owned(&Giant));
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
+ }
#endif
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
}
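
Throughout trap.c the conversion away from get_mplock()/rel_mplock() follows one pattern: take Giant lazily the first time something MP-unsafe must be done, remember that in have_giant, and drop the lock exactly once on the way out. A condensed, hypothetical sketch of that pattern (do_unsafe_work() and its arguments are invented; the mtx_* calls and the MA_NOTOWNED assertion are the ones used above):

	static void
	do_unsafe_work(int need_ktrace, int need_profil)
	{
		int have_giant = 0;

		if (need_ktrace) {
			if (have_giant == 0) {
				mtx_enter(&Giant, MTX_DEF);	/* lazy acquire */
				have_giant = 1;
			}
			/* ... MP-unsafe work, e.g. ktrace records ... */
		}
		if (need_profil) {
			if (have_giant == 0) {
				mtx_enter(&Giant, MTX_DEF);
				have_giant = 1;
			}
			/* ... more MP-unsafe work, e.g. addupc_task() ... */
		}
		if (have_giant)
			mtx_exit(&Giant, MTX_DEF);	/* release exactly once */
		mtx_assert(&Giant, MA_NOTOWNED);
	}

trap() itself is simpler: it enters Giant unconditionally once the page-fault address has been read, and the new user:/out: labels guarantee a single mtx_exit() on every path that previously returned early.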
diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c
index 15044ab..724f3c2 100644
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Setup the PIC clk handler. The APIC handler is setup later */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
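
Every i8254 critical section in this file now uses the same sequence: snapshot the flags with save_intr(), disable interrupts, take the clock spinlock, touch the 8254 registers, then unwind in the opposite order so the previous interrupt state (rather than a blind sti) is restored. A minimal sketch in the style of getit() above (the function name is hypothetical; the TIMER_*/CLOCK_* macros and outb()/inb() are the ones tsc.c already uses):

	static u_int
	i8254_read_counter0(void)
	{
		u_int count;
		int intrsave;

		intrsave = save_intr();		/* remember whether PSL_I was set */
		disable_intr();
		CLOCK_LOCK();			/* spinlock shared with other CPUs */

		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
		count = inb(TIMER_CNTR0);
		count |= inb(TIMER_CNTR0) << 8;

		CLOCK_UNLOCK();
		restore_intr(intrsave);		/* re-enable only if enabled on entry */
		return (count);
	}

i8254_get_timecount() additionally keeps the saved flags around so it can test (intrsave & PSL_I) when deciding whether a pending clock interrupt could have been delivered.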
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index cfb6cee..831ab3b 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -57,12 +57,14 @@
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#ifdef SMP
#include <machine/smp.h>
#endif
@@ -177,9 +179,8 @@ cpu_fork(p1, p2, flags)
* pcb2->pcb_onfault: cloned above (always NULL here?).
*/
-#ifdef SMP
- pcb2->pcb_mpnest = 1;
-#endif
+ pcb2->pcb_schednest = 0;
+
/*
* XXX don't copy the i/o pages. this should probably be fixed.
*/
@@ -256,8 +257,11 @@ cpu_exit(p)
reset_dbregs();
pcb->pcb_flags &= ~PCB_DBREGS;
}
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH);
+ mtx_assert(&Giant, MA_NOTOWNED);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -406,17 +410,10 @@ vunmapbuf(bp)
static void
cpu_reset_proxy()
{
- u_int saved_mp_lock;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
- ; /* Wait for other cpu to disable interupts */
- saved_mp_lock = mp_lock;
- mp_lock = 1;
- printf("cpu_reset_proxy: Grabbed mp lock for BSP\n");
- cpu_reset_proxy_active = 3;
- while (cpu_reset_proxy_active == 3)
- ; /* Wait for other cpu to enable interrupts */
+ ; /* Wait for other cpu to see that we've started */
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
@@ -453,6 +450,7 @@ cpu_reset()
cpu_reset_proxyid = cpuid;
cpustop_restartfunc = cpu_reset_proxy;
+ cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
started_cpus = (1<<0); /* Restart CPU #0 */
@@ -461,17 +459,9 @@ cpu_reset()
cnt++; /* Wait for BSP to announce restart */
if (cpu_reset_proxy_active == 0)
printf("cpu_reset: Failed to restart BSP\n");
- __asm __volatile("cli" : : : "memory");
+ enable_intr();
cpu_reset_proxy_active = 2;
- cnt = 0;
- while (cpu_reset_proxy_active == 2 && cnt < 10000000)
- cnt++; /* Do nothing */
- if (cpu_reset_proxy_active == 2) {
- printf("cpu_reset: BSP did not grab mp lock\n");
- cpu_reset_real(); /* XXX: Bogus ? */
- }
- cpu_reset_proxy_active = 4;
- __asm __volatile("sti" : : : "memory");
+
while (1);
/* NOTREACHED */
}
@@ -553,7 +543,7 @@ vm_page_zero_idle()
static int free_rover;
static int zero_state;
vm_page_t m;
- int s;
+ int s, intrsave;
/*
* Attempt to maintain approximately 1/2 of our free pages in a
@@ -569,11 +559,10 @@ vm_page_zero_idle()
if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
return(0);
-#ifdef SMP
- if (try_mplock()) {
-#endif
+ if (mtx_try_enter(&Giant, MTX_DEF)) {
s = splvm();
- __asm __volatile("sti" : : : "memory");
+ intrsave = save_intr();
+ enable_intr();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
if (m != NULL && (m->flags & PG_ZERO) == 0) {
@@ -595,14 +584,10 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- __asm __volatile("cli" : : : "memory");
-#ifdef SMP
- rel_mplock();
-#endif
+ restore_intr(intrsave);
+ mtx_exit(&Giant, MTX_DEF);
return (1);
-#ifdef SMP
}
-#endif
/*
* We have to enable interrupts for a moment if the try_mplock fails
* in order to potentially take an IPI. XXX this should be in
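
vm_page_zero_idle() above shows the idiom for opportunistic work from the idle loop: try Giant without blocking and back off immediately if it is contended. Stripped to its control flow (idle_work() and its body are placeholders; the spl, save_intr()/restore_intr() and mtx calls are those in the diff):

	static int
	idle_work(void)
	{
		int s, intrsave;

		if (mtx_try_enter(&Giant, MTX_DEF) == 0)
			return (0);		/* contended: skip this pass */

		s = splvm();
		intrsave = save_intr();
		enable_intr();			/* allow IPIs while we work */

		/* ... zero one free page, update the queues ... */

		splx(s);
		restore_intr(intrsave);		/* idle loop expects interrupts off again */
		mtx_exit(&Giant, MTX_DEF);
		return (1);
	}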
diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h
index ffabf7f..18822b8 100644
--- a/sys/amd64/include/cpu.h
+++ b/sys/amd64/include/cpu.h
@@ -46,6 +46,7 @@
#include <machine/psl.h>
#include <machine/frame.h>
#include <machine/segments.h>
+#include <machine/globals.h>
/*
* definitions of cpu-dependent requirements
@@ -86,7 +87,9 @@
* added, we will have an atomicy problem. The type of atomicy we need is
* a non-locked orl.
*/
-#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0)
+#define need_resched() do { \
+ PCPU_SET(astpending, AST_RESCHED|AST_PENDING); \
+} while (0)
#define resched_wanted() (astpending & AST_RESCHED)
/*
@@ -109,8 +112,9 @@
* it off (asynchronous need_resched() conflicts are not critical).
*/
#define signotify(p) aston()
-
-#define aston() do { astpending |= AST_PENDING; } while (0)
+#define aston() do { \
+ PCPU_SET(astpending, astpending | AST_PENDING); \
+} while (0)
#define astoff()
/*
@@ -135,7 +139,9 @@
#ifdef _KERNEL
extern char btext[];
extern char etext[];
+#ifndef intr_nesting_level
extern u_char intr_nesting_level;
+#endif
void fork_trampoline __P((void));
void fork_return __P((struct proc *, struct trapframe));
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 9a4052f..39868df 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -86,20 +86,29 @@ static __inline void
disable_intr(void)
{
__asm __volatile("cli" : : : "memory");
-#ifdef SMP
- MPINTR_LOCK();
-#endif
}
static __inline void
enable_intr(void)
{
-#ifdef SMP
- MPINTR_UNLOCK();
-#endif
__asm __volatile("sti");
}
+static __inline u_int
+save_intr(void)
+{
+ u_int ef;
+
+ __asm __volatile("pushfl; popl %0" : "=r" (ef));
+ return (ef);
+}
+
+static __inline void
+restore_intr(u_int ef)
+{
+ __asm __volatile("pushl %0; popfl" : : "r" (ef) : "memory" );
+}
+
#define HAVE_INLINE_FFS
static __inline int
diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h
index 61c5ecf..95b5759 100644
--- a/sys/amd64/include/mptable.h
+++ b/sys/amd64/include/mptable.h
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
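
The ap_boot_lock added here serializes AP bring-up: the BSP acquires it in mp_enable() before starting the APs, every AP immediately tries to take it in ap_init() and therefore parks until released, and the release_aps() SYSINIT drops it once the kernel reaches SI_SUB_SMP and is ready to let the secondary CPUs run. Schematically (bsp_start_aps() is an invented name for the mp_enable() fragment; the rest abridges the code above):

	static void
	bsp_start_aps(u_int boot_addr)
	{
		s_lock(&ap_boot_lock);		/* hold the APs back ... */
		start_all_aps(boot_addr);	/* ... while they are started */
	}

	void
	ap_init(void)				/* abridged */
	{
		s_lock(&ap_boot_lock);		/* blocks until release_aps() or the previous AP */
		/* ... per-CPU setup, smp_started bookkeeping ... */
		s_unlock(&ap_boot_lock);	/* admit the next AP */
		/* ... wait for smp_started, set curproc to idleproc, cpu_throw() ... */
	}

	static void
	release_aps(void *dummy __unused)	/* run from SYSINIT at SI_SUB_SMP */
	{
		s_unlock(&ap_boot_lock);
	}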
diff --git a/sys/amd64/include/mutex.h b/sys/amd64/include/mutex.h
new file mode 100644
index 0000000..ef0c963
--- /dev/null
+++ b/sys/amd64/include/mutex.h
@@ -0,0 +1,786 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MUTEX_H_
+#define _MACHINE_MUTEX_H_
+
+#ifndef LOCORE
+
+#include <sys/ktr.h>
+#include <sys/queue.h>
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine/globals.h>
+
+/*
+ * If kern_mutex.c is being built, compile non-inlined versions of various
+ * functions so that kernel modules can use them.
+ */
+#ifndef _KERN_MUTEX_C_
+#define _MTX_INLINE static __inline
+#else
+#define _MTX_INLINE
+#endif
+
+/*
+ * Mutex flags
+ *
+ * Types
+ */
+#define MTX_DEF 0x0 /* Default (spin/sleep) */
+#define MTX_SPIN 0x1 /* Spin only lock */
+
+/* Options */
+#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */
+#define MTX_NORECURSE 0x8 /* No recursion possible */
+#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */
+#define MTX_NOSWITCH 0x20 /* Do not switch on release */
+#define MTX_FIRST 0x40 /* First spin lock holder */
+#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */
+
+/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */
+#define MTX_HARDOPTS (MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH)
+
+/* Flags/value used in mtx_lock */
+#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */
+#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */
+#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED)
+#define MTX_UNOWNED 0x8 /* Cookie for free mutex */
+
+struct proc; /* XXX */
+
+/*
+ * Sleep/spin mutex
+ */
+struct mtx {
+ volatile u_int mtx_lock; /* lock owner/gate/flags */
+ volatile u_short mtx_recurse; /* number of recursive holds */
+ u_short mtx_f1;
+ u_int mtx_savefl; /* saved flags (for spin locks) */
+ char *mtx_description;
+ TAILQ_HEAD(, proc) mtx_blocked;
+ LIST_ENTRY(mtx) mtx_contested;
+ struct mtx *mtx_next; /* all locks in system */
+ struct mtx *mtx_prev;
+#ifdef SMP_DEBUG
+ /* If you add anything here, adjust the mtxf_t definition below */
+ struct witness *mtx_witness;
+ LIST_ENTRY(mtx) mtx_held;
+ char *mtx_file;
+ int mtx_line;
+#endif /* SMP_DEBUG */
+};
+
+typedef struct mtx mtx_t;
+
+/*
+ * Filler for structs which need to remain the same size
+ * whether or not SMP_DEBUG is turned on.
+ */
+typedef struct mtxf {
+#ifdef SMP_DEBUG
+ char mtxf_data[0];
+#else
+ char mtxf_data[4*sizeof(void *) + sizeof(int)];
+#endif
+} mtxf_t;
+
+#define mp_fixme(string)
+
+#ifdef _KERNEL
+/* Misc */
+#define CURTHD ((u_int)CURPROC) /* Current thread ID */
+
+/* Prototypes */
+void mtx_init(mtx_t *m, char *description, int flag);
+void mtx_enter_hard(mtx_t *, int type, int flags);
+void mtx_exit_hard(mtx_t *, int type);
+void mtx_destroy(mtx_t *m);
+
+#if (defined(KLD_MODULE) || defined(_KERN_MUTEX_C_))
+void mtx_enter(mtx_t *mtxp, int type);
+int mtx_try_enter(mtx_t *mtxp, int type);
+void mtx_exit(mtx_t *mtxp, int type);
+#endif
+
+/* Global locks */
+extern mtx_t sched_lock;
+extern mtx_t Giant;
+
+/*
+ * Used to replace return with an exit Giant and return.
+ */
+
+#define EGAR(a) \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return (a); \
+} while (0)
+
+#define VEGAR \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return; \
+} while (0)
+
+#define DROP_GIANT() \
+do { \
+ int _giantcnt; \
+ WITNESS_SAVE_DECL(Giant); \
+ \
+ WITNESS_SAVE(&Giant, Giant); \
+ for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \
+ mtx_exit(&Giant, MTX_DEF)
+
+#define PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant); \
+} while (0)
+
+#define PARTIAL_PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant)
+
+
+/*
+ * Debugging
+ */
+#ifndef SMP_DEBUG
+#define mtx_assert(m, what)
+#else /* SMP_DEBUG */
+
+#define MA_OWNED 1
+#define MA_NOTOWNED 2
+#define mtx_assert(m, what) { \
+ switch ((what)) { \
+ case MA_OWNED: \
+ ASS(mtx_owned((m))); \
+ break; \
+ case MA_NOTOWNED: \
+ ASS(!mtx_owned((m))); \
+ break; \
+ default: \
+ panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \
+ } \
+}
+
+#ifdef INVARIANTS
+#define ASS(ex) MPASS(ex)
+#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ #ex, __FILE__, __LINE__)
+#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ what, __FILE__, __LINE__)
+
+#ifdef MTX_STRS
+char STR_IEN[] = "fl & 0x200";
+char STR_IDIS[] = "!(fl & 0x200)";
+#else /* MTX_STRS */
+extern char STR_IEN[];
+extern char STR_IDIS[];
+#endif /* MTX_STRS */
+#define ASS_IEN MPASS2(read_eflags() & 0x200, STR_IEN)
+#define ASS_IDIS MPASS2((read_eflags() & 0x200) == 0, STR_IDIS)
+#endif /* INVARIANTS */
+
+#endif /* SMP_DEBUG */
+
+#if !defined(SMP_DEBUG) || !defined(INVARIANTS)
+#define ASS(ex)
+#define MPASS(ex)
+#define MPASS2(ex, where)
+#define ASS_IEN
+#define ASS_IDIS
+#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */
+
+#ifdef WITNESS
+#ifndef SMP_DEBUG
+#error WITNESS requires SMP_DEBUG
+#endif /* SMP_DEBUG */
+#define WITNESS_ENTER(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_enter((m), (f), __FILE__, __LINE__)
+#define WITNESS_EXIT(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_exit((m), (f), __FILE__, __LINE__)
+
+#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__)
+#define WITNESS_SAVE_DECL(n) \
+ char * __CONCAT(n, __wf); \
+ int __CONCAT(n, __wl)
+
+#define WITNESS_SAVE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \
+} while (0)
+
+#define WITNESS_RESTORE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \
+} while (0)
+
+void witness_init(mtx_t *, int flag);
+void witness_destroy(mtx_t *);
+void witness_enter(mtx_t *, int, char *, int);
+void witness_try_enter(mtx_t *, int, char *, int);
+void witness_exit(mtx_t *, int, char *, int);
+void witness_display(void(*)(const char *fmt, ...));
+void witness_list(struct proc *);
+int witness_sleep(int, mtx_t *, char *, int);
+void witness_save(mtx_t *, char **, int *);
+void witness_restore(mtx_t *, char *, int);
+#else /* WITNESS */
+#define WITNESS_ENTER(m, flag)
+#define WITNESS_EXIT(m, flag)
+#define WITNESS_SLEEP(check, m)
+#define WITNESS_SAVE_DECL(n)
+#define WITNESS_SAVE(m, n)
+#define WITNESS_RESTORE(m, n)
+
+/*
+ * flag++ is slezoid way of shutting up unused parameter warning
+ * in mtx_init()
+ */
+#define witness_init(m, flag) flag++
+#define witness_destroy(m)
+#define witness_enter(m, flag, f, l)
+#define witness_try_enter(m, flag, f, l )
+#define witness_exit(m, flag, f, l)
+#endif /* WITNESS */
+
+/*
+ * Assembly macros (for internal use only)
+ *------------------------------------------------------------------------------
+ */
+
+#define _V(x) __STRING(x)
+
+#ifndef I386_CPU
+
+/*
+ * For 486 and newer processors.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 1f;" /* Got it */ \
+" andl $" _V(MTX_FLAGMASK) ",%%eax;" /* turn off spec bits */ \
+" cmpl %%eax,%3;" /* already have it? */ \
+" je 2f;" /* yes, recurse */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+"2: lock; orl $" _V(MTX_RECURSE) ",%1;" \
+" incw %2;" \
+"1:" \
+"# getlock_sleep" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "+m" (mtxp->mtx_recurse) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" pushfl;" \
+" cli;" \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 2f;" /* got it */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type, oflags) */ \
+" addl $0xc,%%esp;" \
+" jmp 1f;" \
+"2: popl %2;" /* save flags */ \
+"1:" \
+"# getlock_spin_block" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "=m" (mtxp->mtx_savefl) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %2,%1;" /* Try */ \
+" jz 1f;" /* got it */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type) */ \
+" addl $8,%%esp;" \
+"1:" \
+"# getlock_norecurse" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock) /* 1 */ \
+ : "r" (tid), /* 2 (input) */ \
+ "gi" (type), /* 3 */ \
+ "g" (mtxp) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock assuming we haven't recursed on it; recursion is handled
+ * in the hard function.
+ */
+#define _exitlock_norecurse(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %4,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" pushl %2;" \
+" pushl %3;" \
+" call mtx_exit_hard;" \
+" addl $8,%%esp;" \
+"1:" \
+"# exitlock_norecurse" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+a" (_tid) /* 1 */ \
+ : "gi" (type), /* 2 (input) */ \
+ "g" (mtxp), /* 3 */ \
+ "r" (MTX_UNOWNED) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %5,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" testl $" _V(MTX_RECURSE) ",%%eax;" /* recursed? */ \
+" jnz 3f;" /* handle recursion */ \
+ /* Lock not recursed and contested: do the hard way */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_exit_hard;" /* mtx_exit_hard(mtxp,type) */ \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+ /* lock recursed, lower recursion level */ \
+"3: decw %1;" /* one less level */ \
+" jnz 1f;" /* still recursed, done */ \
+" lock; andl $~" _V(MTX_RECURSE) ",%0;" /* turn off recurse flag */ \
+"1:" \
+"# exitlock" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "+a" (_tid) /* 2 */ \
+ : "gi" (type), /* 3 (input) */ \
+ "g" (mtxp), /* 4 */ \
+ "r" (MTX_UNOWNED) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a spin lock (with possible recursion).
+ *
+ * We use cmpxchgl to clear lock (instead of simple store) to flush posting
+ * buffers and make the change visible to other CPU's.
+ */
+#define _exitlock_spin(mtxp, inten1, inten2) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movw %1,%%ax;" \
+" decw %%ax;" \
+" js 1f;" \
+" movw %%ax,%1;" \
+" jmp 2f;" \
+"1: movl %0,%%eax;" \
+" movl $ " _V(MTX_UNOWNED) ",%%ecx;" \
+" " inten1 ";" \
+" " MPLOCKED "" \
+" cmpxchgl %%ecx,%0;" \
+" " inten2 ";" \
+"2:" \
+"# exitlock_spin" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "=&a" (_res) /* 2 */ \
+ : "g" (mtxp->mtx_savefl) /* 3 (used in 'inten') */ \
+ : "memory", "ecx" /* used */ ); \
+})
+
+#else /* I386_CPU */
+
+/*
+ * For 386 processors only.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \
+ if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \
+ else { \
+ atomic_set_int(&(mp)->mtx_lock, MTX_RECURSE); \
+ (mp)->mtx_recurse++; \
+ } \
+ } \
+} while (0)
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mp, tid, type) do { \
+ u_int _mtx_fl = read_eflags(); \
+ disable_intr(); \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_fl); \
+ else \
+ (mp)->mtx_savefl = _mtx_fl; \
+} while (0)
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \
+} while (0)
+
+/*
+ * Release a sleep lock assuming we haven't recursed on it; recursion is handled
+ * in the hard function.
+ */
+#define _exitlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+} while (0)
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) { \
+ if ((mp)->mtx_lock & MTX_RECURSE) { \
+ if (--((mp)->mtx_recurse) == 0) \
+ atomic_clear_int(&(mp)->mtx_lock, \
+ MTX_RECURSE); \
+ } else { \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+ } \
+ } \
+} while (0)
+
+/* Release a spin lock (with possible recursion). */
+#define _exitlock_spin(mp, inten1, inten2) do { \
+ if ((mp)->mtx_recurse == 0) { \
+ atomic_cmpset_int(&(mp)->mtx_lock, (mp)->mtx_lock, \
+ MTX_UNOWNED); \
+ write_eflags((mp)->mtx_savefl); \
+ } else { \
+ (mp)->mtx_recurse--; \
+ } \
+} while (0)
+
+#endif /* I386_CPU */
+
+/*
+ * Externally visible mutex functions.
+ *------------------------------------------------------------------------------
+ */
+
+/*
+ * Return non-zero if a mutex is already owned by the current thread.
+ */
+#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD)
+
+/* Common strings */
+#ifdef MTX_STRS
+#ifdef KTR_EXTEND
+
+/*
+ * KTR_EXTEND saves file name and line for all entries, so we don't need them
+ * here. Theoretically we should also change the entries which refer to them
+ * (from CTR5 to CTR3), but since they're just passed to snprinf as the last
+ * parameters, it doesn't do any harm to leave them.
+ */
+char STR_mtx_enter_fmt[] = "GOT %s [%x] r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] result=%d";
+#else
+char STR_mtx_enter_fmt[] = "GOT %s [%x] at %s:%d r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] at %s:%d r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] at %s:%d result=%d";
+#endif
+char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0";
+char STR_mtx_owned[] = "mtx_owned(_mpp)";
+char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0";
+#else /* MTX_STRS */
+extern char STR_mtx_enter_fmt[];
+extern char STR_mtx_bad_type[];
+extern char STR_mtx_exit_fmt[];
+extern char STR_mtx_owned[];
+extern char STR_mtx_recurse[];
+extern char STR_mtx_try_enter_fmt[];
+#endif /* MTX_STRS */
+
+#ifndef KLD_MODULE
+/*
+ * Get lock 'm', the macro handles the easy (and most common cases) and leaves
+ * the slow stuff to the mtx_enter_hard() function.
+ *
+ * Note: since type is usually a constant much of this code is optimized out.
+ */
+_MTX_INLINE void
+mtx_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *_mpp = mtxp;
+
+ /* bits only valid on mtx_exit() */
+ MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
+ STR_mtx_bad_type);
+
+ do {
+ if ((type) & MTX_SPIN) {
+ /*
+ * Easy cases of spin locks:
+ *
+ * 1) We already own the lock and will simply
+ * recurse on it (if RLIKELY)
+ *
+ * 2) The lock is free, we just get it
+ */
+ if ((type) & MTX_RLIKELY) {
+ /*
+ * Check for recursion, if we already
+ * have this lock we just bump the
+ * recursion count.
+ */
+ if (_mpp->mtx_lock == CURTHD) {
+ _mpp->mtx_recurse++;
+ break; /* Done */
+ }
+ }
+
+ if (((type) & MTX_TOPHALF) == 0) {
+ /*
+ * If an interrupt thread uses this
+ * we must block interrupts here.
+ */
+ if ((type) & MTX_FIRST) {
+ ASS_IEN;
+ disable_intr();
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ _getlock_spin_block(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ /* Sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _getlock_sleep(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } while (0);
+ WITNESS_ENTER(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+}
+
+/*
+ * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
+ *
+ * XXX DOES NOT HANDLE RECURSION
+ */
+_MTX_INLINE int
+mtx_try_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+ int _rval;
+
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);
+#ifdef SMP_DEBUG
+ if (_rval && (_mpp)->mtx_witness != NULL) {
+ ASS((_mpp)->mtx_recurse == 0);
+ witness_try_enter(_mpp, type, __FILE__, __LINE__);
+ }
+#endif
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, _rval);
+
+ return _rval;
+}
+
+#define mtx_legal2block() (read_eflags() & 0x200)
+
+/*
+ * Release lock m.
+ */
+_MTX_INLINE void
+mtx_exit(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+
+ MPASS2(mtx_owned(_mpp), STR_mtx_owned);
+ WITNESS_EXIT(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_exit_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+ if ((type) & MTX_SPIN) {
+ if ((type) & MTX_NORECURSE) {
+ MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse);
+ atomic_cmpset_int(&_mpp->mtx_lock, _mpp->mtx_lock,
+ MTX_UNOWNED);
+ if (((type) & MTX_TOPHALF) == 0) {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ enable_intr();
+ } else
+ write_eflags(_mpp->mtx_savefl);
+ }
+ } else {
+ if ((type) & MTX_TOPHALF)
+ _exitlock_spin(_mpp,,);
+ else {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ _exitlock_spin(_mpp,, "sti");
+ } else {
+ _exitlock_spin(_mpp,
+ "pushl %3", "popfl");
+ }
+ }
+ }
+ } else {
+ /* Handle sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS);
+ else {
+ _exitlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ }
+}
+
+#endif /* KLD_MODULE */
+#endif /* _KERNEL */
+
+#else /* !LOCORE */
+
+/*
+ * Simple assembly macros to get and release non-recursive spin locks
+ */
+
+#if defined(I386_CPU)
+
+#define MTX_EXIT(lck, reg) \
+ movl $ MTX_UNOWNED,lck+MTX_LOCK;
+
+#else /* I386_CPU */
+
+#define MTX_ENTER(reg, lck) \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b
+
+/* Must use locked bus op (cmpxchg) when setting to unowned (barrier) */
+#define MTX_EXIT(lck,reg) \
+ movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+
+#define MTX_ENTER_WITH_RECURSION(reg, lck) \
+ movl lck+MTX_LOCK,%eax; \
+ cmpl PCPU_CURPROC,%eax; \
+ jne 9f; \
+ incw lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b; \
+8:
+
+#define MTX_EXIT_WITH_RECURSION(lck,reg) \
+ movw lck+MTX_RECURSECNT,%ax; \
+ decw %ax; \
+ js 9f; \
+ movw %ax,lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+8:
+
+#endif /* I386_CPU */
+#endif /* !LOCORE */
+#endif /* __MACHINE_MUTEX_H */
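As a usage illustration (not part of this commit), a minimal sketch of how kernel code would take and drop one of these locks through the inline interface above; the lock, the counter it protects and the function names are assumptions, and the lock is assumed to have been initialized elsewhere:

/* Hypothetical consumer of the mtx_enter()/mtx_exit() interface. */
static mtx_t	foo_mtx;		/* assumed to protect foo_count */
static int	foo_count;

static void
foo_bump(void)
{
	mtx_enter(&foo_mtx, MTX_DEF);	/* default sleep lock */
	foo_count++;
	mtx_exit(&foo_mtx, MTX_DEF);
}

static int
foo_bump_if_free(void)
{
	/*
	 * mtx_try_enter() returns non-zero on success and, as noted
	 * above, does not handle recursion.
	 */
	if (!mtx_try_enter(&foo_mtx, MTX_DEF))
		return (0);
	foo_count++;
	mtx_exit(&foo_mtx, MTX_DEF);
	return (1);
}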
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 08beb5a..1c7af85 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -72,11 +72,7 @@ struct pcb {
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
caddr_t pcb_onfault; /* copyin/out fault recovery */
-#ifdef SMP
- u_long pcb_mpnest;
-#else
- u_long pcb_mpnest_dontuse;
-#endif
+ int pcb_schednest;
int pcb_gs;
struct pcb_ext *pcb_ext; /* optional pcb extension */
u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index 58bd9cf..440da60 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -26,6 +26,20 @@
* $FreeBSD$
*/
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pmap.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
+
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. genassym uses this to generate offsets for the assembler
@@ -41,11 +55,14 @@
struct globaldata {
struct privatespace *gd_prvspace; /* self-reference */
struct proc *gd_curproc;
+ struct proc *gd_prevproc;
struct proc *gd_npxproc;
struct pcb *gd_curpcb;
+ struct proc *gd_idleproc;
struct timeval gd_switchtime;
struct i386tss gd_common_tss;
int gd_switchticks;
+ int gd_intr_nesting_level;
struct segment_descriptor gd_common_tssd;
struct segment_descriptor *gd_tss_gdt;
#ifdef USER_LDT
@@ -67,8 +84,22 @@ struct globaldata {
unsigned *gd_prv_PADDR1;
#endif
u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+ int gd_witness_spin_check;
+#ifdef KTR_PERCPU
+#ifdef KTR
+ volatile int gd_ktr_idx;
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[KTR_SIZE];
+#endif
+#endif
};
+extern struct globaldata globaldata;
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
#ifdef SMP
/*
* This is the upper (0xff800000) address space layout that is per-cpu.
@@ -93,3 +124,5 @@ struct privatespace {
extern struct privatespace SMP_prvspace[];
#endif
+
+#endif /* ! _MACHINE_GLOBALDATA_H_ */
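The new gd_allcpu link and the cpuhead list make it possible to walk every CPU's globaldata. A small sketch of such a traversal with the standard <sys/queue.h> macros (the function itself is hypothetical):

#include <sys/queue.h>

static int
cpus_with_ast_pending(void)
{
	struct globaldata *gd;
	int n = 0;

	/* Each CPU's globaldata is threaded onto cpuhead via gd_allcpu. */
	SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
		if (gd->gd_astpending)
			n++;
	return (n);
}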
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 69b716b..20d4fa3 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -15,6 +15,9 @@
#ifdef _KERNEL
+#ifdef I386_CPU
+#error SMP not supported with I386_CPU
+#endif
#if defined(SMP) && !defined(APIC_IO)
# error APIC_IO required for SMP, add "options APIC_IO" to your config file.
#endif /* SMP && !APIC_IO */
@@ -57,23 +60,6 @@ extern int bootMP_size;
/* functions in mpboot.s */
void bootMP __P((void));
-/* global data in mplock.s */
-extern u_int mp_lock;
-extern u_int isr_lock;
-#ifdef RECURSIVE_MPINTRLOCK
-extern u_int mpintr_lock;
-#endif /* RECURSIVE_MPINTRLOCK */
-
-/* functions in mplock.s */
-void get_mplock __P((void));
-void rel_mplock __P((void));
-int try_mplock __P((void));
-#ifdef RECURSIVE_MPINTRLOCK
-void get_mpintrlock __P((void));
-void rel_mpintrlock __P((void));
-int try_mpintrlock __P((void));
-#endif /* RECURSIVE_MPINTRLOCK */
-
/* global data in apic_vector.s */
extern volatile u_int stopped_cpus;
extern volatile u_int started_cpus;
@@ -185,23 +171,7 @@ extern int smp_started;
extern volatile int smp_idle_loops;
#endif /* !LOCORE */
-#else /* !SMP && !APIC_IO */
-
-/*
- * Create dummy MP lock empties
- */
-
-static __inline void
-get_mplock(void)
-{
-}
-
-static __inline void
-rel_mplock(void)
-{
-}
-
-#endif
+#endif /* SMP && !APIC_IO */
#endif /* _KERNEL */
#endif /* _MACHINE_SMP_H_ */
diff --git a/sys/amd64/isa/atpic_vector.S b/sys/amd64/isa/atpic_vector.S
index e427351..d2b88bf 100644
--- a/sys/amd64/isa/atpic_vector.S
+++ b/sys/amd64/isa/atpic_vector.S
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
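For illustration, a C-level sketch of how ithd_loop might use the setimask0/setimask1 helpers added above once a threaded handler has run; the wrapper function and the irq < 8 split are assumptions, not part of this diff:

extern void setimask0(void);	/* rewrites the IO_ICU1 mask from imen */
extern void setimask1(void);	/* rewrites the IO_ICU2 mask from imen + 1 */

/*
 * Hypothetical: after the handlers for 'irq' have run and the imen
 * soft copy has been updated, push the mask back to the owning ICU.
 */
static void
icu_reenable(int irq)
{
	if (irq < 8)
		setimask0();
	else
		setimask1();
}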
diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c
index 15044ab..724f3c2 100644
--- a/sys/amd64/isa/clock.c
+++ b/sys/amd64/isa/clock.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+	/* Set up the PIC clk handler. The APIC handler is set up later. */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
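The recurring change in clock.c above replaces the old read_eflags()/disable_intr() ... write_eflags() bracket with save_intr()/disable_intr()/CLOCK_LOCK() ... CLOCK_UNLOCK()/restore_intr(), so that the 8254 registers are also covered by a spin lock on SMP. A condensed sketch of the pattern (the function name and the body of the critical section are placeholders):

static void
i8254_critical_example(void)
{
	int intrsave;

	intrsave = save_intr();		/* remember interrupt state */
	disable_intr();			/* block local interrupts */
	CLOCK_LOCK();			/* take the clock spin lock */

	/* ... program TIMER_MODE / TIMER_CNTR0 here ... */

	CLOCK_UNLOCK();
	restore_intr(intrsave);		/* restore the saved state */
}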
diff --git a/sys/amd64/isa/icu_ipl.S b/sys/amd64/isa/icu_ipl.S
index 3475358..d178d5c 100644
--- a/sys/amd64/isa/icu_ipl.S
+++ b/sys/amd64/isa/icu_ipl.S
@@ -55,63 +55,6 @@ _imen: .long HWI_MASK
SUPERALIGN_TEXT
/*
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. We have to repeat the check since if there is an
- * interrupt while we're looking, _doreti processing for the
- * interrupt will handle all the unmasked pending interrupts
- * because we restored early. We're repeating the calculation
- * of (ipending & ~cpl) anyway so that the caller doesn't have
- * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx"
- * is undefined when %ecx is 0 so we can't rely on the secondary
- * btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx
- andl _ipending,%ecx
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- */
- jmp *vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
diff --git a/sys/amd64/isa/icu_ipl.s b/sys/amd64/isa/icu_ipl.s
index 3475358..d178d5c 100644
--- a/sys/amd64/isa/icu_ipl.s
+++ b/sys/amd64/isa/icu_ipl.s
@@ -55,63 +55,6 @@ _imen: .long HWI_MASK
SUPERALIGN_TEXT
/*
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. We have to repeat the check since if there is an
- * interrupt while we're looking, _doreti processing for the
- * interrupt will handle all the unmasked pending interrupts
- * because we restored early. We're repeating the calculation
- * of (ipending & ~cpl) anyway so that the caller doesn't have
- * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx"
- * is undefined when %ecx is 0 so we can't rely on the secondary
- * btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx
- andl _ipending,%ecx
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- */
- jmp *vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S
index e427351..d2b88bf 100644
--- a/sys/amd64/isa/icu_vector.S
+++ b/sys/amd64/isa/icu_vector.S
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
diff --git a/sys/amd64/isa/icu_vector.s b/sys/amd64/isa/icu_vector.s
index e427351..d2b88bf 100644
--- a/sys/amd64/isa/icu_vector.s
+++ b/sys/amd64/isa/icu_vector.s
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c
index 34a8c22..870760e 100644
--- a/sys/amd64/isa/intr_machdep.c
+++ b/sys/amd64/isa/intr_machdep.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of the kernel, then set up
+ * the interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update the intrnames array with the specified name. This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+ * If we have a fast interrupt, we need to set the
+ * handler address directly. Do that below. For a
+ * slow interrupt, we don't need to know more details,
+ * so do it here because it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+ } else
+ snprintf(ithd->it_proc->p_comm, MAXCOMLEN,
+ "irq%d: %s", irq, name);
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+		 * We can't append the new handler if either the
+		 * existing ithd or the new handler does not allow
+		 * interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+ " to normal interrupt %s on irq%d",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate an interrupt handler descriptor created by an earlier
+ * call of inthand_add(), remove it from the linked list, then adjust
+ * the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+		if (ih == NULL)		/* handler not found in chain */
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
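With the interrupt masks gone, a driver now registers its handler by name, IRQ, priority and flags and keeps the returned intrec cookie for later removal. A sketch of such a registration modeled on the clk/rtc calls above (the device name, IRQ, softc and handler are hypothetical; PI_REALTIME is the priority constant this diff already uses):

static void foo_intr(void *arg);	/* hypothetical handler */
static struct intrec *foo_cookie;

static int
foo_attach(int irq, void *softc)
{
	/*
	 * Threaded (non-fast) handler; sharing is allowed because
	 * neither INTR_EXCL nor INTR_FAST is passed.
	 */
	foo_cookie = inthand_add("foo0", irq, (inthand2_t *)foo_intr,
	    softc, PI_REALTIME, 0);
	return (foo_cookie == NULL ? -1 : 0);
}

static void
foo_detach(void)
{
	if (foo_cookie != NULL)
		inthand_remove(foo_cookie);
}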
diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h
index 5982295..87c97a3 100644
--- a/sys/amd64/isa/intr_machdep.h
+++ b/sys/amd64/isa/intr_machdep.h
@@ -98,7 +98,6 @@
#define TPR_BLOCK_XCPUSTOP 0xaf /* */
#define TPR_BLOCK_ALL 0xff /* all INTs */
-
#ifdef TEST_TEST1
/* put a 'fake' HWI in top of APIC prio 0x3x, 32 + 31 = 63 = 0x3f */
#define XTEST1_OFFSET (ICU_OFFSET + 31)
@@ -145,8 +144,9 @@ extern u_long intrcnt[]; /* counts for for each device and stray */
extern char intrnames[]; /* string table containing device names */
extern u_long *intr_countp[]; /* pointers into intrcnt[] */
extern inthand2_t *intr_handler[]; /* C entry points of intr handlers */
-extern u_int intr_mask[]; /* sets of intrs masked during handling of 1 */
+extern ithd *ithds[];
extern void *intr_unit[]; /* cookies to pass to intr handlers */
+extern ithd softinterrupt; /* soft interrupt thread */
inthand_t
IDTVEC(fastintr0), IDTVEC(fastintr1),
@@ -190,26 +190,60 @@ inthand_t
#endif /** TEST_TEST1 */
#endif /* SMP || APIC_IO */
+#ifdef PC98
+#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
+#define ICU_SLAVEID 7
+#else
+#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
+#define ICU_SLAVEID 2
+#endif
+
+#ifdef APIC_IO
+/*
+ * This is to accommodate "mixed-mode" programming for
+ * motherboards that don't connect the 8254 to the IO APIC.
+ */
+#define AUTO_EOI_1 1
+#endif
+
+#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
+
void isa_defaultirq __P((void));
int isa_nmi __P((int cd));
int icu_setup __P((int intr, inthand2_t *func, void *arg,
- u_int *maskptr, int flags));
+ int flags));
int icu_unset __P((int intr, inthand2_t *handler));
-int update_intr_masks __P((void));
intrmask_t splq __P((intrmask_t mask));
-#define INTR_FAST 0x00000001 /* fast interrupt handler */
-#define INTR_EXCL 0x00010000 /* excl. intr, default is shared */
+/*
+ * Describe a hardware interrupt handler. These structures are
+ * hung off the per-IRQ ithd descriptors in the ithds array, one
+ * chain per hardware interrupt.
+ *
+ * Multiple interrupt handlers for a specific IRQ can be chained
+ * together via the 'next' pointer.
+ */
+typedef struct intrec {
+ inthand2_t *handler; /* code address of handler */
+ void *argument; /* argument to pass to handler */
+ enum intr_type flags; /* flag bits (sys/bus.h) */
+ char *name; /* name of handler */
+ ithd *ithd; /* handler we're connected to */
+ struct intrec *next; /* next handler for this irq */
+} intrec;
/*
* WARNING: These are internal functions and not to be used by device drivers!
* They are subject to change without notice.
*/
struct intrec *inthand_add(const char *name, int irq, inthand2_t handler,
- void *arg, intrmask_t *maskptr, int flags);
-
+ void *arg, int pri, int flags);
int inthand_remove(struct intrec *idesc);
+void sched_ithd(void *);
+void ithd_loop(void *);
+void start_softintr(void *);
+void intr_soft(void *);
#endif /* LOCORE */
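
[Editorial note] With this header change, handlers are registered by name, IRQ, and priority rather than with an interrupt mask pointer. A hedged sketch of a call site follows; mydev_intr, mydev_setup, and the softc pointer are illustrative names only, the include list is approximate, and in-tree callers normally reach these internal functions through the bus code rather than directly (per the warning above):

    #include <sys/param.h>
    #include <sys/bus.h>
    #include <i386/isa/intr_machdep.h>

    static struct intrec *mydev_cookie;     /* returned by inthand_add() */

    static void
    mydev_intr(void *arg)
    {
            /* service the hypothetical device described by 'arg' */
    }

    static int
    mydev_setup(void *softc, int irq, int pri)
    {
            mydev_cookie = inthand_add("mydev", irq, mydev_intr, softc,
                pri, 0);
            return (mydev_cookie == NULL ? -1 : 0);
    }

    static void
    mydev_teardown(void)
    {
            if (mydev_cookie != NULL && inthand_remove(mydev_cookie) == 0)
                    mydev_cookie = NULL;
    }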
diff --git a/sys/amd64/isa/ithread.c b/sys/amd64/isa/ithread.c
new file mode 100644
index 0000000..4ceac42
--- /dev/null
+++ b/sys/amd64/isa/ithread.c
@@ -0,0 +1,353 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From BSDI: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp
+ * $FreeBSD$
+ */
+
+/* Interrupt thread code. */
+
+#include "opt_auto_eoi.h"
+
+#include "isa.h"
+
+#include <sys/param.h>
+#include <sys/rtprio.h> /* change this name XXX */
+#ifndef SMP
+#include <machine/lock.h>
+#endif
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/unistd.h>
+#include <sys/errno.h>
+#include <sys/interrupt.h>
+#include <machine/ipl.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
+#include <sys/bus.h>
+
+#if defined(APIC_IO)
+#include <machine/smp.h>
+#include <machine/smptests.h> /** FAST_HI */
+#include <machine/resource.h>
+#endif /* APIC_IO */
+#ifdef PC98
+#include <pc98/pc98/pc98.h>
+#include <pc98/pc98/pc98_machdep.h>
+#include <pc98/pc98/epsonio.h>
+#else
+#include <i386/isa/isa.h>
+#endif
+#include <i386/isa/icu.h>
+
+#if NISA > 0
+#include <isa/isavar.h>
+#endif
+#include <i386/isa/intr_machdep.h>
+#include <sys/interrupt.h>
+#ifdef APIC_IO
+#include <machine/clock.h>
+#endif
+
+#include "mca.h"
+#if NMCA > 0
+#include <i386/isa/mca_machdep.h>
+#endif
+
+#include <sys/vmmeter.h>
+#include <machine/mutex.h>
+#include <sys/ktr.h>
+#include <machine/cpu.h>
+#if 0
+#include <ddb/ddb.h>
+#endif
+
+u_long softintrcnt [NSWI];
+
+SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL)
+
+/*
+ * Schedule a heavyweight interrupt process. This function is called
+ * from the interrupt handlers Xintr<num>.
+ */
+void
+sched_ithd(void *cookie)
+{
+ int irq = (int) cookie; /* IRQ we're handling */
+ ithd *ir = ithds[irq]; /* and the process that does it */
+
+ /* This used to be in icu_vector.s */
+ /*
+ * We count software interrupts when we process them. The
+ * code here follows previous practice, but there's an
+ * argument for counting hardware interrupts when they're
+ * processed too.
+ */
+ if (irq < NHWI) /* real interrupt, */
+ atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */
+ atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */
+
+ CTR3(KTR_INTR, "sched_ithd pid %d(%s) need=%d",
+ ir->it_proc->p_pid, ir->it_proc->p_comm, ir->it_need);
+
+#if 0
+ /*
+ * If we are in the debugger, we can't use interrupt threads to
+ * process interrupts since the threads are scheduled. Instead,
+ * call the interrupt handlers directly. This should be able to
+ * go away once we have light-weight interrupt handlers.
+ */
+ if (db_active) {
+ intrec *ih; /* and our interrupt handler chain */
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = ir->it_ih; ih != NULL; ih = ih->next) {
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+
+ INTREN (1 << ir->irq); /* reset the mask bit */
+ return;
+ }
+#endif
+
+ /*
+ * Set it_need so that if the thread is already running but close
+ * to done, it will do another go-round. Then get the sched lock
+ * and see if the thread is on whichkqs yet. If not, put it on
+ * there. In any case, kick everyone so that if the new thread
+ * is higher priority than their current thread, it gets run now.
+ */
+ ir->it_need = 1;
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (ir->it_proc->p_stat == SWAIT) { /* not on run queue */
+ CTR1(KTR_INTR, "sched_ithd: setrunqueue %d",
+ ir->it_proc->p_pid);
+/* membar_lock(); */
+ ir->it_proc->p_stat = SRUN;
+ setrunqueue(ir->it_proc);
+ aston();
+ }
+ else {
+if (irq < NHWI && (irq & 7) != 0)
+ CTR3(KTR_INTR, "sched_ithd %d: it_need %d, state %d",
+ ir->it_proc->p_pid,
+ ir->it_need,
+ ir->it_proc->p_stat );
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+#if 0
+ aston(); /* ??? check priorities first? */
+#else
+ need_resched();
+#endif
+}
+
+/*
+ * This is the main code for all interrupt threads. It gets put on
+ * whichkqs by setrunqueue above.
+ */
+void
+ithd_loop(void *dummy)
+{
+ ithd *me; /* our thread context */
+ intrec *ih; /* and our interrupt handler chain */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /*
+ * As long as we have interrupts outstanding, go through the
+ * list of handlers, giving each one a go at it.
+ */
+ for (;;) {
+ CTR3(KTR_INTR, "ithd_loop pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm, me->it_need);
+ while (me->it_need) {
+ /*
+ * Service interrupts. If another interrupt
+ * arrives while we are running, they will set
+ * it_need to denote that we should make
+ * another pass.
+ */
+ me->it_need = 0;
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = me->it_ih; ih != NULL; ih = ih->next) {
+ CTR5(KTR_INTR,
+ "ithd_loop pid %d ih=%p: %p(%p) flg=%x",
+ me->it_proc->p_pid, (void *)ih,
+ (void *)ih->handler, ih->argument,
+ ih->flags);
+
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ }
+
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+
+ INTREN (1 << me->irq); /* reset the mask bit */
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+#ifdef APIC_IO
+ CTR1(KTR_INTR, "ithd_loop pid %d: done",
+ me->it_proc->p_pid);
+#else
+ CTR2(KTR_INTR, "ithd_loop pid %d: done, imen=%x",
+ me->it_proc->p_pid, imen);
+#endif
+ mi_switch();
+ CTR1(KTR_INTR, "ithd_loop pid %d: resumed",
+ me->it_proc->p_pid);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
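
[Editorial note] sched_ithd() and ithd_loop() hand work off through the it_need flag: the scheduling side sets it and wakes the thread, the thread drains its handlers, then rechecks the flag under sched_lock before going back to sleep so a late arrival is not lost. A rough user-space analogue of the same pattern, with POSIX threads standing in for sched_lock and the SWAIT/SRUN state changes (none of these names come from the kernel code):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  bell = PTHREAD_COND_INITIALIZER;
    static int need;                        /* analogue of it_need */

    /* analogue of sched_ithd(): mark work pending and wake the thread */
    static void
    post_work(void)
    {
            pthread_mutex_lock(&lock);
            need = 1;
            pthread_cond_signal(&bell);
            pthread_mutex_unlock(&lock);
    }

    /* analogue of ithd_loop(): drain work, recheck before sleeping */
    static void *
    worker(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&lock);
            for (;;) {
                    while (need) {
                            need = 0;       /* a new post_work() sets it again */
                            pthread_mutex_unlock(&lock);
                            puts("running handler chain");  /* handlers run unlocked */
                            pthread_mutex_lock(&lock);
                    }
                    pthread_cond_wait(&bell, &lock);        /* idle until posted */
            }
            return (NULL);
    }

    int
    main(void)
    {
            pthread_t tid;

            pthread_create(&tid, NULL, worker, NULL);
            post_work();
            sleep(1);       /* let the worker drain before exiting */
            return (0);
    }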
+
+/*
+ * Start soft interrupt thread.
+ */
+void
+start_softintr(void *dummy)
+{
+ int error;
+ struct proc *p;
+ ithd *softintr; /* descriptor for the "IRQ" */
+ intrec *idesc; /* descriptor for this handler */
+ char *name = "sintr"; /* name for idesc */
+ int i;
+
+ if (ithds[SOFTINTR]) { /* we already have a thread */
+ printf("start_softintr: already running");
+ return;
+ }
+ /* first handler for this irq. */
+ softintr = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (softintr == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(softintr, sizeof(struct ithd));
+ softintr->irq = SOFTINTR;
+ ithds[SOFTINTR] = softintr;
+ error = kthread_create(intr_soft, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "softinterrupt");
+ if (error)
+ panic("start_softintr: kthread_create error %d\n", error);
+
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_rtprio.prio = PI_SOFT; /* soft interrupt */
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ softintr->it_proc = p;
+ p->p_ithd = softintr; /* reverse link */
+
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(idesc, sizeof (struct intrec));
+
+ idesc->ithd = softintr;
+ idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
+ if (idesc->name == NULL)
+ panic ("Can't create soft interrupt thread");
+ strcpy(idesc->name, name);
+ for (i = NHWI; i < NHWI + NSWI; i++)
+ intr_countp[i] = &softintrcnt [i - NHWI];
+}
+
+/*
+ * Software interrupt process code.
+ */
+void
+intr_soft(void *dummy)
+{
+ int i;
+ ithd *me; /* our thread context */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /* Main loop */
+ for (;;) {
+#if 0
+ CTR3(KTR_INTR, "intr_soft pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm,
+ me->it_need);
+#endif
+
+ /*
+ * Service interrupts. If another interrupt arrives
+ * while we are running, they will set it_need to
+ * denote that we should make another pass.
+ */
+ me->it_need = 0;
+ while ((i = ffs(spending))) {
+ i--;
+ atomic_add_long(intr_countp[i], 1);
+ spending &= ~ (1 << i);
+ mtx_enter(&Giant, MTX_DEF);
+ (ihandlers[i])();
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: done",
+ me->it_proc->p_pid);
+#endif
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+ mi_switch();
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: resumed",
+ me->it_proc->p_pid);
+#endif
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
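
[Editorial note] intr_soft() dispatches pending software interrupts by repeatedly taking the lowest set bit of the pending mask with ffs() and calling the matching table entry. The same bit-scan dispatch as a small, self-contained program; the handler table and the initial 'pending' value are made up for illustration:

    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    static void hello(void) { printf("swi 0\n"); }
    static void world(void) { printf("swi 3\n"); }
    static void (*table[32])(void) = { [0] = hello, [3] = world };
    static unsigned pending = (1u << 0) | (1u << 3);

    int
    main(void)
    {
            int i;

            /*
             * Dispatch the lowest-numbered pending bit first, clearing
             * each bit before calling its handler, as intr_soft() does.
             */
            while ((i = ffs(pending)) != 0) {
                    i--;                    /* ffs() is 1-based */
                    pending &= ~(1u << i);
                    if (table[i] != NULL)
                            (*table[i])();
            }
            return (0);
    }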
diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c
index 34a8c22..870760e 100644
--- a/sys/amd64/isa/nmi.c
+++ b/sys/amd64/isa/nmi.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of the kernel, then set up
+ * the interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update intrnames array with the specified name. This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
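
[Editorial note] As the comment above notes, icu_unset() takes the handler pointer purely as a consistency check: the IRQ is only torn down if the caller names the handler that is actually installed. A tiny sketch of that guard, with hypothetical names (installed, unset) rather than the kernel's intr_handler[] array:

    #include <stdio.h>

    typedef void handler_t(void *);

    static handler_t *installed;    /* stands in for intr_handler[intr] */

    /* Refuse the teardown unless the caller names the installed handler. */
    static int
    unset(handler_t *h)
    {
            if (installed != h)
                    return (-1);    /* mismatch: leave the IRQ alone */
            installed = NULL;       /* the kernel reverts to the stray handler here */
            return (0);
    }

    static void dummy(void *arg) { (void)arg; }

    int
    main(void)
    {
            installed = dummy;
            printf("wrong handler: %d\n", unset((handler_t *)0));
            printf("right handler: %d\n", unset(dummy));
            return (0);
    }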
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+ * If we have a fast interrupt, we need to set the
+ * handler address directly. Do that below. For a
+ * slow interrupt, we don't need to know more details,
+ * so do it here because it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+ } else
+ snprintf(ithd->it_proc->p_comm, MAXCOMLEN,
+ "irq%d: %s", irq, name);
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+		 * We can't append the new handler if either the
+		 * existing handler list or the new handler does not
+		 * allow interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+ " to normal interrupt %s on irq%d",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
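
[Editorial note] When a handler is added to an IRQ that already has a thread, inthand_add() grows the thread's p_comm name, falling back to a '+' marker once the combined names no longer fit in MAXCOMLEN. The same naming rule as a stand-alone sketch; MAXCOMLEN is taken as 16 here for illustration, while the kernel uses the definition from sys/param.h:

    #include <stdio.h>
    #include <string.h>

    #define MAXCOMLEN 16    /* illustrative value, see sys/param.h */

    /*
     * Append 'name' to the thread name the way inthand_add() grows
     * p_comm, falling back to a '+' marker when it no longer fits.
     */
    static void
    append_name(char comm[MAXCOMLEN + 1], const char *name)
    {
            if (strlen(comm) + strlen(name) < MAXCOMLEN) {
                    strcat(comm, " ");
                    strcat(comm, name);
            } else if (strlen(comm) == MAXCOMLEN)
                    comm[MAXCOMLEN - 1] = '+';
            else
                    strcat(comm, "+");
    }

    int
    main(void)
    {
            char comm[MAXCOMLEN + 1] = "irq14: ata0";

            append_name(comm, "ata1");
            printf("%s\n", comm);   /* "irq14: ata0 ata1" */
            append_name(comm, "fdc0");
            printf("%s\n", comm);   /* "irq14: ata0 ata+" (out of room) */
            return (0);
    }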
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate and remove from the linked list the interrupt handler
+ * descriptor created by an earlier call of inthand_add(), then
+ * adjust the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+ if (ih->next != idesc)
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c
index 637853e..8610e35 100644
--- a/sys/amd64/isa/npx.c
+++ b/sys/amd64/isa/npx.c
@@ -245,6 +245,12 @@ npx_probe(dev)
setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+	 * XXX This looks highly bogus, but it appears that npx_probe1
+ * needs interrupts enabled. Does this make any difference
+ * here?
+ */
enable_intr();
result = npx_probe1(dev);
disable_intr();
@@ -797,7 +803,7 @@ npxdna()
/*
* Record new context early in case frstor causes an IRQ13.
*/
- npxproc = curproc;
+ PCPU_SET(npxproc, CURPROC);
curpcb->pcb_savefpu.sv_ex_sw = 0;
/*
* The following frstor may cause an IRQ13 when the state being
@@ -834,16 +840,18 @@ npxsave(addr)
fnsave(addr);
/* fnop(); */
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
#else /* SMP */
+ int intrstate;
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
+ intrstate = save_intr();
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
old_icu2_mask = inb(IO_ICU2 + 1);
@@ -851,12 +859,12 @@ npxsave(addr)
outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
idt[npx_intrno] = npx_idt_probeintr;
- enable_intr();
+ write_eflags(intrstate);
stop_emulating();
fnsave(addr);
fnop();
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
disable_intr();
icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */
icu2_mask = inb(IO_ICU2 + 1);
@@ -866,7 +874,7 @@ npxsave(addr)
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
- enable_intr(); /* back to usual state */
+ restore_intr(intrstate); /* back to previous state */
#endif /* SMP */
}
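
[Editorial note] The npxsave() change above replaces an unconditional enable_intr() with saving the caller's interrupt state and restoring it afterwards, so the routine behaves correctly even when the caller already had interrupts disabled. A user-space analogue of the same save-and-restore idea, using POSIX signal masks; nothing here comes from the kernel code:

    #include <signal.h>
    #include <stdio.h>

    /*
     * Block SIGINT around a critical region and restore whatever the
     * caller's mask was, instead of unconditionally unblocking.
     */
    static void
    critical_work(void)
    {
            sigset_t block, saved;

            sigemptyset(&block);
            sigaddset(&block, SIGINT);
            sigprocmask(SIG_BLOCK, &block, &saved);  /* like save_intr() + disable_intr() */

            puts("doing work with SIGINT held off");

            sigprocmask(SIG_SETMASK, &saved, NULL);  /* like restore_intr(intrstate) */
    }

    int
    main(void)
    {
            critical_work();
            return (0);
    }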
diff --git a/sys/amd64/isa/vector.S b/sys/amd64/isa/vector.S
index 5447a90..79f2320 100644
--- a/sys/amd64/isa/vector.S
+++ b/sys/amd64/isa/vector.S
@@ -16,9 +16,10 @@
#include <i386/isa/isa.h>
#endif
+#define FAST_INTR_HANDLER_USES_ES 1
#ifdef FAST_INTR_HANDLER_USES_ES
#define ACTUALLY_PUSHED 1
-#define MAYBE_MOVW_AX_ES movl %ax,%es
+#define MAYBE_MOVW_AX_ES movw %ax,%es
#define MAYBE_POPL_ES popl %es
#define MAYBE_PUSHL_ES pushl %es
#else
@@ -36,11 +37,6 @@
.data
ALIGN_DATA
- .globl _intr_nesting_level
-_intr_nesting_level:
- .byte 0
- .space 3
-
/*
* Interrupt counters and names for export to vmstat(8) and friends.
*
@@ -58,7 +54,6 @@ _eintrcnt:
_intrnames:
.space NR_INTRNAMES * 16
_eintrnames:
-
.text
/*
diff --git a/sys/amd64/isa/vector.s b/sys/amd64/isa/vector.s
index 5447a90..79f2320 100644
--- a/sys/amd64/isa/vector.s
+++ b/sys/amd64/isa/vector.s
@@ -16,9 +16,10 @@
#include <i386/isa/isa.h>
#endif
+#define FAST_INTR_HANDLER_USES_ES 1
#ifdef FAST_INTR_HANDLER_USES_ES
#define ACTUALLY_PUSHED 1
-#define MAYBE_MOVW_AX_ES movl %ax,%es
+#define MAYBE_MOVW_AX_ES movw %ax,%es
#define MAYBE_POPL_ES popl %es
#define MAYBE_PUSHL_ES pushl %es
#else
@@ -36,11 +37,6 @@
.data
ALIGN_DATA
- .globl _intr_nesting_level
-_intr_nesting_level:
- .byte 0
- .space 3
-
/*
* Interrupt counters and names for export to vmstat(8) and friends.
*
@@ -58,7 +54,6 @@ _eintrcnt:
_intrnames:
.space NR_INTRNAMES * 16
_eintrnames:
-
.text
/*