path: root/sys/amd64/amd64
author    jasone <jasone@FreeBSD.org>  2000-09-07 01:33:02 +0000
committer jasone <jasone@FreeBSD.org>  2000-09-07 01:33:02 +0000
commit    769e0f974d8929599ba599ac496510fffc90ff34 (patch)
tree      9387522900085835de81e7830e570ef3f6b3ea80 /sys/amd64/amd64
parent    acf1927de02afda4855ec278b1128fd9446405ea (diff)
Major update to the way synchronization is done in the kernel.  Highlights
include:

* Mutual exclusion is used instead of spl*().  See mutex(9).  (Note: The
  alpha port is still in transition and currently uses both.)

* Per-CPU idle processes.

* Interrupts are run in their own separate kernel threads and can be
  preempted (i386 only).

Partially contributed by:	BSDi (BSD/OS)
Submissions by (at least):	cp, dfr, dillon, grog, jake, jhb, sheldonh
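A minimal sketch of the conversion pattern this commit introduces (illustration
only, not part of the commit): data that used to be protected by raising the
interrupt priority level with spl*() is instead protected by a mutex, using the
mtx_init()/mtx_enter()/mtx_exit() calls that appear elsewhere in this diff.
The lock and counter names below are made up for the example.

	/* Hypothetical data protected first by spl*(), then by a mutex. */
	static struct mtx foo_lock;	/* mtx_init(&foo_lock, "foo lock", MTX_DEF) at boot */
	static int foo_count;

	/* Old style: mask interrupt classes around the critical section. */
	static void
	foo_bump_spl(void)
	{
		int s;

		s = splhigh();		/* block interrupts at or below this level */
		foo_count++;
		splx(s);		/* restore the previous priority level */
	}

	/* New style: acquire the mutex, as described in mutex(9). */
	static void
	foo_bump_mtx(void)
	{
		mtx_enter(&foo_lock, MTX_DEF);
		foo_count++;
		mtx_exit(&foo_lock, MTX_DEF);
	}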
Diffstat (limited to 'sys/amd64/amd64')
-rw-r--r--  sys/amd64/amd64/amd64-gdbstub.c  |  10
-rw-r--r--  sys/amd64/amd64/apic_vector.S    | 132
-rw-r--r--  sys/amd64/amd64/autoconf.c       |   8
-rw-r--r--  sys/amd64/amd64/cpu_switch.S     | 269
-rw-r--r--  sys/amd64/amd64/exception.S      |  41
-rw-r--r--  sys/amd64/amd64/exception.s      |  41
-rw-r--r--  sys/amd64/amd64/fpu.c            |  18
-rw-r--r--  sys/amd64/amd64/genassym.c       |  27
-rw-r--r--  sys/amd64/amd64/identcpu.c       |   3
-rw-r--r--  sys/amd64/amd64/initcpu.c        |   6
-rw-r--r--  sys/amd64/amd64/legacy.c         |  32
-rw-r--r--  sys/amd64/amd64/locore.S         |   3
-rw-r--r--  sys/amd64/amd64/locore.s         |   3
-rw-r--r--  sys/amd64/amd64/machdep.c        |  37
-rw-r--r--  sys/amd64/amd64/mp_machdep.c     |  88
-rw-r--r--  sys/amd64/amd64/mpboot.S         |  36
-rw-r--r--  sys/amd64/amd64/mptable.c        |  88
-rw-r--r--  sys/amd64/amd64/nexus.c          |  32
-rw-r--r--  sys/amd64/amd64/pmap.c           |   2
-rw-r--r--  sys/amd64/amd64/swtch.s          | 269
-rw-r--r--  sys/amd64/amd64/trap.c           | 391
-rw-r--r--  sys/amd64/amd64/tsc.c            | 155
-rw-r--r--  sys/amd64/amd64/vm_machdep.c     |  51
23 files changed, 737 insertions, 1005 deletions
diff --git a/sys/amd64/amd64/amd64-gdbstub.c b/sys/amd64/amd64/amd64-gdbstub.c
index 986b8d4..b442a37 100644
--- a/sys/amd64/amd64/amd64-gdbstub.c
+++ b/sys/amd64/amd64/amd64-gdbstub.c
@@ -188,7 +188,8 @@ getpacket (char *buffer)
unsigned char ch;
int s;
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/* wait around for the start character, ignore all other characters */
@@ -239,7 +240,7 @@ getpacket (char *buffer)
}
}
while (checksum != xmitcsum);
- splx (s);
+ write_eflags(s);
}
/* send the packet in buffer. */
@@ -253,7 +254,8 @@ putpacket (char *buffer)
int s;
/* $<packet info>#<checksum>. */
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/*
@@ -285,7 +287,7 @@ putpacket (char *buffer)
putDebugChar (hexchars[checksum & 0xf]);
}
while ((getDebugChar () & 0x7f) != '+');
- splx (s);
+ write_eflags(s);
}
static char remcomInBuffer[BUFMAX];
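
For readability, the pattern the gdb-stub hunks above switch to, written out as
a plain function (an illustrative sketch, not part of the patch): instead of
spltty()/splx(), the current EFLAGS value is saved, interrupts are disabled for
the duration of the polling loop, and the saved flags are restored afterwards.

	/* Illustration only; the function name is hypothetical. */
	static void
	example_polling_section(void)
	{
		int s;

		s = read_eflags();	/* remember the current interrupt state */
		disable_intr();		/* run the loop with interrupts off */
		/* ... poll getDebugChar()/putDebugChar() here ... */
		write_eflags(s);	/* restore the saved state */
	}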
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 2a7559d..54bf003 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -17,7 +17,7 @@
/*
- * Macros for interrupt interrupt entry, call to handler, and exit.
+ * Macros for interrupt entry, call to handler, and exit.
*/
#define FAST_INTR(irq_num, vec_name) \
@@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \
/*
- * Test to see if the source is currntly masked, clear if so.
+ * Test to see if the source is currently masked, clear if so.
*/
#define UNMASK_IRQ(irq_num) \
IMASK_LOCK ; /* into critical reg */ \
@@ -200,7 +200,16 @@ log_intr_event:
#else
#define APIC_ITRACE(name, irq_num, id)
#endif
-
+
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \
maybe_extra_ipending ; \
; \
APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
; \
MASK_LEVEL_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- MP_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f ; /* no */ \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
-; \
incb _intr_nesting_level ; \
; \
/* entry point used by doreti_unpend for HWIs. */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- movl _cpl, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cpl ; \
- lock ; \
- andl $~IRQ_BIT(irq_num), _ipending ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
+ pushl $irq_num; /* pass the IRQ */ \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
MEXITCOUNT ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; /* XXX: iactive bit might be 0 now */ \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- MP_RELLOCK ; \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- POP_FRAME ; /* and return */ \
- iret
+ jmp doreti_next
/*
* Handle "spurious INTerrupts".
@@ -434,20 +380,10 @@ _Xcpuast:
FAKE_MCOUNT(13*4(%esp))
- /*
- * Giant locks do not come cheap.
- * A lot of cycles are going to be wasted here.
- */
- call _get_mplock
-
- movl _cpl, %eax
- pushl %eax
orl $AST_PENDING, _astpending /* XXX */
incb _intr_nesting_level
sti
- pushl $0
-
movl _cpuid, %eax
lock
btrl %eax, _checkstate_pending_ast
@@ -461,7 +397,7 @@ _Xcpuast:
lock
incl CNAME(cpuast_cnt)
MEXITCOUNT
- jmp _doreti
+ jmp doreti_next
1:
/* We are already in the process of delivering an ast for this CPU */
POP_FRAME
@@ -487,40 +423,24 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- MP_TRYLOCK
- testl %eax,%eax /* Did we get the lock ? */
- jz 1f /* No */
-
lock
incl CNAME(forward_irq_hitcnt)
cmpb $4, _intr_nesting_level
- jae 2f
+ jae 1f
- movl _cpl, %eax
- pushl %eax
incb _intr_nesting_level
sti
- pushl $0
-
MEXITCOUNT
- jmp _doreti /* Handle forwarded interrupt */
+ jmp doreti_next /* Handle forwarded interrupt */
1:
lock
- incl CNAME(forward_irq_misscnt)
- call forward_irq /* Oops, we've lost the isr lock */
- MEXITCOUNT
- POP_FRAME
- iret
-2:
- lock
incl CNAME(forward_irq_toodeepcnt)
-3:
- MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
+#if 0
/*
*
*/
@@ -532,9 +452,11 @@ forward_irq:
cmpl $0, CNAME(forward_irq_enabled)
jz 4f
+/* XXX - this is broken now, because mp_lock doesn't exist
movl _mp_lock,%eax
cmpl $FREE_LOCK,%eax
jne 1f
+ */
movl $0, %eax /* Pick CPU #0 if noone has lock */
1:
shrl $24,%eax
@@ -559,6 +481,7 @@ forward_irq:
jnz 3b
4:
ret
+#endif
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
@@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr)
FAST_INTR(22,fastintr22)
FAST_INTR(23,fastintr23)
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, CLKINTR_PENDING)
INTR(1,intr1,)
INTR(2,intr2,)
@@ -728,15 +652,11 @@ _ihandlers:
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
+#if 0
/* active flag for lazy masking */
iactive:
.long 0
+#endif
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c
index b209065..4edda4b 100644
--- a/sys/amd64/amd64/autoconf.c
+++ b/sys/amd64/amd64/autoconf.c
@@ -163,14 +163,6 @@ configure(dummy)
* XXX this is slightly misplaced.
*/
spl0();
-
- /*
- * Allow lowering of the ipl to the lowest kernel level if we
- * panic (or call tsleep() before clearing `cold'). No level is
- * completely safe (since a panic may occur in a critical region
- * at splhigh()), but we want at least bio interrupts to work.
- */
- safepri = cpl;
}
static void
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index c895fef..db56a1b 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index acb8b40..9e77114 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s
index acb8b40..9e77114 100644
--- a/sys/amd64/amd64/exception.s
+++ b/sys/amd64/amd64/exception.s
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 637853e..8610e35 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -245,6 +245,12 @@ npx_probe(dev)
setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+ * XXX This looks highly bogus, but it appears that npc_probe1
+ * needs interrupts enabled. Does this make any difference
+ * here?
+ */
enable_intr();
result = npx_probe1(dev);
disable_intr();
@@ -797,7 +803,7 @@ npxdna()
/*
* Record new context early in case frstor causes an IRQ13.
*/
- npxproc = curproc;
+ PCPU_SET(npxproc, CURPROC);
curpcb->pcb_savefpu.sv_ex_sw = 0;
/*
* The following frstor may cause an IRQ13 when the state being
@@ -834,16 +840,18 @@ npxsave(addr)
fnsave(addr);
/* fnop(); */
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
#else /* SMP */
+ int intrstate;
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
+ intrstate = save_intr();
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
old_icu2_mask = inb(IO_ICU2 + 1);
@@ -851,12 +859,12 @@ npxsave(addr)
outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
idt[npx_intrno] = npx_idt_probeintr;
- enable_intr();
+ write_eflags(intrstate);
stop_emulating();
fnsave(addr);
fnop();
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
disable_intr();
icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */
icu2_mask = inb(IO_ICU2 + 1);
@@ -866,7 +874,7 @@ npxsave(addr)
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
- enable_intr(); /* back to usual state */
+ restore_intr(intrstate); /* back to previous state */
#endif /* SMP */
}
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 60accd1..78c6075 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -51,6 +51,10 @@
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/resourcevar.h>
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
#include <machine/frame.h>
#include <machine/bootinfo.h>
#include <machine/tss.h>
@@ -73,6 +77,7 @@
#include <machine/sigframe.h>
#include <machine/globaldata.h>
#include <machine/vm86.h>
+#include <machine/mutex.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -127,9 +132,7 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
-#ifdef SMP
-ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest));
-#endif
+ASSYM(PCB_SCHEDNEST, offsetof(struct pcb, pcb_schednest));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
@@ -170,7 +173,9 @@ ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
ASSYM(GD_SIZEOF, sizeof(struct globaldata));
ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
+ASSYM(GD_PREVPROC, offsetof(struct globaldata, gd_prevproc));
ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc));
+ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc));
ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss));
ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
@@ -178,11 +183,21 @@ ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
+ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct globaldata, gd_intr_nesting_level));
#ifdef USER_LDT
ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
#endif
+ASSYM(GD_WITNESS_SPIN_CHECK, offsetof(struct globaldata, gd_witness_spin_check));
+
+/* XXX */
+#ifdef KTR_PERCPU
+ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx));
+ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf));
+ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data));
+#endif
+
#ifdef SMP
ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid));
ASSYM(GD_CPU_LOCKID, offsetof(struct globaldata, gd_cpu_lockid));
@@ -211,3 +226,9 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
+ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl));
+
+ASSYM(MTX_UNOWNED, MTX_UNOWNED);
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c
index 0e11e2b..71ecd63 100644
--- a/sys/amd64/amd64/identcpu.c
+++ b/sys/amd64/amd64/identcpu.c
@@ -42,6 +42,7 @@
#include "opt_cpu.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -53,6 +54,8 @@
#include <machine/specialreg.h>
#include <machine/md_var.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#define IDENTBLUE_CYRIX486 0
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index be86c65..b9395bf 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -607,12 +607,14 @@ void
enable_K5_wt_alloc(void)
{
u_int64_t msr;
+ int intrstate;
/*
* Write allocate is supported only on models 1, 2, and 3, with
* a stepping of 4 or greater.
*/
if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
+ intrstate = save_intr();
disable_intr();
msr = rdmsr(0x83); /* HWCR */
wrmsr(0x83, msr & !(0x10));
@@ -645,7 +647,7 @@ enable_K5_wt_alloc(void)
msr=rdmsr(0x83);
wrmsr(0x83, msr|0x10); /* enable write allocate */
- enable_intr();
+ restore_intr(intrstate);
}
}
@@ -708,7 +710,6 @@ enable_K6_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
void
@@ -770,7 +771,6 @@ enable_K6_2_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
#endif /* I585_CPU && CPU_WT_ALLOC */
diff --git a/sys/amd64/amd64/legacy.c b/sys/amd64/amd64/legacy.c
index 8a30770..5b6cdbc 100644
--- a/sys/amd64/amd64/legacy.c
+++ b/sys/amd64/amd64/legacy.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index bddd7d5..fa95fb0 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s
index bddd7d5..fa95fb0 100644
--- a/sys/amd64/amd64/locore.s
+++ b/sys/amd64/amd64/locore.s
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 6edecf0..875c9d5 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -58,6 +58,7 @@
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/proc.h>
@@ -98,10 +99,12 @@
#include <machine/bootinfo.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
+#include <machine/globaldata.h>
+#include <machine/globals.h>
#ifdef SMP
#include <machine/smp.h>
-#include <machine/globaldata.h>
#endif
#ifdef PERFMON
#include <machine/perfmon.h>
@@ -110,6 +113,7 @@
#ifdef OLD_BUS_ARCH
#include <i386/isa/isa_device.h>
#endif
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#include <isa/rtc.h>
#include <machine/vm86.h>
@@ -247,6 +251,11 @@ vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;
+struct cpuhead cpuhead;
+
+mtx_t sched_lock;
+mtx_t Giant;
+
#define offsetof(type, member) ((size_t)(&((type *)0)->member))
static void
@@ -431,6 +440,11 @@ again:
bufinit();
vm_pager_bufferinit();
+ SLIST_INIT(&cpuhead);
+ SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu);
+
+ mtx_init(&sched_lock, "sched lock", MTX_SPIN);
+
#ifdef SMP
/*
* OK, enough kmem_alloc/malloc state should be up, lets get on with it!
@@ -1817,11 +1831,6 @@ init386(first)
#endif
int off;
- /*
- * Prevent lowering of the ipl if we call tsleep() early.
- */
- safepri = cpl;
-
proc0.p_addr = proc0paddr;
atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1871,6 +1880,10 @@ init386(first)
r_gdt.rd_base = (int) gdt;
lgdt(&r_gdt);
+ /* setup curproc so that mutexes work */
+ PCPU_SET(curproc, &proc0);
+ PCPU_SET(prevproc, &proc0);
+
/* make ldt memory segments */
/*
* The data segment limit must not cover the user area because we
@@ -1953,7 +1966,7 @@ init386(first)
/* make an initial tss so cpu can get interrupt stack on syscall! */
common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16;
- common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
+ common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
private_tss = 0;
tss_gdt = &gdt[GPROC0_SEL].sd;
@@ -1974,6 +1987,12 @@ init386(first)
dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ /*
+ * We grab Giant during the vm86bios routines, so we need to ensure
+ * that it is up and running before we use vm86.
+ */
+ mtx_init(&Giant, "Giant", MTX_DEF);
+
vm86_initialize();
getmemsize(first);
@@ -2009,9 +2028,7 @@ init386(first)
/* setup proc 0's pcb */
proc0.p_addr->u_pcb.pcb_flags = 0;
proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
-#ifdef SMP
- proc0.p_addr->u_pcb.pcb_mpnest = 1;
-#endif
+ proc0.p_addr->u_pcb.pcb_schednest = 0;
proc0.p_addr->u_pcb.pcb_ext = 0;
proc0.p_md.md_regs = &proc0_tf;
}
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 61c5ecf..95b5759 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
index d3602d2..9ede02c 100644
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -114,43 +114,9 @@ mp_begin: /* now running relocated at KERNBASE */
CHECKPOINT(0x39, 6)
- /* wait till we can get into the kernel */
- call _boot_get_mplock
-
- /* Now, let's prepare for some REAL WORK :-) */
+ /* Now, let's prepare for some REAL WORK :-) This doesn't return. */
call _ap_init
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,-4(%esp)
-2:
- cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */
- jz 2b
- call _get_mplock
-
- /* let her rip! (loads new stack) */
- jmp _cpu_switch
-
-NON_GPROF_ENTRY(wait_ap)
- pushl %ebp
- movl %esp, %ebp
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,0(%esp)
- movl %eax, 8(%ebp)
-1:
- cmpl $0, CNAME(smp_started)
- jnz 2f
- decl %eax
- cmpl $0, %eax
- jge 1b
-2:
- call _get_mplock
- movl %ebp, %esp
- popl %ebp
- ret
-
-
/*
* This is the embedded trampoline or bootstrap that is
* copied into 'real-mode' low memory, it is where the
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index 61c5ecf..95b5759 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c
index 8a30770..5b6cdbc 100644
--- a/sys/amd64/amd64/nexus.c
+++ b/sys/amd64/amd64/nexus.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index edae292..7ce9120 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -668,7 +668,7 @@ pmap_pte_quick(pmap, va)
* (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V;
cpu_invlpg(prv_PADDR1);
}
- return prv_PADDR1 + ((unsigned) index & (NPTEPG - 1));
+ return (unsigned *)(prv_PADDR1 + (index & (NPTEPG - 1)));
#else
if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s
index c895fef..db56a1b 100644
--- a/sys/amd64/amd64/swtch.s
+++ b/sys/amd64/amd64/swtch.s
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 51de1ac..f32dfae 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -49,10 +49,12 @@
#include "opt_trap.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
@@ -76,12 +78,14 @@
#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#ifdef POWERFAIL_NMI
@@ -96,11 +100,14 @@
#include "isa.h"
#include "npx.h"
+#include <sys/sysctl.h>
+
int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall2 __P((struct trapframe frame));
+extern void ast __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -142,7 +149,7 @@ static char *trap_msg[] = {
};
static __inline int userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks, int have_mplock));
+ u_quad_t oticks, int have_giant));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
@@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
static __inline int
-userret(p, frame, oticks, have_mplock)
+userret(p, frame, oticks, have_giant)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
- int have_mplock;
+ int have_giant;
{
int sig, s;
while ((sig = CURSIG(p)) != 0) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
postsig(sig);
}
@@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
- }
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
postsig(sig);
+ }
}
/*
* Charge system time if profiling.
*/
if (p->p_flag & P_PROFIL) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
}
curpriority = p->p_priority;
- return(have_mplock);
+ return(have_giant);
}
/*
@@ -226,13 +236,20 @@ trap(frame)
u_quad_t sticks = 0;
int i = 0, ucode = 0, type, code;
vm_offset_t eva;
+#ifdef POWERFAIL_NMI
+ static int lastalert = 0;
+#endif
- if (!(frame.tf_eflags & PSL_I)) {
+ atomic_add_int(&cnt.v_trap, 1);
+
+ if ((frame.tf_eflags & PSL_I) == 0) {
/*
- * Buggy application or kernel code has disabled interrupts
- * and then trapped. Enabling interrupts now is wrong, but
- * it is better than running with interrupts disabled until
- * they are accidentally enabled later.
+ * Buggy application or kernel code has disabled
+ * interrupts and then trapped. Enabling interrupts
+ * now is wrong, but it is better than running with
+ * interrupts disabled until they are accidentally
+ * enabled later. XXX Consider whether this is still
+ * correct.
*/
type = frame.tf_trapno;
if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
@@ -252,54 +269,27 @@ trap(frame)
eva = 0;
if (frame.tf_trapno == T_PAGEFLT) {
/*
- * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
- * This problem is worked around by using an interrupt
- * gate for the pagefault handler. We are finally ready
- * to read %cr2 and then must reenable interrupts.
- *
- * XXX this should be in the switch statement, but the
- * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
- * flow of control too much for this to be obviously
- * correct.
+ * For some Cyrix CPUs, %cr2 is clobbered by
+ * interrupts. This problem is worked around by using
+ * an interrupt gate for the pagefault handler. We
+ * are finally ready to read %cr2 and then must
+ * reenable interrupts.
*/
eva = rcr2();
enable_intr();
- }
+ }
+
+ mtx_enter(&Giant, MTX_DEF);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
+
type = frame.tf_trapno;
code = frame.tf_err;
- if (in_vm86call) {
- if (frame.tf_eflags & PSL_VM &&
- (type == T_PROTFLT || type == T_STKFLT)) {
- i = vm86_emulate((struct vm86frame *)&frame);
- if (i != 0)
- /*
- * returns to original process
- */
- vm86_trap((struct vm86frame *)&frame);
- return;
- }
- switch (type) {
- /*
- * these traps want either a process context, or
- * assume a normal userspace trap.
- */
- case T_PROTFLT:
- case T_SEGNPFLT:
- trap_fatal(&frame, eva);
- return;
- case T_TRCTRAP:
- type = T_BPTFLT; /* kernel breakpoint */
- /* FALL THROUGH */
- }
- goto kernel_trap; /* normal kernel trap handling */
- }
-
- if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
+ if ((ISPL(frame.tf_cs) == SEL_UPL) ||
+ ((frame.tf_eflags & PSL_VM) && !in_vm86call)) {
/* user trap */
sticks = p->p_sticks;
@@ -322,16 +312,6 @@ restart:
i = SIGFPE;
break;
- case T_ASTFLT: /* Allow process switch */
- astoff();
- cnt.v_soft++;
- if (p->p_flag & P_OWEUPC) {
- p->p_flag &= ~P_OWEUPC;
- addupc_task(p, p->p_stats->p_prof.pr_addr,
- p->p_stats->p_prof.pr_ticks);
- }
- goto out;
-
/*
* The following two traps can happen in
* vm86 mode, and, if so, we want to handle
@@ -342,7 +322,7 @@ restart:
if (frame.tf_eflags & PSL_VM) {
i = vm86_emulate((struct vm86frame *)&frame);
if (i == 0)
- goto out;
+ goto user;
break;
}
/* FALL THROUGH */
@@ -357,14 +337,20 @@ restart:
case T_PAGEFLT: /* page fault */
i = trap_pfault(&frame, TRUE, eva);
- if (i == -1)
- return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if (i == -2)
+ if (i == -2) {
+ /*
+ * f00f hack workaround has triggered, treat
+ * as illegal instruction not page fault.
+ */
+ frame.tf_trapno = T_PRIVINFLT;
goto restart;
+ }
#endif
- if (i == 0)
+ if (i == -1)
goto out;
+ if (i == 0)
+ goto user;
ucode = T_PAGEFLT;
break;
@@ -377,7 +363,15 @@ restart:
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
- goto handle_powerfail;
+#ifndef TIMER_FREQ
+# define TIMER_FREQ 1193182
+#endif
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -391,7 +385,7 @@ restart:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi)
panic("NMI indicates hardware failure");
break;
@@ -410,9 +404,9 @@ restart:
case T_DNA:
#if NNPX > 0
- /* if a transparent fault (due to context switch "late") */
+ /* transparent fault (due to context switch "late") */
if (npxdna())
- return;
+ goto out;
#endif
if (!pmath_emulate) {
i = SIGFPE;
@@ -422,7 +416,7 @@ restart:
i = (*pmath_emulate)(&frame);
if (i == 0) {
if (!(frame.tf_eflags & PSL_T))
- return;
+ goto out;
frame.tf_eflags &= ~PSL_T;
i = SIGTRAP;
}
@@ -435,13 +429,12 @@ restart:
break;
}
} else {
-kernel_trap:
/* kernel trap */
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(&frame, FALSE, eva);
- return;
+ goto out;
case T_DNA:
#if NNPX > 0
@@ -451,31 +444,35 @@ kernel_trap:
* registered such use.
*/
if (npxdna())
- return;
+ goto out;
#endif
break;
- case T_PROTFLT: /* general protection fault */
- case T_SEGNPFLT: /* segment not present fault */
/*
- * Invalid segment selectors and out of bounds
- * %eip's and %esp's can be set up in user mode.
- * This causes a fault in kernel mode when the
- * kernel tries to return to user mode. We want
- * to get this fault so that we can fix the
- * problem here and not have to check all the
- * selectors and pointers when the user changes
- * them.
+ * The following two traps can happen in
+ * vm86 mode, and, if so, we want to handle
+ * them specially.
*/
-#define MAYBE_DORETI_FAULT(where, whereto) \
- do { \
- if (frame.tf_eip == (int)where) { \
- frame.tf_eip = (int)whereto; \
- return; \
- } \
- } while (0)
-
- if (intr_nesting_level == 0) {
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+ if (frame.tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)&frame);
+ if (i != 0)
+ /*
+ * returns to original process
+ */
+ vm86_trap((struct vm86frame *)&frame);
+ goto out;
+ }
+ /* FALL THROUGH */
+
+ case T_SEGNPFLT: /* segment not present fault */
+ if (in_vm86call)
+ break;
+
+ if (intr_nesting_level != 0)
+ break;
+
/*
* Invalid %fs's and %gs's can be created using
* procfs or PT_SETREGS or by invalidating the
@@ -488,20 +485,38 @@ kernel_trap:
if (frame.tf_eip == (int)cpu_switch_load_gs) {
curpcb->pcb_gs = 0;
psignal(p, SIGBUS);
- return;
+ goto out;
+ }
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %eip's and %esp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame.tf_eip == (int)doreti_iret) {
+ frame.tf_eip = (int)doreti_iret_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_ds) {
+ frame.tf_eip = (int)doreti_popl_ds_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_es) {
+ frame.tf_eip = (int)doreti_popl_es_fault;
+ goto out;
}
- MAYBE_DORETI_FAULT(doreti_iret,
- doreti_iret_fault);
- MAYBE_DORETI_FAULT(doreti_popl_ds,
- doreti_popl_ds_fault);
- MAYBE_DORETI_FAULT(doreti_popl_es,
- doreti_popl_es_fault);
- MAYBE_DORETI_FAULT(doreti_popl_fs,
- doreti_popl_fs_fault);
+ if (frame.tf_eip == (int)doreti_popl_fs) {
+ frame.tf_eip = (int)doreti_popl_fs_fault;
+ goto out;
+ }
if (curpcb && curpcb->pcb_onfault) {
frame.tf_eip = (int)curpcb->pcb_onfault;
- return;
- }
+ goto out;
}
break;
@@ -517,7 +532,7 @@ kernel_trap:
*/
if (frame.tf_eflags & PSL_NT) {
frame.tf_eflags &= ~PSL_NT;
- return;
+ goto out;
}
break;
@@ -529,7 +544,7 @@ kernel_trap:
* silently until the syscall handler has
* saved the flags.
*/
- return;
+ goto out;
}
if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
/*
@@ -537,7 +552,7 @@ kernel_trap:
* flags. Stop single stepping it.
*/
frame.tf_eflags &= ~PSL_T;
- return;
+ goto out;
}
/*
* Ignore debug register trace traps due to
@@ -549,13 +564,13 @@ kernel_trap:
* in kernel space because that is useful when
* debugging the kernel.
*/
- if (user_dbreg_trap()) {
+ if (user_dbreg_trap() && !in_vm86call) {
/*
* Reset breakpoint bits because the
* processor doesn't
*/
load_dr6(rdr6() & 0xfffffff0);
- return;
+ goto out;
}
/*
* Fall through (TRCTRAP kernel mode, kernel address)
@@ -567,28 +582,19 @@ kernel_trap:
*/
#ifdef DDB
if (kdb_trap (type, 0, &frame))
- return;
+ goto out;
#endif
break;
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
-#ifndef TIMER_FREQ
-# define TIMER_FREQ 1193182
-#endif
- handle_powerfail:
- {
- static unsigned lastalert = 0;
-
- if(time_second - lastalert > 10)
- {
+ if (time_second - lastalert > 10) {
log(LOG_WARNING, "NMI: power fail\n");
sysbeep(TIMER_FREQ/880, hz);
lastalert = time_second;
- }
- return;
}
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -602,16 +608,16 @@ kernel_trap:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi == 0)
- return;
+ goto out;
/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
}
trap_fatal(&frame, eva);
- return;
+ goto out;
}
/* Translate fault for emulators (e.g. Linux) */
@@ -630,8 +636,10 @@ kernel_trap:
}
#endif
-out:
+user:
userret(p, &frame, sticks, 1);
+out:
+ mtx_exit(&Giant, MTX_DEF);
}
#ifdef notyet
@@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva)
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
- frame->tf_trapno = T_PRIVINFLT;
+ if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
return -2;
- }
#endif
if (usermode)
goto nogo;
@@ -869,8 +875,7 @@ trap_fatal(frame, eva)
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -917,26 +922,6 @@ trap_fatal(frame, eva)
} else {
printf("Idle\n");
}
- printf("interrupt mask = ");
- if ((cpl & net_imask) == net_imask)
- printf("net ");
- if ((cpl & tty_imask) == tty_imask)
- printf("tty ");
- if ((cpl & bio_imask) == bio_imask)
- printf("bio ");
- if ((cpl & cam_imask) == cam_imask)
- printf("cam ");
- if (cpl == 0)
- printf("none");
-#ifdef SMP
-/**
- * XXX FIXME:
- * we probably SHOULD have stopped the other CPUs before now!
- * another CPU COULD have been touching cpl at this moment...
- */
- printf(" <- SMP: XXX");
-#endif
- printf("\n");
#ifdef KDB
if (kdb_trap(&psl))
@@ -973,8 +958,7 @@ dblfault_handler()
printf("esp = 0x%x\n", common_tss.tss_esp);
printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -1048,12 +1032,14 @@ syscall2(frame)
int error;
int narg;
int args[8];
- int have_mplock = 0;
+ int have_giant = 0;
u_int code;
+ atomic_add_int(&cnt.v_syscall, 1);
+
#ifdef DIAGNOSTIC
if (ISPL(frame.tf_cs) != SEL_UPL) {
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
panic("syscall");
/* NOT REACHED */
}
@@ -1075,9 +1061,9 @@ syscall2(frame)
/*
* The prep code is not MP aware.
*/
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
- rel_mplock();
+ mtx_exit(&Giant, MTX_DEF);
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -1114,8 +1100,8 @@ syscall2(frame)
*/
if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
ktrsyscall(p->p_tracep, code, narg, args);
@@ -1129,15 +1115,15 @@ syscall2(frame)
* we are ktracing
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsyscall(p->p_tracep, code, narg, args);
}
@@ -1192,9 +1178,9 @@ bad:
* Traced syscall. trapsignal() is not MP aware.
*/
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
@@ -1203,13 +1189,13 @@ bad:
/*
* Handle reschedule and other end-of-syscall issues
*/
- have_mplock = userret(p, &frame, sticks, have_mplock);
+ have_giant = userret(p, &frame, sticks, have_giant);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
}
@@ -1225,27 +1211,66 @@ bad:
/*
* Release the MP lock if we had to get it
*/
- if (have_mplock)
- rel_mplock();
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
+
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
+
+void
+ast(frame)
+ struct trapframe frame;
+{
+ struct proc *p = CURPROC;
+ u_quad_t sticks;
+
+ /*
+ * handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
+ astoff();
+ atomic_add_int(&cnt.v_soft, 1);
+ if (p->p_flag & P_OWEUPC) {
+ mtx_enter(&Giant, MTX_DEF);
+ p->p_flag &= ~P_OWEUPC;
+ addupc_task(p, p->p_stats->p_prof.pr_addr,
+ p->p_stats->p_prof.pr_ticks);
+ }
+ if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0)
+ mtx_exit(&Giant, MTX_DEF);
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode. MP lock is held on entry and should be
- * held on return.
+ * directly into user mode. Giant is not held on entry, and must not
+ * be held on return.
*/
void
fork_return(p, frame)
struct proc *p;
struct trapframe frame;
{
+ int have_giant;
+
frame.tf_eax = 0; /* Child returns zero */
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0, 1);
+ have_giant = userret(p, &frame, 0, mtx_owned(&Giant));
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
+ }
#endif
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
}
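
Throughout trap.c the get_mplock()/rel_mplock() pairs become conditional acquisition of the Giant sleep mutex, tracked in a have_giant flag so that Giant is entered at most once per trap or syscall and released only if it was actually taken; userret() now returns the updated flag to its caller. The recurring idiom, as it appears in the hunks above, is:

	if (have_giant == 0) {
		mtx_enter(&Giant, MTX_DEF);
		have_giant = 1;
	}
	/* ... work that still needs Giant: postsig(), ktrace, profiling ... */

	if (have_giant)
		mtx_exit(&Giant, MTX_DEF);
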
diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c
index 15044ab..724f3c2 100644
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Set up the PIC clk handler. The APIC handler is set up later. */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
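
In tsc.c the read_eflags()/disable_intr()/write_eflags() brackets around 8254 accesses are replaced by save_intr()/restore_intr() plus an explicit CLOCK_LOCK()/CLOCK_UNLOCK() pair, used on UP as well as SMP now that the SMP-only macro overrides are gone; the i8254_get_timecount() hunk tests PSL_I against the saved value, so save_intr() evidently returns the saved eflags. The idiom shared by getit(), sysbeep(), set_timer_freq() and i8254_restore(), assembled from the hunks above, looks like:

	int high, low, intrsave;

	intrsave = save_intr();
	disable_intr();
	CLOCK_LOCK();
	/* Select timer0 and latch its counter value. */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
	low = inb(TIMER_CNTR0);
	high = inb(TIMER_CNTR0);
	CLOCK_UNLOCK();
	restore_intr(intrsave);
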
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index cfb6cee..831ab3b 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -57,12 +57,14 @@
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#ifdef SMP
#include <machine/smp.h>
#endif
@@ -177,9 +179,8 @@ cpu_fork(p1, p2, flags)
* pcb2->pcb_onfault: cloned above (always NULL here?).
*/
-#ifdef SMP
- pcb2->pcb_mpnest = 1;
-#endif
+ pcb2->pcb_schednest = 0;
+
/*
* XXX don't copy the i/o pages. this should probably be fixed.
*/
@@ -256,8 +257,11 @@ cpu_exit(p)
reset_dbregs();
pcb->pcb_flags &= ~PCB_DBREGS;
}
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH);
+ mtx_assert(&Giant, MA_NOTOWNED);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -406,17 +410,10 @@ vunmapbuf(bp)
static void
cpu_reset_proxy()
{
- u_int saved_mp_lock;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
- ; /* Wait for other cpu to disable interupts */
- saved_mp_lock = mp_lock;
- mp_lock = 1;
- printf("cpu_reset_proxy: Grabbed mp lock for BSP\n");
- cpu_reset_proxy_active = 3;
- while (cpu_reset_proxy_active == 3)
- ; /* Wait for other cpu to enable interrupts */
+ ; /* Wait for other cpu to see that we've started */
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
@@ -453,6 +450,7 @@ cpu_reset()
cpu_reset_proxyid = cpuid;
cpustop_restartfunc = cpu_reset_proxy;
+ cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
started_cpus = (1<<0); /* Restart CPU #0 */
@@ -461,17 +459,9 @@ cpu_reset()
cnt++; /* Wait for BSP to announce restart */
if (cpu_reset_proxy_active == 0)
printf("cpu_reset: Failed to restart BSP\n");
- __asm __volatile("cli" : : : "memory");
+ enable_intr();
cpu_reset_proxy_active = 2;
- cnt = 0;
- while (cpu_reset_proxy_active == 2 && cnt < 10000000)
- cnt++; /* Do nothing */
- if (cpu_reset_proxy_active == 2) {
- printf("cpu_reset: BSP did not grab mp lock\n");
- cpu_reset_real(); /* XXX: Bogus ? */
- }
- cpu_reset_proxy_active = 4;
- __asm __volatile("sti" : : : "memory");
+
while (1);
/* NOTREACHED */
}
@@ -553,7 +543,7 @@ vm_page_zero_idle()
static int free_rover;
static int zero_state;
vm_page_t m;
- int s;
+ int s, intrsave;
/*
* Attempt to maintain approximately 1/2 of our free pages in a
@@ -569,11 +559,10 @@ vm_page_zero_idle()
if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
return(0);
-#ifdef SMP
- if (try_mplock()) {
-#endif
+ if (mtx_try_enter(&Giant, MTX_DEF)) {
s = splvm();
- __asm __volatile("sti" : : : "memory");
+ intrsave = save_intr();
+ enable_intr();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
if (m != NULL && (m->flags & PG_ZERO) == 0) {
@@ -595,14 +584,10 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- __asm __volatile("cli" : : : "memory");
-#ifdef SMP
- rel_mplock();
-#endif
+ restore_intr(intrsave);
+ mtx_exit(&Giant, MTX_DEF);
return (1);
-#ifdef SMP
}
-#endif
/*
* We have to enable interrupts for a moment if the try_mplock fails
* in order to potentially take an IPI. XXX this should be in
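
The vm_machdep.c changes follow the same pattern: cpu_fork() initializes pcb_schednest in place of pcb_mpnest, cpu_exit() enters sched_lock and drops Giant before its final cpu_switch(), and vm_page_zero_idle() trades try_mplock() for a non-blocking attempt on Giant. The idle page-zeroing skeleton after the change is roughly:

	if (mtx_try_enter(&Giant, MTX_DEF)) {
		s = splvm();
		intrsave = save_intr();
		enable_intr();
		/* ... find a free page and zero it ... */
		splx(s);
		restore_intr(intrsave);
		mtx_exit(&Giant, MTX_DEF);
		return (1);
	}
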