author      jasone <jasone@FreeBSD.org>    2000-09-07 01:33:02 +0000
committer   jasone <jasone@FreeBSD.org>    2000-09-07 01:33:02 +0000
commit      769e0f974d8929599ba599ac496510fffc90ff34 (patch)
tree        9387522900085835de81e7830e570ef3f6b3ea80
parent      acf1927de02afda4855ec278b1128fd9446405ea (diff)
Major update to the way synchronization is done in the kernel. Highlights
include:

* Mutual exclusion is used instead of spl*().  See mutex(9).  (Note: The
  alpha port is still in transition and currently uses both.)

* Per-CPU idle processes.

* Interrupts are run in their own separate kernel threads and can be
  preempted (i386 only).

Partially contributed by:    BSDi (BSD/OS)
Submissions by (at least):   cp, dfr, dillon, grog, jake, jhb, sheldonh
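To make the change in locking style concrete, here is a minimal sketch (editorial, not code from this patch; foo_mtx and foo_count are hypothetical names) contrasting the old spl*() protection with the mutex(9) interface introduced here:

    static mtx_t foo_mtx;      /* hypothetical lock, set up with mtx_init() */
    static int   foo_count;    /* hypothetical shared state */

    /* Before: mask interrupts on the local CPU around the critical section */
    int s;

    s = splhigh();
    foo_count++;
    splx(s);

    /* After: take a mutex; an MTX_DEF lock may block if it is contested */
    mtx_enter(&foo_mtx, MTX_DEF);
    foo_count++;
    mtx_exit(&foo_mtx, MTX_DEF);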
-rw-r--r--bin/ps/print.c10
-rw-r--r--share/man/man9/Makefile4
-rw-r--r--share/man/man9/mutex.9222
-rw-r--r--sys/alpha/alpha/clock.c204
-rw-r--r--sys/alpha/alpha/genassym.c19
-rw-r--r--sys/alpha/alpha/interrupt.c41
-rw-r--r--sys/alpha/alpha/ipl_funcs.c12
-rw-r--r--sys/alpha/alpha/locore.s50
-rw-r--r--sys/alpha/alpha/machdep.c67
-rw-r--r--sys/alpha/alpha/mem.c3
-rw-r--r--sys/alpha/alpha/mp_machdep.c1115
-rw-r--r--sys/alpha/alpha/pmap.c126
-rw-r--r--sys/alpha/alpha/prom.c1
-rw-r--r--sys/alpha/alpha/support.s34
-rw-r--r--sys/alpha/alpha/swtch.s45
-rw-r--r--sys/alpha/alpha/synch_machdep.c529
-rw-r--r--sys/alpha/alpha/trap.c43
-rw-r--r--sys/alpha/alpha/vm_machdep.c11
-rw-r--r--sys/alpha/include/asm.h6
-rw-r--r--sys/alpha/include/cpu.h6
-rw-r--r--sys/alpha/include/cpufunc.h28
-rw-r--r--sys/alpha/include/globaldata.h79
-rw-r--r--sys/alpha/include/globals.h63
-rw-r--r--sys/alpha/include/ipl.h15
-rw-r--r--sys/alpha/include/lock.h32
-rw-r--r--sys/alpha/include/mutex.h563
-rw-r--r--sys/alpha/include/param.h4
-rw-r--r--sys/alpha/include/pcb.h9
-rw-r--r--sys/alpha/include/pcpu.h79
-rw-r--r--sys/alpha/include/pmap.h8
-rw-r--r--sys/alpha/include/proc.h8
-rw-r--r--sys/alpha/include/rpb.h54
-rw-r--r--sys/alpha/include/smp.h53
-rw-r--r--sys/amd64/amd64/amd64-gdbstub.c10
-rw-r--r--sys/amd64/amd64/apic_vector.S132
-rw-r--r--sys/amd64/amd64/autoconf.c8
-rw-r--r--sys/amd64/amd64/cpu_switch.S269
-rw-r--r--sys/amd64/amd64/exception.S41
-rw-r--r--sys/amd64/amd64/exception.s41
-rw-r--r--sys/amd64/amd64/fpu.c18
-rw-r--r--sys/amd64/amd64/genassym.c27
-rw-r--r--sys/amd64/amd64/identcpu.c3
-rw-r--r--sys/amd64/amd64/initcpu.c6
-rw-r--r--sys/amd64/amd64/legacy.c32
-rw-r--r--sys/amd64/amd64/locore.S3
-rw-r--r--sys/amd64/amd64/locore.s3
-rw-r--r--sys/amd64/amd64/machdep.c37
-rw-r--r--sys/amd64/amd64/mp_machdep.c88
-rw-r--r--sys/amd64/amd64/mpboot.S36
-rw-r--r--sys/amd64/amd64/mptable.c88
-rw-r--r--sys/amd64/amd64/nexus.c32
-rw-r--r--sys/amd64/amd64/pmap.c2
-rw-r--r--sys/amd64/amd64/swtch.s269
-rw-r--r--sys/amd64/amd64/trap.c391
-rw-r--r--sys/amd64/amd64/tsc.c155
-rw-r--r--sys/amd64/amd64/vm_machdep.c51
-rw-r--r--sys/amd64/include/cpu.h12
-rw-r--r--sys/amd64/include/cpufunc.h21
-rw-r--r--sys/amd64/include/mptable.h88
-rw-r--r--sys/amd64/include/mutex.h786
-rw-r--r--sys/amd64/include/pcb.h6
-rw-r--r--sys/amd64/include/pcpu.h33
-rw-r--r--sys/amd64/include/smp.h38
-rw-r--r--sys/amd64/isa/atpic_vector.S92
-rw-r--r--sys/amd64/isa/clock.c155
-rw-r--r--sys/amd64/isa/icu_ipl.S57
-rw-r--r--sys/amd64/isa/icu_ipl.s57
-rw-r--r--sys/amd64/isa/icu_vector.S92
-rw-r--r--sys/amd64/isa/icu_vector.s92
-rw-r--r--sys/amd64/isa/intr_machdep.c524
-rw-r--r--sys/amd64/isa/intr_machdep.h50
-rw-r--r--sys/amd64/isa/ithread.c353
-rw-r--r--sys/amd64/isa/nmi.c524
-rw-r--r--sys/amd64/isa/npx.c18
-rw-r--r--sys/amd64/isa/vector.S9
-rw-r--r--sys/amd64/isa/vector.s9
-rw-r--r--sys/conf/files3
-rw-r--r--sys/conf/files.alpha2
-rw-r--r--sys/conf/files.i3863
-rw-r--r--sys/conf/files.pc981
-rw-r--r--sys/conf/options9
-rw-r--r--sys/conf/options.alpha4
-rw-r--r--sys/dev/ata/ata-all.c2
-rw-r--r--sys/dev/cy/cy.c197
-rw-r--r--sys/dev/cy/cy_isa.c197
-rw-r--r--sys/dev/sio/sio.c155
-rw-r--r--sys/fs/cd9660/cd9660_util.c1
-rw-r--r--sys/i386/i386/apic_vector.s132
-rw-r--r--sys/i386/i386/autoconf.c8
-rw-r--r--sys/i386/i386/exception.s41
-rw-r--r--sys/i386/i386/genassym.c27
-rw-r--r--sys/i386/i386/globals.s38
-rw-r--r--sys/i386/i386/i386-gdbstub.c10
-rw-r--r--sys/i386/i386/identcpu.c3
-rw-r--r--sys/i386/i386/initcpu.c6
-rw-r--r--sys/i386/i386/legacy.c32
-rw-r--r--sys/i386/i386/locore.s3
-rw-r--r--sys/i386/i386/machdep.c37
-rw-r--r--sys/i386/i386/mp_machdep.c88
-rw-r--r--sys/i386/i386/mpapic.c3
-rw-r--r--sys/i386/i386/mpboot.s36
-rw-r--r--sys/i386/i386/mplock.s343
-rw-r--r--sys/i386/i386/mptable.c88
-rw-r--r--sys/i386/i386/nexus.c32
-rw-r--r--sys/i386/i386/perfmon.c15
-rw-r--r--sys/i386/i386/pmap.c2
-rw-r--r--sys/i386/i386/swtch.s269
-rw-r--r--sys/i386/i386/synch_machdep.c559
-rw-r--r--sys/i386/i386/trap.c391
-rw-r--r--sys/i386/i386/tsc.c155
-rw-r--r--sys/i386/i386/vm86bios.s10
-rw-r--r--sys/i386/i386/vm_machdep.c51
-rw-r--r--sys/i386/include/asnames.h24
-rw-r--r--sys/i386/include/cpu.h12
-rw-r--r--sys/i386/include/cpufunc.h21
-rw-r--r--sys/i386/include/globaldata.h33
-rw-r--r--sys/i386/include/globals.h42
-rw-r--r--sys/i386/include/ipl.h17
-rw-r--r--sys/i386/include/lock.h45
-rw-r--r--sys/i386/include/mptable.h88
-rw-r--r--sys/i386/include/mutex.h786
-rw-r--r--sys/i386/include/pcb.h6
-rw-r--r--sys/i386/include/pcpu.h33
-rw-r--r--sys/i386/include/smp.h38
-rw-r--r--sys/i386/include/smptests.h5
-rw-r--r--sys/i386/isa/apic_ipl.s74
-rw-r--r--sys/i386/isa/apic_vector.s132
-rw-r--r--sys/i386/isa/atpic_vector.s92
-rw-r--r--sys/i386/isa/bs/bsif.h13
-rw-r--r--sys/i386/isa/clock.c155
-rw-r--r--sys/i386/isa/cy.c197
-rw-r--r--sys/i386/isa/icu_ipl.s57
-rw-r--r--sys/i386/isa/icu_vector.s92
-rw-r--r--sys/i386/isa/intr_machdep.c524
-rw-r--r--sys/i386/isa/intr_machdep.h50
-rw-r--r--sys/i386/isa/ipl.s149
-rw-r--r--sys/i386/isa/ipl_funcs.c267
-rw-r--r--sys/i386/isa/ithread.c353
-rw-r--r--sys/i386/isa/loran.c2
-rw-r--r--sys/i386/isa/nmi.c524
-rw-r--r--sys/i386/isa/npx.c18
-rw-r--r--sys/i386/isa/vector.s9
-rw-r--r--sys/isa/atrtc.c155
-rw-r--r--sys/isa/sio.c155
-rw-r--r--sys/isofs/cd9660/cd9660_util.c1
-rw-r--r--sys/kern/init_main.c33
-rw-r--r--sys/kern/kern_clock.c29
-rw-r--r--sys/kern/kern_exit.c1
-rw-r--r--sys/kern/kern_fork.c80
-rw-r--r--sys/kern/kern_idle.c108
-rw-r--r--sys/kern/kern_kthread.c17
-rw-r--r--sys/kern/kern_mutex.c799
-rw-r--r--sys/kern/kern_proc.c1
-rw-r--r--sys/kern/kern_resource.c2
-rw-r--r--sys/kern/kern_shutdown.c9
-rw-r--r--sys/kern/kern_sig.c3
-rw-r--r--sys/kern/kern_subr.c7
-rw-r--r--sys/kern/kern_switch.c100
-rw-r--r--sys/kern/kern_synch.c115
-rw-r--r--sys/kern/kern_tc.c9
-rw-r--r--sys/kern/kern_threads.c5
-rw-r--r--sys/kern/subr_prf.c3
-rw-r--r--sys/kern/subr_prof.c4
-rw-r--r--sys/kern/subr_smp.c88
-rw-r--r--sys/kern/subr_trap.c391
-rw-r--r--sys/kern/subr_turnstile.c799
-rw-r--r--sys/kern/subr_witness.c799
-rw-r--r--sys/kern/tty.c3
-rw-r--r--sys/kern/vfs_bio.c20
-rw-r--r--sys/kern/vfs_export.c4
-rw-r--r--sys/kern/vfs_subr.c4
-rw-r--r--sys/modules/if_ppp/Makefile4
-rw-r--r--sys/modules/netgraph/tty/Makefile3
-rw-r--r--sys/net/ppp_tty.c23
-rw-r--r--sys/netgraph/ng_tty.c14
-rw-r--r--sys/nfs/nfs_srvcache.c1
-rw-r--r--sys/nfsserver/nfs_srvcache.c1
-rw-r--r--sys/pci/pci_compat.c4
-rw-r--r--sys/powerpc/aim/vm_machdep.c11
-rw-r--r--sys/powerpc/include/globaldata.h79
-rw-r--r--sys/powerpc/include/globals.h63
-rw-r--r--sys/powerpc/include/mutex.h563
-rw-r--r--sys/powerpc/include/pcpu.h79
-rw-r--r--sys/powerpc/powerpc/genassym.c19
-rw-r--r--sys/powerpc/powerpc/vm_machdep.c11
-rw-r--r--sys/sys/buf.h2
-rw-r--r--sys/sys/bus.h15
-rw-r--r--sys/sys/kernel.h4
-rw-r--r--sys/sys/kthread.h2
-rw-r--r--sys/sys/proc.h92
-rw-r--r--sys/sys/rtprio.h33
-rw-r--r--sys/sys/signalvar.h8
-rw-r--r--sys/sys/smp.h38
-rw-r--r--sys/sys/unistd.h9
-rw-r--r--sys/ufs/ffs/ffs_snapshot.c1
-rw-r--r--sys/ufs/ffs/ffs_softdep.c1
-rw-r--r--sys/ufs/ufs/ufs_vfsops.c2
-rw-r--r--sys/vm/vm_glue.c11
-rw-r--r--sys/vm/vm_meter.c5
-rw-r--r--sys/vm/vm_pageout.c8
-rw-r--r--usr.bin/top/machine.c6
201 files changed, 14118 insertions, 5850 deletions
diff --git a/bin/ps/print.c b/bin/ps/print.c
index a49c8fa..7f7898f 100644
--- a/bin/ps/print.c
+++ b/bin/ps/print.c
@@ -185,7 +185,7 @@ state(k, ve)
break;
case SSLEEP:
- if (flag & P_SINTR) /* interuptable (long) */
+ if (flag & P_SINTR) /* interruptable (long) */
*cp = p->p_slptime >= MAXSLP ? 'I' : 'S';
else
*cp = 'D';
@@ -196,6 +196,14 @@ state(k, ve)
*cp = 'R';
break;
+ case SWAIT:
+ *cp = 'W';
+ break;
+
+ case SMTX:
+ *cp = 'M';
+ break;
+
case SZOMB:
*cp = 'Z';
break;
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index d378f7c..9a1ceef 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -14,8 +14,8 @@ MAN9= CONDSPLASSERT.9 KASSERT.9 MD5.9 SPLASSERT.9 \
at_exit.9 at_fork.9 bios.9 boot.9 buf.9 cd.9 copy.9 \
devfs_add_devswf.9 devfs_link.9 devfs_remove_dev.9 devstat.9 \
devtoname.9 fetch.9 ifnet.9 inittodr.9 intro.9 kernacc.9 malloc.9 \
- make_dev.9 microseq.9 mi_switch.9 namei.9 panic.9 physio.9 posix4.9 \
- psignal.9 resettodr.9 rtalloc.9 rtentry.9 sleep.9 spl.9 \
+ make_dev.9 microseq.9 mi_switch.9 mutex.9 namei.9 panic.9 physio.9 \
+ posix4.9 psignal.9 resettodr.9 rtalloc.9 rtentry.9 sleep.9 spl.9 \
store.9 style.9 suser.9 time.9 timeout.9 uio.9 \
vget.9 vnode.9 vput.9 vref.9 vrele.9 vslock.9 \
microtime.9 microuptime.9 tvtohz.9
diff --git a/share/man/man9/mutex.9 b/share/man/man9/mutex.9
new file mode 100644
index 0000000..ac1b78f
--- /dev/null
+++ b/share/man/man9/mutex.9
@@ -0,0 +1,222 @@
+.\"
+.\" Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. Berkeley Software Design Inc's name may not be used to endorse or
+.\" promote products derived from this software without specific prior
+.\" written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" from BSDI $Id: mutex.4,v 1.1.2.3 1998/04/27 22:53:13 ewv Exp $
+.\" $FreeBSD$
+.\"
+.Dd April 20, 1998
+.Dt MUTEX 9
+.Sh NAME
+.Nm mutex,
+.Nm mtx_enter,
+.Nm mtx_exit
+.Nd kernel synchronization primitives
+.Sh SYNOPSIS
+.Ft void
+.Fn mtx_enter "mtx_t *mutex" "int flags"
+.Ft void
+.Fn mtx_exit "mtx_t *mutex" "int flags"
+.Ft int
+.Fn mtx_owned "mtx_t *mutex"
+.Sh DESCRIPTION
+The
+.Fn mtx_enter
+function acquires a mutual exclusion lock
+on behalf of the currently running kernel thread.
+If another kernel thread is holding the mutex,
+the caller will be disconnected from the CPU
+until the mutex is available
+(i.e. it will sleep),
+spin wait for the mutex,
+or possibly a combination of both.
+.Pp
+It is possible for the same thread to recursively acquire a mutex
+with no ill effects;
+if recursion on a given mutex can be avoided,
+faster and smaller code will usually be generated.
+.Pp
+The
+.Fn mtx_exit
+function releases a mutual exclusion lock;
+if a higher priority thread is waiting for the mutex,
+the releasing thread may be disconnected
+to allow the higher priority thread to acquire the mutex and run.
+.Pp
+The type of a mutex is not an attribute of the mutex,
+but instead a function of the
+.Fa flags
+argument passed to
+.Fn mtx_enter
+and
+.Fn mtx_exit ;
+this allows code to be generated for the specific mutex type
+at compile time
+and avoids wasting run time on the determination of lock features.
+This does place on the programmer,
+the burden of using matching forms of the
+.Fn mtx_enter
+and
+.Fn mtx_exit
+functions for a given mutex.
+It is an error to acquire a mutex in one mode (e.g. spin)
+and release it in another (e.g. default).
+It is also an error to get the lock in one mode
+and allow another thread to attempt to get the lock in another mode.
+A good general rule is to always use a given mutex in one mode only.
+.Pp
+The
+.Fn mtx_owned
+function returns a non-zero value
+if the mutex pointed to is already held by the current thread.
+.Ss The default Mutex Type
+Most kernel code should use the default lock type;
+the default lock type will allow the thread
+to be disconnected from the CPU
+if it cannot get the lock.
+The machine dependent implementation
+may treat the lock as a short term spin lock
+under some circumstances.
+However, it is always safe to use these forms of locks
+in an interrupt thread
+without fear of deadlock
+against an interrupted thread on the same CPU.
+.Ss The spin Mutex Type
+A spin mutex will not relinquish the CPU
+when it cannot immediately get the requested lock,
+but will loop, waiting for the mutex to be released by another CPU.
+This could result in deadlock
+if a thread interrupted the thread which held a mutex
+and then tried to acquire the mutex;
+for this reason spin locks will disable all interrupts
+(on the local CPU only)
+by default.
+.Pp
+Spin locks are fairly specialized locks
+that are intended to be held for very short periods of time;
+their primary purpose is to protect portions of the code
+that implement default (i.e. sleep) locks.
+.Ss Flags
+The flags passed to the
+.Fn mtx_enter
+and
+.Fn mtx_exit
+functions determine what type of mutex is being used
+and also provide various options
+used to generate more efficient code under certain circumstances.
+.Pp
+Both lock types (default and spin)
+can be acquired recursively by the same thread.
+This behavior can be changed with flags.
+.Pp
+The type of the mutex must always be specified:
+.Bl -tag -width MTX_NORECURSE
+.It Dv MTX_DEF
+Default lock type;
+will always allow the current thread to be suspended
+to avoid deadlock conditions against interrupt threads.
+The machine dependent implementation of this lock type
+may spin for a while before suspending the current thread.
+Most locks should be of this type.
+.It Dv MTX_SPIN
+Spin lock;
+will never relinquish the CPU.
+By default all interrupts are disabled on the local CPU
+while any spin lock is held.
+.El
+.Pp
+Options that modify mutex behavior:
+.Bl -tag -width MTX_NORECURSE
+.It Dv MTX_NORECURSE
+If it is known, absolutely,
+that the mutex will not be recursively acquired at this invocation
+then this flag should be specified.
+.Pp
+If the lock is already held by the current thread,
+then a kernel with
+.Dv SMP_DEBUG
+defined will panic;
+without debugging enabled,
+the thread may deadlock against itself
+or leave the mutex in a corrupted state.
+.Pp
+This flag prevents generation of additional inline code
+to deal with recursive lock acquisitions
+and should be specified whenever possible
+in the interests of efficiency.
+Not specifying this flag will only cause the generated code
+to be a little larger than necessary;
+it will still operate correctly.
+.It Dv MTX_RLIKELY
+This provides a hint that it is likely that this mutex
+will be held recursively at this invocation.
+The actual optimization used is machine dependent;
+generally, this will inline code to handle recursion
+where a function call would otherwise be needed.
+.Pp
+This is a hint only;
+leaving it out or specifying it inappropriately
+will not cause any great harm other than
+possibly generating less efficient code.
+.It Dv MTX_TOPHALF
+This option applies to spin locks only.
+It indicates that the mutex is never acquired
+from an interrupt thread,
+so it is safe to leave interrupts enabled while holding the lock.
+Since an interrupt may occur while holding the lock,
+this may be detrimental to other processors
+spin waiting for the lock.
+Do not forget to include this option when the lock is released.
+.Pp
+This option should not be used in new code;
+it is documented here for completeness only.
+.It Dv MTX_FIRST
+This option applies to spin locks only.
+It indicates this is the first spin lock acquired by the thread.
+No other spin locks may be held,
+and the requested lock also may not be currently held.
+Do not forget to include this option when the lock is released.
+.It Dv MTX_NOSWITCH
+When releasing a mutex,
+this flag prevents a thread switch that might occur
+if another higher priority thread was waiting for the mutex.
+This may cause priority inversion and should be used carefully.
+.Pp
+This flag is used internally by the lock code.
+It should not be used in general kernel code
+and is documented here for completeness only.
+.It Dv MTX_NOSPIN
+For default locks,
+this hint will prevent spinning before relinquishing the CPU.
+This should be specified when it is known
+that the lock will usually remain unavailable for some time
+when it is not immediately available
+(i.e.: coarse grained locks protecting large subsystems).
+.El
+.Sh HISTORY
+These
+functions appeared in BSD/OS 4.1 and
+.Fx 5.0 .
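Taken together, the interfaces documented in the manual page above might be used along the following lines (a sketch under assumed names; softc_mtx is hypothetical, and mtx_init() follows the form used later in this diff in alpha/machdep.c):

    static mtx_t softc_mtx;

    /* once, at initialization time */
    mtx_init(&softc_mtx, "softc lock", MTX_SPIN);

    /* spin type: never sleeps; local interrupts are disabled while held */
    mtx_enter(&softc_mtx, MTX_SPIN);
    /* ... very short critical section ... */
    mtx_exit(&softc_mtx, MTX_SPIN);

    /* mtx_owned() is non-zero if the current thread holds the mutex */
    if (!mtx_owned(&softc_mtx))
        panic("softc lock not held");

Per the rules in the page above, the same mutex is always used in one mode (here MTX_SPIN) for both the enter and the exit.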
diff --git a/sys/alpha/alpha/clock.c b/sys/alpha/alpha/clock.c
index 88adaa4..500d169 100644
--- a/sys/alpha/alpha/clock.c
+++ b/sys/alpha/alpha/clock.c
@@ -43,6 +43,8 @@
* @(#)clock.c 8.1 (Berkeley) 6/10/93
*/
+#include "opt_clock.h"
+
#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
#include <sys/param.h>
@@ -80,8 +82,23 @@ int disable_rtc_set; /* disable resettodr() if != 0 */
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#define TIMER_DIV(x) ((timer_freq + (x) / 2) / (x))
+
+#ifndef TIMER_FREQ
+#define TIMER_FREQ 1193182
+#endif
+u_int32_t timer_freq = TIMER_FREQ;
+int timer0_max_count;
+
+static u_int32_t i8254_lastcount;
+static u_int32_t i8254_offset;
+static int i8254_ticked;
+static int clkintr_pending = 0;
+
extern int cycles_per_sec;
+extern int ncpus;
+static timecounter_get_t i8254_get_timecount;
static timecounter_get_t alpha_get_timecount;
static struct timecounter alpha_timecounter = {
@@ -95,6 +112,17 @@ static struct timecounter alpha_timecounter = {
SYSCTL_OPAQUE(_debug, OID_AUTO, alpha_timecounter, CTLFLAG_RD,
&alpha_timecounter, sizeof(alpha_timecounter), "S,timecounter", "");
+static struct timecounter i8254_timecounter = {
+ i8254_get_timecount, /* get_timecount */
+ 0, /* no poll_pps */
+ ~0u, /* counter_mask */
+ 0, /* frequency */
+ "i8254" /* name */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
+ &i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", "");
+
/* Values for timerX_state: */
#define RELEASED 0
#define RELEASE_PENDING 1
@@ -120,11 +148,14 @@ static u_int32_t max_cycles_per_tick;
static u_int32_t last_time;
static void handleclock(void* arg);
-static u_int32_t calibrate_clocks(u_int32_t firmware_freq);
+static void calibrate_clocks(u_int32_t firmware_freq,
+ u_int32_t *pcc, u_int32_t *timer);
+static void set_timer_freq(u_int freq, int intr_freq);
void
clockattach(device_t dev)
{
+ u_int32_t pcc, freq, delta;
/*
* Just bookkeeping.
@@ -132,7 +163,33 @@ clockattach(device_t dev)
if (clockdev)
panic("clockattach: multiple clocks");
clockdev = dev;
- cycles_per_sec = calibrate_clocks(cycles_per_sec);
+
+ calibrate_clocks(cycles_per_sec, &pcc, &freq);
+ cycles_per_sec = pcc;
+
+ /*
+ * Use the calibrated i8254 frequency if it seems reasonable.
+ * Otherwise use the default, and don't use the calibrated i586
+ * frequency.
+ */
+ delta = freq > timer_freq ? freq - timer_freq : timer_freq - freq;
+ if (delta < timer_freq / 100) {
+#ifndef CLK_USE_I8254_CALIBRATION
+ if (bootverbose)
+ printf(
+"CLK_USE_I8254_CALIBRATION not specified - using default frequency\n");
+ freq = timer_freq;
+#endif
+ timer_freq = freq;
+ } else {
+ if (bootverbose)
+ printf(
+ "%d Hz differs from default of %d Hz by more than 1%%\n",
+ freq, timer_freq);
+ }
+ set_timer_freq(timer_freq, hz);
+ i8254_timecounter.tc_frequency = timer_freq;
+
#ifdef EVCNT_COUNTERS
evcnt_attach(dev, "intr", &clock_intr_evcnt);
#endif
@@ -190,8 +247,12 @@ cpu_initclocks()
scaled_ticks_per_cycle = ((u_int64_t)hz << FIX_SHIFT) / freq;
max_cycles_per_tick = 2*freq / hz;
- alpha_timecounter.tc_frequency = freq;
- tc_init(&alpha_timecounter);
+ tc_init(&i8254_timecounter);
+
+ if (ncpus == 1) {
+ alpha_timecounter.tc_frequency = freq;
+ tc_init(&alpha_timecounter);
+ }
stathz = 128;
platform.clockintr = (void (*) __P((void *))) handleclock;
@@ -202,15 +263,36 @@ cpu_initclocks()
CLOCK_INIT(clockdev);
}
-static u_int32_t
-calibrate_clocks(u_int32_t firmware_freq)
+static int
+getit(void)
+{
+ int high, low;
+ int s;
+
+ s = splhigh();
+
+ /* Select timer0 and latch counter value. */
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+
+ low = inb(TIMER_CNTR0);
+ high = inb(TIMER_CNTR0);
+
+ splx(s);
+ return ((high << 8) | low);
+}
+
+static void
+calibrate_clocks(u_int32_t firmware_freq, u_int32_t *pcc, u_int32_t *timer)
{
u_int32_t start_pcc, stop_pcc;
+ u_int count, prev_count, tot_count;
int sec, start_sec;
if (bootverbose)
printf("Calibrating clock(s) ... ");
+ set_timer_freq(timer_freq, hz);
+
/* Read the mc146818A seconds counter. */
if (CLOCK_GETSECS(clockdev, &sec))
goto fail;
@@ -224,16 +306,36 @@ calibrate_clocks(u_int32_t firmware_freq)
break;
}
- /* Start keeping track of the PCC. */
+ /* Start keeping track of the PCC and i8254. */
+ prev_count = getit();
+ if (prev_count == 0)
+ goto fail;
+ tot_count = 0;
+
start_pcc = alpha_rpcc();
/*
- * Wait for the mc146818A seconds counter to change.
+ * Wait for the mc146818A seconds counter to change. Read the i8254
+ * counter for each iteration since this is convenient and only
+ * costs a few usec of inaccuracy. The timing of the final reads
+ * of the counters almost matches the timing of the initial reads,
+ * so the main cause of inaccuracy is the varying latency from
+ * inside getit() or rtcin(RTC_STATUSA) to the beginning of the
+ * rtcin(RTC_SEC) that returns a changed seconds count. The
+ * maximum inaccuracy from this cause is < 10 usec on 486's.
*/
start_sec = sec;
for (;;) {
if (CLOCK_GETSECS(clockdev, &sec))
goto fail;
+ count = getit();
+ if (count == 0)
+ goto fail;
+ if (count > prev_count)
+ tot_count += prev_count - (count - timer0_max_count);
+ else
+ tot_count += prev_count - count;
+ prev_count = count;
if (sec != start_sec)
break;
}
@@ -246,29 +348,55 @@ calibrate_clocks(u_int32_t firmware_freq)
if (bootverbose) {
printf("PCC clock: %u Hz (firmware %u Hz)\n",
stop_pcc - start_pcc, firmware_freq);
+ printf("i8254 clock: %u Hz\n", tot_count);
}
- return (stop_pcc - start_pcc);
+ *pcc = stop_pcc - start_pcc;
+ *timer = tot_count;
+ return;
fail:
if (bootverbose)
printf("failed, using firmware default of %u Hz\n",
firmware_freq);
- return (firmware_freq);
+
+ *pcc = firmware_freq;
+ *timer = 0;
+ return;
+}
+
+static void
+set_timer_freq(u_int freq, int intr_freq)
+{
+ int new_timer0_max_count;
+ int s;
+
+ s = splhigh();
+ timer_freq = freq;
+ new_timer0_max_count = TIMER_DIV(intr_freq);
+ if (new_timer0_max_count != timer0_max_count) {
+ timer0_max_count = new_timer0_max_count;
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
+ outb(TIMER_CNTR0, timer0_max_count & 0xff);
+ outb(TIMER_CNTR0, timer0_max_count >> 8);
+ }
+ splx(s);
}
static void
handleclock(void* arg)
{
- u_int32_t now = alpha_rpcc();
- u_int32_t delta = now - last_time;
- last_time = now;
-
- if (delta > max_cycles_per_tick) {
- int i, missed_ticks;
- missed_ticks = (delta * scaled_ticks_per_cycle) >> FIX_SHIFT;
- for (i = 0; i < missed_ticks; i++)
- hardclock(arg);
+ if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ int s = splhigh();
+ if (i8254_ticked)
+ i8254_ticked = 0;
+ else {
+ i8254_offset += timer0_max_count;
+ i8254_lastcount = 0;
+ }
+ clkintr_pending = 0;
+ splx(s);
}
+
hardclock(arg);
setdelayed();
}
@@ -433,6 +561,35 @@ resettodr()
}
static unsigned
+i8254_get_timecount(struct timecounter *tc)
+{
+ u_int count;
+ u_int high, low;
+ int s;
+
+ s = splhigh();
+
+ /* Select timer0 and latch counter value. */
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+
+ low = inb(TIMER_CNTR0);
+ high = inb(TIMER_CNTR0);
+ count = timer0_max_count - ((high << 8) | low);
+ if (count < i8254_lastcount ||
+ (!i8254_ticked && (clkintr_pending ||
+ ((count < 20) && (inb(IO_ICU1) & 1)))
+ )) {
+ i8254_ticked = 1;
+ i8254_offset += timer0_max_count;
+ }
+ i8254_lastcount = count;
+ count += i8254_offset;
+
+ splx(s);
+ return (count);
+}
+
+static unsigned
alpha_get_timecount(struct timecounter* tc)
{
return alpha_rpcc();
@@ -477,15 +634,6 @@ sysbeepstop(void *chan)
beeping = 0;
}
-/*
- * Frequency of all three count-down timers; (TIMER_FREQ/freq) is the
- * appropriate count to generate a frequency of freq hz.
- */
-#ifndef TIMER_FREQ
-#define TIMER_FREQ 1193182
-#endif
-#define TIMER_DIV(x) ((TIMER_FREQ+(x)/2)/(x))
-
int
sysbeep(int pitch, int period)
{
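For reference, the divisor programmed by set_timer_freq() above works out as follows, assuming the usual hz value of 100 (an assumption; hz is not set in this hunk):

    /* timer_freq defaults to TIMER_FREQ, i.e. 1193182 Hz */
    timer0_max_count = TIMER_DIV(100);  /* (1193182 + 100/2) / 100 == 11932 */

The i8254 then counts down from 11932 roughly 100 times per second; handleclock() and i8254_get_timecount() add timer0_max_count to i8254_offset once per rollover, which is what keeps the returned timecount monotonic.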
diff --git a/sys/alpha/alpha/genassym.c b/sys/alpha/alpha/genassym.c
index a67f2d1..066d87b 100644
--- a/sys/alpha/alpha/genassym.c
+++ b/sys/alpha/alpha/genassym.c
@@ -51,8 +51,11 @@
#include <sys/socket.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
+#include <sys/ktr.h>
#include <machine/frame.h>
#include <machine/chipset.h>
+#include <machine/globaldata.h>
+#include <machine/mutex.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -66,6 +69,21 @@
#include <nfs/nfs.h>
#include <nfs/nfsdiskless.h>
+#include "opt_smp.h"
+
+ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
+ASSYM(GD_FPCURPROC, offsetof(struct globaldata, gd_fpcurproc));
+ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
+ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
+ASSYM(GD_CPUNO, offsetof(struct globaldata, gd_cpuno));
+ASSYM(GD_IDLEPCBPHYS, offsetof(struct globaldata, gd_idlepcbphys));
+ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
+ASSYM(MTX_SAVEIPL, offsetof(struct mtx, mtx_saveipl));
+ASSYM(MTX_UNOWNED, MTX_UNOWNED);
+
ASSYM(P_ADDR, offsetof(struct proc, p_addr));
ASSYM(P_MD_FLAGS, offsetof(struct proc, p_md.md_flags));
ASSYM(P_MD_PCBPADDR, offsetof(struct proc, p_md.md_pcbpaddr));
@@ -81,6 +99,7 @@ ASSYM(PTESIZE, PTESIZE);
ASSYM(U_PCB_ONFAULT, offsetof(struct user, u_pcb.pcb_onfault));
ASSYM(U_PCB_HWPCB_KSP, offsetof(struct user, u_pcb.pcb_hw.apcb_ksp));
ASSYM(U_PCB_CONTEXT, offsetof(struct user, u_pcb.pcb_context));
+ASSYM(U_PCB_SCHEDNEST, offsetof(struct user, u_pcb.pcb_schednest));
ASSYM(PCB_HW, offsetof(struct pcb, pcb_hw));
diff --git a/sys/alpha/alpha/interrupt.c b/sys/alpha/alpha/interrupt.c
index deedefe..20f621e 100644
--- a/sys/alpha/alpha/interrupt.c
+++ b/sys/alpha/alpha/interrupt.c
@@ -33,6 +33,8 @@
* notice.
*/
+#include "opt_ddb.h"
+
#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
/* __KERNEL_RCSID(0, "$NetBSD: interrupt.c,v 1.23 1998/02/24 07:38:01 thorpej Exp $");*/
@@ -43,12 +45,15 @@
#include <sys/vmmeter.h>
#include <sys/bus.h>
#include <sys/malloc.h>
+#include <sys/ktr.h>
#include <machine/reg.h>
#include <machine/frame.h>
#include <machine/cpuconf.h>
#include <machine/bwx.h>
#include <machine/intr.h>
+#include <machine/mutex.h>
+#include <machine/rpb.h>
#ifdef EVCNT_COUNTERS
struct evcnt clock_intr_evcnt; /* event counter for clock intrs. */
@@ -56,8 +61,11 @@ struct evcnt clock_intr_evcnt; /* event counter for clock intrs. */
#include <machine/intrcnt.h>
#endif
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
volatile int mc_expected, mc_received;
-u_int32_t intr_nesting_level;
static void
dummy_perf(unsigned long vector, struct trapframe *framep)
@@ -75,13 +83,19 @@ interrupt(a0, a1, a2, framep)
unsigned long a0, a1, a2;
struct trapframe *framep;
{
+ /*
+ * Find our per-cpu globals.
+ */
+ globalp = (struct globaldata *) alpha_pal_rdval();
- atomic_add_int(&intr_nesting_level, 1);
+ atomic_add_int(&PCPU_GET(intr_nesting_level), 1);
{
struct proc* p = curproc;
if (!p) p = &proc0;
- if ((caddr_t) framep < (caddr_t) p->p_addr + 1024)
+ if ((caddr_t) framep < (caddr_t) p->p_addr + 1024) {
+ mtx_enter(&Giant, MTX_DEF);
panic("possible stack overflow\n");
+ }
}
framep->tf_regs[FRAME_TRAPARG_A0] = a0;
@@ -89,10 +103,18 @@ interrupt(a0, a1, a2, framep)
framep->tf_regs[FRAME_TRAPARG_A2] = a2;
switch (a0) {
case ALPHA_INTR_XPROC: /* interprocessor interrupt */
- printf("interprocessor interrupt!\n");
+ CTR0(KTR_INTR|KTR_SMP, "interprocessor interrupt");
+ smp_handle_ipi(framep); /* note: lock not taken */
break;
case ALPHA_INTR_CLOCK: /* clock interrupt */
+ CTR0(KTR_INTR, "clock interrupt");
+ if (PCPU_GET(cpuno) != hwrpb->rpb_primary_cpu_id) {
+ CTR0(KTR_INTR, "ignoring clock on secondary");
+ return;
+ }
+
+ mtx_enter(&Giant, MTX_DEF);
cnt.v_intr++;
#ifdef EVCNT_COUNTERS
clock_intr_evcnt.ev_count++;
@@ -105,24 +127,31 @@ interrupt(a0, a1, a2, framep)
if((++schedclk2 & 0x7) == 0)
statclock((struct clockframe *)framep);
}
+ mtx_exit(&Giant, MTX_DEF);
break;
case ALPHA_INTR_ERROR: /* Machine Check or Correctable Error */
+ mtx_enter(&Giant, MTX_DEF);
a0 = alpha_pal_rdmces();
if (platform.mcheck_handler)
(*platform.mcheck_handler)(a0, framep, a1, a2);
else
machine_check(a0, framep, a1, a2);
+ mtx_exit(&Giant, MTX_DEF);
break;
case ALPHA_INTR_DEVICE: /* I/O device interrupt */
+ mtx_enter(&Giant, MTX_DEF);
cnt.v_intr++;
if (platform.iointr)
(*platform.iointr)(framep, a1);
+ mtx_exit(&Giant, MTX_DEF);
break;
case ALPHA_INTR_PERF: /* interprocessor interrupt */
+ mtx_enter(&Giant, MTX_DEF);
perf_irq(a1, framep);
+ mtx_exit(&Giant, MTX_DEF);
break;
case ALPHA_INTR_PASSIVE:
@@ -132,11 +161,12 @@ interrupt(a0, a1, a2, framep)
break;
default:
+ mtx_enter(&Giant, MTX_DEF);
panic("unexpected interrupt: type 0x%lx vec 0x%lx a2 0x%lx\n",
a0, a1, a2);
/* NOTREACHED */
}
- atomic_subtract_int(&intr_nesting_level, 1);
+ atomic_subtract_int(&PCPU_GET(intr_nesting_level), 1);
}
void
@@ -204,6 +234,7 @@ fatal:
printf(" pid = %d, comm = %s\n", curproc->p_pid,
curproc->p_comm);
printf("\n");
+ kdb_trap(mces, vector, param, ALPHA_KENTRY_MM, framep);
panic("machine check");
}
diff --git a/sys/alpha/alpha/ipl_funcs.c b/sys/alpha/alpha/ipl_funcs.c
index 8c2cb67..6642bce 100644
--- a/sys/alpha/alpha/ipl_funcs.c
+++ b/sys/alpha/alpha/ipl_funcs.c
@@ -30,9 +30,13 @@
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#include <sys/ktr.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
#include <machine/cpu.h>
+#include <machine/globaldata.h>
+#include <machine/globals.h>
+#include <machine/mutex.h>
#include <net/netisr.h>
#include "sio.h"
@@ -129,7 +133,9 @@ do_sir()
u_int32_t pend;
int i;
- atomic_add_int(&intr_nesting_level, 1);
+ mtx_enter(&Giant, MTX_DEF);
+
+ atomic_add_int(&PCPU_GET(intr_nesting_level), 1);
splsoft();
while ((pend = atomic_readandclear(&ipending)) != 0) {
for (i = 0; pend && i < 32; i++) {
@@ -142,7 +148,9 @@ do_sir()
}
}
}
- atomic_subtract_int(&intr_nesting_level, 1);
+ atomic_subtract_int(&PCPU_GET(intr_nesting_level), 1);
+
+ mtx_exit(&Giant, MTX_DEF);
}
#define GENSET(name, ptr, bit) \
diff --git a/sys/alpha/alpha/locore.s b/sys/alpha/alpha/locore.s
index 221c673..2da1315 100644
--- a/sys/alpha/alpha/locore.s
+++ b/sys/alpha/alpha/locore.s
@@ -77,7 +77,7 @@
*/
#define SWITCH_CONTEXT \
/* Make a note of the context we're running on. */ \
- stq a0, curpcb ; \
+ stq a0, GD_CURPCB(globalp); \
\
/* Swap in the new context. */ \
call_pal PAL_OSF1_swpctx
@@ -107,6 +107,12 @@
call_pal PAL_OSF1_wrvptptr /* clobbers a0, t0, t8-t11 */
/*
+ * Initialise globalp.
+ */
+ call_pal PAL_OSF1_rdval /* clobbers t0, t8-t11 */
+ mov v0, globalp
+
+ /*
* Switch to proc0's PCB, which is at U_PCB off of proc0paddr.
*/
lda t0,proc0 /* get phys addr of pcb */
@@ -126,18 +132,50 @@
* Note that setregs() is responsible for setting its contents
* to 'reasonable' values.
*/
- lda sp,-(FRAME_SIZE * 8)(sp) /* space for struct trapframe */
+ lda sp,-288(sp) /* space for struct trapframe */
mov sp, a0 /* arg is frame ptr */
CALL(mi_startup) /* go to mi_startup()! */
+ /* NOTREACHED */
+
+ END(locorestart)
+
/*
- * Call exception_return, to simulate return from (fake)
- * exception to user-land, running process 1, init!
+ * Secondary processors start executing here. They will have their
+ * unique value set to point at the per-cpu structure and will
+ * be executing on their private idle stack.
*/
- jmp zero, exception_return /* "And that's all she wrote." */
- END(locorestart)
+ NESTED(smp_init_secondary_glue, 1, 0, ra, 0, 0)
+ mov pv, globalp
+
+ ldiq a0, ALPHA_PSL_IPL_HIGH /* disable all interrupts */
+ call_pal PAL_OSF1_swpipl
+
+ br pv, 1f
+1: LDGP(pv)
+ mov gp, a0
+ call_pal PAL_OSF1_wrkgp /* clobbers a0, t0, t8-t11 */
+ ldiq a0, -2 /* TBIA */
+ call_pal PAL_OSF1_tbi
+ call_pal PAL_imb
+
+ ldq a0, GD_IDLEPCBPHYS(globalp) /* switch to idle ctx */
+ call_pal PAL_OSF1_swpctx
+
+ CALL(smp_init_secondary) /* initialise the rest */
+
+ /*
+ * After initialising, we start idling for real.
+ * We have the kernel lock at this point.
+ */
+ CALL(cpu_switch) /* never returns */
+
+ call_pal PAL_halt
+
+ END(smp_init_secondary_glue)
+
/**************************************************************************/
/*
diff --git a/sys/alpha/alpha/machdep.c b/sys/alpha/alpha/machdep.c
index 598362d..9af4441 100644
--- a/sys/alpha/alpha/machdep.c
+++ b/sys/alpha/alpha/machdep.c
@@ -97,6 +97,8 @@
#include <sys/systm.h>
#include <sys/eventhandler.h>
#include <sys/sysproto.h>
+#include <machine/mutex.h>
+#include <sys/ktr.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -127,6 +129,8 @@
#include <machine/reg.h>
#include <machine/fpu.h>
#include <machine/pal.h>
+#include <machine/smp.h>
+#include <machine/globaldata.h>
#include <machine/cpuconf.h>
#include <machine/bootinfo.h>
#include <machine/rpb.h>
@@ -140,18 +144,17 @@
#include <miscfs/procfs/procfs.h>
#include <machine/sigframe.h>
-struct proc* curproc;
-struct proc* fpcurproc;
-struct pcb* curpcb;
u_int64_t cycles_per_usec;
u_int32_t cycles_per_sec;
-int whichqs, whichrtqs, whichidqs;
int cold = 1;
struct platform platform;
alpha_chipset_t chipset;
struct bootinfo_kernel bootinfo;
-struct timeval switchtime;
-int switchticks;
+
+struct cpuhead cpuhead;
+
+mtx_t sched_lock;
+mtx_t Giant;
struct user *proc0paddr;
@@ -419,6 +422,14 @@ again:
vm_pager_bufferinit();
EVENTHANDLER_REGISTER(shutdown_final, alpha_srm_shutdown, 0,
SHUTDOWN_PRI_LAST);
+
+#ifdef SMP
+ /*
+ * OK, enough kmem_alloc/malloc state should be up, lets get on with it!
+ */
+ mp_start(); /* fire up the secondaries */
+ mp_announce();
+#endif /* SMP */
}
int
@@ -978,11 +989,25 @@ alpha_init(pfn, ptb, bim, bip, biv)
(struct user *)pmap_steal_memory(UPAGES * PAGE_SIZE);
/*
+ * Setup the global data for the bootstrap cpu.
+ */
+ {
+ size_t sz = round_page(UPAGES * PAGE_SIZE);
+ globalp = (struct globaldata *) pmap_steal_memory(sz);
+ globaldata_init(globalp, alpha_pal_whami(), sz);
+ alpha_pal_wrval((u_int64_t) globalp);
+ PCPU_GET(next_asn) = 1; /* 0 used for proc0 pmap */
+ }
+
+ /*
* Initialize the virtual memory system, and set the
* page table base register in proc 0's PCB.
*/
pmap_bootstrap(ALPHA_PHYS_TO_K0SEG(alpha_ptob(ptb)),
hwrpb->rpb_max_asn);
+ hwrpb->rpb_vptb = VPTBASE;
+ hwrpb->rpb_checksum = hwrpb_checksum();
+
/*
* Initialize the rest of proc 0's PCB, and cache its physical
@@ -999,6 +1024,29 @@ alpha_init(pfn, ptb, bim, bip, biv)
(u_int64_t)proc0paddr + USPACE - sizeof(struct trapframe);
proc0.p_md.md_tf =
(struct trapframe *)proc0paddr->u_pcb.pcb_hw.apcb_ksp;
+ PCPU_SET(curproc, &proc0);
+
+ /*
+ * Get the right value for the boot cpu's idle ptbr.
+ */
+ globalp->gd_idlepcb.apcb_ptbr = proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr;
+
+ /*
+ * Record all cpus in a list.
+ */
+ SLIST_INIT(&cpuhead);
+ SLIST_INSERT_HEAD(&cpuhead, GLOBALP, gd_allcpu);
+
+ /*
+ * Initialise the kernel lock.
+ */
+ mtx_init(&Giant, "Giant", MTX_DEF);
+ mtx_init(&sched_lock, "sched lock", MTX_SPIN);
+
+ /*
+ * Enable interrupts on first release (in switch_trampoline).
+ */
+ sched_lock.mtx_saveipl = ALPHA_PSL_IPL_0;
/*
* Look at arguments passed to us and compute boothowto.
@@ -1118,6 +1166,8 @@ alpha_init(pfn, ptb, bim, bip, biv)
#endif
}
+ hwrpb_restart_setup();
+
alpha_pal_wrfen(0);
}
@@ -2034,9 +2084,14 @@ SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
void
alpha_fpstate_check(struct proc *p)
{
+ /*
+ * For SMP, we should check the fpcurproc of each cpu.
+ */
+#ifndef SMP
if (p->p_addr->u_pcb.pcb_hw.apcb_flags & ALPHA_PCB_FLAGS_FEN)
if (p != fpcurproc)
panic("alpha_check_fpcurproc: bogus");
+#endif
}
#define SET_FEN(p) \
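The machdep.c hunk above removes the old global curproc/fpcurproc/curpcb/switchtime variables; elsewhere in this commit they become per-CPU globaldata fields reached through PCPU_GET()/PCPU_SET(). A minimal sketch of the access pattern (the wrapper function name is hypothetical):

    struct proc *
    my_curproc(void)
    {
        /* resolves to a load from this CPU's globaldata structure */
        return (PCPU_GET(curproc));
    }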
diff --git a/sys/alpha/alpha/mem.c b/sys/alpha/alpha/mem.c
index 940d827..196ed14 100644
--- a/sys/alpha/alpha/mem.c
+++ b/sys/alpha/alpha/mem.c
@@ -261,9 +261,12 @@ mem_modevent(module_t mod, int type, void *data)
case MOD_LOAD:
if (bootverbose)
printf("mem: <memory & I/O>\n");
+/* XXX - ??? */
+#if 0
/* Initialise memory range handling */
if (mem_range_softc.mr_op != NULL)
mem_range_softc.mr_op->init(&mem_range_softc);
+#endif
memdev = make_dev(&mem_cdevsw, 0, UID_ROOT, GID_KMEM,
0640, "mem");
diff --git a/sys/alpha/alpha/mp_machdep.c b/sys/alpha/alpha/mp_machdep.c
new file mode 100644
index 0000000..367b57e
--- /dev/null
+++ b/sys/alpha/alpha/mp_machdep.c
@@ -0,0 +1,1115 @@
+/*-
+ * Copyright (c) 2000 Doug Rabson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/mutex.h>
+#include <sys/ktr.h>
+#include <sys/proc.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <sys/user.h>
+#include <sys/dkstat.h>
+
+#include <machine/smp.h>
+#include <machine/lock.h>
+#include <machine/atomic.h>
+#include <machine/ipl.h>
+#include <machine/globaldata.h>
+#include <machine/pmap.h>
+#include <machine/rpb.h>
+#include <machine/clock.h>
+
+#define CHECKSTATE_USER 0
+#define CHECKSTATE_SYS 1
+#define CHECKSTATE_INTR 2
+
+volatile u_int stopped_cpus;
+volatile u_int started_cpus;
+volatile u_int checkstate_probed_cpus;
+volatile u_int checkstate_need_ast;
+volatile u_int checkstate_pending_ast;
+struct proc* checkstate_curproc[NCPUS];
+int checkstate_cpustate[NCPUS];
+u_long checkstate_pc[NCPUS];
+volatile u_int resched_cpus;
+void (*cpustop_restartfunc) __P((void));
+int mp_ncpus;
+
+int smp_started;
+int boot_cpu_id;
+u_int32_t all_cpus;
+
+static struct globaldata *cpuno_to_globaldata[NCPUS];
+
+int smp_active = 0; /* are the APs allowed to run? */
+SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RW, &smp_active, 0, "");
+
+/* Is forwarding of a interrupt to the CPU holding the ISR lock enabled ? */
+int forward_irq_enabled = 1;
+SYSCTL_INT(_machdep, OID_AUTO, forward_irq_enabled, CTLFLAG_RW,
+ &forward_irq_enabled, 0, "");
+
+/* Enable forwarding of a signal to a process running on a different CPU */
+static int forward_signal_enabled = 1;
+SYSCTL_INT(_machdep, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
+ &forward_signal_enabled, 0, "");
+
+/* Enable forwarding of roundrobin to all other cpus */
+static int forward_roundrobin_enabled = 1;
+SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
+ &forward_roundrobin_enabled, 0, "");
+
+/*
+ * Communicate with a console running on a secondary processor.
+ * Return 1 on failure.
+ */
+static int
+smp_send_secondary_command(const char *command, int cpuno)
+{
+ u_int64_t mask = 1L << cpuno;
+ struct pcs *cpu = LOCATE_PCS(hwrpb, cpuno);
+ int i, len;
+
+ /*
+ * Sanity check.
+ */
+ len = strlen(command);
+ if (len > sizeof(cpu->pcs_buffer.rxbuf)) {
+ printf("smp_send_secondary_command: command '%s' too long\n",
+ command);
+ return 0;
+ }
+
+ /*
+ * Wait for the rx bit to clear.
+ */
+ for (i = 0; i < 100000; i++) {
+ if (!(hwrpb->rpb_rxrdy & mask))
+ break;
+ DELAY(10);
+ }
+ if (hwrpb->rpb_rxrdy & mask)
+ return 0;
+
+ /*
+ * Write the command into the processor's buffer.
+ */
+ bcopy(command, cpu->pcs_buffer.rxbuf, len);
+ cpu->pcs_buffer.rxlen = len;
+
+ /*
+ * Set the bit in the rxrdy mask and let the secondary try to
+ * handle the command.
+ */
+ atomic_set_64(&hwrpb->rpb_rxrdy, mask);
+
+ /*
+ * Wait for the rx bit to clear.
+ */
+ for (i = 0; i < 100000; i++) {
+ if (!(hwrpb->rpb_rxrdy & mask))
+ break;
+ DELAY(10);
+ }
+ if (hwrpb->rpb_rxrdy & mask)
+ return 0;
+
+ return 1;
+}
+
+void
+smp_init_secondary(void)
+{
+ /*
+ * Record the globaldata pointer in the per-cpu system value.
+ */
+ alpha_pal_wrval((u_int64_t) globalp);
+
+ /*
+ * Point interrupt/exception vectors to our own.
+ */
+ alpha_pal_wrent(XentInt, ALPHA_KENTRY_INT);
+ alpha_pal_wrent(XentArith, ALPHA_KENTRY_ARITH);
+ alpha_pal_wrent(XentMM, ALPHA_KENTRY_MM);
+ alpha_pal_wrent(XentIF, ALPHA_KENTRY_IF);
+ alpha_pal_wrent(XentUna, ALPHA_KENTRY_UNA);
+ alpha_pal_wrent(XentSys, ALPHA_KENTRY_SYS);
+
+ mtx_enter(&Giant, MTX_DEF);
+
+ printf("smp_init_secondary: called\n");
+ CTR0(KTR_SMP, "smp_init_secondary");
+
+ /*
+ * Add to mask.
+ */
+ smp_started = 1;
+ if (PCPU_GET(cpuno) + 1 > mp_ncpus)
+ mp_ncpus = PCPU_GET(cpuno) + 1;
+ spl0();
+ smp_ipi_all(0);
+
+ mtx_exit(&Giant, MTX_DEF);
+}
+
+extern void smp_init_secondary_glue(void);
+
+static int
+smp_start_secondary(int cpuno)
+{
+ struct pcs *cpu = LOCATE_PCS(hwrpb, cpuno);
+ struct pcs *bootcpu = LOCATE_PCS(hwrpb, hwrpb->rpb_primary_cpu_id);
+ struct alpha_pcb *pcb = (struct alpha_pcb *) cpu->pcs_hwpcb;
+ struct globaldata *globaldata;
+ int i;
+ size_t sz;
+
+ if ((cpu->pcs_flags & PCS_PV) == 0) {
+ printf("smp_start_secondary: cpu %d PALcode invalid\n", cpuno);
+ return 0;
+ }
+
+ printf("smp_start_secondary: starting cpu %d\n", cpuno);
+
+ sz = round_page(UPAGES * PAGE_SIZE);
+ globaldata = malloc(sz, M_TEMP, M_NOWAIT);
+ if (!globaldata) {
+ printf("smp_start_secondary: can't allocate memory\n");
+ return 0;
+ }
+
+ globaldata_init(globaldata, cpuno, sz);
+
+ /*
+ * Copy the idle pcb and setup the address to start executing.
+ * Use the pcb unique value to point the secondary at its globaldata
+ * structure.
+ */
+ *pcb = globaldata->gd_idlepcb;
+ hwrpb->rpb_restart = (u_int64_t) smp_init_secondary_glue;
+ hwrpb->rpb_restart_val = (u_int64_t) globaldata;
+ hwrpb->rpb_checksum = hwrpb_checksum();
+
+ /*
+ * Tell the cpu to start with the same PALcode as us.
+ */
+ bcopy(&bootcpu->pcs_pal_rev, &cpu->pcs_pal_rev,
+ sizeof cpu->pcs_pal_rev);
+
+ /*
+ * Set flags in cpu structure and push out write buffers to
+ * make sure the secondary sees it.
+ */
+ cpu->pcs_flags |= PCS_CV|PCS_RC;
+ cpu->pcs_flags &= ~PCS_BIP;
+ alpha_mb();
+
+ /*
+ * Fire it up and hope for the best.
+ */
+ if (!smp_send_secondary_command("START\r\n", cpuno)) {
+ printf("smp_init_secondary: can't send START command\n");
+ free(globaldata, M_TEMP);
+ return 0;
+ }
+
+ /*
+ * Wait for the secondary to set the BIP flag in its structure.
+ */
+ for (i = 0; i < 100000; i++) {
+ if (cpu->pcs_flags & PCS_BIP)
+ break;
+ DELAY(10);
+ }
+ if (!(cpu->pcs_flags & PCS_BIP)) {
+ printf("smp_init_secondary: secondary did not respond\n");
+ free(globaldata, M_TEMP);
+ }
+
+ /*
+ * It worked (I think).
+ */
+ /* if (bootverbose) */
+ printf("smp_init_secondary: cpu %d started\n", cpuno);
+
+ return 1;
+}
+
+/*
+ * Initialise a struct globaldata.
+ */
+void
+globaldata_init(struct globaldata *globaldata, int cpuno, size_t sz)
+{
+ bzero(globaldata, sz);
+ globaldata->gd_idlepcbphys = vtophys((vm_offset_t) &globaldata->gd_idlepcb);
+ globaldata->gd_idlepcb.apcb_ksp = (u_int64_t)
+ ((caddr_t) globaldata + sz - sizeof(struct trapframe));
+ globaldata->gd_idlepcb.apcb_ptbr = proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr;
+ globaldata->gd_cpuno = cpuno;
+ globaldata->gd_other_cpus = all_cpus & ~(1 << cpuno);
+ globaldata->gd_next_asn = 0;
+ globaldata->gd_current_asngen = 1;
+ cpuno_to_globaldata[cpuno] = globaldata;
+}
+
+struct globaldata *
+globaldata_find(int cpuno)
+{
+ return cpuno_to_globaldata[cpuno];
+}
+
+/* Implementation of simplelocks */
+
+/*
+ * Atomically swap the value of *p with val. Return the old value of *p.
+ */
+static __inline int
+atomic_xchg(volatile u_int *p, u_int val)
+{
+ u_int32_t oldval, temp;
+ __asm__ __volatile__ (
+ "1:\tldl_l %0,%3\n\t" /* load current value */
+ "mov %4,%1\n\t" /* value to store */
+ "stl_c %1,%2\n\t" /* attempt to store */
+ "beq %1,2f\n\t" /* if the store failed, spin */
+ "br 3f\n" /* it worked, exit */
+ "2:\tbr 1b\n" /* *p not updated, loop */
+ "3:\n" /* it worked */
+ : "=&r"(oldval), "=r"(temp), "=m" (*p)
+ : "m"(*p), "r"(val)
+ : "memory");
+ return oldval;
+}
+
+void
+s_lock_init(struct simplelock *lkp)
+{
+ lkp->lock_data = 0;
+}
+
+void
+s_lock(struct simplelock *lkp)
+{
+ for (;;) {
+ if (s_lock_try(lkp))
+ return;
+
+ /*
+ * Spin until clear.
+ */
+ while (lkp->lock_data)
+ ;
+ }
+}
+
+int
+s_lock_try(struct simplelock *lkp)
+{
+ u_int32_t oldval, temp;
+
+ __asm__ __volatile__ (
+ "1:\tldl_l %0,%3\n\t" /* load current value */
+ "blbs %0,2f\n" /* if set, give up now */
+ "mov 1,%1\n\t" /* value to store */
+ "stl_c %1,%2\n\t" /* attempt to store */
+ "beq %1,3f\n\t" /* if the store failed, spin */
+ "2:" /* exit */
+ ".section .text2,\"ax\"\n" /* improve branch prediction */
+ "3:\tbr 1b\n" /* *p not updated, loop */
+ ".previous\n"
+ : "=&r"(oldval), "=r"(temp), "=m" (lkp->lock_data)
+ : "m"(lkp->lock_data)
+ : "memory");
+
+ if (!oldval) {
+ /*
+ * It was clear, return success.
+ */
+ alpha_mb();
+ return 1;
+ }
+ return 0;
+}
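The routines above build a raw spin lock out of Alpha load-locked/store-conditional (ldl_l/stl_c) sequences. A minimal usage sketch (foo_slock is hypothetical; s_unlock() is called by smp_rendezvous() below, but its definition falls outside this excerpt):

    static struct simplelock foo_slock;

    s_lock_init(&foo_slock);    /* once, before first use */

    s_lock(&foo_slock);         /* spins until the lock is free */
    /* ... touch data shared between CPUs ... */
    s_unlock(&foo_slock);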
+
+/* Other stuff */
+
+/* lock around the MP rendezvous */
+static struct simplelock smp_rv_lock;
+
+static void
+init_locks(void)
+{
+ s_lock_init(&smp_rv_lock);
+}
+
+void
+mp_start()
+{
+ int i;
+ int cpuno = PCPU_GET(cpuno);
+
+ init_locks();
+
+ if (cpuno + 1 > mp_ncpus)
+ mp_ncpus = cpuno + 1;
+
+ all_cpus = 1<<cpuno;
+ for (i = 0; i < hwrpb->rpb_pcs_cnt; i++) {
+ struct pcs *pcsp;
+
+ if (i == cpuno)
+ continue;
+ pcsp = (struct pcs *)((char *)hwrpb + hwrpb->rpb_pcs_off +
+ (i * hwrpb->rpb_pcs_size));
+ if ((pcsp->pcs_flags & PCS_PP) != 0) {
+ all_cpus |= 1<<i;
+ break; /* only one for now */
+ }
+ }
+ PCPU_SET(other_cpus, all_cpus & ~(1<<cpuno));
+
+ for (i = 0; i < hwrpb->rpb_pcs_cnt; i++) {
+ struct pcs *pcsp;
+
+ if (i == cpuno)
+ continue;
+ pcsp = (struct pcs *)((char *)hwrpb + hwrpb->rpb_pcs_off +
+ (i * hwrpb->rpb_pcs_size));
+ if ((pcsp->pcs_flags & PCS_PP) != 0) {
+ smp_active = 1;
+ smp_start_secondary(i);
+ break; /* only one for now */
+ }
+ }
+}
+
+void
+mp_announce()
+{
+}
+
+void
+smp_invltlb()
+{
+}
+
+#define GD_TO_INDEX(pc, prof) \
+ ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \
+ (u_quad_t)((prof)->pr_scale)) >> 16) & ~1)
+
+extern long cp_time[CPUSTATES];
+
+static void
+addugd_intr_forwarded(struct proc *p, int id, int *astmap)
+{
+ int i;
+ struct uprof *prof;
+ u_long pc;
+
+ pc = checkstate_pc[id];
+ prof = &p->p_stats->p_prof;
+ if (pc >= prof->pr_off &&
+ (i = GD_TO_INDEX(pc, prof)) < prof->pr_size) {
+ if ((p->p_flag & P_OWEUPC) == 0) {
+ prof->pr_addr = pc;
+ prof->pr_ticks = 1;
+ p->p_flag |= P_OWEUPC;
+ }
+ *astmap |= (1 << id);
+ }
+}
+
+static void
+forwarded_statclock(int id, int pscnt, int *astmap)
+{
+ struct pstats *pstats;
+ long rss;
+ struct rusage *ru;
+ struct vmspace *vm;
+ int cpustate;
+ struct proc *p;
+#ifdef GPROF
+ register struct gmonparam *g;
+ int i;
+#endif
+
+ p = checkstate_curproc[id];
+ cpustate = checkstate_cpustate[id];
+
+ switch (cpustate) {
+ case CHECKSTATE_USER:
+ if (p->p_flag & P_PROFIL)
+ addugd_intr_forwarded(p, id, astmap);
+ if (pscnt > 1)
+ return;
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ break;
+ case CHECKSTATE_SYS:
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addugd_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = checkstate_pc[id] - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (pscnt > 1)
+ return;
+
+ if (!p)
+ cp_time[CP_IDLE]++;
+ else {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ }
+ break;
+ case CHECKSTATE_INTR:
+ default:
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addugd_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = checkstate_pc[id] - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (pscnt > 1)
+ return;
+ if (p)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ }
+ if (p != NULL) {
+ schedclock(p);
+
+ /* Update resource usage integrals and maximums. */
+ if ((pstats = p->p_stats) != NULL &&
+ (ru = &pstats->p_ru) != NULL &&
+ (vm = p->p_vmspace) != NULL) {
+ ru->ru_ixrss += pgtok(vm->vm_tsize);
+ ru->ru_idrss += pgtok(vm->vm_dsize);
+ ru->ru_isrss += pgtok(vm->vm_ssize);
+ rss = pgtok(vmspace_resident_count(vm));
+ if (ru->ru_maxrss < rss)
+ ru->ru_maxrss = rss;
+ }
+ }
+}
+
+#define BETTER_CLOCK_DIAGNOSTIC
+
+void
+forward_statclock(int pscnt)
+{
+ int map;
+ int id;
+ int i;
+
+ /* Kludge. We don't yet have separate locks for the interrupts
+ * and the kernel. This means that we cannot let the other processors
+ * handle complex interrupts while inhibiting them from entering
+ * the kernel in a non-interrupt context.
+ *
+ * What we can do, without changing the locking mechanisms yet,
+ * is letting the other processors handle a very simple interrupt
+ * (wich determines the processor states), and do the main
+ * work ourself.
+ */
+
+ CTR1(KTR_SMP, "forward_statclock(%d)", pscnt);
+
+ if (!smp_started || cold || panicstr)
+ return;
+
+ /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */
+
+ map = PCPU_GET(other_cpus) & ~stopped_cpus ;
+ checkstate_probed_cpus = 0;
+ if (map != 0)
+ smp_ipi_selected(map, IPI_CHECKSTATE);
+
+ i = 0;
+ while (checkstate_probed_cpus != map) {
+ /* spin */
+ i++;
+ if (i == 100000) {
+#ifdef BETTER_CLOCK_DIAGNOSTIC
+ printf("forward_statclock: checkstate %x\n",
+ checkstate_probed_cpus);
+#endif
+ break;
+ }
+ }
+
+ /*
+ * Step 2: walk through other processors processes, update ticks and
+ * profiling info.
+ */
+
+ map = 0;
+ for (id = 0; id < mp_ncpus; id++) {
+ if (id == cpuid)
+ continue;
+ if (((1 << id) & checkstate_probed_cpus) == 0)
+ continue;
+ forwarded_statclock(id, pscnt, &map);
+ }
+ if (map != 0) {
+ checkstate_need_ast |= map;
+ smp_ipi_selected(map, IPI_AST);
+ i = 0;
+ while ((checkstate_need_ast & map) != 0) {
+ /* spin */
+ i++;
+ if (i > 100000) {
+#ifdef BETTER_CLOCK_DIAGNOSTIC
+ printf("forward_statclock: dropped ast 0x%x\n",
+ checkstate_need_ast & map);
+#endif
+ break;
+ }
+ }
+ }
+}
+
+void
+forward_hardclock(int pscnt)
+{
+ int map;
+ int id;
+ struct proc *p;
+ struct pstats *pstats;
+ int i;
+
+ /* Kludge. We don't yet have separate locks for the interrupts
+ * and the kernel. This means that we cannot let the other processors
+ * handle complex interrupts while inhibiting them from entering
+ * the kernel in a non-interrupt context.
+ *
+ * What we can do, without changing the locking mechanisms yet,
+ * is letting the other processors handle a very simple interrupt
+ * (wich determines the processor states), and do the main
+ * work ourself.
+ */
+
+ CTR1(KTR_SMP, "forward_hardclock(%d)", pscnt);
+
+ if (!smp_started || cold || panicstr)
+ return;
+
+ /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
+
+ map = PCPU_GET(other_cpus) & ~stopped_cpus ;
+ checkstate_probed_cpus = 0;
+ if (map != 0)
+ smp_ipi_selected(map, IPI_CHECKSTATE);
+
+ i = 0;
+ while (checkstate_probed_cpus != map) {
+ /* spin */
+ i++;
+ if (i == 100000) {
+#ifdef BETTER_CLOCK_DIAGNOSTIC
+ printf("forward_hardclock: checkstate %x\n",
+ checkstate_probed_cpus);
+#endif
+ breakpoint();
+ break;
+ }
+ }
+
+ /*
+ * Step 2: walk through other processors processes, update virtual
+ * timer and profiling timer. If stathz == 0, also update ticks and
+ * profiling info.
+ */
+
+ map = 0;
+ for (id = 0; id < mp_ncpus; id++) {
+ if (id == cpuid)
+ continue;
+ if (((1 << id) & checkstate_probed_cpus) == 0)
+ continue;
+ p = checkstate_curproc[id];
+ if (p) {
+ pstats = p->p_stats;
+ if (checkstate_cpustate[id] == CHECKSTATE_USER &&
+ timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
+ psignal(p, SIGVTALRM);
+ map |= (1 << id);
+ }
+ if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
+ psignal(p, SIGPROF);
+ map |= (1 << id);
+ }
+ }
+ if (stathz == 0) {
+ forwarded_statclock( id, pscnt, &map);
+ }
+ }
+ if (map != 0) {
+ checkstate_need_ast |= map;
+ smp_ipi_selected(map, IPI_AST);
+ i = 0;
+ while ((checkstate_need_ast & map) != 0) {
+ /* spin */
+ i++;
+ if (i > 100000) {
+#ifdef BETTER_CLOCK_DIAGNOSTIC
+ printf("forward_hardclock: dropped ast 0x%x\n",
+ checkstate_need_ast & map);
+#endif
+ break;
+ }
+ }
+ }
+}
+
+void
+forward_signal(struct proc *p)
+{
+ int map;
+ int id;
+ int i;
+
+ /* Kludge. We don't yet have separate locks for the interrupts
+ * and the kernel. This means that we cannot let the other processors
+ * handle complex interrupts while inhibiting them from entering
+ * the kernel in a non-interrupt context.
+ *
+ * What we can do, without changing the locking mechanisms yet,
+ * is letting the other processors handle a very simple interrupt
+ * (wich determines the processor states), and do the main
+ * work ourself.
+ */
+
+ CTR1(KTR_SMP, "forward_signal(%p)", p);
+
+ if (!smp_started || cold || panicstr)
+ return;
+ if (!forward_signal_enabled)
+ return;
+ while (1) {
+ if (p->p_stat != SRUN)
+ return;
+ id = p->p_oncpu;
+ if (id == 0xff)
+ return;
+ map = (1<<id);
+ checkstate_need_ast |= map;
+ smp_ipi_selected(map, IPI_AST);
+ i = 0;
+ while ((checkstate_need_ast & map) != 0) {
+ /* spin */
+ i++;
+ if (i > 100000) {
+#if 0
+ printf("forward_signal: dropped ast 0x%x\n",
+ checkstate_need_ast & map);
+#endif
+ break;
+ }
+ }
+ if (id == p->p_oncpu)
+ return;
+ }
+}
+
+void
+forward_roundrobin(void)
+{
+ u_int map;
+ int i;
+
+ CTR0(KTR_SMP, "forward_roundrobin()");
+
+ if (!smp_started || cold || panicstr)
+ return;
+ if (!forward_roundrobin_enabled)
+ return;
+ resched_cpus |= PCPU_GET(other_cpus);
+ map = PCPU_GET(other_cpus) & ~stopped_cpus ;
+ smp_ipi_selected(map, IPI_AST);
+ i = 0;
+ while ((checkstate_need_ast & map) != 0) {
+ /* spin */
+ i++;
+ if (i > 100000) {
+#if 0
+ printf("forward_roundrobin: dropped ast 0x%x\n",
+ checkstate_need_ast & map);
+#endif
+ break;
+ }
+ }
+}
+
+/*
+ * When called the executing CPU will send an IPI to all other CPUs
+ * requesting that they halt execution.
+ *
+ * Usually (but not necessarily) called with 'other_cpus' as its arg.
+ *
+ * - Signals all CPUs in map to stop.
+ * - Waits for each to stop.
+ *
+ * Returns:
+ * -1: error
+ * 0: NA
+ * 1: ok
+ *
+ * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs
+ * from executing at same time.
+ */
+int
+stop_cpus(u_int map)
+{
+ int i;
+
+ if (!smp_started)
+ return 0;
+
+ CTR1(KTR_SMP, "stop_cpus(%x)", map);
+
+ /* send the stop IPI to all CPUs in map */
+ smp_ipi_selected(map, IPI_STOP);
+
+ i = 0;
+ while ((stopped_cpus & map) != map) {
+ /* spin */
+ i++;
+ if (i == 100000) {
+ printf("timeout stopping cpus\n");
+ break;
+ }
+ alpha_mb();
+ }
+
+ printf("stopped_cpus=%x\n", stopped_cpus);
+
+ return 1;
+}
+
+
+/*
+ * Called by a CPU to restart stopped CPUs.
+ *
+ * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
+ *
+ * - Signals all CPUs in map to restart.
+ * - Waits for each to restart.
+ *
+ * Returns:
+ * -1: error
+ * 0: NA
+ * 1: ok
+ */
+int
+restart_cpus(u_int map)
+{
+ if (!smp_started)
+ return 0;
+
+ CTR1(KTR_SMP, "restart_cpus(%x)", map);
+
+ started_cpus = map; /* signal other cpus to restart */
+ alpha_mb();
+
+ while ((stopped_cpus & map) != 0) /* wait for each to clear its bit */
+ alpha_mb();
+
+ return 1;
+}
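To show how the two primitives above are meant to pair up, here is a minimal sketch (purely illustrative, not part of this change; `example_quiesce' is a made-up name) that parks the other CPUs around an operation on shared state:

static void
example_quiesce(void)
{
	u_int map = PCPU_GET(other_cpus);

	stop_cpus(map);			/* IPI_STOP the others and spin until they park */
	/* ... inspect or patch shared state while the other CPUs are held ... */
	restart_cpus(stopped_cpus);	/* usually called with 'stopped_cpus', as noted above */
}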
+
+/*
+ * All-CPU rendezvous. CPUs are signalled, all execute the setup function
+ * (if specified), rendezvous, execute the action function (if specified),
+ * rendezvous again, execute the teardown function (if specified), and then
+ * resume.
+ *
+ * Note that the supplied external functions _must_ be reentrant and aware
+ * that they are running in parallel and in an unknown lock context.
+ */
+static void (*smp_rv_setup_func)(void *arg);
+static void (*smp_rv_action_func)(void *arg);
+static void (*smp_rv_teardown_func)(void *arg);
+static void *smp_rv_func_arg;
+static volatile int smp_rv_waiters[2];
+
+void
+smp_rendezvous_action(void)
+{
+ /* setup function */
+ if (smp_rv_setup_func != NULL)
+ smp_rv_setup_func(smp_rv_func_arg);
+ /* spin on entry rendezvous */
+ atomic_add_int(&smp_rv_waiters[0], 1);
+ while (smp_rv_waiters[0] < mp_ncpus)
+ ;
+ /* action function */
+ if (smp_rv_action_func != NULL)
+ smp_rv_action_func(smp_rv_func_arg);
+ /* spin on exit rendezvous */
+ atomic_add_int(&smp_rv_waiters[1], 1);
+ while (smp_rv_waiters[1] < mp_ncpus)
+ ;
+ /* teardown function */
+ if (smp_rv_teardown_func != NULL)
+ smp_rv_teardown_func(smp_rv_func_arg);
+}
+
+void
+smp_rendezvous(void (* setup_func)(void *),
+ void (* action_func)(void *),
+ void (* teardown_func)(void *),
+ void *arg)
+{
+ int s;
+
+ /* disable interrupts on this CPU, save interrupt status */
+ s = splhigh();
+
+ /* obtain rendezvous lock */
+ s_lock(&smp_rv_lock); /* XXX sleep here? NOWAIT flag? */
+
+ /* set static function pointers */
+ smp_rv_setup_func = setup_func;
+ smp_rv_action_func = action_func;
+ smp_rv_teardown_func = teardown_func;
+ smp_rv_func_arg = arg;
+ smp_rv_waiters[0] = 0;
+ smp_rv_waiters[1] = 0;
+
+ /* signal other processors, which will enter the IPI with interrupts off */
+ smp_ipi_all_but_self(IPI_RENDEZVOUS);
+
+ /* call executor function */
+ smp_rendezvous_action();
+
+ /* release lock */
+ s_unlock(&smp_rv_lock);
+
+ /* restore interrupt flag */
+ splx(s);
+}
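To make the setup/action/teardown contract concrete, here is a minimal sketch of a caller (illustrative only; `flush_tlb_action' and `flush_all_tlbs' are made-up names). The pmap changes later in this commit use exactly this pattern for TLB shootdowns:

static void
flush_tlb_action(void *arg)
{
	ALPHA_TBIA();		/* runs on every CPU between the two rendezvous points */
}

static void
flush_all_tlbs(void)
{
	/* no setup or teardown step is needed here */
	smp_rendezvous(0, flush_tlb_action, 0, 0);
}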
+
+/*
+ * send an IPI to a set of cpus.
+ */
+void
+smp_ipi_selected(u_int32_t cpus, u_int64_t ipi)
+{
+ struct globaldata *globaldata;
+
+	CTR2(KTR_SMP, "smp_ipi_selected(%x, %lx)", cpus, ipi);
+ alpha_mb();
+ while (cpus) {
+ int cpuno = ffs(cpus) - 1;
+ cpus &= ~(1 << cpuno);
+
+ globaldata = cpuno_to_globaldata[cpuno];
+ if (globaldata) {
+ atomic_set_64(&globaldata->gd_pending_ipis, ipi);
+ alpha_mb();
+ CTR1(KTR_SMP, "calling alpha_pal_wripir(%d)", cpuno);
+ alpha_pal_wripir(cpuno);
+ }
+ }
+}
+
+/*
+ * send an IPI containing 'ipi' to all CPUs, including myself
+ */
+void
+smp_ipi_all(u_int64_t ipi)
+{
+ smp_ipi_selected(all_cpus, ipi);
+}
+
+/*
+ * send an IPI to all CPUs EXCEPT myself
+ */
+void
+smp_ipi_all_but_self(u_int64_t ipi)
+{
+ smp_ipi_selected(PCPU_GET(other_cpus), ipi);
+}
+
+/*
+ * send an IPI to myself
+ */
+void
+smp_ipi_self(u_int64_t ipi)
+{
+ smp_ipi_selected(1 << PCPU_GET(cpuno), ipi);
+}
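All of the wrappers above funnel into smp_ipi_selected(); for instance, forcing an AST check on one CPU reduces to the following (sketch only; `example_force_ast' is a made-up name):

static void
example_force_ast(int id)
{
	/* ask CPU `id' to run an AST check, exactly as forward_signal() does above */
	smp_ipi_selected(1 << id, IPI_AST);
}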
+
+static u_int64_t
+atomic_readandclear(u_int64_t* p)
+{
+ u_int64_t v, temp;
+ __asm__ __volatile__ (
+ "wmb\n" /* ensure pending writes have drained */
+ "1:\tldq_l %0,%3\n\t" /* load current value, asserting lock */
+ "ldiq %1,0\n\t" /* value to store */
+ "stq_c %1,%2\n\t" /* attempt to store */
+ "beq %1,2f\n\t" /* if the store failed, spin */
+ "br 3f\n" /* it worked, exit */
+ "2:\tbr 1b\n" /* *p not updated, loop */
+ "3:\tmb\n" /* it worked */
+ : "=&r"(v), "=&r"(temp), "=m" (*p)
+ : "m"(*p)
+ : "memory");
+ return v;
+}
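In C terms the LL/SC sequence above behaves like the sketch below (illustrative only; it leans on the atomic_cmpset_64() primitive instead of raw ldq_l/stq_c): the load and the store of zero either both take effect, or the whole step is retried.

static u_int64_t
atomic_readandclear_sketch(volatile u_int64_t *p)
{
	u_int64_t v;

	do {
		v = *p;					/* ldq_l: load-locked */
	} while (atomic_cmpset_64(p, v, 0) == 0);	/* stq_c failed: retry */
	return (v);
}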
+
+/*
+ * Handle an IPI sent to this processor.
+ */
+void
+smp_handle_ipi(struct trapframe *frame)
+{
+ u_int64_t ipis = atomic_readandclear(&PCPU_GET(pending_ipis));
+ u_int64_t ipi;
+ int cpuno = PCPU_GET(cpuno);
+
+ CTR1(KTR_SMP, "smp_handle_ipi(), ipis=%x", ipis);
+ while (ipis) {
+		/*
+		 * Find the lowest set bit and clear it from the pending
+		 * set so that each IPI is handled exactly once.
+		 */
+		ipi = ipis & ~(ipis - 1);
+		ipis &= ~ipi;
+		switch (ipi) {
+ case IPI_INVLTLB:
+ break;
+
+ case IPI_RENDEZVOUS:
+ CTR0(KTR_SMP, "IPI_RENDEZVOUS");
+ smp_rendezvous_action();
+ break;
+
+ case IPI_AST:
+ CTR0(KTR_SMP, "IPI_AST");
+ atomic_clear_int(&checkstate_need_ast, 1<<cpuno);
+ atomic_set_int(&checkstate_pending_ast, 1<<cpuno);
+ if (frame->tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE)
+ ast(frame); /* XXX */
+ break;
+
+ case IPI_CHECKSTATE:
+ CTR0(KTR_SMP, "IPI_CHECKSTATE");
+ if (frame->tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE)
+ checkstate_cpustate[cpuno] = CHECKSTATE_USER;
+ else if (PCPU_GET(intr_nesting_level) == 1)
+ checkstate_cpustate[cpuno] = CHECKSTATE_SYS;
+ else
+ checkstate_cpustate[cpuno] = CHECKSTATE_INTR;
+ checkstate_curproc[cpuno] = PCPU_GET(curproc);
+ atomic_set_int(&checkstate_probed_cpus, 1<<cpuno);
+ break;
+
+ case IPI_STOP:
+ CTR0(KTR_SMP, "IPI_STOP");
+ atomic_set_int(&stopped_cpus, 1<<cpuno);
+ while ((started_cpus & (1<<cpuno)) == 0)
+ alpha_mb();
+ atomic_clear_int(&started_cpus, 1<<cpuno);
+ atomic_clear_int(&stopped_cpus, 1<<cpuno);
+ break;
+ }
+ }
+
+ /*
+ * Drop console messages on the floor.
+ */
+ if (PCPU_GET(cpuno) == hwrpb->rpb_primary_cpu_id
+ && hwrpb->rpb_txrdy != 0) {
+ hwrpb->rpb_txrdy = 0;
+ alpha_mb();
+ }
+}
+
+#if 0
+
+/*
+ * Atomically compare the value stored at *p with cmpval and if the
+ * two values are equal, update the value of *p with newval. Returns
+ * zero if the compare failed, nonzero otherwise.
+ */
+u_int64_t
+atomic_cmpset_64(volatile u_int64_t* p, u_int64_t cmpval, u_int64_t newval)
+{
+ u_int64_t ret, temp;
+
+
+ printf("atomic_cmpset_64: *p=%lx, cmpval=%lx, newval=%lx\n",
+ *p, cmpval, newval);
+ __asm __volatile (
+ "1:\tldq_l %1, %5\n\t" /* load old value */
+ "cmpeq %1, %3, %0\n\t" /* compare */
+ "beq %0, 2f\n\t" /* exit if not equal */
+ "mov %4, %1\n\t" /* value to store */
+ "stq_c %1, %2\n\t" /* attempt to store */
+ "beq %1, 3f\n\t" /* if it failed, spin */
+ "2:\n" /* done */
+ ".section .text3,\"ax\"\n" /* improve branch prediction */
+ "3:\tbr 1b\n" /* try again */
+ ".previous\n"
+ : "=&r" (ret), "=r" (temp), "=m" (*p)
+ : "r" (cmpval), "r" (newval), "m" (*p)
+ : "memory");
+ printf("atomic_cmpset_64: *p=%lx\n", *p);
+
+ return ret;
+}
+
+#endif
diff --git a/sys/alpha/alpha/pmap.c b/sys/alpha/alpha/pmap.c
index 7cdf67e..2a4852f 100644
--- a/sys/alpha/alpha/pmap.c
+++ b/sys/alpha/alpha/pmap.c
@@ -171,6 +171,7 @@
#include <machine/md_var.h>
#include <machine/rpb.h>
+#include <machine/smp.h>
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
@@ -325,9 +326,7 @@ vm_offset_t kernel_vm_end;
* Data for the ASN allocator
*/
static int pmap_maxasn;
-static int pmap_nextasn = 0;
-static u_int pmap_current_asngen = 1;
-static pmap_t pmap_active = 0;
+static pmap_t pmap_active[NCPUS];
/*
* Data for the pv entry allocation mechanism
@@ -456,16 +455,13 @@ void
pmap_bootstrap(vm_offset_t ptaddr, u_int maxasn)
{
pt_entry_t newpte;
- pt_entry_t* pte;
- vm_offset_t va;
int i;
/*
- * Setup ASNs
+ * Setup ASNs. PCPU_GET(next_asn) and PCPU_GET(current_asngen) are set
+ * up already.
*/
- pmap_nextasn = 0;
pmap_maxasn = maxasn;
- pmap_current_asngen = 1;
/*
* Allocate a level 1 map for the kernel.
@@ -550,27 +546,14 @@ pmap_bootstrap(vm_offset_t ptaddr, u_int maxasn)
kernel_pmap = &kernel_pmap_store;
kernel_pmap->pm_lev1 = Lev1map;
kernel_pmap->pm_count = 1;
- kernel_pmap->pm_active = 1;
- kernel_pmap->pm_asn = 0;
- kernel_pmap->pm_asngen = pmap_current_asngen;
- pmap_nextasn = 1;
+ kernel_pmap->pm_active = ~0;
+ kernel_pmap->pm_asn[alpha_pal_whami()].asn = 0;
+ kernel_pmap->pm_asn[alpha_pal_whami()].gen = 1;
TAILQ_INIT(&kernel_pmap->pm_pvlist);
nklev3 = NKPT;
nklev2 = 1;
/*
- * Reserve some special page table entries/VA space for temporary
- * mapping of pages.
- */
-#define SYSMAP(c, p, v, n) \
- v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
-
- va = virtual_avail;
- pte = pmap_lev3pte(kernel_pmap, va);
-
- virtual_avail = va;
-
- /*
* Set up proc0's PCB such that the ptbr points to the right place
* and has the kernel pmap's.
*/
@@ -663,23 +646,43 @@ pmap_init2()
static void
pmap_invalidate_asn(pmap_t pmap)
{
- pmap->pm_asngen = 0;
+ pmap->pm_asn[PCPU_GET(cpuno)].gen = 0;
}
+struct pmap_invalidate_page_arg {
+ pmap_t pmap;
+ vm_offset_t va;
+};
+
static void
-pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+pmap_invalidate_page_action(void *arg)
{
- if (pmap_isactive(pmap)) {
+ pmap_t pmap = ((struct pmap_invalidate_page_arg *) arg)->pmap;
+ vm_offset_t va = ((struct pmap_invalidate_page_arg *) arg)->va;
+
+ if (pmap->pm_active & (1 << PCPU_GET(cpuno))) {
ALPHA_TBIS(va);
alpha_pal_imb(); /* XXX overkill? */
- } else
+ } else {
pmap_invalidate_asn(pmap);
+ }
}
static void
-pmap_invalidate_all(pmap_t pmap)
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+ struct pmap_invalidate_page_arg arg;
+ arg.pmap = pmap;
+ arg.va = va;
+ smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *) &arg);
+}
+
+static void
+pmap_invalidate_all_action(void *arg)
{
- if (pmap_isactive(pmap)) {
+ pmap_t pmap = (pmap_t) arg;
+
+ if (pmap->pm_active & (1 << PCPU_GET(cpuno))) {
ALPHA_TBIA();
alpha_pal_imb(); /* XXX overkill? */
} else
@@ -687,24 +690,31 @@ pmap_invalidate_all(pmap_t pmap)
}
static void
+pmap_invalidate_all(pmap_t pmap)
+{
+ smp_rendezvous(0, pmap_invalidate_all_action, 0, (void *) pmap);
+}
+
+static void
pmap_get_asn(pmap_t pmap)
{
- if (pmap->pm_asngen != pmap_current_asngen) {
- if (pmap_nextasn > pmap_maxasn) {
+ if (pmap->pm_asn[PCPU_GET(cpuno)].gen != PCPU_GET(current_asngen)) {
+ if (PCPU_GET(next_asn) > pmap_maxasn) {
/*
* Start a new ASN generation.
*
* Invalidate all per-process mappings and I-cache
*/
- pmap_nextasn = 0;
- pmap_current_asngen++;
+ PCPU_GET(next_asn) = 0;
+ PCPU_GET(current_asngen)++;
+ PCPU_GET(current_asngen) &= (1 << 24) - 1;
- if (pmap_current_asngen == 0) {
+ if (PCPU_GET(current_asngen) == 0) {
/*
- * Clear the pm_asngen of all pmaps.
+ * Clear the pm_asn[].gen of all pmaps.
* This is safe since it is only called from
* pmap_activate after it has deactivated
- * the old pmap.
+ * the old pmap and it only affects this cpu.
*/
struct proc *p;
pmap_t tpmap;
@@ -712,11 +722,11 @@ pmap_get_asn(pmap_t pmap)
#ifdef PMAP_DIAGNOSTIC
printf("pmap_get_asn: generation rollover\n");
#endif
- pmap_current_asngen = 1;
+ PCPU_GET(current_asngen) = 1;
LIST_FOREACH(p, &allproc, p_list) {
if (p->p_vmspace) {
tpmap = vmspace_pmap(p->p_vmspace);
- tpmap->pm_asngen = 0;
+ tpmap->pm_asn[PCPU_GET(cpuno)].gen = 0;
}
}
}
@@ -729,8 +739,8 @@ pmap_get_asn(pmap_t pmap)
ALPHA_TBIAP();
alpha_pal_imb(); /* XXX overkill? */
}
- pmap->pm_asn = pmap_nextasn++;
- pmap->pm_asngen = pmap_current_asngen;
+ pmap->pm_asn[PCPU_GET(cpuno)].asn = PCPU_GET(next_asn)++;
+ pmap->pm_asn[PCPU_GET(cpuno)].gen = PCPU_GET(current_asngen);
}
}
@@ -1163,13 +1173,17 @@ void
pmap_pinit0(pmap)
struct pmap *pmap;
{
+ int i;
+
pmap->pm_lev1 = Lev1map;
pmap->pm_flags = 0;
pmap->pm_count = 1;
pmap->pm_ptphint = NULL;
pmap->pm_active = 0;
- pmap->pm_asn = 0;
- pmap->pm_asngen = 0;
+ for (i = 0; i < NCPUS; i++) {
+ pmap->pm_asn[i].asn = 0;
+ pmap->pm_asn[i].gen = 0;
+ }
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}
@@ -1183,6 +1197,7 @@ pmap_pinit(pmap)
register struct pmap *pmap;
{
vm_page_t lev1pg;
+ int i;
/*
* allocate object for the ptes
@@ -1215,8 +1230,10 @@ pmap_pinit(pmap)
pmap->pm_count = 1;
pmap->pm_ptphint = NULL;
pmap->pm_active = 0;
- pmap->pm_asn = 0;
- pmap->pm_asngen = 0;
+ for (i = 0; i < NCPUS; i++) {
+ pmap->pm_asn[i].asn = 0;
+ pmap->pm_asn[i].gen = 0;
+ }
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}
@@ -2994,21 +3011,22 @@ pmap_activate(struct proc *p)
pmap = vmspace_pmap(p->p_vmspace);
- if (pmap_active && pmap != pmap_active) {
- pmap_active->pm_active = 0;
- pmap_active = 0;
+ if (pmap_active[PCPU_GET(cpuno)] && pmap != pmap_active[PCPU_GET(cpuno)]) {
+ atomic_clear_32(&pmap_active[PCPU_GET(cpuno)]->pm_active,
+ 1 << PCPU_GET(cpuno));
+ pmap_active[PCPU_GET(cpuno)] = 0;
}
p->p_addr->u_pcb.pcb_hw.apcb_ptbr =
ALPHA_K0SEG_TO_PHYS((vm_offset_t) pmap->pm_lev1) >> PAGE_SHIFT;
- if (pmap->pm_asngen != pmap_current_asngen)
+ if (pmap->pm_asn[PCPU_GET(cpuno)].gen != PCPU_GET(current_asngen))
pmap_get_asn(pmap);
- pmap_active = pmap;
- pmap->pm_active = 1; /* XXX use bitmap for SMP */
+ pmap_active[PCPU_GET(cpuno)] = pmap;
+ atomic_set_32(&pmap->pm_active, 1 << PCPU_GET(cpuno));
- p->p_addr->u_pcb.pcb_hw.apcb_asn = pmap->pm_asn;
+ p->p_addr->u_pcb.pcb_hw.apcb_asn = pmap->pm_asn[PCPU_GET(cpuno)].asn;
if (p == curproc) {
alpha_pal_swpctx((u_long)p->p_md.md_pcbpaddr);
@@ -3020,8 +3038,8 @@ pmap_deactivate(struct proc *p)
{
pmap_t pmap;
pmap = vmspace_pmap(p->p_vmspace);
- pmap->pm_active = 0;
- pmap_active = 0;
+ atomic_clear_32(&pmap->pm_active, 1 << PCPU_GET(cpuno));
+ pmap_active[PCPU_GET(cpuno)] = 0;
}
vm_offset_t
diff --git a/sys/alpha/alpha/prom.c b/sys/alpha/alpha/prom.c
index 5880b2c..805539a 100644
--- a/sys/alpha/alpha/prom.c
+++ b/sys/alpha/alpha/prom.c
@@ -57,7 +57,6 @@ int prom_mapped = 1; /* Is PROM still mapped? */
pt_entry_t rom_pte, saved_pte[1]; /* XXX */
static pt_entry_t *rom_lev1map __P((void));
-extern struct pcb* curpcb;
extern pt_entry_t* Lev1map;
static void prom_cache_sync __P((void));
diff --git a/sys/alpha/alpha/support.s b/sys/alpha/alpha/support.s
index 2e5ff39..2a87327 100644
--- a/sys/alpha/alpha/support.s
+++ b/sys/alpha/alpha/support.s
@@ -71,7 +71,7 @@
beq t1, fusufault
lda t0, fusufault /* trap faults */
- ldq t2, curproc
+ ldq t2, GD_CURPROC(globalp)
ldq t2, P_ADDR(t2)
stq t0, U_PCB_ONFAULT(t2)
@@ -91,7 +91,7 @@
beq t1, fusufault
lda t0, fusufault /* trap faults */
- ldq t2, curproc
+ ldq t2, GD_CURPROC(globalp)
ldq t2, P_ADDR(t2)
stq t0, U_PCB_ONFAULT(t2)
@@ -116,7 +116,7 @@
beq t1, fusufault
lda t0, fusufault /* trap faults */
- ldq t2, curproc
+ ldq t2, GD_CURPROC(globalp)
ldq t2, P_ADDR(t2)
stq t0, U_PCB_ONFAULT(t2)
@@ -135,7 +135,7 @@
beq t1, fusufault
lda t0, fusufault /* trap faults */
- ldq t2, curproc
+ ldq t2, GD_CURPROC(globalp)
ldq t2, P_ADDR(t2)
stq t0, U_PCB_ONFAULT(t2)
@@ -153,7 +153,7 @@
END(suibyte)
LEAF(fusufault, 0)
- ldq t0, curproc
+ ldq t0, GD_CURPROC(globalp)
ldq t0, P_ADDR(t0)
stq zero, U_PCB_ONFAULT(t0)
ldiq v0, -1
@@ -221,13 +221,13 @@ NESTED(copyinstr, 4, 16, ra, 0, 0)
beq t1, copyerr /* if it's not, error out. */
lda v0, copyerr /* set up fault handler. */
.set noat
- ldq at_reg, curproc
+ ldq at_reg, GD_CURPROC(globalp)
ldq at_reg, P_ADDR(at_reg)
stq v0, U_PCB_ONFAULT(at_reg)
.set at
CALL(copystr) /* do the copy. */
.set noat
- ldq at_reg, curproc /* kill the fault handler. */
+ ldq at_reg, GD_CURPROC(globalp) /* kill the fault handler. */
ldq at_reg, P_ADDR(at_reg)
stq zero, U_PCB_ONFAULT(at_reg)
.set at
@@ -245,13 +245,13 @@ NESTED(copyoutstr, 4, 16, ra, 0, 0)
beq t1, copyerr /* if it's not, error out. */
lda v0, copyerr /* set up fault handler. */
.set noat
- ldq at_reg, curproc
+ ldq at_reg, GD_CURPROC(globalp)
ldq at_reg, P_ADDR(at_reg)
stq v0, U_PCB_ONFAULT(at_reg)
.set at
CALL(copystr) /* do the copy. */
.set noat
- ldq at_reg, curproc /* kill the fault handler. */
+ ldq at_reg, GD_CURPROC(globalp) /* kill the fault handler. */
ldq at_reg, P_ADDR(at_reg)
stq zero, U_PCB_ONFAULT(at_reg)
.set at
@@ -423,13 +423,13 @@ bcopy_da_finish:
insql t4,a1,t4
addq a1,a2,a4
ldq_u t6,0(a1)
- ldq_u t7,-1(a4)
+ ldq_u t8,-1(a4)
bic t6,t4,t6
- bic t7,t5,t7
+ bic t8,t5,t8
and t2,t4,t2
and t3,t5,t3
or t2,t6,t2
- or t3,t7,t3
+ or t3,t8,t3
stq_u t3,-1(a4)
stq_u t2,0(a1)
RET
@@ -513,13 +513,13 @@ NESTED(copyin, 3, 16, ra, 0, 0)
beq t1, copyerr /* if it's not, error out. */
lda v0, copyerr /* set up fault handler. */
.set noat
- ldq at_reg, curproc
+ ldq at_reg, GD_CURPROC(globalp)
ldq at_reg, P_ADDR(at_reg)
stq v0, U_PCB_ONFAULT(at_reg)
.set at
CALL(bcopy) /* do the copy. */
.set noat
- ldq at_reg, curproc /* kill the fault handler. */
+ ldq at_reg, GD_CURPROC(globalp) /* kill the fault handler. */
ldq at_reg, P_ADDR(at_reg)
stq zero, U_PCB_ONFAULT(at_reg)
.set at
@@ -538,13 +538,13 @@ NESTED(copyout, 3, 16, ra, 0, 0)
beq t1, copyerr /* if it's not, error out. */
lda v0, copyerr /* set up fault handler. */
.set noat
- ldq at_reg, curproc
+ ldq at_reg, GD_CURPROC(globalp)
ldq at_reg, P_ADDR(at_reg)
stq v0, U_PCB_ONFAULT(at_reg)
.set at
CALL(bcopy) /* do the copy. */
.set noat
- ldq at_reg, curproc /* kill the fault handler. */
+ ldq at_reg, GD_CURPROC(globalp) /* kill the fault handler. */
ldq at_reg, P_ADDR(at_reg)
stq zero, U_PCB_ONFAULT(at_reg)
.set at
@@ -555,7 +555,7 @@ NESTED(copyout, 3, 16, ra, 0, 0)
END(copyout)
LEAF(copyerr, 0)
- ldq t0, curproc
+ ldq t0, GD_CURPROC(globalp)
ldq t0, P_ADDR(t0)
stq zero, U_PCB_ONFAULT(t0) /* reset fault handler. */
ldq ra, (16-8)(sp) /* restore ra. */
diff --git a/sys/alpha/alpha/swtch.s b/sys/alpha/alpha/swtch.s
index ee191eb..f457a34 100644
--- a/sys/alpha/alpha/swtch.s
+++ b/sys/alpha/alpha/swtch.s
@@ -28,7 +28,9 @@
* rights to redistribute these changes.
*/
+#define _LOCORE
#include <machine/asm.h>
+#include <machine/mutex.h>
#include "assym.s"
/**************************************************************************/
@@ -39,7 +41,7 @@
*/
#define SWITCH_CONTEXT \
/* Make a note of the context we're running on. */ \
- stq a0, curpcb; \
+ stq a0, GD_CURPCB(globalp); \
\
/* Swap in the new context. */ \
call_pal PAL_OSF1_swpctx
@@ -86,34 +88,16 @@ IMPORT(want_resched, 4)
IMPORT(Lev1map, 8)
/*
- * When no processes are on the runq, cpu_switch branches to idle
- * to wait for something to come ready.
- * Note: this is really a part of cpu_switch() but defined here for kernel
- * profiling.
- */
-LEAF(idle, 0)
- br pv, Lidle1
-Lidle1: LDGP(pv)
- stq zero, switchtime /* zero switchtime.tv_sec */
- stq zero, curproc /* curproc <- NULL for stats */
- mov zero, a0 /* enable all interrupts */
- call_pal PAL_OSF1_swpipl
-Lidle2:
- CALL(procrunnable)
- beq v0, Lidle2
- ldiq a0, ALPHA_PSL_IPL_HIGH /* disable all interrupts */
- call_pal PAL_OSF1_swpipl
- jmp zero, sw1 /* jump back into the fray */
- END(idle)
-
-/*
* cpu_switch()
* Find the highest priority process and resume it.
*/
LEAF(cpu_switch, 1)
LDGP(pv)
/* do an inline savectx(), to save old context */
+ ldq a0, GD_CURPROC(globalp)
ldq a1, P_ADDR(a0)
+ ldl t0, sched_lock+MTX_RECURSE /* save sched_lock state */
+ stl t0, U_PCB_SCHEDNEST(a1)
/* NOTE: ksp is stored by the swpctx */
stq s0, U_PCB_CONTEXT+(0 * 8)(a1) /* store s0 - s6 */
stq s1, U_PCB_CONTEXT+(1 * 8)(a1)
@@ -129,16 +113,12 @@ LEAF(cpu_switch, 1)
mov a0, s0 /* save old curproc */
mov a1, s1 /* save old U-area */
- CALL(procrunnable) /* anything to run? */
- beq v0, idle /* and if none, go idle */
-
ldiq a0, ALPHA_PSL_IPL_HIGH /* disable all interrupts */
call_pal PAL_OSF1_swpipl
sw1:
br pv, Lcs1
Lcs1: LDGP(pv)
- CALL(chooseproc)
- beq v0, idle
+ CALL(chooseproc) /* can't return NULL */
mov v0, s2
ldq s3, P_MD_PCBPADDR(s2) /* save new pcbpaddr */
@@ -194,7 +174,7 @@ Lcs7:
* because we might have re-entered cpu_switch() from idle(),
* in which case curproc would be NULL.
*/
- stq s2, curproc /* curproc = p */
+ stq s2, GD_CURPROC(globalp) /* curproc = p */
stl zero, want_resched /* we've rescheduled */
/*
@@ -212,6 +192,10 @@ Lcs7:
ldq s5, U_PCB_CONTEXT+(5 * 8)(t0)
ldq s6, U_PCB_CONTEXT+(6 * 8)(t0)
ldq ra, U_PCB_CONTEXT+(7 * 8)(t0) /* restore ra */
+ ldl t1, U_PCB_SCHEDNEST(t0)
+ stl t1, sched_lock+MTX_RECURSE /* restore lock */
+ ldq t1, GD_CURPROC(globalp)
+ stq t1, sched_lock+MTX_LOCK
ldq a0, U_PCB_CONTEXT+(8 * 8)(t0) /* restore ipl */
and a0, ALPHA_PSL_IPL_MASK, a0
call_pal PAL_OSF1_swpipl
@@ -231,6 +215,7 @@ Lcs7:
* pointer to the executing process's proc structure.
*/
LEAF(switch_trampoline, 0)
+ MTX_EXIT(sched_lock)
mov s0, pv
mov s1, ra
mov s2, a0
@@ -266,7 +251,7 @@ Lchkast:
and s1, ALPHA_PSL_USERMODE, t0 /* are we returning to user? */
beq t0, Lrestoreregs /* no: just return */
- ldl t2, astpending /* AST pending? */
+ ldl t2, GD_ASTPENDING(globalp) /* AST pending? */
beq t2, Lrestoreregs /* no: return */
/* We've got an AST. Handle it. */
@@ -277,7 +262,7 @@ Lchkast:
Lrestoreregs:
/* set the hae register if this process has specified a value */
- ldq t0, curproc
+ ldq t0, GD_CURPROC(globalp)
beq t0, Lnohae
ldq t1, P_MD_FLAGS(t0)
and t1, MDP_HAEUSED
diff --git a/sys/alpha/alpha/synch_machdep.c b/sys/alpha/alpha/synch_machdep.c
new file mode 100644
index 0000000..a3077e9
--- /dev/null
+++ b/sys/alpha/alpha/synch_machdep.c
@@ -0,0 +1,529 @@
+/*-
+ * Copyright (c) 1997, 1998 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
+ * $FreeBSD$
+ */
+
+#define MTX_STRS /* define common strings */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <machine/atomic.h>
+#include <machine/clock.h>
+#include <machine/cpu.h>
+#include <machine/mutex.h>
+
+/* All mutexes in the system (used for debug/panic) */
+mtx_t all_mtx = { MTX_UNOWNED, 0, 0, "All muti queue head",
+ TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
+ { NULL, NULL }, &all_mtx, &all_mtx
+#ifdef SMP_DEBUG
+ , NULL, { NULL, NULL }, NULL, 0
+#endif
+};
+
+int mtx_cur_cnt;
+int mtx_max_cnt;
+
+extern void _mtx_enter_giant_def(void);
+extern void _mtx_exit_giant_def(void);
+
+static void propagate_priority(struct proc *) __unused;
+
+#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED)
+#define mtx_owner(m) (mtx_unowned(m) ? NULL \
+ : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
+
+#define RETIP(x) *(((u_int64_t *)(&x)) - 1)
+#define SET_PRIO(p, pri) (p)->p_priority = (pri)
+
+/*
+ * XXX Temporary, for use from assembly language
+ */
+
+void
+_mtx_enter_giant_def(void)
+{
+
+ mtx_enter(&Giant, MTX_DEF);
+}
+
+void
+_mtx_exit_giant_def(void)
+{
+
+ mtx_exit(&Giant, MTX_DEF);
+}
+
+static void
+propagate_priority(struct proc *p)
+{
+ int pri = p->p_priority;
+ mtx_t *m = p->p_blocked;
+
+ for (;;) {
+ struct proc *p1;
+
+ p = mtx_owner(m);
+
+ if (p == NULL) {
+ /*
+			 * This really isn't quite right.  We really
+			 * ought to bump the priority of the process that
+			 * next acquires the mutex.
+ */
+ MPASS(m->mtx_lock == MTX_CONTESTED);
+ return;
+ }
+ MPASS(p->p_magic == P_MAGIC);
+ if (p->p_priority <= pri)
+ return;
+ /*
+		 * If the lock holder is actually running, just bump its priority.
+ */
+ if (TAILQ_NEXT(p, p_procq) == NULL) {
+ SET_PRIO(p, pri);
+ return;
+ }
+ /*
+		 * If p is on a run queue, move it to the new run queue and
+		 * quit.  Otherwise pick up the mutex p is blocked on.
+ */
+ if ((m = p->p_blocked) == NULL) {
+ remrunqueue(p);
+ SET_PRIO(p, pri);
+ setrunqueue(p);
+ return;
+ }
+ /*
+ * Check if the proc needs to be moved up on
+ * the blocked chain
+ */
+ if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
+ p1->p_priority <= pri)
+ continue;
+
+ /*
+ * Remove proc from blocked chain
+ */
+ TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
+ TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
+ MPASS(p1->p_magic == P_MAGIC);
+ if (p1->p_priority > pri)
+ break;
+ }
+ if (p1)
+ TAILQ_INSERT_BEFORE(p1, p, p_procq);
+ else
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
+ CTR4(KTR_LOCK,
+ "propagate priority: p 0x%x moved before 0x%x on [0x%x] %s",
+ p, p1, m, m->mtx_description);
+ }
+}
+
+void
+mtx_enter_hard(mtx_t *m, int type, int ipl)
+{
+ struct proc *p = CURPROC;
+
+ switch (type) {
+ case MTX_DEF:
+ if ((m->mtx_lock & MTX_FLAGMASK) == (u_int64_t)p) {
+ m->mtx_recurse++;
+ atomic_set_64(&m->mtx_lock, MTX_RECURSE);
+ CTR1(KTR_LOCK, "mtx_enter: 0x%x recurse", m);
+ return;
+ }
+ CTR3(KTR_LOCK, "mtx_enter: 0x%x contested (lock=%x) [0x%x]",
+ m, m->mtx_lock, RETIP(m));
+ while (!atomic_cmpset_64(&m->mtx_lock, MTX_UNOWNED,
+ (u_int64_t)p)) {
+ int v;
+ struct timeval tv;
+ struct proc *p1;
+
+ mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
+ /*
+ * check if the lock has been released while
+ * waiting for the schedlock.
+ */
+ if ((v = m->mtx_lock) == MTX_UNOWNED) {
+ mtx_exit(&sched_lock, MTX_SPIN);
+ continue;
+ }
+ /*
+ * The mutex was marked contested on release. This
+ * means that there are processes blocked on it.
+ */
+ if (v == MTX_CONTESTED) {
+ p1 = TAILQ_FIRST(&m->mtx_blocked);
+ m->mtx_lock = (u_int64_t)p | MTX_CONTESTED;
+ if (p1->p_priority < p->p_priority) {
+ SET_PRIO(p, p1->p_priority);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ return;
+ }
+ /*
+			 * If the mutex isn't already contested and
+			 * a failure occurs setting the contested bit, the
+			 * mutex was either released or the
+			 * state of the MTX_RECURSE bit changed.
+ */
+ if ((v & MTX_CONTESTED) == 0 &&
+ !atomic_cmpset_64(&m->mtx_lock, v,
+ v | MTX_CONTESTED)) {
+ mtx_exit(&sched_lock, MTX_SPIN);
+ continue;
+ }
+
+ /* We definitely have to sleep for this lock */
+ mtx_assert(m, MA_NOTOWNED);
+
+ printf("m->mtx_lock=%lx\n", m->mtx_lock);
+
+#ifdef notyet
+ /*
+			 * If we're borrowing an interrupted thread's VM
+			 * context, we must clean up before going to sleep.
+ */
+ if (p->p_flag & (P_ITHD | P_SITHD)) {
+ ithd_t *it = (ithd_t *)p;
+
+ if (it->it_interrupted) {
+ CTR2(KTR_LOCK,
+ "mtx_enter: 0x%x interrupted 0x%x",
+ it, it->it_interrupted);
+ intr_thd_fixup(it);
+ }
+ }
+#endif
+
+ /* Put us on the list of procs blocked on this mutex */
+ if (TAILQ_EMPTY(&m->mtx_blocked)) {
+ p1 = (struct proc *)(m->mtx_lock &
+ MTX_FLAGMASK);
+ LIST_INSERT_HEAD(&p1->p_contested, m,
+ mtx_contested);
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
+ } else {
+ TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
+ if (p1->p_priority > p->p_priority)
+ break;
+ if (p1)
+ TAILQ_INSERT_BEFORE(p1, p, p_procq);
+ else
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
+ p_procq);
+ }
+
+ p->p_blocked = m; /* Who we're blocked on */
+#ifdef notyet
+ propagate_priority(p);
+#endif
+ CTR3(KTR_LOCK, "mtx_enter: p 0x%x blocked on [0x%x] %s",
+ p, m, m->mtx_description);
+ /*
+			 * cloned from mi_switch
+ */
+ microtime(&tv);
+ p->p_runtime += (tv.tv_usec -
+ PCPU_GET(switchtime.tv_usec)) +
+ (tv.tv_sec -
+ PCPU_GET(switchtime.tv_sec)) *
+ (int64_t)1000000;
+ PCPU_SET(switchtime.tv_usec, tv.tv_usec);
+ PCPU_SET(switchtime.tv_sec, tv.tv_sec);
+ cpu_switch();
+ if (PCPU_GET(switchtime.tv_sec) == 0)
+ microtime(&GLOBALP->gd_switchtime);
+ PCPU_SET(switchticks, ticks);
+ CTR3(KTR_LOCK,
+ "mtx_enter: p 0x%x free from blocked on [0x%x] %s",
+ p, m, m->mtx_description);
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+ alpha_mb();
+ return;
+ case MTX_SPIN:
+ case MTX_SPIN | MTX_FIRST:
+ case MTX_SPIN | MTX_TOPHALF:
+ {
+ int i = 0;
+
+ if (m->mtx_lock == (u_int64_t)p) {
+ m->mtx_recurse++;
+ return;
+ }
+ CTR1(KTR_LOCK, "mtx_enter: 0x%x spinning", m);
+ for (;;) {
+ if (atomic_cmpset_64(&m->mtx_lock, MTX_UNOWNED,
+ (u_int64_t)p)) {
+ alpha_mb();
+ break;
+ }
+ while (m->mtx_lock != MTX_UNOWNED) {
+ if (i++ < 1000000)
+ continue;
+ if (i++ < 6000000)
+ DELAY (1);
+ else
+ panic("spin lock > 5 seconds");
+ }
+ }
+
+#ifdef SMP_DEBUG
+ if (type != MTX_SPIN)
+ m->mtx_saveipl = 0xbeefface;
+ else
+#endif
+ m->mtx_saveipl = ipl;
+ CTR1(KTR_LOCK, "mtx_enter: 0x%x spin done", m);
+ return;
+ }
+ }
+}
+
+void
+mtx_exit_hard(mtx_t *m, int type)
+{
+ struct proc *p, *p1;
+ mtx_t *m1;
+ int pri;
+
+ switch (type) {
+ case MTX_DEF:
+ case MTX_DEF | MTX_NOSWITCH:
+ if (m->mtx_recurse != 0) {
+ if (--(m->mtx_recurse) == 0)
+ atomic_clear_64(&m->mtx_lock, MTX_RECURSE);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%x unrecurse", m);
+ return;
+ }
+ mtx_enter(&sched_lock, MTX_SPIN);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%x contested", m);
+ p = CURPROC;
+ p1 = TAILQ_FIRST(&m->mtx_blocked);
+ MPASS(p->p_magic == P_MAGIC);
+ MPASS(p1->p_magic == P_MAGIC);
+ TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
+ if (TAILQ_EMPTY(&m->mtx_blocked)) {
+ LIST_REMOVE(m, mtx_contested);
+ atomic_cmpset_64(&m->mtx_lock, m->mtx_lock,
+ MTX_UNOWNED);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%x not held", m);
+ } else
+ m->mtx_lock = MTX_CONTESTED;
+ pri = MAXPRI;
+ LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
+ int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
+ if (cp < pri)
+ pri = cp;
+ }
+ if (pri > p->p_nativepri)
+ pri = p->p_nativepri;
+ SET_PRIO(p, pri);
+ CTR2(KTR_LOCK, "mtx_exit: 0x%x contested setrunqueue 0x%x",
+ m, p1);
+ p1->p_blocked = NULL;
+ setrunqueue(p1);
+ if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
+#ifdef notyet
+ if (p->p_flag & (P_ITHD | P_SITHD)) {
+ ithd_t *it = (ithd_t *)p;
+
+ if (it->it_interrupted) {
+ CTR2(KTR_LOCK,
+ "mtx_exit: 0x%x interruped 0x%x",
+ it, it->it_interrupted);
+ intr_thd_fixup(it);
+ }
+ }
+#endif
+ setrunqueue(p);
+ CTR2(KTR_LOCK, "mtx_exit: 0x%x switching out lock=0x%x",
+ m, m->mtx_lock);
+ cpu_switch();
+ CTR2(KTR_LOCK, "mtx_exit: 0x%x resuming lock=0x%x",
+ m, m->mtx_lock);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ return;
+ case MTX_SPIN:
+ case MTX_SPIN | MTX_FIRST:
+ if (m->mtx_recurse != 0) {
+ m->mtx_recurse--;
+ return;
+ }
+ alpha_mb();
+ if (atomic_cmpset_64(&m->mtx_lock, CURTHD, MTX_UNOWNED)) {
+ MPASS(m->mtx_saveipl != 0xbeefface);
+ alpha_pal_swpipl(m->mtx_saveipl);
+ return;
+ }
+ panic("unsucuessful release of spin lock");
+ case MTX_SPIN | MTX_TOPHALF:
+ if (m->mtx_recurse != 0) {
+ m->mtx_recurse--;
+ return;
+ }
+ alpha_mb();
+ if (atomic_cmpset_64(&m->mtx_lock, CURTHD, MTX_UNOWNED))
+ return;
+ panic("unsucuessful release of spin lock");
+ default:
+ panic("mtx_exit_hard: unsupported type 0x%x\n", type);
+ }
+}
+
+#define MV_DESTROY 0 /* validate before destroy */
+#define MV_INIT 1 /* validate before init */
+
+#ifdef SMP_DEBUG
+
+int mtx_validate __P((mtx_t *, int));
+
+int
+mtx_validate(mtx_t *m, int when)
+{
+ mtx_t *mp;
+ int i;
+ int retval = 0;
+
+ if (m == &all_mtx || cold)
+ return 0;
+
+ mtx_enter(&all_mtx, MTX_DEF);
+ ASS(kernacc((caddr_t)all_mtx.mtx_next, 4, 1) == 1);
+ ASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
+ for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
+ if (kernacc((caddr_t)mp->mtx_next, 4, 1) != 1) {
+ panic("mtx_validate: mp=%p mp->mtx_next=%p",
+ mp, mp->mtx_next);
+ }
+ i++;
+ if (i > mtx_cur_cnt) {
+ panic("mtx_validate: too many in chain, known=%d\n",
+ mtx_cur_cnt);
+ }
+ }
+ ASS(i == mtx_cur_cnt);
+ switch (when) {
+ case MV_DESTROY:
+ for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
+ if (mp == m)
+ break;
+ ASS(mp == m);
+ break;
+ case MV_INIT:
+ for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
+ if (mp == m) {
+ /*
+				 * Not good. This mutex already exists.
+ */
+ retval = 1;
+#if 1
+ printf("re-initing existing mutex %s\n",
+ m->mtx_description);
+ ASS(m->mtx_lock == MTX_UNOWNED);
+ retval = 1;
+#else
+ panic("re-initing existing mutex %s",
+ m->mtx_description);
+#endif
+ }
+ }
+ mtx_exit(&all_mtx, MTX_DEF);
+ return (retval);
+}
+#endif
+
+void
+mtx_init(mtx_t *m, char *t, int flag)
+{
+
+ CTR2(KTR_LOCK, "mtx_init 0x%x (%s)", m, t);
+#ifdef SMP_DEBUG
+ if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */
+ return;
+#endif
+ bzero((void *)m, sizeof *m);
+ TAILQ_INIT(&m->mtx_blocked);
+ m->mtx_description = t;
+ m->mtx_lock = MTX_UNOWNED;
+ /* Put on all mutex queue */
+ mtx_enter(&all_mtx, MTX_DEF);
+ m->mtx_next = &all_mtx;
+ m->mtx_prev = all_mtx.mtx_prev;
+ m->mtx_prev->mtx_next = m;
+ all_mtx.mtx_prev = m;
+ if (++mtx_cur_cnt > mtx_max_cnt)
+ mtx_max_cnt = mtx_cur_cnt;
+ mtx_exit(&all_mtx, MTX_DEF);
+ witness_init(m, flag);
+}
+
+void
+mtx_destroy(mtx_t *m)
+{
+
+ CTR2(KTR_LOCK, "mtx_destroy 0x%x (%s)", m, m->mtx_description);
+#ifdef SMP_DEBUG
+ if (m->mtx_next == NULL)
+ panic("mtx_destroy: %p (%s) already destroyed",
+ m, m->mtx_description);
+
+ if (!mtx_owned(m)) {
+ ASS(m->mtx_lock == MTX_UNOWNED);
+ } else {
+ ASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
+ }
+ mtx_validate(m, MV_DESTROY); /* diagnostic */
+#endif
+
+#ifdef WITNESS
+ if (m->mtx_witness)
+ witness_destroy(m);
+#endif /* WITNESS */
+
+ /* Remove from the all mutex queue */
+ mtx_enter(&all_mtx, MTX_DEF);
+ m->mtx_next->mtx_prev = m->mtx_prev;
+ m->mtx_prev->mtx_next = m->mtx_next;
+#ifdef SMP_DEBUG
+ m->mtx_next = m->mtx_prev = NULL;
+#endif
+ mtx_cur_cnt--;
+ mtx_exit(&all_mtx, MTX_DEF);
+}
diff --git a/sys/alpha/alpha/trap.c b/sys/alpha/alpha/trap.c
index a21d532..072c5f3 100644
--- a/sys/alpha/alpha/trap.c
+++ b/sys/alpha/alpha/trap.c
@@ -35,8 +35,9 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <machine/mutex.h>
+#include <sys/ktr.h>
#include <sys/sysproto.h>
-#include <sys/signalvar.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/exec.h>
@@ -58,6 +59,8 @@
#include <machine/reg.h>
#include <machine/pal.h>
#include <machine/fpu.h>
+#include <machine/smp.h>
+#include <machine/mutex.h>
#ifdef KTRACE
#include <sys/uio.h>
@@ -69,8 +72,6 @@
#endif
u_int32_t want_resched;
-u_int32_t astpending;
-struct proc *fpcurproc; /* current user of the FPU */
void userret __P((struct proc *, u_int64_t, u_quad_t));
@@ -201,6 +202,11 @@ trap(a0, a1, a2, entry, framep)
u_quad_t sticks;
int user;
+ /*
+ * Find our per-cpu globals.
+ */
+ globalp = (struct globaldata *) alpha_pal_rdval();
+
cnt.v_trap++;
p = curproc;
ucode = 0;
@@ -233,9 +239,12 @@ trap(a0, a1, a2, entry, framep)
* and per-process unaligned-access-handling flags).
*/
if (user) {
- if ((i = unaligned_fixup(a0, a1, a2, p)) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ if ((i = unaligned_fixup(a0, a1, a2, p)) == 0) {
+ mtx_exit(&Giant, MTX_DEF);
goto out;
-
+ }
+ mtx_exit(&Giant, MTX_DEF);
ucode = a0; /* VA */
break;
}
@@ -259,9 +268,13 @@ trap(a0, a1, a2, entry, framep)
* is not requested or if the completion fails.
*/
if (user) {
+ mtx_enter(&Giant, MTX_DEF);
if (a0 & EXCSUM_SWC)
- if (fp_software_completion(a1, p))
+ if (fp_software_completion(a1, p)) {
+ mtx_exit(&Giant, MTX_DEF);
goto out;
+ }
+ mtx_exit(&Giant, MTX_DEF);
i = SIGFPE;
ucode = a0; /* exception summary */
break;
@@ -364,6 +377,7 @@ trap(a0, a1, a2, entry, framep)
vm_prot_t ftype = 0;
int rv;
+ mtx_enter(&Giant, MTX_DEF);
/*
* If it was caused by fuswintr or suswintr,
* just punt. Note that we check the faulting
@@ -379,6 +393,7 @@ trap(a0, a1, a2, entry, framep)
framep->tf_regs[FRAME_PC] =
p->p_addr->u_pcb.pcb_onfault;
p->p_addr->u_pcb.pcb_onfault = 0;
+ mtx_exit(&Giant, MTX_DEF);
goto out;
}
@@ -489,9 +504,11 @@ trap(a0, a1, a2, entry, framep)
rv = KERN_INVALID_ADDRESS;
}
if (rv == KERN_SUCCESS) {
+ mtx_exit(&Giant, MTX_DEF);
goto out;
}
+ mtx_exit(&Giant, MTX_DEF);
if (!user) {
/* Check for copyin/copyout fault */
if (p != NULL &&
@@ -573,6 +590,12 @@ syscall(code, framep)
u_int64_t args[10]; /* XXX */
u_int hidden = 0, nargs;
+ /*
+ * Find our per-cpu globals.
+ */
+ globalp = (struct globaldata *) alpha_pal_rdval();
+ mtx_enter(&Giant, MTX_DEF);
+
framep->tf_regs[FRAME_TRAPARG_A0] = 0;
framep->tf_regs[FRAME_TRAPARG_A1] = 0;
framep->tf_regs[FRAME_TRAPARG_A2] = 0;
@@ -693,6 +716,7 @@ syscall(code, framep)
* is not the case, this code will need to be revisited.
*/
STOPEVENT(p, S_SCX, code);
+ mtx_exit(&Giant, MTX_DEF);
}
/*
@@ -712,6 +736,7 @@ child_return(p)
if (KTRPOINT(p, KTR_SYSRET))
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
#endif
+ mtx_exit(&Giant, MTX_DEF);
}
/*
@@ -725,6 +750,8 @@ ast(framep)
register struct proc *p;
u_quad_t sticks;
+ mtx_enter(&Giant, MTX_DEF);
+
p = curproc;
sticks = p->p_sticks;
p->p_md.md_tf = framep;
@@ -734,7 +761,7 @@ ast(framep)
cnt.v_soft++;
- astpending = 0;
+ PCPU_SET(astpending, 0);
if (p->p_flag & P_OWEUPC) {
p->p_flag &= ~P_OWEUPC;
addupc_task(p, p->p_stats->p_prof.pr_addr,
@@ -742,6 +769,8 @@ ast(framep)
}
userret(p, framep->tf_regs[FRAME_PC], sticks);
+
+ mtx_exit(&Giant, MTX_DEF);
}
/*
diff --git a/sys/alpha/alpha/vm_machdep.c b/sys/alpha/alpha/vm_machdep.c
index 8baea02..3831d67 100644
--- a/sys/alpha/alpha/vm_machdep.c
+++ b/sys/alpha/alpha/vm_machdep.c
@@ -84,6 +84,7 @@
#include <machine/fpu.h>
#include <machine/md_var.h>
#include <machine/prom.h>
+#include <machine/mutex.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -246,8 +247,10 @@ cpu_exit(p)
alpha_fpstate_drop(p);
(void) splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -358,7 +361,7 @@ vunmapbuf(bp)
}
/*
- * Force reset the processor by invalidating the entire address space!
+ * Reset back to firmware.
*/
void
cpu_reset()
@@ -416,7 +419,7 @@ vm_page_zero_idle()
return(0);
#ifdef SMP
- if (try_mplock()) {
+ if (KLOCK_ENTER(M_TRY)) {
#endif
s = splvm();
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
@@ -447,7 +450,7 @@ vm_page_zero_idle()
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
#ifdef SMP
- rel_mplock();
+ KLOCK_EXIT;
#endif
return (1);
#ifdef SMP
diff --git a/sys/alpha/include/asm.h b/sys/alpha/include/asm.h
index b185295..d46eb97 100644
--- a/sys/alpha/include/asm.h
+++ b/sys/alpha/include/asm.h
@@ -90,6 +90,11 @@
#define sp $30 /* (S) stack pointer */
#define zero $31 /* wired zero */
+/* In the kernel, we use t7 to point at the per-cpu globals. */
+#ifdef _KERNEL
+#define globalp $8
+#endif
+
/* Floating point registers (XXXX VERIFY THIS) */
#define fv0 $f0 /* (T) return value (real) */
#define fv1 $f1 /* (T) return value (imaginary)*/
@@ -266,7 +271,6 @@ _name_:; \
.loc 1 __LINE__; \
bsr ra,exception_save_regs /* jmp/CALL trashes pv/t12 */
-
/*
* LEAF
* Declare a global leaf function.
diff --git a/sys/alpha/include/cpu.h b/sys/alpha/include/cpu.h
index c9d783b..99eb79e 100644
--- a/sys/alpha/include/cpu.h
+++ b/sys/alpha/include/cpu.h
@@ -65,7 +65,7 @@ struct clockframe {
#define CLKF_BASEPRI(framep) \
(((framep)->cf_tf.tf_regs[FRAME_PS] & ALPHA_PSL_IPL_MASK) == 0)
#define CLKF_PC(framep) ((framep)->cf_tf.tf_regs[FRAME_PC])
-#define CLKF_INTR(framep) (intr_nesting_level >= 2)
+#define CLKF_INTR(framep) (PCPU_GET(intr_nesting_level) >= 2)
/*
* Preempt the current process if in interrupt from user mode,
@@ -89,9 +89,10 @@ struct clockframe {
*/
#define signotify(p) aston()
-#define aston() (astpending = 1)
+#define aston() PCPU_SET(astpending, 1)
#ifdef _KERNEL
+extern u_int astpending;
extern u_int32_t intr_nesting_level; /* bookkeeping only; counts sw intrs */
extern u_int32_t want_resched; /* resched() was called */
#endif
@@ -132,7 +133,6 @@ struct reg;
struct rpb;
struct trapframe;
-extern struct proc *fpcurproc;
extern struct rpb *hwrpb;
extern volatile int mc_expected, mc_received;
diff --git a/sys/alpha/include/cpufunc.h b/sys/alpha/include/cpufunc.h
index e7d37f0..cabfe0f 100644
--- a/sys/alpha/include/cpufunc.h
+++ b/sys/alpha/include/cpufunc.h
@@ -33,6 +33,7 @@
#include <sys/types.h>
#include <machine/chipset.h>
+#include <machine/alpha_cpu.h>
#ifdef __GNUC__
@@ -44,6 +45,33 @@ breakpoint(void)
#endif
+/*
+ * Bogus interrupt manipulation
+ */
+static __inline void
+disable_intr(void)
+{
+ alpha_pal_swpipl(ALPHA_PSL_IPL_HIGH);
+}
+
+static __inline void
+enable_intr(void)
+{
+ alpha_pal_swpipl(ALPHA_PSL_IPL_0);
+}
+
+static __inline u_int
+save_intr(void)
+{
+ return alpha_pal_rdps() & ALPHA_PSL_IPL_MASK;
+}
+
+static __inline void
+restore_intr(u_int ipl)
+{
+ alpha_pal_swpipl(ipl);
+}
+
#endif /* _KERNEL */
#endif /* !_MACHINE_CPUFUNC_H_ */
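A small usage sketch for the four inlines added above (hypothetical code, not part of the commit; `example_critical_section' is a made-up name): save the current IPL, block interrupts around a short critical section, then drop back to the saved level.

static __inline void
example_critical_section(void)
{
	u_int ipl = save_intr();	/* remember the current IPL */

	disable_intr();			/* raise to ALPHA_PSL_IPL_HIGH */
	/* ... touch state shared with interrupt handlers ... */
	restore_intr(ipl);		/* return to the previous IPL */
}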
diff --git a/sys/alpha/include/globaldata.h b/sys/alpha/include/globaldata.h
new file mode 100644
index 0000000..b246bb1
--- /dev/null
+++ b/sys/alpha/include/globaldata.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 1999 Luoqi Chen <luoqi@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+
+/*
+ * This structure maps out the global data that needs to be kept on a
+ * per-cpu basis. genassym uses this to generate offsets for the assembler
+ * code, which also provides external symbols so that C can get at them as
+ * though they were really globals. This structure is pointed to by
+ * the per-cpu system value (see alpha_pal_rdval() and alpha_pal_wrval()).
+ * Inside the kernel, the globally reserved register t7 is used to
+ * point at the globaldata structure.
+ */
+struct globaldata {
+ struct alpha_pcb gd_idlepcb; /* pcb for idling */
+ struct proc *gd_curproc; /* current process */
+ struct proc *gd_idleproc; /* idle process */
+ struct proc *gd_fpcurproc; /* fp state owner */
+ struct pcb *gd_curpcb; /* current pcb */
+ struct timeval gd_switchtime;
+ int gd_switchticks;
+ u_int gd_cpuno; /* this cpu number */
+ u_int gd_other_cpus; /* all other cpus */
+ int gd_inside_intr;
+ u_int64_t gd_idlepcbphys; /* pa of gd_idlepcb */
+ u_int64_t gd_pending_ipis; /* pending IPI events */
+ u_int32_t gd_next_asn; /* next ASN to allocate */
+ u_int32_t gd_current_asngen; /* ASN rollover check */
+ u_int32_t gd_intr_nesting_level; /* interrupt recursion */
+
+ u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+#ifdef KTR_PERCPU
+ volatile int gd_ktr_idx; /* Index into trace table */
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[0];
+#endif
+};
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
+void globaldata_init(struct globaldata *pcpu, int cpuno, size_t sz);
+struct globaldata *globaldata_find(int cpuno);
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_GLOBALDATA_H_ */
diff --git a/sys/alpha/include/globals.h b/sys/alpha/include/globals.h
new file mode 100644
index 0000000..303efdf
--- /dev/null
+++ b/sys/alpha/include/globals.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 1999 Luoqi Chen <luoqi@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_GLOBALS_H_
+#define _MACHINE_GLOBALS_H_
+
+#ifdef _KERNEL
+
+register struct globaldata *globalp __asm__("$8");
+
+#if 1
+#define GLOBALP globalp
+#else
+#define GLOBALP ((struct globaldata *) alpha_pal_rdval())
+#endif
+
+#define PCPU_GET(name) (GLOBALP->gd_##name)
+#define PCPU_SET(name,value) (GLOBALP->gd_##name = (value))
+
+/*
+ * The following set of macros works for UP kernels as well, but for maximum
+ * performance we allow the global variables to be accessed directly. On the
+ * other hand, kernel modules should always use these macros to maintain
+ * portability between UP and SMP kernels.
+ */
+#define CURPROC PCPU_GET(curproc)
+#define curproc PCPU_GET(curproc)
+#define idleproc PCPU_GET(idleproc)
+#define curpcb PCPU_GET(curpcb)
+#define fpcurproc PCPU_GET(fpcurproc)
+#define switchtime PCPU_GET(switchtime)
+#define switchticks PCPU_GET(switchticks)
+#define cpuid PCPU_GET(cpuno)
+#define prevproc PCPU_GET(curproc) /* XXX - until ithreads */
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_GLOBALS_H_ */
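To illustrate what the accessors above expand to (a sketch; `example_curproc' is a made-up name, and gd_curproc comes from globaldata.h): both reads below hit the same per-cpu slot, and either one compiles to a load through the reserved $8/t7 register.

static __inline struct proc *
example_curproc(void)
{
	struct proc *p1 = CURPROC;			/* convenience alias */
	struct proc *p2 = GLOBALP->gd_curproc;		/* what PCPU_GET(curproc) expands to */

	return (p1 == p2 ? p1 : NULL);
}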
diff --git a/sys/alpha/include/ipl.h b/sys/alpha/include/ipl.h
index ea93fbb..2e9b3cc 100644
--- a/sys/alpha/include/ipl.h
+++ b/sys/alpha/include/ipl.h
@@ -127,4 +127,19 @@ extern void schedsoftclock(void);
extern unsigned cpl; /* current priority level mask */
#endif
+/*
+ * Interprocessor interrupts for SMP.
+ */
+#define IPI_INVLTLB 0x0001
+#define IPI_RENDEZVOUS 0x0002
+#define IPI_AST 0x0004
+#define IPI_CHECKSTATE 0x0008
+#define IPI_STOP 0x0010
+
+void smp_ipi_selected(u_int32_t cpus, u_int64_t ipi);
+void smp_ipi_all(u_int64_t ipi);
+void smp_ipi_all_but_self(u_int64_t ipi);
+void smp_ipi_self(u_int64_t ipi);
+void smp_handle_ipi(struct trapframe *frame);
+
#endif /* !_MACHINE_MD_VAR_H_ */
diff --git a/sys/alpha/include/lock.h b/sys/alpha/include/lock.h
index c2ae0fa..1066d46 100644
--- a/sys/alpha/include/lock.h
+++ b/sys/alpha/include/lock.h
@@ -35,10 +35,40 @@
* It is an error to hold one of these locks while a process is sleeping.
*/
struct simplelock {
- volatile int lock_data;
+ volatile u_int lock_data;
};
+/* functions in mp_machdep.c */
+void s_lock_init __P((struct simplelock *));
+void s_lock __P((struct simplelock *));
+int s_lock_try __P((struct simplelock *));
+void ss_lock __P((struct simplelock *));
+void ss_unlock __P((struct simplelock *));
+void s_lock_np __P((struct simplelock *));
+void s_unlock_np __P((struct simplelock *));
+
+/* inline simplelock functions */
+static __inline void
+s_unlock(struct simplelock *lkp)
+{
+ alpha_mb();
+ lkp->lock_data = 0;
+}
+
+#if !defined(SIMPLELOCK_DEBUG) && NCPUS > 1
+/*
+ * This set of defines turns on the real functions in mp_machdep.c.
+ */
+#define simple_lock_init(alp) s_lock_init(alp)
+#define simple_lock(alp) s_lock(alp)
+#define simple_lock_try(alp) s_lock_try(alp)
+#define simple_unlock(alp) s_unlock(alp)
+
+#endif /* !SIMPLELOCK_DEBUG && NCPUS > 1 */
+
#define COM_LOCK()
#define COM_UNLOCK()
+#define COM_DISABLE_INTR() COM_LOCK()
+#define COM_ENABLE_INTR() COM_UNLOCK()
#endif /* !_MACHINE_LOCK_H_ */
diff --git a/sys/alpha/include/mutex.h b/sys/alpha/include/mutex.h
new file mode 100644
index 0000000..ac13b8c
--- /dev/null
+++ b/sys/alpha/include/mutex.h
@@ -0,0 +1,563 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $
+ * $FreeBSD$
+ */
+
+
+#ifndef _MACHINE_MUTEX_H_
+#define _MACHINE_MUTEX_H_
+
+#ifndef LOCORE
+
+#include <sys/queue.h>
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine/globaldata.h>
+
+/*
+ * Mutex flags
+ *
+ * Types
+ */
+#define MTX_DEF 0x1 /* Default (spin/sleep) */
+#define MTX_SPIN 0x2 /* Spin only lock */
+
+/* Options */
+#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */
+#define MTX_NORECURSE 0x8 /* No recursion possible */
+#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */
+#define MTX_NOSWITCH 0x20 /* Do not switch on release */
+#define MTX_FIRST 0x40 /* First spin lock holder */
+#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */
+
+/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */
+#define MTX_HARDOPTS (MTX_DEF | MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH)
+
+/* Flags/value used in mtx_lock */
+#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */
+#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */
+#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED)
+#define MTX_UNOWNED 0x8 /* Cookie for free mutex */
+
+struct proc; /* XXX */
+
+/*
+ * Sleep/spin mutex
+ */
+struct mtx {
+ volatile u_int64_t mtx_lock; /* lock owner/gate/flags */
+ volatile u_int32_t mtx_recurse; /* number of recursive holds */
+ u_int32_t mtx_saveipl; /* saved ipl (for spin locks) */
+ char *mtx_description;
+ TAILQ_HEAD(, proc) mtx_blocked;
+ LIST_ENTRY(mtx) mtx_contested;
+ struct mtx *mtx_next; /* all locks in system */
+ struct mtx *mtx_prev;
+#ifdef SMP_DEBUG
+ /* If you add anything here, adjust the mtxf_t definition below */
+ struct witness *mtx_witness;
+ LIST_ENTRY(mtx) mtx_held;
+ char *mtx_file;
+ int mtx_line;
+#endif /* SMP_DEBUG */
+};
+
+typedef struct mtx mtx_t;
+
+/*
+ * Filler for structs which need to remain the same size
+ * whether or not SMP_DEBUG is turned on.
+ */
+typedef struct mtxf {
+#ifdef SMP_DEBUG
+ char mtxf_data[0];
+#else
+ char mtxf_data[4*sizeof(void *) + sizeof(int)];
+#endif
+} mtxf_t;
+
+#define mp_fixme(string)
+
+#ifdef _KERNEL
+/* Misc */
+#define CURTHD ((u_int64_t)CURPROC) /* Current thread ID */
+
+/* Prototypes */
+void mtx_init(mtx_t *m, char *description, int flag);
+void mtx_enter_hard(mtx_t *, int type, int ipl);
+void mtx_exit_hard(mtx_t *, int type);
+void mtx_destroy(mtx_t *m);
+
+/* Global locks */
+extern mtx_t sched_lock;
+extern mtx_t Giant;
+
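For orientation, a sketch of the basic lifecycle using these entry points (hypothetical `foo_mtx' and call sites; mtx_enter()/mtx_exit() themselves are supplied by macros further down in this header):

static mtx_t foo_mtx;

static void
foo_example(void)
{
	mtx_init(&foo_mtx, "foo state", MTX_DEF);	/* once, at attach/init time */

	mtx_enter(&foo_mtx, MTX_DEF);			/* around accesses to foo's state */
	/* ... */
	mtx_exit(&foo_mtx, MTX_DEF);

	mtx_destroy(&foo_mtx);				/* at detach time */
}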
+/*
+ * Used to replace return with an exit Giant and return.
+ */
+
+#define EGAR(a) \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return (a); \
+} while (0)
+
+#define VEGAR \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return; \
+} while (0)
+
+#define DROP_GIANT() \
+do { \
+ int _giantcnt; \
+ WITNESS_SAVE_DECL(Giant); \
+ \
+ WITNESS_SAVE(&Giant, Giant); \
+ for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \
+ mtx_exit(&Giant, MTX_DEF)
+
+#define PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant); \
+} while (0)
+
+#define PARTIAL_PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant)
+
+
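The two macros above are deliberately written as an unbalanced do/while pair; a (hypothetical) caller brackets a sleep with them so that Giant is released however many times it is held and then reacquired, e.g.:

static void
example_sleep_without_giant(void *chan)
{
	DROP_GIANT();
	tsleep(chan, PZERO, "example", hz);	/* sleep without holding Giant */
	PICKUP_GIANT();
}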
+/*
+ * Debugging
+ */
+#ifndef SMP_DEBUG
+#define mtx_assert(m, what)
+#else /* SMP_DEBUG */
+
+#define MA_OWNED 1
+#define MA_NOTOWNED 2
+#define mtx_assert(m, what) { \
+ switch ((what)) { \
+ case MA_OWNED: \
+ ASS(mtx_owned((m))); \
+ break; \
+ case MA_NOTOWNED: \
+ ASS(!mtx_owned((m))); \
+ break; \
+ default: \
+ panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \
+ } \
+}
+
+#ifdef INVARIANTS
+#define ASS(ex) MPASS(ex)
+#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ #ex, __FILE__, __LINE__)
+#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ what, __FILE__, __LINE__)
+
+#ifdef MTX_STRS
+char STR_IEN[] = "fl & 0x200";
+char STR_IDIS[] = "!(fl & 0x200)";
+#else /* MTX_STRS */
+extern char STR_IEN[];
+extern char STR_IDIS[];
+#endif /* MTX_STRS */
+#define ASS_IEN		MPASS2((alpha_pal_rdps() & ALPHA_PSL_IPL_MASK)	\
+			    == ALPHA_PSL_IPL_HIGH, STR_IEN)
+#define ASS_IDIS	MPASS2((alpha_pal_rdps() & ALPHA_PSL_IPL_MASK)	\
+			    != ALPHA_PSL_IPL_HIGH, STR_IDIS)
+#endif /* INVARIANTS */
+
+#endif /* SMP_DEBUG */
+
+#if !defined(SMP_DEBUG) || !defined(INVARIANTS)
+#define ASS(ex)
+#define MPASS(ex)
+#define MPASS2(ex, where)
+#define ASS_IEN
+#define ASS_IDIS
+#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */
+
+#ifdef WITNESS
+#ifndef SMP_DEBUG
+#error WITNESS requires SMP_DEBUG
+#endif /* SMP_DEBUG */
+#define WITNESS_ENTER(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_enter((m), (f), __FILE__, __LINE__)
+#define WITNESS_EXIT(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_exit((m), (f), __FILE__, __LINE__)
+
+#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__)
+#define WITNESS_SAVE_DECL(n) \
+ char * __CONCAT(n, __wf); \
+ int __CONCAT(n, __wl)
+
+#define WITNESS_SAVE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \
+} while (0)
+
+#define WITNESS_RESTORE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \
+} while (0)
+
+void witness_init(mtx_t *, int flag);
+void witness_destroy(mtx_t *);
+void witness_enter(mtx_t *, int, char *, int);
+void witness_try_enter(mtx_t *, int, char *, int);
+void witness_exit(mtx_t *, int, char *, int);
+void witness_display(void(*)(const char *fmt, ...));
+void witness_list(struct proc *);
+int witness_sleep(int, mtx_t *, char *, int);
+void witness_save(mtx_t *, char **, int *);
+void witness_restore(mtx_t *, char *, int);
+#else /* WITNESS */
+#define WITNESS_ENTER(m, flag)
+#define WITNESS_EXIT(m, flag)
+#define WITNESS_SLEEP(check, m)
+#define WITNESS_SAVE_DECL(n)
+#define WITNESS_SAVE(m, n)
+#define WITNESS_RESTORE(m, n)
+
+/*
+ * flag++ is a sleazy way of shutting up the unused-parameter warning
+ * in mtx_init().
+ */
+#define witness_init(m, flag) flag++
+#define witness_destroy(m)
+#define witness_enter(m, flag, f, l)
+#define witness_try_enter(m, flag, f, l )
+#define witness_exit(m, flag, f, l)
+#endif /* WITNESS */
+
+/*
+ * Assembly macros (for internal use only)
+ *--------------------------------------------------------------------------
+ */
+
+/*
+ * Get a sleep lock, deal with recursion inline
+ */
+
+#define _V(x) __STRING(x)
+
+#define _getlock_sleep(mp, tid, type) do { \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \
+ if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \
+ else { \
+ if (((mp)->mtx_lock & MTX_RECURSE) == 0) \
+ atomic_set_64(&(mp)->mtx_lock, MTX_RECURSE); \
+ (mp)->mtx_recurse++; \
+ } \
+ } else { \
+ alpha_mb(); \
+ } \
+} while (0)
+
+/*
+ * Get a spin lock, handle recursion inline (as the less common case)
+ */
+
+#define _getlock_spin_block(mp, tid, type) do { \
+ u_int _ipl = alpha_pal_rdps() & ALPHA_PSL_IPL_MASK; \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _ipl); \
+ else { \
+ alpha_mb(); \
+ (mp)->mtx_saveipl = _ipl; \
+ } \
+} while (0)
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter
+ * function if we can't get it inline.
+ */
+
+#define _getlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \
+ else \
+ alpha_mb(); \
+} while (0)
+
+/*
+ * Release a sleep lock assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+
+#define _exitlock_norecurse(mp, tid, type) do { \
+ alpha_mb(); \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+} while (0)
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+
+#define _exitlock(mp, tid, type) do { \
+ alpha_mb(); \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) {\
+ if (((mp)->mtx_lock & MTX_RECURSE) && \
+ (--(mp)->mtx_recurse == 0)) \
+ atomic_clear_64(&(mp)->mtx_lock, MTX_RECURSE); \
+ else \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+ } \
+} while (0)
+
+/*
+ * Release a spin lock (with possible recursion)
+ */
+
+#define _exitlock_spin(mp) do { \
+ int _ipl = (mp)->mtx_saveipl; \
+ alpha_mb(); \
+ if ((mp)->mtx_recurse == 0 || (--(mp)->mtx_recurse) == 0) \
+ atomic_cmpset_64(&(mp)->mtx_lock, (mp)->mtx_lock, \
+ MTX_UNOWNED); \
+ alpha_pal_swpipl(_ipl); \
+} while (0)
+
+/*
+ * Externally visible mutex functions
+ *------------------------------------------------------------------------
+ */
+
+/*
+ * Return non-zero if a mutex is already owned by the current thread
+ */
+#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD)
+
+/* Common strings */
+#ifdef MTX_STRS
+char STR_mtx_enter_fmt[] = "GOT %s [%p] at %s:%d r=%d";
+char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0";
+char STR_mtx_exit_fmt[] = "REL %s [%p] at %s:%d r=%d";
+char STR_mtx_owned[] = "mtx_owned(_mpp)";
+char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%p] at %s:%d result=%d";
+#else /* MTX_STRS */
+extern char STR_mtx_enter_fmt[];
+extern char STR_mtx_bad_type[];
+extern char STR_mtx_exit_fmt[];
+extern char STR_mtx_owned[];
+extern char STR_mtx_recurse[];
+extern char STR_mtx_try_enter_fmt[];
+#endif /* MTX_STRS */
+
+/*
+ * Get lock 'm'; the macro handles the easy (and most common) cases and
+ * leaves the slow path to the mtx_enter_hard() function.
+ *
+ * Note: since type is usually a constant, much of this code is optimized out.
+ */
+#define mtx_enter(mtxp, type) do { \
+ mtx_t * _mpp = mtxp; \
+ \
+ /* bits only valid on mtx_exit() */ \
+ MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0, STR_mtx_bad_type); \
+ \
+ do { \
+ if ((type) & MTX_SPIN) { \
+ /* \
+ * Easy cases of spin locks: \
+ * \
+ * 1) We already own the lock and will simply \
+ * recurse on it (if RLIKELY) \
+ * \
+ * 2) The lock is free, we just get it \
+ */ \
+ if ((type) & MTX_RLIKELY) { \
+ /* \
+			 * Check for recursion; if we already	\
+			 * hold this lock we just bump the	\
+			 * recursion count.			\
+ */ \
+ if (_mpp->mtx_lock == CURTHD) { \
+ _mpp->mtx_recurse++; \
+ break; /* Done */ \
+ } \
+ } \
+ \
+ if (((type) & MTX_TOPHALF) == 0) \
+ /* \
+ * If an interrupt thread uses this \
+ * we must block interrupts here. \
+ */ \
+ _getlock_spin_block(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ else \
+ _getlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ } else { \
+ /* Sleep locks */ \
+ if ((type) & MTX_RLIKELY) \
+ _getlock_sleep(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ else \
+ _getlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ } \
+ } while (0); \
+ WITNESS_ENTER(_mpp, type); \
+ CTR5(KTR_LOCK, STR_mtx_enter_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ (_mpp)->mtx_recurse); \
+} while (0)
+
+/*
+ * Attempt to get MTX_DEF lock, return non-zero if lock acquired
+ *
+ * XXX DOES NOT HANDLE RECURSION
+ */
+#ifdef SMP_DEBUG
+#define mtx_try_enter(mtxp, type) ({ \
+ mtx_t *const _mpp = mtxp; \
+ int _rval; \
+ \
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);\
+ if (_rval && (_mpp)->mtx_witness != NULL) { \
+ ASS((_mpp)->mtx_recurse == 0); \
+ witness_try_enter(_mpp, type, __FILE__, __LINE__); \
+ } \
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ _rval); \
+ _rval; \
+})
+
+#else /* SMP_DEBUG */
+
+#define mtx_try_enter(mtxp, type) ({ \
+ mtx_t *const _mpp = mtxp; \
+ int _rval; \
+ \
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);\
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ _rval); \
+ _rval; \
+})
+
+#endif /* SMP_DEBUG */
+
+#if 0
+#define mtx_legal2block() ({ \
+ register int _l2b; \
+ __asm __volatile ( \
+" pushfl;" \
+" popl %%eax;" \
+" andl $0x200, %%eax;" \
+ : "=a" (_l2b) \
+ : \
+ : "cc"); \
+ _l2b; \
+})
+#endif
+
+#define mtx_legal2block() (read_eflags() & 0x200)
+
+/*
+ * Release lock m
+ */
+#define mtx_exit(mtxp, type) do { \
+ mtx_t *const _mpp = mtxp; \
+ \
+ MPASS2(mtx_owned(_mpp), STR_mtx_owned); \
+ WITNESS_EXIT(_mpp, type); \
+ CTR5(KTR_LOCK, STR_mtx_exit_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ (_mpp)->mtx_recurse); \
+ if ((type) & MTX_SPIN) { \
+ if ((type) & MTX_NORECURSE) { \
+ MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse); \
+ atomic_cmpset_64(&_mpp->mtx_lock, _mpp->mtx_lock, \
+ MTX_UNOWNED); \
+ if (((type) & MTX_TOPHALF) == 0) { \
+ splx(_mpp->mtx_saveipl); \
+ } \
+ } else \
+ if ((type) & MTX_TOPHALF) \
+ _exitlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ else \
+ _exitlock_spin(_mpp); \
+ } else { \
+ /* Handle sleep locks */ \
+ if ((type) & MTX_RLIKELY) \
+ _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS); \
+ else \
+ _exitlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ } \
+} while (0)
+#endif /* _KERNEL */
+
+#else /* !LOCORE */
+
+/*
+ * Simple assembly macros to get and release non-recursive spin locks
+ */
+#define MTX_ENTER(lck) \
+ call_pal PAL_OSF1_rdps; \
+ and v0, ALPHA_PSL_IPL_MASK, v0; \
+1: ldq_l a0, lck+MTX_LOCK; \
+ cmpeq a0, MTX_UNOWNED, a1; \
+ beq a1, 1b; \
+ ldq a0, PC_CURPROC(globalp); \
+ stq_c a0, lck+MTX_LOCK; \
+ beq a0, 1b; \
+ mb; \
+ stl v0, lck+MTX_SAVEIPL; \
+	ldiq	a0, ALPHA_PSL_IPL_HIGH;		\
+	call_pal PAL_OSF1_swpipl
+
+#define MTX_EXIT(lck) \
+ mb; \
+ ldiq a0, MTX_UNOWNED; \
+ stq a0, lck+MTX_LOCK; \
+ ldl a0, lck+MTX_SAVEIPL; \
+ call_pal PAL_OSF1_swpipl
+
+#endif /* !LOCORE */
+
+#endif /* __MACHINE_MUTEX_H */
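
A minimal usage sketch of the interface declared above (illustrative only, not part of the patch). mtx_enter()/mtx_exit() take the mutex type, and DROP_GIANT()/PICKUP_GIANT() must be paired inside one block because they open and close a do/while. The foo_* names and the wait condition are hypothetical.

	static void
	foo_wait(struct foo_softc *sc)		/* hypothetical driver routine */
	{
		mtx_enter(&Giant, MTX_DEF);		/* default (sleep) mutex */
		while (!foo_intr_pending(sc)) {		/* hypothetical predicate */
			DROP_GIANT();			/* release Giant, all recursion levels */
			tsleep(sc, PRIBIO, "foowt", hz);
			PICKUP_GIANT();			/* reacquire to the saved depth */
		}
		mtx_exit(&Giant, MTX_DEF);
	}
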
diff --git a/sys/alpha/include/param.h b/sys/alpha/include/param.h
index 80dce22..742a3f7 100644
--- a/sys/alpha/include/param.h
+++ b/sys/alpha/include/param.h
@@ -70,7 +70,11 @@
#define OBJFORMAT_NAMES "elf"
#define OBJFORMAT_DEFAULT "elf"
+#ifdef SMP
+#define NCPUS 32
+#else
#define NCPUS 1
+#endif
/*
* Round p (pointer or byte index) up to a correctly-aligned value for all
diff --git a/sys/alpha/include/pcb.h b/sys/alpha/include/pcb.h
index 3caa144..3bf2586 100644
--- a/sys/alpha/include/pcb.h
+++ b/sys/alpha/include/pcb.h
@@ -30,7 +30,7 @@
#include <machine/frame.h>
#include <machine/reg.h>
-
+#include <machine/globaldata.h>
#include <machine/alpha_cpu.h>
/*
@@ -53,6 +53,7 @@ struct pcb {
u_int64_t pcb_fp_control; /* IEEE control word [SW] */
unsigned long pcb_onfault; /* for copy faults [SW] */
unsigned long pcb_accessaddr; /* for [fs]uswintr [SW] */
+ u_int32_t pcb_schednest; /* state of sched_lock [SW] */
};
/*
@@ -64,3 +65,9 @@ struct md_coredump {
struct trapframe md_tf;
struct fpreg md_fpstate;
};
+
+#ifdef _KERNEL
+#ifndef curpcb
+extern struct pcb *curpcb; /* our current running pcb */
+#endif
+#endif
diff --git a/sys/alpha/include/pcpu.h b/sys/alpha/include/pcpu.h
new file mode 100644
index 0000000..b246bb1
--- /dev/null
+++ b/sys/alpha/include/pcpu.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 1999 Luoqi Chen <luoqi@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+
+/*
+ * This structure maps out the global data that needs to be kept on a
+ * per-cpu basis. genassym uses this to generate offsets for the assembler
+ * code, which also provides external symbols so that C can get at them as
+ * though they were really globals. This structure is pointed to by
+ * the per-cpu system value (see alpha_pal_rdval() and alpha_pal_wrval()).
+ * Inside the kernel, the globally reserved register t7 is used to
+ * point at the globaldata structure.
+ */
+struct globaldata {
+ struct alpha_pcb gd_idlepcb; /* pcb for idling */
+ struct proc *gd_curproc; /* current process */
+ struct proc *gd_idleproc; /* idle process */
+ struct proc *gd_fpcurproc; /* fp state owner */
+ struct pcb *gd_curpcb; /* current pcb */
+ struct timeval gd_switchtime;
+ int gd_switchticks;
+ u_int gd_cpuno; /* this cpu number */
+ u_int gd_other_cpus; /* all other cpus */
+ int gd_inside_intr;
+ u_int64_t gd_idlepcbphys; /* pa of gd_idlepcb */
+ u_int64_t gd_pending_ipis; /* pending IPI events */
+ u_int32_t gd_next_asn; /* next ASN to allocate */
+ u_int32_t gd_current_asngen; /* ASN rollover check */
+ u_int32_t gd_intr_nesting_level; /* interrupt recursion */
+
+ u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+#ifdef KTR_PERCPU
+ volatile int gd_ktr_idx; /* Index into trace table */
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[0];
+#endif
+};
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
+void globaldata_init(struct globaldata *pcpu, int cpuno, size_t sz);
+struct globaldata *globaldata_find(int cpuno);
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_GLOBALDATA_H_ */
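
Illustrative only (not part of the patch): walking the per-CPU list declared above. The consumer function name is hypothetical; the fields are the ones defined in struct globaldata.

	static void
	globaldata_report(void)			/* hypothetical consumer */
	{
		struct globaldata *gd;

		SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
			printf("cpu%u: curproc %p, pending IPIs %lx\n",
			    gd->gd_cpuno, (void *)gd->gd_curproc,
			    (u_long)gd->gd_pending_ipis);
	}
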
diff --git a/sys/alpha/include/pmap.h b/sys/alpha/include/pmap.h
index 134c9a2..de59b66 100644
--- a/sys/alpha/include/pmap.h
+++ b/sys/alpha/include/pmap.h
@@ -174,9 +174,11 @@ struct pmap {
TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
int pm_count; /* reference count */
int pm_flags; /* pmap flags */
- int pm_active; /* active flag */
- int pm_asn; /* address space number */
- u_int pm_asngen; /* generation number of pm_asn */
+ u_int32_t pm_active; /* active cpus */
+ struct {
+ u_int32_t asn:8; /* address space number */
+ u_int32_t gen:24; /* generation number */
+ } pm_asn[NCPUS];
struct pmap_statistics pm_stats; /* pmap statistics */
struct vm_page *pm_ptphint; /* pmap ptp hint */
};
diff --git a/sys/alpha/include/proc.h b/sys/alpha/include/proc.h
index 502b607..d003816 100644
--- a/sys/alpha/include/proc.h
+++ b/sys/alpha/include/proc.h
@@ -28,6 +28,12 @@
* rights to redistribute these changes.
*/
+#ifndef _MACHINE_PROC_H_
+#define _MACHINE_PROC_H_
+
+#include <machine/globaldata.h>
+#include <machine/globals.h>
+
/*
* Machine-dependent part of the proc struct for the Alpha.
*/
@@ -55,3 +61,5 @@ struct mdproc {
#define MDP_UAC_SIGBUS 0x0040 /* Deliver SIGBUS upon
unaligned access */
#define MDP_UAC_MASK (MDP_UAC_NOPRINT | MDP_UAC_NOFIX | MDP_UAC_SIGBUS)
+
+#endif /* !_MACHINE_PROC_H_ */
diff --git a/sys/alpha/include/rpb.h b/sys/alpha/include/rpb.h
index 1f2f884..0be0775 100644
--- a/sys/alpha/include/rpb.h
+++ b/sys/alpha/include/rpb.h
@@ -219,7 +219,8 @@ struct rpb {
* PCS: Per-CPU information.
*/
struct pcs {
- u_int8_t pcs_hwpcb[128]; /* 0: PAL dependent */
+
+ u_int64_t pcs_hwpcb[16]; /* 0: PAL dependent */
#define PCS_BIP 0x000001 /* boot in progress */
#define PCS_RC 0x000002 /* restart possible */
@@ -238,12 +239,12 @@ struct pcs {
#define PCS_HALT_WARM_BOOT 0x030000
#define PCS_HALT_STAY_HALTED 0x040000
#define PCS_mbz 0xffffffffff000000 /* 24:63 -- must be zero */
- u_int64_t pcs_flags; /* 80: */
+ u_int64_t pcs_flags; /* 128: */
- u_int64_t pcs_pal_memsize; /* 88: PAL memory size */
- u_int64_t pcs_pal_scrsize; /* 90: PAL scratch size */
- vm_offset_t pcs_pal_memaddr; /* 98: PAL memory addr */
- vm_offset_t pcs_pal_scraddr; /* A0: PAL scratch addr */
+ u_int64_t pcs_pal_memsize; /* 136: PAL memory size */
+ u_int64_t pcs_pal_scrsize; /* 144: PAL scratch size */
+ vm_offset_t pcs_pal_memaddr; /* 152: PAL memory addr */
+ vm_offset_t pcs_pal_scraddr; /* 160: PAL scratch addr */
struct {
u_int64_t
minorrev : 8, /* alphabetic char 'a' - 'z' */
@@ -261,14 +262,14 @@ struct pcs {
sbz1 : 8,
compatibility : 16, /* Compatibility revision */
proc_cnt : 16; /* Processor count */
- } pcs_pal_rev; /* A8: */
+ } pcs_pal_rev; /* 168: */
#define pcs_minorrev pcs_pal_rev.minorrev
#define pcs_majorrev pcs_pal_rev.majorrev
#define pcs_pal_type pcs_pal_rev.pal_type
#define pcs_compatibility pcs_pal_rev.compatibility
#define pcs_proc_cnt pcs_pal_rev.proc_cnt
- u_int64_t pcs_proc_type; /* B0: processor type */
+ u_int64_t pcs_proc_type; /* 176: processor type */
#define PCS_PROC_MAJOR 0x00000000ffffffff
#define PCS_PROC_MAJORSHIFT 0
@@ -288,23 +289,23 @@ struct pcs {
/* Minor number interpretation is processor specific. See cpu.c. */
- u_int64_t pcs_proc_var; /* B8: processor variation. */
+ u_int64_t pcs_proc_var; /* 184: processor variation. */
#define PCS_VAR_VAXFP 0x0000000000000001 /* VAX FP support */
#define PCS_VAR_IEEEFP 0x0000000000000002 /* IEEE FP support */
#define PCS_VAR_PE 0x0000000000000004 /* Primary Eligible */
#define PCS_VAR_RESERVED 0xfffffffffffffff8 /* Reserved */
- char pcs_proc_revision[8]; /* C0: only first 4 valid */
- char pcs_proc_sn[16]; /* C8: only first 10 valid */
- vm_offset_t pcs_machcheck; /* D8: mach chk phys addr. */
- u_int64_t pcs_machcheck_len; /* E0: length in bytes */
- vm_offset_t pcs_halt_pcbb; /* E8: phys addr of halt PCB */
- vm_offset_t pcs_halt_pc; /* F0: halt PC */
- u_int64_t pcs_halt_ps; /* F8: halt PS */
- u_int64_t pcs_halt_r25; /* 100: halt argument list */
- u_int64_t pcs_halt_r26; /* 108: halt return addr list */
- u_int64_t pcs_halt_r27; /* 110: halt procedure value */
+ char pcs_proc_revision[8]; /* 192: only first 4 valid */
+ char pcs_proc_sn[16]; /* 200: only first 10 valid */
+ vm_offset_t pcs_machcheck; /* 216: mach chk phys addr. */
+ u_int64_t pcs_machcheck_len; /* 224: length in bytes */
+ vm_offset_t pcs_halt_pcbb; /* 232: pa of halt PCB */
+ vm_offset_t pcs_halt_pc; /* 240: halt PC */
+ u_int64_t pcs_halt_ps; /* 248: halt PS */
+ u_int64_t pcs_halt_r25; /* 256: halt argument list */
+ u_int64_t pcs_halt_r26; /* 264: halt ra list */
+ u_int64_t pcs_halt_r27; /* 272: halt procedure value */
#define PCS_HALT_RESERVED 0
#define PCS_HALT_POWERUP 1
@@ -315,17 +316,22 @@ struct pcs {
#define PCS_HALT_DOUBLE_ERROR_ABORT 6
#define PCS_HALT_SCBB 7
#define PCS_HALT_PTBR 8 /* 9-FF: reserved */
- u_int64_t pcs_halt_reason; /* 118: */
+ u_int64_t pcs_halt_reason; /* 280: */
- u_int64_t pcs_reserved_soft; /* 120: preserved software */
- u_int64_t pcs_buffer[21]; /* 128: console buffers */
+ u_int64_t pcs_reserved_soft; /* 288: preserved software */
+ struct {
+ u_int32_t rxlen;
+ u_int32_t txlen;
+ char rxbuf[80];
+ char txbuf[80];
+ } pcs_buffer; /* 296: console buffers */
#define PALvar_reserved 0
#define PALvar_OpenVMS 1
#define PALvar_OSF1 2
- u_int64_t pcs_palrevisions[16]; /* 1D0: PALcode revisions */
+ u_int64_t pcs_palrevisions[16]; /* 464: PALcode revisions */
- u_int64_t pcs_reserved_arch[6]; /* 250: reserved arch */
+ u_int64_t pcs_reserved_arch[6]; /* 592: reserved arch */
};
/*
diff --git a/sys/alpha/include/smp.h b/sys/alpha/include/smp.h
index 48d6737..00aec6a 100644
--- a/sys/alpha/include/smp.h
+++ b/sys/alpha/include/smp.h
@@ -1,10 +1,57 @@
/*
+ * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ *
* $FreeBSD$
+ *
*/
+
#ifndef _MACHINE_SMP_H_
#define _MACHINE_SMP_H_
-#define get_mplock() { }
-#define rel_mplock() { }
+#ifdef _KERNEL
+
+#include <machine/mutex.h>
+#include <machine/ipl.h>
+#include <sys/ktr.h>
+
+#ifndef LOCORE
+
+#define BETTER_CLOCK /* unconditional on alpha */
+
+/* global data in mp_machdep.c */
+extern volatile u_int checkstate_probed_cpus;
+extern volatile u_int checkstate_need_ast;
+extern volatile u_int resched_cpus;
+extern void (*cpustop_restartfunc) __P((void));
+
+extern int smp_active;
+extern int mp_ncpus;
+extern u_int all_cpus;
+extern u_int started_cpus;
+extern u_int stopped_cpus;
+
+/* functions in mp_machdep.c */
+void mp_start(void);
+void mp_announce(void);
+void smp_invltlb(void);
+void forward_statclock(int pscnt);
+void forward_hardclock(int pscnt);
+void forward_signal(struct proc *);
+void forward_roundrobin(void);
+int stop_cpus(u_int);
+int restart_cpus(u_int);
+void smp_rendezvous_action(void);
+void smp_rendezvous(void (*)(void *),
+ void (*)(void *),
+ void (*)(void *),
+ void *arg);
+void smp_init_secondary(void);
-#endif
+#endif /* !LOCORE */
+#endif /* _KERNEL */
+#endif /* _MACHINE_SMP_H_ */
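
Illustrative only (not part of the patch): a sketch of how the smp_rendezvous() interface declared above is meant to be called. The argument order is setup, action, teardown, then the opaque argument; NULL skips a phase. The foo_* names are hypothetical.

	static void
	foo_action(void *arg __unused)
	{
		/* runs once on every CPU while all CPUs sit in the rendezvous */
	}

	static void
	foo_on_all_cpus(void)
	{
		smp_rendezvous(NULL, foo_action, NULL, NULL);
	}
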
diff --git a/sys/amd64/amd64/amd64-gdbstub.c b/sys/amd64/amd64/amd64-gdbstub.c
index 986b8d4..b442a37 100644
--- a/sys/amd64/amd64/amd64-gdbstub.c
+++ b/sys/amd64/amd64/amd64-gdbstub.c
@@ -188,7 +188,8 @@ getpacket (char *buffer)
unsigned char ch;
int s;
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/* wait around for the start character, ignore all other characters */
@@ -239,7 +240,7 @@ getpacket (char *buffer)
}
}
while (checksum != xmitcsum);
- splx (s);
+ write_eflags(s);
}
/* send the packet in buffer. */
@@ -253,7 +254,8 @@ putpacket (char *buffer)
int s;
/* $<packet info>#<checksum>. */
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/*
@@ -285,7 +287,7 @@ putpacket (char *buffer)
putDebugChar (hexchars[checksum & 0xf]);
}
while ((getDebugChar () & 0x7f) != '+');
- splx (s);
+ write_eflags(s);
}
static char remcomInBuffer[BUFMAX];
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 2a7559d..54bf003 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -17,7 +17,7 @@
/*
- * Macros for interrupt interrupt entry, call to handler, and exit.
+ * Macros for interrupt entry, call to handler, and exit.
*/
#define FAST_INTR(irq_num, vec_name) \
@@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \
/*
- * Test to see if the source is currntly masked, clear if so.
+ * Test to see if the source is currently masked, clear if so.
*/
#define UNMASK_IRQ(irq_num) \
IMASK_LOCK ; /* into critical reg */ \
@@ -200,7 +200,16 @@ log_intr_event:
#else
#define APIC_ITRACE(name, irq_num, id)
#endif
-
+
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \
maybe_extra_ipending ; \
; \
APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
; \
MASK_LEVEL_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- MP_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f ; /* no */ \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
-; \
incb _intr_nesting_level ; \
; \
/* entry point used by doreti_unpend for HWIs. */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- movl _cpl, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cpl ; \
- lock ; \
- andl $~IRQ_BIT(irq_num), _ipending ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
+ pushl $irq_num; /* pass the IRQ */ \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
MEXITCOUNT ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; /* XXX: iactive bit might be 0 now */ \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- MP_RELLOCK ; \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- POP_FRAME ; /* and return */ \
- iret
+ jmp doreti_next
/*
* Handle "spurious INTerrupts".
@@ -434,20 +380,10 @@ _Xcpuast:
FAKE_MCOUNT(13*4(%esp))
- /*
- * Giant locks do not come cheap.
- * A lot of cycles are going to be wasted here.
- */
- call _get_mplock
-
- movl _cpl, %eax
- pushl %eax
orl $AST_PENDING, _astpending /* XXX */
incb _intr_nesting_level
sti
- pushl $0
-
movl _cpuid, %eax
lock
btrl %eax, _checkstate_pending_ast
@@ -461,7 +397,7 @@ _Xcpuast:
lock
incl CNAME(cpuast_cnt)
MEXITCOUNT
- jmp _doreti
+ jmp doreti_next
1:
/* We are already in the process of delivering an ast for this CPU */
POP_FRAME
@@ -487,40 +423,24 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- MP_TRYLOCK
- testl %eax,%eax /* Did we get the lock ? */
- jz 1f /* No */
-
lock
incl CNAME(forward_irq_hitcnt)
cmpb $4, _intr_nesting_level
- jae 2f
+ jae 1f
- movl _cpl, %eax
- pushl %eax
incb _intr_nesting_level
sti
- pushl $0
-
MEXITCOUNT
- jmp _doreti /* Handle forwarded interrupt */
+ jmp doreti_next /* Handle forwarded interrupt */
1:
lock
- incl CNAME(forward_irq_misscnt)
- call forward_irq /* Oops, we've lost the isr lock */
- MEXITCOUNT
- POP_FRAME
- iret
-2:
- lock
incl CNAME(forward_irq_toodeepcnt)
-3:
- MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
+#if 0
/*
*
*/
@@ -532,9 +452,11 @@ forward_irq:
cmpl $0, CNAME(forward_irq_enabled)
jz 4f
+/* XXX - this is broken now, because mp_lock doesn't exist
movl _mp_lock,%eax
cmpl $FREE_LOCK,%eax
jne 1f
+ */
movl $0, %eax /* Pick CPU #0 if noone has lock */
1:
shrl $24,%eax
@@ -559,6 +481,7 @@ forward_irq:
jnz 3b
4:
ret
+#endif
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
@@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr)
FAST_INTR(22,fastintr22)
FAST_INTR(23,fastintr23)
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, CLKINTR_PENDING)
INTR(1,intr1,)
INTR(2,intr2,)
@@ -728,15 +652,11 @@ _ihandlers:
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
+#if 0
/* active flag for lazy masking */
iactive:
.long 0
+#endif
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
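
A rough conceptual sketch of what the _sched_ithd call introduced above does (illustrative only; the table and field names here are hypothetical stand-ins, not the committed implementation): the vector stub no longer runs the handler itself, it just wakes the interrupt thread for that IRQ.

	void
	sched_ithd(void *cookie)
	{
		int irq = (int)cookie;		/* IRQ number pushed by the vector stub */
		struct ithd *it = ithds[irq];	/* hypothetical per-IRQ thread table */

		if (it == NULL)
			panic("sched_ithd: no thread for irq %d", irq);
		it->it_need = 1;		/* hypothetical "work pending" flag */
		mtx_enter(&sched_lock, MTX_SPIN);
		if (it->it_proc->p_stat == SWAIT) {
			it->it_proc->p_stat = SRUN;
			setrunqueue(it->it_proc);
		}
		mtx_exit(&sched_lock, MTX_SPIN);
	}
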
diff --git a/sys/amd64/amd64/autoconf.c b/sys/amd64/amd64/autoconf.c
index b209065..4edda4b 100644
--- a/sys/amd64/amd64/autoconf.c
+++ b/sys/amd64/amd64/autoconf.c
@@ -163,14 +163,6 @@ configure(dummy)
* XXX this is slightly misplaced.
*/
spl0();
-
- /*
- * Allow lowering of the ipl to the lowest kernel level if we
- * panic (or call tsleep() before clearing `cold'). No level is
- * completely safe (since a panic may occur in a critical region
- * at splhigh()), but we want at least bio interrupts to work.
- */
- safepri = cpl;
}
static void
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index c895fef..db56a1b 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index acb8b40..9e77114 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
diff --git a/sys/amd64/amd64/exception.s b/sys/amd64/amd64/exception.s
index acb8b40..9e77114 100644
--- a/sys/amd64/amd64/exception.s
+++ b/sys/amd64/amd64/exception.s
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 637853e..8610e35 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -245,6 +245,12 @@ npx_probe(dev)
setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+ * XXX This looks highly bogus, but it appears that npc_probe1
+	 * XXX This looks highly bogus, but it appears that npx_probe1
+ * here?
+ */
enable_intr();
result = npx_probe1(dev);
disable_intr();
@@ -797,7 +803,7 @@ npxdna()
/*
* Record new context early in case frstor causes an IRQ13.
*/
- npxproc = curproc;
+ PCPU_SET(npxproc, CURPROC);
curpcb->pcb_savefpu.sv_ex_sw = 0;
/*
* The following frstor may cause an IRQ13 when the state being
@@ -834,16 +840,18 @@ npxsave(addr)
fnsave(addr);
/* fnop(); */
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
#else /* SMP */
+ int intrstate;
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
+ intrstate = save_intr();
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
old_icu2_mask = inb(IO_ICU2 + 1);
@@ -851,12 +859,12 @@ npxsave(addr)
outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
idt[npx_intrno] = npx_idt_probeintr;
- enable_intr();
+ write_eflags(intrstate);
stop_emulating();
fnsave(addr);
fnop();
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
disable_intr();
icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */
icu2_mask = inb(IO_ICU2 + 1);
@@ -866,7 +874,7 @@ npxsave(addr)
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
- enable_intr(); /* back to usual state */
+ restore_intr(intrstate); /* back to previous state */
#endif /* SMP */
}
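
Illustrative only (not part of the patch): the save/restore idiom the change above adopts, so that a caller that already had interrupts disabled does not get them turned back on behind its back. The routine name is hypothetical.

	static void
	foo_touch_hw(void)
	{
		int intrstate;

		intrstate = save_intr();	/* remember current interrupt state */
		disable_intr();
		/* ... poke hardware that must not be interrupted ... */
		restore_intr(intrstate);	/* re-enable only if previously enabled */
	}
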
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 60accd1..78c6075 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -51,6 +51,10 @@
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/resourcevar.h>
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
#include <machine/frame.h>
#include <machine/bootinfo.h>
#include <machine/tss.h>
@@ -73,6 +77,7 @@
#include <machine/sigframe.h>
#include <machine/globaldata.h>
#include <machine/vm86.h>
+#include <machine/mutex.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -127,9 +132,7 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
-#ifdef SMP
-ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest));
-#endif
+ASSYM(PCB_SCHEDNEST, offsetof(struct pcb, pcb_schednest));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
@@ -170,7 +173,9 @@ ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
ASSYM(GD_SIZEOF, sizeof(struct globaldata));
ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
+ASSYM(GD_PREVPROC, offsetof(struct globaldata, gd_prevproc));
ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc));
+ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc));
ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss));
ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
@@ -178,11 +183,21 @@ ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
+ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct globaldata, gd_intr_nesting_level));
#ifdef USER_LDT
ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
#endif
+ASSYM(GD_WITNESS_SPIN_CHECK, offsetof(struct globaldata, gd_witness_spin_check));
+
+/* XXX */
+#ifdef KTR_PERCPU
+ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx));
+ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf));
+ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data));
+#endif
+
#ifdef SMP
ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid));
ASSYM(GD_CPU_LOCKID, offsetof(struct globaldata, gd_cpu_lockid));
@@ -211,3 +226,9 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
+ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl));
+
+ASSYM(MTX_UNOWNED, MTX_UNOWNED);
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c
index 0e11e2b..71ecd63 100644
--- a/sys/amd64/amd64/identcpu.c
+++ b/sys/amd64/amd64/identcpu.c
@@ -42,6 +42,7 @@
#include "opt_cpu.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -53,6 +54,8 @@
#include <machine/specialreg.h>
#include <machine/md_var.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#define IDENTBLUE_CYRIX486 0
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index be86c65..b9395bf 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -607,12 +607,14 @@ void
enable_K5_wt_alloc(void)
{
u_int64_t msr;
+ int intrstate;
/*
* Write allocate is supported only on models 1, 2, and 3, with
* a stepping of 4 or greater.
*/
if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
+ intrstate = save_intr();
disable_intr();
msr = rdmsr(0x83); /* HWCR */
wrmsr(0x83, msr & !(0x10));
@@ -645,7 +647,7 @@ enable_K5_wt_alloc(void)
msr=rdmsr(0x83);
wrmsr(0x83, msr|0x10); /* enable write allocate */
- enable_intr();
+ restore_intr(intrstate);
}
}
@@ -708,7 +710,6 @@ enable_K6_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
void
@@ -770,7 +771,6 @@ enable_K6_2_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
#endif /* I585_CPU && CPU_WT_ALLOC */
diff --git a/sys/amd64/amd64/legacy.c b/sys/amd64/amd64/legacy.c
index 8a30770..5b6cdbc 100644
--- a/sys/amd64/amd64/legacy.c
+++ b/sys/amd64/amd64/legacy.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+	/* The caller didn't specify an interrupt type. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
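
Illustrative only (not part of the patch): from a driver's point of view nothing changes; the INTR_TYPE_* flag passed to bus_setup_intr() now selects the priority of the interrupt thread (PI_NET here) instead of an spl mask. The foo_* names and softc fields are hypothetical.

	static int
	foo_attach(device_t dev)
	{
		struct foo_softc *sc = device_get_softc(dev);

		/* ... allocate sc->irq_res with bus_alloc_resource() ... */
		return (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET,
		    foo_intr, sc, &sc->cookie));
	}
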
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index bddd7d5..fa95fb0 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s
index bddd7d5..fa95fb0 100644
--- a/sys/amd64/amd64/locore.s
+++ b/sys/amd64/amd64/locore.s
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 6edecf0..875c9d5 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -58,6 +58,7 @@
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/proc.h>
@@ -98,10 +99,12 @@
#include <machine/bootinfo.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
+#include <machine/globaldata.h>
+#include <machine/globals.h>
#ifdef SMP
#include <machine/smp.h>
-#include <machine/globaldata.h>
#endif
#ifdef PERFMON
#include <machine/perfmon.h>
@@ -110,6 +113,7 @@
#ifdef OLD_BUS_ARCH
#include <i386/isa/isa_device.h>
#endif
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#include <isa/rtc.h>
#include <machine/vm86.h>
@@ -247,6 +251,11 @@ vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;
+struct cpuhead cpuhead;
+
+mtx_t sched_lock;
+mtx_t Giant;
+
#define offsetof(type, member) ((size_t)(&((type *)0)->member))
static void
@@ -431,6 +440,11 @@ again:
bufinit();
vm_pager_bufferinit();
+ SLIST_INIT(&cpuhead);
+ SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu);
+
+ mtx_init(&sched_lock, "sched lock", MTX_SPIN);
+
#ifdef SMP
/*
* OK, enough kmem_alloc/malloc state should be up, lets get on with it!
@@ -1817,11 +1831,6 @@ init386(first)
#endif
int off;
- /*
- * Prevent lowering of the ipl if we call tsleep() early.
- */
- safepri = cpl;
-
proc0.p_addr = proc0paddr;
atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1871,6 +1880,10 @@ init386(first)
r_gdt.rd_base = (int) gdt;
lgdt(&r_gdt);
+ /* setup curproc so that mutexes work */
+ PCPU_SET(curproc, &proc0);
+ PCPU_SET(prevproc, &proc0);
+
/* make ldt memory segments */
/*
* The data segment limit must not cover the user area because we
@@ -1953,7 +1966,7 @@ init386(first)
/* make an initial tss so cpu can get interrupt stack on syscall! */
common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16;
- common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
+ common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
private_tss = 0;
tss_gdt = &gdt[GPROC0_SEL].sd;
@@ -1974,6 +1987,12 @@ init386(first)
dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ /*
+ * We grab Giant during the vm86bios routines, so we need to ensure
+ * that it is up and running before we use vm86.
+ */
+ mtx_init(&Giant, "Giant", MTX_DEF);
+
vm86_initialize();
getmemsize(first);
@@ -2009,9 +2028,7 @@ init386(first)
/* setup proc 0's pcb */
proc0.p_addr->u_pcb.pcb_flags = 0;
proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
-#ifdef SMP
- proc0.p_addr->u_pcb.pcb_mpnest = 1;
-#endif
+ proc0.p_addr->u_pcb.pcb_schednest = 0;
proc0.p_addr->u_pcb.pcb_ext = 0;
proc0.p_md.md_regs = &proc0_tf;
}
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 61c5ecf..95b5759 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
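Taken together, the ap_boot_lock hunks above form a simple boot gate: the BSP takes the simplelock before starting the APs, each AP must take it in ap_init() before doing its per-CPU setup, the last AP to come up sets smp_started, and a SYSINIT at SI_SUB_SMP finally releases the gate. Below is a minimal userland model of that hand-off, assuming a pthread mutex stands in for the simplelock and an atomic flag for smp_started; the names (boot_gate, aps_up, all_started) are invented for the sketch and are not kernel symbols.

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NAP	3		/* pretend we boot three APs */

static pthread_mutex_t boot_gate = PTHREAD_MUTEX_INITIALIZER;	/* ap_boot_lock */
static atomic_int aps_up;					/* APs done with init */
static atomic_int all_started;					/* smp_started */

static void *
ap_init_model(void *arg)
{
	long id = (long)(intptr_t)arg;

	/* lock against other AP's that are waking up */
	pthread_mutex_lock(&boot_gate);
	printf("AP %ld: per-CPU init done\n", id);
	if (atomic_fetch_add(&aps_up, 1) + 1 == NAP)
		atomic_store(&all_started, 1);	/* last AP flips smp_started */
	/* let other AP's wake up now */
	pthread_mutex_unlock(&boot_gate);

	/* wait until all the AP's are up, then enter the scheduler */
	while (atomic_load(&all_started) == 0)
		;
	printf("AP %ld: entering scheduler\n", id);
	return (NULL);
}

int
main(void)
{
	pthread_t tid[NAP];
	long i;

	/* BSP: take the gate before starting the APs, as in mp_enable() */
	pthread_mutex_lock(&boot_gate);
	for (i = 0; i < NAP; i++)
		pthread_create(&tid[i], NULL, ap_init_model, (void *)(intptr_t)i);
	/* ... the BSP finishes its own bring-up here ... */
	pthread_mutex_unlock(&boot_gate);	/* release_aps() at SI_SUB_SMP */
	for (i = 0; i < NAP; i++)
		pthread_join(tid[i], NULL);
	return (0);
}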
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
index d3602d2..9ede02c 100644
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -114,43 +114,9 @@ mp_begin: /* now running relocated at KERNBASE */
CHECKPOINT(0x39, 6)
- /* wait till we can get into the kernel */
- call _boot_get_mplock
-
- /* Now, let's prepare for some REAL WORK :-) */
+ /* Now, let's prepare for some REAL WORK :-) This doesn't return. */
call _ap_init
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,-4(%esp)
-2:
- cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */
- jz 2b
- call _get_mplock
-
- /* let her rip! (loads new stack) */
- jmp _cpu_switch
-
-NON_GPROF_ENTRY(wait_ap)
- pushl %ebp
- movl %esp, %ebp
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,0(%esp)
- movl %eax, 8(%ebp)
-1:
- cmpl $0, CNAME(smp_started)
- jnz 2f
- decl %eax
- cmpl $0, %eax
- jge 1b
-2:
- call _get_mplock
- movl %ebp, %esp
- popl %ebp
- ret
-
-
/*
* This is the embedded trampoline or bootstrap that is
* copied into 'real-mode' low memory, it is where the
diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c
index 61c5ecf..95b5759 100644
--- a/sys/amd64/amd64/mptable.c
+++ b/sys/amd64/amd64/mptable.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c
index 8a30770..5b6cdbc 100644
--- a/sys/amd64/amd64/nexus.c
+++ b/sys/amd64/amd64/nexus.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
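The nexus_setup_intr() change above drops the spl interrupt-mask pointers and instead hands inthand_add() an interrupt-thread priority derived from the INTR_TYPE_* flags. The following is a compact stand-alone rendering of that mapping; the numeric values in the enums are placeholders invented for the sketch (the real PI_* constants come from the new priority headers), only the mapping itself mirrors the switch above.

#include <stdio.h>

enum {					/* illustrative stand-ins for PI_* */
	PI_REALTIME = 1, PI_TTYHIGH = 4, PI_TTYLOW = 8,
	PI_DISK = 12, PI_NET = 16, PI_DULL = 32,
};
enum {					/* illustrative stand-ins for INTR_* flags */
	INTR_TYPE_TTY = 0x01, INTR_TYPE_BIO = 0x02, INTR_TYPE_NET = 0x04,
	INTR_TYPE_CAM = 0x08, INTR_TYPE_MISC = 0x10, INTR_FAST = 0x80,
};

static int
intr_flags_to_pri(int flags)
{
	switch (flags) {
	case INTR_TYPE_TTY:			/* keyboard or parallel port */
		return (PI_TTYLOW);
	case INTR_TYPE_TTY | INTR_FAST:		/* sio */
		return (PI_TTYHIGH);
	case INTR_TYPE_BIO:			/* XXX tape vs. disk not split yet */
	case INTR_TYPE_CAM:
		return (PI_DISK);
	case INTR_TYPE_NET:
		return (PI_NET);
	case INTR_TYPE_MISC:
		return (PI_DULL);		/* don't care */
	default:
		return (-1);			/* nexus_setup_intr() panics here */
	}
}

int
main(void)
{
	printf("net irq priority: %d\n", intr_flags_to_pri(INTR_TYPE_NET));
	return (0);
}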
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index edae292..7ce9120 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -668,7 +668,7 @@ pmap_pte_quick(pmap, va)
* (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V;
cpu_invlpg(prv_PADDR1);
}
- return prv_PADDR1 + ((unsigned) index & (NPTEPG - 1));
+ return (unsigned *)(prv_PADDR1 + (index & (NPTEPG - 1)));
#else
if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
diff --git a/sys/amd64/amd64/swtch.s b/sys/amd64/amd64/swtch.s
index c895fef..db56a1b 100644
--- a/sys/amd64/amd64/swtch.s
+++ b/sys/amd64/amd64/swtch.s
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
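The swtch.s changes above replace the mp_lock bookkeeping in cpu_switch() with a sched_lock hand-off: the outgoing process stashes the sched_lock recursion count in its PCB, the incoming process's saved count is restored, and ownership of sched_lock is written over to the new curproc; cpu_throw() jumps straight to the "choose a new process" half without saving the old context. A minimal C model of just that hand-off is sketched below; the struct names, the pcb_schednest field name, and model_cpu_switch() are stand-ins invented for the sketch, not the kernel's data layout.

#include <stdio.h>

struct lock  { void *owner; int recurse; };
struct pcbm  { int pcb_schednest; };
struct procm { const char *name; struct pcbm pcb; };

static struct lock sched_lock;

static void
model_cpu_switch(struct procm *from, struct procm *to)
{
	/* save sched_lock recursion count (PCB_SCHEDNEST) */
	if (from != NULL)
		from->pcb.pcb_schednest = sched_lock.recurse;

	/* ... register and FPU state would be saved/restored here ... */

	/* restore recursion count and transfer ownership to the new process */
	sched_lock.recurse = to->pcb.pcb_schednest;
	sched_lock.owner = to;
	printf("switched to %s (sched_lock nest %d)\n",
	    to->name, sched_lock.recurse);
}

int
main(void)
{
	struct procm idle = { "idleproc", { 0 } };
	struct procm p1   = { "proc1",    { 0 } };

	sched_lock.owner = &p1;		/* callers enter with sched_lock held */
	sched_lock.recurse = 1;
	model_cpu_switch(&p1, &idle);	/* ordinary cpu_switch() */
	model_cpu_switch(NULL, &p1);	/* cpu_throw(): old context discarded */
	return (0);
}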
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 51de1ac..f32dfae 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -49,10 +49,12 @@
#include "opt_trap.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
@@ -76,12 +78,14 @@
#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#ifdef POWERFAIL_NMI
@@ -96,11 +100,14 @@
#include "isa.h"
#include "npx.h"
+#include <sys/sysctl.h>
+
int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall2 __P((struct trapframe frame));
+extern void ast __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -142,7 +149,7 @@ static char *trap_msg[] = {
};
static __inline int userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks, int have_mplock));
+ u_quad_t oticks, int have_giant));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
@@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
static __inline int
-userret(p, frame, oticks, have_mplock)
+userret(p, frame, oticks, have_giant)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
- int have_mplock;
+ int have_giant;
{
int sig, s;
while ((sig = CURSIG(p)) != 0) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
postsig(sig);
}
@@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
- }
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
postsig(sig);
+ }
}
/*
* Charge system time if profiling.
*/
if (p->p_flag & P_PROFIL) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
}
curpriority = p->p_priority;
- return(have_mplock);
+ return(have_giant);
}
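The have_mplock to have_giant conversion above keeps the same lazy-acquisition convention: a helper acquires Giant only at the first point it actually needs it and returns the updated ownership flag so its caller can do a single release at the end. A userland model of that convention, assuming a pthread mutex in place of Giant and an invented helper name:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t giant = PTHREAD_MUTEX_INITIALIZER;

static int
deliver_signals(int pending, int have_giant)
{
	while (pending-- > 0) {
		if (have_giant == 0) {		/* mtx_enter(&Giant, MTX_DEF) */
			pthread_mutex_lock(&giant);
			have_giant = 1;
		}
		printf("posting one signal under Giant\n");
	}
	return (have_giant);			/* caller learns what we now hold */
}

int
main(void)
{
	int have_giant = 0;

	have_giant = deliver_signals(2, have_giant);
	have_giant = deliver_signals(0, have_giant);	/* no extra lock taken */
	if (have_giant)				/* mtx_exit(&Giant, MTX_DEF) */
		pthread_mutex_unlock(&giant);
	return (0);
}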
/*
@@ -226,13 +236,20 @@ trap(frame)
u_quad_t sticks = 0;
int i = 0, ucode = 0, type, code;
vm_offset_t eva;
+#ifdef POWERFAIL_NMI
+ static int lastalert = 0;
+#endif
- if (!(frame.tf_eflags & PSL_I)) {
+ atomic_add_int(&cnt.v_trap, 1);
+
+ if ((frame.tf_eflags & PSL_I) == 0) {
/*
- * Buggy application or kernel code has disabled interrupts
- * and then trapped. Enabling interrupts now is wrong, but
- * it is better than running with interrupts disabled until
- * they are accidentally enabled later.
+ * Buggy application or kernel code has disabled
+ * interrupts and then trapped. Enabling interrupts
+ * now is wrong, but it is better than running with
+ * interrupts disabled until they are accidentally
+ * enabled later. XXX Consider whether this is still
+ * correct.
*/
type = frame.tf_trapno;
if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
@@ -252,54 +269,27 @@ trap(frame)
eva = 0;
if (frame.tf_trapno == T_PAGEFLT) {
/*
- * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
- * This problem is worked around by using an interrupt
- * gate for the pagefault handler. We are finally ready
- * to read %cr2 and then must reenable interrupts.
- *
- * XXX this should be in the switch statement, but the
- * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
- * flow of control too much for this to be obviously
- * correct.
+ * For some Cyrix CPUs, %cr2 is clobbered by
+ * interrupts. This problem is worked around by using
+ * an interrupt gate for the pagefault handler. We
+ * are finally ready to read %cr2 and then must
+ * reenable interrupts.
*/
eva = rcr2();
enable_intr();
- }
+ }
+
+ mtx_enter(&Giant, MTX_DEF);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
+
type = frame.tf_trapno;
code = frame.tf_err;
- if (in_vm86call) {
- if (frame.tf_eflags & PSL_VM &&
- (type == T_PROTFLT || type == T_STKFLT)) {
- i = vm86_emulate((struct vm86frame *)&frame);
- if (i != 0)
- /*
- * returns to original process
- */
- vm86_trap((struct vm86frame *)&frame);
- return;
- }
- switch (type) {
- /*
- * these traps want either a process context, or
- * assume a normal userspace trap.
- */
- case T_PROTFLT:
- case T_SEGNPFLT:
- trap_fatal(&frame, eva);
- return;
- case T_TRCTRAP:
- type = T_BPTFLT; /* kernel breakpoint */
- /* FALL THROUGH */
- }
- goto kernel_trap; /* normal kernel trap handling */
- }
-
- if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
+ if ((ISPL(frame.tf_cs) == SEL_UPL) ||
+ ((frame.tf_eflags & PSL_VM) && !in_vm86call)) {
/* user trap */
sticks = p->p_sticks;
@@ -322,16 +312,6 @@ restart:
i = SIGFPE;
break;
- case T_ASTFLT: /* Allow process switch */
- astoff();
- cnt.v_soft++;
- if (p->p_flag & P_OWEUPC) {
- p->p_flag &= ~P_OWEUPC;
- addupc_task(p, p->p_stats->p_prof.pr_addr,
- p->p_stats->p_prof.pr_ticks);
- }
- goto out;
-
/*
* The following two traps can happen in
* vm86 mode, and, if so, we want to handle
@@ -342,7 +322,7 @@ restart:
if (frame.tf_eflags & PSL_VM) {
i = vm86_emulate((struct vm86frame *)&frame);
if (i == 0)
- goto out;
+ goto user;
break;
}
/* FALL THROUGH */
@@ -357,14 +337,20 @@ restart:
case T_PAGEFLT: /* page fault */
i = trap_pfault(&frame, TRUE, eva);
- if (i == -1)
- return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if (i == -2)
+ if (i == -2) {
+ /*
+ * f00f hack workaround has triggered, treat
+ * as illegal instruction not page fault.
+ */
+ frame.tf_trapno = T_PRIVINFLT;
goto restart;
+ }
#endif
- if (i == 0)
+ if (i == -1)
goto out;
+ if (i == 0)
+ goto user;
ucode = T_PAGEFLT;
break;
@@ -377,7 +363,15 @@ restart:
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
- goto handle_powerfail;
+#ifndef TIMER_FREQ
+# define TIMER_FREQ 1193182
+#endif
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -391,7 +385,7 @@ restart:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi)
panic("NMI indicates hardware failure");
break;
@@ -410,9 +404,9 @@ restart:
case T_DNA:
#if NNPX > 0
- /* if a transparent fault (due to context switch "late") */
+ /* transparent fault (due to context switch "late") */
if (npxdna())
- return;
+ goto out;
#endif
if (!pmath_emulate) {
i = SIGFPE;
@@ -422,7 +416,7 @@ restart:
i = (*pmath_emulate)(&frame);
if (i == 0) {
if (!(frame.tf_eflags & PSL_T))
- return;
+ goto out;
frame.tf_eflags &= ~PSL_T;
i = SIGTRAP;
}
@@ -435,13 +429,12 @@ restart:
break;
}
} else {
-kernel_trap:
/* kernel trap */
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(&frame, FALSE, eva);
- return;
+ goto out;
case T_DNA:
#if NNPX > 0
@@ -451,31 +444,35 @@ kernel_trap:
* registered such use.
*/
if (npxdna())
- return;
+ goto out;
#endif
break;
- case T_PROTFLT: /* general protection fault */
- case T_SEGNPFLT: /* segment not present fault */
/*
- * Invalid segment selectors and out of bounds
- * %eip's and %esp's can be set up in user mode.
- * This causes a fault in kernel mode when the
- * kernel tries to return to user mode. We want
- * to get this fault so that we can fix the
- * problem here and not have to check all the
- * selectors and pointers when the user changes
- * them.
+ * The following two traps can happen in
+ * vm86 mode, and, if so, we want to handle
+ * them specially.
*/
-#define MAYBE_DORETI_FAULT(where, whereto) \
- do { \
- if (frame.tf_eip == (int)where) { \
- frame.tf_eip = (int)whereto; \
- return; \
- } \
- } while (0)
-
- if (intr_nesting_level == 0) {
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+ if (frame.tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)&frame);
+ if (i != 0)
+ /*
+ * returns to original process
+ */
+ vm86_trap((struct vm86frame *)&frame);
+ goto out;
+ }
+ /* FALL THROUGH */
+
+ case T_SEGNPFLT: /* segment not present fault */
+ if (in_vm86call)
+ break;
+
+ if (intr_nesting_level != 0)
+ break;
+
/*
* Invalid %fs's and %gs's can be created using
* procfs or PT_SETREGS or by invalidating the
@@ -488,20 +485,38 @@ kernel_trap:
if (frame.tf_eip == (int)cpu_switch_load_gs) {
curpcb->pcb_gs = 0;
psignal(p, SIGBUS);
- return;
+ goto out;
+ }
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %eip's and %esp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame.tf_eip == (int)doreti_iret) {
+ frame.tf_eip = (int)doreti_iret_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_ds) {
+ frame.tf_eip = (int)doreti_popl_ds_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_es) {
+ frame.tf_eip = (int)doreti_popl_es_fault;
+ goto out;
}
- MAYBE_DORETI_FAULT(doreti_iret,
- doreti_iret_fault);
- MAYBE_DORETI_FAULT(doreti_popl_ds,
- doreti_popl_ds_fault);
- MAYBE_DORETI_FAULT(doreti_popl_es,
- doreti_popl_es_fault);
- MAYBE_DORETI_FAULT(doreti_popl_fs,
- doreti_popl_fs_fault);
+ if (frame.tf_eip == (int)doreti_popl_fs) {
+ frame.tf_eip = (int)doreti_popl_fs_fault;
+ goto out;
+ }
if (curpcb && curpcb->pcb_onfault) {
frame.tf_eip = (int)curpcb->pcb_onfault;
- return;
- }
+ goto out;
}
break;
@@ -517,7 +532,7 @@ kernel_trap:
*/
if (frame.tf_eflags & PSL_NT) {
frame.tf_eflags &= ~PSL_NT;
- return;
+ goto out;
}
break;
@@ -529,7 +544,7 @@ kernel_trap:
* silently until the syscall handler has
* saved the flags.
*/
- return;
+ goto out;
}
if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
/*
@@ -537,7 +552,7 @@ kernel_trap:
* flags. Stop single stepping it.
*/
frame.tf_eflags &= ~PSL_T;
- return;
+ goto out;
}
/*
* Ignore debug register trace traps due to
@@ -549,13 +564,13 @@ kernel_trap:
* in kernel space because that is useful when
* debugging the kernel.
*/
- if (user_dbreg_trap()) {
+ if (user_dbreg_trap() && !in_vm86call) {
/*
* Reset breakpoint bits because the
* processor doesn't
*/
load_dr6(rdr6() & 0xfffffff0);
- return;
+ goto out;
}
/*
* Fall through (TRCTRAP kernel mode, kernel address)
@@ -567,28 +582,19 @@ kernel_trap:
*/
#ifdef DDB
if (kdb_trap (type, 0, &frame))
- return;
+ goto out;
#endif
break;
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
-#ifndef TIMER_FREQ
-# define TIMER_FREQ 1193182
-#endif
- handle_powerfail:
- {
- static unsigned lastalert = 0;
-
- if(time_second - lastalert > 10)
- {
+ if (time_second - lastalert > 10) {
log(LOG_WARNING, "NMI: power fail\n");
sysbeep(TIMER_FREQ/880, hz);
lastalert = time_second;
- }
- return;
}
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -602,16 +608,16 @@ kernel_trap:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi == 0)
- return;
+ goto out;
/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
}
trap_fatal(&frame, eva);
- return;
+ goto out;
}
/* Translate fault for emulators (e.g. Linux) */
@@ -630,8 +636,10 @@ kernel_trap:
}
#endif
-out:
+user:
userret(p, &frame, sticks, 1);
+out:
+ mtx_exit(&Giant, MTX_DEF);
}
#ifdef notyet
@@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva)
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
- frame->tf_trapno = T_PRIVINFLT;
+ if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
return -2;
- }
#endif
if (usermode)
goto nogo;
@@ -869,8 +875,7 @@ trap_fatal(frame, eva)
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -917,26 +922,6 @@ trap_fatal(frame, eva)
} else {
printf("Idle\n");
}
- printf("interrupt mask = ");
- if ((cpl & net_imask) == net_imask)
- printf("net ");
- if ((cpl & tty_imask) == tty_imask)
- printf("tty ");
- if ((cpl & bio_imask) == bio_imask)
- printf("bio ");
- if ((cpl & cam_imask) == cam_imask)
- printf("cam ");
- if (cpl == 0)
- printf("none");
-#ifdef SMP
-/**
- * XXX FIXME:
- * we probably SHOULD have stopped the other CPUs before now!
- * another CPU COULD have been touching cpl at this moment...
- */
- printf(" <- SMP: XXX");
-#endif
- printf("\n");
#ifdef KDB
if (kdb_trap(&psl))
@@ -973,8 +958,7 @@ dblfault_handler()
printf("esp = 0x%x\n", common_tss.tss_esp);
printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -1048,12 +1032,14 @@ syscall2(frame)
int error;
int narg;
int args[8];
- int have_mplock = 0;
+ int have_giant = 0;
u_int code;
+ atomic_add_int(&cnt.v_syscall, 1);
+
#ifdef DIAGNOSTIC
if (ISPL(frame.tf_cs) != SEL_UPL) {
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
panic("syscall");
/* NOT REACHED */
}
@@ -1075,9 +1061,9 @@ syscall2(frame)
/*
* The prep code is not MP aware.
*/
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
- rel_mplock();
+ mtx_exit(&Giant, MTX_DEF);
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -1114,8 +1100,8 @@ syscall2(frame)
*/
if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
ktrsyscall(p->p_tracep, code, narg, args);
@@ -1129,15 +1115,15 @@ syscall2(frame)
* we are ktracing
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsyscall(p->p_tracep, code, narg, args);
}
@@ -1192,9 +1178,9 @@ bad:
* Traced syscall. trapsignal() is not MP aware.
*/
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
@@ -1203,13 +1189,13 @@ bad:
/*
* Handle reschedule and other end-of-syscall issues
*/
- have_mplock = userret(p, &frame, sticks, have_mplock);
+ have_giant = userret(p, &frame, sticks, have_giant);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
}
@@ -1225,27 +1211,66 @@ bad:
/*
* Release the MP lock if we had to get it
*/
- if (have_mplock)
- rel_mplock();
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
+
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
+
+void
+ast(frame)
+ struct trapframe frame;
+{
+ struct proc *p = CURPROC;
+ u_quad_t sticks;
+
+ /*
+ * handle atomicy by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
+ astoff();
+ atomic_add_int(&cnt.v_soft, 1);
+ if (p->p_flag & P_OWEUPC) {
+ mtx_enter(&Giant, MTX_DEF);
+ p->p_flag &= ~P_OWEUPC;
+ addupc_task(p, p->p_stats->p_prof.pr_addr,
+ p->p_stats->p_prof.pr_ticks);
+}
+ if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0)
+ mtx_exit(&Giant, MTX_DEF);
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode. MP lock is held on entry and should be
- * held on return.
+ * directly into user mode. Giant is not held on entry, and must not
+ * be held on return.
*/
void
fork_return(p, frame)
struct proc *p;
struct trapframe frame;
{
+ int have_giant;
+
frame.tf_eax = 0; /* Child returns zero */
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0, 1);
+ have_giant = userret(p, &frame, 0, mtx_owned(&Giant));
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
+ }
#endif
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
}
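The new ast() above snapshots p_sticks with a re-read loop because, with interrupts enabled and no lock held, a u_quad_t load on i386 is two 32-bit loads and an interrupt can land between them; re-reading until two consecutive reads agree yields a consistent snapshot. A minimal stand-alone rendering of that idiom, with an invented counter name:

#include <stdio.h>

typedef unsigned long long u_quad;

static volatile u_quad stat_ticks;	/* bumped from "interrupt" context */

static u_quad
stable_read(volatile u_quad *p)
{
	u_quad snap;

	snap = *p;
	while (snap != *p)		/* possibly torn read: try again */
		snap = *p;
	return (snap);
}

int
main(void)
{
	stat_ticks = 0x100000001ULL;
	printf("snapshot: %llu\n", stable_read(&stat_ticks));
	return (0);
}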
diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c
index 15044ab..724f3c2 100644
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Setup the PIC clk handler. The APIC handler is setup later */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
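Throughout the clock code above, every multi-step access to the 8254 (latch the counter, then read the low and high bytes) is now bracketed the same way: save the local interrupt state, disable interrupts, take the clock spin lock, do the register sequence, then unlock and restore. That keeps both a local clock interrupt and another CPU from interleaving their own latch/read with ours. A toy model of getit() under that discipline follows; the pthread mutex and the fake latch variables are stand-ins invented for the sketch.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t clock_lock = PTHREAD_MUTEX_INITIALIZER;	/* CLOCK_LOCK() */
static unsigned latch_lo, latch_hi;	/* pretend 8254 latched count bytes */

static unsigned
getit_model(void)
{
	unsigned low, high;

	/* in the kernel: intrsave = save_intr(); disable_intr(); */
	pthread_mutex_lock(&clock_lock);
	low = latch_lo;			/* inb(TIMER_CNTR0), low byte */
	high = latch_hi;		/* inb(TIMER_CNTR0), high byte */
	pthread_mutex_unlock(&clock_lock);
	/* in the kernel: restore_intr(intrsave); */

	return ((high << 8) | low);
}

int
main(void)
{
	latch_lo = 0x34;
	latch_hi = 0x12;
	printf("count = 0x%x\n", getit_model());
	return (0);
}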
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index cfb6cee..831ab3b 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -57,12 +57,14 @@
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#ifdef SMP
#include <machine/smp.h>
#endif
@@ -177,9 +179,8 @@ cpu_fork(p1, p2, flags)
* pcb2->pcb_onfault: cloned above (always NULL here?).
*/
-#ifdef SMP
- pcb2->pcb_mpnest = 1;
-#endif
+ pcb2->pcb_schednest = 0;
+
/*
* XXX don't copy the i/o pages. this should probably be fixed.
*/
@@ -256,8 +257,11 @@ cpu_exit(p)
reset_dbregs();
pcb->pcb_flags &= ~PCB_DBREGS;
}
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH);
+ mtx_assert(&Giant, MA_NOTOWNED);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -406,17 +410,10 @@ vunmapbuf(bp)
static void
cpu_reset_proxy()
{
- u_int saved_mp_lock;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
- ; /* Wait for other cpu to disable interupts */
- saved_mp_lock = mp_lock;
- mp_lock = 1;
- printf("cpu_reset_proxy: Grabbed mp lock for BSP\n");
- cpu_reset_proxy_active = 3;
- while (cpu_reset_proxy_active == 3)
- ; /* Wait for other cpu to enable interrupts */
+ ; /* Wait for other cpu to see that we've started */
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
@@ -453,6 +450,7 @@ cpu_reset()
cpu_reset_proxyid = cpuid;
cpustop_restartfunc = cpu_reset_proxy;
+ cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
started_cpus = (1<<0); /* Restart CPU #0 */
@@ -461,17 +459,9 @@ cpu_reset()
cnt++; /* Wait for BSP to announce restart */
if (cpu_reset_proxy_active == 0)
printf("cpu_reset: Failed to restart BSP\n");
- __asm __volatile("cli" : : : "memory");
+ enable_intr();
cpu_reset_proxy_active = 2;
- cnt = 0;
- while (cpu_reset_proxy_active == 2 && cnt < 10000000)
- cnt++; /* Do nothing */
- if (cpu_reset_proxy_active == 2) {
- printf("cpu_reset: BSP did not grab mp lock\n");
- cpu_reset_real(); /* XXX: Bogus ? */
- }
- cpu_reset_proxy_active = 4;
- __asm __volatile("sti" : : : "memory");
+
while (1);
/* NOTREACHED */
}
@@ -553,7 +543,7 @@ vm_page_zero_idle()
static int free_rover;
static int zero_state;
vm_page_t m;
- int s;
+ int s, intrsave;
/*
* Attempt to maintain approximately 1/2 of our free pages in a
@@ -569,11 +559,10 @@ vm_page_zero_idle()
if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
return(0);
-#ifdef SMP
- if (try_mplock()) {
-#endif
+ if (mtx_try_enter(&Giant, MTX_DEF)) {
s = splvm();
- __asm __volatile("sti" : : : "memory");
+ intrsave = save_intr();
+ enable_intr();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
if (m != NULL && (m->flags & PG_ZERO) == 0) {
@@ -595,14 +584,10 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- __asm __volatile("cli" : : : "memory");
-#ifdef SMP
- rel_mplock();
-#endif
+ restore_intr(intrsave);
+ mtx_exit(&Giant, MTX_DEF);
return (1);
-#ifdef SMP
}
-#endif
/*
* We have to enable interrupts for a moment if the try_mplock fails
* in order to potentially take an IPI. XXX this should be in
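The idle-time page zeroing above switches from try_mplock() to mtx_try_enter(&Giant, MTX_DEF): the idle loop must never block, so it only does background work when the lock is immediately available and otherwise just returns. A userland rendering of that try-lock pattern, with invented names:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t giant = PTHREAD_MUTEX_INITIALIZER;

static int
zero_one_page_if_idle(void)
{
	if (pthread_mutex_trylock(&giant) != 0)
		return (0);		/* lock busy: do nothing, stay idle */
	/* ... pull a page off the free queue and zero it ... */
	printf("zeroed one page\n");
	pthread_mutex_unlock(&giant);
	return (1);			/* did some work */
}

int
main(void)
{
	return (zero_one_page_if_idle() ? 0 : 1);
}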
diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h
index ffabf7f..18822b8 100644
--- a/sys/amd64/include/cpu.h
+++ b/sys/amd64/include/cpu.h
@@ -46,6 +46,7 @@
#include <machine/psl.h>
#include <machine/frame.h>
#include <machine/segments.h>
+#include <machine/globals.h>
/*
* definitions of cpu-dependent requirements
@@ -86,7 +87,9 @@
* added, we will have an atomicy problem. The type of atomicy we need is
* a non-locked orl.
*/
-#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0)
+#define need_resched() do { \
+ PCPU_SET(astpending, AST_RESCHED|AST_PENDING); \
+} while (0)
#define resched_wanted() (astpending & AST_RESCHED)
/*
@@ -109,8 +112,9 @@
* it off (asynchronous need_resched() conflicts are not critical).
*/
#define signotify(p) aston()
-
-#define aston() do { astpending |= AST_PENDING; } while (0)
+#define aston() do { \
+ PCPU_SET(astpending, astpending | AST_PENDING); \
+} while (0)
#define astoff()
/*
@@ -135,7 +139,9 @@
#ifdef _KERNEL
extern char btext[];
extern char etext[];
+#ifndef intr_nesting_level
extern u_char intr_nesting_level;
+#endif
void fork_trampoline __P((void));
void fork_return __P((struct proc *, struct trapframe));
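The PCPU_SET() forms of need_resched() and aston() above make astpending a per-CPU datum rather than a single global, so each CPU marks only itself for rescheduling or AST delivery. A toy model of that change is sketched below; the globaldata layout and the PCPU() macro here are invented for illustration and do not match the real machine/globaldata.h structure.

#include <stdio.h>

#define AST_PENDING	0x1
#define AST_RESCHED	0x2
#define NCPU		2

struct globaldata { unsigned int gd_astpending; };
static struct globaldata cpu_data[NCPU];

/* stand-ins for the per-CPU accessors on a given CPU */
#define PCPU(cpu, field)	(cpu_data[(cpu)].field)
#define need_resched(cpu) \
	(PCPU((cpu), gd_astpending) = AST_RESCHED | AST_PENDING)
#define aston(cpu) \
	(PCPU((cpu), gd_astpending) |= AST_PENDING)

int
main(void)
{
	need_resched(1);	/* only CPU 1 is asked to reschedule */
	aston(0);
	printf("cpu0 ast %#x, cpu1 ast %#x\n",
	    PCPU(0, gd_astpending), PCPU(1, gd_astpending));
	return (0);
}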
diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 9a4052f..39868df 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -86,20 +86,29 @@ static __inline void
disable_intr(void)
{
__asm __volatile("cli" : : : "memory");
-#ifdef SMP
- MPINTR_LOCK();
-#endif
}
static __inline void
enable_intr(void)
{
-#ifdef SMP
- MPINTR_UNLOCK();
-#endif
__asm __volatile("sti");
}
+static __inline u_int
+save_intr(void)
+{
+ u_int ef;
+
+ __asm __volatile("pushfl; popl %0" : "=r" (ef));
+ return (ef);
+}
+
+static __inline void
+restore_intr(u_int ef)
+{
+ __asm __volatile("pushl %0; popfl" : : "r" (ef) : "memory" );
+}
+
#define HAVE_INLINE_FFS
static __inline int
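The new save_intr()/restore_intr() pair above names the pattern the clock code previously open-coded with read_eflags()/write_eflags(). The point of restoring the saved state rather than calling a bare enable_intr() is nesting safety: a critical section entered while interrupts are already disabled must not turn them back on when it exits. A toy model of that property, using a flag in place of the real EFLAGS.IF bit (the function bodies here are userland stubs, not the inline-assembly versions above):

#include <stdio.h>

static int intr_enabled = 1;		/* models EFLAGS.IF */

static int  save_intr(void)      { return (intr_enabled); }
static void disable_intr(void)   { intr_enabled = 0; }
static void restore_intr(int ef) { intr_enabled = ef; }

static void
inner_critical_section(void)
{
	int intrsave = save_intr();

	disable_intr();
	/* ... touch hardware ... */
	restore_intr(intrsave);		/* leaves interrupts exactly as found */
}

int
main(void)
{
	disable_intr();			/* outer code already has them off */
	inner_critical_section();
	printf("still disabled: %s\n", intr_enabled == 0 ? "yes" : "no");
	return (0);
}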
diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h
index 61c5ecf..95b5759 100644
--- a/sys/amd64/include/mptable.h
+++ b/sys/amd64/include/mptable.h
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
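The mutex calls used throughout the hunks above are declared by the new machine/mutex.h added below. Its mtx_lock word packs the owning thread and two low flag bits into a single word, with MTX_UNOWNED as the free-lock cookie. A small demonstration of that encoding follows, reusing the constants from the header; the fake thread "address" is made up for the example and the bit manipulation is done by the real mtx_enter()/mtx_exit() primitives, not by callers.

#include <stdio.h>

#define MTX_RECURSE	0x01	/* (non-spin) lock held recursively */
#define MTX_CONTESTED	0x02	/* (non-spin) lock contested */
#define MTX_FLAGMASK	~(MTX_RECURSE | MTX_CONTESTED)
#define MTX_UNOWNED	0x8	/* cookie for a free mutex */

int
main(void)
{
	unsigned int mtx_lock;
	unsigned int curthd = 0xc0de1000;	/* pretend CURTHD value */

	mtx_lock = MTX_UNOWNED;			/* freshly mtx_init()ed */
	mtx_lock = curthd;			/* acquired: owner stored directly */
	mtx_lock |= MTX_CONTESTED;		/* a second thread blocked on it */

	printf("owner %#x, contested %d, recursed %d\n",
	    mtx_lock & MTX_FLAGMASK,
	    (mtx_lock & MTX_CONTESTED) != 0,
	    (mtx_lock & MTX_RECURSE) != 0);
	return (0);
}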
diff --git a/sys/amd64/include/mutex.h b/sys/amd64/include/mutex.h
new file mode 100644
index 0000000..ef0c963
--- /dev/null
+++ b/sys/amd64/include/mutex.h
@@ -0,0 +1,786 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MUTEX_H_
+#define _MACHINE_MUTEX_H_
+
+#ifndef LOCORE
+
+#include <sys/ktr.h>
+#include <sys/queue.h>
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine/globals.h>
+
+/*
+ * If kern_mutex.c is being built, compile non-inlined versions of various
+ * functions so that kernel modules can use them.
+ */
+#ifndef _KERN_MUTEX_C_
+#define _MTX_INLINE static __inline
+#else
+#define _MTX_INLINE
+#endif
+
+/*
+ * Mutex flags
+ *
+ * Types
+ */
+#define MTX_DEF 0x0 /* Default (spin/sleep) */
+#define MTX_SPIN 0x1 /* Spin only lock */
+
+/* Options */
+#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */
+#define MTX_NORECURSE 0x8 /* No recursion possible */
+#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */
+#define MTX_NOSWITCH 0x20 /* Do not switch on release */
+#define MTX_FIRST 0x40 /* First spin lock holder */
+#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */
+
+/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */
+#define MTX_HARDOPTS (MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH)
+
+/* Flags/value used in mtx_lock */
+#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */
+#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */
+#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED)
+#define MTX_UNOWNED 0x8 /* Cookie for free mutex */
+
+struct proc; /* XXX */
+
+/*
+ * Sleep/spin mutex
+ */
+struct mtx {
+ volatile u_int mtx_lock; /* lock owner/gate/flags */
+ volatile u_short mtx_recurse; /* number of recursive holds */
+ u_short mtx_f1;
+ u_int mtx_savefl; /* saved flags (for spin locks) */
+ char *mtx_description;
+ TAILQ_HEAD(, proc) mtx_blocked;
+ LIST_ENTRY(mtx) mtx_contested;
+ struct mtx *mtx_next; /* all locks in system */
+ struct mtx *mtx_prev;
+#ifdef SMP_DEBUG
+ /* If you add anything here, adjust the mtxf_t definition below */
+ struct witness *mtx_witness;
+ LIST_ENTRY(mtx) mtx_held;
+ char *mtx_file;
+ int mtx_line;
+#endif /* SMP_DEBUG */
+};
+
+typedef struct mtx mtx_t;
+
+/*
+ * Filler for structs which need to remain the same size
+ * whether or not SMP_DEBUG is turned on.
+ */
+typedef struct mtxf {
+#ifdef SMP_DEBUG
+ char mtxf_data[0];
+#else
+ char mtxf_data[4*sizeof(void *) + sizeof(int)];
+#endif
+} mtxf_t;
+
+#define mp_fixme(string)
+
+#ifdef _KERNEL
+/* Misc */
+#define CURTHD ((u_int)CURPROC) /* Current thread ID */
+
+/* Prototypes */
+void mtx_init(mtx_t *m, char *description, int flag);
+void mtx_enter_hard(mtx_t *, int type, int flags);
+void mtx_exit_hard(mtx_t *, int type);
+void mtx_destroy(mtx_t *m);
+
+#if (defined(KLD_MODULE) || defined(_KERN_MUTEX_C_))
+void mtx_enter(mtx_t *mtxp, int type);
+int mtx_try_enter(mtx_t *mtxp, int type);
+void mtx_exit(mtx_t *mtxp, int type);
+#endif
+
+/* Global locks */
+extern mtx_t sched_lock;
+extern mtx_t Giant;
+
+/*
+ * Used to replace return with an exit Giant and return.
+ */
+
+#define EGAR(a) \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return (a); \
+} while (0)
+
+#define VEGAR \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return; \
+} while (0)
+
+#define DROP_GIANT() \
+do { \
+ int _giantcnt; \
+ WITNESS_SAVE_DECL(Giant); \
+ \
+ WITNESS_SAVE(&Giant, Giant); \
+ for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \
+ mtx_exit(&Giant, MTX_DEF)
+
+#define PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant); \
+} while (0)
+
+#define PARTIAL_PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant)
+
+
+/*
+ * Debugging
+ */
+#ifndef SMP_DEBUG
+#define mtx_assert(m, what)
+#else /* SMP_DEBUG */
+
+#define MA_OWNED 1
+#define MA_NOTOWNED 2
+#define mtx_assert(m, what) { \
+ switch ((what)) { \
+ case MA_OWNED: \
+ ASS(mtx_owned((m))); \
+ break; \
+ case MA_NOTOWNED: \
+ ASS(!mtx_owned((m))); \
+ break; \
+ default: \
+ panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \
+ } \
+}
+
+#ifdef INVARIANTS
+#define ASS(ex) MPASS(ex)
+#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ #ex, __FILE__, __LINE__)
+#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ what, __FILE__, __LINE__)
+
+#ifdef MTX_STRS
+char STR_IEN[] = "fl & 0x200";
+char STR_IDIS[] = "!(fl & 0x200)";
+#else /* MTX_STRS */
+extern char STR_IEN[];
+extern char STR_IDIS[];
+#endif /* MTX_STRS */
+#define ASS_IEN MPASS2(read_eflags() & 0x200, STR_IEN)
+#define ASS_IDIS MPASS2((read_eflags() & 0x200) == 0, STR_IDIS)
+#endif /* INVARIANTS */
+
+#endif /* SMP_DEBUG */
+
+#if !defined(SMP_DEBUG) || !defined(INVARIANTS)
+#define ASS(ex)
+#define MPASS(ex)
+#define MPASS2(ex, where)
+#define ASS_IEN
+#define ASS_IDIS
+#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */
+
+#ifdef WITNESS
+#ifndef SMP_DEBUG
+#error WITNESS requires SMP_DEBUG
+#endif /* SMP_DEBUG */
+#define WITNESS_ENTER(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_enter((m), (f), __FILE__, __LINE__)
+#define WITNESS_EXIT(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_exit((m), (f), __FILE__, __LINE__)
+
+#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__)
+#define WITNESS_SAVE_DECL(n) \
+ char * __CONCAT(n, __wf); \
+ int __CONCAT(n, __wl)
+
+#define WITNESS_SAVE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \
+} while (0)
+
+#define WITNESS_RESTORE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \
+} while (0)
+
+void witness_init(mtx_t *, int flag);
+void witness_destroy(mtx_t *);
+void witness_enter(mtx_t *, int, char *, int);
+void witness_try_enter(mtx_t *, int, char *, int);
+void witness_exit(mtx_t *, int, char *, int);
+void witness_display(void(*)(const char *fmt, ...));
+void witness_list(struct proc *);
+int witness_sleep(int, mtx_t *, char *, int);
+void witness_save(mtx_t *, char **, int *);
+void witness_restore(mtx_t *, char *, int);
+#else /* WITNESS */
+#define WITNESS_ENTER(m, flag)
+#define WITNESS_EXIT(m, flag)
+#define WITNESS_SLEEP(check, m)
+#define WITNESS_SAVE_DECL(n)
+#define WITNESS_SAVE(m, n)
+#define WITNESS_RESTORE(m, n)
+
+/*
+ * flag++ is a sleazy way of shutting up the unused parameter warning
+ * in mtx_init()
+ */
+#define witness_init(m, flag) flag++
+#define witness_destroy(m)
+#define witness_enter(m, flag, f, l)
+#define witness_try_enter(m, flag, f, l )
+#define witness_exit(m, flag, f, l)
+#endif /* WITNESS */
+
+/*
+ * Assembly macros (for internal use only)
+ *------------------------------------------------------------------------------
+ */
+
+#define _V(x) __STRING(x)
+
+#ifndef I386_CPU
+
+/*
+ * For 486 and newer processors.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 1f;" /* Got it */ \
+" andl $" _V(MTX_FLAGMASK) ",%%eax;" /* turn off spec bits */ \
+" cmpl %%eax,%3;" /* already have it? */ \
+" je 2f;" /* yes, recurse */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+"2: lock; orl $" _V(MTX_RECURSE) ",%1;" \
+" incw %2;" \
+"1:" \
+"# getlock_sleep" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "+m" (mtxp->mtx_recurse) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" pushfl;" \
+" cli;" \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 2f;" /* got it */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type, oflags) */ \
+" addl $0xc,%%esp;" \
+" jmp 1f;" \
+"2: popl %2;" /* save flags */ \
+"1:" \
+"# getlock_spin_block" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "=m" (mtxp->mtx_savefl) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %2,%1;" /* Try */ \
+" jz 1f;" /* got it */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type) */ \
+" addl $8,%%esp;" \
+"1:" \
+"# getlock_norecurse" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock) /* 1 */ \
+ : "r" (tid), /* 2 (input) */ \
+ "gi" (type), /* 3 */ \
+ "g" (mtxp) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock, assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+#define _exitlock_norecurse(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %4,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" pushl %2;" \
+" pushl %3;" \
+" call mtx_exit_hard;" \
+" addl $8,%%esp;" \
+"1:" \
+"# exitlock_norecurse" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+a" (_tid) /* 1 */ \
+ : "gi" (type), /* 2 (input) */ \
+ "g" (mtxp), /* 3 */ \
+ "r" (MTX_UNOWNED) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %5,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" testl $" _V(MTX_RECURSE) ",%%eax;" /* recursed? */ \
+" jnz 3f;" /* handle recursion */ \
+ /* Lock not recursed and contested: do the hard way */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_exit_hard;" /* mtx_exit_hard(mtxp,type) */ \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+ /* lock recursed, lower recursion level */ \
+"3: decw %1;" /* one less level */ \
+" jnz 1f;" /* still recursed, done */ \
+" lock; andl $~" _V(MTX_RECURSE) ",%0;" /* turn off recurse flag */ \
+"1:" \
+"# exitlock" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "+a" (_tid) /* 2 */ \
+ : "gi" (type), /* 3 (input) */ \
+ "g" (mtxp), /* 4 */ \
+ "r" (MTX_UNOWNED) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a spin lock (with possible recursion).
+ *
+ * We use cmpxchgl to clear the lock (instead of a simple store) to flush
+ * posting buffers and make the change visible to other CPU's.
+ */
+#define _exitlock_spin(mtxp, inten1, inten2) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movw %1,%%ax;" \
+" decw %%ax;" \
+" js 1f;" \
+" movw %%ax,%1;" \
+" jmp 2f;" \
+"1: movl %0,%%eax;" \
+" movl $ " _V(MTX_UNOWNED) ",%%ecx;" \
+" " inten1 ";" \
+" " MPLOCKED "" \
+" cmpxchgl %%ecx,%0;" \
+" " inten2 ";" \
+"2:" \
+"# exitlock_spin" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "=&a" (_res) /* 2 */ \
+ : "g" (mtxp->mtx_savefl) /* 3 (used in 'inten') */ \
+ : "memory", "ecx" /* used */ ); \
+})
+
+#else /* I386_CPU */
+
+/*
+ * For 386 processors only.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \
+ if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \
+ else { \
+ atomic_set_int(&(mp)->mtx_lock, MTX_RECURSE); \
+ (mp)->mtx_recurse++; \
+ } \
+ } \
+} while (0)
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mp, tid, type) do { \
+ u_int _mtx_fl = read_eflags(); \
+ disable_intr(); \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_fl); \
+ else \
+ (mp)->mtx_savefl = _mtx_fl; \
+} while (0)
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \
+} while (0)
+
+/*
+ * Release a sleep lock, assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+#define _exitlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+} while (0)
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) { \
+ if ((mp)->mtx_lock & MTX_RECURSE) { \
+ if (--((mp)->mtx_recurse) == 0) \
+ atomic_clear_int(&(mp)->mtx_lock, \
+ MTX_RECURSE); \
+ } else { \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+ } \
+ } \
+} while (0)
+
+/* Release a spin lock (with possible recursion). */
+#define _exitlock_spin(mp, inten1, inten2) do { \
+ if ((mp)->mtx_recurse == 0) { \
+ atomic_cmpset_int(&(mp)->mtx_lock, (mp)->mtx_lock, \
+ MTX_UNOWNED); \
+ write_eflags((mp)->mtx_savefl); \
+ } else { \
+ (mp)->mtx_recurse--; \
+ } \
+} while (0)
+
+#endif /* I386_CPU */
+
+/*
+ * Externally visible mutex functions.
+ *------------------------------------------------------------------------------
+ */
+
+/*
+ * Return non-zero if a mutex is already owned by the current thread.
+ */
+#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD)
+
+/* Common strings */
+#ifdef MTX_STRS
+#ifdef KTR_EXTEND
+
+/*
+ * KTR_EXTEND saves file name and line for all entries, so we don't need them
+ * here. Theoretically we should also change the entries which refer to them
+ * (from CTR5 to CTR3), but since they're just passed to snprintf as the last
+ * parameters, it doesn't do any harm to leave them.
+ */
+char STR_mtx_enter_fmt[] = "GOT %s [%x] r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] result=%d";
+#else
+char STR_mtx_enter_fmt[] = "GOT %s [%x] at %s:%d r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] at %s:%d r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] at %s:%d result=%d";
+#endif
+char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0";
+char STR_mtx_owned[] = "mtx_owned(_mpp)";
+char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0";
+#else /* MTX_STRS */
+extern char STR_mtx_enter_fmt[];
+extern char STR_mtx_bad_type[];
+extern char STR_mtx_exit_fmt[];
+extern char STR_mtx_owned[];
+extern char STR_mtx_recurse[];
+extern char STR_mtx_try_enter_fmt[];
+#endif /* MTX_STRS */
+
+#ifndef KLD_MODULE
+/*
+ * Get lock 'm'; the macro handles the easy (and most common) cases and leaves
+ * the slow stuff to the mtx_enter_hard() function.
+ *
+ * Note: since type is usually a constant, much of this code is optimized out.
+ */
+_MTX_INLINE void
+mtx_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *_mpp = mtxp;
+
+ /* bits only valid on mtx_exit() */
+ MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
+ STR_mtx_bad_type);
+
+ do {
+ if ((type) & MTX_SPIN) {
+ /*
+ * Easy cases of spin locks:
+ *
+ * 1) We already own the lock and will simply
+ * recurse on it (if RLIKELY)
+ *
+ * 2) The lock is free, we just get it
+ */
+ if ((type) & MTX_RLIKELY) {
+ /*
+ * Check for recursion, if we already
+ * have this lock we just bump the
+ * recursion count.
+ */
+ if (_mpp->mtx_lock == CURTHD) {
+ _mpp->mtx_recurse++;
+ break; /* Done */
+ }
+ }
+
+ if (((type) & MTX_TOPHALF) == 0) {
+ /*
+ * If an interrupt thread uses this
+ * we must block interrupts here.
+ */
+ if ((type) & MTX_FIRST) {
+ ASS_IEN;
+ disable_intr();
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ _getlock_spin_block(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ /* Sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _getlock_sleep(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } while (0);
+ WITNESS_ENTER(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+}
+
+/*
+ * Attempt to get MTX_DEF lock, return non-zero if lock acquired.
+ *
+ * XXX DOES NOT HANDLE RECURSION
+ */
+_MTX_INLINE int
+mtx_try_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+ int _rval;
+
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);
+#ifdef SMP_DEBUG
+ if (_rval && (_mpp)->mtx_witness != NULL) {
+ ASS((_mpp)->mtx_recurse == 0);
+ witness_try_enter(_mpp, type, __FILE__, __LINE__);
+ }
+#endif
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, _rval);
+
+ return _rval;
+}
+
+#define mtx_legal2block() (read_eflags() & 0x200)
+
+/*
+ * Release lock m.
+ */
+_MTX_INLINE void
+mtx_exit(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+
+ MPASS2(mtx_owned(_mpp), STR_mtx_owned);
+ WITNESS_EXIT(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_exit_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+ if ((type) & MTX_SPIN) {
+ if ((type) & MTX_NORECURSE) {
+ MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse);
+ atomic_cmpset_int(&_mpp->mtx_lock, _mpp->mtx_lock,
+ MTX_UNOWNED);
+ if (((type) & MTX_TOPHALF) == 0) {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ enable_intr();
+ } else
+ write_eflags(_mpp->mtx_savefl);
+ }
+ } else {
+ if ((type) & MTX_TOPHALF)
+ _exitlock_spin(_mpp,,);
+ else {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ _exitlock_spin(_mpp,, "sti");
+ } else {
+ _exitlock_spin(_mpp,
+ "pushl %3", "popfl");
+ }
+ }
+ }
+ } else {
+ /* Handle sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS);
+ else {
+ _exitlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ }
+}
+
+#endif /* KLD_MODULE */
+#endif /* _KERNEL */
+
+#else /* !LOCORE */
+
+/*
+ * Simple assembly macros to get and release non-recursive spin locks
+ */
+
+#if defined(I386_CPU)
+
+#define MTX_EXIT(lck, reg) \
+ movl $ MTX_UNOWNED,lck+MTX_LOCK;
+
+#else /* I386_CPU */
+
+#define MTX_ENTER(reg, lck) \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b
+
+/* Must use locked bus op (cmpxchg) when setting to unowned (barrier) */
+#define MTX_EXIT(lck,reg) \
+ movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+
+#define MTX_ENTER_WITH_RECURSION(reg, lck) \
+ movl lck+MTX_LOCK,%eax; \
+ cmpl PCPU_CURPROC,%eax; \
+ jne 9f; \
+ incw lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b; \
+8:
+
+#define MTX_EXIT_WITH_RECURSION(lck,reg) \
+ movw lck+MTX_RECURSECNT,%ax; \
+ decw %ax; \
+ js 9f; \
+ movw %ax,lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+8:
+
+#endif /* I386_CPU */
+#endif /* !LOCORE */
+#endif /* _MACHINE_MUTEX_H_ */
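
The header above is the whole machine-dependent mutex API: mtx_init()/mtx_destroy() for setup and teardown, mtx_enter()/mtx_try_enter()/mtx_exit() for acquisition and release, and the MTX_DEF/MTX_SPIN type flags that select a sleep or spin mutex. A minimal usage sketch follows; foo_lock, foo_count and the foo_* functions are hypothetical consumer code, not part of this change.

	#include <machine/mutex.h>

	static mtx_t foo_lock;			/* protects foo_count */
	static int foo_count;

	static void
	foo_setup(void)
	{
		/* MTX_DEF: default (sleep) mutex; use MTX_SPIN for a spin lock. */
		mtx_init(&foo_lock, "foo lock", MTX_DEF);
	}

	static void
	foo_bump(void)
	{
		mtx_enter(&foo_lock, MTX_DEF);	/* blocks if another thread owns it */
		foo_count++;
		mtx_exit(&foo_lock, MTX_DEF);
	}

	static void
	foo_teardown(void)
	{
		mtx_destroy(&foo_lock);
	}
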
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 08beb5a..1c7af85 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -72,11 +72,7 @@ struct pcb {
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
caddr_t pcb_onfault; /* copyin/out fault recovery */
-#ifdef SMP
- u_long pcb_mpnest;
-#else
- u_long pcb_mpnest_dontuse;
-#endif
+ int pcb_schednest;
int pcb_gs;
struct pcb_ext *pcb_ext; /* optional pcb extension */
u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index 58bd9cf..440da60 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -26,6 +26,20 @@
* $FreeBSD$
*/
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pmap.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
+
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. genassym uses this to generate offsets for the assembler
@@ -41,11 +55,14 @@
struct globaldata {
struct privatespace *gd_prvspace; /* self-reference */
struct proc *gd_curproc;
+ struct proc *gd_prevproc;
struct proc *gd_npxproc;
struct pcb *gd_curpcb;
+ struct proc *gd_idleproc;
struct timeval gd_switchtime;
struct i386tss gd_common_tss;
int gd_switchticks;
+ int gd_intr_nesting_level;
struct segment_descriptor gd_common_tssd;
struct segment_descriptor *gd_tss_gdt;
#ifdef USER_LDT
@@ -67,8 +84,22 @@ struct globaldata {
unsigned *gd_prv_PADDR1;
#endif
u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+ int gd_witness_spin_check;
+#ifdef KTR_PERCPU
+#ifdef KTR
+ volatile int gd_ktr_idx;
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[KTR_SIZE];
+#endif
+#endif
};
+extern struct globaldata globaldata;
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
#ifdef SMP
/*
* This is the upper (0xff800000) address space layout that is per-cpu.
@@ -93,3 +124,5 @@ struct privatespace {
extern struct privatespace SMP_prvspace[];
#endif
+
+#endif /* ! _MACHINE_GLOBALDATA_H_ */
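
Besides the new per-CPU fields, the hunk above threads every globaldata structure onto a system-wide list: gd_allcpu is the SLIST_ENTRY and cpuhead is the exported list head. Below is a hedged sketch of how a caller might walk that list with the standard <sys/queue.h> macros; the function and what it counts are hypothetical.

	#include <sys/queue.h>

	/* Hypothetical example: count CPUs with an AST pending by walking cpuhead. */
	static int
	count_pending_asts(void)
	{
		struct globaldata *gd;
		int n = 0;

		SLIST_FOREACH(gd, &cpuhead, gd_allcpu) {
			if (gd->gd_astpending)
				n++;
		}
		return (n);
	}
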
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 69b716b..20d4fa3 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -15,6 +15,9 @@
#ifdef _KERNEL
+#ifdef I386_CPU
+#error SMP not supported with I386_CPU
+#endif
#if defined(SMP) && !defined(APIC_IO)
# error APIC_IO required for SMP, add "options APIC_IO" to your config file.
#endif /* SMP && !APIC_IO */
@@ -57,23 +60,6 @@ extern int bootMP_size;
/* functions in mpboot.s */
void bootMP __P((void));
-/* global data in mplock.s */
-extern u_int mp_lock;
-extern u_int isr_lock;
-#ifdef RECURSIVE_MPINTRLOCK
-extern u_int mpintr_lock;
-#endif /* RECURSIVE_MPINTRLOCK */
-
-/* functions in mplock.s */
-void get_mplock __P((void));
-void rel_mplock __P((void));
-int try_mplock __P((void));
-#ifdef RECURSIVE_MPINTRLOCK
-void get_mpintrlock __P((void));
-void rel_mpintrlock __P((void));
-int try_mpintrlock __P((void));
-#endif /* RECURSIVE_MPINTRLOCK */
-
/* global data in apic_vector.s */
extern volatile u_int stopped_cpus;
extern volatile u_int started_cpus;
@@ -185,23 +171,7 @@ extern int smp_started;
extern volatile int smp_idle_loops;
#endif /* !LOCORE */
-#else /* !SMP && !APIC_IO */
-
-/*
- * Create dummy MP lock empties
- */
-
-static __inline void
-get_mplock(void)
-{
-}
-
-static __inline void
-rel_mplock(void)
-{
-}
-
-#endif
+#endif /* SMP && !APIC_IO */
#endif /* _KERNEL */
#endif /* _MACHINE_SMP_H_ */
diff --git a/sys/amd64/isa/atpic_vector.S b/sys/amd64/isa/atpic_vector.S
index e427351..d2b88bf 100644
--- a/sys/amd64/isa/atpic_vector.S
+++ b/sys/amd64/isa/atpic_vector.S
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
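
With the INTR macro now only masking the IRQ and calling _sched_ithd, re-enabling the source is deferred to the interrupt thread, which is what the new setimask0/setimask1 helpers are for. A hypothetical C-side caller is sketched below: the imen declaration and the ithd_unmask() wrapper are assumptions for illustration; only setimask0()/setimask1() and the "clear the bit in imen, then rewrite the ICU's IMR" semantics come from the assembly above.

	extern unsigned imen;			/* ICU mask shadow (assumed declaration) */
	extern void setimask0(void);		/* writes imen low byte to ICU1 */
	extern void setimask1(void);		/* writes imen high byte to ICU2 */

	/* Hypothetical helper an interrupt thread might call when its handlers finish. */
	static void
	ithd_unmask(int irq)
	{
		imen &= ~(1 << irq);		/* clear the masked bit for this IRQ */
		if (irq < 8)
			setimask0();
		else
			setimask1();
	}
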
diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c
index 15044ab..724f3c2 100644
--- a/sys/amd64/isa/clock.c
+++ b/sys/amd64/isa/clock.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Setup the PIC clk handler. The APIC handler is setup later */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
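
Throughout clock.c the old read_eflags()/disable_intr()/write_eflags() bracketing is replaced by the pattern shown in getit() above: save the interrupt state, disable interrupts, take the clock spin lock, and undo all three in reverse order. Restated as a standalone sketch (read_timer0() is a hypothetical name; the body mirrors getit()):

	static int
	read_timer0(void)
	{
		int high, low, intrsave;

		intrsave = save_intr();		/* remember the caller's interrupt state */
		disable_intr();			/* mask local interrupts */
		CLOCK_LOCK();			/* take the clock spin lock (SMP) */

		/* Select timer0 and latch its counter value. */
		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
		low = inb(TIMER_CNTR0);
		high = inb(TIMER_CNTR0);

		CLOCK_UNLOCK();
		restore_intr(intrsave);		/* put interrupts back the way they were */
		return ((high << 8) | low);
	}
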
diff --git a/sys/amd64/isa/icu_ipl.S b/sys/amd64/isa/icu_ipl.S
index 3475358..d178d5c 100644
--- a/sys/amd64/isa/icu_ipl.S
+++ b/sys/amd64/isa/icu_ipl.S
@@ -55,63 +55,6 @@ _imen: .long HWI_MASK
SUPERALIGN_TEXT
/*
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. We have to repeat the check since if there is an
- * interrupt while we're looking, _doreti processing for the
- * interrupt will handle all the unmasked pending interrupts
- * because we restored early. We're repeating the calculation
- * of (ipending & ~cpl) anyway so that the caller doesn't have
- * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx"
- * is undefined when %ecx is 0 so we can't rely on the secondary
- * btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx
- andl _ipending,%ecx
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- */
- jmp *vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
diff --git a/sys/amd64/isa/icu_ipl.s b/sys/amd64/isa/icu_ipl.s
index 3475358..d178d5c 100644
--- a/sys/amd64/isa/icu_ipl.s
+++ b/sys/amd64/isa/icu_ipl.s
@@ -55,63 +55,6 @@ _imen: .long HWI_MASK
SUPERALIGN_TEXT
/*
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. We have to repeat the check since if there is an
- * interrupt while we're looking, _doreti processing for the
- * interrupt will handle all the unmasked pending interrupts
- * because we restored early. We're repeating the calculation
- * of (ipending & ~cpl) anyway so that the caller doesn't have
- * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx"
- * is undefined when %ecx is 0 so we can't rely on the secondary
- * btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx
- andl _ipending,%ecx
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- */
- jmp *vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
diff --git a/sys/amd64/isa/icu_vector.S b/sys/amd64/isa/icu_vector.S
index e427351..d2b88bf 100644
--- a/sys/amd64/isa/icu_vector.S
+++ b/sys/amd64/isa/icu_vector.S
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
diff --git a/sys/amd64/isa/icu_vector.s b/sys/amd64/isa/icu_vector.s
index e427351..d2b88bf 100644
--- a/sys/amd64/isa/icu_vector.s
+++ b/sys/amd64/isa/icu_vector.s
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
diff --git a/sys/amd64/isa/intr_machdep.c b/sys/amd64/isa/intr_machdep.c
index 34a8c22..870760e 100644
--- a/sys/amd64/isa/intr_machdep.c
+++ b/sys/amd64/isa/intr_machdep.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of the kernel, then set up
+ * the interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update intrnames array with the specified name. This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+ * If we have a fast interrupt, we need to set the
+ * handler address directly. Do that below. For a
+ * slow interrupt, we don't need to know more details,
+ * so do it here because it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+ } else
+ snprintf(ithd->it_proc->p_comm, MAXCOMLEN,
+ "irq%d: %s", irq, name);
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+ * We can't append the new handler if either the
+ * existing handler list or the new handler does not allow
+ * interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+ " to normal interrupt %s on irq%d",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate and remove from the linked list the interrupt handler
+ * descriptor created by an earlier call of inthand_add(), then
+ * adjust the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+ if (ih->next != idesc)
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
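
The rewritten inthand_add() and inthand_remove() above keep all handlers for
an IRQ on a singly-linked chain hung off the ithd, appending new records at
the tail and unlinking by pointer match. As a rough standalone sketch of that
append/unlink pattern (the names below are made up for illustration and are
not the kernel's struct ithd/intrec):

/*
 * Minimal sketch of the handler-chain pattern used above: append at the
 * tail, unlink by matching pointer.  Illustrative names only.
 */
#include <stddef.h>

struct rec {
	struct rec *next;
};

struct chain {
	struct rec *head;
};

/* Append r at the end of the chain (or make it the head if empty). */
static void
chain_append(struct chain *c, struct rec *r)
{
	struct rec *p;

	r->next = NULL;
	if (c->head == NULL) {
		c->head = r;
		return;
	}
	for (p = c->head; p->next != NULL; p = p->next)
		;
	p->next = r;
}

/* Unlink r; return 0 on success, -1 if r is not on the chain. */
static int
chain_remove(struct chain *c, struct rec *r)
{
	struct rec *p;

	if (c->head == r) {
		c->head = r->next;
		return (0);
	}
	for (p = c->head; p != NULL && p->next != r; p = p->next)
		;
	if (p == NULL)
		return (-1);
	p->next = r->next;
	return (0);
}
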
diff --git a/sys/amd64/isa/intr_machdep.h b/sys/amd64/isa/intr_machdep.h
index 5982295..87c97a3 100644
--- a/sys/amd64/isa/intr_machdep.h
+++ b/sys/amd64/isa/intr_machdep.h
@@ -98,7 +98,6 @@
#define TPR_BLOCK_XCPUSTOP 0xaf /* */
#define TPR_BLOCK_ALL 0xff /* all INTs */
-
#ifdef TEST_TEST1
/* put a 'fake' HWI in top of APIC prio 0x3x, 32 + 31 = 63 = 0x3f */
#define XTEST1_OFFSET (ICU_OFFSET + 31)
@@ -145,8 +144,9 @@ extern u_long intrcnt[]; /* counts for for each device and stray */
extern char intrnames[]; /* string table containing device names */
extern u_long *intr_countp[]; /* pointers into intrcnt[] */
extern inthand2_t *intr_handler[]; /* C entry points of intr handlers */
-extern u_int intr_mask[]; /* sets of intrs masked during handling of 1 */
+extern ithd *ithds[];
extern void *intr_unit[]; /* cookies to pass to intr handlers */
+extern ithd softinterrupt; /* soft interrupt thread */
inthand_t
IDTVEC(fastintr0), IDTVEC(fastintr1),
@@ -190,26 +190,60 @@ inthand_t
#endif /** TEST_TEST1 */
#endif /* SMP || APIC_IO */
+#ifdef PC98
+#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
+#define ICU_SLAVEID 7
+#else
+#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
+#define ICU_SLAVEID 2
+#endif
+
+#ifdef APIC_IO
+/*
+ * This is to accommodate "mixed-mode" programming for
+ * motherboards that don't connect the 8254 to the IO APIC.
+ */
+#define AUTO_EOI_1 1
+#endif
+
+#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
+
void isa_defaultirq __P((void));
int isa_nmi __P((int cd));
int icu_setup __P((int intr, inthand2_t *func, void *arg,
- u_int *maskptr, int flags));
+ int flags));
int icu_unset __P((int intr, inthand2_t *handler));
-int update_intr_masks __P((void));
intrmask_t splq __P((intrmask_t mask));
-#define INTR_FAST 0x00000001 /* fast interrupt handler */
-#define INTR_EXCL 0x00010000 /* excl. intr, default is shared */
+/*
+ * Describe a hardware interrupt handler. These structures are
+ * accessed via the array intreclist, which contains one pointer per
+ * hardware interrupt.
+ *
+ * Multiple interrupt handlers for a specific IRQ can be chained
+ * together via the 'next' pointer.
+ */
+typedef struct intrec {
+ inthand2_t *handler; /* code address of handler */
+ void *argument; /* argument to pass to handler */
+ enum intr_type flags; /* flag bits (sys/bus.h) */
+ char *name; /* name of handler */
+ ithd *ithd; /* handler we're connected to */
+ struct intrec *next; /* next handler for this irq */
+} intrec;
/*
* WARNING: These are internal functions and not to be used by device drivers!
* They are subject to change without notice.
*/
struct intrec *inthand_add(const char *name, int irq, inthand2_t handler,
- void *arg, intrmask_t *maskptr, int flags);
-
+ void *arg, int pri, int flags);
int inthand_remove(struct intrec *idesc);
+void sched_ithd(void *);
+void ithd_loop(void *);
+void start_softintr(void *);
+void intr_soft(void *);
#endif /* LOCORE */
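
With the maskptr argument gone, callers of inthand_add() now pass a
scheduling priority for the interrupt thread instead of an spl mask pointer.
A hedged sketch of what a caller might look like under the new interface;
the "mydev" names, the softc, the priority parameter and the exact set of
prerequisite includes are assumptions for illustration (the include ordering
mirrors the ata-all.c change later in this commit), only the inthand_add()
and inthand_remove() signatures come from the header above:

/*
 * Hypothetical driver-side use of the new interrupt registration interface.
 */
#include <sys/param.h>
#include <sys/bus.h>			/* enum intr_type */
#include <sys/proc.h>
#include <i386/isa/icu.h>		/* assumed prerequisites of */
#include <i386/isa/intr_machdep.h>	/* intr_machdep.h */

struct mydev_softc {
	struct intrec *sc_ih;		/* cookie returned by inthand_add() */
};

static void
mydev_intr(void *arg)
{
	struct mydev_softc *sc = arg;

	/* ... service the device ... */
	(void)sc;
}

static int
mydev_setup_intr(struct mydev_softc *sc, int irq, int pri)
{
	sc->sc_ih = inthand_add("mydev", irq, mydev_intr, sc,
	    pri, INTR_TYPE_TTY);
	if (sc->sc_ih == NULL)
		return (-1);		/* shared-IRQ conflict or no memory */
	return (0);
}

static void
mydev_teardown_intr(struct mydev_softc *sc)
{
	if (sc->sc_ih != NULL && inthand_remove(sc->sc_ih) == 0)
		sc->sc_ih = NULL;
}
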
diff --git a/sys/amd64/isa/ithread.c b/sys/amd64/isa/ithread.c
new file mode 100644
index 0000000..4ceac42
--- /dev/null
+++ b/sys/amd64/isa/ithread.c
@@ -0,0 +1,353 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From BSDI: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp
+ * $FreeBSD$
+ */
+
+/* Interrupt thread code. */
+
+#include "opt_auto_eoi.h"
+
+#include "isa.h"
+
+#include <sys/param.h>
+#include <sys/rtprio.h> /* change this name XXX */
+#ifndef SMP
+#include <machine/lock.h>
+#endif
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/unistd.h>
+#include <sys/errno.h>
+#include <sys/interrupt.h>
+#include <machine/ipl.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
+#include <sys/bus.h>
+
+#if defined(APIC_IO)
+#include <machine/smp.h>
+#include <machine/smptests.h> /** FAST_HI */
+#include <machine/resource.h>
+#endif /* APIC_IO */
+#ifdef PC98
+#include <pc98/pc98/pc98.h>
+#include <pc98/pc98/pc98_machdep.h>
+#include <pc98/pc98/epsonio.h>
+#else
+#include <i386/isa/isa.h>
+#endif
+#include <i386/isa/icu.h>
+
+#if NISA > 0
+#include <isa/isavar.h>
+#endif
+#include <i386/isa/intr_machdep.h>
+#include <sys/interrupt.h>
+#ifdef APIC_IO
+#include <machine/clock.h>
+#endif
+
+#include "mca.h"
+#if NMCA > 0
+#include <i386/isa/mca_machdep.h>
+#endif
+
+#include <sys/vmmeter.h>
+#include <machine/mutex.h>
+#include <sys/ktr.h>
+#include <machine/cpu.h>
+#if 0
+#include <ddb/ddb.h>
+#endif
+
+u_long softintrcnt [NSWI];
+
+SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL)
+
+/*
+ * Schedule a heavyweight interrupt process. This function is called
+ * from the interrupt handlers Xintr<num>.
+ */
+void
+sched_ithd(void *cookie)
+{
+ int irq = (int) cookie; /* IRQ we're handling */
+ ithd *ir = ithds[irq]; /* and the process that does it */
+
+ /* This used to be in icu_vector.s */
+ /*
+ * We count software interrupts when we process them. The
+ * code here follows previous practice, but there's an
+ * argument for counting hardware interrupts when they're
+ * processed too.
+ */
+ if (irq < NHWI) /* real interrupt, */
+ atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */
+ atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */
+
+ CTR3(KTR_INTR, "sched_ithd pid %d(%s) need=%d",
+ ir->it_proc->p_pid, ir->it_proc->p_comm, ir->it_need);
+
+#if 0
+ /*
+ * If we are in the debugger, we can't use interrupt threads to
+ * process interrupts since the threads are scheduled. Instead,
+ * call the interrupt handlers directly. This should be able to
+ * go away once we have light-weight interrupt handlers.
+ */
+ if (db_active) {
+ intrec *ih; /* and our interrupt handler chain */
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = ir->it_ih; ih != NULL; ih = ih->next) {
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+
+ INTREN (1 << ir->irq); /* reset the mask bit */
+ return;
+ }
+#endif
+
+ /*
+ * Set it_need so that if the thread is already running but close
+ * to done, it will do another go-round. Then get the sched lock
+ * and see if the thread is on whichkqs yet. If not, put it on
+ * there. In any case, kick everyone so that if the new thread
+ * is higher priority than their current thread, it gets run now.
+ */
+ ir->it_need = 1;
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (ir->it_proc->p_stat == SWAIT) { /* not on run queue */
+ CTR1(KTR_INTR, "sched_ithd: setrunqueue %d",
+ ir->it_proc->p_pid);
+/* membar_lock(); */
+ ir->it_proc->p_stat = SRUN;
+ setrunqueue(ir->it_proc);
+ aston();
+ }
+ else {
+if (irq < NHWI && (irq & 7) != 0)
+ CTR3(KTR_INTR, "sched_ithd %d: it_need %d, state %d",
+ ir->it_proc->p_pid,
+ ir->it_need,
+ ir->it_proc->p_stat );
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+#if 0
+ aston(); /* ??? check priorities first? */
+#else
+ need_resched();
+#endif
+}
+
+/*
+ * This is the main code for all interrupt threads. It gets put on
+ * whichkqs by setrunqueue above.
+ */
+void
+ithd_loop(void *dummy)
+{
+ ithd *me; /* our thread context */
+ intrec *ih; /* and our interrupt handler chain */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /*
+ * As long as we have interrupts outstanding, go through the
+ * list of handlers, giving each one a go at it.
+ */
+ for (;;) {
+ CTR3(KTR_INTR, "ithd_loop pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm, me->it_need);
+ while (me->it_need) {
+ /*
+ * Service interrupts. If another interrupt
+ * arrives while we are running, they will set
+ * it_need to denote that we should make
+ * another pass.
+ */
+ me->it_need = 0;
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = me->it_ih; ih != NULL; ih = ih->next) {
+ CTR5(KTR_INTR,
+ "ithd_loop pid %d ih=%p: %p(%p) flg=%x",
+ me->it_proc->p_pid, (void *)ih,
+ (void *)ih->handler, ih->argument,
+ ih->flags);
+
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ }
+
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+
+ INTREN (1 << me->irq); /* reset the mask bit */
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+#ifdef APIC_IO
+ CTR1(KTR_INTR, "ithd_loop pid %d: done",
+ me->it_proc->p_pid);
+#else
+ CTR2(KTR_INTR, "ithd_loop pid %d: done, imen=%x",
+ me->it_proc->p_pid, imen);
+#endif
+ mi_switch();
+ CTR1(KTR_INTR, "ithd_loop pid %d: resumed",
+ me->it_proc->p_pid);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
+
+/*
+ * Start soft interrupt thread.
+ */
+void
+start_softintr(void *dummy)
+{
+ int error;
+ struct proc *p;
+ ithd *softintr; /* descriptor for the "IRQ" */
+ intrec *idesc; /* descriptor for this handler */
+ char *name = "sintr"; /* name for idesc */
+ int i;
+
+ if (ithds[SOFTINTR]) { /* we already have a thread */
+ printf("start_softintr: already running");
+ return;
+ }
+ /* first handler for this irq. */
+ softintr = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (softintr == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(softintr, sizeof(struct ithd));
+ softintr->irq = SOFTINTR;
+ ithds[SOFTINTR] = softintr;
+ error = kthread_create(intr_soft, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "softinterrupt");
+ if (error)
+ panic("start_softintr: kthread_create error %d\n", error);
+
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_rtprio.prio = PI_SOFT; /* soft interrupt */
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ softintr->it_proc = p;
+ p->p_ithd = softintr; /* reverse link */
+
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(idesc, sizeof (struct intrec));
+
+ idesc->ithd = softintr;
+ idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
+ if (idesc->name == NULL)
+ panic ("Can't create soft interrupt thread");
+ strcpy(idesc->name, name);
+ for (i = NHWI; i < NHWI + NSWI; i++)
+ intr_countp[i] = &softintrcnt [i - NHWI];
+}
+
+/*
+ * Software interrupt process code.
+ */
+void
+intr_soft(void *dummy)
+{
+ int i;
+ ithd *me; /* our thread context */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /* Main loop */
+ for (;;) {
+#if 0
+ CTR3(KTR_INTR, "intr_soft pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm,
+ me->it_need);
+#endif
+
+ /*
+ * Service interrupts. If another interrupt arrives
+ * while we are running, they will set it_need to
+ * denote that we should make another pass.
+ */
+ me->it_need = 0;
+ while ((i = ffs(spending))) {
+ i--;
+ atomic_add_long(intr_countp[i], 1);
+ spending &= ~ (1 << i);
+ mtx_enter(&Giant, MTX_DEF);
+ (ihandlers[i])();
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: done",
+ me->it_proc->p_pid);
+#endif
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+ mi_switch();
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: resumed",
+ me->it_proc->p_pid);
+#endif
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
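
The core of the scheduling handshake in the new file above is the it_need
flag: sched_ithd() sets it and makes the thread runnable, while ithd_loop()
keeps servicing handlers and only goes idle once it sees the flag still clear
under the scheduler lock. The following is a user-space model of that
handshake only, with a pthread mutex and condition variable standing in for
sched_lock, setrunqueue() and mi_switch(); it is not the kernel code.

/*
 * User-space model of the it_need handshake in sched_ithd()/ithd_loop().
 * Build with -lpthread.  Illustrative only.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  runnable   = PTHREAD_COND_INITIALIZER;
static int it_need;			/* "another interrupt arrived" */

/* Stands in for the handler chain walk in ithd_loop(). */
static void
run_handlers(void)
{
	printf("servicing interrupt\n");
}

/* Model of sched_ithd(): called when an interrupt fires. */
static void
model_sched_ithd(void)
{
	pthread_mutex_lock(&sched_lock);
	it_need = 1;
	pthread_cond_signal(&runnable);	/* like setrunqueue() + aston() */
	pthread_mutex_unlock(&sched_lock);
}

/* Model of ithd_loop(): body of the interrupt thread. */
static void *
model_ithd_loop(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&sched_lock);
		while (!it_need)		/* only idle while no work is pending */
			pthread_cond_wait(&runnable, &sched_lock);
		it_need = 0;
		pthread_mutex_unlock(&sched_lock);
		run_handlers();			/* a new "interrupt" during this call
						   simply sets it_need again */
	}
	return (NULL);
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, model_ithd_loop, NULL);
	model_sched_ithd();			/* pretend an interrupt arrived */
	sleep(1);
	return (0);
}
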
diff --git a/sys/amd64/isa/nmi.c b/sys/amd64/isa/nmi.c
index 34a8c22..870760e 100644
--- a/sys/amd64/isa/nmi.c
+++ b/sys/amd64/isa/nmi.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of the kernel, then set up
+ * the interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update intrnames array with the specified name. This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+ * If we have a fast interrupt, we need to set the
+ * handler address directly. Do that below. For a
+ * slow interrupt, we don't need to know more details,
+ * so do it here because it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+ } else
+ snprintf(ithd->it_proc->p_comm, MAXCOMLEN,
+ "irq%d: %s", irq, name);
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+ * We can't append the new handler if either the
+ * existing handler list or the new handler does not allow
+ * interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+ " to normal interrupt %s on irq%d",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate and remove from the linked list the interrupt handler
+ * descriptor created by an earlier call of inthand_add(), then
+ * adjust the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+ if (ih->next != idesc)
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
diff --git a/sys/amd64/isa/npx.c b/sys/amd64/isa/npx.c
index 637853e..8610e35 100644
--- a/sys/amd64/isa/npx.c
+++ b/sys/amd64/isa/npx.c
@@ -245,6 +245,12 @@ npx_probe(dev)
setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+ * XXX This looks highly bogus, but it appears that npx_probe1
+ * needs interrupts enabled. Does this make any difference
+ * here?
+ */
enable_intr();
result = npx_probe1(dev);
disable_intr();
@@ -797,7 +803,7 @@ npxdna()
/*
* Record new context early in case frstor causes an IRQ13.
*/
- npxproc = curproc;
+ PCPU_SET(npxproc, CURPROC);
curpcb->pcb_savefpu.sv_ex_sw = 0;
/*
* The following frstor may cause an IRQ13 when the state being
@@ -834,16 +840,18 @@ npxsave(addr)
fnsave(addr);
/* fnop(); */
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
#else /* SMP */
+ int intrstate;
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
+ intrstate = save_intr();
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
old_icu2_mask = inb(IO_ICU2 + 1);
@@ -851,12 +859,12 @@ npxsave(addr)
outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
idt[npx_intrno] = npx_idt_probeintr;
- enable_intr();
+ write_eflags(intrstate);
stop_emulating();
fnsave(addr);
fnop();
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
disable_intr();
icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */
icu2_mask = inb(IO_ICU2 + 1);
@@ -866,7 +874,7 @@ npxsave(addr)
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
- enable_intr(); /* back to usual state */
+ restore_intr(intrstate); /* back to previous state */
#endif /* SMP */
}
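
The npxsave() changes above replace unconditional enable_intr() calls with a
saved interrupt state, so that leaving the critical section restores whatever
state the caller had rather than forcing interrupts back on. A toy model of
that save/disable/restore idiom, using a plain int in place of the processor
flags; the functions here only mimic the shape of the kernel's save_intr(),
disable_intr() and restore_intr():

/*
 * Model of the save/disable/restore interrupt idiom adopted above.
 */
#include <stdio.h>

static int cpu_intr_enabled = 1;	/* stand-in for EFLAGS.IF */

static int
save_intr(void)
{
	return (cpu_intr_enabled);
}

static void
disable_intr(void)
{
	cpu_intr_enabled = 0;
}

static void
restore_intr(int state)
{
	cpu_intr_enabled = state;	/* restore, don't force-enable */
}

static void
critical_work(void)
{
	printf("interrupts %s while working\n",
	    cpu_intr_enabled ? "enabled" : "disabled");
}

/* Correct whether the caller had interrupts on or off. */
static void
do_critical_section(void)
{
	int intrstate;

	intrstate = save_intr();
	disable_intr();
	critical_work();
	restore_intr(intrstate);	/* back to the caller's state */
}

int
main(void)
{
	do_critical_section();		/* caller had interrupts enabled */

	disable_intr();
	do_critical_section();		/* nested: stays disabled afterwards */
	printf("after nested section: %s\n",
	    cpu_intr_enabled ? "enabled" : "disabled");
	return (0);
}
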
diff --git a/sys/amd64/isa/vector.S b/sys/amd64/isa/vector.S
index 5447a90..79f2320 100644
--- a/sys/amd64/isa/vector.S
+++ b/sys/amd64/isa/vector.S
@@ -16,9 +16,10 @@
#include <i386/isa/isa.h>
#endif
+#define FAST_INTR_HANDLER_USES_ES 1
#ifdef FAST_INTR_HANDLER_USES_ES
#define ACTUALLY_PUSHED 1
-#define MAYBE_MOVW_AX_ES movl %ax,%es
+#define MAYBE_MOVW_AX_ES movw %ax,%es
#define MAYBE_POPL_ES popl %es
#define MAYBE_PUSHL_ES pushl %es
#else
@@ -36,11 +37,6 @@
.data
ALIGN_DATA
- .globl _intr_nesting_level
-_intr_nesting_level:
- .byte 0
- .space 3
-
/*
* Interrupt counters and names for export to vmstat(8) and friends.
*
@@ -58,7 +54,6 @@ _eintrcnt:
_intrnames:
.space NR_INTRNAMES * 16
_eintrnames:
-
.text
/*
diff --git a/sys/amd64/isa/vector.s b/sys/amd64/isa/vector.s
index 5447a90..79f2320 100644
--- a/sys/amd64/isa/vector.s
+++ b/sys/amd64/isa/vector.s
@@ -16,9 +16,10 @@
#include <i386/isa/isa.h>
#endif
+#define FAST_INTR_HANDLER_USES_ES 1
#ifdef FAST_INTR_HANDLER_USES_ES
#define ACTUALLY_PUSHED 1
-#define MAYBE_MOVW_AX_ES movl %ax,%es
+#define MAYBE_MOVW_AX_ES movw %ax,%es
#define MAYBE_POPL_ES popl %es
#define MAYBE_PUSHL_ES pushl %es
#else
@@ -36,11 +37,6 @@
.data
ALIGN_DATA
- .globl _intr_nesting_level
-_intr_nesting_level:
- .byte 0
- .space 3
-
/*
* Interrupt counters and names for export to vmstat(8) and friends.
*
@@ -58,7 +54,6 @@ _eintrcnt:
_intrnames:
.space NR_INTRNAMES * 16
_eintrnames:
-
.text
/*
diff --git a/sys/conf/files b/sys/conf/files
index 7b086d4..70ea3a5 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -433,9 +433,11 @@ kern/kern_event.c standard
kern/kern_exec.c standard
kern/kern_exit.c standard
kern/kern_fork.c standard
+kern/kern_idle.c standard
kern/kern_intr.c standard
kern/kern_jail.c standard
kern/kern_kthread.c standard
+kern/kern_ktr.c optional ktr
kern/kern_ktrace.c standard
kern/kern_linker.c standard
kern/kern_lock.c standard
@@ -443,6 +445,7 @@ kern/kern_lockf.c standard
kern/kern_malloc.c standard
kern/kern_mib.c standard
kern/kern_module.c standard
+kern/kern_mutex.c standard
kern/kern_ntptime.c standard
kern/kern_physio.c standard
kern/kern_proc.c standard
diff --git a/sys/conf/files.alpha b/sys/conf/files.alpha
index bb746e1..6e8ba94 100644
--- a/sys/conf/files.alpha
+++ b/sys/conf/files.alpha
@@ -67,6 +67,7 @@ alpha/alpha/perfmon.c optional perfmon profiling-routine
alpha/alpha/perfmon.c optional perfmon
alpha/alpha/pmap.c standard
alpha/alpha/procfs_machdep.c standard
+alpha/alpha/mp_machdep.c standard
alpha/alpha/prom.c standard
alpha/alpha/promcons.c standard
alpha/alpha/prom_disp.s standard
@@ -75,6 +76,7 @@ alpha/alpha/simplelock.s optional smp
alpha/alpha/support.s standard
alpha/alpha/swtch.s standard
alpha/alpha/sys_machdep.c standard
+alpha/alpha/synch_machdep.c standard
alpha/alpha/trap.c standard
alpha/alpha/userconfig.c optional userconfig
alpha/alpha/vm_machdep.c standard
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index f5fa25a..e9a7acd 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -188,7 +188,6 @@ i386/i386/mp_clock.c optional smp
i386/i386/mp_machdep.c optional smp
i386/i386/mpapic.c optional smp
i386/i386/mpboot.s optional smp
-i386/i386/mplock.s optional smp
i386/i386/nexus.c standard
i386/i386/perfmon.c optional perfmon
i386/i386/perfmon.c optional perfmon profiling-routine
@@ -198,6 +197,7 @@ i386/i386/simplelock.s optional smp
i386/i386/support.s standard
i386/i386/swtch.s standard
i386/i386/sys_machdep.c standard
+i386/i386/synch_machdep.c standard
i386/i386/trap.c standard
i386/i386/userconfig.c optional userconfig
i386/i386/vm86.c standard
@@ -242,6 +242,7 @@ i386/isa/if_wi.c optional wi card
i386/isa/if_wl.c count wl
i386/isa/if_wlp.c optional wlp
i386/isa/intr_machdep.c standard
+i386/isa/ithread.c standard
i386/isa/ipl_funcs.c standard \
compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
i386/isa/isa.c optional isa
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index bcb6773..2e8481e 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -175,7 +175,6 @@ i386/i386/mp_clock.c optional smp
i386/i386/mp_machdep.c optional smp
i386/i386/mpapic.c optional smp
i386/i386/mpboot.s optional smp
-i386/i386/mplock.s optional smp
i386/i386/nexus.c standard
i386/i386/perfmon.c optional perfmon
i386/i386/perfmon.c optional perfmon profiling-routine
diff --git a/sys/conf/options b/sys/conf/options
index ddd04a3..8093240 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -454,6 +454,15 @@ PCFCLOCK_VERBOSE opt_pcfclock.h
PCFCLOCK_MAX_RETRIES opt_pcfclock.h
TDFX_LINUX opt_tdfx.h
+KTR opt_global.h
+KTR_MASK opt_global.h
+KTR_CPUMASK opt_global.h
+KTR_COMPILE opt_global.h
+KTR_ENTRIES opt_global.h
+KTR_EXTEND opt_global.h
+SMP_DEBUG opt_global.h
+WITNESS opt_global.h
+
# options for ACPI support
ACPI_DEBUG opt_acpi.h
AML_DEBUG opt_acpi.h
diff --git a/sys/conf/options.alpha b/sys/conf/options.alpha
index 8260cb0..7d53c37 100644
--- a/sys/conf/options.alpha
+++ b/sys/conf/options.alpha
@@ -64,3 +64,7 @@ KBD_MAXRETRY opt_kbd.h
KBD_MAXWAIT opt_kbd.h
KBD_RESETDELAY opt_kbd.h
KBDIO_DEBUG opt_kbd.h
+
+# Clock options
+CLK_USE_I8254_CALIBRATION opt_clock.h
+TIMER_FREQ opt_clock.h
diff --git a/sys/dev/ata/ata-all.c b/sys/dev/ata/ata-all.c
index fc89297..17aff9e 100644
--- a/sys/dev/ata/ata-all.c
+++ b/sys/dev/ata/ata-all.c
@@ -63,6 +63,8 @@
#include <machine/clock.h>
#ifdef __i386__
#include <machine/smp.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#endif
#ifdef __alpha__
diff --git a/sys/dev/cy/cy.c b/sys/dev/cy/cy.c
index 52a8cf3..5487d8f 100644
--- a/sys/dev/cy/cy.c
+++ b/sys/dev/cy/cy.c
@@ -94,11 +94,6 @@
#error "The cy device requires the old isa compatibility shims"
#endif
-#ifdef SMP
-#define disable_intr() COM_DISABLE_INTR()
-#define enable_intr() COM_ENABLE_INTR()
-#endif /* SMP */
-
/*
* Dictionary so that I can name everything *sio* or *com* to compare with
* sio.c. There is also lots of ugly formatting and unnecessary ifdefs to
@@ -366,7 +361,7 @@ static struct com_s *p_com_addr[NSIO];
#define com_addr(unit) (p_com_addr[unit])
struct isa_driver siodriver = {
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
sioprobe,
sioattach,
driver_name
@@ -604,11 +599,9 @@ cyattach_common(cy_iobase, cy_align)
com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL;
}
if (siosetwater(com, com->it_in.c_ispeed) != 0) {
- enable_intr();
free(com, M_DEVBUF);
return (0);
}
- enable_intr();
termioschars(&com->it_in);
com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate;
com->it_out = com->it_in;
@@ -662,6 +655,7 @@ sioopen(dev, flag, mode, p)
int s;
struct tty *tp;
int unit;
+ int intrsave;
mynor = minor(dev);
unit = MINOR_TO_UNIT(mynor);
@@ -768,14 +762,17 @@ open_top:
}
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
(void) inb(com->line_status_port);
(void) inb(com->data_port);
com->prev_modem_status = com->last_modem_status
= inb(com->modem_status_port);
outb(iobase + com_ier, IER_ERXRDY | IER_ETXRDY | IER_ERLS
| IER_EMSC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#else /* !0 */
/*
* Flush fifos. This requires a full channel reset which
@@ -786,13 +783,16 @@ open_top:
CD1400_CCR_CMDRESET | CD1400_CCR_CHANRESET);
cd1400_channel_cmd(com, com->channel_control);
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->prev_modem_status = com->last_modem_status
= cd_getreg(com, CD1400_MSVR2);
cd_setreg(com, CD1400_SRER,
com->intr_enable
= CD1400_SRER_MDMCH | CD1400_SRER_RXDATA);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif /* 0 */
/*
* Handle initial DCD. Callout devices get a fake initial
@@ -875,6 +875,7 @@ comhardclose(com)
int s;
struct tty *tp;
int unit;
+ int intrsave;
unit = com->unit;
iobase = com->iobase;
@@ -888,10 +889,13 @@ comhardclose(com)
outb(iobase + com_cfcr, com->cfcr_image &= ~CFCR_SBREAK);
#else
/* XXX */
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->etc = ETC_NONE;
cd_setreg(com, CD1400_COR2, com->cor[1] &= ~CD1400_COR2_ETC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
cd1400_channel_cmd(com, CD1400_CCR_CMDRESET | CD1400_CCR_FTF);
#endif
@@ -899,9 +903,12 @@ comhardclose(com)
#if 0
outb(iobase + com_ier, 0);
#else
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
cd_setreg(com, CD1400_SRER, com->intr_enable = 0);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
tp = com->tp;
if ((tp->t_cflag & HUPCL)
@@ -991,6 +998,11 @@ siodtrwakeup(chan)
wakeup(&com->dtr_wait);
}
+/*
+ * This function:
+ * a) needs to be called with COM_LOCK() held, and
+ * b) needs to return with COM_LOCK() held.
+ */
static void
sioinput(com)
struct com_s *com;
@@ -1000,6 +1012,7 @@ sioinput(com)
u_char line_status;
int recv_data;
struct tty *tp;
+ int intrsave;
buf = com->ibuf;
tp = com->tp;
@@ -1016,7 +1029,15 @@ sioinput(com)
* slinput is reasonably fast (usually 40 instructions plus
* call overhead).
*/
+
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
incc = com->iptr - buf;
if (tp->t_rawq.c_cc + incc > tp->t_ihiwat
@@ -1038,10 +1059,18 @@ sioinput(com)
tp->t_lflag &= ~FLUSHO;
comstart(tp);
}
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
} else {
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
line_status = buf[com->ierroff];
recv_data = *buf++;
@@ -1057,7 +1086,8 @@ sioinput(com)
recv_data |= TTY_PE;
}
(*linesw[tp->t_line].l_rint)(recv_data, tp);
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
}
com_events -= (com->iptr - com->ibuf);
@@ -1729,6 +1759,7 @@ static void
siopoll()
{
int unit;
+ int intrsave;
#ifdef CyDebug
++cy_timeouts;
@@ -1751,7 +1782,9 @@ repeat:
* (actually never opened devices) so that we don't
* loop.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
incc = com->iptr - com->ibuf;
com->iptr = com->ibuf;
if (com->state & CS_CHECKMSR) {
@@ -1759,7 +1792,8 @@ repeat:
com->state &= ~CS_CHECKMSR;
}
com_events -= incc;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (incc != 0)
log(LOG_DEBUG,
"sio%d: %d events for device with no tp\n",
@@ -1767,29 +1801,39 @@ repeat:
continue;
}
if (com->iptr != com->ibuf) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
sioinput(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (com->state & CS_CHECKMSR) {
u_char delta_modem_status;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
+ sioinput(com);
delta_modem_status = com->last_modem_status
^ com->prev_modem_status;
com->prev_modem_status = com->last_modem_status;
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_CHECKMSR;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta_modem_status & MSR_DCD)
(*linesw[tp->t_line].l_modem)
(tp, com->prev_modem_status & MSR_DCD);
}
if (com->extra_state & CSE_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->extra_state &= ~CSE_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (!(com->state & CS_BUSY)) {
tp->t_state &= ~TS_BUSY;
ttwwakeup(com->tp);
@@ -1801,10 +1845,13 @@ repeat:
}
}
if (com->state & CS_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
(*linesw[tp->t_line].l_start)(tp);
}
if (com_events == 0)
@@ -1833,6 +1880,7 @@ comparam(tp, t)
u_char opt;
int s;
int unit;
+ int intrsave;
/* do historical conversions */
if (t->c_ispeed == 0)
@@ -1857,14 +1905,9 @@ comparam(tp, t)
else
(void)commctl(com, TIOCM_DTR, DMBIS);
- /*
- * This returns with interrupts disabled so that we can complete
- * the speed change atomically.
- */
(void) siosetwater(com, t->c_ispeed);
/* XXX we don't actually change the speed atomically. */
- enable_intr();
if (idivisor != 0) {
cd_setreg(com, CD1400_RBPR, idivisor);
@@ -1985,12 +2028,15 @@ comparam(tp, t)
if (cflag & CCTS_OFLOW)
opt |= CD1400_COR2_CCTS_OFLOW;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (opt != com->cor[1]) {
cor_change |= CD1400_CCR_COR2;
cd_setreg(com, CD1400_COR2, com->cor[1] = opt);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
/*
* set channel option register 3 -
@@ -2111,7 +2157,9 @@ comparam(tp, t)
* XXX should have done this long ago, but there is too much state
* to change all atomically.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->state &= ~CS_TTGO;
if (!(tp->t_state & TS_TTSTOP))
@@ -2177,7 +2225,8 @@ comparam(tp, t)
| CD1400_SRER_TXMPTY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
splx(s);
comstart(tp);
if (com->ibufold != NULL) {
@@ -2196,6 +2245,7 @@ siosetwater(com, speed)
u_char *ibuf;
int ibufsize;
struct tty *tp;
+ int intrsave;
/*
* Make the buffer size large enough to handle a softtty interrupt
@@ -2207,7 +2257,6 @@ siosetwater(com, speed)
for (ibufsize = 128; ibufsize < cp4ticks;)
ibufsize <<= 1;
if (ibufsize == com->ibufsize) {
- disable_intr();
return (0);
}
@@ -2217,7 +2266,6 @@ siosetwater(com, speed)
*/
ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT);
if (ibuf == NULL) {
- disable_intr();
return (ENOMEM);
}
@@ -2235,7 +2283,9 @@ siosetwater(com, speed)
* Read current input buffer, if any. Continue with interrupts
* disabled.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->iptr != com->ibuf)
sioinput(com);
@@ -2254,6 +2304,9 @@ siosetwater(com, speed)
com->ibufend = ibuf + ibufsize;
com->ierroff = ibufsize;
com->ihighwater = ibuf + 3 * ibufsize / 4;
+
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2267,6 +2320,7 @@ comstart(tp)
bool_t started;
#endif
int unit;
+ int intrsave;
unit = DEV_TO_UNIT(tp->t_dev);
com = com_addr(unit);
@@ -2277,7 +2331,9 @@ comstart(tp)
started = FALSE;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (tp->t_state & TS_TTSTOP) {
com->state &= ~CS_TTGO;
if (com->intr_enable & CD1400_SRER_TXRDY)
@@ -2313,7 +2369,8 @@ comstart(tp)
com->mcr_image |= com->mcr_rts);
#endif
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
ttwwakeup(tp);
splx(s);
@@ -2332,7 +2389,9 @@ comstart(tp)
sizeof com->obuf1);
com->obufs[0].l_next = NULL;
com->obufs[0].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2351,7 +2410,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) {
#ifdef CyDebug
@@ -2362,7 +2422,9 @@ comstart(tp)
sizeof com->obuf2);
com->obufs[1].l_next = NULL;
com->obufs[1].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2381,7 +2443,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
tp->t_state |= TS_BUSY;
}
@@ -2390,10 +2453,13 @@ comstart(tp)
++com->start_real;
#endif
#if 0
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com); /* fake interrupt to start output */
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
ttwwakeup(tp);
splx(s);
@@ -2406,10 +2472,13 @@ comstop(tp, rw)
{
struct com_s *com;
bool_t wakeup_etc;
+ int intrsave;
com = com_addr(DEV_TO_UNIT(tp->t_dev));
wakeup_etc = FALSE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (rw & FWRITE) {
com->obufs[0].l_queued = FALSE;
com->obufs[1].l_queued = FALSE;
@@ -2432,7 +2501,8 @@ comstop(tp, rw)
com_events -= (com->iptr - com->ibuf);
com->iptr = com->ibuf;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (wakeup_etc)
wakeup(&com->etc);
if (rw & FWRITE && com->etc == ETC_NONE)
@@ -2448,6 +2518,7 @@ commctl(com, bits, how)
{
int mcr;
int msr;
+ int intrsave;
if (how == DMGET) {
if (com->channel_control & CD1400_CCR_RCVEN)
@@ -2485,7 +2556,9 @@ commctl(com, bits, how)
mcr |= com->mcr_dtr;
if (bits & TIOCM_RTS)
mcr |= com->mcr_rts;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
switch (how) {
case DMSET:
com->mcr_image = mcr;
@@ -2503,7 +2576,8 @@ commctl(com, bits, how)
cd_setreg(com, CD1400_MSVR2, mcr);
break;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2565,9 +2639,14 @@ comwakeup(chan)
com = com_addr(unit);
if (com != NULL
&& (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) {
+ int intrsave;
+
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
}
#endif
@@ -2587,11 +2666,15 @@ comwakeup(chan)
for (errnum = 0; errnum < CE_NTYPES; ++errnum) {
u_int delta;
u_long total;
+ int intrsave;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta = com->delta_error_counts[errnum];
com->delta_error_counts[errnum] = 0;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta == 0)
continue;
total = com->error_counts[errnum] += delta;
@@ -2743,6 +2826,8 @@ cd_etc(com, etc)
struct com_s *com;
int etc;
{
+ int intrsave;
+
/*
* We can't change the hardware's ETC state while there are any
* characters in the tx fifo, since those characters would be
@@ -2754,26 +2839,28 @@ cd_etc(com, etc)
* for the tx to become empty so that the command is sure to be
* executed soon after we issue it.
*/
+ intrsave = save_intr();
disable_intr();
- if (com->etc == etc) {
- enable_intr();
+ COM_LOCK();
+ if (com->etc == etc)
goto wait;
- }
if ((etc == CD1400_ETC_SENDBREAK
&& (com->etc == ETC_BREAK_STARTING
|| com->etc == ETC_BREAK_STARTED))
|| (etc == CD1400_ETC_STOPBREAK
&& (com->etc == ETC_BREAK_ENDING || com->etc == ETC_BREAK_ENDED
|| com->etc == ETC_NONE))) {
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return;
}
com->etc = etc;
cd_setreg(com, CD1400_SRER,
com->intr_enable
= (com->intr_enable & ~CD1400_SRER_TXRDY) | CD1400_SRER_TXMPTY);
- enable_intr();
wait:
+ COM_UNLOCK();
+ restore_intr(intrsave);
while (com->etc == etc
&& tsleep(&com->etc, TTIPRI | PCATCH, "cyetc", 0) == 0)
continue;
@@ -2787,7 +2874,7 @@ cd_getreg(com, reg)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
int val;
@@ -2795,14 +2882,16 @@ cd_getreg(com, reg)
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
val = cd_inb(iobase, reg, cy_align);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (val);
}
@@ -2815,21 +2904,23 @@ cd_setreg(com, reg, val)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
basecom = com_addr(com->unit & ~(CD1400_NO_OF_CHANNELS - 1));
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
cd_outb(iobase, reg, cy_align, val);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
#ifdef CyDebug
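The change repeated throughout the cy driver above swaps the bare disable_intr()/enable_intr() pairs for a saved interrupt state bracketed by the COM_LOCK()/COM_UNLOCK() spin lock, so the critical sections serialize against other CPUs and nest correctly when interrupts were already off. A condensed sketch of that shape, assuming the save_intr()/restore_intr() and COM_LOCK() primitives shown in the hunks and a placeholder softc:

struct softc { int state; };	/* placeholder; the driver uses struct com_s */

static void
update_state_locked(struct softc *sc, int newstate)
{
	int intrsave;

	intrsave = save_intr();		/* remember the caller's PSL_I */
	disable_intr();			/* mask interrupts on this CPU */
	COM_LOCK();			/* spin lock shared with the ISR */
	sc->state = newstate;		/* the protected update */
	COM_UNLOCK();
	restore_intr(intrsave);		/* re-enable only if previously enabled */
}

Note that cd_getreg() and cd_setreg() take COM_LOCK() only when the saved flags have PSL_I set, since they can be entered from the interrupt handler with the lock already held.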
diff --git a/sys/dev/cy/cy_isa.c b/sys/dev/cy/cy_isa.c
index 52a8cf3..5487d8f 100644
--- a/sys/dev/cy/cy_isa.c
+++ b/sys/dev/cy/cy_isa.c
@@ -94,11 +94,6 @@
#error "The cy device requires the old isa compatibility shims"
#endif
-#ifdef SMP
-#define disable_intr() COM_DISABLE_INTR()
-#define enable_intr() COM_ENABLE_INTR()
-#endif /* SMP */
-
/*
* Dictionary so that I can name everything *sio* or *com* to compare with
* sio.c. There is also lots of ugly formatting and unnecessary ifdefs to
@@ -366,7 +361,7 @@ static struct com_s *p_com_addr[NSIO];
#define com_addr(unit) (p_com_addr[unit])
struct isa_driver siodriver = {
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
sioprobe,
sioattach,
driver_name
@@ -604,11 +599,9 @@ cyattach_common(cy_iobase, cy_align)
com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL;
}
if (siosetwater(com, com->it_in.c_ispeed) != 0) {
- enable_intr();
free(com, M_DEVBUF);
return (0);
}
- enable_intr();
termioschars(&com->it_in);
com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate;
com->it_out = com->it_in;
@@ -662,6 +655,7 @@ sioopen(dev, flag, mode, p)
int s;
struct tty *tp;
int unit;
+ int intrsave;
mynor = minor(dev);
unit = MINOR_TO_UNIT(mynor);
@@ -768,14 +762,17 @@ open_top:
}
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
(void) inb(com->line_status_port);
(void) inb(com->data_port);
com->prev_modem_status = com->last_modem_status
= inb(com->modem_status_port);
outb(iobase + com_ier, IER_ERXRDY | IER_ETXRDY | IER_ERLS
| IER_EMSC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#else /* !0 */
/*
* Flush fifos. This requires a full channel reset which
@@ -786,13 +783,16 @@ open_top:
CD1400_CCR_CMDRESET | CD1400_CCR_CHANRESET);
cd1400_channel_cmd(com, com->channel_control);
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->prev_modem_status = com->last_modem_status
= cd_getreg(com, CD1400_MSVR2);
cd_setreg(com, CD1400_SRER,
com->intr_enable
= CD1400_SRER_MDMCH | CD1400_SRER_RXDATA);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif /* 0 */
/*
* Handle initial DCD. Callout devices get a fake initial
@@ -875,6 +875,7 @@ comhardclose(com)
int s;
struct tty *tp;
int unit;
+ int intrsave;
unit = com->unit;
iobase = com->iobase;
@@ -888,10 +889,13 @@ comhardclose(com)
outb(iobase + com_cfcr, com->cfcr_image &= ~CFCR_SBREAK);
#else
/* XXX */
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->etc = ETC_NONE;
cd_setreg(com, CD1400_COR2, com->cor[1] &= ~CD1400_COR2_ETC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
cd1400_channel_cmd(com, CD1400_CCR_CMDRESET | CD1400_CCR_FTF);
#endif
@@ -899,9 +903,12 @@ comhardclose(com)
#if 0
outb(iobase + com_ier, 0);
#else
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
cd_setreg(com, CD1400_SRER, com->intr_enable = 0);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
tp = com->tp;
if ((tp->t_cflag & HUPCL)
@@ -991,6 +998,11 @@ siodtrwakeup(chan)
wakeup(&com->dtr_wait);
}
+/*
+ * This function:
+ * a) needs to be called with COM_LOCK() held, and
+ * b) needs to return with COM_LOCK() held.
+ */
static void
sioinput(com)
struct com_s *com;
@@ -1000,6 +1012,7 @@ sioinput(com)
u_char line_status;
int recv_data;
struct tty *tp;
+ int intrsave;
buf = com->ibuf;
tp = com->tp;
@@ -1016,7 +1029,15 @@ sioinput(com)
* slinput is reasonably fast (usually 40 instructions plus
* call overhead).
*/
+
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
incc = com->iptr - buf;
if (tp->t_rawq.c_cc + incc > tp->t_ihiwat
@@ -1038,10 +1059,18 @@ sioinput(com)
tp->t_lflag &= ~FLUSHO;
comstart(tp);
}
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
} else {
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
line_status = buf[com->ierroff];
recv_data = *buf++;
@@ -1057,7 +1086,8 @@ sioinput(com)
recv_data |= TTY_PE;
}
(*linesw[tp->t_line].l_rint)(recv_data, tp);
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
}
com_events -= (com->iptr - com->ibuf);
@@ -1729,6 +1759,7 @@ static void
siopoll()
{
int unit;
+ int intrsave;
#ifdef CyDebug
++cy_timeouts;
@@ -1751,7 +1782,9 @@ repeat:
* (actually never opened devices) so that we don't
* loop.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
incc = com->iptr - com->ibuf;
com->iptr = com->ibuf;
if (com->state & CS_CHECKMSR) {
@@ -1759,7 +1792,8 @@ repeat:
com->state &= ~CS_CHECKMSR;
}
com_events -= incc;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (incc != 0)
log(LOG_DEBUG,
"sio%d: %d events for device with no tp\n",
@@ -1767,29 +1801,39 @@ repeat:
continue;
}
if (com->iptr != com->ibuf) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
sioinput(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (com->state & CS_CHECKMSR) {
u_char delta_modem_status;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
+ sioinput(com);
delta_modem_status = com->last_modem_status
^ com->prev_modem_status;
com->prev_modem_status = com->last_modem_status;
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_CHECKMSR;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta_modem_status & MSR_DCD)
(*linesw[tp->t_line].l_modem)
(tp, com->prev_modem_status & MSR_DCD);
}
if (com->extra_state & CSE_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->extra_state &= ~CSE_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (!(com->state & CS_BUSY)) {
tp->t_state &= ~TS_BUSY;
ttwwakeup(com->tp);
@@ -1801,10 +1845,13 @@ repeat:
}
}
if (com->state & CS_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
(*linesw[tp->t_line].l_start)(tp);
}
if (com_events == 0)
@@ -1833,6 +1880,7 @@ comparam(tp, t)
u_char opt;
int s;
int unit;
+ int intrsave;
/* do historical conversions */
if (t->c_ispeed == 0)
@@ -1857,14 +1905,9 @@ comparam(tp, t)
else
(void)commctl(com, TIOCM_DTR, DMBIS);
- /*
- * This returns with interrupts disabled so that we can complete
- * the speed change atomically.
- */
(void) siosetwater(com, t->c_ispeed);
/* XXX we don't actually change the speed atomically. */
- enable_intr();
if (idivisor != 0) {
cd_setreg(com, CD1400_RBPR, idivisor);
@@ -1985,12 +2028,15 @@ comparam(tp, t)
if (cflag & CCTS_OFLOW)
opt |= CD1400_COR2_CCTS_OFLOW;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (opt != com->cor[1]) {
cor_change |= CD1400_CCR_COR2;
cd_setreg(com, CD1400_COR2, com->cor[1] = opt);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
/*
* set channel option register 3 -
@@ -2111,7 +2157,9 @@ comparam(tp, t)
* XXX should have done this long ago, but there is too much state
* to change all atomically.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->state &= ~CS_TTGO;
if (!(tp->t_state & TS_TTSTOP))
@@ -2177,7 +2225,8 @@ comparam(tp, t)
| CD1400_SRER_TXMPTY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
splx(s);
comstart(tp);
if (com->ibufold != NULL) {
@@ -2196,6 +2245,7 @@ siosetwater(com, speed)
u_char *ibuf;
int ibufsize;
struct tty *tp;
+ int intrsave;
/*
* Make the buffer size large enough to handle a softtty interrupt
@@ -2207,7 +2257,6 @@ siosetwater(com, speed)
for (ibufsize = 128; ibufsize < cp4ticks;)
ibufsize <<= 1;
if (ibufsize == com->ibufsize) {
- disable_intr();
return (0);
}
@@ -2217,7 +2266,6 @@ siosetwater(com, speed)
*/
ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT);
if (ibuf == NULL) {
- disable_intr();
return (ENOMEM);
}
@@ -2235,7 +2283,9 @@ siosetwater(com, speed)
* Read current input buffer, if any. Continue with interrupts
* disabled.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->iptr != com->ibuf)
sioinput(com);
@@ -2254,6 +2304,9 @@ siosetwater(com, speed)
com->ibufend = ibuf + ibufsize;
com->ierroff = ibufsize;
com->ihighwater = ibuf + 3 * ibufsize / 4;
+
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2267,6 +2320,7 @@ comstart(tp)
bool_t started;
#endif
int unit;
+ int intrsave;
unit = DEV_TO_UNIT(tp->t_dev);
com = com_addr(unit);
@@ -2277,7 +2331,9 @@ comstart(tp)
started = FALSE;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (tp->t_state & TS_TTSTOP) {
com->state &= ~CS_TTGO;
if (com->intr_enable & CD1400_SRER_TXRDY)
@@ -2313,7 +2369,8 @@ comstart(tp)
com->mcr_image |= com->mcr_rts);
#endif
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
ttwwakeup(tp);
splx(s);
@@ -2332,7 +2389,9 @@ comstart(tp)
sizeof com->obuf1);
com->obufs[0].l_next = NULL;
com->obufs[0].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2351,7 +2410,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) {
#ifdef CyDebug
@@ -2362,7 +2422,9 @@ comstart(tp)
sizeof com->obuf2);
com->obufs[1].l_next = NULL;
com->obufs[1].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2381,7 +2443,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
tp->t_state |= TS_BUSY;
}
@@ -2390,10 +2453,13 @@ comstart(tp)
++com->start_real;
#endif
#if 0
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com); /* fake interrupt to start output */
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
ttwwakeup(tp);
splx(s);
@@ -2406,10 +2472,13 @@ comstop(tp, rw)
{
struct com_s *com;
bool_t wakeup_etc;
+ int intrsave;
com = com_addr(DEV_TO_UNIT(tp->t_dev));
wakeup_etc = FALSE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (rw & FWRITE) {
com->obufs[0].l_queued = FALSE;
com->obufs[1].l_queued = FALSE;
@@ -2432,7 +2501,8 @@ comstop(tp, rw)
com_events -= (com->iptr - com->ibuf);
com->iptr = com->ibuf;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (wakeup_etc)
wakeup(&com->etc);
if (rw & FWRITE && com->etc == ETC_NONE)
@@ -2448,6 +2518,7 @@ commctl(com, bits, how)
{
int mcr;
int msr;
+ int intrsave;
if (how == DMGET) {
if (com->channel_control & CD1400_CCR_RCVEN)
@@ -2485,7 +2556,9 @@ commctl(com, bits, how)
mcr |= com->mcr_dtr;
if (bits & TIOCM_RTS)
mcr |= com->mcr_rts;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
switch (how) {
case DMSET:
com->mcr_image = mcr;
@@ -2503,7 +2576,8 @@ commctl(com, bits, how)
cd_setreg(com, CD1400_MSVR2, mcr);
break;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2565,9 +2639,14 @@ comwakeup(chan)
com = com_addr(unit);
if (com != NULL
&& (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) {
+ int intrsave;
+
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
}
#endif
@@ -2587,11 +2666,15 @@ comwakeup(chan)
for (errnum = 0; errnum < CE_NTYPES; ++errnum) {
u_int delta;
u_long total;
+ int intrsave;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta = com->delta_error_counts[errnum];
com->delta_error_counts[errnum] = 0;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta == 0)
continue;
total = com->error_counts[errnum] += delta;
@@ -2743,6 +2826,8 @@ cd_etc(com, etc)
struct com_s *com;
int etc;
{
+ int intrsave;
+
/*
* We can't change the hardware's ETC state while there are any
* characters in the tx fifo, since those characters would be
@@ -2754,26 +2839,28 @@ cd_etc(com, etc)
* for the tx to become empty so that the command is sure to be
* executed soon after we issue it.
*/
+ intrsave = save_intr();
disable_intr();
- if (com->etc == etc) {
- enable_intr();
+ COM_LOCK();
+ if (com->etc == etc)
goto wait;
- }
if ((etc == CD1400_ETC_SENDBREAK
&& (com->etc == ETC_BREAK_STARTING
|| com->etc == ETC_BREAK_STARTED))
|| (etc == CD1400_ETC_STOPBREAK
&& (com->etc == ETC_BREAK_ENDING || com->etc == ETC_BREAK_ENDED
|| com->etc == ETC_NONE))) {
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return;
}
com->etc = etc;
cd_setreg(com, CD1400_SRER,
com->intr_enable
= (com->intr_enable & ~CD1400_SRER_TXRDY) | CD1400_SRER_TXMPTY);
- enable_intr();
wait:
+ COM_UNLOCK();
+ restore_intr(intrsave);
while (com->etc == etc
&& tsleep(&com->etc, TTIPRI | PCATCH, "cyetc", 0) == 0)
continue;
@@ -2787,7 +2874,7 @@ cd_getreg(com, reg)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
int val;
@@ -2795,14 +2882,16 @@ cd_getreg(com, reg)
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
val = cd_inb(iobase, reg, cy_align);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (val);
}
@@ -2815,21 +2904,23 @@ cd_setreg(com, reg, val)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
basecom = com_addr(com->unit & ~(CD1400_NO_OF_CHANNELS - 1));
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
cd_outb(iobase, reg, cy_align, val);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
#ifdef CyDebug
diff --git a/sys/dev/sio/sio.c b/sys/dev/sio/sio.c
index 2725a20..a6f05e7 100644
--- a/sys/dev/sio/sio.c
+++ b/sys/dev/sio/sio.c
@@ -95,16 +95,12 @@
#endif
#include <isa/ic/ns16550.h>
+/* XXX - this is ok because we only do sio fast interrupts on i386 */
#ifndef __i386__
#define disable_intr()
#define enable_intr()
#endif
-#ifdef SMP
-#define disable_intr() COM_DISABLE_INTR()
-#define enable_intr() COM_ENABLE_INTR()
-#endif /* SMP */
-
#define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */
#define CALLOUT_MASK 0x80
@@ -760,6 +756,7 @@ sioprobe(dev, xrid)
u_int flags = device_get_flags(dev);
int rid;
struct resource *port;
+ int intrsave;
rid = xrid;
port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid,
@@ -856,7 +853,9 @@ sioprobe(dev, xrid)
* but mask them in the processor as well in case there are some
* (misconfigured) shared interrupts.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
/* EXTRA DELAY? */
/*
@@ -953,7 +952,8 @@ sioprobe(dev, xrid)
CLR_FLAG(dev, COM_C_IIR_TXRDYBUG);
}
sio_setreg(com, com_cfcr, CFCR_8BITS);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
bus_release_resource(dev, SYS_RES_IOPORT, rid, port);
return (iobase == siocniobase ? 0 : result);
}
@@ -993,7 +993,8 @@ sioprobe(dev, xrid)
irqmap[3] = isa_irq_pending();
failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
irqs = irqmap[1] & ~irqmap[0];
if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 &&
@@ -1181,7 +1182,6 @@ sioattach(dev, xrid)
} else
com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED;
if (siosetwater(com, com->it_in.c_ispeed) != 0) {
- enable_intr();
/*
* Leave i/o resources allocated if this is a `cn'-level
* console, so that other devices can't snarf them.
@@ -1190,7 +1190,6 @@ sioattach(dev, xrid)
bus_release_resource(dev, SYS_RES_IOPORT, rid, port);
return (ENOMEM);
}
- enable_intr();
termioschars(&com->it_in);
com->it_out = com->it_in;
@@ -1340,7 +1339,7 @@ determined_type: ;
RF_ACTIVE);
if (com->irqres) {
ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres,
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
siointr, com, &com->cookie);
if (ret) {
ret = BUS_SETUP_INTR(device_get_parent(dev), dev,
@@ -1424,6 +1423,8 @@ open_top:
goto out;
}
} else {
+ int intrsave;
+
/*
* The device isn't open, so there are no conflicts.
* Initialize it. Initialization is done twice in many
@@ -1483,7 +1484,9 @@ open_top:
}
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
(void) inb(com->line_status_port);
(void) inb(com->data_port);
com->prev_modem_status = com->last_modem_status
@@ -1495,7 +1498,8 @@ open_top:
outb(com->intr_ctl_port, IER_ERXRDY | IER_ETXRDY
| IER_ERLS | IER_EMSC);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
/*
* Handle initial DCD. Callout devices get a fake initial
* DCD (trapdoor DCD). If we are callout, then any sleeping
@@ -1716,6 +1720,9 @@ siodtrwakeup(chan)
wakeup(&com->dtr_wait);
}
+/*
+ * Call this function with COM_LOCK. It will return with the lock still held.
+ */
static void
sioinput(com)
struct com_s *com;
@@ -1725,6 +1732,7 @@ sioinput(com)
u_char line_status;
int recv_data;
struct tty *tp;
+ int intrsave;
buf = com->ibuf;
tp = com->tp;
@@ -1742,6 +1750,13 @@ sioinput(com)
* call overhead).
*/
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
incc = com->iptr - buf;
if (tp->t_rawq.c_cc + incc > tp->t_ihiwat
@@ -1763,10 +1778,18 @@ sioinput(com)
tp->t_lflag &= ~FLUSHO;
comstart(tp);
}
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
} else {
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
line_status = buf[com->ierroff];
recv_data = *buf++;
@@ -1782,7 +1805,8 @@ sioinput(com)
recv_data |= TTY_PE;
}
(*linesw[tp->t_line].l_rint)(recv_data, tp);
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
}
com_events -= (com->iptr - com->ibuf);
@@ -1893,12 +1917,16 @@ siointr1(com)
if (recv_data == KEY_CR) {
brk_state1 = recv_data;
brk_state2 = 0;
- } else if (brk_state1 == KEY_CR && (recv_data == KEY_TILDE || recv_data == KEY_CRTLB)) {
+ } else if (brk_state1 == KEY_CR
+ && (recv_data == KEY_TILDE
+ || recv_data == KEY_CRTLB)) {
if (recv_data == KEY_TILDE)
brk_state2 = recv_data;
- else if (brk_state2 == KEY_TILDE && recv_data == KEY_CRTLB) {
+ else if (brk_state2 == KEY_TILDE
+ && recv_data == KEY_CRTLB) {
breakpoint();
- brk_state1 = brk_state2 = 0;
+ brk_state1 = 0;
+ brk_state2 = 0;
goto cont;
} else
brk_state2 = 0;
@@ -1949,7 +1977,10 @@ siointr1(com)
if (com->do_timestamp)
microtime(&com->timestamp);
++com_events;
+/* XXX - needs to go away when alpha gets ithreads */
+#ifdef __alpha__
schedsofttty();
+#endif
#if 0 /* for testing input latency vs efficiency */
if (com->iptr - com->ibuf == 8)
setsofttty();
@@ -2217,10 +2248,12 @@ sioioctl(dev, cmd, data, flag, p)
return (0);
}
+/* software interrupt handler for SWI_TTY */
static void
siopoll()
{
int unit;
+ int intrsave;
if (com_events == 0)
return;
@@ -2239,7 +2272,9 @@ repeat:
* Discard any events related to never-opened or
* going-away devices.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
incc = com->iptr - com->ibuf;
com->iptr = com->ibuf;
if (com->state & CS_CHECKMSR) {
@@ -2247,33 +2282,43 @@ repeat:
com->state &= ~CS_CHECKMSR;
}
com_events -= incc;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
continue;
}
if (com->iptr != com->ibuf) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
sioinput(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (com->state & CS_CHECKMSR) {
u_char delta_modem_status;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta_modem_status = com->last_modem_status
^ com->prev_modem_status;
com->prev_modem_status = com->last_modem_status;
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_CHECKMSR;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta_modem_status & MSR_DCD)
(*linesw[tp->t_line].l_modem)
(tp, com->prev_modem_status & MSR_DCD);
}
if (com->state & CS_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (!(com->state & CS_BUSY)
&& !(com->extra_state & CSE_BUSYCHECK)) {
timeout(siobusycheck, com, hz / 100);
@@ -2301,6 +2346,7 @@ comparam(tp, t)
u_char dlbl;
int s;
int unit;
+ int intrsave;
/* do historical conversions */
if (t->c_ispeed == 0)
@@ -2367,11 +2413,10 @@ comparam(tp, t)
sio_setreg(com, com_fifo, com->fifo_image);
}
- /*
- * This returns with interrupts disabled so that we can complete
- * the speed change atomically. Keeping interrupts disabled is
- * especially important while com_data is hidden.
- */
+ intrsave = save_intr();
+ disable_intr();
+ COM_LOCK();
+
(void) siosetwater(com, t->c_ispeed);
if (divisor != 0) {
@@ -2459,7 +2504,8 @@ comparam(tp, t)
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
splx(s);
comstart(tp);
if (com->ibufold != NULL) {
@@ -2478,6 +2524,7 @@ siosetwater(com, speed)
u_char *ibuf;
int ibufsize;
struct tty *tp;
+ int intrsave;
/*
* Make the buffer size large enough to handle a softtty interrupt
@@ -2488,20 +2535,16 @@ siosetwater(com, speed)
cp4ticks = speed / 10 / hz * 4;
for (ibufsize = 128; ibufsize < cp4ticks;)
ibufsize <<= 1;
- if (ibufsize == com->ibufsize) {
- disable_intr();
+ if (ibufsize == com->ibufsize)
return (0);
- }
/*
* Allocate input buffer. The extra factor of 2 in the size is
* to allow for an error byte for each input byte.
*/
ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT);
- if (ibuf == NULL) {
- disable_intr();
+ if (ibuf == NULL)
return (ENOMEM);
- }
/* Initialize non-critical variables. */
com->ibufold = com->ibuf;
@@ -2517,7 +2560,9 @@ siosetwater(com, speed)
* Read current input buffer, if any. Continue with interrupts
* disabled.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->iptr != com->ibuf)
sioinput(com);
@@ -2536,6 +2581,8 @@ siosetwater(com, speed)
com->ibufend = ibuf + ibufsize;
com->ierroff = ibufsize;
com->ihighwater = ibuf + 3 * ibufsize / 4;
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2546,13 +2593,16 @@ comstart(tp)
struct com_s *com;
int s;
int unit;
+ int intrsave;
unit = DEV_TO_UNIT(tp->t_dev);
com = com_addr(unit);
if (com == NULL)
return;
s = spltty();
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (tp->t_state & TS_TTSTOP)
com->state &= ~CS_TTGO;
else
@@ -2565,7 +2615,8 @@ comstart(tp)
&& com->state & CS_RTS_IFLOW)
outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
ttwwakeup(tp);
splx(s);
@@ -2581,7 +2632,9 @@ comstart(tp)
sizeof com->obuf1);
com->obufs[0].l_next = NULL;
com->obufs[0].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2593,7 +2646,8 @@ comstart(tp)
com->obufq.l_next = &com->obufs[0];
com->state |= CS_BUSY;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) {
com->obufs[1].l_tail
@@ -2601,7 +2655,9 @@ comstart(tp)
sizeof com->obuf2);
com->obufs[1].l_next = NULL;
com->obufs[1].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2613,14 +2669,18 @@ comstart(tp)
com->obufq.l_next = &com->obufs[1];
com->state |= CS_BUSY;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
tp->t_state |= TS_BUSY;
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com); /* fake interrupt to start output */
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
ttwwakeup(tp);
splx(s);
}
@@ -2631,11 +2691,14 @@ comstop(tp, rw)
int rw;
{
struct com_s *com;
+ int intrsave;
com = com_addr(DEV_TO_UNIT(tp->t_dev));
if (com == NULL || com->gone)
return;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (rw & FWRITE) {
if (com->hasfifo)
#ifdef COM_ESP
@@ -2662,7 +2725,8 @@ comstop(tp, rw)
com_events -= (com->iptr - com->ibuf);
com->iptr = com->ibuf;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
comstart(tp);
}
@@ -2674,6 +2738,7 @@ commctl(com, bits, how)
{
int mcr;
int msr;
+ int intrsave;
if (how == DMGET) {
bits = TIOCM_LE; /* XXX - always enabled while open */
@@ -2705,7 +2770,9 @@ commctl(com, bits, how)
mcr |= MCR_RTS;
if (com->gone)
return(0);
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
switch (how) {
case DMSET:
outb(com->modem_ctl_port,
@@ -2718,7 +2785,8 @@ commctl(com, bits, how)
outb(com->modem_ctl_port, com->mcr_image &= ~mcr);
break;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2766,6 +2834,7 @@ comwakeup(chan)
{
struct com_s *com;
int unit;
+ int intrsave;
sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout);
@@ -2777,9 +2846,12 @@ comwakeup(chan)
com = com_addr(unit);
if (com != NULL && !com->gone
&& (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
}
@@ -2801,10 +2873,13 @@ comwakeup(chan)
u_int delta;
u_long total;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta = com->delta_error_counts[errnum];
com->delta_error_counts[errnum] = 0;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta == 0)
continue;
total = com->error_counts[errnum] += delta;
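Both serial drivers now document that sioinput() must be entered and left with COM_LOCK() held, yet inside its loops the lock is dropped and interrupts are re-enabled around the call into the line discipline: save-and-enable rather than the usual save-and-disable. A sketch of that inversion, with a hypothetical helper name and the primitives taken from the hunks above:

/* Sketch: the caller holds COM_LOCK() with interrupts disabled. */
static void
hand_off_one_char(struct tty *tp, int c)
{
	int intrsave;

	intrsave = save_intr();			/* interrupts are currently off */
	COM_UNLOCK();
	enable_intr();				/* let other interrupts in */
	(*linesw[tp->t_line].l_rint)(c, tp);	/* potentially slow work */
	restore_intr(intrsave);			/* back to interrupts-off */
	COM_LOCK();				/* reacquire before returning */
}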
diff --git a/sys/fs/cd9660/cd9660_util.c b/sys/fs/cd9660/cd9660_util.c
index 2a11dc2..d0f2e1c 100644
--- a/sys/fs/cd9660/cd9660_util.c
+++ b/sys/fs/cd9660/cd9660_util.c
@@ -41,6 +41,7 @@
*/
#include <sys/param.h>
+#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s
index 2a7559d..54bf003 100644
--- a/sys/i386/i386/apic_vector.s
+++ b/sys/i386/i386/apic_vector.s
@@ -17,7 +17,7 @@
/*
- * Macros for interrupt interrupt entry, call to handler, and exit.
+ * Macros for interrupt entry, call to handler, and exit.
*/
#define FAST_INTR(irq_num, vec_name) \
@@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \
/*
- * Test to see if the source is currntly masked, clear if so.
+ * Test to see if the source is currently masked, clear if so.
*/
#define UNMASK_IRQ(irq_num) \
IMASK_LOCK ; /* into critical reg */ \
@@ -200,7 +200,16 @@ log_intr_event:
#else
#define APIC_ITRACE(name, irq_num, id)
#endif
-
+
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \
maybe_extra_ipending ; \
; \
APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
; \
MASK_LEVEL_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- MP_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f ; /* no */ \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
-; \
incb _intr_nesting_level ; \
; \
/* entry point used by doreti_unpend for HWIs. */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- movl _cpl, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cpl ; \
- lock ; \
- andl $~IRQ_BIT(irq_num), _ipending ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
+ pushl $irq_num; /* pass the IRQ */ \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
MEXITCOUNT ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; /* XXX: iactive bit might be 0 now */ \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- MP_RELLOCK ; \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- POP_FRAME ; /* and return */ \
- iret
+ jmp doreti_next
/*
* Handle "spurious INTerrupts".
@@ -434,20 +380,10 @@ _Xcpuast:
FAKE_MCOUNT(13*4(%esp))
- /*
- * Giant locks do not come cheap.
- * A lot of cycles are going to be wasted here.
- */
- call _get_mplock
-
- movl _cpl, %eax
- pushl %eax
orl $AST_PENDING, _astpending /* XXX */
incb _intr_nesting_level
sti
- pushl $0
-
movl _cpuid, %eax
lock
btrl %eax, _checkstate_pending_ast
@@ -461,7 +397,7 @@ _Xcpuast:
lock
incl CNAME(cpuast_cnt)
MEXITCOUNT
- jmp _doreti
+ jmp doreti_next
1:
/* We are already in the process of delivering an ast for this CPU */
POP_FRAME
@@ -487,40 +423,24 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- MP_TRYLOCK
- testl %eax,%eax /* Did we get the lock ? */
- jz 1f /* No */
-
lock
incl CNAME(forward_irq_hitcnt)
cmpb $4, _intr_nesting_level
- jae 2f
+ jae 1f
- movl _cpl, %eax
- pushl %eax
incb _intr_nesting_level
sti
- pushl $0
-
MEXITCOUNT
- jmp _doreti /* Handle forwarded interrupt */
+ jmp doreti_next /* Handle forwarded interrupt */
1:
lock
- incl CNAME(forward_irq_misscnt)
- call forward_irq /* Oops, we've lost the isr lock */
- MEXITCOUNT
- POP_FRAME
- iret
-2:
- lock
incl CNAME(forward_irq_toodeepcnt)
-3:
- MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
+#if 0
/*
*
*/
@@ -532,9 +452,11 @@ forward_irq:
cmpl $0, CNAME(forward_irq_enabled)
jz 4f
+/* XXX - this is broken now, because mp_lock doesn't exist
movl _mp_lock,%eax
cmpl $FREE_LOCK,%eax
jne 1f
+ */
movl $0, %eax /* Pick CPU #0 if noone has lock */
1:
shrl $24,%eax
@@ -559,6 +481,7 @@ forward_irq:
jnz 3b
4:
ret
+#endif
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
@@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr)
FAST_INTR(22,fastintr22)
FAST_INTR(23,fastintr23)
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, CLKINTR_PENDING)
INTR(1,intr1,)
INTR(2,intr2,)
@@ -728,15 +652,11 @@ _ihandlers:
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
+#if 0
/* active flag for lazy masking */
iactive:
.long 0
+#endif
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
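With the INTR macro rewritten above, a threaded interrupt vector now masks and EOIs its source, pushes the IRQ number and calls _sched_ithd with interrupts enabled, instead of running the handler inline under the giant MP lock. A conceptual C sketch of the hand-off; the structure, table and field names are hypothetical, and the real sched_ithd() in the ithread code differs in detail:

#include <sys/param.h>
#include <sys/systm.h>

struct ithd_sketch {
	void	*it_proc;	/* the interrupt thread; hypothetical field */
	int	it_need;	/* work-pending flag; hypothetical field */
};

extern struct ithd_sketch *ithread_table[];	/* hypothetical per-IRQ table */

void
sched_ithd_sketch(int irq)
{
	struct ithd_sketch *it = ithread_table[irq];

	it->it_need = 1;	/* record the pending interrupt */
	wakeup(it->it_proc);	/* let the interrupt thread run the handler */
}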
diff --git a/sys/i386/i386/autoconf.c b/sys/i386/i386/autoconf.c
index b209065..4edda4b 100644
--- a/sys/i386/i386/autoconf.c
+++ b/sys/i386/i386/autoconf.c
@@ -163,14 +163,6 @@ configure(dummy)
* XXX this is slightly misplaced.
*/
spl0();
-
- /*
- * Allow lowering of the ipl to the lowest kernel level if we
- * panic (or call tsleep() before clearing `cold'). No level is
- * completely safe (since a panic may occur in a critical region
- * at splhigh()), but we want at least bio interrupts to work.
- */
- safepri = cpl;
}
static void
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index acb8b40..9e77114 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -38,6 +38,7 @@
#include <machine/asmacros.h>
#include <machine/ipl.h>
#include <machine/lock.h>
+#include <machine/mutex.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef SMP
@@ -175,20 +176,12 @@ IDTVEC(fpu)
mov %ax,%fs
FAKE_MCOUNT(13*4(%esp))
-#ifdef SMP
MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%eax
- pushl %eax /* save original cpl */
- pushl $0 /* dummy unit to finish intr frame */
-#else /* SMP */
- movl _cpl,%eax
- pushl %eax
pushl $0 /* dummy unit to finish intr frame */
- incl _cnt+V_TRAP
-#endif /* SMP */
+ call __mtx_enter_giant_def
call _npx_intr
+ call __mtx_exit_giant_def
incb _intr_nesting_level
MEXITCOUNT
@@ -205,9 +198,6 @@ IDTVEC(align)
* gate (TGT), else disabled if this was an interrupt gate (IGT).
* Note that int0x80_syscall is a trap gate. Only page faults
* use an interrupt gate.
- *
- * Note that all calls to MP_LOCK must occur with interrupts enabled
- * in order to be able to take IPI's while waiting for the lock.
*/
SUPERALIGN_TEXT
@@ -227,16 +217,12 @@ alltraps_with_regs_pushed:
FAKE_MCOUNT(13*4(%esp))
calltrap:
FAKE_MCOUNT(_btrap) /* init "from" _btrap -> calltrap */
- MPLOCKED incl _cnt+V_TRAP
- MP_LOCK
- movl _cpl,%ebx /* keep orig. cpl here during trap() */
call _trap
/*
* Return via _doreti to handle ASTs. Have to change trap frame
* to interrupt frame.
*/
- pushl %ebx /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
incb _intr_nesting_level
MEXITCOUNT
@@ -274,16 +260,11 @@ IDTVEC(syscall)
movl %eax,TF_EFLAGS(%esp)
movl $7,TF_ERR(%esp) /* sizeof "lcall 7,0" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
@@ -312,21 +293,18 @@ IDTVEC(int0x80_syscall)
mov %ax,%fs
movl $2,TF_ERR(%esp) /* sizeof "int 0x80" */
FAKE_MCOUNT(13*4(%esp))
- MPLOCKED incl _cnt+V_SYSCALL
call _syscall2
MEXITCOUNT
cli /* atomic astpending access */
- cmpl $0,_astpending
- je doreti_syscall_ret
-#ifdef SMP
- MP_LOCK
-#endif
- pushl $0 /* cpl to restore */
+ cmpl $0,_astpending /* AST pending? */
+ je doreti_syscall_ret /* no, get out of here */
subl $4,%esp /* dummy unit for interrupt frame */
movb $1,_intr_nesting_level
jmp _doreti
ENTRY(fork_trampoline)
+ MTX_EXIT(_sched_lock, %ecx)
+ sti
call _spl0
#ifdef SMP
@@ -355,7 +333,6 @@ ENTRY(fork_trampoline)
/*
* Return via _doreti to handle ASTs.
*/
- pushl $0 /* cpl to restore */
subl $4,%esp /* dummy unit to finish intr frame */
movb $1,_intr_nesting_level
MEXITCOUNT
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 60accd1..78c6075 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -51,6 +51,10 @@
#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/resourcevar.h>
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
#include <machine/frame.h>
#include <machine/bootinfo.h>
#include <machine/tss.h>
@@ -73,6 +77,7 @@
#include <machine/sigframe.h>
#include <machine/globaldata.h>
#include <machine/vm86.h>
+#include <machine/mutex.h>
ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
@@ -127,9 +132,7 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
-#ifdef SMP
-ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest));
-#endif
+ASSYM(PCB_SCHEDNEST, offsetof(struct pcb, pcb_schednest));
ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
@@ -170,7 +173,9 @@ ASSYM(BI_ESYMTAB, offsetof(struct bootinfo, bi_esymtab));
ASSYM(BI_KERNEND, offsetof(struct bootinfo, bi_kernend));
ASSYM(GD_SIZEOF, sizeof(struct globaldata));
ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
+ASSYM(GD_PREVPROC, offsetof(struct globaldata, gd_prevproc));
ASSYM(GD_NPXPROC, offsetof(struct globaldata, gd_npxproc));
+ASSYM(GD_IDLEPROC, offsetof(struct globaldata, gd_idleproc));
ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
ASSYM(GD_COMMON_TSS, offsetof(struct globaldata, gd_common_tss));
ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
@@ -178,11 +183,21 @@ ASSYM(GD_SWITCHTICKS, offsetof(struct globaldata, gd_switchticks));
ASSYM(GD_COMMON_TSSD, offsetof(struct globaldata, gd_common_tssd));
ASSYM(GD_TSS_GDT, offsetof(struct globaldata, gd_tss_gdt));
ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
+ASSYM(GD_INTR_NESTING_LEVEL, offsetof(struct globaldata, gd_intr_nesting_level));
#ifdef USER_LDT
ASSYM(GD_CURRENTLDT, offsetof(struct globaldata, gd_currentldt));
#endif
+ASSYM(GD_WITNESS_SPIN_CHECK, offsetof(struct globaldata, gd_witness_spin_check));
+
+/* XXX */
+#ifdef KTR_PERCPU
+ASSYM(GD_KTR_IDX, offsetof(struct globaldata, gd_ktr_idx));
+ASSYM(GD_KTR_BUF, offsetof(struct globaldata, gd_ktr_buf));
+ASSYM(GD_KTR_BUF_DATA, offsetof(struct globaldata, gd_ktr_buf_data));
+#endif
+
#ifdef SMP
ASSYM(GD_CPUID, offsetof(struct globaldata, gd_cpuid));
ASSYM(GD_CPU_LOCKID, offsetof(struct globaldata, gd_cpu_lockid));
@@ -211,3 +226,9 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
+ASSYM(MTX_SAVEFL, offsetof(struct mtx, mtx_savefl));
+
+ASSYM(MTX_UNOWNED, MTX_UNOWNED);
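The new ASSYM() lines export the struct mtx field offsets and MTX_UNOWNED so that the generated assym.s constants let assembly such as the MTX_EXIT in exception.s address mtx_lock without knowing the C layout. A simplified sketch of the idea; the structure below is a stand-in for illustration, not the real struct mtx:

#include <stddef.h>

struct mtx_sketch {		/* stand-in layout for illustration only */
	volatile int	mtx_lock;
	int		mtx_recurse;
	int		mtx_savefl;
};

/* genassym turns expressions like this into assembler-visible constants. */
static const size_t MTX_LOCK_OFFSET = offsetof(struct mtx_sketch, mtx_lock);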
diff --git a/sys/i386/i386/globals.s b/sys/i386/i386/globals.s
index 31fbfd5..f318142 100644
--- a/sys/i386/i386/globals.s
+++ b/sys/i386/i386/globals.s
@@ -61,44 +61,74 @@ globaldata:
#else
.set globaldata,0
#endif
- .globl gd_curproc, gd_curpcb, gd_npxproc, gd_astpending
- .globl gd_common_tss, gd_switchtime, gd_switchticks
+ .globl gd_curproc, gd_prevproc, gd_curpcb, gd_npxproc, gd_idleproc
+ .globl gd_astpending, gd_common_tss, gd_switchtime, gd_switchticks
+ .globl gd_intr_nesting_level
.set gd_curproc,globaldata + GD_CURPROC
+ .set gd_prevproc,globaldata + GD_PREVPROC
.set gd_astpending,globaldata + GD_ASTPENDING
.set gd_curpcb,globaldata + GD_CURPCB
.set gd_npxproc,globaldata + GD_NPXPROC
+ .set gd_idleproc,globaldata + GD_IDLEPROC
.set gd_common_tss,globaldata + GD_COMMON_TSS
.set gd_switchtime,globaldata + GD_SWITCHTIME
.set gd_switchticks,globaldata + GD_SWITCHTICKS
+ .set gd_intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL
.globl gd_common_tssd, gd_tss_gdt
.set gd_common_tssd,globaldata + GD_COMMON_TSSD
.set gd_tss_gdt,globaldata + GD_TSS_GDT
+ .globl gd_witness_spin_check
+ .set gd_witness_spin_check, globaldata + GD_WITNESS_SPIN_CHECK
+
#ifdef USER_LDT
.globl gd_currentldt
.set gd_currentldt,globaldata + GD_CURRENTLDT
#endif
+/* XXX - doesn't work yet */
+#ifdef KTR_PERCPU
+ .globl gd_ktr_idx, gd_ktr_buf, gd_ktr_buf_data
+ .set gd_ktr_idx,globaldata + GD_KTR_IDX
+ .set gd_ktr_buf,globaldata + GD_KTR_BUF
+ .set gd_ktr_buf_data,globaldata + GD_KTR_BUF_DATA
+#endif
+
#ifndef SMP
- .globl _curproc, _curpcb, _npxproc, _astpending
- .globl _common_tss, _switchtime, _switchticks
+ .globl _curproc, _prevproc, _curpcb, _npxproc, _idleproc,
+ .globl _astpending, _common_tss, _switchtime, _switchticks
+ .global _intr_nesting_level
.set _curproc,globaldata + GD_CURPROC
+ .set _prevproc,globaldata + GD_PREVPROC
.set _astpending,globaldata + GD_ASTPENDING
.set _curpcb,globaldata + GD_CURPCB
.set _npxproc,globaldata + GD_NPXPROC
+ .set _idleproc,globaldata + GD_IDLEPROC
.set _common_tss,globaldata + GD_COMMON_TSS
.set _switchtime,globaldata + GD_SWITCHTIME
.set _switchticks,globaldata + GD_SWITCHTICKS
+ .set _intr_nesting_level,globaldata + GD_INTR_NESTING_LEVEL
.globl _common_tssd, _tss_gdt
.set _common_tssd,globaldata + GD_COMMON_TSSD
.set _tss_gdt,globaldata + GD_TSS_GDT
+ .globl _witness_spin_check
+ .set _witness_spin_check,globaldata + GD_WITNESS_SPIN_CHECK
+
#ifdef USER_LDT
.globl _currentldt
.set _currentldt,globaldata + GD_CURRENTLDT
#endif
+
+/* XXX - doesn't work yet */
+#ifdef KTR_PERCPU
+ .globl _ktr_idx, _ktr_buf, _ktr_buf_data
+ .set _ktr_idx,globaldata + GD_KTR_IDX
+ .set _ktr_buf,globaldata + GD_KTR_BUF
+ .set _ktr_buf_data,globaldata + GD_KTR_BUF_DATA
+#endif
#endif
#ifdef SMP
diff --git a/sys/i386/i386/i386-gdbstub.c b/sys/i386/i386/i386-gdbstub.c
index 986b8d4..b442a37 100644
--- a/sys/i386/i386/i386-gdbstub.c
+++ b/sys/i386/i386/i386-gdbstub.c
@@ -188,7 +188,8 @@ getpacket (char *buffer)
unsigned char ch;
int s;
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/* wait around for the start character, ignore all other characters */
@@ -239,7 +240,7 @@ getpacket (char *buffer)
}
}
while (checksum != xmitcsum);
- splx (s);
+ write_eflags(s);
}
/* send the packet in buffer. */
@@ -253,7 +254,8 @@ putpacket (char *buffer)
int s;
/* $<packet info>#<checksum>. */
- s = spltty ();
+ s = read_eflags();
+ disable_intr();
do
{
/*
@@ -285,7 +287,7 @@ putpacket (char *buffer)
putDebugChar (hexchars[checksum & 0xf]);
}
while ((getDebugChar () & 0x7f) != '+');
- splx (s);
+ write_eflags(s);
}
static char remcomInBuffer[BUFMAX];
diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c
index 0e11e2b..71ecd63 100644
--- a/sys/i386/i386/identcpu.c
+++ b/sys/i386/i386/identcpu.c
@@ -42,6 +42,7 @@
#include "opt_cpu.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -53,6 +54,8 @@
#include <machine/specialreg.h>
#include <machine/md_var.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#define IDENTBLUE_CYRIX486 0
diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c
index be86c65..b9395bf 100644
--- a/sys/i386/i386/initcpu.c
+++ b/sys/i386/i386/initcpu.c
@@ -607,12 +607,14 @@ void
enable_K5_wt_alloc(void)
{
u_int64_t msr;
+ int intrstate;
/*
* Write allocate is supported only on models 1, 2, and 3, with
* a stepping of 4 or greater.
*/
if (((cpu_id & 0xf0) > 0) && ((cpu_id & 0x0f) > 3)) {
+ intrstate = save_intr();
disable_intr();
msr = rdmsr(0x83); /* HWCR */
wrmsr(0x83, msr & !(0x10));
@@ -645,7 +647,7 @@ enable_K5_wt_alloc(void)
msr=rdmsr(0x83);
wrmsr(0x83, msr|0x10); /* enable write allocate */
- enable_intr();
+ restore_intr(intrstate);
}
}
@@ -708,7 +710,6 @@ enable_K6_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
void
@@ -770,7 +771,6 @@ enable_K6_2_wt_alloc(void)
wrmsr(0x0c0000082, whcr);
write_eflags(eflags);
- enable_intr();
}
#endif /* I585_CPU && CPU_WT_ALLOC */
diff --git a/sys/i386/i386/legacy.c b/sys/i386/i386/legacy.c
index 8a30770..5b6cdbc 100644
--- a/sys/i386/i386/legacy.c
+++ b/sys/i386/i386/legacy.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
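With the spl masks gone, nexus_setup_intr() above translates the INTR_TYPE_* flags into an interrupt-thread priority and passes that priority to inthand_add(). Restated as a standalone helper for clarity (the function name is invented; the PI_* values are the ones the diff uses and are assumed to come from sys/rtprio.h, per the new include):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/rtprio.h>

static int
intr_flags_to_pri(int flags)
{

	switch (flags) {
	case INTR_TYPE_TTY:			/* keyboard or parallel port */
		return (PI_TTYLOW);
	case (INTR_TYPE_TTY | INTR_FAST):	/* sio */
		return (PI_TTYHIGH);
	case INTR_TYPE_BIO:			/* disk and tape, not yet distinguished */
		return (PI_DISK);
	case INTR_TYPE_NET:
		return (PI_NET);
	case INTR_TYPE_CAM:
		return (PI_DISK);		/* XXX or PI_CAM? */
	case INTR_TYPE_MISC:
		return (PI_DULL);		/* don't care */
	default:
		panic("intr_flags_to_pri: no interrupt type in flags");
	}
}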
diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s
index bddd7d5..fa95fb0 100644
--- a/sys/i386/i386/locore.s
+++ b/sys/i386/i386/locore.s
@@ -862,9 +862,6 @@ map_read_write:
movl $(NPTEPG-1), %ebx /* pte offset = NTEPG-1 */
movl $1, %ecx /* one private pt coming right up */
fillkpt(R(SMPptpa), $PG_RW)
-
-/* Initialize mp lock to allow early traps */
- movl $1, R(_mp_lock)
#endif /* SMP */
/* install a pde for temporary double map of bottom of VA */
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 6edecf0..875c9d5 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -58,6 +58,7 @@
#include <sys/sysproto.h>
#include <sys/signalvar.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/linker.h>
#include <sys/malloc.h>
#include <sys/proc.h>
@@ -98,10 +99,12 @@
#include <machine/bootinfo.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb_ext.h> /* pcb.h included via sys/user.h */
+#include <machine/globaldata.h>
+#include <machine/globals.h>
#ifdef SMP
#include <machine/smp.h>
-#include <machine/globaldata.h>
#endif
#ifdef PERFMON
#include <machine/perfmon.h>
@@ -110,6 +113,7 @@
#ifdef OLD_BUS_ARCH
#include <i386/isa/isa_device.h>
#endif
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#include <isa/rtc.h>
#include <machine/vm86.h>
@@ -247,6 +251,11 @@ vm_offset_t clean_sva, clean_eva;
static vm_offset_t pager_sva, pager_eva;
static struct trapframe proc0_tf;
+struct cpuhead cpuhead;
+
+mtx_t sched_lock;
+mtx_t Giant;
+
#define offsetof(type, member) ((size_t)(&((type *)0)->member))
static void
@@ -431,6 +440,11 @@ again:
bufinit();
vm_pager_bufferinit();
+ SLIST_INIT(&cpuhead);
+ SLIST_INSERT_HEAD(&cpuhead, GLOBALDATA, gd_allcpu);
+
+ mtx_init(&sched_lock, "sched lock", MTX_SPIN);
+
#ifdef SMP
/*
* OK, enough kmem_alloc/malloc state should be up, lets get on with it!
@@ -1817,11 +1831,6 @@ init386(first)
#endif
int off;
- /*
- * Prevent lowering of the ipl if we call tsleep() early.
- */
- safepri = cpl;
-
proc0.p_addr = proc0paddr;
atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1871,6 +1880,10 @@ init386(first)
r_gdt.rd_base = (int) gdt;
lgdt(&r_gdt);
+ /* setup curproc so that mutexes work */
+ PCPU_SET(curproc, &proc0);
+ PCPU_SET(prevproc, &proc0);
+
/* make ldt memory segments */
/*
* The data segment limit must not cover the user area because we
@@ -1953,7 +1966,7 @@ init386(first)
/* make an initial tss so cpu can get interrupt stack on syscall! */
common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE - 16;
- common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ;
+ common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
private_tss = 0;
tss_gdt = &gdt[GPROC0_SEL].sd;
@@ -1974,6 +1987,12 @@ init386(first)
dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+ /*
+ * We grab Giant during the vm86bios routines, so we need to ensure
+ * that it is up and running before we use vm86.
+ */
+ mtx_init(&Giant, "Giant", MTX_DEF);
+
vm86_initialize();
getmemsize(first);
@@ -2009,9 +2028,7 @@ init386(first)
/* setup proc 0's pcb */
proc0.p_addr->u_pcb.pcb_flags = 0;
proc0.p_addr->u_pcb.pcb_cr3 = (int)IdlePTD;
-#ifdef SMP
- proc0.p_addr->u_pcb.pcb_mpnest = 1;
-#endif
+ proc0.p_addr->u_pcb.pcb_schednest = 0;
proc0.p_addr->u_pcb.pcb_ext = 0;
proc0.p_md.md_regs = &proc0_tf;
}
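Two machdep.c changes above belong together: a CPU must have a curproc before it may own one of the new mutexes, and Giant has to exist before the vm86 BIOS calls that acquire it. Condensed into one hypothetical helper to show the ordering (the mtx_init() calls and PCPU_SET() usage are taken directly from the diff):

#include <sys/param.h>
#include <sys/proc.h>
#include <machine/globals.h>
#include <machine/mutex.h>

mtx_t	sched_lock;	/* spin mutex guarding the scheduler */
mtx_t	Giant;		/* the old kernel-wide lock, now a sleep mutex */

static void
early_sync_init(void)
{

	PCPU_SET(curproc, &proc0);	/* mutexes record their owner */
	PCPU_SET(prevproc, &proc0);
	mtx_init(&sched_lock, "sched lock", MTX_SPIN);
	mtx_init(&Giant, "Giant", MTX_DEF);	/* needed before vm86_initialize() */
}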
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 61c5ecf..95b5759 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
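The AP bring-up handshake that replaces the old wait_ap()/mp_lock dance is spread across mp_enable(), ap_init() and the release_aps() SYSINIT above. Pulled together, the AP side looks roughly like this (assumed helper name, per-CPU setup elided, headers are a guess):

#include <machine/mutex.h>
#include <machine/smp.h>

static void
ap_boot_handshake(void)
{

	s_lock(&ap_boot_lock);		/* held by the BSP until SI_SUB_SMP */
	/* ... per-CPU setup: LAPIC, TLB flush, curproc = idleproc ... */
	s_unlock(&ap_boot_lock);	/* admit the next AP */
	while (smp_started == 0)
		;			/* spin until the last AP is up */
	enable_intr();
	mtx_enter(&sched_lock, MTX_SPIN);
	cpu_throw();			/* enter the scheduler; never returns */
}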
diff --git a/sys/i386/i386/mpapic.c b/sys/i386/i386/mpapic.c
index a3594a8..3f971d8 100644
--- a/sys/i386/i386/mpapic.c
+++ b/sys/i386/i386/mpapic.c
@@ -28,11 +28,14 @@
#include "opt_smp.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
+#include <sys/proc.h>
#include <machine/smptests.h> /** TEST_TEST1 */
#include <machine/smp.h>
#include <machine/mpapic.h>
+#include <machine/globaldata.h>
#include <machine/segments.h>
#include <i386/isa/intr_machdep.h> /* Xspuriousint() */
diff --git a/sys/i386/i386/mpboot.s b/sys/i386/i386/mpboot.s
index d3602d2..9ede02c 100644
--- a/sys/i386/i386/mpboot.s
+++ b/sys/i386/i386/mpboot.s
@@ -114,43 +114,9 @@ mp_begin: /* now running relocated at KERNBASE */
CHECKPOINT(0x39, 6)
- /* wait till we can get into the kernel */
- call _boot_get_mplock
-
- /* Now, let's prepare for some REAL WORK :-) */
+ /* Now, let's prepare for some REAL WORK :-) This doesn't return. */
call _ap_init
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,-4(%esp)
-2:
- cmpl $0, CNAME(smp_started) /* Wait for last AP to be ready */
- jz 2b
- call _get_mplock
-
- /* let her rip! (loads new stack) */
- jmp _cpu_switch
-
-NON_GPROF_ENTRY(wait_ap)
- pushl %ebp
- movl %esp, %ebp
- call _rel_mplock
- lock /* Avoid livelock (PIII Errata 39) */
- addl $0,0(%esp)
- movl %eax, 8(%ebp)
-1:
- cmpl $0, CNAME(smp_started)
- jnz 2f
- decl %eax
- cmpl $0, %eax
- jge 1b
-2:
- call _get_mplock
- movl %ebp, %esp
- popl %ebp
- ret
-
-
/*
* This is the embedded trampoline or bootstrap that is
* copied into 'real-mode' low memory, it is where the
diff --git a/sys/i386/i386/mplock.s b/sys/i386/i386/mplock.s
deleted file mode 100644
index dc5ba01..0000000
--- a/sys/i386/i386/mplock.s
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * ----------------------------------------------------------------------------
- * "THE BEER-WARE LICENSE" (Revision 42):
- * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
- * can do whatever you want with this stuff. If we meet some day, and you think
- * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
- * ----------------------------------------------------------------------------
- *
- * $FreeBSD$
- *
- * Functions for locking between CPUs in a SMP system.
- *
- * This is an "exclusive counting semaphore". This means that it can be
- * free (0xffffffff) or be owned by a CPU (0xXXYYYYYY where XX is CPU-id
- * and YYYYYY is the count).
- *
- * Contrary to most implementations around, this one is entirely atomic:
- * The attempt to seize/release the semaphore and the increment/decrement
- * is done in one atomic operation. This way we are safe from all kinds
- * of weird reentrancy situations.
- */
-
-#include <machine/asmacros.h>
-#include <machine/smptests.h> /** GRAB_LOPRIO */
-#include <machine/apic.h>
-
-#define GLPROFILE_NOT
-
-#ifdef CHEAP_TPR
-
-/* we assumme that the 'reserved bits' can be written with zeros */
-
-#else /* CHEAP_TPR */
-
-#error HEADS UP: this code needs work
-/*
- * The APIC doc says that reserved bits must be written with whatever
- * value they currently contain, ie you should: read, modify, write,
- * instead of just writing new values to the TPR register. Current
- * silicon seems happy with just writing. If the behaviour of the
- * silicon changes, all code that access the lapic_tpr must be modified.
- * The last version to contain such code was:
- * Id: mplock.s,v 1.17 1997/08/10 20:59:07 fsmp Exp
- */
-
-#endif /* CHEAP_TPR */
-
-#ifdef GRAB_LOPRIO
-/*
- * Claim LOWest PRIOrity, ie. attempt to grab ALL INTerrupts.
- */
-
-/* after 1st acquire of lock we grab all hardware INTs */
-#define GRAB_HWI movl $ALLHWI_LEVEL, lapic_tpr
-
-/* after last release of lock give up LOW PRIO (ie, arbitrate INTerrupts) */
-#define ARB_HWI movl $LOPRIO_LEVEL, lapic_tpr /* CHEAP_TPR */
-
-#else /* GRAB_LOPRIO */
-
-#define GRAB_HWI /* nop */
-#define ARB_HWI /* nop */
-
-#endif /* GRAB_LOPRIO */
-
-
- .text
-
-#ifdef SMP
-
-/***********************************************************************
- * void MPgetlock_edx(unsigned int *lock : %edx)
- * ----------------------------------
- * Destroys %eax, %ecx. %edx must hold lock argument.
- *
- * Grabs hardware interrupts on first aquire.
- *
- * NOTE: Serialization is not required if we already hold the lock, since
- * we already hold the lock, nor do we need a locked instruction if we
- * already hold the lock.
- */
-
-NON_GPROF_ENTRY(MPgetlock_edx)
-1:
- movl (%edx), %eax /* Get current contents of lock */
- movl %eax, %ecx
- andl $CPU_FIELD,%ecx
- cmpl _cpu_lockid, %ecx /* Do we already own the lock? */
- jne 2f
- incl %eax /* yes, just bump the count */
- movl %eax, (%edx) /* serialization not required */
- ret
-2:
- movl $FREE_LOCK, %eax /* lock must be free */
- movl _cpu_lockid, %ecx
- incl %ecx
- lock
- cmpxchg %ecx, (%edx) /* attempt to replace %eax<->%ecx */
-#ifdef GLPROFILE
- jne 3f
- incl _gethits2
-#else
- jne 1b
-#endif /* GLPROFILE */
- GRAB_HWI /* 1st acquire, grab hw INTs */
- ret
-#ifdef GLPROFILE
-3:
- incl _gethits3
- jmp 1b
-#endif
-
-/***********************************************************************
- * int MPtrylock(unsigned int *lock)
- * ---------------------------------
- * Destroys %eax, %ecx and %edx.
- * Returns 1 if lock was successfull
- */
-
-NON_GPROF_ENTRY(MPtrylock)
- movl 4(%esp), %edx /* Get the address of the lock */
-
- movl $FREE_LOCK, %eax /* Assume it's free */
- movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */
- incl %ecx /* - new count is one */
- lock
- cmpxchg %ecx, (%edx) /* - try it atomically */
- jne 1f /* ...do not collect $200 */
-#ifdef GLPROFILE
- incl _tryhits2
-#endif /* GLPROFILE */
- GRAB_HWI /* 1st acquire, grab hw INTs */
- movl $1, %eax
- ret
-1:
- movl (%edx), %eax /* Try to see if we have it already */
- andl $COUNT_FIELD, %eax /* - get count */
- movl _cpu_lockid, %ecx /* - get pre-shifted logical cpu id */
- orl %ecx, %eax /* - combine them */
- movl %eax, %ecx
- incl %ecx /* - new count is one more */
- lock
- cmpxchg %ecx, (%edx) /* - try it atomically */
- jne 2f /* - miss */
-#ifdef GLPROFILE
- incl _tryhits
-#endif /* GLPROFILE */
- movl $1, %eax
- ret
-2:
-#ifdef GLPROFILE
- incl _tryhits3
-#endif /* GLPROFILE */
- movl $0, %eax
- ret
-
-
-/***********************************************************************
- * void MPrellock_edx(unsigned int *lock : %edx)
- * ----------------------------------
- * Destroys %ecx, argument must be in %edx
- *
- * SERIALIZATION NOTE!
- *
- * After a lot of arguing, it turns out that there is no problem with
- * not having a synchronizing instruction in the MP unlock code. There
- * are two things to keep in mind: First, Intel guarentees that writes
- * are ordered amoungst themselves. Second, the P6 is allowed to reorder
- * reads around writes. Third, the P6 maintains cache consistency (snoops
- * the bus). The second is not an issue since the one read we do is the
- * basis for the conditional which determines whether the write will be
- * made or not.
- *
- * Therefore, no synchronizing instruction is required on unlock. There are
- * three performance cases: First, if a single cpu is getting and releasing
- * the lock the removal of the synchronizing instruction saves approx
- * 200 nS (testing w/ duel cpu PIII 450). Second, if one cpu is contending
- * for the lock while the other holds it, the removal of the synchronizing
- * instruction results in a 700nS LOSS in performance. Third, if two cpu's
- * are switching off ownership of the MP lock but not contending for it (the
- * most common case), this results in a 400nS IMPROVEMENT in performance.
- *
- * Since our goal is to reduce lock contention in the first place, we have
- * decided to remove the synchronizing instruction from the unlock code.
- */
-
-NON_GPROF_ENTRY(MPrellock_edx)
- movl (%edx), %ecx /* - get the value */
- decl %ecx /* - new count is one less */
- testl $COUNT_FIELD, %ecx /* - Unless it's zero... */
- jnz 2f
- ARB_HWI /* last release, arbitrate hw INTs */
- movl $FREE_LOCK, %ecx /* - In which case we release it */
-#if 0
- lock
- addl $0,0(%esp) /* see note above */
-#endif
-2:
- movl %ecx, (%edx)
- ret
-
-/***********************************************************************
- * void get_mplock()
- * -----------------
- * All registers preserved
- *
- * Stack (after call to _MPgetlock):
- *
- * edx 4(%esp)
- * ecx 8(%esp)
- * eax 12(%esp)
- *
- * Requirements: Interrupts should be enabled on call so we can take
- * IPI's and FAST INTs while we are waiting for the lock
- * (else the system may not be able to halt).
- *
- * XXX there are still places where get_mplock() is called
- * with interrupts disabled, so we have to temporarily reenable
- * interrupts.
- *
- * Side effects: The current cpu will be given ownership of the
- * hardware interrupts when it first aquires the lock.
- *
- * Costs: Initial aquisition requires the use of a costly locked
- * instruction, but recursive aquisition is cheap. Release
- * is very cheap.
- */
-
-NON_GPROF_ENTRY(get_mplock)
- pushl %eax
- pushl %ecx
- pushl %edx
- movl $_mp_lock, %edx
- pushfl
- testl $(1<<9), (%esp)
- jz 2f
- call _MPgetlock_edx
- addl $4,%esp
-1:
- popl %edx
- popl %ecx
- popl %eax
- ret
-2:
- sti
- call _MPgetlock_edx
- popfl
- jmp 1b
-
-/*
- * Special version of get_mplock that is used during bootstrap when we can't
- * yet enable interrupts of any sort since the APIC isn't online yet. We
- * do an endrun around MPgetlock_edx to avoid enabling interrupts.
- *
- * XXX FIXME.. - APIC should be online from the start to simplify IPI's.
- */
-NON_GPROF_ENTRY(boot_get_mplock)
- pushl %eax
- pushl %ecx
- pushl %edx
-#ifdef GRAB_LOPRIO
- pushfl
- pushl lapic_tpr
- cli
-#endif
-
- movl $_mp_lock, %edx
- call _MPgetlock_edx
-
-#ifdef GRAB_LOPRIO
- popl lapic_tpr
- popfl
-#endif
- popl %edx
- popl %ecx
- popl %eax
- ret
-
-/***********************************************************************
- * void try_mplock()
- * -----------------
- * reg %eax == 1 if success
- */
-
-NON_GPROF_ENTRY(try_mplock)
- pushl %ecx
- pushl %edx
- pushl $_mp_lock
- call _MPtrylock
- add $4, %esp
- popl %edx
- popl %ecx
- ret
-
-/***********************************************************************
- * void rel_mplock()
- * -----------------
- * All registers preserved
- */
-
-NON_GPROF_ENTRY(rel_mplock)
- pushl %ecx
- pushl %edx
- movl $_mp_lock,%edx
- call _MPrellock_edx
- popl %edx
- popl %ecx
- ret
-
-#endif
-
-/***********************************************************************
- *
- */
- .data
- .p2align 2 /* xx_lock aligned on int boundary */
-
-#ifdef SMP
-
- .globl _mp_lock
-_mp_lock: .long 0
-
-#ifdef GLPROFILE
- .globl _gethits
-_gethits:
- .long 0
-_gethits2:
- .long 0
-_gethits3:
- .long 0
-
- .globl _tryhits
-_tryhits:
- .long 0
-_tryhits2:
- .long 0
-_tryhits3:
- .long 0
-
-msg:
- .asciz "lock hits: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n"
-#endif /* GLPROFILE */
-#endif /* SMP */
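mplock.s goes away entirely; its comments describe the lock word as an exclusive counting semaphore, 0xffffffff when free, otherwise the pre-shifted CPU id in the top byte plus a recursion count below it. For reference only, the acquire path rendered in C under that layout (FREE_LOCK, CPU_FIELD and cpu_lockid as in the deleted file):

#include <machine/atomic.h>

static void
mp_getlock(volatile u_int *lock)
{
	u_int old;

	for (;;) {
		old = *lock;
		if ((old & CPU_FIELD) == cpu_lockid) {
			*lock = old + 1;	/* already ours: bump the count, no lock prefix needed */
			return;
		}
		/* otherwise it must be free; install our id with a count of one */
		if (atomic_cmpset_int(lock, FREE_LOCK, cpu_lockid + 1))
			return;
	}
}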
diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c
index 61c5ecf..95b5759 100644
--- a/sys/i386/i386/mptable.c
+++ b/sys/i386/i386/mptable.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
diff --git a/sys/i386/i386/nexus.c b/sys/i386/i386/nexus.c
index 8a30770..5b6cdbc 100644
--- a/sys/i386/i386/nexus.c
+++ b/sys/i386/i386/nexus.c
@@ -68,7 +68,10 @@
#else
#include <i386/isa/isa.h>
#endif
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
+#include <sys/rtprio.h>
static struct rman irq_rman, drq_rman, port_rman, mem_rman;
@@ -397,9 +400,9 @@ static int
nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
int flags, void (*ihand)(void *), void *arg, void **cookiep)
{
- intrmask_t *mask;
driver_t *driver;
- int error, icflags;
+ int error, icflags;
+ int pri; /* interrupt thread priority */
/* somebody tried to setup an irq that failed to allocate! */
if (irq == NULL)
@@ -413,27 +416,32 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
driver = device_get_driver(child);
switch (flags) {
- case INTR_TYPE_TTY:
- mask = &tty_imask;
+ case INTR_TYPE_TTY: /* keyboard or parallel port */
+ pri = PI_TTYLOW;
break;
- case (INTR_TYPE_TTY | INTR_TYPE_FAST):
- mask = &tty_imask;
+ case (INTR_TYPE_TTY | INTR_FAST): /* sio */
+ pri = PI_TTYHIGH;
icflags |= INTR_FAST;
break;
case INTR_TYPE_BIO:
- mask = &bio_imask;
+ /*
+ * XXX We need to refine this. BSD/OS distinguishes
+ * between tape and disk priorities.
+ */
+ pri = PI_DISK;
break;
case INTR_TYPE_NET:
- mask = &net_imask;
+ pri = PI_NET;
break;
case INTR_TYPE_CAM:
- mask = &cam_imask;
+ pri = PI_DISK; /* XXX or PI_CAM? */
break;
case INTR_TYPE_MISC:
- mask = 0;
+ pri = PI_DULL; /* don't care */
break;
+ /* We didn't specify an interrupt level. */
default:
- panic("still using grody create_intr interface");
+ panic("nexus_setup_intr: no interrupt type in flags");
}
/*
@@ -444,7 +452,7 @@ nexus_setup_intr(device_t bus, device_t child, struct resource *irq,
return (error);
*cookiep = inthand_add(device_get_nameunit(child), irq->r_start,
- ihand, arg, mask, icflags);
+ ihand, arg, pri, icflags);
if (*cookiep == NULL)
error = EINVAL; /* XXX ??? */
diff --git a/sys/i386/i386/perfmon.c b/sys/i386/i386/perfmon.c
index 574f416..2efa516 100644
--- a/sys/i386/i386/perfmon.c
+++ b/sys/i386/i386/perfmon.c
@@ -118,16 +118,19 @@ perfmon_avail(void)
int
perfmon_setup(int pmc, unsigned int control)
{
+ int intrstate;
+
if (pmc < 0 || pmc >= NPMC)
return EINVAL;
perfmon_inuse |= (1 << pmc);
control &= ~(PMCF_SYS_FLAGS << 16);
+ intrstate = save_intr();
disable_intr();
ctl_shadow[pmc] = control;
writectl(pmc);
wrmsr(msr_pmc[pmc], pmc_shadow[pmc] = 0);
- enable_intr();
+ restore_intr(intrstate);
return 0;
}
@@ -162,15 +165,18 @@ perfmon_fini(int pmc)
int
perfmon_start(int pmc)
{
+ int intrstate;
+
if (pmc < 0 || pmc >= NPMC)
return EINVAL;
if (perfmon_inuse & (1 << pmc)) {
+ intrstate = save_intr();
disable_intr();
ctl_shadow[pmc] |= (PMCF_EN << 16);
wrmsr(msr_pmc[pmc], pmc_shadow[pmc]);
writectl(pmc);
- enable_intr();
+ restore_intr(intrstate);
return 0;
}
return EBUSY;
@@ -179,15 +185,18 @@ perfmon_start(int pmc)
int
perfmon_stop(int pmc)
{
+ int intrstate;
+
if (pmc < 0 || pmc >= NPMC)
return EINVAL;
if (perfmon_inuse & (1 << pmc)) {
+ intrstate = save_intr();
disable_intr();
pmc_shadow[pmc] = rdmsr(msr_pmc[pmc]) & 0xffffffffffULL;
ctl_shadow[pmc] &= ~(PMCF_EN << 16);
writectl(pmc);
- enable_intr();
+ restore_intr(intrstate);
return 0;
}
return EBUSY;
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index edae292..7ce9120 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -668,7 +668,7 @@ pmap_pte_quick(pmap, va)
* (unsigned *) prv_PMAP1 = newpf | PG_RW | PG_V;
cpu_invlpg(prv_PADDR1);
}
- return prv_PADDR1 + ((unsigned) index & (NPTEPG - 1));
+ return (unsigned *)(prv_PADDR1 + (index & (NPTEPG - 1)));
#else
if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) {
* (unsigned *) PMAP1 = newpf | PG_RW | PG_V;
diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s
index c895fef..db56a1b 100644
--- a/sys/i386/i386/swtch.s
+++ b/sys/i386/i386/swtch.s
@@ -73,189 +73,6 @@ _tlb_flush_count: .long 0
.text
-/*
- * When no processes are on the runq, cpu_switch() branches to _idle
- * to wait for something to come ready.
- */
- ALIGN_TEXT
- .type _idle,@function
-_idle:
- xorl %ebp,%ebp
- movl %ebp,_switchtime
-
-#ifdef SMP
-
- /* when called, we have the mplock, intr disabled */
- /* use our idleproc's "context" */
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
- /* Keep space for nonexisting return addr, or profiling bombs */
- movl $gd_idlestack_top-4, %ecx
- addl %fs:0, %ecx
- movl %ecx, %esp
-
- /* update common_tss.tss_esp0 pointer */
- movl %ecx, _common_tss + TSS_ESP0
-
- movl _cpuid, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $gd_common_tssd, %edi
- addl %fs:0, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- *
- * NOTE: spl*() may only be called while we hold the MP lock (which
- * we do).
- */
- call _spl0
-
- cli
-
- /*
- * _REALLY_ free the lock, no matter how deep the prior nesting.
- * We will recover the nesting on the way out when we have a new
- * proc to load.
- *
- * XXX: we had damn well better be sure we had it before doing this!
- */
- movl $FREE_LOCK, %eax
- movl %eax, _mp_lock
-
- /* do NOT have lock, intrs disabled */
- .globl idle_loop
-idle_loop:
-
- cmpl $0,_smp_active
- jne 1f
- cmpl $0,_cpuid
- je 1f
- jmp 2f
-
-1:
- call _procrunnable
- testl %eax,%eax
- jnz 3f
-
- /*
- * Handle page-zeroing in the idle loop. Called with interrupts
- * disabled and the MP lock released. Inside vm_page_zero_idle
- * we enable interrupts and grab the mplock as required.
- */
- cmpl $0,_do_page_zero_idle
- je 2f
-
- call _vm_page_zero_idle /* internal locking */
- testl %eax, %eax
- jnz idle_loop
-2:
-
- /* enable intrs for a halt */
- movl $0, lapic_tpr /* 1st candidate for an INT */
- call *_hlt_vector /* wait for interrupt */
- cli
- jmp idle_loop
-
- /*
- * Note that interrupts must be enabled while obtaining the MP lock
- * in order to be able to take IPI's while blocked.
- */
-3:
- movl $LOPRIO_LEVEL, lapic_tpr /* arbitrate for INTs */
- sti
- call _get_mplock
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _rel_mplock
- jmp idle_loop
-
-#else /* !SMP */
-
- movl $HIDENAME(tmpstk),%esp
-#if defined(OVERLY_CONSERVATIVE_PTD_MGMT)
-#if defined(SWTCH_OPTIM_STATS)
- incl _swtch_optim_stats
-#endif
- movl _IdlePTD, %ecx
- movl %cr3, %eax
- cmpl %ecx, %eax
- je 2f
-#if defined(SWTCH_OPTIM_STATS)
- decl _swtch_optim_stats
- incl _tlb_flush_count
-#endif
- movl %ecx, %cr3
-2:
-#endif
-
- /* update common_tss.tss_esp0 pointer */
- movl %esp, _common_tss + TSS_ESP0
-
- movl $0, %esi
- btrl %esi, _private_tss
- jae 1f
-
- movl $_common_tssd, %edi
-
- /* move correct tss descriptor into GDT slot, then reload tr */
- movl _tss_gdt, %ebx /* entry in GDT */
- movl 0(%edi), %eax
- movl %eax, 0(%ebx)
- movl 4(%edi), %eax
- movl %eax, 4(%ebx)
- movl $GPROC0_SEL*8, %esi /* GSEL(entry, SEL_KPL) */
- ltr %si
-1:
-
- sti
-
- /*
- * XXX callers of cpu_switch() do a bogus splclock(). Locking should
- * be left to cpu_switch().
- */
- call _spl0
-
- ALIGN_TEXT
-idle_loop:
- cli
- call _procrunnable
- testl %eax,%eax
- CROSSJUMP(jnz, sw1a, jz)
- call _vm_page_zero_idle
- testl %eax, %eax
- jnz idle_loop
- call *_hlt_vector /* wait for interrupt */
- jmp idle_loop
-
-#endif /* SMP */
-
-CROSSJUMPTARGET(_idle)
-
ENTRY(default_halt)
sti
#ifndef SMP
@@ -264,16 +81,23 @@ ENTRY(default_halt)
ret
/*
+ * cpu_throw()
+ */
+ENTRY(cpu_throw)
+ jmp sw1
+
+/*
* cpu_switch()
*/
ENTRY(cpu_switch)
/* switch to new process. first, save context as needed */
movl _curproc,%ecx
+ movl %ecx,_prevproc
/* if no process to save, don't bother */
testl %ecx,%ecx
- je sw1
+ jz sw1
#ifdef SMP
movb P_ONCPU(%ecx), %al /* save "last" cpu */
@@ -299,7 +123,7 @@ ENTRY(cpu_switch)
movl %edi,PCB_EDI(%edx)
movl %gs,PCB_GS(%edx)
- /* test if debug regisers should be saved */
+ /* test if debug registers should be saved */
movb PCB_FLAGS(%edx),%al
andb $PCB_DBREGS,%al
jz 1f /* no, skip over */
@@ -319,15 +143,12 @@ ENTRY(cpu_switch)
movl %eax,PCB_DR0(%edx)
1:
+ /* save sched_lock recursion count */
+ movl _sched_lock+MTX_RECURSE,%eax
+ movl %eax,PCB_SCHEDNEST(%edx)
+
#ifdef SMP
- movl _mp_lock, %eax
/* XXX FIXME: we should be saving the local APIC TPR */
-#ifdef DIAGNOSTIC
- cmpl $FREE_LOCK, %eax /* is it free? */
- je badsw4 /* yes, bad medicine! */
-#endif /* DIAGNOSTIC */
- andl $COUNT_FIELD, %eax /* clear CPU portion */
- movl %eax, PCB_MPNEST(%edx) /* store it */
#endif /* SMP */
#if NNPX > 0
@@ -341,25 +162,33 @@ ENTRY(cpu_switch)
1:
#endif /* NNPX > 0 */
- movl $0,_curproc /* out of process */
-
- /* save is done, now choose a new process or idle */
+ /* save is done, now choose a new process */
sw1:
- cli
#ifdef SMP
/* Stop scheduling if smp_active goes zero and we are not BSP */
cmpl $0,_smp_active
jne 1f
cmpl $0,_cpuid
- CROSSJUMP(je, _idle, jne) /* wind down */
+ je 1f
+
+ movl _idleproc, %eax
+ jmp sw1b
1:
#endif
+ /*
+ * Choose a new process to schedule. chooseproc() returns idleproc
+ * if it cannot find another process to run.
+ */
sw1a:
call _chooseproc /* trash ecx, edx, ret eax*/
- testl %eax,%eax
- CROSSJUMP(je, _idle, jne) /* if no proc, idle */
+
+#ifdef DIAGNOSTIC
+ testl %eax,%eax /* no process? */
+ jz badsw3 /* no, panic */
+#endif
+sw1b:
movl %eax,%ecx
xorl %eax,%eax
@@ -456,9 +285,6 @@ sw1a:
movl %ecx, _curproc /* into next process */
#ifdef SMP
- movl _cpu_lockid, %eax
- orl PCB_MPNEST(%edx), %eax /* add next count from PROC */
- movl %eax, _mp_lock /* load the mp_lock */
/* XXX FIXME: we should be restoring the local APIC TPR */
#endif /* SMP */
@@ -500,7 +326,22 @@ cpu_switch_load_gs:
movl %eax,%dr7
1:
- sti
+ /*
+ * restore sched_lock recursion count and transfer ownership to
+ * new process
+ */
+ movl PCB_SCHEDNEST(%edx),%eax
+ movl %eax,_sched_lock+MTX_RECURSE
+
+ movl _curproc,%eax
+ movl %eax,_sched_lock+MTX_LOCK
+
+#ifdef DIAGNOSTIC
+ pushfl
+ popl %ecx
+ testl $0x200, %ecx /* interrupts enabled? */
+ jnz badsw6 /* that way madness lies */
+#endif
ret
CROSSJUMPTARGET(sw1a)
@@ -517,15 +358,27 @@ badsw2:
call _panic
sw0_2: .asciz "cpu_switch: not SRUN"
+
+badsw3:
+ pushl $sw0_3
+ call _panic
+
+sw0_3: .asciz "cpu_switch: chooseproc returned NULL"
+
#endif
-#if defined(SMP) && defined(DIAGNOSTIC)
-badsw4:
- pushl $sw0_4
+#ifdef DIAGNOSTIC
+badsw5:
+ pushl $sw0_5
+ call _panic
+
+sw0_5: .asciz "cpu_switch: interrupts enabled (again)"
+badsw6:
+ pushl $sw0_6
call _panic
-sw0_4: .asciz "cpu_switch: do not have lock"
-#endif /* SMP && DIAGNOSTIC */
+sw0_6: .asciz "cpu_switch: interrupts enabled"
+#endif
/*
* savectx(pcb)
diff --git a/sys/i386/i386/synch_machdep.c b/sys/i386/i386/synch_machdep.c
new file mode 100644
index 0000000..029225d
--- /dev/null
+++ b/sys/i386/i386/synch_machdep.c
@@ -0,0 +1,559 @@
+/*-
+ * Copyright (c) 1997, 1998 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
+ * $FreeBSD$
+ */
+
+#define MTX_STRS /* define common strings */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <ddb/ddb.h>
+#include <machine/atomic.h>
+#include <machine/clock.h>
+#include <machine/cpu.h>
+#include <machine/mutex.h>
+
+/* All mutexes in system (used for debug/panic) */
+mtx_t all_mtx = { MTX_UNOWNED, 0, 0, 0, "All mutexes queue head",
+ TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
+ { NULL, NULL }, &all_mtx, &all_mtx
+#ifdef SMP_DEBUG
+ , NULL, { NULL, NULL }, NULL, 0
+#endif
+};
+
+int mtx_cur_cnt;
+int mtx_max_cnt;
+
+extern void _mtx_enter_giant_def(void);
+extern void _mtx_exit_giant_def(void);
+
+static void propagate_priority(struct proc *) __unused;
+
+#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED)
+#define mtx_owner(m) (mtx_unowned(m) ? NULL \
+ : (struct proc *)((m)->mtx_lock & MTX_FLAGMASK))
+
+#define RETIP(x) *(((int *)(&x)) - 1)
+#define SET_PRIO(p, pri) (p)->p_priority = (pri)
+
+/*
+ * XXX Temporary, for use from assembly language
+ */
+
+void
+_mtx_enter_giant_def(void)
+{
+
+ mtx_enter(&Giant, MTX_DEF);
+}
+
+void
+_mtx_exit_giant_def(void)
+{
+
+ mtx_exit(&Giant, MTX_DEF);
+}
+
+static void
+propagate_priority(struct proc *p)
+{
+ int pri = p->p_priority;
+ mtx_t *m = p->p_blocked;
+
+ for (;;) {
+ struct proc *p1;
+
+ p = mtx_owner(m);
+
+ if (p == NULL) {
+ /*
+ * This really isn't quite right. Really
+ * ought to bump priority of process that
+ * next acquires the mutex.
+ */
+ MPASS(m->mtx_lock == MTX_CONTESTED);
+ return;
+ }
+ MPASS(p->p_magic == P_MAGIC);
+ if (p->p_priority <= pri)
+ return;
+ /*
+ * If lock holder is actually running, just bump priority.
+ */
+ if (TAILQ_NEXT(p, p_procq) == NULL) {
+ MPASS(p->p_stat == SRUN || p->p_stat == SZOMB);
+ SET_PRIO(p, pri);
+ return;
+ }
+ /*
+ * If on run queue move to new run queue, and
+ * quit.
+ */
+#if 1
+ if (p->p_stat == SRUN) {
+#else
+ if ((m = p->p_blocked) == NULL) {
+#endif
+ MPASS(p->p_stat == SRUN);
+ remrunqueue(p);
+ SET_PRIO(p, pri);
+ setrunqueue(p);
+ return;
+ }
+
+ /*
+ * If we aren't blocked on a mutex, give up and quit.
+ */
+ if (p->p_stat != SMTX) {
+ printf(
+ "XXX: process %d(%s):%d holds %s but isn't blocked on a mutex\n",
+ p->p_pid, p->p_comm, p->p_stat, m->mtx_description);
+ return;
+ }
+
+ /*
+ * Pick up the mutex that p is blocked on.
+ */
+ m = p->p_blocked;
+ MPASS(m != NULL);
+
+ printf("XXX: process %d(%s) is blocked on %s\n", p->p_pid,
+ p->p_comm, m->mtx_description);
+ /*
+ * Check if the proc needs to be moved up on
+ * the blocked chain
+ */
+ if ((p1 = TAILQ_PREV(p, rq, p_procq)) == NULL ||
+ p1->p_priority <= pri) {
+ if (p1)
+ printf(
+ "XXX: previous process %d(%s) has higher priority\n",
+ p->p_pid, p->p_comm);
+ else
+ printf("XXX: process at head of run queue\n");
+ continue;
+ }
+
+ /*
+ * Remove proc from blocked chain
+ */
+ TAILQ_REMOVE(&m->mtx_blocked, p, p_procq);
+ TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq) {
+ MPASS(p1->p_magic == P_MAGIC);
+ if (p1->p_priority > pri)
+ break;
+ }
+ if (p1)
+ TAILQ_INSERT_BEFORE(p1, p, p_procq);
+ else
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
+ CTR4(KTR_LOCK,
+ "propagate priority: p 0x%p moved before 0x%p on [0x%p] %s",
+ p, p1, m, m->mtx_description);
+ }
+}
+
+void
+mtx_enter_hard(mtx_t *m, int type, int flags)
+{
+ struct proc *p = CURPROC;
+
+ KASSERT(p != NULL, ("curproc is NULL in mutex"));
+
+ switch (type) {
+ case MTX_DEF:
+ if ((m->mtx_lock & MTX_FLAGMASK) == (u_int)p) {
+ m->mtx_recurse++;
+ atomic_set_int(&m->mtx_lock, MTX_RECURSE);
+ CTR1(KTR_LOCK, "mtx_enter: 0x%p recurse", m);
+ return;
+ }
+ CTR3(KTR_LOCK, "mtx_enter: 0x%p contested (lock=%x) [0x%x]",
+ m, m->mtx_lock, RETIP(m));
+ while (!atomic_cmpset_int(&m->mtx_lock, MTX_UNOWNED, (int)p)) {
+ int v;
+ struct proc *p1;
+
+ mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
+ /*
+ * check if the lock has been released while
+ * waiting for the schedlock.
+ */
+ if ((v = m->mtx_lock) == MTX_UNOWNED) {
+ mtx_exit(&sched_lock, MTX_SPIN);
+ continue;
+ }
+ /*
+ * The mutex was marked contested on release. This
+ * means that there are processes blocked on it.
+ */
+ if (v == MTX_CONTESTED) {
+ p1 = TAILQ_FIRST(&m->mtx_blocked);
+ KASSERT(p1 != NULL, ("contested mutex has no contesters"));
+ KASSERT(p != NULL, ("curproc is NULL for contested mutex"));
+ m->mtx_lock = (int)p | MTX_CONTESTED;
+ if (p1->p_priority < p->p_priority) {
+ SET_PRIO(p, p1->p_priority);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ return;
+ }
+ /*
+ * If the mutex isn't already contested and
+ * a failure occurs setting the contested bit, the
+ * mutex was either released or the
+ * state of the RECURSION bit changed.
+ */
+ if ((v & MTX_CONTESTED) == 0 &&
+ !atomic_cmpset_int(&m->mtx_lock, v,
+ v | MTX_CONTESTED)) {
+ mtx_exit(&sched_lock, MTX_SPIN);
+ continue;
+ }
+
+ /* We definitely have to sleep for this lock */
+ mtx_assert(m, MA_NOTOWNED);
+
+#ifdef notyet
+ /*
+ * If we're borrowing an interrupted thread's VM
+ * context must clean up before going to sleep.
+ */
+ if (p->p_flag & (P_ITHD | P_SITHD)) {
+ ithd_t *it = (ithd_t *)p;
+
+ if (it->it_interrupted) {
+ CTR2(KTR_LOCK,
+ "mtx_enter: 0x%x interrupted 0x%x",
+ it, it->it_interrupted);
+ intr_thd_fixup(it);
+ }
+ }
+#endif
+
+ /* Put us on the list of procs blocked on this mutex */
+ if (TAILQ_EMPTY(&m->mtx_blocked)) {
+ p1 = (struct proc *)(m->mtx_lock &
+ MTX_FLAGMASK);
+ LIST_INSERT_HEAD(&p1->p_contested, m,
+ mtx_contested);
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p, p_procq);
+ } else {
+ TAILQ_FOREACH(p1, &m->mtx_blocked, p_procq)
+ if (p1->p_priority > p->p_priority)
+ break;
+ if (p1)
+ TAILQ_INSERT_BEFORE(p1, p, p_procq);
+ else
+ TAILQ_INSERT_TAIL(&m->mtx_blocked, p,
+ p_procq);
+ }
+
+ p->p_blocked = m; /* Who we're blocked on */
+ p->p_stat = SMTX;
+#if 0
+ propagate_priority(p);
+#endif
+ CTR3(KTR_LOCK, "mtx_enter: p 0x%p blocked on [0x%p] %s",
+ p, m, m->mtx_description);
+ mi_switch();
+ CTR3(KTR_LOCK,
+ "mtx_enter: p 0x%p free from blocked on [0x%p] %s",
+ p, m, m->mtx_description);
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+ return;
+ case MTX_SPIN:
+ case MTX_SPIN | MTX_FIRST:
+ case MTX_SPIN | MTX_TOPHALF:
+ {
+ int i = 0;
+
+ if (m->mtx_lock == (u_int)p) {
+ m->mtx_recurse++;
+ return;
+ }
+ CTR1(KTR_LOCK, "mtx_enter: %p spinning", m);
+ for (;;) {
+ if (atomic_cmpset_int(&m->mtx_lock, MTX_UNOWNED,
+ (u_int)p))
+ break;
+ while (m->mtx_lock != MTX_UNOWNED) {
+ if (i++ < 1000000)
+ continue;
+ if (i++ < 6000000)
+ DELAY (1);
+#ifdef DDB
+ else if (!db_active) {
+#else
+ else {
+#endif
+#if 0
+ Debugger ("spinning");
+ panic("spin lock %s held by 0x%x for > 5 seconds",
+ m->mtx_description,
+ m->mtx_lock);
+#endif
+ }
+ }
+ }
+
+#ifdef SMP_DEBUG
+ if (type != MTX_SPIN)
+ m->mtx_savefl = 0xdeadbeef;
+ else
+#endif
+ m->mtx_savefl = flags;
+ CTR1(KTR_LOCK, "mtx_enter: 0x%p spin done", m);
+ return;
+ }
+ }
+}
+
+void
+mtx_exit_hard(mtx_t *m, int type)
+{
+ struct proc *p, *p1;
+ mtx_t *m1;
+ int pri;
+
+ switch (type) {
+ case MTX_DEF:
+ case MTX_DEF | MTX_NOSWITCH:
+ if (m->mtx_recurse != 0) {
+ if (--(m->mtx_recurse) == 0)
+ atomic_clear_int(&m->mtx_lock, MTX_RECURSE);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%p unrecurse", m);
+ return;
+ }
+ mtx_enter(&sched_lock, MTX_SPIN);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%p contested", m);
+ p = CURPROC;
+ p1 = TAILQ_FIRST(&m->mtx_blocked);
+ MPASS(p->p_magic == P_MAGIC);
+ MPASS(p1->p_magic == P_MAGIC);
+ TAILQ_REMOVE(&m->mtx_blocked, p1, p_procq);
+ if (TAILQ_EMPTY(&m->mtx_blocked)) {
+ LIST_REMOVE(m, mtx_contested);
+ atomic_cmpset_int(&m->mtx_lock, m->mtx_lock,
+ MTX_UNOWNED);
+ CTR1(KTR_LOCK, "mtx_exit: 0x%p not held", m);
+ } else
+ m->mtx_lock = MTX_CONTESTED;
+ pri = MAXPRI;
+ LIST_FOREACH(m1, &p->p_contested, mtx_contested) {
+ int cp = TAILQ_FIRST(&m1->mtx_blocked)->p_priority;
+ if (cp < pri)
+ pri = cp;
+ }
+ if (pri > p->p_nativepri)
+ pri = p->p_nativepri;
+ SET_PRIO(p, pri);
+ CTR2(KTR_LOCK, "mtx_exit: 0x%p contested setrunqueue 0x%p",
+ m, p1);
+ p1->p_blocked = NULL;
+ p1->p_stat = SRUN;
+ setrunqueue(p1);
+ if ((type & MTX_NOSWITCH) == 0 && p1->p_priority < pri) {
+#ifdef notyet
+ if (p->p_flag & (P_ITHD | P_SITHD)) {
+ ithd_t *it = (ithd_t *)p;
+
+ if (it->it_interrupted) {
+ CTR2(KTR_LOCK,
+ "mtx_exit: 0x%x interruped 0x%x",
+ it, it->it_interrupted);
+ intr_thd_fixup(it);
+ }
+ }
+#endif
+ setrunqueue(p);
+ CTR2(KTR_LOCK, "mtx_exit: 0x%p switching out lock=0x%x",
+ m, m->mtx_lock);
+ mi_switch();
+ CTR2(KTR_LOCK, "mtx_exit: 0x%p resuming lock=0x%x",
+ m, m->mtx_lock);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ return;
+ case MTX_SPIN:
+ case MTX_SPIN | MTX_FIRST:
+ if (m->mtx_recurse != 0) {
+ m->mtx_recurse--;
+ return;
+ }
+ if (atomic_cmpset_int(&m->mtx_lock, CURTHD, MTX_UNOWNED)) {
+ if (type & MTX_FIRST) {
+ enable_intr(); /* XXX is this kosher? */
+ } else {
+ MPASS(m->mtx_savefl != 0xdeadbeef);
+ write_eflags(m->mtx_savefl);
+ }
+ return;
+ }
+ panic("unsucuessful release of spin lock");
+ case MTX_SPIN | MTX_TOPHALF:
+ if (m->mtx_recurse != 0) {
+ m->mtx_recurse--;
+ return;
+ }
+ if (atomic_cmpset_int(&m->mtx_lock, CURTHD, MTX_UNOWNED))
+ return;
+ panic("unsucuessful release of spin lock");
+ default:
+ panic("mtx_exit_hard: unsupported type 0x%x\n", type);
+ }
+}
+
+#define MV_DESTROY 0 /* validate before destroy */
+#define MV_INIT 1 /* validate before init */
+
+#ifdef SMP_DEBUG
+
+int mtx_validate __P((mtx_t *, int));
+
+int
+mtx_validate(mtx_t *m, int when)
+{
+ mtx_t *mp;
+ int i;
+ int retval = 0;
+
+ if (m == &all_mtx || cold)
+ return 0;
+
+ mtx_enter(&all_mtx, MTX_DEF);
+ ASS(kernacc((caddr_t)all_mtx.mtx_next, 4, 1) == 1);
+ ASS(all_mtx.mtx_next->mtx_prev == &all_mtx);
+ for (i = 0, mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next) {
+ if (kernacc((caddr_t)mp->mtx_next, 4, 1) != 1) {
+ panic("mtx_validate: mp=%p mp->mtx_next=%p",
+ mp, mp->mtx_next);
+ }
+ i++;
+ if (i > mtx_cur_cnt) {
+ panic("mtx_validate: too many in chain, known=%d\n",
+ mtx_cur_cnt);
+ }
+ }
+ ASS(i == mtx_cur_cnt);
+ switch (when) {
+ case MV_DESTROY:
+ for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
+ if (mp == m)
+ break;
+ ASS(mp == m);
+ break;
+ case MV_INIT:
+ for (mp = all_mtx.mtx_next; mp != &all_mtx; mp = mp->mtx_next)
+ if (mp == m) {
+ /*
+ * Not good. This mutex already exists
+ */
+ retval = 1;
+#if 1
+ printf("re-initing existing mutex %s\n",
+ m->mtx_description);
+ ASS(m->mtx_lock == MTX_UNOWNED);
+ retval = 1;
+#else
+ panic("re-initing existing mutex %s",
+ m->mtx_description);
+#endif
+ }
+ }
+ mtx_exit(&all_mtx, MTX_DEF);
+ return (retval);
+}
+#endif
+
+void
+mtx_init(mtx_t *m, char *t, int flag)
+{
+
+ CTR2(KTR_LOCK, "mtx_init 0x%p (%s)", m, t);
+#ifdef SMP_DEBUG
+ if (mtx_validate(m, MV_INIT)) /* diagnostic and error correction */
+ return;
+#endif
+ bzero((void *)m, sizeof *m);
+ TAILQ_INIT(&m->mtx_blocked);
+ m->mtx_description = t;
+ m->mtx_lock = MTX_UNOWNED;
+ /* Put on all mutex queue */
+ mtx_enter(&all_mtx, MTX_DEF);
+ m->mtx_next = &all_mtx;
+ m->mtx_prev = all_mtx.mtx_prev;
+ m->mtx_prev->mtx_next = m;
+ all_mtx.mtx_prev = m;
+ if (++mtx_cur_cnt > mtx_max_cnt)
+ mtx_max_cnt = mtx_cur_cnt;
+ mtx_exit(&all_mtx, MTX_DEF);
+ witness_init(m, flag);
+}
+
+void
+mtx_destroy(mtx_t *m)
+{
+
+ CTR2(KTR_LOCK, "mtx_destroy 0x%p (%s)", m, m->mtx_description);
+#ifdef SMP_DEBUG
+ if (m->mtx_next == NULL)
+ panic("mtx_destroy: %p (%s) already destroyed",
+ m, m->mtx_description);
+
+ if (!mtx_owned(m)) {
+ ASS(m->mtx_lock == MTX_UNOWNED);
+ } else {
+ ASS((m->mtx_lock & (MTX_RECURSE|MTX_CONTESTED)) == 0);
+ }
+ mtx_validate(m, MV_DESTROY); /* diagnostic */
+#endif
+
+#ifdef WITNESS
+ if (m->mtx_witness)
+ witness_destroy(m);
+#endif /* WITNESS */
+
+ /* Remove from the all mutex queue */
+ mtx_enter(&all_mtx, MTX_DEF);
+ m->mtx_next->mtx_prev = m->mtx_prev;
+ m->mtx_prev->mtx_next = m->mtx_next;
+#ifdef SMP_DEBUG
+ m->mtx_next = m->mtx_prev = NULL;
+#endif
+ mtx_cur_cnt--;
+ mtx_exit(&all_mtx, MTX_DEF);
+}
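synch_machdep.c above is the machine-dependent core of the new API; from a consumer's point of view the common pattern is simply init, enter, exit. A minimal usage sketch with hypothetical names (MTX_DEF mutexes may block and are the default choice; MTX_SPIN mutexes busy-wait and are reserved for code that cannot sleep, such as the scheduler path above):

#include <machine/mutex.h>

static mtx_t	foo_lock;
static int	foo_count;

static void
foo_init(void)
{

	mtx_init(&foo_lock, "foo lock", MTX_DEF);
}

static void
foo_bump(void)
{

	mtx_enter(&foo_lock, MTX_DEF);
	foo_count++;
	mtx_exit(&foo_lock, MTX_DEF);
}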
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index 51de1ac..f32dfae 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -49,10 +49,12 @@
#include "opt_trap.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
@@ -76,12 +78,14 @@
#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#ifdef POWERFAIL_NMI
@@ -96,11 +100,14 @@
#include "isa.h"
#include "npx.h"
+#include <sys/sysctl.h>
+
int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall2 __P((struct trapframe frame));
+extern void ast __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -142,7 +149,7 @@ static char *trap_msg[] = {
};
static __inline int userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks, int have_mplock));
+ u_quad_t oticks, int have_giant));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
@@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
static __inline int
-userret(p, frame, oticks, have_mplock)
+userret(p, frame, oticks, have_giant)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
- int have_mplock;
+ int have_giant;
{
int sig, s;
while ((sig = CURSIG(p)) != 0) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
postsig(sig);
}
@@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
- }
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
postsig(sig);
+ }
}
/*
* Charge system time if profiling.
*/
if (p->p_flag & P_PROFIL) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
}
curpriority = p->p_priority;
- return(have_mplock);
+ return(have_giant);
}
/*
@@ -226,13 +236,20 @@ trap(frame)
u_quad_t sticks = 0;
int i = 0, ucode = 0, type, code;
vm_offset_t eva;
+#ifdef POWERFAIL_NMI
+ static int lastalert = 0;
+#endif
- if (!(frame.tf_eflags & PSL_I)) {
+ atomic_add_int(&cnt.v_trap, 1);
+
+ if ((frame.tf_eflags & PSL_I) == 0) {
/*
- * Buggy application or kernel code has disabled interrupts
- * and then trapped. Enabling interrupts now is wrong, but
- * it is better than running with interrupts disabled until
- * they are accidentally enabled later.
+ * Buggy application or kernel code has disabled
+ * interrupts and then trapped. Enabling interrupts
+ * now is wrong, but it is better than running with
+ * interrupts disabled until they are accidentally
+ * enabled later. XXX Consider whether this is still
+ * correct.
*/
type = frame.tf_trapno;
if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
@@ -252,54 +269,27 @@ trap(frame)
eva = 0;
if (frame.tf_trapno == T_PAGEFLT) {
/*
- * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
- * This problem is worked around by using an interrupt
- * gate for the pagefault handler. We are finally ready
- * to read %cr2 and then must reenable interrupts.
- *
- * XXX this should be in the switch statement, but the
- * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
- * flow of control too much for this to be obviously
- * correct.
+ * For some Cyrix CPUs, %cr2 is clobbered by
+ * interrupts. This problem is worked around by using
+ * an interrupt gate for the pagefault handler. We
+ * are finally ready to read %cr2 and then must
+ * reenable interrupts.
*/
eva = rcr2();
enable_intr();
- }
+ }
+
+ mtx_enter(&Giant, MTX_DEF);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
+
type = frame.tf_trapno;
code = frame.tf_err;
- if (in_vm86call) {
- if (frame.tf_eflags & PSL_VM &&
- (type == T_PROTFLT || type == T_STKFLT)) {
- i = vm86_emulate((struct vm86frame *)&frame);
- if (i != 0)
- /*
- * returns to original process
- */
- vm86_trap((struct vm86frame *)&frame);
- return;
- }
- switch (type) {
- /*
- * these traps want either a process context, or
- * assume a normal userspace trap.
- */
- case T_PROTFLT:
- case T_SEGNPFLT:
- trap_fatal(&frame, eva);
- return;
- case T_TRCTRAP:
- type = T_BPTFLT; /* kernel breakpoint */
- /* FALL THROUGH */
- }
- goto kernel_trap; /* normal kernel trap handling */
- }
-
- if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
+ if ((ISPL(frame.tf_cs) == SEL_UPL) ||
+ ((frame.tf_eflags & PSL_VM) && !in_vm86call)) {
/* user trap */
sticks = p->p_sticks;
@@ -322,16 +312,6 @@ restart:
i = SIGFPE;
break;
- case T_ASTFLT: /* Allow process switch */
- astoff();
- cnt.v_soft++;
- if (p->p_flag & P_OWEUPC) {
- p->p_flag &= ~P_OWEUPC;
- addupc_task(p, p->p_stats->p_prof.pr_addr,
- p->p_stats->p_prof.pr_ticks);
- }
- goto out;
-
/*
* The following two traps can happen in
* vm86 mode, and, if so, we want to handle
@@ -342,7 +322,7 @@ restart:
if (frame.tf_eflags & PSL_VM) {
i = vm86_emulate((struct vm86frame *)&frame);
if (i == 0)
- goto out;
+ goto user;
break;
}
/* FALL THROUGH */
@@ -357,14 +337,20 @@ restart:
case T_PAGEFLT: /* page fault */
i = trap_pfault(&frame, TRUE, eva);
- if (i == -1)
- return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if (i == -2)
+ if (i == -2) {
+ /*
+ * f00f hack workaround has triggered, treat
+ * as illegal instruction not page fault.
+ */
+ frame.tf_trapno = T_PRIVINFLT;
goto restart;
+ }
#endif
- if (i == 0)
+ if (i == -1)
goto out;
+ if (i == 0)
+ goto user;
ucode = T_PAGEFLT;
break;
@@ -377,7 +363,15 @@ restart:
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
- goto handle_powerfail;
+#ifndef TIMER_FREQ
+# define TIMER_FREQ 1193182
+#endif
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -391,7 +385,7 @@ restart:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi)
panic("NMI indicates hardware failure");
break;
@@ -410,9 +404,9 @@ restart:
case T_DNA:
#if NNPX > 0
- /* if a transparent fault (due to context switch "late") */
+ /* transparent fault (due to context switch "late") */
if (npxdna())
- return;
+ goto out;
#endif
if (!pmath_emulate) {
i = SIGFPE;
@@ -422,7 +416,7 @@ restart:
i = (*pmath_emulate)(&frame);
if (i == 0) {
if (!(frame.tf_eflags & PSL_T))
- return;
+ goto out;
frame.tf_eflags &= ~PSL_T;
i = SIGTRAP;
}
@@ -435,13 +429,12 @@ restart:
break;
}
} else {
-kernel_trap:
/* kernel trap */
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(&frame, FALSE, eva);
- return;
+ goto out;
case T_DNA:
#if NNPX > 0
@@ -451,31 +444,35 @@ kernel_trap:
* registered such use.
*/
if (npxdna())
- return;
+ goto out;
#endif
break;
- case T_PROTFLT: /* general protection fault */
- case T_SEGNPFLT: /* segment not present fault */
/*
- * Invalid segment selectors and out of bounds
- * %eip's and %esp's can be set up in user mode.
- * This causes a fault in kernel mode when the
- * kernel tries to return to user mode. We want
- * to get this fault so that we can fix the
- * problem here and not have to check all the
- * selectors and pointers when the user changes
- * them.
+ * The following two traps can happen in
+ * vm86 mode, and, if so, we want to handle
+ * them specially.
*/
-#define MAYBE_DORETI_FAULT(where, whereto) \
- do { \
- if (frame.tf_eip == (int)where) { \
- frame.tf_eip = (int)whereto; \
- return; \
- } \
- } while (0)
-
- if (intr_nesting_level == 0) {
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+ if (frame.tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)&frame);
+ if (i != 0)
+ /*
+ * returns to original process
+ */
+ vm86_trap((struct vm86frame *)&frame);
+ goto out;
+ }
+ /* FALL THROUGH */
+
+ case T_SEGNPFLT: /* segment not present fault */
+ if (in_vm86call)
+ break;
+
+ if (intr_nesting_level != 0)
+ break;
+
/*
* Invalid %fs's and %gs's can be created using
* procfs or PT_SETREGS or by invalidating the
@@ -488,20 +485,38 @@ kernel_trap:
if (frame.tf_eip == (int)cpu_switch_load_gs) {
curpcb->pcb_gs = 0;
psignal(p, SIGBUS);
- return;
+ goto out;
+ }
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %eip's and %esp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame.tf_eip == (int)doreti_iret) {
+ frame.tf_eip = (int)doreti_iret_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_ds) {
+ frame.tf_eip = (int)doreti_popl_ds_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_es) {
+ frame.tf_eip = (int)doreti_popl_es_fault;
+ goto out;
}
- MAYBE_DORETI_FAULT(doreti_iret,
- doreti_iret_fault);
- MAYBE_DORETI_FAULT(doreti_popl_ds,
- doreti_popl_ds_fault);
- MAYBE_DORETI_FAULT(doreti_popl_es,
- doreti_popl_es_fault);
- MAYBE_DORETI_FAULT(doreti_popl_fs,
- doreti_popl_fs_fault);
+ if (frame.tf_eip == (int)doreti_popl_fs) {
+ frame.tf_eip = (int)doreti_popl_fs_fault;
+ goto out;
+ }
if (curpcb && curpcb->pcb_onfault) {
frame.tf_eip = (int)curpcb->pcb_onfault;
- return;
- }
+ goto out;
}
break;
@@ -517,7 +532,7 @@ kernel_trap:
*/
if (frame.tf_eflags & PSL_NT) {
frame.tf_eflags &= ~PSL_NT;
- return;
+ goto out;
}
break;
@@ -529,7 +544,7 @@ kernel_trap:
* silently until the syscall handler has
* saved the flags.
*/
- return;
+ goto out;
}
if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
/*
@@ -537,7 +552,7 @@ kernel_trap:
* flags. Stop single stepping it.
*/
frame.tf_eflags &= ~PSL_T;
- return;
+ goto out;
}
/*
* Ignore debug register trace traps due to
@@ -549,13 +564,13 @@ kernel_trap:
* in kernel space because that is useful when
* debugging the kernel.
*/
- if (user_dbreg_trap()) {
+ if (user_dbreg_trap() && !in_vm86call) {
/*
* Reset breakpoint bits because the
* processor doesn't
*/
load_dr6(rdr6() & 0xfffffff0);
- return;
+ goto out;
}
/*
* Fall through (TRCTRAP kernel mode, kernel address)
@@ -567,28 +582,19 @@ kernel_trap:
*/
#ifdef DDB
if (kdb_trap (type, 0, &frame))
- return;
+ goto out;
#endif
break;
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
-#ifndef TIMER_FREQ
-# define TIMER_FREQ 1193182
-#endif
- handle_powerfail:
- {
- static unsigned lastalert = 0;
-
- if(time_second - lastalert > 10)
- {
+ if (time_second - lastalert > 10) {
log(LOG_WARNING, "NMI: power fail\n");
sysbeep(TIMER_FREQ/880, hz);
lastalert = time_second;
- }
- return;
}
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -602,16 +608,16 @@ kernel_trap:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi == 0)
- return;
+ goto out;
/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
}
trap_fatal(&frame, eva);
- return;
+ goto out;
}
/* Translate fault for emulators (e.g. Linux) */
@@ -630,8 +636,10 @@ kernel_trap:
}
#endif
-out:
+user:
userret(p, &frame, sticks, 1);
+out:
+ mtx_exit(&Giant, MTX_DEF);
}
#ifdef notyet
@@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva)
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
- frame->tf_trapno = T_PRIVINFLT;
+ if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
return -2;
- }
#endif
if (usermode)
goto nogo;
@@ -869,8 +875,7 @@ trap_fatal(frame, eva)
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -917,26 +922,6 @@ trap_fatal(frame, eva)
} else {
printf("Idle\n");
}
- printf("interrupt mask = ");
- if ((cpl & net_imask) == net_imask)
- printf("net ");
- if ((cpl & tty_imask) == tty_imask)
- printf("tty ");
- if ((cpl & bio_imask) == bio_imask)
- printf("bio ");
- if ((cpl & cam_imask) == cam_imask)
- printf("cam ");
- if (cpl == 0)
- printf("none");
-#ifdef SMP
-/**
- * XXX FIXME:
- * we probably SHOULD have stopped the other CPUs before now!
- * another CPU COULD have been touching cpl at this moment...
- */
- printf(" <- SMP: XXX");
-#endif
- printf("\n");
#ifdef KDB
if (kdb_trap(&psl))
@@ -973,8 +958,7 @@ dblfault_handler()
printf("esp = 0x%x\n", common_tss.tss_esp);
printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -1048,12 +1032,14 @@ syscall2(frame)
int error;
int narg;
int args[8];
- int have_mplock = 0;
+ int have_giant = 0;
u_int code;
+ atomic_add_int(&cnt.v_syscall, 1);
+
#ifdef DIAGNOSTIC
if (ISPL(frame.tf_cs) != SEL_UPL) {
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
panic("syscall");
/* NOT REACHED */
}
@@ -1075,9 +1061,9 @@ syscall2(frame)
/*
* The prep code is not MP aware.
*/
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
- rel_mplock();
+ mtx_exit(&Giant, MTX_DEF);
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -1114,8 +1100,8 @@ syscall2(frame)
*/
if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
ktrsyscall(p->p_tracep, code, narg, args);
@@ -1129,15 +1115,15 @@ syscall2(frame)
* we are ktracing
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsyscall(p->p_tracep, code, narg, args);
}
@@ -1192,9 +1178,9 @@ bad:
* Traced syscall. trapsignal() is not MP aware.
*/
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
@@ -1203,13 +1189,13 @@ bad:
/*
* Handle reschedule and other end-of-syscall issues
*/
- have_mplock = userret(p, &frame, sticks, have_mplock);
+ have_giant = userret(p, &frame, sticks, have_giant);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
}
@@ -1225,27 +1211,66 @@ bad:
/*
* Release the MP lock if we had to get it
*/
- if (have_mplock)
- rel_mplock();
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
+
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
+
+void
+ast(frame)
+ struct trapframe frame;
+{
+ struct proc *p = CURPROC;
+ u_quad_t sticks;
+
+ /*
+ * handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
+ astoff();
+ atomic_add_int(&cnt.v_soft, 1);
+ if (p->p_flag & P_OWEUPC) {
+ mtx_enter(&Giant, MTX_DEF);
+ p->p_flag &= ~P_OWEUPC;
+ addupc_task(p, p->p_stats->p_prof.pr_addr,
+ p->p_stats->p_prof.pr_ticks);
+ }
+ if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0)
+ mtx_exit(&Giant, MTX_DEF);
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode. MP lock is held on entry and should be
- * held on return.
+ * directly into user mode. Giant is not held on entry, and must not
+ * be held on return.
*/
void
fork_return(p, frame)
struct proc *p;
struct trapframe frame;
{
+ int have_giant;
+
frame.tf_eax = 0; /* Child returns zero */
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0, 1);
+ have_giant = userret(p, &frame, 0, mtx_owned(&Giant));
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
+ }
#endif
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
}
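The get_mplock()/rel_mplock() calls in trap.c are replaced throughout by the same
idiom: acquire Giant lazily, remember whether this code path took it, and release
it exactly once on the way out. A condensed sketch of that idiom (illustration
only, not part of the patch; need_giant_work stands in for whichever condition
forces the acquisition):

	int have_giant = mtx_owned(&Giant);	/* nonzero if Giant is already held */

	if (need_giant_work && have_giant == 0) {
		mtx_enter(&Giant, MTX_DEF);	/* take Giant only when it is needed */
		have_giant = 1;
	}
	/* ... work that still requires Giant ... */
	if (have_giant)
		mtx_exit(&Giant, MTX_DEF);	/* drop it exactly once */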
diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c
index 15044ab..724f3c2 100644
--- a/sys/i386/i386/tsc.c
+++ b/sys/i386/i386/tsc.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Setup the PIC clk handler. The APIC handler is setup later */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
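Each i8254 critical section in this file now follows the same sequence: save the
caller's interrupt state, disable interrupts, take the clock spin lock, touch the
hardware, then undo everything in reverse order. A minimal sketch (illustration
only; save_intr()/restore_intr() are the new inlines added to machine/cpufunc.h
further down in this patch):

	int intrsave;

	intrsave = save_intr();		/* remember the caller's eflags (PSL_I) */
	disable_intr();			/* cli */
	CLOCK_LOCK();			/* SMP: serialize access to the 8254 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);	/* ... program or read the timer ... */
	CLOCK_UNLOCK();
	restore_intr(intrsave);		/* put the interrupt flag back as it was */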
diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s
index 6a11c26..14b4259 100644
--- a/sys/i386/i386/vm86bios.s
+++ b/sys/i386/i386/vm86bios.s
@@ -62,11 +62,9 @@ ENTRY(vm86_bioscall)
pushl %edi
pushl %gs
-#ifdef SMP
pushl %edx
- MP_LOCK /* Get global lock */
+ call __mtx_enter_giant_def /* Get global lock */
popl %edx
-#endif
#if NNPX > 0
movl _curproc,%ecx
@@ -135,13 +133,9 @@ ENTRY(vm86_bioscall)
/*
* Return via _doreti
*/
-#ifdef SMP
- pushl _cpl /* cpl to restore */
-#else
- pushl _cpl /* cpl to restore */
-#endif
subl $4,%esp /* dummy unit */
incb _intr_nesting_level
+ call __mtx_exit_giant_def
MEXITCOUNT
jmp _doreti
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index cfb6cee..831ab3b 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -57,12 +57,14 @@
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/clock.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#ifdef SMP
#include <machine/smp.h>
#endif
@@ -177,9 +179,8 @@ cpu_fork(p1, p2, flags)
* pcb2->pcb_onfault: cloned above (always NULL here?).
*/
-#ifdef SMP
- pcb2->pcb_mpnest = 1;
-#endif
+ pcb2->pcb_schednest = 0;
+
/*
* XXX don't copy the i/o pages. this should probably be fixed.
*/
@@ -256,8 +257,11 @@ cpu_exit(p)
reset_dbregs();
pcb->pcb_flags &= ~PCB_DBREGS;
}
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH);
+ mtx_assert(&Giant, MA_NOTOWNED);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -406,17 +410,10 @@ vunmapbuf(bp)
static void
cpu_reset_proxy()
{
- u_int saved_mp_lock;
cpu_reset_proxy_active = 1;
while (cpu_reset_proxy_active == 1)
- ; /* Wait for other cpu to disable interupts */
- saved_mp_lock = mp_lock;
- mp_lock = 1;
- printf("cpu_reset_proxy: Grabbed mp lock for BSP\n");
- cpu_reset_proxy_active = 3;
- while (cpu_reset_proxy_active == 3)
- ; /* Wait for other cpu to enable interrupts */
+ ; /* Wait for other cpu to see that we've started */
stop_cpus((1<<cpu_reset_proxyid));
printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
DELAY(1000000);
@@ -453,6 +450,7 @@ cpu_reset()
cpu_reset_proxyid = cpuid;
cpustop_restartfunc = cpu_reset_proxy;
+ cpu_reset_proxy_active = 0;
printf("cpu_reset: Restarting BSP\n");
started_cpus = (1<<0); /* Restart CPU #0 */
@@ -461,17 +459,9 @@ cpu_reset()
cnt++; /* Wait for BSP to announce restart */
if (cpu_reset_proxy_active == 0)
printf("cpu_reset: Failed to restart BSP\n");
- __asm __volatile("cli" : : : "memory");
+ enable_intr();
cpu_reset_proxy_active = 2;
- cnt = 0;
- while (cpu_reset_proxy_active == 2 && cnt < 10000000)
- cnt++; /* Do nothing */
- if (cpu_reset_proxy_active == 2) {
- printf("cpu_reset: BSP did not grab mp lock\n");
- cpu_reset_real(); /* XXX: Bogus ? */
- }
- cpu_reset_proxy_active = 4;
- __asm __volatile("sti" : : : "memory");
+
while (1);
/* NOTREACHED */
}
@@ -553,7 +543,7 @@ vm_page_zero_idle()
static int free_rover;
static int zero_state;
vm_page_t m;
- int s;
+ int s, intrsave;
/*
* Attempt to maintain approximately 1/2 of our free pages in a
@@ -569,11 +559,10 @@ vm_page_zero_idle()
if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
return(0);
-#ifdef SMP
- if (try_mplock()) {
-#endif
+ if (mtx_try_enter(&Giant, MTX_DEF)) {
s = splvm();
- __asm __volatile("sti" : : : "memory");
+ intrsave = save_intr();
+ enable_intr();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
if (m != NULL && (m->flags & PG_ZERO) == 0) {
@@ -595,14 +584,10 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- __asm __volatile("cli" : : : "memory");
-#ifdef SMP
- rel_mplock();
-#endif
+ restore_intr(intrsave);
+ mtx_exit(&Giant, MTX_DEF);
return (1);
-#ifdef SMP
}
-#endif
/*
* We have to enable interrupts for a moment if the try_mplock fails
* in order to potentially take an IPI. XXX this should be in
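In vm_page_zero_idle() above, the try_mplock() dance becomes a non-blocking mutex
acquire, so the idle loop never waits for Giant. Condensed (illustration only, not
part of the patch):

	if (mtx_try_enter(&Giant, MTX_DEF)) {	/* never block from the idle loop */
		/* ... find and zero one free page ... */
		mtx_exit(&Giant, MTX_DEF);
		return (1);			/* did useful work */
	}
	return (0);				/* Giant busy; retry on a later idle pass */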
diff --git a/sys/i386/include/asnames.h b/sys/i386/include/asnames.h
index 3ccbee6..efdb0f9 100644
--- a/sys/i386/include/asnames.h
+++ b/sys/i386/include/asnames.h
@@ -131,6 +131,7 @@
#define _Xintr7 Xintr7
#define _Xintr8 Xintr8
#define _Xintr9 Xintr9
+#define _Xtintr0 Xtintr0
#define _Xinvltlb Xinvltlb
#define _Xrendezvous Xrendezvous
#define _Xmchk Xmchk
@@ -155,6 +156,7 @@
#define _arith_invalid arith_invalid
#define _arith_overflow arith_overflow
#define _arith_underflow arith_underflow
+#define _ast ast
#define _bcopy bcopy
#define _bcopy_vector bcopy_vector
#define _bigJump bigJump
@@ -184,7 +186,6 @@
#define _cnt cnt
#define _copyin_vector copyin_vector
#define _copyout_vector copyout_vector
-#define _cpl cpl
#define _cpl_lock cpl_lock
#define _cpu cpu
#define _cpu0prvpage cpu0prvpage
@@ -222,6 +223,7 @@
#define _get_isrlock get_isrlock
#define _get_mplock get_mplock
#define _get_syscall_lock get_syscall_lock
+#define _Giant Giant
#define _idle idle
#define _ihandlers ihandlers
#define _imen imen
@@ -232,13 +234,11 @@
#define _intr_countp intr_countp
#define _intr_handler intr_handler
#define _intr_mask intr_mask
-#define _intr_nesting_level intr_nesting_level
#define _intr_unit intr_unit
#define _intrcnt intrcnt
#define _intrnames intrnames
#define _invltlb_ok invltlb_ok
#define _ioapic ioapic
-#define _ipending ipending
#define _isr_lock isr_lock
#define _kernelname kernelname
#define _lapic lapic
@@ -249,6 +249,8 @@
#define _mp_gdtbase mp_gdtbase
#define _mp_lock mp_lock
#define _mp_ncpus mp_ncpus
+#define __mtx_enter_giant_def _mtx_enter_giant_def
+#define __mtx_exit_giant_def _mtx_exit_giant_def
#define _mul64 mul64
#define _net_imask net_imask
#define _netisr netisr
@@ -281,6 +283,8 @@
#define _round_reg round_reg
#define _s_lock s_lock
#define _s_unlock s_unlock
+#define _sched_ithd sched_ithd
+#define _sched_lock sched_lock
#define _set_precision_flag_down set_precision_flag_down
#define _set_precision_flag_up set_precision_flag_up
#define _set_user_ldt set_user_ldt
@@ -293,6 +297,7 @@
#define _softclock softclock
#define _softnet_imask softnet_imask
#define _softtty_imask softtty_imask
+#define _spending spending
#define _spl0 spl0
#define _splz splz
#define _ss_lock ss_lock
@@ -326,9 +331,9 @@
#if defined(SMP) || defined(__ELF__)
#ifdef SMP
-#define FS(x) %fs:gd_ ## x
+#define FS(x) %fs:gd_ ## x
#else
-#define FS(x) x
+#define FS(x) x
#endif
#define _common_tss FS(common_tss)
@@ -337,6 +342,8 @@
#define _cpu_lockid FS(cpu_lockid)
#define _curpcb FS(curpcb)
#define _curproc FS(curproc)
+#define _prevproc FS(prevproc)
+#define _idleproc FS(idleproc)
#define _astpending FS(astpending)
#define _currentldt FS(currentldt)
#define _inside_intr FS(inside_intr)
@@ -353,9 +360,16 @@
#define _ss_eflags FS(ss_eflags)
#define _switchticks FS(switchticks)
#define _switchtime FS(switchtime)
+#define _intr_nesting_level FS(intr_nesting_level)
#define _tss_gdt FS(tss_gdt)
#define _idlestack FS(idlestack)
#define _idlestack_top FS(idlestack_top)
+#define _witness_spin_check FS(witness_spin_check)
+/*
+#define _ktr_idx FS(ktr_idx)
+#define _ktr_buf FS(ktr_buf)
+#define _ktr_buf_data FS(ktr_buf_data)
+*/
#endif
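The FS() wrapper lets the assembler names follow the per-cpu move: under SMP a
symbol such as _curproc resolves to a %fs-relative globaldata field, otherwise it
stays a plain global. For example (an illustration of the macros above, not
additional patch content):

	/* SMP:       #define _curproc %fs:gd_curproc */
	/* otherwise: #define _curproc curproc        */
	movl	_curproc,%ecx		/* assembly users are unchanged either way */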
diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h
index ffabf7f..18822b8 100644
--- a/sys/i386/include/cpu.h
+++ b/sys/i386/include/cpu.h
@@ -46,6 +46,7 @@
#include <machine/psl.h>
#include <machine/frame.h>
#include <machine/segments.h>
+#include <machine/globals.h>
/*
* definitions of cpu-dependent requirements
@@ -86,7 +87,9 @@
* added, we will have an atomicy problem. The type of atomicy we need is
* a non-locked orl.
*/
-#define need_resched() do { astpending = AST_RESCHED|AST_PENDING; } while (0)
+#define need_resched() do { \
+ PCPU_SET(astpending, AST_RESCHED|AST_PENDING); \
+} while (0)
#define resched_wanted() (astpending & AST_RESCHED)
/*
@@ -109,8 +112,9 @@
* it off (asynchronous need_resched() conflicts are not critical).
*/
#define signotify(p) aston()
-
-#define aston() do { astpending |= AST_PENDING; } while (0)
+#define aston() do { \
+ PCPU_SET(astpending, astpending | AST_PENDING); \
+} while (0)
#define astoff()
/*
@@ -135,7 +139,9 @@
#ifdef _KERNEL
extern char btext[];
extern char etext[];
+#ifndef intr_nesting_level
extern u_char intr_nesting_level;
+#endif
void fork_trampoline __P((void));
void fork_return __P((struct proc *, struct trapframe));
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index 9a4052f..39868df 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -86,20 +86,29 @@ static __inline void
disable_intr(void)
{
__asm __volatile("cli" : : : "memory");
-#ifdef SMP
- MPINTR_LOCK();
-#endif
}
static __inline void
enable_intr(void)
{
-#ifdef SMP
- MPINTR_UNLOCK();
-#endif
__asm __volatile("sti");
}
+static __inline u_int
+save_intr(void)
+{
+ u_int ef;
+
+ __asm __volatile("pushfl; popl %0" : "=r" (ef));
+ return (ef);
+}
+
+static __inline void
+restore_intr(u_int ef)
+{
+ __asm __volatile("pushl %0; popfl" : : "r" (ef) : "memory" );
+}
+
#define HAVE_INLINE_FFS
static __inline int
diff --git a/sys/i386/include/globaldata.h b/sys/i386/include/globaldata.h
index 58bd9cf..440da60 100644
--- a/sys/i386/include/globaldata.h
+++ b/sys/i386/include/globaldata.h
@@ -26,6 +26,20 @@
* $FreeBSD$
*/
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pmap.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
+
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. genassym uses this to generate offsets for the assembler
@@ -41,11 +55,14 @@
struct globaldata {
struct privatespace *gd_prvspace; /* self-reference */
struct proc *gd_curproc;
+ struct proc *gd_prevproc;
struct proc *gd_npxproc;
struct pcb *gd_curpcb;
+ struct proc *gd_idleproc;
struct timeval gd_switchtime;
struct i386tss gd_common_tss;
int gd_switchticks;
+ int gd_intr_nesting_level;
struct segment_descriptor gd_common_tssd;
struct segment_descriptor *gd_tss_gdt;
#ifdef USER_LDT
@@ -67,8 +84,22 @@ struct globaldata {
unsigned *gd_prv_PADDR1;
#endif
u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+ int gd_witness_spin_check;
+#ifdef KTR_PERCPU
+#ifdef KTR
+ volatile int gd_ktr_idx;
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[KTR_SIZE];
+#endif
+#endif
};
+extern struct globaldata globaldata;
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
#ifdef SMP
/*
* This is the upper (0xff800000) address space layout that is per-cpu.
@@ -93,3 +124,5 @@ struct privatespace {
extern struct privatespace SMP_prvspace[];
#endif
+
+#endif /* ! _MACHINE_GLOBALDATA_H_ */
diff --git a/sys/i386/include/globals.h b/sys/i386/include/globals.h
index ae05d56..71bbbd5 100644
--- a/sys/i386/include/globals.h
+++ b/sys/i386/include/globals.h
@@ -74,6 +74,14 @@
__asm("movl %0,%%fs:gd_" #name : : "r" (val)); \
}
+static __inline int
+_global_globaldata(void)
+{
+ int val;
+ __asm("movl %%fs:globaldata,%0" : "=r" (val));
+ return (val);
+}
+
#if defined(SMP) || defined(KLD_MODULE) || defined(ACTUALLY_LKM_NOT_KERNEL)
/*
* The following set of macros works for UP kernel as well, but for maximum
@@ -82,18 +90,21 @@
* portability between UP and SMP kernels.
*/
#define curproc GLOBAL_RVALUE_NV(curproc, struct proc *)
+#define prevproc GLOBAL_RVALUE_NV(prevproc, struct proc *)
#define curpcb GLOBAL_RVALUE_NV(curpcb, struct pcb *)
-#define npxproc GLOBAL_LVALUE(npxproc, struct proc *)
+#define npxproc GLOBAL_RVALUE_NV(npxproc, struct proc *)
+#define idleproc GLOBAL_RVALUE_NV(idleproc, struct proc *)
#define common_tss GLOBAL_LVALUE(common_tss, struct i386tss)
#define switchtime GLOBAL_LVALUE(switchtime, struct timeval)
#define switchticks GLOBAL_LVALUE(switchticks, int)
+#define intr_nesting_level GLOBAL_RVALUE(intr_nesting_level, u_char)
#define common_tssd GLOBAL_LVALUE(common_tssd, struct segment_descriptor)
#define tss_gdt GLOBAL_LVALUE(tss_gdt, struct segment_descriptor *)
-#define astpending GLOBAL_LVALUE(astpending, u_int)
+#define astpending GLOBAL_RVALUE(astpending, u_int)
#ifdef USER_LDT
-#define currentldt GLOBAL_LVALUE(currentldt, int)
+#define currentldt GLOBAL_RVALUE(currentldt, int)
#endif
#ifdef SMP
@@ -109,19 +120,32 @@
#define prv_CADDR3 GLOBAL_RVALUE(prv_CADDR3, caddr_t)
#define prv_PADDR1 GLOBAL_RVALUE(prv_PADDR1, unsigned *)
#endif
+
+#define witness_spin_check GLOBAL_RVALUE(witness_spin_check, int)
+
#endif /*UP kernel*/
GLOBAL_FUNC(curproc)
+GLOBAL_FUNC(prevproc)
GLOBAL_FUNC(astpending)
GLOBAL_FUNC(curpcb)
GLOBAL_FUNC(npxproc)
+GLOBAL_FUNC(idleproc)
GLOBAL_FUNC(common_tss)
GLOBAL_FUNC(switchtime)
GLOBAL_FUNC(switchticks)
+GLOBAL_FUNC(intr_nesting_level)
GLOBAL_FUNC(common_tssd)
GLOBAL_FUNC(tss_gdt)
+/* XXX */
+#ifdef KTR_PERCPU
+GLOBAL_FUNC(ktr_idx)
+GLOBAL_FUNC(ktr_buf)
+GLOBAL_FUNC(ktr_buf_data)
+#endif
+
#ifdef USER_LDT
GLOBAL_FUNC(currentldt)
#endif
@@ -140,7 +164,17 @@ GLOBAL_FUNC(prv_CADDR3)
GLOBAL_FUNC(prv_PADDR1)
#endif
-#define SET_CURPROC(x) (_global_curproc_set_nv((int)x))
+GLOBAL_FUNC(witness_spin_check)
+
+#ifdef SMP
+#define GLOBALDATA GLOBAL_RVALUE(globaldata, struct globaldata *)
+#else
+#define GLOBALDATA (&globaldata)
+#endif
+
+#define CURPROC curproc
+
+#define PCPU_SET(name, value) (_global_##name##_set((int)value))
#endif /* _KERNEL */
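PCPU_SET() is the generic way to store into one of these per-cpu variables:
PCPU_SET(name, value) calls the generated _global_<name>_set() inline, which on
SMP performs a %fs-relative store into the globaldata. Roughly (illustration only;
the exact generated code comes from the GLOBAL_FUNC() machinery above):

	/* PCPU_SET(curproc, idleproc) expands to: */
	_global_curproc_set((int)idleproc);

	/* and the generated setter is essentially: */
	static __inline void
	_global_curproc_set(int val)
	{
		__asm("movl %0,%%fs:gd_curproc" : : "r" (val));
	}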
diff --git a/sys/i386/include/ipl.h b/sys/i386/include/ipl.h
index 54d3f4b..08726df 100644
--- a/sys/i386/include/ipl.h
+++ b/sys/i386/include/ipl.h
@@ -43,9 +43,19 @@
#endif
/*
+ * Software interrupt level. We treat the software interrupt as a
+ * single interrupt at a fictive hardware interrupt level.
+ */
+#define SOFTINTR (NHWI + 0)
+
+/*
* Software interrupt bit numbers in priority order. The priority only
* determines which swi will be dispatched next; a higher priority swi
* may be dispatched when a nested h/w interrupt handler returns.
+ *
+ * XXX FIXME: There's no longer a relation between the SWIs and the
+ * HWIs, so it makes more sense for these values to start at 0, but
+ * there's lots of code which expects them to start at NHWI.
*/
#define SWI_TTY (NHWI + 0)
#define SWI_NET (NHWI + 1)
@@ -104,12 +114,9 @@
#ifdef notyet /* in <sys/interrupt.h> until pci drivers stop hacking on them */
extern unsigned bio_imask; /* group of interrupts masked with splbio() */
#endif
-extern unsigned cpl; /* current priority level mask */
-#ifdef SMP
-extern unsigned cil; /* current INTerrupt level mask */
-#endif
+
extern volatile unsigned idelayed; /* interrupts to become pending */
-extern volatile unsigned ipending; /* active interrupts masked by cpl */
+extern volatile unsigned spending; /* pending software interrupts */
#ifdef notyet /* in <sys/systm.h> until pci drivers stop hacking on them */
extern unsigned net_imask; /* group of interrupts masked with splimp() */
extern unsigned stat_imask; /* interrupts masked with splstatclock() */
diff --git a/sys/i386/include/lock.h b/sys/i386/include/lock.h
index 534f77e..b4af09d 100644
--- a/sys/i386/include/lock.h
+++ b/sys/i386/include/lock.h
@@ -37,21 +37,6 @@
#define MPLOCKED lock ;
/*
- * Some handy macros to allow logical organization.
- */
-
-#define MP_LOCK call _get_mplock
-
-#define MP_TRYLOCK \
- pushl $_mp_lock ; /* GIANT_LOCK */ \
- call _MPtrylock ; /* try to get lock */ \
- add $4, %esp
-
-#define MP_RELLOCK \
- movl $_mp_lock,%edx ; /* GIANT_LOCK */ \
- call _MPrellock_edx
-
-/*
* Protects the IO APIC and apic_imen as a critical region.
*/
#define IMASK_LOCK \
@@ -66,7 +51,8 @@
#define MPLOCKED /* NOP */
-#define MP_LOCK /* NOP */
+#define IMASK_LOCK /* NOP */
+#define IMASK_UNLOCK /* NOP */
#endif /* SMP */
@@ -77,32 +63,15 @@
#include <machine/smptests.h> /** xxx_LOCK */
/*
- * Locks regions protected in UP kernel via cli/sti.
- */
-#ifdef USE_MPINTRLOCK
-#define MPINTR_LOCK() s_lock(&mpintr_lock)
-#define MPINTR_UNLOCK() s_unlock(&mpintr_lock)
-#else
-#define MPINTR_LOCK()
-#define MPINTR_UNLOCK()
-#endif /* USE_MPINTRLOCK */
-
-/*
* sio/cy lock.
* XXX should rc (RISCom/8) use this?
*/
#ifdef USE_COMLOCK
#define COM_LOCK() s_lock(&com_lock)
#define COM_UNLOCK() s_unlock(&com_lock)
-#define COM_DISABLE_INTR() \
- { __asm __volatile("cli" : : : "memory"); COM_LOCK(); }
-#define COM_ENABLE_INTR() \
- { COM_UNLOCK(); __asm __volatile("sti"); }
#else
#define COM_LOCK()
#define COM_UNLOCK()
-#define COM_DISABLE_INTR() disable_intr()
-#define COM_ENABLE_INTR() enable_intr()
#endif /* USE_COMLOCK */
/*
@@ -112,22 +81,13 @@
#ifdef USE_CLOCKLOCK
#define CLOCK_LOCK() s_lock(&clock_lock)
#define CLOCK_UNLOCK() s_unlock(&clock_lock)
-#define CLOCK_DISABLE_INTR() \
- { __asm __volatile("cli" : : : "memory"); CLOCK_LOCK(); }
-#define CLOCK_ENABLE_INTR() \
- { CLOCK_UNLOCK(); __asm __volatile("sti"); }
#else
#define CLOCK_LOCK()
#define CLOCK_UNLOCK()
-#define CLOCK_DISABLE_INTR() disable_intr()
-#define CLOCK_ENABLE_INTR() enable_intr()
#endif /* USE_CLOCKLOCK */
#else /* SMP */
-#define MPINTR_LOCK()
-#define MPINTR_UNLOCK()
-
#define COM_LOCK()
#define COM_UNLOCK()
#define CLOCK_LOCK()
@@ -168,6 +128,7 @@ extern struct simplelock clock_lock;
extern struct simplelock com_lock;
extern struct simplelock mpintr_lock;
extern struct simplelock mcount_lock;
+extern struct simplelock panic_lock;
#if !defined(SIMPLELOCK_DEBUG) && NCPUS > 1
/*
diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h
index 61c5ecf..95b5759 100644
--- a/sys/i386/include/mptable.h
+++ b/sys/i386/include/mptable.h
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
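ap_boot_lock turns AP startup into a simple gate: the BSP takes the lock before
starting the APs, each AP blocks on it at the top of ap_init() and passes it on
once its own setup is done, and the release_aps() SYSINIT opens the gate at
SI_SUB_SMP. In outline (a condensed reading of the code above, not additional
patch content):

	/* BSP, mp_enable():           */	s_lock(&ap_boot_lock);
						start_all_aps(boot_addr);
	/* BSP, SYSINIT at SI_SUB_SMP: */	release_aps(NULL);	/* s_unlock(&ap_boot_lock) */
	/* each AP, ap_init():         */	s_lock(&ap_boot_lock);
						/* ... per-AP setup ... */
						s_unlock(&ap_boot_lock);	/* let the next AP in */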
diff --git a/sys/i386/include/mutex.h b/sys/i386/include/mutex.h
new file mode 100644
index 0000000..ef0c963
--- /dev/null
+++ b/sys/i386/include/mutex.h
@@ -0,0 +1,786 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_MUTEX_H_
+#define _MACHINE_MUTEX_H_
+
+#ifndef LOCORE
+
+#include <sys/ktr.h>
+#include <sys/queue.h>
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine/globals.h>
+
+/*
+ * If kern_mutex.c is being built, compile non-inlined versions of various
+ * functions so that kernel modules can use them.
+ */
+#ifndef _KERN_MUTEX_C_
+#define _MTX_INLINE static __inline
+#else
+#define _MTX_INLINE
+#endif
+
+/*
+ * Mutex flags
+ *
+ * Types
+ */
+#define MTX_DEF 0x0 /* Default (spin/sleep) */
+#define MTX_SPIN 0x1 /* Spin only lock */
+
+/* Options */
+#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */
+#define MTX_NORECURSE 0x8 /* No recursion possible */
+#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */
+#define MTX_NOSWITCH 0x20 /* Do not switch on release */
+#define MTX_FIRST 0x40 /* First spin lock holder */
+#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */
+
+/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */
+#define MTX_HARDOPTS (MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH)
+
+/* Flags/value used in mtx_lock */
+#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */
+#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */
+#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED)
+#define MTX_UNOWNED 0x8 /* Cookie for free mutex */
+
+struct proc; /* XXX */
+
+/*
+ * Sleep/spin mutex
+ */
+struct mtx {
+ volatile u_int mtx_lock; /* lock owner/gate/flags */
+ volatile u_short mtx_recurse; /* number of recursive holds */
+ u_short mtx_f1;
+ u_int mtx_savefl; /* saved flags (for spin locks) */
+ char *mtx_description;
+ TAILQ_HEAD(, proc) mtx_blocked;
+ LIST_ENTRY(mtx) mtx_contested;
+ struct mtx *mtx_next; /* all locks in system */
+ struct mtx *mtx_prev;
+#ifdef SMP_DEBUG
+ /* If you add anything here, adjust the mtxf_t definition below */
+ struct witness *mtx_witness;
+ LIST_ENTRY(mtx) mtx_held;
+ char *mtx_file;
+ int mtx_line;
+#endif /* SMP_DEBUG */
+};
+
+typedef struct mtx mtx_t;
+
+/*
+ * Filler for structs which need to remain the same size
+ * whether or not SMP_DEBUG is turned on.
+ */
+typedef struct mtxf {
+#ifdef SMP_DEBUG
+ char mtxf_data[0];
+#else
+ char mtxf_data[4*sizeof(void *) + sizeof(int)];
+#endif
+} mtxf_t;
+
+#define mp_fixme(string)
+
+#ifdef _KERNEL
+/* Misc */
+#define CURTHD ((u_int)CURPROC) /* Current thread ID */
+
+/* Prototypes */
+void mtx_init(mtx_t *m, char *description, int flag);
+void mtx_enter_hard(mtx_t *, int type, int flags);
+void mtx_exit_hard(mtx_t *, int type);
+void mtx_destroy(mtx_t *m);
+
+#if (defined(KLD_MODULE) || defined(_KERN_MUTEX_C_))
+void mtx_enter(mtx_t *mtxp, int type);
+int mtx_try_enter(mtx_t *mtxp, int type);
+void mtx_exit(mtx_t *mtxp, int type);
+#endif
+
+/* Global locks */
+extern mtx_t sched_lock;
+extern mtx_t Giant;
+
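The core API is mtx_init() once at setup time, mtx_enter()/mtx_exit() with a
matching type argument around each critical section, and mtx_destroy() at
teardown. A minimal usage sketch (illustration only; example_lock is a
hypothetical lock, not something defined by this header):

	static mtx_t example_lock;

	mtx_init(&example_lock, "example", MTX_DEF);	/* once, before first use */
	...
	mtx_enter(&example_lock, MTX_DEF);
	/* ... access the data example_lock protects ... */
	mtx_exit(&example_lock, MTX_DEF);
	...
	mtx_destroy(&example_lock);			/* when the lock is retired */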
+/*
+ * Used to replace return with an exit Giant and return.
+ */
+
+#define EGAR(a) \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return (a); \
+} while (0)
+
+#define VEGAR \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return; \
+} while (0)
+
+#define DROP_GIANT() \
+do { \
+ int _giantcnt; \
+ WITNESS_SAVE_DECL(Giant); \
+ \
+ WITNESS_SAVE(&Giant, Giant); \
+ for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \
+ mtx_exit(&Giant, MTX_DEF)
+
+#define PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant); \
+} while (0)
+
+#define PARTIAL_PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant)
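DROP_GIANT() opens a block (note the unbalanced braces) and PICKUP_GIANT() closes
it; the pair brackets code that must block without holding Giant, restoring the
previous recursion depth afterwards. A usage sketch (illustration only, not part
of this header; the tsleep() call stands in for whatever blocking operation the
caller performs):

	DROP_GIANT();			/* release Giant, remembering its recursion depth */
	error = tsleep(ident, PVM, "example", 0);	/* sleep without Giant held */
	PICKUP_GIANT();			/* reacquire Giant to the saved depth */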
+
+
+/*
+ * Debugging
+ */
+#ifndef SMP_DEBUG
+#define mtx_assert(m, what)
+#else /* SMP_DEBUG */
+
+#define MA_OWNED 1
+#define MA_NOTOWNED 2
+#define mtx_assert(m, what) { \
+ switch ((what)) { \
+ case MA_OWNED: \
+ ASS(mtx_owned((m))); \
+ break; \
+ case MA_NOTOWNED: \
+ ASS(!mtx_owned((m))); \
+ break; \
+ default: \
+ panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \
+ } \
+}
+
+#ifdef INVARIANTS
+#define ASS(ex) MPASS(ex)
+#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ #ex, __FILE__, __LINE__)
+#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ what, __FILE__, __LINE__)
+
+#ifdef MTX_STRS
+char STR_IEN[] = "fl & 0x200";
+char STR_IDIS[] = "!(fl & 0x200)";
+#else /* MTX_STRS */
+extern char STR_IEN[];
+extern char STR_IDIS[];
+#endif /* MTX_STRS */
+#define ASS_IEN MPASS2(read_eflags() & 0x200, STR_IEN)
+#define ASS_IDIS MPASS2((read_eflags() & 0x200) == 0, STR_IDIS)
+#endif /* INVARIANTS */
+
+#endif /* SMP_DEBUG */
+
+#if !defined(SMP_DEBUG) || !defined(INVARIANTS)
+#define ASS(ex)
+#define MPASS(ex)
+#define MPASS2(ex, where)
+#define ASS_IEN
+#define ASS_IDIS
+#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */
+
+#ifdef WITNESS
+#ifndef SMP_DEBUG
+#error WITNESS requires SMP_DEBUG
+#endif /* SMP_DEBUG */
+#define WITNESS_ENTER(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_enter((m), (f), __FILE__, __LINE__)
+#define WITNESS_EXIT(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_exit((m), (f), __FILE__, __LINE__)
+
+#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__)
+#define WITNESS_SAVE_DECL(n) \
+ char * __CONCAT(n, __wf); \
+ int __CONCAT(n, __wl)
+
+#define WITNESS_SAVE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \
+} while (0)
+
+#define WITNESS_RESTORE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \
+} while (0)
+
+void witness_init(mtx_t *, int flag);
+void witness_destroy(mtx_t *);
+void witness_enter(mtx_t *, int, char *, int);
+void witness_try_enter(mtx_t *, int, char *, int);
+void witness_exit(mtx_t *, int, char *, int);
+void witness_display(void(*)(const char *fmt, ...));
+void witness_list(struct proc *);
+int witness_sleep(int, mtx_t *, char *, int);
+void witness_save(mtx_t *, char **, int *);
+void witness_restore(mtx_t *, char *, int);
+#else /* WITNESS */
+#define WITNESS_ENTER(m, flag)
+#define WITNESS_EXIT(m, flag)
+#define WITNESS_SLEEP(check, m)
+#define WITNESS_SAVE_DECL(n)
+#define WITNESS_SAVE(m, n)
+#define WITNESS_RESTORE(m, n)
+
+/*
+ * flag++ is a sleazoid way of shutting up the unused parameter warning
+ * in mtx_init()
+ */
+#define witness_init(m, flag) flag++
+#define witness_destroy(m)
+#define witness_enter(m, flag, f, l)
+#define witness_try_enter(m, flag, f, l )
+#define witness_exit(m, flag, f, l)
+#endif /* WITNESS */
+
+/*
+ * Assembly macros (for internal use only)
+ *------------------------------------------------------------------------------
+ */
+
+#define _V(x) __STRING(x)
+
+#ifndef I386_CPU
+
+/*
+ * For 486 and newer processors.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 1f;" /* Got it */ \
+" andl $" _V(MTX_FLAGMASK) ",%%eax;" /* turn off spec bits */ \
+" cmpl %%eax,%3;" /* already have it? */ \
+" je 2f;" /* yes, recurse */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+"2: lock; orl $" _V(MTX_RECURSE) ",%1;" \
+" incw %2;" \
+"1:" \
+"# getlock_sleep" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "+m" (mtxp->mtx_recurse) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" pushfl;" \
+" cli;" \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %3,%1;" /* Try */ \
+" jz 2f;" /* got it */ \
+" pushl %4;" \
+" pushl %5;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type, oflags) */ \
+" addl $0xc,%%esp;" \
+" jmp 1f;" \
+"2: popl %2;" /* save flags */ \
+"1:" \
+"# getlock_spin_block" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock), /* 1 */ \
+ "=m" (mtxp->mtx_savefl) /* 2 */ \
+ : "r" (tid), /* 3 (input) */ \
+ "gi" (type), /* 4 */ \
+ "g" (mtxp) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mtxp, tid, type) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movl $" _V(MTX_UNOWNED) ",%%eax;" /* Unowned cookie */ \
+" " MPLOCKED "" \
+" cmpxchgl %2,%1;" /* Try */ \
+" jz 1f;" /* got it */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_enter_hard;" /* mtx_enter_hard(mtxp, type) */ \
+" addl $8,%%esp;" \
+"1:" \
+"# getlock_norecurse" \
+ : "=&a" (_res), /* 0 (dummy output) */ \
+ "+m" (mtxp->mtx_lock) /* 1 */ \
+ : "r" (tid), /* 2 (input) */ \
+ "gi" (type), /* 3 */ \
+ "g" (mtxp) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+#define _exitlock_norecurse(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %4,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" pushl %2;" \
+" pushl %3;" \
+" call mtx_exit_hard;" \
+" addl $8,%%esp;" \
+"1:" \
+"# exitlock_norecurse" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+a" (_tid) /* 1 */ \
+ : "gi" (type), /* 2 (input) */ \
+ "g" (mtxp), /* 3 */ \
+ "r" (MTX_UNOWNED) /* 4 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mtxp, tid, type) ({ \
+ int _tid = (int)(tid); \
+ \
+ __asm __volatile ( \
+" " MPLOCKED "" \
+" cmpxchgl %5,%0;" /* try easy rel */ \
+" jz 1f;" /* released! */ \
+" testl $" _V(MTX_RECURSE) ",%%eax;" /* recursed? */ \
+" jnz 3f;" /* handle recursion */ \
+ /* Lock not recursed and contested: do the hard way */ \
+" pushl %3;" \
+" pushl %4;" \
+" call mtx_exit_hard;" /* mtx_exit_hard(mtxp,type) */ \
+" addl $8,%%esp;" \
+" jmp 1f;" \
+ /* lock recursed, lower recursion level */ \
+"3: decw %1;" /* one less level */ \
+" jnz 1f;" /* still recursed, done */ \
+" lock; andl $~" _V(MTX_RECURSE) ",%0;" /* turn off recurse flag */ \
+"1:" \
+"# exitlock" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "+a" (_tid) /* 2 */ \
+ : "gi" (type), /* 3 (input) */ \
+ "g" (mtxp), /* 4 */ \
+ "r" (MTX_UNOWNED) /* 5 */ \
+ : "memory", "ecx", "edx" /* used */ ); \
+})
+
+/*
+ * Release a spin lock (with possible recursion).
+ *
+ * We use cmpxchgl to clear the lock (instead of a simple store) to flush the
+ * posting buffers and make the change visible to other CPUs.
+ */
+#define _exitlock_spin(mtxp, inten1, inten2) ({ \
+ int _res; \
+ \
+ __asm __volatile ( \
+" movw %1,%%ax;" \
+" decw %%ax;" \
+" js 1f;" \
+" movw %%ax,%1;" \
+" jmp 2f;" \
+"1: movl %0,%%eax;" \
+" movl $ " _V(MTX_UNOWNED) ",%%ecx;" \
+" " inten1 ";" \
+" " MPLOCKED "" \
+" cmpxchgl %%ecx,%0;" \
+" " inten2 ";" \
+"2:" \
+"# exitlock_spin" \
+ : "+m" (mtxp->mtx_lock), /* 0 */ \
+ "+m" (mtxp->mtx_recurse), /* 1 */ \
+ "=&a" (_res) /* 2 */ \
+ : "g" (mtxp->mtx_savefl) /* 3 (used in 'inten') */ \
+ : "memory", "ecx" /* used */ ); \
+})
+
+#else /* I386_CPU */
+
+/*
+ * For 386 processors only.
+ */
+
+/* Get a sleep lock, deal with recursion inline. */
+#define _getlock_sleep(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \
+ if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \
+ else { \
+ atomic_set_int(&(mp)->mtx_lock, MTX_RECURSE); \
+ (mp)->mtx_recurse++; \
+ } \
+ } \
+} while (0)
+
+/* Get a spin lock, handle recursion inline (as the less common case) */
+#define _getlock_spin_block(mp, tid, type) do { \
+ u_int _mtx_fl = read_eflags(); \
+ disable_intr(); \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _mtx_fl); \
+ else \
+ (mp)->mtx_savefl = _mtx_fl; \
+} while (0)
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter function if
+ * we can't get it inline.
+ */
+#define _getlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \
+} while (0)
+
+/*
+ * Release a sleep lock, assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+#define _exitlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+} while (0)
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+#define _exitlock(mp, tid, type) do { \
+ if (atomic_cmpset_int(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) { \
+ if ((mp)->mtx_lock & MTX_RECURSE) { \
+ if (--((mp)->mtx_recurse) == 0) \
+ atomic_clear_int(&(mp)->mtx_lock, \
+ MTX_RECURSE); \
+ } else { \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+ } \
+ } \
+} while (0)
+
+/* Release a spin lock (with possible recursion). */
+#define _exitlock_spin(mp, inten1, inten2) do { \
+ if ((mp)->mtx_recurse == 0) { \
+ atomic_cmpset_int(&(mp)->mtx_lock, (mp)->mtx_lock, \
+ MTX_UNOWNED); \
+ write_eflags((mp)->mtx_savefl); \
+ } else { \
+ (mp)->mtx_recurse--; \
+ } \
+} while (0)
+
+#endif /* I386_CPU */
+
+/*
+ * Externally visible mutex functions.
+ *------------------------------------------------------------------------------
+ */
+
+/*
+ * Return non-zero if a mutex is already owned by the current thread.
+ */
+#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD)
+
+/* Common strings */
+#ifdef MTX_STRS
+#ifdef KTR_EXTEND
+
+/*
+ * KTR_EXTEND saves file name and line for all entries, so we don't need them
+ * here. Theoretically we should also change the entries which refer to them
+ * (from CTR5 to CTR3), but since they're just passed to snprintf as the last
+ * parameters, it doesn't do any harm to leave them.
+ */
+char STR_mtx_enter_fmt[] = "GOT %s [%x] r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] result=%d";
+#else
+char STR_mtx_enter_fmt[] = "GOT %s [%x] at %s:%d r=%d";
+char STR_mtx_exit_fmt[] = "REL %s [%x] at %s:%d r=%d";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%x] at %s:%d result=%d";
+#endif
+char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0";
+char STR_mtx_owned[] = "mtx_owned(_mpp)";
+char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0";
+#else /* MTX_STRS */
+extern char STR_mtx_enter_fmt[];
+extern char STR_mtx_bad_type[];
+extern char STR_mtx_exit_fmt[];
+extern char STR_mtx_owned[];
+extern char STR_mtx_recurse[];
+extern char STR_mtx_try_enter_fmt[];
+#endif /* MTX_STRS */
+
+#ifndef KLD_MODULE
+/*
+ * Get lock 'm'; the macro handles the easy (and most common) cases and leaves
+ * the slow stuff to the mtx_enter_hard() function.
+ *
+ * Note: since type is usually a constant, much of this code is optimized out.
+ */
+_MTX_INLINE void
+mtx_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *_mpp = mtxp;
+
+ /* bits only valid on mtx_exit() */
+ MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0,
+ STR_mtx_bad_type);
+
+ do {
+ if ((type) & MTX_SPIN) {
+ /*
+ * Easy cases of spin locks:
+ *
+ * 1) We already own the lock and will simply
+ * recurse on it (if RLIKELY)
+ *
+ * 2) The lock is free, we just get it
+ */
+ if ((type) & MTX_RLIKELY) {
+ /*
+ * Check for recursion, if we already
+ * have this lock we just bump the
+ * recursion count.
+ */
+ if (_mpp->mtx_lock == CURTHD) {
+ _mpp->mtx_recurse++;
+ break; /* Done */
+ }
+ }
+
+ if (((type) & MTX_TOPHALF) == 0) {
+ /*
+ * If an interrupt thread uses this
+ * we must block interrupts here.
+ */
+ if ((type) & MTX_FIRST) {
+ ASS_IEN;
+ disable_intr();
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ _getlock_spin_block(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ } else {
+ /* Sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _getlock_sleep(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ else
+ _getlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ } while (0);
+ WITNESS_ENTER(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+}
+
+/*
+ * Attempt to get an MTX_DEF lock; return non-zero if the lock was acquired.
+ *
+ * XXX DOES NOT HANDLE RECURSION
+ */
+_MTX_INLINE int
+mtx_try_enter(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+ int _rval;
+
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);
+#ifdef SMP_DEBUG
+ if (_rval && (_mpp)->mtx_witness != NULL) {
+ ASS((_mpp)->mtx_recurse == 0);
+ witness_try_enter(_mpp, type, __FILE__, __LINE__);
+ }
+#endif
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, _rval);
+
+ return _rval;
+}
+
+#define mtx_legal2block() (read_eflags() & 0x200)
+
+/*
+ * Release lock m.
+ */
+_MTX_INLINE void
+mtx_exit(mtx_t *mtxp, int type)
+{
+ mtx_t *const _mpp = mtxp;
+
+ MPASS2(mtx_owned(_mpp), STR_mtx_owned);
+ WITNESS_EXIT(_mpp, type);
+ CTR5(KTR_LOCK, STR_mtx_exit_fmt,
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__,
+ (_mpp)->mtx_recurse);
+ if ((type) & MTX_SPIN) {
+ if ((type) & MTX_NORECURSE) {
+ MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse);
+ atomic_cmpset_int(&_mpp->mtx_lock, _mpp->mtx_lock,
+ MTX_UNOWNED);
+ if (((type) & MTX_TOPHALF) == 0) {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ enable_intr();
+ } else
+ write_eflags(_mpp->mtx_savefl);
+ }
+ } else {
+ if ((type) & MTX_TOPHALF)
+ _exitlock_spin(_mpp,,);
+ else {
+ if ((type) & MTX_FIRST) {
+ ASS_IDIS;
+ _exitlock_spin(_mpp,, "sti");
+ } else {
+ _exitlock_spin(_mpp,
+ "pushl %3", "popfl");
+ }
+ }
+ }
+ } else {
+ /* Handle sleep locks */
+ if ((type) & MTX_RLIKELY)
+ _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS);
+ else {
+ _exitlock_norecurse(_mpp, CURTHD,
+ (type) & MTX_HARDOPTS);
+ }
+ }
+}
+
+#endif /* KLD_MODULE */
+#endif /* _KERNEL */
+
+#else /* !LOCORE */
+
+/*
+ * Simple assembly macros to get and release non-recursive spin locks
+ */
+
+#if defined(I386_CPU)
+
+#define MTX_EXIT(lck, reg) \
+ movl $ MTX_UNOWNED,lck+MTX_LOCK;
+
+#else /* I386_CPU */
+
+#define MTX_ENTER(reg, lck) \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b
+
+/* Must use locked bus op (cmpxchg) when setting to unowned (barrier) */
+#define MTX_EXIT(lck,reg) \
+ movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+
+#define MTX_ENTER_WITH_RECURSION(reg, lck) \
+ movl lck+MTX_LOCK,%eax; \
+ cmpl PCPU_CURPROC,%eax; \
+ jne 9f; \
+ incw lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl $ MTX_UNOWNED,%eax; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+ jnz 9b; \
+8:
+
+#define MTX_EXIT_WITH_RECURSION(lck,reg) \
+ movw lck+MTX_RECURSECNT,%ax; \
+ decw %ax; \
+ js 9f; \
+ movw %ax,lck+MTX_RECURSECNT; \
+ jmp 8f; \
+9: movl lck+MTX_LOCK,%eax; \
+ movl $ MTX_UNOWNED,reg; \
+ MPLOCKED \
+ cmpxchgl reg,lck+MTX_LOCK; \
+8:
+
+#endif /* I386_CPU */
+#endif /* !LOCORE */
+#endif /* __MACHINE_MUTEX_H */
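
For illustration, a minimal usage sketch of the interface declared above.  The
driver name, lock and counter are hypothetical, and the mutex is assumed to
have been set up elsewhere with mtx_init(); MTX_DEF selects the default
(sleep) mutex type.

	static mtx_t	foo_mtx;	/* hypothetical lock, initialized via mtx_init() */
	static int	foo_count;	/* shared state guarded by foo_mtx */

	static void
	foo_update(void)
	{

		mtx_enter(&foo_mtx, MTX_DEF);	/* may block if contested */
		foo_count++;
		mtx_exit(&foo_mtx, MTX_DEF);
	}

	static int
	foo_peek(void)
	{
		int val;

		if (mtx_try_enter(&foo_mtx, MTX_DEF) == 0)
			return (-1);		/* contested; caller retries later */
		val = foo_count;
		mtx_exit(&foo_mtx, MTX_DEF);
		return (val);
	}
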
diff --git a/sys/i386/include/pcb.h b/sys/i386/include/pcb.h
index 08beb5a..1c7af85 100644
--- a/sys/i386/include/pcb.h
+++ b/sys/i386/include/pcb.h
@@ -72,11 +72,7 @@ struct pcb {
#define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */
#define PCB_DBREGS 0x02 /* process using debug registers */
caddr_t pcb_onfault; /* copyin/out fault recovery */
-#ifdef SMP
- u_long pcb_mpnest;
-#else
- u_long pcb_mpnest_dontuse;
-#endif
+ int pcb_schednest;
int pcb_gs;
struct pcb_ext *pcb_ext; /* optional pcb extension */
u_long __pcb_spare[3]; /* adjust to avoid core dump size changes */
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index 58bd9cf..440da60 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -26,6 +26,20 @@
* $FreeBSD$
*/
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pmap.h>
+#include <machine/segments.h>
+#include <machine/tss.h>
+
+/* XXX */
+#ifdef KTR_PERCPU
+#include <sys/ktr.h>
+#endif
+
/*
* This structure maps out the global data that needs to be kept on a
* per-cpu basis. genassym uses this to generate offsets for the assembler
@@ -41,11 +55,14 @@
struct globaldata {
struct privatespace *gd_prvspace; /* self-reference */
struct proc *gd_curproc;
+ struct proc *gd_prevproc;
struct proc *gd_npxproc;
struct pcb *gd_curpcb;
+ struct proc *gd_idleproc;
struct timeval gd_switchtime;
struct i386tss gd_common_tss;
int gd_switchticks;
+ int gd_intr_nesting_level;
struct segment_descriptor gd_common_tssd;
struct segment_descriptor *gd_tss_gdt;
#ifdef USER_LDT
@@ -67,8 +84,22 @@ struct globaldata {
unsigned *gd_prv_PADDR1;
#endif
u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+ int gd_witness_spin_check;
+#ifdef KTR_PERCPU
+#ifdef KTR
+ volatile int gd_ktr_idx;
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[KTR_SIZE];
+#endif
+#endif
};
+extern struct globaldata globaldata;
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
#ifdef SMP
/*
* This is the upper (0xff800000) address space layout that is per-cpu.
@@ -93,3 +124,5 @@ struct privatespace {
extern struct privatespace SMP_prvspace[];
#endif
+
+#endif /* ! _MACHINE_GLOBALDATA_H_ */
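
For illustration, a sketch of how a consumer might walk the new cpuhead list
of per-CPU globaldata structures.  The helper name is hypothetical;
SLIST_FOREACH() comes from <sys/queue.h>, and each CPU is assumed to have
added its globaldata to the list during startup.

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/queue.h>

	static void
	foo_report_cpus(void)		/* hypothetical debugging helper */
	{
		struct globaldata *gd;

		SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
			printf("cpu: curproc %p intr nesting %d\n",
			    gd->gd_curproc, gd->gd_intr_nesting_level);
	}
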
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 69b716b..20d4fa3 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -15,6 +15,9 @@
#ifdef _KERNEL
+#ifdef I386_CPU
+#error SMP not supported with I386_CPU
+#endif
#if defined(SMP) && !defined(APIC_IO)
# error APIC_IO required for SMP, add "options APIC_IO" to your config file.
#endif /* SMP && !APIC_IO */
@@ -57,23 +60,6 @@ extern int bootMP_size;
/* functions in mpboot.s */
void bootMP __P((void));
-/* global data in mplock.s */
-extern u_int mp_lock;
-extern u_int isr_lock;
-#ifdef RECURSIVE_MPINTRLOCK
-extern u_int mpintr_lock;
-#endif /* RECURSIVE_MPINTRLOCK */
-
-/* functions in mplock.s */
-void get_mplock __P((void));
-void rel_mplock __P((void));
-int try_mplock __P((void));
-#ifdef RECURSIVE_MPINTRLOCK
-void get_mpintrlock __P((void));
-void rel_mpintrlock __P((void));
-int try_mpintrlock __P((void));
-#endif /* RECURSIVE_MPINTRLOCK */
-
/* global data in apic_vector.s */
extern volatile u_int stopped_cpus;
extern volatile u_int started_cpus;
@@ -185,23 +171,7 @@ extern int smp_started;
extern volatile int smp_idle_loops;
#endif /* !LOCORE */
-#else /* !SMP && !APIC_IO */
-
-/*
- * Create dummy MP lock empties
- */
-
-static __inline void
-get_mplock(void)
-{
-}
-
-static __inline void
-rel_mplock(void)
-{
-}
-
-#endif
+#endif /* SMP && !APIC_IO */
#endif /* _KERNEL */
#endif /* _MACHINE_SMP_H_ */
diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h
index f9ac4a3..304e990 100644
--- a/sys/i386/include/smptests.h
+++ b/sys/i386/include/smptests.h
@@ -86,7 +86,6 @@
* These defines enable critical region locking of areas that were
* protected via cli/sti in the UP kernel.
*
- * MPINTRLOCK protects all the generic areas.
* COMLOCK protects the sio/cy drivers.
* CLOCKLOCK protects clock hardware and data
* known to be incomplete:
@@ -94,7 +93,6 @@
* ?
*/
#ifdef PUSHDOWN_LEVEL_1
-#define USE_MPINTRLOCK
#define USE_COMLOCK
#define USE_CLOCKLOCK
#endif
@@ -176,9 +174,8 @@
/*
* Send CPUSTOP IPI for stop/restart of other CPUs on DDB break.
- *
-#define VERBOSE_CPUSTOP_ON_DDBBREAK
*/
+#define VERBOSE_CPUSTOP_ON_DDBBREAK
#define CPUSTOP_ON_DDBBREAK
diff --git a/sys/i386/isa/apic_ipl.s b/sys/i386/isa/apic_ipl.s
index 94771f3..0def1de 100644
--- a/sys/i386/isa/apic_ipl.s
+++ b/sys/i386/isa/apic_ipl.s
@@ -69,78 +69,6 @@ _apic_imen:
SUPERALIGN_TEXT
/*
- * splz() - dispatch pending interrupts after cpl reduced
- *
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. However, since ipending can change at any time
- * (by an interrupt or, with SMP, by another cpu), we have to
- * repeat the check. At the moment we must own the MP lock in
- * the SMP case because the interruput handlers require it. We
- * loop until no unmasked pending interrupts remain.
- *
- * No new unmaksed pending interrupts will be added during the
- * loop because, being unmasked, the interrupt code will be able
- * to execute the interrupts.
- *
- * Interrupts come in two flavors: Hardware interrupts and software
- * interrupts. We have to detect the type of interrupt (based on the
- * position of the interrupt bit) and call the appropriate dispatch
- * routine.
- *
- * NOTE: "bsfl %ecx,%ecx" is undefined when %ecx is 0 so we can't
- * rely on the secondary btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx /* set bit = unmasked level */
- andl _ipending,%ecx /* set bit = unmasked pending INT */
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- lock
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- *
- * The vec[] routines build the proper frame on the stack,
- * then call one of _Xintr0 thru _XintrNN.
- */
- jmp *_vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
@@ -161,8 +89,6 @@ __CONCAT(vec,irq_num): ; \
pushl $KCSEL ; \
pushl %eax ; \
cli ; \
- lock ; /* MP-safe */ \
- andl $~IRQ_BIT(irq_num), iactive ; /* lazy masking */ \
MEXITCOUNT ; \
APIC_ITRACE(apic_itrace_splz, irq_num, APIC_ITRACE_SPLZ) ; \
jmp __CONCAT(_Xintr,irq_num)
diff --git a/sys/i386/isa/apic_vector.s b/sys/i386/isa/apic_vector.s
index 2a7559d..54bf003 100644
--- a/sys/i386/isa/apic_vector.s
+++ b/sys/i386/isa/apic_vector.s
@@ -17,7 +17,7 @@
/*
- * Macros for interrupt interrupt entry, call to handler, and exit.
+ * Macros for interrupt entry, call to handler, and exit.
*/
#define FAST_INTR(irq_num, vec_name) \
@@ -121,7 +121,7 @@ IDTVEC(vec_name) ; \
/*
- * Test to see if the source is currntly masked, clear if so.
+ * Test to see if the source is currently masked, clear if so.
*/
#define UNMASK_IRQ(irq_num) \
IMASK_LOCK ; /* into critical reg */ \
@@ -200,7 +200,16 @@ log_intr_event:
#else
#define APIC_ITRACE(name, irq_num, id)
#endif
-
+
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -216,87 +225,24 @@ IDTVEC(vec_name) ; \
maybe_extra_ipending ; \
; \
APIC_ITRACE(apic_itrace_enter, irq_num, APIC_ITRACE_ENTER) ; \
- lock ; /* MP-safe */ \
- btsl $(irq_num), iactive ; /* lazy masking */ \
- jc 1f ; /* already active */ \
; \
MASK_LEVEL_IRQ(irq_num) ; \
EOI_IRQ(irq_num) ; \
0: ; \
- APIC_ITRACE(apic_itrace_tryisrlock, irq_num, APIC_ITRACE_TRYISRLOCK) ;\
- MP_TRYLOCK ; /* XXX this is going away... */ \
- testl %eax, %eax ; /* did we get it? */ \
- jz 3f ; /* no */ \
-; \
- APIC_ITRACE(apic_itrace_gotisrlock, irq_num, APIC_ITRACE_GOTISRLOCK) ;\
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 2f ; /* this INT masked */ \
-; \
incb _intr_nesting_level ; \
; \
/* entry point used by doreti_unpend for HWIs. */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX avoid dbl cnt */ \
- lock ; incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4, %eax ; \
- lock ; incl (%eax) ; \
-; \
- movl _cpl, %eax ; \
- pushl %eax ; \
- orl _intr_mask + (irq_num) * 4, %eax ; \
- movl %eax, _cpl ; \
- lock ; \
- andl $~IRQ_BIT(irq_num), _ipending ; \
-; \
- pushl _intr_unit + (irq_num) * 4 ; \
+ pushl $irq_num; /* pass the IRQ */ \
APIC_ITRACE(apic_itrace_enter2, irq_num, APIC_ITRACE_ENTER2) ; \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
APIC_ITRACE(apic_itrace_leave, irq_num, APIC_ITRACE_LEAVE) ; \
; \
- lock ; andl $~IRQ_BIT(irq_num), iactive ; \
- UNMASK_IRQ(irq_num) ; \
- APIC_ITRACE(apic_itrace_unmask, irq_num, APIC_ITRACE_UNMASK) ; \
- sti ; /* doreti repeats cli/sti */ \
MEXITCOUNT ; \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-1: ; /* active */ \
- APIC_ITRACE(apic_itrace_active, irq_num, APIC_ITRACE_ACTIVE) ; \
- MASK_IRQ(irq_num) ; \
- EOI_IRQ(irq_num) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- lock ; \
- btsl $(irq_num), iactive ; /* still active */ \
- jnc 0b ; /* retry */ \
- POP_FRAME ; \
- iret ; /* XXX: iactive bit might be 0 now */ \
- ALIGN_TEXT ; \
-2: ; /* masked by cpl, leave iactive set */ \
- APIC_ITRACE(apic_itrace_masked, irq_num, APIC_ITRACE_MASKED) ; \
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- MP_RELLOCK ; \
- POP_FRAME ; \
- iret ; \
- ALIGN_TEXT ; \
-3: ; /* other cpu has isr lock */ \
- APIC_ITRACE(apic_itrace_noisrlock, irq_num, APIC_ITRACE_NOISRLOCK) ;\
- lock ; \
- orl $IRQ_BIT(irq_num), _ipending ; \
- testl $IRQ_BIT(irq_num), _cpl ; \
- jne 4f ; /* this INT masked */ \
- call forward_irq ; /* forward irq to lock holder */ \
- POP_FRAME ; /* and return */ \
- iret ; \
- ALIGN_TEXT ; \
-4: ; /* blocked */ \
- APIC_ITRACE(apic_itrace_masked2, irq_num, APIC_ITRACE_MASKED2) ;\
- POP_FRAME ; /* and return */ \
- iret
+ jmp doreti_next
/*
* Handle "spurious INTerrupts".
@@ -434,20 +380,10 @@ _Xcpuast:
FAKE_MCOUNT(13*4(%esp))
- /*
- * Giant locks do not come cheap.
- * A lot of cycles are going to be wasted here.
- */
- call _get_mplock
-
- movl _cpl, %eax
- pushl %eax
orl $AST_PENDING, _astpending /* XXX */
incb _intr_nesting_level
sti
- pushl $0
-
movl _cpuid, %eax
lock
btrl %eax, _checkstate_pending_ast
@@ -461,7 +397,7 @@ _Xcpuast:
lock
incl CNAME(cpuast_cnt)
MEXITCOUNT
- jmp _doreti
+ jmp doreti_next
1:
/* We are already in the process of delivering an ast for this CPU */
POP_FRAME
@@ -487,40 +423,24 @@ _Xforward_irq:
FAKE_MCOUNT(13*4(%esp))
- MP_TRYLOCK
- testl %eax,%eax /* Did we get the lock ? */
- jz 1f /* No */
-
lock
incl CNAME(forward_irq_hitcnt)
cmpb $4, _intr_nesting_level
- jae 2f
+ jae 1f
- movl _cpl, %eax
- pushl %eax
incb _intr_nesting_level
sti
- pushl $0
-
MEXITCOUNT
- jmp _doreti /* Handle forwarded interrupt */
+ jmp doreti_next /* Handle forwarded interrupt */
1:
lock
- incl CNAME(forward_irq_misscnt)
- call forward_irq /* Oops, we've lost the isr lock */
- MEXITCOUNT
- POP_FRAME
- iret
-2:
- lock
incl CNAME(forward_irq_toodeepcnt)
-3:
- MP_RELLOCK
MEXITCOUNT
POP_FRAME
iret
+#if 0
/*
*
*/
@@ -532,9 +452,11 @@ forward_irq:
cmpl $0, CNAME(forward_irq_enabled)
jz 4f
+/* XXX - this is broken now, because mp_lock doesn't exist
movl _mp_lock,%eax
cmpl $FREE_LOCK,%eax
jne 1f
+ */
movl $0, %eax /* Pick CPU #0 if noone has lock */
1:
shrl $24,%eax
@@ -559,6 +481,7 @@ forward_irq:
jnz 3b
4:
ret
+#endif
/*
* Executed by a CPU when it receives an Xcpustop IPI from another CPU,
@@ -654,6 +577,7 @@ MCOUNT_LABEL(bintr)
FAST_INTR(22,fastintr22)
FAST_INTR(23,fastintr23)
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, CLKINTR_PENDING)
INTR(1,intr1,)
INTR(2,intr2,)
@@ -728,15 +652,11 @@ _ihandlers:
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
+#if 0
/* active flag for lazy masking */
iactive:
.long 0
+#endif
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
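
In C terms, the rewritten INTR() stub above now amounts to roughly the
following.  This is a hedged sketch of the control flow, not the generated
code; the wrapper name is invented, and sched_ithd() (defined in
intr_machdep.c) only schedules the interrupt thread instead of running the
handler directly.

	static void
	intr_stub(int irq)		/* hypothetical C rendering of INTR(irq) */
	{
		/* MASK_LEVEL_IRQ(irq): keep the level-triggered source quiet */
		/* EOI_IRQ(irq): acknowledge the interrupt at the APIC */
		intr_nesting_level++;	/* incb _intr_nesting_level */
		enable_intr();		/* sti */
		sched_ithd(irq);	/* wake the interrupt thread for this IRQ */
		/* jmp doreti_next: pick up pending ASTs and soft interrupts */
	}
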
diff --git a/sys/i386/isa/atpic_vector.s b/sys/i386/isa/atpic_vector.s
index e427351..d2b88bf 100644
--- a/sys/i386/isa/atpic_vector.s
+++ b/sys/i386/isa/atpic_vector.s
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reload the ICU interrupt mask (from imen) after completing an interrupt,
+ * so the source is unmasked again.  Called from ithd_loop.  There are two
+ * separate functions, one for each ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
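
The two setimask routines added above are meant to be called from C once the
interrupt thread has finished.  Below is a hedged sketch of the kind of caller
the comment describes; the helper name and the IRQ-to-ICU split are
illustrative only.

	void	setimask0(void);	/* reload master ICU mask from imen */
	void	setimask1(void);	/* reload slave ICU mask from imen + 1 */

	static void
	ithd_reenable_source(int irq)	/* hypothetical helper on ithd_loop's path */
	{

		if (irq < 8)
			setimask0();	/* IRQ 0-7 live on ICU1 */
		else
			setimask1();	/* IRQ 8-15 live on ICU2 */
	}
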
diff --git a/sys/i386/isa/bs/bsif.h b/sys/i386/isa/bs/bsif.h
index 5a89681..6dcc2ab 100644
--- a/sys/i386/isa/bs/bsif.h
+++ b/sys/i386/isa/bs/bsif.h
@@ -208,17 +208,10 @@ static BS_INLINE void memcopy __P((void *from, void *to, register size_t len));
u_int32_t bs_adapter_info __P((int));
#define delay(y) DELAY(y)
extern int dma_init_flag;
-#ifdef SMP
-#error XXX see comments in i386/isa/bs/bsif.h for details
-/*
- * ipending is 'opaque' in SMP, and can't be accessed this way.
- * Since its my belief that this is PC98 code, and that PC98 and SMP
- * are mutually exclusive, the above compile-time error is the "fix".
- * Please inform smp@freebsd.org if this is NOT the case.
- */
-#else
+
#define softintr(y) ipending |= (1 << y)
-#endif /* SMP */
+
+#endif /* IPENDING */
static BS_INLINE void
memcopy(from, to, len)
diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c
index 15044ab..724f3c2 100644
--- a/sys/i386/isa/clock.c
+++ b/sys/i386/isa/clock.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Setup the PIC clk handler. The APIC handler is setup later */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
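
The clock changes above repeat one idiom: save the interrupt state, disable
interrupts, take the clock spin lock, touch the 8254, then undo everything in
reverse order.  A minimal sketch of that idiom follows; the function name is
hypothetical.

	static void
	foo_program_timer0(u_int count)		/* hypothetical example */
	{
		int intrsave;

		intrsave = save_intr();		/* remember caller's interrupt state */
		disable_intr();
		CLOCK_LOCK();			/* per-subsystem spin lock for SMP */
		outb(TIMER_CNTR0, count & 0xff);
		outb(TIMER_CNTR0, count >> 8);
		CLOCK_UNLOCK();
		restore_intr(intrsave);		/* restore the interrupt flag as found */
	}
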
diff --git a/sys/i386/isa/cy.c b/sys/i386/isa/cy.c
index 52a8cf3..5487d8f 100644
--- a/sys/i386/isa/cy.c
+++ b/sys/i386/isa/cy.c
@@ -94,11 +94,6 @@
#error "The cy device requires the old isa compatibility shims"
#endif
-#ifdef SMP
-#define disable_intr() COM_DISABLE_INTR()
-#define enable_intr() COM_ENABLE_INTR()
-#endif /* SMP */
-
/*
* Dictionary so that I can name everything *sio* or *com* to compare with
* sio.c. There is also lots of ugly formatting and unnecessary ifdefs to
@@ -366,7 +361,7 @@ static struct com_s *p_com_addr[NSIO];
#define com_addr(unit) (p_com_addr[unit])
struct isa_driver siodriver = {
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
sioprobe,
sioattach,
driver_name
@@ -604,11 +599,9 @@ cyattach_common(cy_iobase, cy_align)
com->lt_out.c_cflag = com->lt_in.c_cflag = CLOCAL;
}
if (siosetwater(com, com->it_in.c_ispeed) != 0) {
- enable_intr();
free(com, M_DEVBUF);
return (0);
}
- enable_intr();
termioschars(&com->it_in);
com->it_in.c_ispeed = com->it_in.c_ospeed = comdefaultrate;
com->it_out = com->it_in;
@@ -662,6 +655,7 @@ sioopen(dev, flag, mode, p)
int s;
struct tty *tp;
int unit;
+ int intrsave;
mynor = minor(dev);
unit = MINOR_TO_UNIT(mynor);
@@ -768,14 +762,17 @@ open_top:
}
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
(void) inb(com->line_status_port);
(void) inb(com->data_port);
com->prev_modem_status = com->last_modem_status
= inb(com->modem_status_port);
outb(iobase + com_ier, IER_ERXRDY | IER_ETXRDY | IER_ERLS
| IER_EMSC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#else /* !0 */
/*
* Flush fifos. This requires a full channel reset which
@@ -786,13 +783,16 @@ open_top:
CD1400_CCR_CMDRESET | CD1400_CCR_CHANRESET);
cd1400_channel_cmd(com, com->channel_control);
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->prev_modem_status = com->last_modem_status
= cd_getreg(com, CD1400_MSVR2);
cd_setreg(com, CD1400_SRER,
com->intr_enable
= CD1400_SRER_MDMCH | CD1400_SRER_RXDATA);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif /* 0 */
/*
* Handle initial DCD. Callout devices get a fake initial
@@ -875,6 +875,7 @@ comhardclose(com)
int s;
struct tty *tp;
int unit;
+ int intrsave;
unit = com->unit;
iobase = com->iobase;
@@ -888,10 +889,13 @@ comhardclose(com)
outb(iobase + com_cfcr, com->cfcr_image &= ~CFCR_SBREAK);
#else
/* XXX */
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->etc = ETC_NONE;
cd_setreg(com, CD1400_COR2, com->cor[1] &= ~CD1400_COR2_ETC);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
cd1400_channel_cmd(com, CD1400_CCR_CMDRESET | CD1400_CCR_FTF);
#endif
@@ -899,9 +903,12 @@ comhardclose(com)
#if 0
outb(iobase + com_ier, 0);
#else
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
cd_setreg(com, CD1400_SRER, com->intr_enable = 0);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
tp = com->tp;
if ((tp->t_cflag & HUPCL)
@@ -991,6 +998,11 @@ siodtrwakeup(chan)
wakeup(&com->dtr_wait);
}
+/*
+ * This function:
+ * a) needs to be called with COM_LOCK() held, and
+ * b) needs to return with COM_LOCK() held.
+ */
static void
sioinput(com)
struct com_s *com;
@@ -1000,6 +1012,7 @@ sioinput(com)
u_char line_status;
int recv_data;
struct tty *tp;
+ int intrsave;
buf = com->ibuf;
tp = com->tp;
@@ -1016,7 +1029,15 @@ sioinput(com)
* slinput is reasonably fast (usually 40 instructions plus
* call overhead).
*/
+
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
incc = com->iptr - buf;
if (tp->t_rawq.c_cc + incc > tp->t_ihiwat
@@ -1038,10 +1059,18 @@ sioinput(com)
tp->t_lflag &= ~FLUSHO;
comstart(tp);
}
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
} else {
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
line_status = buf[com->ierroff];
recv_data = *buf++;
@@ -1057,7 +1086,8 @@ sioinput(com)
recv_data |= TTY_PE;
}
(*linesw[tp->t_line].l_rint)(recv_data, tp);
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
}
com_events -= (com->iptr - com->ibuf);
@@ -1729,6 +1759,7 @@ static void
siopoll()
{
int unit;
+ int intrsave;
#ifdef CyDebug
++cy_timeouts;
@@ -1751,7 +1782,9 @@ repeat:
* (actually never opened devices) so that we don't
* loop.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
incc = com->iptr - com->ibuf;
com->iptr = com->ibuf;
if (com->state & CS_CHECKMSR) {
@@ -1759,7 +1792,8 @@ repeat:
com->state &= ~CS_CHECKMSR;
}
com_events -= incc;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (incc != 0)
log(LOG_DEBUG,
"sio%d: %d events for device with no tp\n",
@@ -1767,29 +1801,39 @@ repeat:
continue;
}
if (com->iptr != com->ibuf) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
sioinput(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (com->state & CS_CHECKMSR) {
u_char delta_modem_status;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
+ sioinput(com);
delta_modem_status = com->last_modem_status
^ com->prev_modem_status;
com->prev_modem_status = com->last_modem_status;
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_CHECKMSR;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta_modem_status & MSR_DCD)
(*linesw[tp->t_line].l_modem)
(tp, com->prev_modem_status & MSR_DCD);
}
if (com->extra_state & CSE_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->extra_state &= ~CSE_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (!(com->state & CS_BUSY)) {
tp->t_state &= ~TS_BUSY;
ttwwakeup(com->tp);
@@ -1801,10 +1845,13 @@ repeat:
}
}
if (com->state & CS_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
(*linesw[tp->t_line].l_start)(tp);
}
if (com_events == 0)
@@ -1833,6 +1880,7 @@ comparam(tp, t)
u_char opt;
int s;
int unit;
+ int intrsave;
/* do historical conversions */
if (t->c_ispeed == 0)
@@ -1857,14 +1905,9 @@ comparam(tp, t)
else
(void)commctl(com, TIOCM_DTR, DMBIS);
- /*
- * This returns with interrupts disabled so that we can complete
- * the speed change atomically.
- */
(void) siosetwater(com, t->c_ispeed);
/* XXX we don't actually change the speed atomically. */
- enable_intr();
if (idivisor != 0) {
cd_setreg(com, CD1400_RBPR, idivisor);
@@ -1985,12 +2028,15 @@ comparam(tp, t)
if (cflag & CCTS_OFLOW)
opt |= CD1400_COR2_CCTS_OFLOW;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (opt != com->cor[1]) {
cor_change |= CD1400_CCR_COR2;
cd_setreg(com, CD1400_COR2, com->cor[1] = opt);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
/*
* set channel option register 3 -
@@ -2111,7 +2157,9 @@ comparam(tp, t)
* XXX should have done this long ago, but there is too much state
* to change all atomically.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com->state &= ~CS_TTGO;
if (!(tp->t_state & TS_TTSTOP))
@@ -2177,7 +2225,8 @@ comparam(tp, t)
| CD1400_SRER_TXMPTY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
splx(s);
comstart(tp);
if (com->ibufold != NULL) {
@@ -2196,6 +2245,7 @@ siosetwater(com, speed)
u_char *ibuf;
int ibufsize;
struct tty *tp;
+ int intrsave;
/*
* Make the buffer size large enough to handle a softtty interrupt
@@ -2207,7 +2257,6 @@ siosetwater(com, speed)
for (ibufsize = 128; ibufsize < cp4ticks;)
ibufsize <<= 1;
if (ibufsize == com->ibufsize) {
- disable_intr();
return (0);
}
@@ -2217,7 +2266,6 @@ siosetwater(com, speed)
*/
ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT);
if (ibuf == NULL) {
- disable_intr();
return (ENOMEM);
}
@@ -2235,7 +2283,9 @@ siosetwater(com, speed)
* Read current input buffer, if any. Continue with interrupts
* disabled.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->iptr != com->ibuf)
sioinput(com);
@@ -2254,6 +2304,9 @@ siosetwater(com, speed)
com->ibufend = ibuf + ibufsize;
com->ierroff = ibufsize;
com->ihighwater = ibuf + 3 * ibufsize / 4;
+
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2267,6 +2320,7 @@ comstart(tp)
bool_t started;
#endif
int unit;
+ int intrsave;
unit = DEV_TO_UNIT(tp->t_dev);
com = com_addr(unit);
@@ -2277,7 +2331,9 @@ comstart(tp)
started = FALSE;
#endif
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (tp->t_state & TS_TTSTOP) {
com->state &= ~CS_TTGO;
if (com->intr_enable & CD1400_SRER_TXRDY)
@@ -2313,7 +2369,8 @@ comstart(tp)
com->mcr_image |= com->mcr_rts);
#endif
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
ttwwakeup(tp);
splx(s);
@@ -2332,7 +2389,9 @@ comstart(tp)
sizeof com->obuf1);
com->obufs[0].l_next = NULL;
com->obufs[0].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2351,7 +2410,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) {
#ifdef CyDebug
@@ -2362,7 +2422,9 @@ comstart(tp)
sizeof com->obuf2);
com->obufs[1].l_next = NULL;
com->obufs[1].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2381,7 +2443,8 @@ comstart(tp)
& ~CD1400_SRER_TXMPTY)
| CD1400_SRER_TXRDY);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
tp->t_state |= TS_BUSY;
}
@@ -2390,10 +2453,13 @@ comstart(tp)
++com->start_real;
#endif
#if 0
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com); /* fake interrupt to start output */
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
#endif
ttwwakeup(tp);
splx(s);
@@ -2406,10 +2472,13 @@ comstop(tp, rw)
{
struct com_s *com;
bool_t wakeup_etc;
+ int intrsave;
com = com_addr(DEV_TO_UNIT(tp->t_dev));
wakeup_etc = FALSE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (rw & FWRITE) {
com->obufs[0].l_queued = FALSE;
com->obufs[1].l_queued = FALSE;
@@ -2432,7 +2501,8 @@ comstop(tp, rw)
com_events -= (com->iptr - com->ibuf);
com->iptr = com->ibuf;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (wakeup_etc)
wakeup(&com->etc);
if (rw & FWRITE && com->etc == ETC_NONE)
@@ -2448,6 +2518,7 @@ commctl(com, bits, how)
{
int mcr;
int msr;
+ int intrsave;
if (how == DMGET) {
if (com->channel_control & CD1400_CCR_RCVEN)
@@ -2485,7 +2556,9 @@ commctl(com, bits, how)
mcr |= com->mcr_dtr;
if (bits & TIOCM_RTS)
mcr |= com->mcr_rts;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
switch (how) {
case DMSET:
com->mcr_image = mcr;
@@ -2503,7 +2576,8 @@ commctl(com, bits, how)
cd_setreg(com, CD1400_MSVR2, mcr);
break;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2565,9 +2639,14 @@ comwakeup(chan)
com = com_addr(unit);
if (com != NULL
&& (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) {
+ int intrsave;
+
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
}
#endif
@@ -2587,11 +2666,15 @@ comwakeup(chan)
for (errnum = 0; errnum < CE_NTYPES; ++errnum) {
u_int delta;
u_long total;
+ int intrsave;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta = com->delta_error_counts[errnum];
com->delta_error_counts[errnum] = 0;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta == 0)
continue;
total = com->error_counts[errnum] += delta;
@@ -2743,6 +2826,8 @@ cd_etc(com, etc)
struct com_s *com;
int etc;
{
+ int intrsave;
+
/*
* We can't change the hardware's ETC state while there are any
* characters in the tx fifo, since those characters would be
@@ -2754,26 +2839,28 @@ cd_etc(com, etc)
* for the tx to become empty so that the command is sure to be
* executed soon after we issue it.
*/
+ intrsave = save_intr();
disable_intr();
- if (com->etc == etc) {
- enable_intr();
+ COM_LOCK();
+ if (com->etc == etc)
goto wait;
- }
if ((etc == CD1400_ETC_SENDBREAK
&& (com->etc == ETC_BREAK_STARTING
|| com->etc == ETC_BREAK_STARTED))
|| (etc == CD1400_ETC_STOPBREAK
&& (com->etc == ETC_BREAK_ENDING || com->etc == ETC_BREAK_ENDED
|| com->etc == ETC_NONE))) {
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return;
}
com->etc = etc;
cd_setreg(com, CD1400_SRER,
com->intr_enable
= (com->intr_enable & ~CD1400_SRER_TXRDY) | CD1400_SRER_TXMPTY);
- enable_intr();
wait:
+ COM_UNLOCK();
+ restore_intr(intrsave);
while (com->etc == etc
&& tsleep(&com->etc, TTIPRI | PCATCH, "cyetc", 0) == 0)
continue;
@@ -2787,7 +2874,7 @@ cd_getreg(com, reg)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
int val;
@@ -2795,14 +2882,16 @@ cd_getreg(com, reg)
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
val = cd_inb(iobase, reg, cy_align);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (val);
}
@@ -2815,21 +2904,23 @@ cd_setreg(com, reg, val)
struct com_s *basecom;
u_char car;
int cy_align;
- u_long ef;
+ int intrsave;
cy_addr iobase;
basecom = com_addr(com->unit & ~(CD1400_NO_OF_CHANNELS - 1));
car = com->unit & CD1400_CAR_CHAN;
cy_align = com->cy_align;
iobase = com->iobase;
- ef = read_eflags();
- if (ef & PSL_I)
- disable_intr();
+ intrsave = save_intr();
+ disable_intr();
+ if (intrsave & PSL_I)
+ COM_LOCK();
if (basecom->car != car)
cd_outb(iobase, CD1400_CAR, cy_align, basecom->car = car);
cd_outb(iobase, reg, cy_align, val);
- if (ef & PSL_I)
- enable_intr();
+ if (intrsave & PSL_I)
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
#ifdef CyDebug
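
The "save-and-enable" comments above describe the inverse of the usual idiom:
sioinput() drops the lock and opens interrupts for the slow line-discipline
work, then restores the previous state before returning to a caller that
still expects COM_LOCK() to be held.  Below is a hedged sketch with simplified
names, not the actual driver code.

	static void
	foo_drain_input(void)			/* hypothetical example */
	{
		int intrsave;

		/* entered with COM_LOCK() held and interrupts disabled */
		intrsave = save_intr();		/* records the disabled state */
		COM_UNLOCK();
		enable_intr();			/* open interrupts for the slow work */

		/* ... hand received characters to the line discipline ... */

		restore_intr(intrsave);		/* back to interrupts off */
		COM_LOCK();			/* re-take the lock for the caller */
	}
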
diff --git a/sys/i386/isa/icu_ipl.s b/sys/i386/isa/icu_ipl.s
index 3475358..d178d5c 100644
--- a/sys/i386/isa/icu_ipl.s
+++ b/sys/i386/isa/icu_ipl.s
@@ -55,63 +55,6 @@ _imen: .long HWI_MASK
SUPERALIGN_TEXT
/*
- * Interrupt priority mechanism
- * -- soft splXX masks with group mechanism (cpl)
- * -- h/w masks for currently active or unused interrupts (imen)
- * -- ipending = active interrupts currently masked by cpl
- */
-
-ENTRY(splz)
- /*
- * The caller has restored cpl and checked that (ipending & ~cpl)
- * is nonzero. We have to repeat the check since if there is an
- * interrupt while we're looking, _doreti processing for the
- * interrupt will handle all the unmasked pending interrupts
- * because we restored early. We're repeating the calculation
- * of (ipending & ~cpl) anyway so that the caller doesn't have
- * to pass it, so this only costs one "jne". "bsfl %ecx,%ecx"
- * is undefined when %ecx is 0 so we can't rely on the secondary
- * btrl tests.
- */
- movl _cpl,%eax
-splz_next:
- /*
- * We don't need any locking here. (ipending & ~cpl) cannot grow
- * while we're looking at it - any interrupt will shrink it to 0.
- */
- movl %eax,%ecx
- notl %ecx
- andl _ipending,%ecx
- jne splz_unpend
- ret
-
- ALIGN_TEXT
-splz_unpend:
- bsfl %ecx,%ecx
- btrl %ecx,_ipending
- jnc splz_next
- cmpl $NHWI,%ecx
- jae splz_swi
- /*
- * We would prefer to call the intr handler directly here but that
- * doesn't work for badly behaved handlers that want the interrupt
- * frame. Also, there's a problem determining the unit number.
- * We should change the interface so that the unit number is not
- * determined at config time.
- */
- jmp *vec(,%ecx,4)
-
- ALIGN_TEXT
-splz_swi:
- pushl %eax
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
- call *_ihandlers(,%ecx,4)
- popl %eax
- movl %eax,_cpl
- jmp splz_next
-
-/*
* Fake clock interrupt(s) so that they appear to come from our caller instead
* of from here, so that system profiling works.
* XXX do this more generally (for all vectors; look up the C entry point).
diff --git a/sys/i386/isa/icu_vector.s b/sys/i386/isa/icu_vector.s
index e427351..d2b88bf 100644
--- a/sys/i386/isa/icu_vector.s
+++ b/sys/i386/isa/icu_vector.s
@@ -53,9 +53,11 @@ IDTVEC(vec_name) ; \
pushl %ecx ; \
pushl %edx ; \
pushl %ds ; \
+ pushl %fs ; \
MAYBE_PUSHL_ES ; \
mov $KDSEL,%ax ; \
mov %ax,%ds ; \
+ mov %ax,%fs ; \
MAYBE_MOVW_AX_ES ; \
FAKE_MCOUNT((4+ACTUALLY_PUSHED)*4(%esp)) ; \
pushl _intr_unit + (irq_num) * 4 ; \
@@ -65,18 +67,21 @@ IDTVEC(vec_name) ; \
incl _cnt+V_INTR ; /* book-keeping can wait */ \
movl _intr_countp + (irq_num) * 4,%eax ; \
incl (%eax) ; \
- movl _cpl,%eax ; /* are we unmasking pending HWIs or SWIs? */ \
+/* movl _cpl,%eax ; // are we unmasking pending SWIs? / \
notl %eax ; \
- andl _ipending,%eax ; \
- jne 2f ; /* yes, maybe handle them */ \
+ andl _spending,$SWI_MASK ; \
+ jne 2f ; // yes, maybe handle them */ \
1: ; \
MEXITCOUNT ; \
MAYBE_POPL_ES ; \
+ popl %fs ; \
popl %ds ; \
popl %edx ; \
popl %ecx ; \
popl %eax ; \
iret ; \
+
+#if 0
; \
ALIGN_TEXT ; \
2: ; \
@@ -88,6 +93,7 @@ IDTVEC(vec_name) ; \
incb _intr_nesting_level ; /* ... really limit it ... */ \
sti ; /* ... to do this as early as possible */ \
MAYBE_POPL_ES ; /* discard most of thin frame ... */ \
+ popl %fs ; \
popl %ecx ; /* ... original %ds ... */ \
popl %edx ; \
xchgl %eax,4(%esp) ; /* orig %eax; save cpl */ \
@@ -101,11 +107,20 @@ IDTVEC(vec_name) ; \
movl (3+8+0)*4(%esp),%ecx ; /* ... %ecx from thin frame ... */ \
movl %ecx,(3+6)*4(%esp) ; /* ... to fat frame ... */ \
movl (3+8+1)*4(%esp),%eax ; /* ... cpl from thin frame */ \
- pushl %eax ; \
subl $4,%esp ; /* junk for unit number */ \
MEXITCOUNT ; \
jmp _doreti
+#endif
+/*
+ * Slow, threaded interrupts.
+ *
+ * XXX Most of the parameters here are obsolete. Fix this when we're
+ * done.
+ * XXX we really shouldn't return via doreti if we just schedule the
+ * interrupt handler and don't run anything. We could just do an
+ * iret. FIXME.
+ */
#define INTR(irq_num, vec_name, icu, enable_icus, reg, maybe_extra_ipending) \
.text ; \
SUPERALIGN_TEXT ; \
@@ -116,8 +131,8 @@ IDTVEC(vec_name) ; \
pushl %ds ; /* save our data and extra segments ... */ \
pushl %es ; \
pushl %fs ; \
- mov $KDSEL,%ax ; /* ... and reload with kernel's own ... */ \
- mov %ax,%ds ; /* ... early for obsolete reasons */ \
+ mov $KDSEL,%ax ; /* load kernel ds, es and fs */ \
+ mov %ax,%ds ; \
mov %ax,%es ; \
mov %ax,%fs ; \
maybe_extra_ipending ; \
@@ -126,43 +141,37 @@ IDTVEC(vec_name) ; \
movb %al,_imen + IRQ_BYTE(irq_num) ; \
outb %al,$icu+ICU_IMR_OFFSET ; \
enable_icus ; \
- movl _cpl,%eax ; \
- testb $IRQ_BIT(irq_num),%reg ; \
- jne 2f ; \
- incb _intr_nesting_level ; \
+ incb _intr_nesting_level ; /* XXX do we need this? */ \
__CONCAT(Xresume,irq_num): ; \
FAKE_MCOUNT(13*4(%esp)) ; /* XXX late to avoid double count */ \
- incl _cnt+V_INTR ; /* tally interrupts */ \
- movl _intr_countp + (irq_num) * 4,%eax ; \
- incl (%eax) ; \
- movl _cpl,%eax ; \
- pushl %eax ; \
- pushl _intr_unit + (irq_num) * 4 ; \
- orl _intr_mask + (irq_num) * 4,%eax ; \
- movl %eax,_cpl ; \
+ pushl $irq_num; /* pass the IRQ */ \
sti ; \
- call *_intr_handler + (irq_num) * 4 ; \
- cli ; /* must unmask _imen and icu atomically */ \
- movb _imen + IRQ_BYTE(irq_num),%al ; \
- andb $~IRQ_BIT(irq_num),%al ; \
- movb %al,_imen + IRQ_BYTE(irq_num) ; \
- outb %al,$icu+ICU_IMR_OFFSET ; \
- sti ; /* XXX _doreti repeats the cli/sti */ \
+ call _sched_ithd ; \
+ addl $4, %esp ; /* discard the parameter */ \
MEXITCOUNT ; \
/* We could usually avoid the following jmp by inlining some of */ \
/* _doreti, but it's probably better to use less cache. */ \
- jmp _doreti ; \
-; \
- ALIGN_TEXT ; \
-2: ; \
- /* XXX skip mcounting here to avoid double count */ \
- orb $IRQ_BIT(irq_num),_ipending + IRQ_BYTE(irq_num) ; \
- popl %fs ; \
- popl %es ; \
- popl %ds ; \
- popal ; \
- addl $4+4,%esp ; \
- iret
+ jmp doreti_next /* and catch up inside doreti */
+
+/*
+ * Reenable the interrupt mask after completing an interrupt. Called
+ * from ithd_loop. There are two separate functions, one for each
+ * ICU.
+ */
+ .globl setimask0, setimask1
+setimask0:
+ cli
+ movb _imen,%al
+ outb %al,$IO_ICU1 + ICU_IMR_OFFSET
+ sti
+ ret
+
+setimask1:
+ cli
+ movb _imen + 1,%al
+ outb %al,$IO_ICU2 + ICU_IMR_OFFSET
+ sti
+ ret
MCOUNT_LABEL(bintr)
FAST_INTR(0,fastintr0, ENABLE_ICU1)
@@ -181,7 +190,9 @@ MCOUNT_LABEL(bintr)
FAST_INTR(13,fastintr13, ENABLE_ICU1_AND_2)
FAST_INTR(14,fastintr14, ENABLE_ICU1_AND_2)
FAST_INTR(15,fastintr15, ENABLE_ICU1_AND_2)
+
#define CLKINTR_PENDING movl $1,CNAME(clkintr_pending)
+/* Threaded interrupts */
INTR(0,intr0, IO_ICU1, ENABLE_ICU1, al, CLKINTR_PENDING)
INTR(1,intr1, IO_ICU1, ENABLE_ICU1, al,)
INTR(2,intr2, IO_ICU1, ENABLE_ICU1, al,)
@@ -198,6 +209,7 @@ MCOUNT_LABEL(bintr)
INTR(13,intr13, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(14,intr14, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
INTR(15,intr15, IO_ICU2, ENABLE_ICU1_AND_2, ah,)
+
MCOUNT_LABEL(eintr)
.data
@@ -211,10 +223,4 @@ _ihandlers: /* addresses of interrupt handlers */
.long _swi_null, swi_net, _swi_null, _swi_null
.long _swi_vm, _swi_null, _softclock
-imasks: /* masks for interrupt handlers */
- .space NHWI*4 /* padding; HWI masks are elsewhere */
-
- .long SWI_TTY_MASK, SWI_NET_MASK, SWI_CAMNET_MASK, SWI_CAMBIO_MASK
- .long SWI_VM_MASK, SWI_TQ_MASK, SWI_CLOCK_MASK
-
.text
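
With the INTR() macro reduced as above, a threaded vector stub now only masks its IRQ, schedules the interrupt thread and falls into doreti. Very roughly, and ignoring the register saves, segment reloads, ICU EOI and profiling hooks, each Xintr<n> behaves like the C sketch below; mask_irq() is a hypothetical stand-in for the imen/ICU_IMR_OFFSET manipulation that the real stub does with inb/outb, while the other names are taken from the patch.

/*
 * Illustrative C rendering of a threaded Xintr<n> stub; the real code
 * stays in assembler in icu_vector.s.  mask_irq() is invented here.
 */
static void
Xintr_sketch(int irq_num)
{
	mask_irq(irq_num);		/* set the bit in imen, write it to the ICU IMR */
	intr_nesting_level++;		/* XXX kept for now, see the comment above */
	enable_intr();			/* sti */
	sched_ithd((void *)irq_num);	/* wake the interrupt thread for this IRQ */
	/* fall into doreti_next so pending ASTs are handled before iret */
}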
diff --git a/sys/i386/isa/intr_machdep.c b/sys/i386/isa/intr_machdep.c
index 34a8c22..870760e 100644
--- a/sys/i386/isa/intr_machdep.c
+++ b/sys/i386/isa/intr_machdep.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of the kernel, then set up
+ * the interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update the intrnames array with the specified name. This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+ * If we have a fast interrupt, we need to set the
+ * handler address directly. Do that below. For a
+ * slow interrupt, we don't need to know more details,
+ * so do it here because it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+ } else
+ snprintf(ithd->it_proc->p_comm, MAXCOMLEN,
+ "irq%d: %s", irq, name);
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+ * We can't append the new handler if either the
+ * existing handler list or the new handler does not
+ * allow interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+ " to normal interrupt %s on irq%d",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate and remove from the linked list the interrupt handler
+ * descriptor created by an earlier call of inthand_add(), then
+ * adjust the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+ if (ih->next != idesc)
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
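
The net effect of the new inthand_add() is one ithd per active IRQ, holding the interrupt thread in it_proc and a singly linked chain of intrec handler descriptors in it_ih. A minimal sketch of walking that chain, using only the fields declared in intr_machdep.h (the function itself is illustrative and not part of the patch):

/* Count the handlers hooked to an IRQ; illustrative only. */
static int
intr_count_handlers(int irq)
{
	ithd *it = ithds[irq];		/* per-IRQ descriptor, may be NULL */
	intrec *ih;
	int n = 0;

	if (it == NULL)
		return (0);
	for (ih = it->it_ih; ih != NULL; ih = ih->next)
		n++;			/* one entry per inthand_add() call */
	return (n);
}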
diff --git a/sys/i386/isa/intr_machdep.h b/sys/i386/isa/intr_machdep.h
index 5982295..87c97a3 100644
--- a/sys/i386/isa/intr_machdep.h
+++ b/sys/i386/isa/intr_machdep.h
@@ -98,7 +98,6 @@
#define TPR_BLOCK_XCPUSTOP 0xaf /* */
#define TPR_BLOCK_ALL 0xff /* all INTs */
-
#ifdef TEST_TEST1
/* put a 'fake' HWI in top of APIC prio 0x3x, 32 + 31 = 63 = 0x3f */
#define XTEST1_OFFSET (ICU_OFFSET + 31)
@@ -145,8 +144,9 @@ extern u_long intrcnt[]; /* counts for for each device and stray */
extern char intrnames[]; /* string table containing device names */
extern u_long *intr_countp[]; /* pointers into intrcnt[] */
extern inthand2_t *intr_handler[]; /* C entry points of intr handlers */
-extern u_int intr_mask[]; /* sets of intrs masked during handling of 1 */
+extern ithd *ithds[];
extern void *intr_unit[]; /* cookies to pass to intr handlers */
+extern ithd softinterrupt; /* soft interrupt thread */
inthand_t
IDTVEC(fastintr0), IDTVEC(fastintr1),
@@ -190,26 +190,60 @@ inthand_t
#endif /** TEST_TEST1 */
#endif /* SMP || APIC_IO */
+#ifdef PC98
+#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
+#define ICU_SLAVEID 7
+#else
+#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
+#define ICU_SLAVEID 2
+#endif
+
+#ifdef APIC_IO
+/*
+ * This is to accommodate "mixed-mode" programming for
+ * motherboards that don't connect the 8254 to the IO APIC.
+ */
+#define AUTO_EOI_1 1
+#endif
+
+#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
+
void isa_defaultirq __P((void));
int isa_nmi __P((int cd));
int icu_setup __P((int intr, inthand2_t *func, void *arg,
- u_int *maskptr, int flags));
+ int flags));
int icu_unset __P((int intr, inthand2_t *handler));
-int update_intr_masks __P((void));
intrmask_t splq __P((intrmask_t mask));
-#define INTR_FAST 0x00000001 /* fast interrupt handler */
-#define INTR_EXCL 0x00010000 /* excl. intr, default is shared */
+/*
+ * Describe a hardware interrupt handler. These structures are
+ * accessed via the array intreclist, which contains one pointer per
+ * hardware interrupt.
+ *
+ * Multiple interrupt handlers for a specific IRQ can be chained
+ * together via the 'next' pointer.
+ */
+typedef struct intrec {
+ inthand2_t *handler; /* code address of handler */
+ void *argument; /* argument to pass to handler */
+ enum intr_type flags; /* flag bits (sys/bus.h) */
+ char *name; /* name of handler */
+ ithd *ithd; /* handler we're connected to */
+ struct intrec *next; /* next handler for this irq */
+} intrec;
/*
* WARNING: These are internal functions and not to be used by device drivers!
* They are subject to change without notice.
*/
struct intrec *inthand_add(const char *name, int irq, inthand2_t handler,
- void *arg, intrmask_t *maskptr, int flags);
-
+ void *arg, int pri, int flags);
int inthand_remove(struct intrec *idesc);
+void sched_ithd(void *);
+void ithd_loop(void *);
+void start_softintr(void *);
+void intr_soft(void *);
#endif /* LOCORE */
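
Compared with the old interface, a caller no longer passes an intrmask_t pointer; it passes a thread priority instead. A hedged sketch of how a driver might attach and detach a handler with the new prototypes follows; foo_intr, foo_softc, FOO_IRQ and the zero priority/flags values are invented for the example, and only inthand_add(), inthand_remove() and the declared types come from the patch.

static intrec *foo_ih;			/* handle returned by inthand_add() */

static void foo_intr(void *arg);	/* driver interrupt handler (inthand2_t) */

static int
foo_attach(struct foo_softc *sc)
{
	/* 0 for pri and flags are placeholders, not values from the patch */
	foo_ih = inthand_add("foo", FOO_IRQ, foo_intr, sc, 0, 0);
	if (foo_ih == NULL)
		return (ENXIO);
	return (0);
}

static int
foo_detach(struct foo_softc *sc)
{
	if (inthand_remove(foo_ih) != 0)
		return (EINVAL);
	return (0);
}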
diff --git a/sys/i386/isa/ipl.s b/sys/i386/isa/ipl.s
index 9361230..1ee9ace 100644
--- a/sys/i386/isa/ipl.s
+++ b/sys/i386/isa/ipl.s
@@ -44,7 +44,6 @@
* AT/386
* Vector interrupt control section
*
- * cpl - Current interrupt disable mask
* *_imask - Interrupt masks for various spl*() functions
* ipending - Pending interrupts (set when a masked interrupt occurs)
*/
@@ -53,8 +52,6 @@
ALIGN_DATA
/* current priority (all off) */
- .globl _cpl
-_cpl: .long HWI_MASK | SWI_MASK
.globl _tty_imask
_tty_imask: .long SWI_TTY_MASK
@@ -71,9 +68,9 @@ _softnet_imask: .long SWI_NET_MASK
.globl _softtty_imask
_softtty_imask: .long SWI_TTY_MASK
-/* pending interrupts blocked by splxxx() */
- .globl _ipending
-_ipending: .long 0
+/* pending software interrupts */
+ .globl _spending
+_spending: .long 0
/* set with bits for which queue to service */
.globl _netisr
@@ -100,59 +97,30 @@ _netisrs:
_doreti:
FAKE_MCOUNT(_bintr) /* init "from" _bintr -> _doreti */
addl $4,%esp /* discard unit number */
- popl %eax /* cpl or cml to restore */
doreti_next:
- /*
- * Check for pending HWIs and SWIs atomically with restoring cpl
- * and exiting. The check has to be atomic with exiting to stop
- * (ipending & ~cpl) changing from zero to nonzero while we're
- * looking at it (this wouldn't be fatal but it would increase
- * interrupt latency). Restoring cpl has to be atomic with exiting
- * so that the stack cannot pile up (the nesting level of interrupt
- * handlers is limited by the number of bits in cpl).
- */
-#ifdef SMP
- cli /* early to prevent INT deadlock */
-doreti_next2:
-#endif
- movl %eax,%ecx
- notl %ecx /* set bit = unmasked level */
-#ifndef SMP
- cli
-#endif
- andl _ipending,%ecx /* set bit = unmasked pending INT */
- jne doreti_unpend
- movl %eax,_cpl
decb _intr_nesting_level
/* Check for ASTs that can be handled now. */
testl $AST_PENDING,_astpending
- je doreti_exit
- testb $SEL_RPL_MASK,TF_CS(%esp)
- jne doreti_ast
- testl $PSL_VM,TF_EFLAGS(%esp)
- je doreti_exit
- cmpl $1,_in_vm86call
- jne doreti_ast
+ je doreti_exit /* no AST, exit */
+ testb $SEL_RPL_MASK,TF_CS(%esp) /* are we in user mode? */
+ jne doreti_ast /* yes, do it now. */
+ testl $PSL_VM,TF_EFLAGS(%esp) /* kernel mode */
+ je doreti_exit /* and not VM86 mode, defer */
+ cmpl $1,_in_vm86call /* are we in a VM86 call? */
+ jne doreti_ast /* yes, we can do it */
/*
- * doreti_exit - release MP lock, pop registers, iret.
+ * doreti_exit: release MP lock, pop registers, iret.
*
- * Note that the syscall trap shotcuts to doreti_syscall_ret.
+ * Note that the syscall trap shortcuts to doreti_syscall_ret.
* The segment register pop is a special case, since it may
* fault if (for example) a sigreturn specifies bad segment
- * registers. The fault is handled in trap.c
+ * registers. The fault is handled in trap.c.
*/
-
doreti_exit:
MEXITCOUNT
-#ifdef SMP
- /* release the kernel lock */
- movl $_mp_lock, %edx /* GIANT_LOCK */
- call _MPrellock_edx
-#endif /* SMP */
-
.globl doreti_popl_fs
.globl doreti_syscall_ret
doreti_syscall_ret:
@@ -170,6 +138,13 @@ doreti_popl_ds:
doreti_iret:
iret
+ /*
+ * doreti_iret_fault and friends. Alternative return code for
+ * the case where we get a fault in the doreti_exit code
+ * above. trap() (i386/i386/trap.c) catches this specific
+ * case, sends the process a signal and continues in the
+ * corresponding place in the code below.
+ */
ALIGN_TEXT
.globl doreti_iret_fault
doreti_iret_fault:
@@ -189,93 +164,11 @@ doreti_popl_fs_fault:
jmp alltraps_with_regs_pushed
ALIGN_TEXT
-doreti_unpend:
- /*
- * Enabling interrupts is safe because we haven't restored cpl yet.
- * %ecx contains the next probable ready interrupt (~cpl & ipending)
- */
-#ifdef SMP
- bsfl %ecx, %ecx /* locate the next dispatchable int */
- lock
- btrl %ecx, _ipending /* is it really still pending? */
- jnc doreti_next2 /* some intr cleared memory copy */
- sti /* late to prevent INT deadlock */
-#else
- sti
- bsfl %ecx,%ecx /* slow, but not worth optimizing */
- btrl %ecx,_ipending
- jnc doreti_next /* some intr cleared memory copy */
-#endif /* SMP */
- /*
- * Execute handleable interrupt
- *
- * Set up JUMP to _ihandlers[%ecx] for HWIs.
- * Set up CALL of _ihandlers[%ecx] for SWIs.
- * This is a bit early for the SMP case - we have to push %ecx and
- * %edx, but could push only %ecx and load %edx later.
- */
- movl _ihandlers(,%ecx,4),%edx
- cmpl $NHWI,%ecx
- jae doreti_swi /* software interrupt handling */
- cli /* else hardware int handling */
-#ifdef SMP
- movl %eax,_cpl /* same as non-smp case right now */
-#else
- movl %eax,_cpl
-#endif
- MEXITCOUNT
-#ifdef APIC_INTR_DIAGNOSTIC
- lock
- incl CNAME(apic_itrace_doreti)(,%ecx,4)
-#ifdef APIC_INTR_DIAGNOSTIC_IRQ
- cmpl $APIC_INTR_DIAGNOSTIC_IRQ,%ecx
- jne 9f
- pushl %eax
- pushl %ecx
- pushl %edx
- pushl $APIC_ITRACE_DORETI
- call log_intr_event
- addl $4,%esp
- popl %edx
- popl %ecx
- popl %eax
-9:
-#endif
-#endif
- jmp *%edx
-
- ALIGN_TEXT
-doreti_swi:
- pushl %eax
- /*
- * At least the SWI_CLOCK handler has to run at a possibly strictly
- * lower cpl, so we have to restore
- * all the h/w bits in cpl now and have to worry about stack growth.
- * The worst case is currently (30 Jan 1994) 2 SWI handlers nested
- * in dying interrupt frames and about 12 HWIs nested in active
- * interrupt frames. There are only 4 different SWIs and the HWI
- * and SWI masks limit the nesting further.
- *
- * The SMP case is currently the same as the non-SMP case.
- */
-#ifdef SMP
- orl imasks(,%ecx,4), %eax /* or in imasks */
- movl %eax,_cpl /* set cpl for call */
-#else
- orl imasks(,%ecx,4),%eax
- movl %eax,_cpl
-#endif
- call *%edx
- popl %eax /* cpl to restore */
- jmp doreti_next
-
- ALIGN_TEXT
doreti_ast:
andl $~AST_PENDING,_astpending
sti
movl $T_ASTFLT,TF_TRAPNO(%esp)
- call _trap
- subl %eax,%eax /* recover cpl|cml */
+ call _ast
movb $1,_intr_nesting_level /* for doreti_next to decrement */
jmp doreti_next
diff --git a/sys/i386/isa/ipl_funcs.c b/sys/i386/isa/ipl_funcs.c
index d27d97f..14eb240 100644
--- a/sys/i386/isa/ipl_funcs.c
+++ b/sys/i386/isa/ipl_funcs.c
@@ -27,11 +27,13 @@
*/
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <machine/ipl.h>
-#include <machine/globals.h>
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
/*
@@ -45,236 +47,55 @@
void name(void) \
{ \
atomic_set_int(var, bits); \
+ sched_ithd((void *) SOFTINTR); \
}
-DO_SETBITS(setdelayed, &ipending, loadandclear(&idelayed))
+DO_SETBITS(setdelayed, &spending, loadandclear(&idelayed))
+DO_SETBITS(setsoftcamnet,&spending, SWI_CAMNET_PENDING)
+DO_SETBITS(setsoftcambio,&spending, SWI_CAMBIO_PENDING)
+DO_SETBITS(setsoftclock, &spending, SWI_CLOCK_PENDING)
+DO_SETBITS(setsoftnet, &spending, SWI_NET_PENDING)
+DO_SETBITS(setsofttty, &spending, SWI_TTY_PENDING)
+DO_SETBITS(setsoftvm, &spending, SWI_VM_PENDING)
+DO_SETBITS(setsofttq, &spending, SWI_TQ_PENDING)
-DO_SETBITS(setsoftcamnet,&ipending, SWI_CAMNET_PENDING)
-DO_SETBITS(setsoftcambio,&ipending, SWI_CAMBIO_PENDING)
-DO_SETBITS(setsoftclock, &ipending, SWI_CLOCK_PENDING)
-DO_SETBITS(setsoftnet, &ipending, SWI_NET_PENDING)
-DO_SETBITS(setsofttty, &ipending, SWI_TTY_PENDING)
-DO_SETBITS(setsoftvm, &ipending, SWI_VM_PENDING)
-DO_SETBITS(setsofttq, &ipending, SWI_TQ_PENDING)
-
-DO_SETBITS(schedsoftcamnet, &idelayed, SWI_CAMNET_PENDING)
-DO_SETBITS(schedsoftcambio, &idelayed, SWI_CAMBIO_PENDING)
-DO_SETBITS(schedsoftnet, &idelayed, SWI_NET_PENDING)
-DO_SETBITS(schedsofttty, &idelayed, SWI_TTY_PENDING)
-DO_SETBITS(schedsoftvm, &idelayed, SWI_VM_PENDING)
-DO_SETBITS(schedsofttq, &idelayed, SWI_TQ_PENDING)
+/*
+ * We no longer need to schedule soft interrupts; it happens
+ * automatically.
+ */
+#define schedsoftcamnet
+#define schedsoftcambio
+#define schedsoftnet
+#define schedsofttty
+#define schedsoftvm
+#define schedsofttq
unsigned
softclockpending(void)
{
- return (ipending & SWI_CLOCK_PENDING);
+ return (spending & SWI_CLOCK_PENDING);
}
/*
- * Support for SPL assertions.
- */
-
-#ifdef INVARIANT_SUPPORT
-
-#define SPLASSERT_IGNORE 0
-#define SPLASSERT_LOG 1
-#define SPLASSERT_PANIC 2
-
-static int splassertmode = SPLASSERT_LOG;
-SYSCTL_INT(_kern, OID_AUTO, splassertmode, CTLFLAG_RW,
- &splassertmode, 0, "Set the mode of SPLASSERT");
-
-static void
-init_splassertmode(void *ignored)
-{
- TUNABLE_INT_FETCH("kern.splassertmode", 0, splassertmode);
-}
-SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_splassertmode, NULL);
-
-static void
-splassertfail(char *str, const char *msg, char *name, int level)
-{
- switch (splassertmode) {
- case SPLASSERT_IGNORE:
- break;
- case SPLASSERT_LOG:
- printf(str, msg, name, level);
- printf("\n");
- break;
- case SPLASSERT_PANIC:
- panic(str, msg, name, level);
- break;
- }
-}
-
-#define GENSPLASSERT(NAME, MODIFIER) \
-void \
-NAME##assert(const char *msg) \
-{ \
- if ((cpl & (MODIFIER)) != (MODIFIER)) \
- splassertfail("%s: not %s, cpl == %#x", \
- msg, __XSTRING(NAME) + 3, cpl); \
-}
-#else
-#define GENSPLASSERT(NAME, MODIFIER)
-#endif
-
-/************************************************************************
- * GENERAL SPL CODE *
- ************************************************************************
- *
- * Implement splXXX(), spl0(), splx(), and splq(). splXXX() disables a
- * set of interrupts (e.g. splbio() disables interrupts relating to
- * device I/O) and returns the previous interrupt mask. splx() restores
- * the previous interrupt mask, spl0() is a special case which enables
- * all interrupts and is typically used inside i386/i386 swtch.s and
- * fork_trampoline. splq() is a generic version of splXXX().
- *
- * The SPL routines mess around with the 'cpl' global, which masks
- * interrupts. Interrupts are not *actually* masked. What happens is
- * that if an interrupt masked by the cpl occurs, the appropriate bit
- * in 'ipending' is set and the interrupt is defered. When we clear
- * bits in the cpl we must check to see if any ipending interrupts have
- * been unmasked and issue the synchronously, which is what the splz()
- * call does.
- *
- * Because the cpl is often saved and restored in a nested fashion, cpl
- * modifications are only allowed in the SMP case when the MP lock is held
- * to prevent multiple processes from tripping over each other's masks.
- * The cpl is saved when you do a context switch (mi_switch()) and restored
- * when your process gets cpu again.
- *
- * An interrupt routine is allowed to modify the cpl as long as it restores
- * it prior to returning (thus the interrupted mainline code doesn't notice
- * anything amiss). For the SMP case, the interrupt routine must hold
- * the MP lock for any cpl manipulation.
- *
- * Likewise, due to the deterministic nature of cpl modifications, we do
- * NOT need to use locked instructions to modify it.
+ * Dummy spl calls. The only reason for these is to not break
+ * all the code which expects to call them.
*/
-
-#ifndef SMP
-
-#define GENSPL(NAME, OP, MODIFIER, PC) \
-GENSPLASSERT(NAME, MODIFIER) \
-unsigned NAME(void) \
-{ \
- unsigned x; \
- \
- x = cpl; \
- cpl OP MODIFIER; \
- return (x); \
-}
-
-void
-spl0(void)
-{
- cpl = 0;
- if (ipending)
- splz();
-}
-
-void
-splx(unsigned ipl)
-{
- cpl = ipl;
- if (ipending & ~ipl)
- splz();
-}
-
-intrmask_t
-splq(intrmask_t mask)
-{
- intrmask_t tmp = cpl;
- cpl |= mask;
- return (tmp);
-}
-
-#else /* !SMP */
-
-#include <machine/smp.h>
-#include <machine/smptests.h>
-
-/*
- * SMP CASE
- *
- * Mostly the same as the non-SMP case now, but it didn't used to be
- * this clean.
- */
-
-#define GENSPL(NAME, OP, MODIFIER, PC) \
-GENSPLASSERT(NAME, MODIFIER) \
-unsigned NAME(void) \
-{ \
- unsigned x; \
- \
- x = cpl; \
- cpl OP MODIFIER; \
- \
- return (x); \
-}
-
-/*
- * spl0() - unmask all interrupts
- *
- * The MP lock must be held on entry
- * This routine may only be called from mainline code.
- */
-void
-spl0(void)
-{
- KASSERT(inside_intr == 0, ("spl0: called from interrupt"));
- cpl = 0;
- if (ipending)
- splz();
-}
-
-/*
- * splx() - restore previous interrupt mask
- *
- * The MP lock must be held on entry
- */
-
-void
-splx(unsigned ipl)
-{
- cpl = ipl;
- if (inside_intr == 0 && (ipending & ~cpl) != 0)
- splz();
-}
-
-
-/*
- * splq() - blocks specified interrupts
- *
- * The MP lock must be held on entry
- */
-intrmask_t
-splq(intrmask_t mask)
-{
- intrmask_t tmp = cpl;
- cpl |= mask;
- return (tmp);
-}
-
-#endif /* !SMP */
-
-/* Finally, generate the actual spl*() functions */
-
-/* NAME: OP: MODIFIER: PC: */
-GENSPL(splbio, |=, bio_imask, 2)
-GENSPL(splcam, |=, cam_imask, 7)
-GENSPL(splclock, =, HWI_MASK | SWI_MASK, 3)
-GENSPL(splhigh, =, HWI_MASK | SWI_MASK, 4)
-GENSPL(splimp, |=, net_imask, 5)
-GENSPL(splnet, |=, SWI_NET_MASK, 6)
-GENSPL(splsoftcam, |=, SWI_CAMBIO_MASK | SWI_CAMNET_MASK, 8)
-GENSPL(splsoftcambio, |=, SWI_CAMBIO_MASK, 9)
-GENSPL(splsoftcamnet, |=, SWI_CAMNET_MASK, 10)
-GENSPL(splsoftclock, =, SWI_CLOCK_MASK, 11)
-GENSPL(splsofttty, |=, SWI_TTY_MASK, 12)
-GENSPL(splsoftvm, |=, SWI_VM_MASK, 16)
-GENSPL(splsofttq, |=, SWI_TQ_MASK, 17)
-GENSPL(splstatclock, |=, stat_imask, 13)
-GENSPL(spltty, |=, tty_imask, 14)
-GENSPL(splvm, |=, net_imask | bio_imask | cam_imask, 15)
+void spl0 (void) {}
+void splx (intrmask_t x) {}
+intrmask_t splq(intrmask_t mask) {return 0; }
+intrmask_t splbio(void) {return 0; }
+intrmask_t splcam(void) {return 0; }
+intrmask_t splclock(void) {return 0; }
+intrmask_t splhigh(void) {return 0; }
+intrmask_t splimp(void) {return 0; }
+intrmask_t splnet(void) {return 0; }
+intrmask_t splsoftcam(void) {return 0; }
+intrmask_t splsoftcambio(void) {return 0; }
+intrmask_t splsoftcamnet(void) {return 0; }
+intrmask_t splsoftclock(void) {return 0; }
+intrmask_t splsofttty(void) {return 0; }
+intrmask_t splsoftvm(void) {return 0; }
+intrmask_t splsofttq(void) {return 0; }
+intrmask_t splstatclock(void) {return 0; }
+intrmask_t spltty(void) {return 0; }
+intrmask_t splvm(void) {return 0; }
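
For reference, expanding DO_SETBITS() for one of the soft interrupt functions shows what each setsoft*() now does: set the pending bit in spending and immediately schedule the soft interrupt thread. This is just the macro above written out for setsoftclock:

void
setsoftclock(void)
{
	atomic_set_int(&spending, SWI_CLOCK_PENDING);
	sched_ithd((void *) SOFTINTR);
}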
diff --git a/sys/i386/isa/ithread.c b/sys/i386/isa/ithread.c
new file mode 100644
index 0000000..4ceac42
--- /dev/null
+++ b/sys/i386/isa/ithread.c
@@ -0,0 +1,353 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From BSDI: intr.c,v 1.6.2.5 1999/07/06 19:16:52 cp Exp
+ * $FreeBSD$
+ */
+
+/* Interrupt thread code. */
+
+#include "opt_auto_eoi.h"
+
+#include "isa.h"
+
+#include <sys/param.h>
+#include <sys/rtprio.h> /* change this name XXX */
+#ifndef SMP
+#include <machine/lock.h>
+#endif
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/unistd.h>
+#include <sys/errno.h>
+#include <sys/interrupt.h>
+#include <machine/ipl.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
+#include <sys/bus.h>
+
+#if defined(APIC_IO)
+#include <machine/smp.h>
+#include <machine/smptests.h> /** FAST_HI */
+#include <machine/resource.h>
+#endif /* APIC_IO */
+#ifdef PC98
+#include <pc98/pc98/pc98.h>
+#include <pc98/pc98/pc98_machdep.h>
+#include <pc98/pc98/epsonio.h>
+#else
+#include <i386/isa/isa.h>
+#endif
+#include <i386/isa/icu.h>
+
+#if NISA > 0
+#include <isa/isavar.h>
+#endif
+#include <i386/isa/intr_machdep.h>
+#include <sys/interrupt.h>
+#ifdef APIC_IO
+#include <machine/clock.h>
+#endif
+
+#include "mca.h"
+#if NMCA > 0
+#include <i386/isa/mca_machdep.h>
+#endif
+
+#include <sys/vmmeter.h>
+#include <machine/mutex.h>
+#include <sys/ktr.h>
+#include <machine/cpu.h>
+#if 0
+#include <ddb/ddb.h>
+#endif
+
+u_long softintrcnt [NSWI];
+
+SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL)
+
+/*
+ * Schedule a heavyweight interrupt process. This function is called
+ * from the interrupt handlers Xintr<num>.
+ */
+void
+sched_ithd(void *cookie)
+{
+ int irq = (int) cookie; /* IRQ we're handling */
+ ithd *ir = ithds[irq]; /* and the process that does it */
+
+ /* This used to be in icu_vector.s */
+ /*
+ * We count software interrupts when we process them. The
+ * code here follows previous practice, but there's an
+ * argument for counting hardware interrupts when they're
+ * processed too.
+ */
+ if (irq < NHWI) /* real interrupt, */
+ atomic_add_long(intr_countp[irq], 1); /* one more for this IRQ */
+ atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */
+
+ CTR3(KTR_INTR, "sched_ithd pid %d(%s) need=%d",
+ ir->it_proc->p_pid, ir->it_proc->p_comm, ir->it_need);
+
+#if 0
+ /*
+ * If we are in the debugger, we can't use interrupt threads to
+ * process interrupts since the threads are scheduled. Instead,
+ * call the interrupt handlers directly. This should be able to
+ * go away once we have light-weight interrupt handlers.
+ */
+ if (db_active) {
+ intrec *ih; /* and our interrupt handler chain */
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = ir->it_ih; ih != NULL; ih = ih->next) {
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+
+ INTREN (1 << ir->irq); /* reset the mask bit */
+ return;
+ }
+#endif
+
+ /*
+ * Set it_need so that if the thread is already running but close
+ * to done, it will do another go-round. Then get the sched lock
+ * and see if the thread is on whichkqs yet. If not, put it on
+ * there. In any case, kick everyone so that if the new thread
+ * is higher priority than their current thread, it gets run now.
+ */
+ ir->it_need = 1;
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (ir->it_proc->p_stat == SWAIT) { /* not on run queue */
+ CTR1(KTR_INTR, "sched_ithd: setrunqueue %d",
+ ir->it_proc->p_pid);
+/* membar_lock(); */
+ ir->it_proc->p_stat = SRUN;
+ setrunqueue(ir->it_proc);
+ aston();
+ }
+ else {
+if (irq < NHWI && (irq & 7) != 0)
+ CTR3(KTR_INTR, "sched_ithd %d: it_need %d, state %d",
+ ir->it_proc->p_pid,
+ ir->it_need,
+ ir->it_proc->p_stat );
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+#if 0
+ aston(); /* ??? check priorities first? */
+#else
+ need_resched();
+#endif
+}
+
+/*
+ * This is the main code for all interrupt threads. It gets put on
+ * whichkqs by setrunqueue above.
+ */
+void
+ithd_loop(void *dummy)
+{
+ ithd *me; /* our thread context */
+ intrec *ih; /* and our interrupt handler chain */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /*
+ * As long as we have interrupts outstanding, go through the
+ * list of handlers, giving each one a go at it.
+ */
+ for (;;) {
+ CTR3(KTR_INTR, "ithd_loop pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm, me->it_need);
+ while (me->it_need) {
+ /*
+ * Service interrupts. If another interrupt
+ * arrives while we are running, it will set
+ * it_need to denote that we should make
+ * another pass.
+ */
+ me->it_need = 0;
+#if 0
+ membar_unlock(); /* push out "it_need=0" */
+#endif
+ for (ih = me->it_ih; ih != NULL; ih = ih->next) {
+ CTR5(KTR_INTR,
+ "ithd_loop pid %d ih=%p: %p(%p) flg=%x",
+ me->it_proc->p_pid, (void *)ih,
+ (void *)ih->handler, ih->argument,
+ ih->flags);
+
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_enter(&Giant, MTX_DEF);
+ ih->handler(ih->argument);
+ if ((ih->flags & INTR_MPSAFE) == 0)
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ }
+
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+
+ INTREN (1 << me->irq); /* reset the mask bit */
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+#ifdef APIC_IO
+ CTR1(KTR_INTR, "ithd_loop pid %d: done",
+ me->it_proc->p_pid);
+#else
+ CTR2(KTR_INTR, "ithd_loop pid %d: done, imen=%x",
+ me->it_proc->p_pid, imen);
+#endif
+ mi_switch();
+ CTR1(KTR_INTR, "ithd_loop pid %d: resumed",
+ me->it_proc->p_pid);
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
+
+/*
+ * Start soft interrupt thread.
+ */
+void
+start_softintr(void *dummy)
+{
+ int error;
+ struct proc *p;
+ ithd *softintr; /* descriptor for the "IRQ" */
+ intrec *idesc; /* descriptor for this handler */
+ char *name = "sintr"; /* name for idesc */
+ int i;
+
+ if (ithds[SOFTINTR]) { /* we already have a thread */
+ printf("start_softintr: already running");
+ return;
+ }
+ /* first handler for this irq. */
+ softintr = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (softintr == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(softintr, sizeof(struct ithd));
+ softintr->irq = SOFTINTR;
+ ithds[SOFTINTR] = softintr;
+ error = kthread_create(intr_soft, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "softinterrupt");
+ if (error)
+ panic("start_softintr: kthread_create error %d\n", error);
+
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_rtprio.prio = PI_SOFT; /* soft interrupt */
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ softintr->it_proc = p;
+ p->p_ithd = softintr; /* reverse link */
+
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
+ panic ("Can't create soft interrupt thread");
+ bzero(idesc, sizeof (struct intrec));
+
+ idesc->ithd = softintr;
+ idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
+ if (idesc->name == NULL)
+ panic ("Can't create soft interrupt thread");
+ strcpy(idesc->name, name);
+ for (i = NHWI; i < NHWI + NSWI; i++)
+ intr_countp[i] = &softintrcnt [i - NHWI];
+}
+
+/*
+ * Software interrupt process code.
+ */
+void
+intr_soft(void *dummy)
+{
+ int i;
+ ithd *me; /* our thread context */
+
+ me = curproc->p_ithd; /* point to myself */
+
+ /* Main loop */
+ for (;;) {
+#if 0
+ CTR3(KTR_INTR, "intr_soft pid %d(%s) need=%d",
+ me->it_proc->p_pid, me->it_proc->p_comm,
+ me->it_need);
+#endif
+
+ /*
+ * Service interrupts. If another interrupt arrives
+ * while we are running, it will set it_need to
+ * denote that we should make another pass.
+ */
+ me->it_need = 0;
+ while ((i = ffs(spending))) {
+ i--;
+ atomic_add_long(intr_countp[i], 1);
+ spending &= ~ (1 << i);
+ mtx_enter(&Giant, MTX_DEF);
+ (ihandlers[i])();
+ mtx_exit(&Giant, MTX_DEF);
+ }
+ /*
+ * Processed all our interrupts. Now get the sched
+ * lock. This may take a while and it_need may get
+ * set again, so we have to check it again.
+ */
+ mtx_enter(&sched_lock, MTX_SPIN);
+ if (!me->it_need) {
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: done",
+ me->it_proc->p_pid);
+#endif
+ me->it_proc->p_stat = SWAIT; /* we're idle */
+ mi_switch();
+#if 0
+ CTR1(KTR_INTR, "intr_soft pid %d: resumed",
+ me->it_proc->p_pid);
+#endif
+ }
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
+}
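
The interplay between sched_ithd() and ithd_loop() hinges on it_need: the scheduling side sets it and wakes the thread, while the thread keeps looping as long as it is set and only parks itself (SWAIT) after re-checking it under sched_lock, so a request that arrives after the last handler pass is not lost. Condensed to its essentials, given an ithd *it, it looks like the fragment below; run_handlers() is a hypothetical stand-in for the walk over the it_ih chain, and the other names are as in the code above.

	/* producer side, reached from the vector stub via sched_ithd() */
	it->it_need = 1;
	mtx_enter(&sched_lock, MTX_SPIN);
	if (it->it_proc->p_stat == SWAIT) {	/* thread is parked */
		it->it_proc->p_stat = SRUN;
		setrunqueue(it->it_proc);
	}
	mtx_exit(&sched_lock, MTX_SPIN);

	/* consumer side, inside ithd_loop() */
	while (it->it_need) {
		it->it_need = 0;
		run_handlers(it);		/* hypothetical: walk it->it_ih */
	}
	mtx_enter(&sched_lock, MTX_SPIN);
	if (!it->it_need) {			/* nothing arrived meanwhile */
		it->it_proc->p_stat = SWAIT;
		mi_switch();			/* sleep until rescheduled */
	}
	mtx_exit(&sched_lock, MTX_SPIN);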
diff --git a/sys/i386/isa/loran.c b/sys/i386/isa/loran.c
index 577a608..c43bf85 100644
--- a/sys/i386/isa/loran.c
+++ b/sys/i386/isa/loran.c
@@ -620,7 +620,7 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, loran_timecounter, CTLFLAG_RD,
/**********************************************************************/
struct isa_driver lorandriver = {
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
loranprobe,
loranattach,
"loran"
diff --git a/sys/i386/isa/nmi.c b/sys/i386/isa/nmi.c
index 34a8c22..870760e 100644
--- a/sys/i386/isa/nmi.c
+++ b/sys/i386/isa/nmi.c
@@ -36,12 +36,6 @@
* from: @(#)isa.c 7.2 (Berkeley) 5/13/91
* $FreeBSD$
*/
-/*
- * This file contains an aggregated module marked:
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- * See the notice for details.
- */
#include "opt_auto_eoi.h"
@@ -51,11 +45,14 @@
#ifndef SMP
#include <machine/lock.h>
#endif
+#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/module.h>
+#include <sys/unistd.h>
#include <sys/errno.h>
#include <sys/interrupt.h>
#include <machine/ipl.h>
@@ -91,30 +88,14 @@
#include <i386/isa/mca_machdep.h>
#endif
-/* XXX should be in suitable include files */
-#ifdef PC98
-#define ICU_IMR_OFFSET 2 /* IO_ICU{1,2} + 2 */
-#define ICU_SLAVEID 7
-#else
-#define ICU_IMR_OFFSET 1 /* IO_ICU{1,2} + 1 */
-#define ICU_SLAVEID 2
-#endif
-
-#ifdef APIC_IO
/*
- * This is to accommodate "mixed-mode" programming for
- * motherboards that don't connect the 8254 to the IO APIC.
+ * Per-interrupt data. We consider the soft interrupt to be a special
+ * case, so these arrays have NHWI + NSWI entries, not ICU_LEN.
*/
-#define AUTO_EOI_1 1
-#endif
-
-#define NR_INTRNAMES (1 + ICU_LEN + 2 * ICU_LEN)
-
-u_long *intr_countp[ICU_LEN];
-inthand2_t *intr_handler[ICU_LEN];
-u_int intr_mask[ICU_LEN];
-static u_int* intr_mptr[ICU_LEN];
-void *intr_unit[ICU_LEN];
+u_long *intr_countp[NHWI + NSWI]; /* pointers to interrupt counters */
+inthand2_t *intr_handler[NHWI + NSWI]; /* first level interrupt handler */
+ithd *ithds[NHWI + NSWI]; /* real interrupt handler */
+void *intr_unit[NHWI + NSWI];
static inthand_t *fastintr[ICU_LEN] = {
&IDTVEC(fastintr0), &IDTVEC(fastintr1),
@@ -292,8 +273,9 @@ isa_nmi(cd)
}
/*
- * Fill in default interrupt table (in case of spuruious interrupt
- * during configuration of kernel, setup interrupt control unit
+ * Create a default interrupt table to avoid problems caused by
+ * spurious interrupts during configuration of the kernel, then set up
+ * the interrupt control unit.
*/
void
isa_defaultirq()
@@ -364,16 +346,6 @@ isa_strayintr(vcookiep)
{
int intr = (void **)vcookiep - &intr_unit[0];
- /* DON'T BOTHER FOR NOW! */
- /* for some reason, we get bursts of intr #7, even if not enabled! */
- /*
- * Well the reason you got bursts of intr #7 is because someone
- * raised an interrupt line and dropped it before the 8259 could
- * prioritize it. This is documented in the intel data book. This
- * means you have BAD hardware! I have changed this so that only
- * the first 5 get logged, then it quits logging them, and puts
- * out a special message. rgrimes 3/25/1993
- */
/*
* XXX TODO print a different message for #7 if it is for a
* glitch. Glitches can be distinguished from real #7's by
@@ -405,36 +377,10 @@ isa_irq_pending()
}
#endif
-int
-update_intr_masks(void)
-{
- int intr, n=0;
- u_int mask,*maskptr;
-
- for (intr=0; intr < ICU_LEN; intr ++) {
-#if defined(APIC_IO)
- /* no 8259 SLAVE to ignore */
-#else
- if (intr==ICU_SLAVEID) continue; /* ignore 8259 SLAVE output */
-#endif /* APIC_IO */
- maskptr = intr_mptr[intr];
- if (!maskptr)
- continue;
- *maskptr |= SWI_LOW_MASK | (1 << intr);
- mask = *maskptr;
- if (mask != intr_mask[intr]) {
-#if 0
- printf ("intr_mask[%2d] old=%08x new=%08x ptr=%p.\n",
- intr, intr_mask[intr], mask, maskptr);
-#endif
- intr_mask[intr]=mask;
- n++;
- }
-
- }
- return (n);
-}
-
+/*
+ * Update the intrnames array with the specified name. This is used by
+ * vmstat(8) and the like.
+ */
static void
update_intrname(int intr, char *name)
{
@@ -485,7 +431,7 @@ found:
}
int
-icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
+icu_setup(int intr, inthand2_t *handler, void *arg, int flags)
{
#ifdef FAST_HI
int select; /* the select register is 8 bits */
@@ -493,7 +439,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
u_int32_t value; /* the window register is 32 bits */
#endif /* FAST_HI */
u_long ef;
- u_int mask = (maskptr ? *maskptr : 0);
#if defined(APIC_IO)
if ((u_int)intr >= ICU_LEN) /* no 8259 SLAVE to ignore */
@@ -506,8 +451,6 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
ef = read_eflags();
disable_intr();
intr_handler[intr] = handler;
- intr_mptr[intr] = maskptr;
- intr_mask[intr] = mask | SWI_LOW_MASK | (1 << intr);
intr_unit[intr] = arg;
#ifdef FAST_HI
if (flags & INTR_FAST) {
@@ -547,11 +490,15 @@ icu_setup(int intr, inthand2_t *handler, void *arg, u_int *maskptr, int flags)
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
INTREN(1 << intr);
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
+/*
+ * Dissociate an interrupt handler from an IRQ and set the handler to
+ * the stray interrupt handler. The 'handler' parameter is used only
+ * for consistency checking.
+ */
int
icu_unset(intr, handler)
int intr;
@@ -567,8 +514,6 @@ icu_unset(intr, handler)
disable_intr();
intr_countp[intr] = &intrcnt[1 + intr];
intr_handler[intr] = isa_strayintr;
- intr_mptr[intr] = NULL;
- intr_mask[intr] = HWI_MASK | SWI_MASK;
intr_unit[intr] = &intr_unit[intr];
#ifdef FAST_HI_XXX
/* XXX how do I re-create dvp here? */
@@ -581,353 +526,172 @@ icu_unset(intr, handler)
setidt(ICU_OFFSET + intr, slowintr[intr], SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif /* FAST_HI */
- MPINTR_UNLOCK();
write_eflags(ef);
return (0);
}
-/* The following notice applies beyond this point in the file */
-
-/*
- * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice unmodified, this list of conditions, and the following
- * disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- *
- */
-
-typedef struct intrec {
- intrmask_t mask;
- inthand2_t *handler;
- void *argument;
- struct intrec *next;
- char *name;
- int intr;
- intrmask_t *maskptr;
- int flags;
-} intrec;
-
-static intrec *intreclist_head[ICU_LEN];
-
-/*
- * The interrupt multiplexer calls each of the handlers in turn. The
- * ipl is initially quite low. It is raised as necessary for each call
- * and lowered after the call. Thus out of order handling is possible
- * even for interrupts of the same type. This is probably no more
- * harmful than out of order handling in general (not harmful except
- * for real time response which we don't support anyway).
- */
-static void
-intr_mux(void *arg)
-{
- intrec *p;
- intrmask_t oldspl;
-
- for (p = arg; p != NULL; p = p->next) {
- oldspl = splq(p->mask);
- p->handler(p->argument);
- splx(oldspl);
- }
-}
-
-static intrec*
-find_idesc(unsigned *maskptr, int irq)
-{
- intrec *p = intreclist_head[irq];
-
- while (p && p->maskptr != maskptr)
- p = p->next;
-
- return (p);
-}
-
-static intrec**
-find_pred(intrec *idesc, int irq)
+intrec *
+inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
+ int pri, int flags)
{
- intrec **pp = &intreclist_head[irq];
- intrec *p = *pp;
-
- while (p != idesc) {
- if (p == NULL)
- return (NULL);
- pp = &p->next;
- p = *pp;
- }
- return (pp);
-}
-
-/*
- * Both the low level handler and the shared interrupt multiplexer
- * block out further interrupts as set in the handlers "mask", while
- * the handler is running. In fact *maskptr should be used for this
- * purpose, but since this requires one more pointer dereference on
- * each interrupt, we rather bother update "mask" whenever *maskptr
- * changes. The function "update_masks" should be called **after**
- * all manipulation of the linked list of interrupt handlers hung
- * off of intrdec_head[irq] is complete, since the chain of handlers
- * will both determine the *maskptr values and the instances of mask
- * that are fixed. This function should be called with the irq for
- * which a new handler has been add blocked, since the masks may not
- * yet know about the use of this irq for a device of a certain class.
- */
+ ithd *ithd = ithds[irq]; /* descriptor for the IRQ */
+ intrec *head; /* chain of handlers for IRQ */
+ intrec *idesc; /* descriptor for this handler */
+ struct proc *p; /* interrupt thread */
+ int errcode = 0;
-static void
-update_mux_masks(void)
-{
- int irq;
- for (irq = 0; irq < ICU_LEN; irq++) {
- intrec *idesc = intreclist_head[irq];
- while (idesc != NULL) {
- if (idesc->maskptr != NULL) {
- /* our copy of *maskptr may be stale, refresh */
- idesc->mask = *idesc->maskptr;
- }
- idesc = idesc->next;
+ if (name == NULL) /* no name? */
+ panic ("anonymous interrupt");
+ if (ithd == NULL || ithd->it_ih == NULL) {
+ /* first handler for this irq. */
+ if (ithd == NULL) {
+ ithd = malloc(sizeof (struct ithd), M_DEVBUF, M_WAITOK);
+ if (ithd == NULL)
+ return (NULL);
+ bzero(ithd, sizeof(struct ithd));
+ ithd->irq = irq;
+ ithds[irq] = ithd;
}
- }
-}
-
-static void
-update_masks(intrmask_t *maskptr, int irq)
-{
- intrmask_t mask = 1 << irq;
-
- if (maskptr == NULL)
- return;
-
- if (find_idesc(maskptr, irq) == NULL) {
- /* no reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) == 0)
- return;
- /* the irq was included in the classes mask, remove it */
- *maskptr &= ~mask;
- } else {
- /* a reference to this maskptr was found in this irq's chain */
- if ((*maskptr & mask) != 0)
- return;
- /* put the irq into the classes mask */
- *maskptr |= mask;
- }
- /* we need to update all values in the intr_mask[irq] array */
- update_intr_masks();
- /* update mask in chains of the interrupt multiplex handler as well */
- update_mux_masks();
-}
-
-/*
- * Add interrupt handler to linked list hung off of intreclist_head[irq]
- * and install shared interrupt multiplex handler, if necessary
- */
-
-static int
-add_intrdesc(intrec *idesc)
-{
- int irq = idesc->intr;
-
- intrec *head = intreclist_head[irq];
-
- if (head == NULL) {
- /* first handler for this irq, just install it */
- if (icu_setup(irq, idesc->handler, idesc->argument,
- idesc->maskptr, idesc->flags) != 0)
- return (-1);
-
- update_intrname(irq, idesc->name);
- /* keep reference */
- intreclist_head[irq] = idesc;
- } else {
- if ((idesc->flags & INTR_EXCL) != 0
- || (head->flags & INTR_EXCL) != 0) {
+ /*
+		 * A fast interrupt needs the handler address itself
+		 * passed to icu_setup(); that is done below.  A slow
+		 * interrupt just uses sched_ithd(), so set it up here
+		 * where it's tidier.
+ */
+ if ((flags & INTR_FAST) == 0) {
/*
- * can't append new handler, if either list head or
- * new handler do not allow interrupts to be shared
+ * Only create a kernel thread if we don't already
+ * have one.
*/
- if (bootverbose)
- printf("\tdevice combination doesn't support "
- "shared irq%d\n", irq);
- return (-1);
- }
- if (head->next == NULL) {
+ if (ithd->it_proc == NULL) {
+ errcode = kthread_create(ithd_loop, NULL, &p,
+ RFSTOPPED | RFHIGHPID, "irq%d: %s", irq,
+ name);
+ if (errcode)
+ panic("inthand_add: Can't create "
+ "interrupt thread");
+ p->p_rtprio.type = RTP_PRIO_ITHREAD;
+ p->p_stat = SWAIT; /* we're idle */
+
+ /* Put in linkages. */
+ ithd->it_proc = p;
+ p->p_ithd = ithd;
+ } else
+ snprintf(ithd->it_proc->p_comm, MAXCOMLEN,
+ "irq%d: %s", irq, name);
+ p->p_rtprio.prio = pri;
+
/*
- * second handler for this irq, replace device driver's
- * handler by shared interrupt multiplexer function
+ * The interrupt process must be in place, but
+ * not necessarily schedulable, before we
+ * initialize the ICU, since it may cause an
+ * immediate interrupt.
*/
- icu_unset(irq, head->handler);
- if (icu_setup(irq, intr_mux, head, 0, 0) != 0)
- return (-1);
- if (bootverbose)
- printf("\tusing shared irq%d.\n", irq);
- update_intrname(irq, "mux");
+ if (icu_setup(irq, &sched_ithd, arg, flags) != 0)
+ panic("inthand_add: Can't initialize ICU");
}
- /* just append to the end of the chain */
- while (head->next != NULL)
- head = head->next;
- head->next = idesc;
- }
- update_masks(idesc->maskptr, irq);
- return (0);
-}
-
-/*
- * Create and activate an interrupt handler descriptor data structure.
- *
- * The dev_instance pointer is required for resource management, and will
- * only be passed through to resource_claim().
- *
- * There will be functions that derive a driver and unit name from a
- * dev_instance variable, and those functions will be used to maintain the
- * interrupt counter label array referenced by systat and vmstat to report
- * device interrupt rates (->update_intrlabels).
- *
- * Add the interrupt handler descriptor data structure created by an
- * earlier call of create_intr() to the linked list for its irq and
- * adjust the interrupt masks if necessary.
- *
- * WARNING: This is an internal function and not to be used by device
- * drivers. It is subject to change without notice.
- */
-
-intrec *
-inthand_add(const char *name, int irq, inthand2_t handler, void *arg,
- intrmask_t *maskptr, int flags)
-{
- intrec *idesc;
- int errcode = -1;
- intrmask_t oldspl;
-
- if (ICU_LEN > 8 * sizeof *maskptr) {
- printf("create_intr: ICU_LEN of %d too high for %d bit intrmask\n",
- ICU_LEN, 8 * sizeof *maskptr);
+ } else if ((flags & INTR_EXCL) != 0
+ || (ithd->it_ih->flags & INTR_EXCL) != 0) {
+ /*
+		 * We can't append the new handler if either the
+		 * existing handler chain (ithd) or the new handler
+		 * does not allow interrupts to be shared.
+ */
+ if (bootverbose)
+ printf("\tdevice combination %s and %s "
+ "doesn't support shared irq%d\n",
+ ithd->it_ih->name, name, irq);
+ return(NULL);
+ } else if (flags & INTR_FAST) {
+ /* We can only have one fast interrupt by itself. */
+ if (bootverbose)
+ printf("\tCan't add fast interrupt %s"
+ " to normal interrupt %s on irq%d",
+ name, ithd->it_ih->name, irq);
return (NULL);
+ } else { /* update p_comm */
+ p = ithd->it_proc;
+ if (strlen(p->p_comm) + strlen(name) < MAXCOMLEN) {
+ strcat(p->p_comm, " ");
+ strcat(p->p_comm, name);
+ } else if (strlen(p->p_comm) == MAXCOMLEN)
+ p->p_comm[MAXCOMLEN - 1] = '+';
+ else
+ strcat(p->p_comm, "+");
}
- if ((unsigned)irq >= ICU_LEN) {
- printf("create_intr: requested irq%d too high, limit is %d\n",
- irq, ICU_LEN -1);
+ idesc = malloc(sizeof (struct intrec), M_DEVBUF, M_WAITOK);
+ if (idesc == NULL)
return (NULL);
- }
+ bzero(idesc, sizeof (struct intrec));
- idesc = malloc(sizeof *idesc, M_DEVBUF, M_WAITOK);
- if (idesc == NULL)
- return NULL;
- bzero(idesc, sizeof *idesc);
+ idesc->handler = handler;
+ idesc->argument = arg;
+ idesc->flags = flags;
+ idesc->ithd = ithd;
- if (name == NULL)
- name = "???";
idesc->name = malloc(strlen(name) + 1, M_DEVBUF, M_WAITOK);
if (idesc->name == NULL) {
free(idesc, M_DEVBUF);
- return NULL;
+ return (NULL);
}
strcpy(idesc->name, name);
- idesc->handler = handler;
- idesc->argument = arg;
- idesc->maskptr = maskptr;
- idesc->intr = irq;
- idesc->flags = flags;
-
- /* block this irq */
- oldspl = splq(1 << irq);
-
- /* add irq to class selected by maskptr */
- errcode = add_intrdesc(idesc);
- splx(oldspl);
-
- if (errcode != 0) {
+ /* Slow interrupts got set up above. */
+ if ((flags & INTR_FAST)
+ && (icu_setup(irq, idesc->handler, idesc->argument,
+ idesc->flags) != 0) ) {
if (bootverbose)
- printf("\tintr_connect(irq%d) failed, result=%d\n",
+ printf("\tinthand_add(irq%d) failed, result=%d\n",
irq, errcode);
free(idesc->name, M_DEVBUF);
free(idesc, M_DEVBUF);
- idesc = NULL;
+ return NULL;
}
-
+ head = ithd->it_ih; /* look at chain of handlers */
+ if (head) {
+ while (head->next != NULL)
+ head = head->next; /* find the end */
+ head->next = idesc; /* hook it in there */
+ } else
+ ithd->it_ih = idesc; /* put it up front */
+ update_intrname(irq, idesc->name);
return (idesc);
}
/*
- * Deactivate and remove the interrupt handler descriptor data connected
- * created by an earlier call of intr_connect() from the linked list and
- * adjust theinterrupt masks if necessary.
+ * Deactivate and remove from the linked list the interrupt handler
+ * descriptor created by an earlier call of inthand_add(), then
+ * adjust the interrupt masks if necessary.
*
- * Return the memory held by the interrupt handler descriptor data structure
- * to the system. Make sure, the handler is not actively used anymore, before.
+ * Return the memory held by the interrupt handler descriptor data
+ * structure to the system. First ensure the handler is not actively
+ * in use.
*/
int
inthand_remove(intrec *idesc)
{
- intrec **hook, *head;
- int irq;
- int errcode = 0;
- intrmask_t oldspl;
+ ithd *ithd; /* descriptor for the IRQ */
+ intrec *ih; /* chain of handlers */
if (idesc == NULL)
return (-1);
+ ithd = idesc->ithd;
+ ih = ithd->it_ih;
- irq = idesc->intr;
-
- /* find pointer that keeps the reference to this interrupt descriptor */
- hook = find_pred(idesc, irq);
- if (hook == NULL)
+ if (ih == idesc) /* first in the chain */
+ ithd->it_ih = idesc->next; /* unhook it */
+ else {
+ while ((ih != NULL)
+ && (ih->next != idesc) )
+ ih = ih->next;
+		if (ih == NULL)	/* not found in the chain */
return (-1);
-
- /* make copy of original list head, the line after may overwrite it */
- head = intreclist_head[irq];
-
- /* unlink: make predecessor point to idesc->next instead of to idesc */
- *hook = idesc->next;
-
- /* now check whether the element we removed was the list head */
- if (idesc == head) {
-
- oldspl = splq(1 << irq);
-
- /* check whether the new list head is the only element on list */
- head = intreclist_head[irq];
- if (head != NULL) {
- icu_unset(irq, intr_mux);
- if (head->next != NULL) {
- /* install the multiplex handler with new list head as argument */
- errcode = icu_setup(irq, intr_mux, head, 0, 0);
- if (errcode == 0)
- update_intrname(irq, NULL);
- } else {
- /* install the one remaining handler for this irq */
- errcode = icu_setup(irq, head->handler,
- head->argument,
- head->maskptr, head->flags);
- if (errcode == 0)
- update_intrname(irq, head->name);
+ ih->next = ih->next->next;
}
- } else {
- /* revert to old handler, eg: strayintr */
- icu_unset(irq, idesc->handler);
- }
- splx(oldspl);
- }
- update_masks(idesc->maskptr, irq);
+
+ if (ithd->it_ih == NULL) /* no handlers left, */
+ icu_unset(ithd->irq, idesc->handler);
free(idesc, M_DEVBUF);
return (0);
}
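
For reference, a minimal sketch (not part of the patch) of how a driver might use
the new registration interface above.  The device name "foo", the IRQ number, the
softc pointer sc and the foointr() handler are hypothetical; PI_REALTIME is simply
the priority the clock code further down uses:

    /* attach: register a shareable, non-fast handler on IRQ 5 */
    static intrec *foo_irec;

    foo_irec = inthand_add("foo", 5, (inthand2_t *)foointr, sc,
        PI_REALTIME, 0);
    if (foo_irec == NULL)
        return (ENXIO);             /* registration failed */

    /* detach: unhook the handler again */
    if (inthand_remove(foo_irec) != 0)
        printf("foo: could not remove interrupt handler\n");

As the clock code shows, callers that talk to the ICU directly also enable the
line afterwards with INTREN().
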
diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c
index 637853e..8610e35 100644
--- a/sys/i386/isa/npx.c
+++ b/sys/i386/isa/npx.c
@@ -245,6 +245,12 @@ npx_probe(dev)
setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
npx_idt_probeintr = idt[npx_intrno];
+
+ /*
+ * XXX This looks highly bogus, but it appears that npx_probe1
+ * needs interrupts enabled. Does this make any difference
+ * here?
+ */
enable_intr();
result = npx_probe1(dev);
disable_intr();
@@ -797,7 +803,7 @@ npxdna()
/*
* Record new context early in case frstor causes an IRQ13.
*/
- npxproc = curproc;
+ PCPU_SET(npxproc, CURPROC);
curpcb->pcb_savefpu.sv_ex_sw = 0;
/*
* The following frstor may cause an IRQ13 when the state being
@@ -834,16 +840,18 @@ npxsave(addr)
fnsave(addr);
/* fnop(); */
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
#else /* SMP */
+ int intrstate;
u_char icu1_mask;
u_char icu2_mask;
u_char old_icu1_mask;
u_char old_icu2_mask;
struct gate_descriptor save_idt_npxintr;
+ intrstate = save_intr();
disable_intr();
old_icu1_mask = inb(IO_ICU1 + 1);
old_icu2_mask = inb(IO_ICU2 + 1);
@@ -851,12 +859,12 @@ npxsave(addr)
outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask));
outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8));
idt[npx_intrno] = npx_idt_probeintr;
- enable_intr();
+ write_eflags(intrstate);
stop_emulating();
fnsave(addr);
fnop();
start_emulating();
- npxproc = NULL;
+ PCPU_SET(npxproc, NULL);
disable_intr();
icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */
icu2_mask = inb(IO_ICU2 + 1);
@@ -866,7 +874,7 @@ npxsave(addr)
(icu2_mask & ~(npx0_imask >> 8))
| (old_icu2_mask & (npx0_imask >> 8)));
idt[npx_intrno] = save_idt_npxintr;
- enable_intr(); /* back to usual state */
+ restore_intr(intrstate); /* back to previous state */
#endif /* SMP */
}
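
The npxsave() change above is an instance of the idiom this patch uses in place of
the old disable_intr()/enable_intr() pairs: the previous interrupt state is saved
and later restored, so a caller that already had interrupts disabled does not get
them re-enabled behind its back.  A minimal sketch of the idiom, using only the
calls that appear in the patch:

    int intrstate;

    intrstate = save_intr();        /* remember current interrupt state */
    disable_intr();
    /* ... touch hardware or per-CPU state ... */
    restore_intr(intrstate);        /* re-enable only if previously enabled */
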
diff --git a/sys/i386/isa/vector.s b/sys/i386/isa/vector.s
index 5447a90..79f2320 100644
--- a/sys/i386/isa/vector.s
+++ b/sys/i386/isa/vector.s
@@ -16,9 +16,10 @@
#include <i386/isa/isa.h>
#endif
+#define FAST_INTR_HANDLER_USES_ES 1
#ifdef FAST_INTR_HANDLER_USES_ES
#define ACTUALLY_PUSHED 1
-#define MAYBE_MOVW_AX_ES movl %ax,%es
+#define MAYBE_MOVW_AX_ES movw %ax,%es
#define MAYBE_POPL_ES popl %es
#define MAYBE_PUSHL_ES pushl %es
#else
@@ -36,11 +37,6 @@
.data
ALIGN_DATA
- .globl _intr_nesting_level
-_intr_nesting_level:
- .byte 0
- .space 3
-
/*
* Interrupt counters and names for export to vmstat(8) and friends.
*
@@ -58,7 +54,6 @@ _eintrcnt:
_intrnames:
.space NR_INTRNAMES * 16
_eintrnames:
-
.text
/*
diff --git a/sys/isa/atrtc.c b/sys/isa/atrtc.c
index 15044ab..724f3c2 100644
--- a/sys/isa/atrtc.c
+++ b/sys/isa/atrtc.c
@@ -54,6 +54,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/proc.h>
#include <sys/time.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
@@ -93,10 +94,6 @@
#include <i386/isa/mca_machdep.h>
#endif
-#ifdef SMP
-#define disable_intr() CLOCK_DISABLE_INTR()
-#define enable_intr() CLOCK_ENABLE_INTR()
-
#ifdef APIC_IO
#include <i386/isa/intr_machdep.h>
/* The interrupt triggered by the 8254 (timer) chip */
@@ -104,7 +101,6 @@ int apic_8254_intr;
static u_long read_intr_count __P((int vec));
static void setup_8254_mixed_mode __P((void));
#endif
-#endif /* SMP */
/*
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
@@ -147,7 +143,9 @@ int tsc_is_broken;
int wall_cmos_clock; /* wall CMOS clock assumed if != 0 */
static int beeping = 0;
+#if 0
static u_int clk_imask = HWI_MASK | SWI_MASK;
+#endif
static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
static u_int hardclock_max_count;
static u_int32_t i8254_lastcount;
@@ -205,8 +203,12 @@ SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD,
static void
clkintr(struct clockframe frame)
{
+ int intrsave;
+
if (timecounter->tc_get_timecount == i8254_get_timecount) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
if (i8254_ticked)
i8254_ticked = 0;
else {
@@ -214,7 +216,8 @@ clkintr(struct clockframe frame)
i8254_lastcount = 0;
}
clkintr_pending = 0;
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
}
timer_func(&frame);
switch (timer0_state) {
@@ -233,14 +236,17 @@ clkintr(struct clockframe frame)
break;
case ACQUIRE_PENDING:
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = TIMER_DIV(new_rate);
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer_func = new_function;
timer0_state = ACQUIRED;
setdelayed();
@@ -249,7 +255,9 @@ clkintr(struct clockframe frame)
case RELEASE_PENDING:
if ((timer0_prescaler_count += timer0_max_count)
>= hardclock_max_count) {
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
i8254_offset = i8254_get_timecount(NULL);
i8254_lastcount = 0;
timer0_max_count = hardclock_max_count;
@@ -257,7 +265,8 @@ clkintr(struct clockframe frame)
TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
timer0_prescaler_count = 0;
timer_func = hardclock;
timer0_state = RELEASED;
@@ -404,11 +413,11 @@ DB_SHOW_COMMAND(rtc, rtc)
static int
getit(void)
{
- u_long ef;
- int high, low;
+ int high, low, intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -417,7 +426,7 @@ getit(void)
high = inb(TIMER_CNTR0);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return ((high << 8) | low);
}
@@ -523,6 +532,7 @@ sysbeepstop(void *chan)
int
sysbeep(int pitch, int period)
{
+ int intrsave;
int x = splclock();
if (acquire_timer2(TIMER_SQWAVE|TIMER_16BIT))
@@ -531,10 +541,13 @@ sysbeep(int pitch, int period)
splx(x);
return (-1); /* XXX Should be EBUSY, but nobody cares anyway. */
}
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_CNTR2, pitch);
outb(TIMER_CNTR2, (pitch>>8));
- enable_intr();
+ CLOCK_UNLOCK();
+ restore_intr(intrsave);
if (!beeping) {
/* enable counter2 output to speaker */
outb(IO_PPI, inb(IO_PPI) | 3);
@@ -683,11 +696,12 @@ fail:
static void
set_timer_freq(u_int freq, int intr_freq)
{
- u_long ef;
+ int intrsave;
int new_timer0_max_count;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
timer_freq = freq;
new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
if (new_timer0_max_count != timer0_max_count) {
@@ -697,7 +711,7 @@ set_timer_freq(u_int freq, int intr_freq)
outb(TIMER_CNTR0, timer0_max_count >> 8);
}
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -711,15 +725,16 @@ set_timer_freq(u_int freq, int intr_freq)
void
i8254_restore(void)
{
- u_long ef;
+ int intrsave;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
outb(TIMER_CNTR0, timer0_max_count & 0xff);
outb(TIMER_CNTR0, timer0_max_count >> 8);
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
}
/*
@@ -979,8 +994,8 @@ cpu_initclocks()
{
int diag;
#ifdef APIC_IO
- int apic_8254_trial;
- struct intrec *clkdesc;
+ int apic_8254_trial, num_8254_ticks;
+ struct intrec *clkdesc, *rtcdesc;
#endif /* APIC_IO */
if (statclock_disable) {
@@ -1014,14 +1029,15 @@ cpu_initclocks()
} else
panic("APIC_IO: Cannot route 8254 interrupt to CPU");
}
-
- clkdesc = inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
-
#else /* APIC_IO */
- inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, &clk_imask,
+ /*
+ * XXX Check the priority of this interrupt handler. I
+ * couldn't find anything suitable in the BSD/OS code (grog,
+ * 19 July 2000).
+ */
+ /* Setup the PIC clk handler. The APIC handler is setup later */
+ inthand_add("clk", 0, (inthand2_t *)clkintr, NULL, PI_REALTIME,
INTR_EXCL);
INTREN(IRQ0);
@@ -1032,8 +1048,18 @@ cpu_initclocks()
writertc(RTC_STATUSB, RTCSB_24HR);
/* Don't bother enabling the statistics clock. */
- if (statclock_disable)
+ if (statclock_disable) {
+#ifdef APIC_IO
+ /*
+ * XXX - if statclock is disabled, don't attempt the APIC
+ * trial. Not sure this is sane for APIC_IO.
+ */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif /* APIC_IO */
return;
+ }
diag = rtcin(RTC_DIAG);
if (diag != 0)
printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS);
@@ -1041,34 +1067,44 @@ cpu_initclocks()
#ifdef APIC_IO
if (isa_apic_irq(8) != 8)
panic("APIC RTC != 8");
-#endif /* APIC_IO */
- inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, &stat_imask,
- INTR_EXCL);
-
-#ifdef APIC_IO
- INTREN(APIC_IRQ8);
-#else
- INTREN(IRQ8);
-#endif /* APIC_IO */
+ if (apic_8254_trial) {
+ /*
+ * XXX - We use fast interrupts for clk and rtc long enough to
+ * perform the APIC probe and then revert to exclusive
+ * interrupts.
+ */
+ clkdesc = inthand_add("clk", apic_8254_intr,
+ (inthand2_t *)clkintr, NULL, PI_REALTIME, INTR_FAST);
+ INTREN(1 << apic_8254_intr);
- writertc(RTC_STATUSB, rtc_statusb);
+ rtcdesc = inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL,
+ PI_REALTIME, INTR_FAST); /* XXX */
+ INTREN(APIC_IRQ8);
+ writertc(RTC_STATUSB, rtc_statusb);
-#ifdef APIC_IO
- if (apic_8254_trial) {
-
printf("APIC_IO: Testing 8254 interrupt delivery\n");
while (read_intr_count(8) < 6)
; /* nothing */
- if (read_intr_count(apic_8254_intr) < 3) {
+ num_8254_ticks = read_intr_count(apic_8254_intr);
+
+ /* disable and remove our fake handlers */
+ INTRDIS(1 << apic_8254_intr);
+ inthand_remove(clkdesc);
+
+ writertc(RTC_STATUSA, rtc_statusa);
+ writertc(RTC_STATUSB, RTCSB_24HR);
+
+ INTRDIS(APIC_IRQ8);
+ inthand_remove(rtcdesc);
+
+ if (num_8254_ticks < 3) {
/*
* The MP table is broken.
* The 8254 was not connected to the specified pin
* on the IO APIC.
* Workaround: Limited variant of mixed mode.
*/
- INTRDIS(1 << apic_8254_intr);
- inthand_remove(clkdesc);
printf("APIC_IO: Broken MP table detected: "
"8254 is not connected to "
"IOAPIC #%d intpin %d\n",
@@ -1087,13 +1123,27 @@ cpu_initclocks()
}
apic_8254_intr = apic_irq(0, 0);
setup_8254_mixed_mode();
- inthand_add("clk", apic_8254_intr,
- (inthand2_t *)clkintr,
- NULL, &clk_imask, INTR_EXCL);
- INTREN(1 << apic_8254_intr);
}
}
+
+ /* Finally, setup the real clock handlers */
+ inthand_add("clk", apic_8254_intr, (inthand2_t *)clkintr, NULL,
+ PI_REALTIME, INTR_EXCL);
+ INTREN(1 << apic_8254_intr);
+#endif
+
+ inthand_add("rtc", 8, (inthand2_t *)rtcintr, NULL, PI_REALTIME,
+ INTR_EXCL);
+#ifdef APIC_IO
+ INTREN(APIC_IRQ8);
+#else
+ INTREN(IRQ8);
+#endif
+
+ writertc(RTC_STATUSB, rtc_statusb);
+
+#ifdef APIC_IO
if (apic_int_type(0, 0) != 3 ||
int_to_apicintpin[apic_8254_intr].ioapic != 0 ||
int_to_apicintpin[apic_8254_intr].int_pin != 0)
@@ -1198,11 +1248,12 @@ static unsigned
i8254_get_timecount(struct timecounter *tc)
{
u_int count;
- u_long ef;
+ int intrsave;
u_int high, low;
- ef = read_eflags();
+ intrsave = save_intr();
disable_intr();
+ CLOCK_LOCK();
/* Select timer0 and latch counter value. */
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
@@ -1212,7 +1263,7 @@ i8254_get_timecount(struct timecounter *tc)
count = timer0_max_count - ((high << 8) | low);
if (count < i8254_lastcount ||
(!i8254_ticked && (clkintr_pending ||
- ((count < 20 || (!(ef & PSL_I) && count < timer0_max_count / 2u)) &&
+ ((count < 20 || (!(intrsave & PSL_I) && count < timer0_max_count / 2u)) &&
#ifdef APIC_IO
#define lapic_irr1 ((volatile u_int *)&lapic)[0x210 / 4] /* XXX XXX */
/* XXX this assumes that apic_8254_intr is < 24. */
@@ -1227,7 +1278,7 @@ i8254_get_timecount(struct timecounter *tc)
i8254_lastcount = count;
count += i8254_offset;
CLOCK_UNLOCK();
- write_eflags(ef);
+ restore_intr(intrsave);
return (count);
}
diff --git a/sys/isa/sio.c b/sys/isa/sio.c
index 2725a20..a6f05e7 100644
--- a/sys/isa/sio.c
+++ b/sys/isa/sio.c
@@ -95,16 +95,12 @@
#endif
#include <isa/ic/ns16550.h>
+/* XXX - this is ok because we only do sio fast interrupts on i386 */
#ifndef __i386__
#define disable_intr()
#define enable_intr()
#endif
-#ifdef SMP
-#define disable_intr() COM_DISABLE_INTR()
-#define enable_intr() COM_ENABLE_INTR()
-#endif /* SMP */
-
#define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */
#define CALLOUT_MASK 0x80
@@ -760,6 +756,7 @@ sioprobe(dev, xrid)
u_int flags = device_get_flags(dev);
int rid;
struct resource *port;
+ int intrsave;
rid = xrid;
port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid,
@@ -856,7 +853,9 @@ sioprobe(dev, xrid)
* but mask them in the processor as well in case there are some
* (misconfigured) shared interrupts.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
/* EXTRA DELAY? */
/*
@@ -953,7 +952,8 @@ sioprobe(dev, xrid)
CLR_FLAG(dev, COM_C_IIR_TXRDYBUG);
}
sio_setreg(com, com_cfcr, CFCR_8BITS);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
bus_release_resource(dev, SYS_RES_IOPORT, rid, port);
return (iobase == siocniobase ? 0 : result);
}
@@ -993,7 +993,8 @@ sioprobe(dev, xrid)
irqmap[3] = isa_irq_pending();
failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
irqs = irqmap[1] & ~irqmap[0];
if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 &&
@@ -1181,7 +1182,6 @@ sioattach(dev, xrid)
} else
com->it_in.c_ispeed = com->it_in.c_ospeed = TTYDEF_SPEED;
if (siosetwater(com, com->it_in.c_ispeed) != 0) {
- enable_intr();
/*
* Leave i/o resources allocated if this is a `cn'-level
* console, so that other devices can't snarf them.
@@ -1190,7 +1190,6 @@ sioattach(dev, xrid)
bus_release_resource(dev, SYS_RES_IOPORT, rid, port);
return (ENOMEM);
}
- enable_intr();
termioschars(&com->it_in);
com->it_out = com->it_in;
@@ -1340,7 +1339,7 @@ determined_type: ;
RF_ACTIVE);
if (com->irqres) {
ret = BUS_SETUP_INTR(device_get_parent(dev), dev, com->irqres,
- INTR_TYPE_TTY | INTR_TYPE_FAST,
+ INTR_TYPE_TTY | INTR_FAST,
siointr, com, &com->cookie);
if (ret) {
ret = BUS_SETUP_INTR(device_get_parent(dev), dev,
@@ -1424,6 +1423,8 @@ open_top:
goto out;
}
} else {
+ int intrsave;
+
/*
* The device isn't open, so there are no conflicts.
* Initialize it. Initialization is done twice in many
@@ -1483,7 +1484,9 @@ open_top:
}
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
(void) inb(com->line_status_port);
(void) inb(com->data_port);
com->prev_modem_status = com->last_modem_status
@@ -1495,7 +1498,8 @@ open_top:
outb(com->intr_ctl_port, IER_ERXRDY | IER_ETXRDY
| IER_ERLS | IER_EMSC);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
/*
* Handle initial DCD. Callout devices get a fake initial
* DCD (trapdoor DCD). If we are callout, then any sleeping
@@ -1716,6 +1720,9 @@ siodtrwakeup(chan)
wakeup(&com->dtr_wait);
}
+/*
+ * Call this function with COM_LOCK held. It will return with the lock still held.
+ */
static void
sioinput(com)
struct com_s *com;
@@ -1725,6 +1732,7 @@ sioinput(com)
u_char line_status;
int recv_data;
struct tty *tp;
+ int intrsave;
buf = com->ibuf;
tp = com->tp;
@@ -1742,6 +1750,13 @@ sioinput(com)
* call overhead).
*/
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
incc = com->iptr - buf;
if (tp->t_rawq.c_cc + incc > tp->t_ihiwat
@@ -1763,10 +1778,18 @@ sioinput(com)
tp->t_lflag &= ~FLUSHO;
comstart(tp);
}
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
} else {
do {
+ /*
+ * This may look odd, but it is using save-and-enable
+ * semantics instead of the save-and-disable semantics
+ * that are used everywhere else.
+ */
+ intrsave = save_intr();
+ COM_UNLOCK();
enable_intr();
line_status = buf[com->ierroff];
recv_data = *buf++;
@@ -1782,7 +1805,8 @@ sioinput(com)
recv_data |= TTY_PE;
}
(*linesw[tp->t_line].l_rint)(recv_data, tp);
- disable_intr();
+ restore_intr(intrsave);
+ COM_LOCK();
} while (buf < com->iptr);
}
com_events -= (com->iptr - com->ibuf);
@@ -1893,12 +1917,16 @@ siointr1(com)
if (recv_data == KEY_CR) {
brk_state1 = recv_data;
brk_state2 = 0;
- } else if (brk_state1 == KEY_CR && (recv_data == KEY_TILDE || recv_data == KEY_CRTLB)) {
+ } else if (brk_state1 == KEY_CR
+ && (recv_data == KEY_TILDE
+ || recv_data == KEY_CRTLB)) {
if (recv_data == KEY_TILDE)
brk_state2 = recv_data;
- else if (brk_state2 == KEY_TILDE && recv_data == KEY_CRTLB) {
+ else if (brk_state2 == KEY_TILDE
+ && recv_data == KEY_CRTLB) {
breakpoint();
- brk_state1 = brk_state2 = 0;
+ brk_state1 = 0;
+ brk_state2 = 0;
goto cont;
} else
brk_state2 = 0;
@@ -1949,7 +1977,10 @@ siointr1(com)
if (com->do_timestamp)
microtime(&com->timestamp);
++com_events;
+/* XXX - needs to go away when alpha gets ithreads */
+#ifdef __alpha__
schedsofttty();
+#endif
#if 0 /* for testing input latency vs efficiency */
if (com->iptr - com->ibuf == 8)
setsofttty();
@@ -2217,10 +2248,12 @@ sioioctl(dev, cmd, data, flag, p)
return (0);
}
+/* software interrupt handler for SWI_TTY */
static void
siopoll()
{
int unit;
+ int intrsave;
if (com_events == 0)
return;
@@ -2239,7 +2272,9 @@ repeat:
* Discard any events related to never-opened or
* going-away devices.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
incc = com->iptr - com->ibuf;
com->iptr = com->ibuf;
if (com->state & CS_CHECKMSR) {
@@ -2247,33 +2282,43 @@ repeat:
com->state &= ~CS_CHECKMSR;
}
com_events -= incc;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
continue;
}
if (com->iptr != com->ibuf) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
sioinput(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (com->state & CS_CHECKMSR) {
u_char delta_modem_status;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta_modem_status = com->last_modem_status
^ com->prev_modem_status;
com->prev_modem_status = com->last_modem_status;
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_CHECKMSR;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta_modem_status & MSR_DCD)
(*linesw[tp->t_line].l_modem)
(tp, com->prev_modem_status & MSR_DCD);
}
if (com->state & CS_ODONE) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
com_events -= LOTS_OF_EVENTS;
com->state &= ~CS_ODONE;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (!(com->state & CS_BUSY)
&& !(com->extra_state & CSE_BUSYCHECK)) {
timeout(siobusycheck, com, hz / 100);
@@ -2301,6 +2346,7 @@ comparam(tp, t)
u_char dlbl;
int s;
int unit;
+ int intrsave;
/* do historical conversions */
if (t->c_ispeed == 0)
@@ -2367,11 +2413,10 @@ comparam(tp, t)
sio_setreg(com, com_fifo, com->fifo_image);
}
- /*
- * This returns with interrupts disabled so that we can complete
- * the speed change atomically. Keeping interrupts disabled is
- * especially important while com_data is hidden.
- */
+ intrsave = save_intr();
+ disable_intr();
+ COM_LOCK();
+
(void) siosetwater(com, t->c_ispeed);
if (divisor != 0) {
@@ -2459,7 +2504,8 @@ comparam(tp, t)
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
splx(s);
comstart(tp);
if (com->ibufold != NULL) {
@@ -2478,6 +2524,7 @@ siosetwater(com, speed)
u_char *ibuf;
int ibufsize;
struct tty *tp;
+ int intrsave;
/*
* Make the buffer size large enough to handle a softtty interrupt
@@ -2488,20 +2535,16 @@ siosetwater(com, speed)
cp4ticks = speed / 10 / hz * 4;
for (ibufsize = 128; ibufsize < cp4ticks;)
ibufsize <<= 1;
- if (ibufsize == com->ibufsize) {
- disable_intr();
+ if (ibufsize == com->ibufsize)
return (0);
- }
/*
* Allocate input buffer. The extra factor of 2 in the size is
* to allow for an error byte for each input byte.
*/
ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT);
- if (ibuf == NULL) {
- disable_intr();
+ if (ibuf == NULL)
return (ENOMEM);
- }
/* Initialize non-critical variables. */
com->ibufold = com->ibuf;
@@ -2517,7 +2560,9 @@ siosetwater(com, speed)
* Read current input buffer, if any. Continue with interrupts
* disabled.
*/
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->iptr != com->ibuf)
sioinput(com);
@@ -2536,6 +2581,8 @@ siosetwater(com, speed)
com->ibufend = ibuf + ibufsize;
com->ierroff = ibufsize;
com->ihighwater = ibuf + 3 * ibufsize / 4;
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2546,13 +2593,16 @@ comstart(tp)
struct com_s *com;
int s;
int unit;
+ int intrsave;
unit = DEV_TO_UNIT(tp->t_dev);
com = com_addr(unit);
if (com == NULL)
return;
s = spltty();
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (tp->t_state & TS_TTSTOP)
com->state &= ~CS_TTGO;
else
@@ -2565,7 +2615,8 @@ comstart(tp)
&& com->state & CS_RTS_IFLOW)
outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS);
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
ttwwakeup(tp);
splx(s);
@@ -2581,7 +2632,9 @@ comstart(tp)
sizeof com->obuf1);
com->obufs[0].l_next = NULL;
com->obufs[0].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2593,7 +2646,8 @@ comstart(tp)
com->obufq.l_next = &com->obufs[0];
com->state |= CS_BUSY;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) {
com->obufs[1].l_tail
@@ -2601,7 +2655,9 @@ comstart(tp)
sizeof com->obuf2);
com->obufs[1].l_next = NULL;
com->obufs[1].l_queued = TRUE;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state & CS_BUSY) {
qp = com->obufq.l_next;
while ((next = qp->l_next) != NULL)
@@ -2613,14 +2669,18 @@ comstart(tp)
com->obufq.l_next = &com->obufs[1];
com->state |= CS_BUSY;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
tp->t_state |= TS_BUSY;
}
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (com->state >= (CS_BUSY | CS_TTGO))
siointr1(com); /* fake interrupt to start output */
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
ttwwakeup(tp);
splx(s);
}
@@ -2631,11 +2691,14 @@ comstop(tp, rw)
int rw;
{
struct com_s *com;
+ int intrsave;
com = com_addr(DEV_TO_UNIT(tp->t_dev));
if (com == NULL || com->gone)
return;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
if (rw & FWRITE) {
if (com->hasfifo)
#ifdef COM_ESP
@@ -2662,7 +2725,8 @@ comstop(tp, rw)
com_events -= (com->iptr - com->ibuf);
com->iptr = com->ibuf;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
comstart(tp);
}
@@ -2674,6 +2738,7 @@ commctl(com, bits, how)
{
int mcr;
int msr;
+ int intrsave;
if (how == DMGET) {
bits = TIOCM_LE; /* XXX - always enabled while open */
@@ -2705,7 +2770,9 @@ commctl(com, bits, how)
mcr |= MCR_RTS;
if (com->gone)
return(0);
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
switch (how) {
case DMSET:
outb(com->modem_ctl_port,
@@ -2718,7 +2785,8 @@ commctl(com, bits, how)
outb(com->modem_ctl_port, com->mcr_image &= ~mcr);
break;
}
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
return (0);
}
@@ -2766,6 +2834,7 @@ comwakeup(chan)
{
struct com_s *com;
int unit;
+ int intrsave;
sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout);
@@ -2777,9 +2846,12 @@ comwakeup(chan)
com = com_addr(unit);
if (com != NULL && !com->gone
&& (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) {
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
siointr1(com);
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
}
}
@@ -2801,10 +2873,13 @@ comwakeup(chan)
u_int delta;
u_long total;
+ intrsave = save_intr();
disable_intr();
+ COM_LOCK();
delta = com->delta_error_counts[errnum];
com->delta_error_counts[errnum] = 0;
- enable_intr();
+ COM_UNLOCK();
+ restore_intr(intrsave);
if (delta == 0)
continue;
total = com->error_counts[errnum] += delta;
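
Two locking shapes recur in the sio changes above.  Most critical sections use the
usual save-and-disable form; sioinput(), which is entered with COM_LOCK held and
interrupts disabled, instead briefly gives both up so the line discipline can run.
A condensed sketch of the two, using only calls that appear in the patch:

    int intrsave;

    /* usual form: save state, disable interrupts, take the lock */
    intrsave = save_intr();
    disable_intr();
    COM_LOCK();
    /* ... touch com->state, FIFO registers, ... */
    COM_UNLOCK();
    restore_intr(intrsave);

    /* sioinput() form: temporarily drop both, then reacquire in order */
    intrsave = save_intr();
    COM_UNLOCK();
    enable_intr();
    /* ... hand data to the line discipline (may take a while) ... */
    restore_intr(intrsave);
    COM_LOCK();
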
diff --git a/sys/isofs/cd9660/cd9660_util.c b/sys/isofs/cd9660/cd9660_util.c
index 2a11dc2..d0f2e1c 100644
--- a/sys/isofs/cd9660/cd9660_util.c
+++ b/sys/isofs/cd9660/cd9660_util.c
@@ -41,6 +41,7 @@
*/
#include <sys/param.h>
+#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 6d0d915..f5ae66c 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -48,6 +48,7 @@
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
@@ -64,6 +65,8 @@
#include <sys/conf.h>
#include <machine/cpu.h>
+#include <machine/globals.h>
+#include <machine/mutex.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -260,6 +263,11 @@ proc0_init(void *dummy __unused)
p = &proc0;
/*
+ * Initialize magic number.
+ */
+ p->p_magic = P_MAGIC;
+
+ /*
* Initialize process and pgrp structures.
*/
procinit();
@@ -364,11 +372,20 @@ proc0_init(void *dummy __unused)
*/
(void)chgproccnt(cred0.p_uidinfo, 1, 0);
+ LIST_INIT(&p->p_heldmtx);
+ LIST_INIT(&p->p_contested);
+
/*
* Initialize the current process pointer (curproc) before
* any possible traps/probes to simplify trap processing.
*/
- SET_CURPROC(p);
+ PCPU_SET(curproc, p);
+
+ /*
+ * Enter the Giant mutex.
+ * XXX This should be done BEFORE cpu_startup().
+ */
+ mtx_enter(&Giant, MTX_DEF);
}
SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
@@ -389,7 +406,7 @@ proc0_post(void *dummy __unused)
p->p_runtime = 0;
}
microuptime(&switchtime);
- switchticks = ticks;
+ PCPU_SET(switchticks, ticks);
/*
* Give the ``random'' number generator a thump.
@@ -418,7 +435,6 @@ SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
***************************************************************************
*/
-
/*
* List of paths to try when searching for "init".
*/
@@ -444,6 +460,8 @@ start_init(void *dummy)
char *ucp, **uap, *arg0, *arg1;
struct proc *p;
+ mtx_enter(&Giant, MTX_DEF);
+
p = curproc;
/* Get the vnode for '/'. Set p->p_fd->fd_cdir to reference it. */
@@ -562,16 +580,12 @@ static void
create_init(const void *udata __unused)
{
int error;
- int s;
- s = splhigh();
- error = fork1(&proc0, RFFDG | RFPROC, &initproc);
+ error = fork1(&proc0, RFFDG | RFPROC | RFSTOPPED, &initproc);
if (error)
panic("cannot fork init: %d\n", error);
initproc->p_flag |= P_INMEM | P_SYSTEM;
cpu_set_fork_handler(initproc, start_init, NULL);
- remrunqueue(initproc);
- splx(s);
}
SYSINIT(init,SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
@@ -581,6 +595,9 @@ SYSINIT(init,SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
static void
kick_init(const void *udata __unused)
{
+ mtx_enter(&sched_lock, MTX_SPIN);
+ initproc->p_stat = SRUN;
setrunqueue(initproc);
+ mtx_exit(&sched_lock, MTX_SPIN);
}
SYSINIT(kickinit,SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
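
The init_main.c changes above show both mutex flavours introduced by this commit:
Giant is a default (blocking) mutex entered for long stretches of kernel code,
while sched_lock is a spin mutex held only across run-queue and p_stat updates.
A condensed sketch of the two patterns as they appear in proc0_init(),
start_init() and kick_init():

    /* default (sleep) mutex: may be held across blocking operations */
    mtx_enter(&Giant, MTX_DEF);
    /* ... most of the kernel still runs under Giant ... */
    mtx_exit(&Giant, MTX_DEF);

    /* spin mutex: short critical section around scheduler state */
    mtx_enter(&sched_lock, MTX_SPIN);
    p->p_stat = SRUN;
    setrunqueue(p);
    mtx_exit(&sched_lock, MTX_SPIN);
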
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 11e63a7..33eef3c 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -70,11 +70,7 @@ static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
/* Some of these don't belong here, but it's easiest to concentrate them. */
-#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
-#else
-static long cp_time[CPUSTATES];
-#endif
long tk_cancc;
long tk_nin;
@@ -156,7 +152,7 @@ hardclock(frame)
register struct proc *p;
p = curproc;
- if (p) {
+ if (p != idleproc) {
register struct pstats *pstats;
/*
@@ -325,12 +321,12 @@ statclock(frame)
struct rusage *ru;
struct vmspace *vm;
- if (curproc != NULL && CLKF_USERMODE(frame)) {
+ if (CLKF_USERMODE(frame)) {
/*
* Came from user mode; CPU was in user state.
* If this process is being profiled, record the tick.
*/
- p = curproc;
+ p = prevproc;
if (p->p_flag & P_PROFIL)
addupc_intr(p, CLKF_PC(frame), 1);
#if defined(SMP) && defined(BETTER_CLOCK)
@@ -379,20 +375,21 @@ statclock(frame)
* so that we know how much of its real time was spent
* in ``non-process'' (i.e., interrupt) work.
*/
- p = curproc;
- if (CLKF_INTR(frame)) {
- if (p != NULL)
- p->p_iticks++;
+ p = prevproc;
+ if (p->p_ithd) {
+ p->p_iticks++;
cp_time[CP_INTR]++;
- } else if (p != NULL) {
+ } else {
p->p_sticks++;
- cp_time[CP_SYS]++;
- } else
- cp_time[CP_IDLE]++;
+ if (p != idleproc)
+ cp_time[CP_SYS]++;
+ else
+ cp_time[CP_IDLE]++;
+ }
}
pscnt = psdiv;
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index df71fe0..7fccc16 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -349,7 +349,6 @@ exit1(p, rv)
*
* Other substructures are freed from wait().
*/
- SET_CURPROC(NULL);
if (--p->p_limit->p_refcnt == 0) {
FREE(p->p_limit, M_SUBPROC);
p->p_limit = NULL;
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index f24c97e..0aa31ab 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -52,6 +52,7 @@
#include <sys/resourcevar.h>
#include <sys/vnode.h>
#include <sys/acct.h>
+#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/unistd.h>
#include <sys/jail.h>
@@ -65,6 +66,8 @@
#include <sys/user.h>
+#include <machine/mutex.h>
+
static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");
static int fast_vfork = 1;
@@ -131,7 +134,8 @@ rfork(p, uap)
int error;
struct proc *p2;
- error = fork1(p, uap->flags, &p2);
+ /* mask kernel only flags out of the user flags */
+ error = fork1(p, uap->flags & ~RFKERNELONLY, &p2);
if (error == 0) {
p->p_retval[0] = p2 ? p2->p_pid : 0;
p->p_retval[1] = 0;
@@ -177,17 +181,19 @@ SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
int
fork1(p1, flags, procp)
- struct proc *p1;
+ struct proc *p1; /* parent proc */
int flags;
- struct proc **procp;
+ struct proc **procp; /* child proc */
{
struct proc *p2, *pptr;
uid_t uid;
struct proc *newproc;
+ int trypid;
int ok;
static int pidchecked = 0;
struct forklist *ep;
+ /* Can't copy and clear */
if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
return (EINVAL);
@@ -278,47 +284,56 @@ fork1(p1, flags, procp)
/*
* Find an unused process ID. We remember a range of unused IDs
* ready to use (from nextpid+1 through pidchecked-1).
+ *
+ * If RFHIGHPID is set (used during system boot), do not allocate
+ * low-numbered pids.
*/
- nextpid++;
+ trypid = nextpid + 1;
+ if (flags & RFHIGHPID) {
+ if (trypid < 10) {
+ trypid = 10;
+ }
+ } else {
if (randompid)
- nextpid += arc4random() % randompid;
+ trypid += arc4random() % randompid;
+ }
retry:
/*
* If the process ID prototype has wrapped around,
* restart somewhat above 0, as the low-numbered procs
* tend to include daemons that don't exit.
*/
- if (nextpid >= PID_MAX) {
- nextpid = nextpid % PID_MAX;
- if (nextpid < 100)
- nextpid += 100;
+ if (trypid >= PID_MAX) {
+ trypid = trypid % PID_MAX;
+ if (trypid < 100)
+ trypid += 100;
pidchecked = 0;
}
- if (nextpid >= pidchecked) {
+ if (trypid >= pidchecked) {
int doingzomb = 0;
pidchecked = PID_MAX;
/*
* Scan the active and zombie procs to check whether this pid
* is in use. Remember the lowest pid that's greater
- * than nextpid, so we can avoid checking for a while.
+ * than trypid, so we can avoid checking for a while.
*/
p2 = LIST_FIRST(&allproc);
again:
for (; p2 != 0; p2 = LIST_NEXT(p2, p_list)) {
- while (p2->p_pid == nextpid ||
- p2->p_pgrp->pg_id == nextpid ||
- p2->p_session->s_sid == nextpid) {
- nextpid++;
- if (nextpid >= pidchecked)
+ while (p2->p_pid == trypid ||
+ p2->p_pgrp->pg_id == trypid ||
+ p2->p_session->s_sid == trypid) {
+ trypid++;
+ if (trypid >= pidchecked)
goto retry;
}
- if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
+ if (p2->p_pid > trypid && pidchecked > p2->p_pid)
pidchecked = p2->p_pid;
- if (p2->p_pgrp->pg_id > nextpid &&
+ if (p2->p_pgrp->pg_id > trypid &&
pidchecked > p2->p_pgrp->pg_id)
pidchecked = p2->p_pgrp->pg_id;
- if (p2->p_session->s_sid > nextpid &&
+ if (p2->p_session->s_sid > trypid &&
pidchecked > p2->p_session->s_sid)
pidchecked = p2->p_session->s_sid;
}
@@ -331,11 +346,19 @@ again:
p2 = newproc;
p2->p_stat = SIDL; /* protect against others */
- p2->p_pid = nextpid;
+ p2->p_pid = trypid;
LIST_INSERT_HEAD(&allproc, p2, p_list);
LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
/*
+ * RFHIGHPID does not mess with the nextpid counter during boot.
+ */
+ if (flags & RFHIGHPID)
+ pidchecked = 0;
+ else
+ nextpid = trypid;
+
+ /*
* Make a proc table entry for the new process.
* Start by zeroing the section of proc that is zero-initialized,
* then copy the section that is copied directly from the parent.
@@ -456,6 +479,8 @@ again:
p2->p_pptr = pptr;
LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
LIST_INIT(&p2->p_children);
+ LIST_INIT(&p2->p_heldmtx);
+ LIST_INIT(&p2->p_contested);
#ifdef KTRACE
/*
@@ -496,14 +521,19 @@ again:
}
/*
- * Make child runnable and add to run queue.
+	 * If RFSTOPPED was not requested, make the child runnable and
+	 * add it to the run queue.
*/
microtime(&(p2->p_stats->p_start));
p2->p_acflag = AFORK;
- (void) splhigh();
- p2->p_stat = SRUN;
- setrunqueue(p2);
- (void) spl0();
+ if ((flags & RFSTOPPED) == 0) {
+ splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ p2->p_stat = SRUN;
+ setrunqueue(p2);
+ mtx_exit(&sched_lock, MTX_SPIN);
+ spl0();
+ }
/*
* Now can be swapped.
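
To summarize the pid-selection change above: fork1() now works on a local trypid
instead of bumping nextpid directly, and RFHIGHPID (used for kernel threads during
boot) keeps those threads out of the low pid range without disturbing the counter
that ordinary forks use.  Simplified sketch of the flow (the allproc/zombproc scan
that skips pids still in use is omitted):

    trypid = nextpid + 1;
    if (flags & RFHIGHPID) {
        if (trypid < 10)
            trypid = 10;            /* keep kernel threads above pid 9 */
    } else if (randompid)
        trypid += arc4random() % randompid;

    /* ... wrap at PID_MAX and skip pids still in use ... */

    p2->p_pid = trypid;
    if ((flags & RFHIGHPID) == 0)
        nextpid = trypid;           /* RFHIGHPID leaves the counter alone */
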
diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c
new file mode 100644
index 0000000..840c0f9
--- /dev/null
+++ b/sys/kern/kern_idle.c
@@ -0,0 +1,108 @@
+/*-
+ * Copyright (c) 2000, All rights reserved. See /usr/src/COPYRIGHT
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_ktrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/signalvar.h>
+#include <sys/resourcevar.h>
+#include <sys/vmmeter.h>
+#include <sys/sysctl.h>
+#include <sys/unistd.h>
+#include <sys/kthread.h>
+#include <sys/queue.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#ifdef KTRACE
+#include <sys/uio.h>
+#include <sys/ktrace.h>
+#endif
+
+#include <machine/cpu.h>
+#include <machine/ipl.h>
+#include <machine/mutex.h>
+#include <machine/smp.h>
+
+#include <machine/globaldata.h>
+#include <machine/globals.h>
+
+#ifdef SMP_DEBUG
+#include <sys/bus.h>
+#include <i386/isa/icu.h>
+#include <i386/isa/intr_machdep.h>
+#endif
+
+static void idle_setup(void *dummy);
+SYSINIT(idle_setup, SI_SUB_SCHED_IDLE, SI_ORDER_FIRST, idle_setup, NULL)
+
+static void idle_proc(void *dummy);
+
+/*
+ * setup per-cpu idle process contexts
+ */
+static void
+idle_setup(void *dummy)
+{
+ struct globaldata *gd;
+ int error;
+
+ SLIST_FOREACH(gd, &cpuhead, gd_allcpu) {
+#ifdef SMP
+ error = kthread_create(idle_proc, NULL, &gd->gd_idleproc,
+ RFSTOPPED|RFHIGHPID, "idle: cpu%d",
+ gd->gd_cpuid);
+#else
+ error = kthread_create(idle_proc, NULL, &gd->gd_idleproc,
+ RFSTOPPED|RFHIGHPID, "idle");
+#endif
+ if (error)
+ panic("idle_setup: kthread_create error %d\n", error);
+
+ gd->gd_idleproc->p_stat = SWAIT;
+ }
+}
+
+/*
+ * idle process context
+ */
+static void
+idle_proc(void *dummy)
+{
+ int count;
+
+ for (;;) {
+ /*
+ * Clear switchtime, which prevents the idle process's time
+ * from being counted.
+ switchtime.tv_usec = 0;
+ switchtime.tv_sec = 0;
+ */
+
+ mtx_assert(&Giant, MA_NOTOWNED);
+
+ count = 0;
+
+ while (count >= 0 && procrunnable() == 0) {
+ /*
+ * This is a good place to put things to be done in
+ * the background, including sanity checks.
+ */
+ if (count++ < 0)
+ CTR0(KTR_PROC, "idle_proc: timed out waiting"
+ " for a process");
+ }
+
+ mtx_enter(&sched_lock, MTX_SPIN);
+ idleproc->p_stat = SWAIT;
+ mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
+ spl0();
+ }
+}
diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c
index 6373750..e684b78 100644
--- a/sys/kern/kern_kthread.c
+++ b/sys/kern/kern_kthread.c
@@ -52,24 +52,33 @@ kproc_start(udata)
int error;
error = kthread_create((void (*)(void *))kp->func, NULL,
- kp->global_procpp, kp->arg0);
+ kp->global_procpp, 0, kp->arg0);
if (error)
panic("kproc_start: %s: error %d", kp->arg0, error);
}
/*
- * Create a kernel process/thread/whatever. It shares it's address space
+ * Create a kernel process/thread/whatever. It shares its address space
* with proc0 - ie: kernel only.
+ *
+ * func is the function to start.
+ * arg is the parameter to pass to function on first startup.
+ * newpp is the return value pointing to the thread's struct proc.
+ * flags are flags to fork1 (in unistd.h)
+ * fmt and following will be *printf'd into (*newpp)->p_comm (for ps, etc.).
*/
int
kthread_create(void (*func)(void *), void *arg,
- struct proc **newpp, const char *fmt, ...)
+ struct proc **newpp, int flags, const char *fmt, ...)
{
int error;
va_list ap;
struct proc *p2;
- error = fork1(&proc0, RFMEM | RFFDG | RFPROC, &p2);
+ if (!proc0.p_stats /* || proc0.p_stats->p_start.tv_sec == 0 */)
+ panic("kthread_create called too soon");
+
+ error = fork1(&proc0, RFMEM | RFFDG | RFPROC | flags, &p2);
if (error)
return error;
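
With the extra flags argument added above, a caller can create a thread that starts
out stopped and make it runnable itself later, the same way inthand_add() and
idle_setup() do.  A hypothetical sketch; the "mydaemon" name and mydaemon_loop()
function are made up for illustration:

    static struct proc *mydaemon_proc;
    int error;

    error = kthread_create(mydaemon_loop, NULL, &mydaemon_proc,
        RFSTOPPED | RFHIGHPID, "mydaemon");
    if (error)
        panic("cannot create mydaemon: %d", error);

    /* later, once it is safe for the thread to run */
    mtx_enter(&sched_lock, MTX_SPIN);
    mydaemon_proc->p_stat = SRUN;
    setrunqueue(mydaemon_proc);
    mtx_exit(&sched_lock, MTX_SPIN);
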
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
new file mode 100644
index 0000000..1ac3f58
--- /dev/null
+++ b/sys/kern/kern_mutex.c
@@ -0,0 +1,799 @@
+/*-
+ * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
+ * $FreeBSD$
+ */
+
+/*
+ * Main Entry: witness
+ * Pronunciation: 'wit-n&s
+ * Function: noun
+ * Etymology: Middle English witnesse, from Old English witnes knowledge,
+ * testimony, witness, from 2wit
+ * Date: before 12th century
+ * 1 : attestation of a fact or event : TESTIMONY
+ * 2 : one that gives evidence; specifically : one who testifies in
+ * a cause or before a judicial tribunal
+ * 3 : one asked to be present at a transaction so as to be able to
+ * testify to its having taken place
+ * 4 : one who has personal knowledge of something
+ * 5 a : something serving as evidence or proof : SIGN
+ * b : public affirmation by word or example of usually
+ * religious faith or conviction <the heroic witness to divine
+ * life -- Pilot>
+ * 6 capitalized : a member of the Jehovah's Witnesses
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/ktr.h>
+
+#include <machine/cpu.h>
+#define _KERN_MUTEX_C_ /* Cause non-inlined mtx_*() to be compiled. */
+#include <machine/mutex.h>
+
+/*
+ * The non-inlined versions of the mtx_*() functions are always built (above),
+ * but the witness code depends on the SMP_DEBUG and WITNESS kernel options
+ * being specified.
+ */
+#if (defined(SMP_DEBUG) && defined(WITNESS))
+
+#define WITNESS_COUNT 200
+#define WITNESS_NCHILDREN 2
+
+#ifndef WITNESS
+#define WITNESS 0 /* default off */
+#endif
+
+#ifndef SMP
+extern int witness_spin_check;
+#endif
+
+int witness_watch;
+
+typedef struct witness {
+ struct witness *w_next;
+ char *w_description;
+ char *w_file;
+ int w_line;
+ struct witness *w_morechildren;
+ u_char w_childcnt;
+ u_char w_Giant_squawked:1;
+ u_char w_other_squawked:1;
+ u_char w_same_squawked:1;
+ u_char w_sleep:1;
+ u_char w_spin:1; /* this is a spin mutex */
+ u_int w_level;
+ struct witness *w_children[WITNESS_NCHILDREN];
+} witness_t;
+
+typedef struct witness_blessed {
+ char *b_lock1;
+ char *b_lock2;
+} witness_blessed_t;
+
+#ifdef KDEBUG
+/*
+ * When WITNESS_KDEBUG is set to 1, it will cause the system to
+ * drop into kdebug() when:
+ * - a lock hierarchy violation occurs
+ * - locks are held when going to sleep.
+ */
+#ifndef WITNESS_KDEBUG
+#define WITNESS_KDEBUG 0
+#endif
+int witness_kdebug = WITNESS_KDEBUG;
+#endif /* KDEBUG */
+
+#ifndef WITNESS_SKIPSPIN
+#define WITNESS_SKIPSPIN 0
+#endif
+int witness_skipspin = WITNESS_SKIPSPIN;
+
+
+static mtx_t w_mtx;
+static witness_t *w_free;
+static witness_t *w_all;
+static int w_inited;
+static int witness_dead; /* fatal error, probably no memory */
+
+static witness_t w_data[WITNESS_COUNT];
+
+static witness_t *enroll __P((char *description, int flag));
+static int itismychild __P((witness_t *parent, witness_t *child));
+static void removechild __P((witness_t *parent, witness_t *child));
+static int isitmychild __P((witness_t *parent, witness_t *child));
+static int isitmydescendant __P((witness_t *parent, witness_t *child));
+static int dup_ok __P((witness_t *));
+static int blessed __P((witness_t *, witness_t *));
+static void witness_displaydescendants
+ __P((void(*)(const char *fmt, ...), witness_t *));
+static void witness_leveldescendents __P((witness_t *parent, int level));
+static void witness_levelall __P((void));
+static witness_t * witness_get __P((void));
+static void witness_free __P((witness_t *m));
+
+
+static char *ignore_list[] = {
+ "witness lock",
+ "Kdebug", /* breaks rules and may or may not work */
+ "Page Alias", /* sparc only, witness lock won't block intr */
+ NULL
+};
+
+static char *spin_order_list[] = {
+ "sched lock",
+ "log mtx",
+ "zslock", /* sparc only above log, this one is a real hack */
+ "time lock", /* above callout */
+ "callout mtx", /* above wayout */
+ /*
+ * leaf locks
+ */
+ "wayout mtx",
+ "kernel_pmap", /* sparc only, logically equal "pmap" below */
+ "pmap", /* sparc only */
+ NULL
+};
+
+static char *order_list[] = {
+ "tcb", "inp", "so_snd", "so_rcv", "Giant lock", NULL,
+ "udb", "inp", NULL,
+ "unp head", "unp", "so_snd", NULL,
+ "de0", "Giant lock", NULL,
+ "ifnet", "Giant lock", NULL,
+ "fifo", "so_snd", NULL,
+ "hme0", "Giant lock", NULL,
+ "esp0", "Giant lock", NULL,
+ "hfa0", "Giant lock", NULL,
+ "so_rcv", "atm_global", NULL,
+ "so_snd", "atm_global", NULL,
+ "NFS", "Giant lock", NULL,
+ NULL
+};
+
+static char *dup_list[] = {
+ "inp",
+ "process group",
+ "session",
+ "unp",
+ "rtentry",
+ "rawcb",
+ NULL
+};
+
+static char *sleep_list[] = {
+ "Giant lock",
+ NULL
+};
+
+/*
+ * Pairs of locks which have been blessed
+ * Don't complain about order problems with blessed locks
+ */
+static witness_blessed_t blessed_list[] = {
+};
+static int blessed_count = sizeof (blessed_list) / sizeof (witness_blessed_t);
+
+void
+witness_init(mtx_t *m, int flag)
+{
+ m->mtx_witness = enroll(m->mtx_description, flag);
+}
+
+void
+witness_destroy(mtx_t *m)
+{
+ mtx_t *m1;
+ struct proc *p;
+ p = CURPROC;
+ for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
+ m1 = LIST_NEXT(m1, mtx_held)) {
+ if (m1 == m) {
+ LIST_REMOVE(m, mtx_held);
+ break;
+ }
+ }
+ return;
+
+}
+
+void
+witness_enter(mtx_t *m, int flags, char *file, int line)
+{
+ witness_t *w, *w1;
+ mtx_t *m1;
+ struct proc *p;
+ int i;
+#ifdef KDEBUG
+ int go_into_kdebug = 0;
+#endif /* KDEBUG */
+
+ w = m->mtx_witness;
+ p = CURPROC;
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ i = witness_spin_check;
+ if (i != 0 && w->w_level < i) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ panic("mutex_enter(%s:%x, MTX_SPIN) out of order @ %s:%d"
+ " already holding %s:%x",
+ m->mtx_description, w->w_level, file, line,
+ spin_order_list[ffs(i)-1], i);
+ }
+ PCPU_SET(witness_spin_check, i | w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+ if (w->w_spin)
+ panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+ if (witness_dead)
+ goto out;
+ if (cold)
+ goto out;
+
+ if (!mtx_legal2block())
+ panic("blockable mtx_enter() of %s when not legal @ %s:%d",
+ m->mtx_description, file, line);
+ /*
+ * Is this the first mutex acquired?
+ */
+ if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
+ goto out;
+
+
+ if ((w1 = m1->mtx_witness) == w) {
+ if (w->w_same_squawked || dup_ok(w))
+ goto out;
+ w->w_same_squawked = 1;
+ printf("acquring duplicate lock of same type: \"%s\"\n",
+ m->mtx_description);
+ printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
+ printf(" 2nd @ %s:%d\n", file, line);
+#ifdef KDEBUG
+ go_into_kdebug = 1;
+#endif /* KDEBUG */
+ goto out;
+ }
+ MPASS(!mtx_owned(&w_mtx));
+ mtx_enter(&w_mtx, MTX_SPIN);
+ /*
+ * If we have a known higher number just say ok
+ */
+ if (witness_watch > 1 && w->w_level > w1->w_level) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ goto out;
+ }
+ if (isitmydescendant(m1->mtx_witness, w)) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ goto out;
+ }
+ for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
+
+ ASS(i < 200);
+ w1 = m1->mtx_witness;
+ if (isitmydescendant(w, w1)) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ if (blessed(w, w1))
+ goto out;
+ if (m1 == &Giant) {
+ if (w1->w_Giant_squawked)
+ goto out;
+ else
+ w1->w_Giant_squawked = 1;
+ } else {
+ if (w1->w_other_squawked)
+ goto out;
+ else
+ w1->w_other_squawked = 1;
+ }
+ printf("lock order reversal\n");
+ printf(" 1st %s last acquired @ %s:%d\n",
+ w->w_description, w->w_file, w->w_line);
+ printf(" 2nd %p %s @ %s:%d\n",
+ m1, w1->w_description, w1->w_file, w1->w_line);
+ printf(" 3rd %p %s @ %s:%d\n",
+ m, w->w_description, file, line);
+#ifdef KDEBUG
+ go_into_kdebug = 1;
+#endif /* KDEBUG */
+ goto out;
+ }
+ }
+ m1 = LIST_FIRST(&p->p_heldmtx);
+ if (!itismychild(m1->mtx_witness, w))
+ mtx_exit(&w_mtx, MTX_SPIN);
+
+out:
+#ifdef KDEBUG
+ if (witness_kdebug && go_into_kdebug)
+ kdebug();
+#endif /* KDEBUG */
+ w->w_file = file;
+ w->w_line = line;
+ m->mtx_line = line;
+ m->mtx_file = file;
+
+ /*
+ * If this pays off it likely means that a mutex being witnessed
+ * is acquired in hardclock. Put it in the ignore list. It is
+ * likely not the mutex this assert fails on.
+ */
+ ASS(m->mtx_held.le_prev == NULL);
+ LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
+}
+
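+/*
+ * Witness check for mtx_exit().  Clears the spin lock's level bit from
+ * the per-CPU witness_spin_check mask, or, for sleep mutexes, verifies
+ * that releasing (and possibly switching) here is legal and removes the
+ * mutex from the held-mutex list.
+ */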
+void
+witness_exit(mtx_t *m, int flags, char *file, int line)
+{
+ witness_t *w;
+
+ w = m->mtx_witness;
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+ if (w->w_spin)
+ panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+
+ if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
+ panic("switchable mtx_exit() of %s when not legal @ %s:%d",
+ m->mtx_description, file, line);
+ LIST_REMOVE(m, mtx_held);
+ m->mtx_held.le_prev = NULL;
+}
+
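+/*
+ * Witness bookkeeping for mtx_try_enter().  Only records the
+ * acquisition; no lock order checking is done for trylocks.
+ */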
+void
+witness_try_enter(mtx_t *m, int flags, char *file, int line)
+{
+ struct proc *p;
+ witness_t *w = m->mtx_witness;
+
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_try_enter: "
+ "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+
+ if (w->w_spin)
+ panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+
+ w->w_file = file;
+ w->w_line = line;
+ m->mtx_line = line;
+ m->mtx_file = file;
+ p = CURPROC;
+ ASS(m->mtx_held.le_prev == NULL);
+ LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
+}
+
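+/*
+ * Display the witness tree: recompute all levels, print the descendants
+ * of every acquired root witness, then list the mutexes that were never
+ * acquired.
+ */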
+void
+witness_display(void(*prnt)(const char *fmt, ...))
+{
+ witness_t *w, *w1;
+
+ witness_levelall();
+
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_file == NULL)
+ continue;
+ for (w1 = w_all; w1; w1 = w1->w_next) {
+ if (isitmychild(w1, w))
+ break;
+ }
+ if (w1 != NULL)
+ continue;
+ /*
+ * This lock has no ancestors, display its descendants.
+ */
+ witness_displaydescendants(prnt, w);
+ }
+ prnt("\nMutex which were never acquired\n");
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_file != NULL)
+ continue;
+ prnt("%s\n", w->w_description);
+ }
+}
+
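+/*
+ * Warn about any held mutexes, other than those on sleep_list and the
+ * mutex passed in (if any), when a process is about to sleep.  Returns
+ * the number of offending mutexes.
+ */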
+int
+witness_sleep(int check_only, mtx_t *mtx, char *file, int line)
+{
+ mtx_t *m;
+ struct proc *p;
+ char **sleep;
+ int n = 0;
+
+ p = CURPROC;
+ for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
+ m = LIST_NEXT(m, mtx_held)) {
+ if (m == mtx)
+ continue;
+ for (sleep = sleep_list; *sleep != NULL; sleep++)
+ if (strcmp(m->mtx_description, *sleep) == 0)
+ goto next;
+ printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
+ file, line, check_only ? "could sleep" : "sleeping",
+ m->mtx_description,
+ m->mtx_witness->w_file, m->mtx_witness->w_line);
+ n++;
+ next:
+ }
+#ifdef KDEBUG
+ if (witness_kdebug && n)
+ kdebug();
+#endif /* KDEBUG */
+ return (n);
+}
+
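+/*
+ * Find or create the witness for a lock description.  The first call
+ * initializes the witness mutex, the free list and the static order_list
+ * hierarchy.  Ignored locks (and, optionally, spin locks) get no witness;
+ * spin locks are assigned a level bit from their position in
+ * spin_order_list.
+ */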
+static witness_t *
+enroll(char *description, int flag)
+{
+ int i;
+ witness_t *w, *w1;
+ char **ignore;
+ char **order;
+
+ if (!witness_watch)
+ return (NULL);
+ for (ignore = ignore_list; *ignore != NULL; ignore++)
+ if (strcmp(description, *ignore) == 0)
+ return (NULL);
+
+ if (w_inited == 0) {
+ mtx_init(&w_mtx, "witness lock", MTX_DEF);
+ for (i = 0; i < WITNESS_COUNT; i++) {
+ w = &w_data[i];
+ witness_free(w);
+ }
+ w_inited = 1;
+ for (order = order_list; *order != NULL; order++) {
+ w = enroll(*order, MTX_DEF);
+ w->w_file = "order list";
+ for (order++; *order != NULL; order++) {
+ w1 = enroll(*order, MTX_DEF);
+ w1->w_file = "order list";
+ itismychild(w, w1);
+ w = w1;
+ }
+ }
+ }
+ if ((flag & MTX_SPIN) && witness_skipspin)
+ return (NULL);
+ mtx_enter(&w_mtx, MTX_SPIN);
+ for (w = w_all; w; w = w->w_next) {
+ if (strcmp(description, w->w_description) == 0) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return (w);
+ }
+ }
+ if ((w = witness_get()) == NULL)
+ return (NULL);
+ w->w_next = w_all;
+ w_all = w;
+ w->w_description = description;
+ mtx_exit(&w_mtx, MTX_SPIN);
+ if (flag & MTX_SPIN) {
+ w->w_spin = 1;
+
+ i = 1;
+ for (order = spin_order_list; *order != NULL; order++) {
+ if (strcmp(description, *order) == 0)
+ break;
+ i <<= 1;
+ }
+ if (*order == NULL)
+ panic("spin lock %s not in order list", description);
+ w->w_level = i;
+ }
+ return (w);
+}
+
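+/*
+ * Make "child" a direct child of "parent", spilling into additional
+ * witness structures once WITNESS_NCHILDREN slots are used, then prune
+ * redundant edges from the whole tree and recompute the levels.
+ * Returns 1 if no witness structure could be allocated.
+ */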
+static int
+itismychild(witness_t *parent, witness_t *child)
+{
+ static int recursed;
+
+ /*
+ * Insert "child" after "parent"
+ */
+ while (parent->w_morechildren)
+ parent = parent->w_morechildren;
+
+ if (parent->w_childcnt == WITNESS_NCHILDREN) {
+ if ((parent->w_morechildren = witness_get()) == NULL)
+ return (1);
+ parent = parent->w_morechildren;
+ }
+ ASS(child != NULL);
+ parent->w_children[parent->w_childcnt++] = child;
+ /*
+ * now prune whole tree
+ */
+ if (recursed)
+ return (0);
+ recursed = 1;
+ for (child = w_all; child != NULL; child = child->w_next) {
+ for (parent = w_all; parent != NULL;
+ parent = parent->w_next) {
+ if (!isitmychild(parent, child))
+ continue;
+ removechild(parent, child);
+ if (isitmydescendant(parent, child))
+ continue;
+ itismychild(parent, child);
+ }
+ }
+ recursed = 0;
+ witness_levelall();
+ return (0);
+}
+
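+/*
+ * Remove "child" from "parent", filling the hole with the last child
+ * and freeing any overflow witness structure that becomes empty.
+ */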
+static void
+removechild(witness_t *parent, witness_t *child)
+{
+ witness_t *w, *w1;
+ int i;
+
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ if (w->w_children[i] == child)
+ goto found;
+ return;
+found:
+ for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
+ continue;
+ w->w_children[i] = w1->w_children[--w1->w_childcnt];
+ ASS(w->w_children[i] != NULL);
+
+ if (w1->w_childcnt != 0)
+ return;
+
+ if (w1 == parent)
+ return;
+ for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
+ continue;
+ w->w_morechildren = 0;
+ witness_free(w1);
+}
+
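+/*
+ * Is "child" a direct child of "parent"?
+ */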
+static int
+isitmychild(witness_t *parent, witness_t *child)
+{
+ witness_t *w;
+ int i;
+
+ for (w = parent; w != NULL; w = w->w_morechildren) {
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (w->w_children[i] == child)
+ return (1);
+ }
+ }
+ return (0);
+}
+
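+/*
+ * Is "child" a descendant of "parent", any number of levels down?
+ */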
+static int
+isitmydescendant(witness_t *parent, witness_t *child)
+{
+ witness_t *w;
+ int i;
+ int j;
+
+ for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
+ ASS(j < 1000);
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (w->w_children[i] == child)
+ return (1);
+ }
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (isitmydescendant(w->w_children[i], child))
+ return (1);
+ }
+ }
+ return (0);
+}
+
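+/*
+ * Recompute the level of every non-spin witness, starting from the
+ * roots (witnesses that are nobody's child).
+ */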
+void
+witness_levelall (void)
+{
+ witness_t *w, *w1;
+
+ for (w = w_all; w; w = w->w_next)
+ if (!w->w_spin)
+ w->w_level = 0;
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_spin)
+ continue;
+ for (w1 = w_all; w1; w1 = w1->w_next) {
+ if (isitmychild(w1, w))
+ break;
+ }
+ if (w1 != NULL)
+ continue;
+ witness_leveldescendents(w, 0);
+ }
+}
+
+static void
+witness_leveldescendents(witness_t *parent, int level)
+{
+ int i;
+ witness_t *w;
+
+ if (parent->w_level < level)
+ parent->w_level = level;
+ level++;
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ witness_leveldescendents(w->w_children[i], level);
+}
+
+static void
+witness_displaydescendants(void(*prnt)(const char *fmt, ...), witness_t *parent)
+{
+ witness_t *w;
+ int i;
+ int level = parent->w_level;
+
+ prnt("%d", level);
+ if (level < 10)
+ prnt(" ");
+ for (i = 0; i < level; i++)
+ prnt(" ");
+ prnt("%s", parent->w_description);
+ if (parent->w_file != NULL) {
+ prnt(" -- last acquired @ %s", parent->w_file);
+#ifndef W_USE_WHERE
+ prnt(":%d", parent->w_line);
+#endif
+ prnt("\n");
+ }
+
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ witness_displaydescendants(prnt, w->w_children[i]);
+ }
+
+static int
+dup_ok(witness_t *w)
+{
+ char **dup;
+
+ for (dup = dup_list; *dup != NULL; dup++)
+ if (strcmp(w->w_description, *dup) == 0)
+ return (1);
+ return (0);
+}
+
+static int
+blessed(witness_t *w1, witness_t *w2)
+{
+ int i;
+ witness_blessed_t *b;
+
+ for (i = 0; i < blessed_count; i++) {
+ b = &blessed_list[i];
+ if (strcmp(w1->w_description, b->b_lock1) == 0) {
+ if (strcmp(w2->w_description, b->b_lock2) == 0)
+ return (1);
+ continue;
+ }
+ if (strcmp(w1->w_description, b->b_lock2) == 0)
+ if (strcmp(w2->w_description, b->b_lock1) == 0)
+ return (1);
+ }
+ return (0);
+}
+
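+/*
+ * Allocate a witness from the static free list.  When the list is
+ * exhausted, witness_dead is set and further checking is effectively
+ * disabled.
+ */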
+static witness_t *
+witness_get()
+{
+ witness_t *w;
+
+ if ((w = w_free) == NULL) {
+ witness_dead = 1;
+ mtx_exit(&w_mtx, MTX_SPIN);
+ printf("witness exhausted\n");
+ return (NULL);
+ }
+ w_free = w->w_next;
+ bzero(w, sizeof (*w));
+ return (w);
+}
+
+static void
+witness_free(witness_t *w)
+{
+ w->w_next = w_free;
+ w_free = w;
+}
+
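+/*
+ * Print every mutex currently held by the given process.
+ */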
+void
+witness_list(struct proc *p)
+{
+ mtx_t *m;
+
+ for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
+ m = LIST_NEXT(m, mtx_held)) {
+ printf("\t\"%s\" (%p) locked at %s:%d\n",
+ m->mtx_description, m,
+ m->mtx_witness->w_file, m->mtx_witness->w_line);
+ }
+}
+
+void
+witness_save(mtx_t *m, char **filep, int *linep)
+{
+ *filep = m->mtx_witness->w_file;
+ *linep = m->mtx_witness->w_line;
+}
+
+void
+witness_restore(mtx_t *m, char *file, int line)
+{
+ m->mtx_witness->w_file = file;
+ m->mtx_witness->w_line = line;
+}
+
+#endif /* (defined(SMP_DEBUG) && defined(WITNESS)) */
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 7ec2628..4800747 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -73,6 +73,7 @@ u_long pgrphash;
struct proclist allproc;
struct proclist zombproc;
vm_zone_t proc_zone;
+vm_zone_t ithread_zone;
/*
* Initialize global process hashing structures.
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index f2a8fa6..3344f7e 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -530,7 +530,7 @@ calcru(p, up, sp, ip)
microuptime(&tv);
if (timevalcmp(&tv, &switchtime, <))
printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
- switchtime.tv_sec, switchtime.tv_usec,
+ switchtime.tv_sec, switchtime.tv_usec,
tv.tv_sec, tv.tv_usec);
else
tu += (tv.tv_usec - switchtime.tv_usec) +
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 9c744c7..8a6ccd8 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -63,6 +63,7 @@
#include <machine/pcb.h>
#include <machine/clock.h>
+#include <machine/lock.h>
#include <machine/md_var.h>
#include <machine/smp.h> /* smp_active, cpuid */
@@ -524,6 +525,11 @@ panic(const char *fmt, ...)
va_list ap;
static char buf[256];
+#ifdef SMP
+ /* Only 1 CPU can panic at a time */
+ s_lock(&panic_lock);
+#endif
+
bootopt = RB_AUTOBOOT | RB_DUMP;
if (panicstr)
bootopt |= RB_NOSYNC;
@@ -537,8 +543,7 @@ panic(const char *fmt, ...)
va_end(ap);
printf("panic: %s\n", buf);
#ifdef SMP
- /* three seperate prints in case of an unmapped page and trap */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of an unmapped page and trap */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index a2ff2ef..a39a4c8 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -56,6 +56,7 @@
#include <sys/acct.h>
#include <sys/fcntl.h>
#include <sys/wait.h>
+#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/syslog.h>
#include <sys/stat.h>
@@ -1465,6 +1466,8 @@ killproc(p, why)
struct proc *p;
char *why;
{
+ CTR3(KTR_PROC, "killproc: proc %p (pid %d, %s)",
+ p, p->p_pid, p->p_comm);
log(LOG_ERR, "pid %d (%s), uid %d, was killed: %s\n", p->p_pid, p->p_comm,
p->p_cred && p->p_ucred ? p->p_ucred->cr_uid : -1, why);
psignal(p, SIGKILL);
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c
index c0f7f64..d9a599a 100644
--- a/sys/kern/kern_subr.c
+++ b/sys/kern/kern_subr.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/lock.h>
@@ -52,6 +53,8 @@
#include <vm/vm_page.h>
#include <vm/vm_map.h>
+#include <machine/mutex.h>
+
static void uio_yield __P((void));
int
@@ -421,10 +424,12 @@ uio_yield()
int s;
p = curproc;
- p->p_priority = p->p_usrpri;
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ p->p_priority = p->p_usrpri;
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
}
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index 3146f9e..8f47dba 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -29,27 +29,39 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/queue.h>
+#include <machine/mutex.h>
+
/*
* We have NQS (32) run queues per scheduling class. For the normal
* class, there are 128 priorities scaled onto these 32 queues. New
* processes are added to the last entry in each queue, and processes
* are selected for running by taking them from the head and maintaining
- * a simple FIFO arrangement. Realtime and Idle priority processes have
- * and explicit 0-31 priority which maps directly onto their class queue
- * index. When a queue has something in it, the corresponding bit is
- * set in the queuebits variable, allowing a single read to determine
- * the state of all 32 queues and then a ffs() to find the first busy
+ * a simple FIFO arrangement.
+ *
+ * Interrupt, real time and idle priority processes have an explicit
+ * 0-31 priority which maps directly onto their class queue index.
+ * When a queue has something in it, the corresponding bit is set in
+ * the queuebits variable, allowing a single read to determine the
+ * state of all 32 queues and then a ffs() to find the first busy
* queue.
+ *
+ * XXX This needs fixing. First, we only have one idle process, so we
+ * hardly need 32 queues for it. Secondly, the number of classes
+ * makes things unwieldy. We should be able to merge them into a
+ * single 96 or 128 entry queue.
*/
-struct rq queues[NQS];
-struct rq rtqueues[NQS];
-struct rq idqueues[NQS];
-u_int32_t queuebits;
+struct rq itqueues[NQS]; /* interrupt threads */
+struct rq rtqueues[NQS]; /* real time processes */
+struct rq queues[NQS]; /* time sharing processes */
+struct rq idqueues[NQS]; /* idle process */
+u_int32_t itqueuebits;
u_int32_t rtqueuebits;
+u_int32_t queuebits;
u_int32_t idqueuebits;
/*
@@ -61,8 +73,9 @@ rqinit(void *dummy)
int i;
for (i = 0; i < NQS; i++) {
- TAILQ_INIT(&queues[i]);
+ TAILQ_INIT(&itqueues[i]);
TAILQ_INIT(&rtqueues[i]);
+ TAILQ_INIT(&queues[i]);
TAILQ_INIT(&idqueues[i]);
}
}
@@ -81,22 +94,37 @@ setrunqueue(struct proc *p)
struct rq *q;
u_int8_t pri;
- KASSERT(p->p_stat == SRUN, ("setrunqueue: proc not SRUN"));
- if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
- pri = p->p_priority >> 2;
- q = &queues[pri];
- queuebits |= 1 << pri;
- } else if (p->p_rtprio.type == RTP_PRIO_REALTIME ||
+ mtx_assert(&sched_lock, MA_OWNED);
+ KASSERT(p->p_stat == SRUN, ("setrunqueue: proc %p (%s) not SRUN", p, \
+ p->p_comm));
+
+ /*
+ * Decide which class we want to run. We now have four
+ * queues, and this is becoming ugly. We should be able to
+ * collapse the first three classes into a single contiguous
+ * queue. XXX FIXME.
+ */
+ CTR4(KTR_PROC, "setrunqueue: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
+ if (p->p_rtprio.type == RTP_PRIO_ITHREAD) { /* interrupt thread */
+ pri = p->p_rtprio.prio;
+ q = &itqueues[pri];
+ itqueuebits |= 1 << pri;
+ } else if (p->p_rtprio.type == RTP_PRIO_REALTIME || /* real time */
p->p_rtprio.type == RTP_PRIO_FIFO) {
pri = p->p_rtprio.prio;
q = &rtqueues[pri];
rtqueuebits |= 1 << pri;
- } else if (p->p_rtprio.type == RTP_PRIO_IDLE) {
+ } else if (p->p_rtprio.type == RTP_PRIO_NORMAL) { /* time sharing */
+ pri = p->p_priority >> 2;
+ q = &queues[pri];
+ queuebits |= 1 << pri;
+ } else if (p->p_rtprio.type == RTP_PRIO_IDLE) { /* idle proc */
pri = p->p_rtprio.prio;
q = &idqueues[pri];
idqueuebits |= 1 << pri;
} else {
- panic("setrunqueue: invalid rtprio type");
+ panic("setrunqueue: invalid rtprio type %d", p->p_rtprio.type);
}
p->p_rqindex = pri; /* remember the queue index */
TAILQ_INSERT_TAIL(q, p, p_procq);
@@ -114,14 +142,20 @@ remrunqueue(struct proc *p)
u_int32_t *which;
u_int8_t pri;
+ CTR4(KTR_PROC, "remrunqueue: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
+ mtx_assert(&sched_lock, MA_OWNED);
pri = p->p_rqindex;
- if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
- q = &queues[pri];
- which = &queuebits;
+ if (p->p_rtprio.type == RTP_PRIO_ITHREAD) {
+ q = &itqueues[pri];
+ which = &itqueuebits;
} else if (p->p_rtprio.type == RTP_PRIO_REALTIME ||
p->p_rtprio.type == RTP_PRIO_FIFO) {
q = &rtqueues[pri];
which = &rtqueuebits;
+ } else if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
+ q = &queues[pri];
+ which = &queuebits;
} else if (p->p_rtprio.type == RTP_PRIO_IDLE) {
q = &idqueues[pri];
which = &idqueuebits;
@@ -142,11 +176,17 @@ remrunqueue(struct proc *p)
* loop to avoid the more expensive (and destructive) chooseproc().
*
* MP SAFE. CALLED WITHOUT THE MP LOCK
+ *
+ * XXX I doubt this. It's possibly fail-safe, but there's obviously
+ * the case here where one of the bits words gets loaded, the
+ * processor gets preempted, and by the time it returns from this
+ * function, some other processor has picked the runnable process.
+ * What am I missing? (grog, 23 July 2000).
*/
u_int32_t
procrunnable(void)
{
- return (rtqueuebits || queuebits || idqueuebits);
+ return (itqueuebits || rtqueuebits || queuebits || idqueuebits);
}
/*
@@ -173,7 +213,12 @@ chooseproc(void)
u_char id;
#endif
- if (rtqueuebits) {
+ mtx_assert(&sched_lock, MA_OWNED);
+ if (itqueuebits) {
+ pri = ffs(itqueuebits) - 1;
+ q = &itqueues[pri];
+ which = &itqueuebits;
+ } else if (rtqueuebits) {
pri = ffs(rtqueuebits) - 1;
q = &rtqueues[pri];
which = &rtqueuebits;
@@ -186,10 +231,12 @@ chooseproc(void)
q = &idqueues[pri];
which = &idqueuebits;
} else {
- return NULL;
+ CTR1(KTR_PROC, "chooseproc: idleproc, schedlock %x",
+ sched_lock.mtx_lock);
+ idleproc->p_stat = SRUN;
+ return idleproc;
}
p = TAILQ_FIRST(q);
- KASSERT(p, ("chooseproc: no proc on busy queue"));
#ifdef SMP
/* wander down the current run queue for this pri level for a match */
id = cpuid;
@@ -201,6 +248,9 @@ chooseproc(void)
}
}
#endif
+ CTR4(KTR_PROC, "chooseproc: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
+ KASSERT(p, ("chooseproc: no proc on busy queue"));
TAILQ_REMOVE(q, p, p_procq);
if (TAILQ_EMPTY(q))
*which &= ~(1 << pri);
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index f747759..f397f40 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -45,6 +45,7 @@
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
@@ -59,6 +60,7 @@
#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/smp.h>
+#include <machine/mutex.h>
static void sched_setup __P((void *dummy));
SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)
@@ -135,7 +137,7 @@ maybe_resched(chk)
* standard process becomes runaway cpu-bound, the system can lockup
* due to idle-scheduler processes in wakeup never getting any cpu.
*/
- if (p == NULL) {
+ if (p == idleproc) {
#if 0
need_resched();
#endif
@@ -169,7 +171,7 @@ roundrobin(arg)
need_resched();
forward_roundrobin();
#else
- if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type))
+ if (p == idleproc || RTP_PRIO_NEED_RR(p->p_rtprio.type))
need_resched();
#endif
@@ -284,6 +286,8 @@ schedcpu(arg)
* Increment time in/out of memory and sleep time
* (if sleeping). We ignore overflow; with 16-bit int's
* (remember them?) overflow takes 45 days.
+ if (p->p_stat == SWAIT)
+ continue;
*/
p->p_swtime++;
if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
@@ -295,7 +299,12 @@ schedcpu(arg)
*/
if (p->p_slptime > 1)
continue;
- s = splhigh(); /* prevent state changes and protect run queue */
+ /*
+ * prevent state changes and protect run queue
+ */
+ s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
+
/*
* p_pctcpu is only for ps.
*/
@@ -325,6 +334,7 @@ schedcpu(arg)
} else
p->p_priority = p->p_usrpri;
}
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
}
vmmeter();
@@ -364,6 +374,7 @@ updatepri(p)
static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE];
#define LOOKUP(x) (((intptr_t)(x) >> 8) & (TABLESIZE - 1))
+#if 0
/*
* During autoconfiguration or after a panic, a sleep will simply
* lower the priority briefly to allow interrupts, then return.
@@ -374,6 +385,7 @@ static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE];
* higher to block network software interrupts after panics.
*/
int safepri;
+#endif
void
sleepinit(void)
@@ -406,11 +418,15 @@ tsleep(ident, priority, wmesg, timo)
struct proc *p = curproc;
int s, sig, catch = priority & PCATCH;
struct callout_handle thandle;
+ int rval = 0;
#ifdef KTRACE
if (p && KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 1, 0);
#endif
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_enter(&sched_lock, MTX_SPIN);
+
s = splhigh();
if (cold || panicstr) {
/*
@@ -419,10 +435,14 @@ tsleep(ident, priority, wmesg, timo)
* don't run any other procs or panic below,
* in case this is the idle process and already asleep.
*/
+ mtx_exit(&sched_lock, MTX_SPIN);
+#if 0
splx(safepri);
+#endif
splx(s);
return (0);
}
+
KASSERT(p != NULL, ("tsleep1"));
KASSERT(ident != NULL && p->p_stat == SRUN, ("tsleep"));
/*
@@ -436,6 +456,9 @@ tsleep(ident, priority, wmesg, timo)
p->p_wmesg = wmesg;
p->p_slptime = 0;
p->p_priority = priority & PRIMASK;
+ p->p_nativepri = p->p_priority;
+ CTR4(KTR_PROC, "tsleep: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
if (timo)
thandle = timeout(endtsleep, (void *)p, timo);
@@ -449,6 +472,9 @@ tsleep(ident, priority, wmesg, timo)
* stopped, p->p_wchan will be 0 upon return from CURSIG.
*/
if (catch) {
+ CTR4(KTR_PROC,
+ "tsleep caught: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
p->p_flag |= P_SINTR;
if ((sig = CURSIG(p))) {
if (p->p_wchan)
@@ -465,6 +491,9 @@ tsleep(ident, priority, wmesg, timo)
p->p_stat = SSLEEP;
p->p_stats->p_ru.ru_nvcsw++;
mi_switch();
+ CTR4(KTR_PROC,
+ "tsleep resume: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
resume:
curpriority = p->p_usrpri;
splx(s);
@@ -476,7 +505,8 @@ resume:
if (KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 0, 0);
#endif
- return (EWOULDBLOCK);
+ rval = EWOULDBLOCK;
+ goto out;
}
} else if (timo)
untimeout(endtsleep, (void *)p, thandle);
@@ -486,14 +516,19 @@ resume:
ktrcsw(p->p_tracep, 0, 0);
#endif
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
- return (EINTR);
- return (ERESTART);
+ rval = EINTR;
+ else
+ rval = ERESTART;
+ goto out;
}
+out:
+ mtx_exit(&sched_lock, MTX_SPIN);
#ifdef KTRACE
if (KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 0, 0);
#endif
- return (0);
+
+ return (rval);
}
/*
@@ -519,13 +554,14 @@ asleep(void *ident, int priority, const char *wmesg, int timo)
int s;
/*
- * splhigh() while manipulating sleep structures and slpque.
+ * obtain sched_lock while manipulating sleep structures and slpque.
*
* Remove preexisting wait condition (if any) and place process
* on appropriate slpque, but do not put process to sleep.
*/
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
if (p->p_wchan != NULL)
unsleep(p);
@@ -539,6 +575,7 @@ asleep(void *ident, int priority, const char *wmesg, int timo)
TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
}
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
return(0);
@@ -560,8 +597,12 @@ int
await(int priority, int timo)
{
struct proc *p = curproc;
+ int rval = 0;
int s;
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_enter(&sched_lock, MTX_SPIN);
+
s = splhigh();
if (p->p_wchan != NULL) {
@@ -616,7 +657,8 @@ resume:
if (KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 0, 0);
#endif
- return (EWOULDBLOCK);
+ rval = EWOULDBLOCK;
+ goto out;
}
} else if (timo)
untimeout(endtsleep, (void *)p, thandle);
@@ -626,8 +668,10 @@ resume:
ktrcsw(p->p_tracep, 0, 0);
#endif
if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
- return (EINTR);
- return (ERESTART);
+ rval = EINTR;
+ else
+ rval = ERESTART;
+ goto out;
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_CSW))
@@ -655,7 +699,10 @@ resume:
*/
p->p_asleep.as_priority = 0;
- return (0);
+out:
+ mtx_exit(&sched_lock, MTX_SPIN);
+
+ return (rval);
}
/*
@@ -673,7 +720,11 @@ endtsleep(arg)
int s;
p = (struct proc *)arg;
+ CTR4(KTR_PROC,
+ "endtsleep: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
if (p->p_wchan) {
if (p->p_stat == SSLEEP)
setrunnable(p);
@@ -681,6 +732,7 @@ endtsleep(arg)
unsleep(p);
p->p_flag |= P_TIMEOUT;
}
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
}
@@ -694,10 +746,12 @@ unsleep(p)
int s;
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
if (p->p_wchan) {
TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq);
p->p_wchan = 0;
}
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
}
@@ -713,6 +767,7 @@ wakeup(ident)
int s;
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
qp = &slpque[LOOKUP(ident)];
restart:
TAILQ_FOREACH(p, qp, p_procq) {
@@ -721,6 +776,9 @@ restart:
p->p_wchan = 0;
if (p->p_stat == SSLEEP) {
/* OPTIMIZED EXPANSION OF setrunnable(p); */
+ CTR4(KTR_PROC,
+ "wakeup: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
if (p->p_slptime > 1)
updatepri(p);
p->p_slptime = 0;
@@ -737,6 +795,7 @@ restart:
}
}
}
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
}
@@ -754,6 +813,7 @@ wakeup_one(ident)
int s;
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
qp = &slpque[LOOKUP(ident)];
TAILQ_FOREACH(p, qp, p_procq) {
@@ -762,6 +822,9 @@ wakeup_one(ident)
p->p_wchan = 0;
if (p->p_stat == SSLEEP) {
/* OPTIMIZED EXPANSION OF setrunnable(p); */
+ CTR4(KTR_PROC,
+ "wakeup1: proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
if (p->p_slptime > 1)
updatepri(p);
p->p_slptime = 0;
@@ -778,6 +841,7 @@ wakeup_one(ident)
}
}
}
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
}
@@ -791,7 +855,9 @@ mi_switch()
struct timeval new_switchtime;
register struct proc *p = curproc; /* XXX */
register struct rlimit *rlim;
+ int giantreleased;
int x;
+ WITNESS_SAVE_DECL(Giant);
/*
* XXX this spl is almost unnecessary. It is partly to allow for
@@ -812,6 +878,14 @@ mi_switch()
*/
x = splstatclock();
+ CTR4(KTR_PROC, "mi_switch: old proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
+ mtx_enter(&sched_lock, MTX_SPIN | MTX_RLIKELY);
+
+ WITNESS_SAVE(&Giant, Giant);
+ for (giantreleased = 0; mtx_owned(&Giant); giantreleased++)
+ mtx_exit(&Giant, MTX_DEF | MTX_NOSWITCH);
+
#ifdef SIMPLELOCK_DEBUG
if (p->p_simple_locks)
printf("sleep: holding simple lock\n");
@@ -823,7 +897,7 @@ mi_switch()
microuptime(&new_switchtime);
if (timevalcmp(&new_switchtime, &switchtime, <)) {
printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
- switchtime.tv_sec, switchtime.tv_usec,
+ switchtime.tv_sec, switchtime.tv_usec,
new_switchtime.tv_sec, new_switchtime.tv_usec);
new_switchtime = switchtime;
} else {
@@ -834,6 +908,8 @@ mi_switch()
/*
* Check if the process exceeds its cpu resource allocation.
* If over max, kill it.
+ *
+ * XXX drop sched_lock, pickup Giant
*/
if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
p->p_runtime > p->p_limit->p_cpulimit) {
@@ -854,10 +930,18 @@ mi_switch()
*/
cnt.v_swtch++;
switchtime = new_switchtime;
- cpu_switch(p);
+ CTR4(KTR_PROC, "mi_switch: old proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
+ cpu_switch();
+ CTR4(KTR_PROC, "mi_switch: new proc %p (pid %d, %s), schedlock %x",
+ p, p->p_pid, p->p_comm, sched_lock.mtx_lock);
if (switchtime.tv_sec == 0)
microuptime(&switchtime);
switchticks = ticks;
+ mtx_exit(&sched_lock, MTX_SPIN);
+ while (giantreleased--)
+ mtx_enter(&Giant, MTX_DEF);
+ WITNESS_RESTORE(&Giant, Giant);
splx(x);
}
@@ -874,10 +958,12 @@ setrunnable(p)
register int s;
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
switch (p->p_stat) {
case 0:
case SRUN:
case SZOMB:
+ case SWAIT:
default:
panic("setrunnable");
case SSTOP:
@@ -891,6 +977,7 @@ setrunnable(p)
p->p_stat = SRUN;
if (p->p_flag & P_INMEM)
setrunqueue(p);
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
if (p->p_slptime > 1)
updatepri(p);
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index b8d5833..1128c2e 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -24,7 +24,7 @@
* Number of timecounters used to implement stable storage
*/
#ifndef NTIMECOUNTER
-#define NTIMECOUNTER 5
+#define NTIMECOUNTER 45
#endif
static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter",
@@ -148,6 +148,13 @@ nanotime(struct timespec *ts)
nnanotime++;
tc = timecounter;
+#ifdef KTR
+ if (tc == NULL) { /* called before initialization */
+ ts->tv_sec = 0;
+ ts->tv_nsec = 0;
+ return;
+ }
+#endif
ts->tv_sec = tc->tc_offset_sec;
count = tco_delta(tc);
delta = tc->tc_offset_nano;
diff --git a/sys/kern/kern_threads.c b/sys/kern/kern_threads.c
index 3531e2c..ba2b4bf 100644
--- a/sys/kern/kern_threads.c
+++ b/sys/kern/kern_threads.c
@@ -52,10 +52,13 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sysproto.h>
+#include <machine/mutex.h>
+
/*
* Low level support for sleep/wakeup paradigm
* If a timeout is specified:
@@ -145,10 +148,12 @@ yield(struct proc *p, struct yield_args *uap) {
p->p_retval[0] = 0;
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
p->p_priority = MAXPRI;
setrunqueue(p);
p->p_stats->p_ru.ru_nvcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
return(0);
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
index 3794ccf..a989152 100644
--- a/sys/kern/subr_prf.c
+++ b/sys/kern/subr_prf.c
@@ -110,7 +110,8 @@ uprintf(const char *fmt, ...)
struct putchar_arg pca;
int retval = 0;
- if (p && p->p_flag & P_CONTROLT && p->p_session->s_ttyvp) {
+ if (p && p != idleproc && p->p_flag & P_CONTROLT &&
+ p->p_session->s_ttyvp) {
va_start(ap, fmt);
pca.tty = p->p_session->s_ttyp;
pca.flags = TOTTY;
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index 4fa5223..294c649 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -93,6 +93,7 @@ kmstartup(dummy)
int nullfunc_loop_profiled_time;
uintfptr_t tmp_addr;
#endif
+ int intrstate;
/*
* Round lowpc and highpc to multiples of the density we're using
@@ -135,6 +136,7 @@ kmstartup(dummy)
* Disable interrupts to avoid interference while we calibrate
* things.
*/
+ intrstate = save_intr();
disable_intr();
/*
@@ -189,7 +191,7 @@ kmstartup(dummy)
p->state = GMON_PROF_OFF;
stopguprof(p);
- enable_intr();
+ restore_intr(intrstate);
nullfunc_loop_profiled_time = 0;
for (tmp_addr = (uintfptr_t)nullfunc_loop_profiled;
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 61c5ecf..95b5759 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -36,6 +36,7 @@
#endif
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
@@ -65,6 +66,7 @@
#include <machine/apic.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
+#include <machine/mutex.h>
#include <machine/mpapic.h>
#include <machine/psl.h>
#include <machine/segments.h>
@@ -236,6 +238,8 @@ typedef struct BASETABLE_ENTRY {
#define MP_ANNOUNCE_POST 0x19
+/* used to hold the AP's until we are ready to release them */
+struct simplelock ap_boot_lock;
/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
int current_postcode;
@@ -336,6 +340,7 @@ static int start_all_aps(u_int boot_addr);
static void install_ap_tramp(u_int boot_addr);
static int start_ap(int logicalCpu, u_int boot_addr);
static int apic_int_is_bus_type(int intr, int bus_type);
+static void release_aps(void *dummy);
/*
* Calculate usable address in base memory for AP trampoline code.
@@ -403,7 +408,7 @@ found:
/*
- * Startup the SMP processors.
+ * Initialize the SMP hardware and the APIC and start up the AP's.
*/
void
mp_start(void)
@@ -619,6 +624,9 @@ mp_enable(u_int boot_addr)
/* initialize all SMP locks */
init_locks();
+ /* obtain the ap_boot_lock */
+ s_lock(&ap_boot_lock);
+
/* start each Application Processor */
start_all_aps(boot_addr);
}
@@ -1866,9 +1874,6 @@ struct simplelock fast_intr_lock;
/* critical region around INTR() routines */
struct simplelock intr_lock;
-/* lock regions protected in UP kernel via cli/sti */
-struct simplelock mpintr_lock;
-
/* lock region used by kernel profiling */
struct simplelock mcount_lock;
@@ -1885,26 +1890,16 @@ struct simplelock clock_lock;
/* lock around the MP rendezvous */
static struct simplelock smp_rv_lock;
+/* only 1 CPU can panic at a time :) */
+struct simplelock panic_lock;
+
static void
init_locks(void)
{
- /*
- * Get the initial mp_lock with a count of 1 for the BSP.
- * This uses a LOGICAL cpu ID, ie BSP == 0.
- */
- mp_lock = 0x00000001;
-
-#if 0
- /* ISR uses its own "giant lock" */
- isr_lock = FREE_LOCK;
-#endif
-
#if defined(APIC_INTR_DIAGNOSTIC) && defined(APIC_INTR_DIAGNOSTIC_IRQ)
s_lock_init((struct simplelock*)&apic_itrace_debuglock);
#endif
- s_lock_init((struct simplelock*)&mpintr_lock);
-
s_lock_init((struct simplelock*)&mcount_lock);
s_lock_init((struct simplelock*)&fast_intr_lock);
@@ -1912,6 +1907,7 @@ init_locks(void)
s_lock_init((struct simplelock*)&imen_lock);
s_lock_init((struct simplelock*)&cpl_lock);
s_lock_init(&smp_rv_lock);
+ s_lock_init(&panic_lock);
#ifdef USE_COMLOCK
s_lock_init((struct simplelock*)&com_lock);
@@ -1919,11 +1915,9 @@ init_locks(void)
#ifdef USE_CLOCKLOCK
s_lock_init((struct simplelock*)&clock_lock);
#endif /* USE_CLOCKLOCK */
-}
-
-/* Wait for all APs to be fully initialized */
-extern int wait_ap(unsigned int);
+ s_lock_init(&ap_boot_lock);
+}
/*
* start each AP in our list
@@ -1987,6 +1981,7 @@ start_all_aps(u_int boot_addr)
SMPpt[pg + 4] = 0; /* *prv_PMAP1 */
/* prime data page for it to use */
+ SLIST_INSERT_HEAD(&cpuhead, gd, gd_allcpu);
gd->gd_cpuid = x;
gd->gd_cpu_lockid = x << 24;
gd->gd_prv_CMAP1 = &SMPpt[pg + 1];
@@ -2211,7 +2206,6 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
-
/*
* Flush the TLB on all other CPU's
*
@@ -2348,10 +2342,13 @@ SYSCTL_INT(_machdep, OID_AUTO, forward_roundrobin_enabled, CTLFLAG_RW,
void ap_init(void);
void
-ap_init()
+ap_init(void)
{
u_int apic_id;
+ /* lock against other AP's that are waking up */
+ s_lock(&ap_boot_lock);
+
/* BSP may have changed PTD while we're waiting for the lock */
cpu_invltlb();
@@ -2397,6 +2394,30 @@ ap_init()
smp_started = 1; /* enable IPI's, tlb shootdown, freezes etc */
smp_active = 1; /* historic */
}
+
+ /* let other AP's wake up now */
+ s_unlock(&ap_boot_lock);
+
+ /* wait until all the AP's are up */
+ while (smp_started == 0)
+ ; /* nothing */
+
+ /*
+ * Set curproc to our per-cpu idleproc so that mutexes have
+ * something unique to lock with.
+ */
+ PCPU_SET(curproc,idleproc);
+ PCPU_SET(prevproc,idleproc);
+
+ microuptime(&switchtime);
+ switchticks = ticks;
+
+ /* ok, now grab sched_lock and enter the scheduler */
+ enable_intr();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ cpu_throw(); /* doesn't return */
+
+ panic("scheduler returned us to ap_init");
}
#ifdef BETTER_CLOCK
@@ -2453,6 +2474,12 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p = checkstate_curproc[id];
cpustate = checkstate_cpustate[id];
+ /* XXX */
+ if (p->p_ithd)
+ cpustate = CHECKSTATE_INTR;
+ else if (p == idleproc)
+ cpustate = CHECKSTATE_SYS;
+
switch (cpustate) {
case CHECKSTATE_USER:
if (p->p_flag & P_PROFIL)
@@ -2482,9 +2509,10 @@ forwarded_statclock(int id, int pscnt, int *astmap)
if (pscnt > 1)
return;
- if (!p)
+ if (p == idleproc) {
+ p->p_sticks++;
cp_time[CP_IDLE]++;
- else {
+ } else {
p->p_sticks++;
cp_time[CP_SYS]++;
}
@@ -2510,7 +2538,7 @@ forwarded_statclock(int id, int pscnt, int *astmap)
p->p_iticks++;
cp_time[CP_INTR]++;
}
- if (p != NULL) {
+ if (p != idleproc) {
schedclock(p);
/* Update resource usage integrals and maximums. */
@@ -2863,3 +2891,11 @@ smp_rendezvous(void (* setup_func)(void *),
/* release lock */
s_unlock(&smp_rv_lock);
}
+
+void
+release_aps(void *dummy __unused)
+{
+ s_unlock(&ap_boot_lock);
+}
+
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 51de1ac..f32dfae 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -49,10 +49,12 @@
#include "opt_trap.h"
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscall.h>
@@ -76,12 +78,14 @@
#include <machine/cpu.h>
#include <machine/ipl.h>
#include <machine/md_var.h>
+#include <machine/mutex.h>
#include <machine/pcb.h>
#ifdef SMP
#include <machine/smp.h>
#endif
#include <machine/tss.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#ifdef POWERFAIL_NMI
@@ -96,11 +100,14 @@
#include "isa.h"
#include "npx.h"
+#include <sys/sysctl.h>
+
int (*pmath_emulate) __P((struct trapframe *));
extern void trap __P((struct trapframe frame));
extern int trapwrite __P((unsigned addr));
extern void syscall2 __P((struct trapframe frame));
+extern void ast __P((struct trapframe frame));
static int trap_pfault __P((struct trapframe *, int, vm_offset_t));
static void trap_fatal __P((struct trapframe *, vm_offset_t));
@@ -142,7 +149,7 @@ static char *trap_msg[] = {
};
static __inline int userret __P((struct proc *p, struct trapframe *frame,
- u_quad_t oticks, int have_mplock));
+ u_quad_t oticks, int have_giant));
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
extern int has_f00f_bug;
@@ -158,18 +165,18 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
&panic_on_nmi, 0, "Panic on NMI");
static __inline int
-userret(p, frame, oticks, have_mplock)
+userret(p, frame, oticks, have_giant)
struct proc *p;
struct trapframe *frame;
u_quad_t oticks;
- int have_mplock;
+ int have_giant;
{
int sig, s;
while ((sig = CURSIG(p)) != 0) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
postsig(sig);
}
@@ -184,31 +191,34 @@ userret(p, frame, oticks, have_mplock)
* mi_switch()'ed, we might not be on the queue indicated by
* our priority.
*/
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
- }
s = splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
+ mtx_exit(&sched_lock, MTX_SPIN);
splx(s);
- while ((sig = CURSIG(p)) != 0)
+ while ((sig = CURSIG(p)) != 0) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
postsig(sig);
+ }
}
/*
* Charge system time if profiling.
*/
if (p->p_flag & P_PROFIL) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
addupc_task(p, frame->tf_eip,
(u_int)(p->p_sticks - oticks) * psratio);
}
curpriority = p->p_priority;
- return(have_mplock);
+ return(have_giant);
}
/*
@@ -226,13 +236,20 @@ trap(frame)
u_quad_t sticks = 0;
int i = 0, ucode = 0, type, code;
vm_offset_t eva;
+#ifdef POWERFAIL_NMI
+ static int lastalert = 0;
+#endif
- if (!(frame.tf_eflags & PSL_I)) {
+ atomic_add_int(&cnt.v_trap, 1);
+
+ if ((frame.tf_eflags & PSL_I) == 0) {
/*
- * Buggy application or kernel code has disabled interrupts
- * and then trapped. Enabling interrupts now is wrong, but
- * it is better than running with interrupts disabled until
- * they are accidentally enabled later.
+ * Buggy application or kernel code has disabled
+ * interrupts and then trapped. Enabling interrupts
+ * now is wrong, but it is better than running with
+ * interrupts disabled until they are accidentally
+ * enabled later. XXX Consider whether this is still
+ * correct.
*/
type = frame.tf_trapno;
if (ISPL(frame.tf_cs) == SEL_UPL || (frame.tf_eflags & PSL_VM))
@@ -252,54 +269,27 @@ trap(frame)
eva = 0;
if (frame.tf_trapno == T_PAGEFLT) {
/*
- * For some Cyrix CPUs, %cr2 is clobbered by interrupts.
- * This problem is worked around by using an interrupt
- * gate for the pagefault handler. We are finally ready
- * to read %cr2 and then must reenable interrupts.
- *
- * XXX this should be in the switch statement, but the
- * NO_FOOF_HACK and VM86 goto and ifdefs obfuscate the
- * flow of control too much for this to be obviously
- * correct.
+ * For some Cyrix CPUs, %cr2 is clobbered by
+ * interrupts. This problem is worked around by using
+ * an interrupt gate for the pagefault handler. We
+ * are finally ready to read %cr2 and then must
+ * reenable interrupts.
*/
eva = rcr2();
enable_intr();
- }
+ }
+
+ mtx_enter(&Giant, MTX_DEF);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
restart:
#endif
+
type = frame.tf_trapno;
code = frame.tf_err;
- if (in_vm86call) {
- if (frame.tf_eflags & PSL_VM &&
- (type == T_PROTFLT || type == T_STKFLT)) {
- i = vm86_emulate((struct vm86frame *)&frame);
- if (i != 0)
- /*
- * returns to original process
- */
- vm86_trap((struct vm86frame *)&frame);
- return;
- }
- switch (type) {
- /*
- * these traps want either a process context, or
- * assume a normal userspace trap.
- */
- case T_PROTFLT:
- case T_SEGNPFLT:
- trap_fatal(&frame, eva);
- return;
- case T_TRCTRAP:
- type = T_BPTFLT; /* kernel breakpoint */
- /* FALL THROUGH */
- }
- goto kernel_trap; /* normal kernel trap handling */
- }
-
- if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) {
+ if ((ISPL(frame.tf_cs) == SEL_UPL) ||
+ ((frame.tf_eflags & PSL_VM) && !in_vm86call)) {
/* user trap */
sticks = p->p_sticks;
@@ -322,16 +312,6 @@ restart:
i = SIGFPE;
break;
- case T_ASTFLT: /* Allow process switch */
- astoff();
- cnt.v_soft++;
- if (p->p_flag & P_OWEUPC) {
- p->p_flag &= ~P_OWEUPC;
- addupc_task(p, p->p_stats->p_prof.pr_addr,
- p->p_stats->p_prof.pr_ticks);
- }
- goto out;
-
/*
* The following two traps can happen in
* vm86 mode, and, if so, we want to handle
@@ -342,7 +322,7 @@ restart:
if (frame.tf_eflags & PSL_VM) {
i = vm86_emulate((struct vm86frame *)&frame);
if (i == 0)
- goto out;
+ goto user;
break;
}
/* FALL THROUGH */
@@ -357,14 +337,20 @@ restart:
case T_PAGEFLT: /* page fault */
i = trap_pfault(&frame, TRUE, eva);
- if (i == -1)
- return;
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if (i == -2)
+ if (i == -2) {
+ /*
+ * f00f hack workaround has triggered, treat
+ * as illegal instruction not page fault.
+ */
+ frame.tf_trapno = T_PRIVINFLT;
goto restart;
+ }
#endif
- if (i == 0)
+ if (i == -1)
goto out;
+ if (i == 0)
+ goto user;
ucode = T_PAGEFLT;
break;
@@ -377,7 +363,15 @@ restart:
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
- goto handle_powerfail;
+#ifndef TIMER_FREQ
+# define TIMER_FREQ 1193182
+#endif
+ if (time_second - lastalert > 10) {
+ log(LOG_WARNING, "NMI: power fail\n");
+ sysbeep(TIMER_FREQ/880, hz);
+ lastalert = time_second;
+ }
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -391,7 +385,7 @@ restart:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi)
panic("NMI indicates hardware failure");
break;
@@ -410,9 +404,9 @@ restart:
case T_DNA:
#if NNPX > 0
- /* if a transparent fault (due to context switch "late") */
+ /* transparent fault (due to context switch "late") */
if (npxdna())
- return;
+ goto out;
#endif
if (!pmath_emulate) {
i = SIGFPE;
@@ -422,7 +416,7 @@ restart:
i = (*pmath_emulate)(&frame);
if (i == 0) {
if (!(frame.tf_eflags & PSL_T))
- return;
+ goto out;
frame.tf_eflags &= ~PSL_T;
i = SIGTRAP;
}
@@ -435,13 +429,12 @@ restart:
break;
}
} else {
-kernel_trap:
/* kernel trap */
switch (type) {
case T_PAGEFLT: /* page fault */
(void) trap_pfault(&frame, FALSE, eva);
- return;
+ goto out;
case T_DNA:
#if NNPX > 0
@@ -451,31 +444,35 @@ kernel_trap:
* registered such use.
*/
if (npxdna())
- return;
+ goto out;
#endif
break;
- case T_PROTFLT: /* general protection fault */
- case T_SEGNPFLT: /* segment not present fault */
/*
- * Invalid segment selectors and out of bounds
- * %eip's and %esp's can be set up in user mode.
- * This causes a fault in kernel mode when the
- * kernel tries to return to user mode. We want
- * to get this fault so that we can fix the
- * problem here and not have to check all the
- * selectors and pointers when the user changes
- * them.
+ * The following two traps can happen in
+ * vm86 mode, and, if so, we want to handle
+ * them specially.
*/
-#define MAYBE_DORETI_FAULT(where, whereto) \
- do { \
- if (frame.tf_eip == (int)where) { \
- frame.tf_eip = (int)whereto; \
- return; \
- } \
- } while (0)
-
- if (intr_nesting_level == 0) {
+ case T_PROTFLT: /* general protection fault */
+ case T_STKFLT: /* stack fault */
+ if (frame.tf_eflags & PSL_VM) {
+ i = vm86_emulate((struct vm86frame *)&frame);
+ if (i != 0)
+ /*
+ * returns to original process
+ */
+ vm86_trap((struct vm86frame *)&frame);
+ goto out;
+ }
+ /* FALL THROUGH */
+
+ case T_SEGNPFLT: /* segment not present fault */
+ if (in_vm86call)
+ break;
+
+ if (intr_nesting_level != 0)
+ break;
+
/*
* Invalid %fs's and %gs's can be created using
* procfs or PT_SETREGS or by invalidating the
@@ -488,20 +485,38 @@ kernel_trap:
if (frame.tf_eip == (int)cpu_switch_load_gs) {
curpcb->pcb_gs = 0;
psignal(p, SIGBUS);
- return;
+ goto out;
+ }
+
+ /*
+ * Invalid segment selectors and out of bounds
+ * %eip's and %esp's can be set up in user mode.
+ * This causes a fault in kernel mode when the
+ * kernel tries to return to user mode. We want
+ * to get this fault so that we can fix the
+ * problem here and not have to check all the
+ * selectors and pointers when the user changes
+ * them.
+ */
+ if (frame.tf_eip == (int)doreti_iret) {
+ frame.tf_eip = (int)doreti_iret_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_ds) {
+ frame.tf_eip = (int)doreti_popl_ds_fault;
+ goto out;
+ }
+ if (frame.tf_eip == (int)doreti_popl_es) {
+ frame.tf_eip = (int)doreti_popl_es_fault;
+ goto out;
}
- MAYBE_DORETI_FAULT(doreti_iret,
- doreti_iret_fault);
- MAYBE_DORETI_FAULT(doreti_popl_ds,
- doreti_popl_ds_fault);
- MAYBE_DORETI_FAULT(doreti_popl_es,
- doreti_popl_es_fault);
- MAYBE_DORETI_FAULT(doreti_popl_fs,
- doreti_popl_fs_fault);
+ if (frame.tf_eip == (int)doreti_popl_fs) {
+ frame.tf_eip = (int)doreti_popl_fs_fault;
+ goto out;
+ }
if (curpcb && curpcb->pcb_onfault) {
frame.tf_eip = (int)curpcb->pcb_onfault;
- return;
- }
+ goto out;
}
break;
@@ -517,7 +532,7 @@ kernel_trap:
*/
if (frame.tf_eflags & PSL_NT) {
frame.tf_eflags &= ~PSL_NT;
- return;
+ goto out;
}
break;
@@ -529,7 +544,7 @@ kernel_trap:
* silently until the syscall handler has
* saved the flags.
*/
- return;
+ goto out;
}
if (frame.tf_eip == (int)IDTVEC(syscall) + 1) {
/*
@@ -537,7 +552,7 @@ kernel_trap:
* flags. Stop single stepping it.
*/
frame.tf_eflags &= ~PSL_T;
- return;
+ goto out;
}
/*
* Ignore debug register trace traps due to
@@ -549,13 +564,13 @@ kernel_trap:
* in kernel space because that is useful when
* debugging the kernel.
*/
- if (user_dbreg_trap()) {
+ if (user_dbreg_trap() && !in_vm86call) {
/*
* Reset breakpoint bits because the
* processor doesn't
*/
load_dr6(rdr6() & 0xfffffff0);
- return;
+ goto out;
}
/*
* Fall through (TRCTRAP kernel mode, kernel address)
@@ -567,28 +582,19 @@ kernel_trap:
*/
#ifdef DDB
if (kdb_trap (type, 0, &frame))
- return;
+ goto out;
#endif
break;
#if NISA > 0
case T_NMI:
#ifdef POWERFAIL_NMI
-#ifndef TIMER_FREQ
-# define TIMER_FREQ 1193182
-#endif
- handle_powerfail:
- {
- static unsigned lastalert = 0;
-
- if(time_second - lastalert > 10)
- {
+ if (time_second - lastalert > 10) {
log(LOG_WARNING, "NMI: power fail\n");
sysbeep(TIMER_FREQ/880, hz);
lastalert = time_second;
- }
- return;
}
+ goto out;
#else /* !POWERFAIL_NMI */
/* machine/parity/power fail/"kitchen sink" faults */
if (isa_nmi(code) == 0) {
@@ -602,16 +608,16 @@ kernel_trap:
kdb_trap (type, 0, &frame);
}
#endif /* DDB */
- return;
+ goto out;
} else if (panic_on_nmi == 0)
- return;
+ goto out;
/* FALL THROUGH */
#endif /* POWERFAIL_NMI */
#endif /* NISA > 0 */
}
trap_fatal(&frame, eva);
- return;
+ goto out;
}
/* Translate fault for emulators (e.g. Linux) */
@@ -630,8 +636,10 @@ kernel_trap:
}
#endif
-out:
+user:
userret(p, &frame, sticks, 1);
+out:
+ mtx_exit(&Giant, MTX_DEF);
}
#ifdef notyet
@@ -769,10 +777,8 @@ trap_pfault(frame, usermode, eva)
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
- frame->tf_trapno = T_PRIVINFLT;
+ if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
return -2;
- }
#endif
if (usermode)
goto nogo;
@@ -869,8 +875,7 @@ trap_fatal(frame, eva)
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -917,26 +922,6 @@ trap_fatal(frame, eva)
} else {
printf("Idle\n");
}
- printf("interrupt mask = ");
- if ((cpl & net_imask) == net_imask)
- printf("net ");
- if ((cpl & tty_imask) == tty_imask)
- printf("tty ");
- if ((cpl & bio_imask) == bio_imask)
- printf("bio ");
- if ((cpl & cam_imask) == cam_imask)
- printf("cam ");
- if (cpl == 0)
- printf("none");
-#ifdef SMP
-/**
- * XXX FIXME:
- * we probably SHOULD have stopped the other CPUs before now!
- * another CPU COULD have been touching cpl at this moment...
- */
- printf(" <- SMP: XXX");
-#endif
- printf("\n");
#ifdef KDB
if (kdb_trap(&psl))
@@ -973,8 +958,7 @@ dblfault_handler()
printf("esp = 0x%x\n", common_tss.tss_esp);
printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
- /* three seperate prints in case of a trap on an unmapped page */
- printf("mp_lock = %08x; ", mp_lock);
+ /* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", cpuid);
printf("lapic.id = %08x\n", lapic.id);
#endif
@@ -1048,12 +1032,14 @@ syscall2(frame)
int error;
int narg;
int args[8];
- int have_mplock = 0;
+ int have_giant = 0;
u_int code;
+ atomic_add_int(&cnt.v_syscall, 1);
+
#ifdef DIAGNOSTIC
if (ISPL(frame.tf_cs) != SEL_UPL) {
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
panic("syscall");
/* NOT REACHED */
}
@@ -1075,9 +1061,9 @@ syscall2(frame)
/*
* The prep code is not MP aware.
*/
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
(*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
- rel_mplock();
+ mtx_exit(&Giant, MTX_DEF);
} else {
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@@ -1114,8 +1100,8 @@ syscall2(frame)
*/
if (params && (i = narg * sizeof(int)) &&
(error = copyin(params, (caddr_t)args, (u_int)i))) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL))
ktrsyscall(p->p_tracep, code, narg, args);
@@ -1129,15 +1115,15 @@ syscall2(frame)
* we are ktracing
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
- get_mplock();
- have_mplock = 1;
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSCALL)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsyscall(p->p_tracep, code, narg, args);
}
@@ -1192,9 +1178,9 @@ bad:
* Traced syscall. trapsignal() is not MP aware.
*/
if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
frame.tf_eflags &= ~PSL_T;
trapsignal(p, SIGTRAP, 0);
@@ -1203,13 +1189,13 @@ bad:
/*
* Handle reschedule and other end-of-syscall issues
*/
- have_mplock = userret(p, &frame, sticks, have_mplock);
+ have_giant = userret(p, &frame, sticks, have_giant);
#ifdef KTRACE
if (KTRPOINT(p, KTR_SYSRET)) {
- if (have_mplock == 0) {
- get_mplock();
- have_mplock = 1;
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
}
ktrsysret(p->p_tracep, code, error, p->p_retval[0]);
}
@@ -1225,27 +1211,66 @@ bad:
/*
* Release the MP lock if we had to get it
*/
- if (have_mplock)
- rel_mplock();
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
+
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
+
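+/*
+ * Handle an asynchronous software trap (AST) on return to user mode:
+ * charge any profiling ticks owed via P_OWEUPC and finish up through
+ * userret().
+ */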
+void
+ast(frame)
+ struct trapframe frame;
+{
+ struct proc *p = CURPROC;
+ u_quad_t sticks;
+
+ /*
+ * handle atomicity by looping since interrupts are enabled and the
+ * MP lock is not held.
+ */
+ sticks = ((volatile struct proc *)p)->p_sticks;
+ while (sticks != ((volatile struct proc *)p)->p_sticks)
+ sticks = ((volatile struct proc *)p)->p_sticks;
+
+ astoff();
+ atomic_add_int(&cnt.v_soft, 1);
+ if (p->p_flag & P_OWEUPC) {
+ mtx_enter(&Giant, MTX_DEF);
+ p->p_flag &= ~P_OWEUPC;
+ addupc_task(p, p->p_stats->p_prof.pr_addr,
+ p->p_stats->p_prof.pr_ticks);
+}
+ if (userret(p, &frame, sticks, mtx_owned(&Giant)) != 0)
+ mtx_exit(&Giant, MTX_DEF);
}
/*
* Simplified back end of syscall(), used when returning from fork()
- * directly into user mode. MP lock is held on entry and should be
- * held on return.
+ * directly into user mode. Giant is not held on entry, and must not
+ * be held on return.
*/
void
fork_return(p, frame)
struct proc *p;
struct trapframe frame;
{
+ int have_giant;
+
frame.tf_eax = 0; /* Child returns zero */
frame.tf_eflags &= ~PSL_C; /* success */
frame.tf_edx = 1;
- userret(p, &frame, 0, 1);
+ have_giant = userret(p, &frame, 0, mtx_owned(&Giant));
#ifdef KTRACE
- if (KTRPOINT(p, KTR_SYSRET))
+ if (KTRPOINT(p, KTR_SYSRET)) {
+ if (have_giant == 0) {
+ mtx_enter(&Giant, MTX_DEF);
+ have_giant = 1;
+ }
ktrsysret(p->p_tracep, SYS_fork, 0, 0);
+ }
#endif
+ if (have_giant)
+ mtx_exit(&Giant, MTX_DEF);
}
diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c
new file mode 100644
index 0000000..1ac3f58
--- /dev/null
+++ b/sys/kern/subr_turnstile.c
@@ -0,0 +1,799 @@
+/*-
+ * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
+ * $FreeBSD$
+ */
+
+/*
+ * Main Entry: witness
+ * Pronunciation: 'wit-n&s
+ * Function: noun
+ * Etymology: Middle English witnesse, from Old English witnes knowledge,
+ * testimony, witness, from 2wit
+ * Date: before 12th century
+ * 1 : attestation of a fact or event : TESTIMONY
+ * 2 : one that gives evidence; specifically : one who testifies in
+ * a cause or before a judicial tribunal
+ * 3 : one asked to be present at a transaction so as to be able to
+ * testify to its having taken place
+ * 4 : one who has personal knowledge of something
+ * 5 a : something serving as evidence or proof : SIGN
+ * b : public affirmation by word or example of usually
+ * religious faith or conviction <the heroic witness to divine
+ * life -- Pilot>
+ * 6 capitalized : a member of the Jehovah's Witnesses
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/ktr.h>
+
+#include <machine/cpu.h>
+#define _KERN_MUTEX_C_ /* Cause non-inlined mtx_*() to be compiled. */
+#include <machine/mutex.h>
+
+/*
+ * The non-inlined versions of the mtx_*() functions are always built (above),
+ * but the witness code depends on the SMP_DEBUG and WITNESS kernel options
+ * being specified.
+ */
+#if (defined(SMP_DEBUG) && defined(WITNESS))
+
+#define WITNESS_COUNT 200
+#define WITNESS_NCHILDREN 2
+
+#ifndef WITNESS
+#define WITNESS 0 /* default off */
+#endif
+
+#ifndef SMP
+extern int witness_spin_check;
+#endif
+
+int witness_watch;
+
+typedef struct witness {
+ struct witness *w_next;
+ char *w_description;
+ char *w_file;
+ int w_line;
+ struct witness *w_morechildren;
+ u_char w_childcnt;
+ u_char w_Giant_squawked:1;
+ u_char w_other_squawked:1;
+ u_char w_same_squawked:1;
+ u_char w_sleep:1;
+ u_char w_spin:1; /* this is a spin mutex */
+ u_int w_level;
+ struct witness *w_children[WITNESS_NCHILDREN];
+} witness_t;
+
+typedef struct witness_blessed {
+ char *b_lock1;
+ char *b_lock2;
+} witness_blessed_t;
+
+#ifdef KDEBUG
+/*
+ * When WITNESS_KDEBUG is set to 1, it will cause the system to
+ * drop into kdebug() when:
+ * - a lock hierarchy violation occurs
+ * - locks are held when going to sleep.
+ */
+#ifndef WITNESS_KDEBUG
+#define WITNESS_KDEBUG 0
+#endif
+int witness_kdebug = WITNESS_KDEBUG;
+#endif /* KDEBUG */
+
+#ifndef WITNESS_SKIPSPIN
+#define WITNESS_SKIPSPIN 0
+#endif
+int witness_skipspin = WITNESS_SKIPSPIN;
+
+
+static mtx_t w_mtx;
+static witness_t *w_free;
+static witness_t *w_all;
+static int w_inited;
+static int witness_dead; /* fatal error, probably no memory */
+
+static witness_t w_data[WITNESS_COUNT];
+
+static witness_t *enroll __P((char *description, int flag));
+static int itismychild __P((witness_t *parent, witness_t *child));
+static void removechild __P((witness_t *parent, witness_t *child));
+static int isitmychild __P((witness_t *parent, witness_t *child));
+static int isitmydescendant __P((witness_t *parent, witness_t *child));
+static int dup_ok __P((witness_t *));
+static int blessed __P((witness_t *, witness_t *));
+static void witness_displaydescendants
+ __P((void(*)(const char *fmt, ...), witness_t *));
+static void witness_leveldescendents __P((witness_t *parent, int level));
+static void witness_levelall __P((void));
+static witness_t * witness_get __P((void));
+static void witness_free __P((witness_t *m));
+
+
+static char *ignore_list[] = {
+ "witness lock",
+ "Kdebug", /* breaks rules and may or may not work */
+ "Page Alias", /* sparc only, witness lock won't block intr */
+ NULL
+};
+
+static char *spin_order_list[] = {
+ "sched lock",
+ "log mtx",
+ "zslock", /* sparc only above log, this one is a real hack */
+ "time lock", /* above callout */
+ "callout mtx", /* above wayout */
+ /*
+ * leaf locks
+ */
+ "wayout mtx",
+ "kernel_pmap", /* sparc only, logically equal "pmap" below */
+ "pmap", /* sparc only */
+ NULL
+};
+
+static char *order_list[] = {
+ "tcb", "inp", "so_snd", "so_rcv", "Giant lock", NULL,
+ "udb", "inp", NULL,
+ "unp head", "unp", "so_snd", NULL,
+ "de0", "Giant lock", NULL,
+ "ifnet", "Giant lock", NULL,
+ "fifo", "so_snd", NULL,
+ "hme0", "Giant lock", NULL,
+ "esp0", "Giant lock", NULL,
+ "hfa0", "Giant lock", NULL,
+ "so_rcv", "atm_global", NULL,
+ "so_snd", "atm_global", NULL,
+ "NFS", "Giant lock", NULL,
+ NULL
+};
+
+static char *dup_list[] = {
+ "inp",
+ "process group",
+ "session",
+ "unp",
+ "rtentry",
+ "rawcb",
+ NULL
+};
+
+static char *sleep_list[] = {
+ "Giant lock",
+ NULL
+};
+
+/*
+ * Pairs of locks which have been blessed
+ * Don't complain about order problems with blessed locks
+ */
+static witness_blessed_t blessed_list[] = {
+};
+static int blessed_count = sizeof (blessed_list) / sizeof (witness_blessed_t);
+
+void
+witness_init(mtx_t *m, int flag)
+{
+ m->mtx_witness = enroll(m->mtx_description, flag);
+}
+
+void
+witness_destroy(mtx_t *m)
+{
+ mtx_t *m1;
+ struct proc *p;
+ p = CURPROC;
+ for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
+ m1 = LIST_NEXT(m1, mtx_held)) {
+ if (m1 == m) {
+ LIST_REMOVE(m, mtx_held);
+ break;
+ }
+ }
+ return;
+
+}
+
+void
+witness_enter(mtx_t *m, int flags, char *file, int line)
+{
+ witness_t *w, *w1;
+ mtx_t *m1;
+ struct proc *p;
+ int i;
+#ifdef KDEBUG
+ int go_into_kdebug = 0;
+#endif /* KDEBUG */
+
+ w = m->mtx_witness;
+ p = CURPROC;
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ i = witness_spin_check;
+ if (i != 0 && w->w_level < i) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ panic("mutex_enter(%s:%x, MTX_SPIN) out of order @ %s:%d"
+ " already holding %s:%x",
+ m->mtx_description, w->w_level, file, line,
+ spin_order_list[ffs(i)-1], i);
+ }
+ PCPU_SET(witness_spin_check, i | w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+ if (w->w_spin)
+ panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+ if (witness_dead)
+ goto out;
+ if (cold)
+ goto out;
+
+ if (!mtx_legal2block())
+ panic("blockable mtx_enter() of %s when not legal @ %s:%d",
+ m->mtx_description, file, line);
+ /*
+ * Is this the first mutex acquired
+ */
+ if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
+ goto out;
+
+
+ if ((w1 = m1->mtx_witness) == w) {
+ if (w->w_same_squawked || dup_ok(w))
+ goto out;
+ w->w_same_squawked = 1;
+		printf("acquiring duplicate lock of same type: \"%s\"\n",
+ m->mtx_description);
+ printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
+ printf(" 2nd @ %s:%d\n", file, line);
+#ifdef KDEBUG
+ go_into_kdebug = 1;
+#endif /* KDEBUG */
+ goto out;
+ }
+ MPASS(!mtx_owned(&w_mtx));
+ mtx_enter(&w_mtx, MTX_SPIN);
+ /*
+ * If we have a known higher number just say ok
+ */
+ if (witness_watch > 1 && w->w_level > w1->w_level) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ goto out;
+ }
+ if (isitmydescendant(m1->mtx_witness, w)) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ goto out;
+ }
+ for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
+
+ ASS(i < 200);
+ w1 = m1->mtx_witness;
+ if (isitmydescendant(w, w1)) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ if (blessed(w, w1))
+ goto out;
+ if (m1 == &Giant) {
+ if (w1->w_Giant_squawked)
+ goto out;
+ else
+ w1->w_Giant_squawked = 1;
+ } else {
+ if (w1->w_other_squawked)
+ goto out;
+ else
+ w1->w_other_squawked = 1;
+ }
+ printf("lock order reversal\n");
+ printf(" 1st %s last acquired @ %s:%d\n",
+ w->w_description, w->w_file, w->w_line);
+ printf(" 2nd %p %s @ %s:%d\n",
+ m1, w1->w_description, w1->w_file, w1->w_line);
+ printf(" 3rd %p %s @ %s:%d\n",
+ m, w->w_description, file, line);
+#ifdef KDEBUG
+ go_into_kdebug = 1;
+#endif /* KDEBUG */
+ goto out;
+ }
+ }
+ m1 = LIST_FIRST(&p->p_heldmtx);
+ if (!itismychild(m1->mtx_witness, w))
+ mtx_exit(&w_mtx, MTX_SPIN);
+
+out:
+#ifdef KDEBUG
+ if (witness_kdebug && go_into_kdebug)
+ kdebug();
+#endif /* KDEBUG */
+ w->w_file = file;
+ w->w_line = line;
+ m->mtx_line = line;
+ m->mtx_file = file;
+
+ /*
+ * If this pays off it likely means that a mutex being witnessed
+ * is acquired in hardclock. Put it in the ignore list. It is
+ * likely not the mutex this assert fails on.
+ */
+ ASS(m->mtx_held.le_prev == NULL);
+ LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
+}
+
+void
+witness_exit(mtx_t *m, int flags, char *file, int line)
+{
+ witness_t *w;
+
+ w = m->mtx_witness;
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+ if (w->w_spin)
+ panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+
+ if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
+ panic("switchable mtx_exit() of %s when not legal @ %s:%d",
+ m->mtx_description, file, line);
+ LIST_REMOVE(m, mtx_held);
+ m->mtx_held.le_prev = NULL;
+}
+
+void
+witness_try_enter(mtx_t *m, int flags, char *file, int line)
+{
+ struct proc *p;
+ witness_t *w = m->mtx_witness;
+
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_try_enter: "
+ "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+
+ if (w->w_spin)
+ panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+
+ w->w_file = file;
+ w->w_line = line;
+ m->mtx_line = line;
+ m->mtx_file = file;
+ p = CURPROC;
+ ASS(m->mtx_held.le_prev == NULL);
+ LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
+}
+
+void
+witness_display(void(*prnt)(const char *fmt, ...))
+{
+ witness_t *w, *w1;
+
+ witness_levelall();
+
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_file == NULL)
+ continue;
+ for (w1 = w_all; w1; w1 = w1->w_next) {
+ if (isitmychild(w1, w))
+ break;
+ }
+ if (w1 != NULL)
+ continue;
+ /*
+		 * This lock has no ancestors, display its descendants.
+ */
+ witness_displaydescendants(prnt, w);
+ }
+	prnt("\nMutexes which were never acquired\n");
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_file != NULL)
+ continue;
+ prnt("%s\n", w->w_description);
+ }
+}
+
+int
+witness_sleep(int check_only, mtx_t *mtx, char *file, int line)
+{
+ mtx_t *m;
+ struct proc *p;
+ char **sleep;
+ int n = 0;
+
+ p = CURPROC;
+ for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
+ m = LIST_NEXT(m, mtx_held)) {
+ if (m == mtx)
+ continue;
+ for (sleep = sleep_list; *sleep!= NULL; sleep++)
+ if (strcmp(m->mtx_description, *sleep) == 0)
+ goto next;
+ printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
+ file, line, check_only ? "could sleep" : "sleeping",
+ m->mtx_description,
+ m->mtx_witness->w_file, m->mtx_witness->w_line);
+ n++;
+ next:
+ }
+#ifdef KDEBUG
+ if (witness_kdebug && n)
+ kdebug();
+#endif /* KDEBUG */
+ return (n);
+}
+
+static witness_t *
+enroll(char *description, int flag)
+{
+ int i;
+ witness_t *w, *w1;
+ char **ignore;
+ char **order;
+
+ if (!witness_watch)
+ return (NULL);
+ for (ignore = ignore_list; *ignore != NULL; ignore++)
+ if (strcmp(description, *ignore) == 0)
+ return (NULL);
+
+ if (w_inited == 0) {
+ mtx_init(&w_mtx, "witness lock", MTX_DEF);
+ for (i = 0; i < WITNESS_COUNT; i++) {
+ w = &w_data[i];
+ witness_free(w);
+ }
+ w_inited = 1;
+ for (order = order_list; *order != NULL; order++) {
+ w = enroll(*order, MTX_DEF);
+ w->w_file = "order list";
+ for (order++; *order != NULL; order++) {
+ w1 = enroll(*order, MTX_DEF);
+ w1->w_file = "order list";
+ itismychild(w, w1);
+ w = w1;
+ }
+ }
+ }
+ if ((flag & MTX_SPIN) && witness_skipspin)
+ return (NULL);
+ mtx_enter(&w_mtx, MTX_SPIN);
+ for (w = w_all; w; w = w->w_next) {
+ if (strcmp(description, w->w_description) == 0) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return (w);
+ }
+ }
+ if ((w = witness_get()) == NULL)
+ return (NULL);
+ w->w_next = w_all;
+ w_all = w;
+ w->w_description = description;
+ mtx_exit(&w_mtx, MTX_SPIN);
+ if (flag & MTX_SPIN) {
+ w->w_spin = 1;
+
+ i = 1;
+ for (order = spin_order_list; *order != NULL; order++) {
+ if (strcmp(description, *order) == 0)
+ break;
+ i <<= 1;
+ }
+ if (*order == NULL)
+ panic("spin lock %s not in order list", description);
+ w->w_level = i;
+ }
+ return (w);
+}
+
+static int
+itismychild(witness_t *parent, witness_t *child)
+{
+ static int recursed;
+
+ /*
+ * Insert "child" after "parent"
+ */
+ while (parent->w_morechildren)
+ parent = parent->w_morechildren;
+
+ if (parent->w_childcnt == WITNESS_NCHILDREN) {
+ if ((parent->w_morechildren = witness_get()) == NULL)
+ return (1);
+ parent = parent->w_morechildren;
+ }
+ ASS(child != NULL);
+ parent->w_children[parent->w_childcnt++] = child;
+ /*
+ * now prune whole tree
+ */
+ if (recursed)
+ return (0);
+ recursed = 1;
+ for (child = w_all; child != NULL; child = child->w_next) {
+ for (parent = w_all; parent != NULL;
+ parent = parent->w_next) {
+ if (!isitmychild(parent, child))
+ continue;
+ removechild(parent, child);
+ if (isitmydescendant(parent, child))
+ continue;
+ itismychild(parent, child);
+ }
+ }
+ recursed = 0;
+ witness_levelall();
+ return (0);
+}
+
+static void
+removechild(witness_t *parent, witness_t *child)
+{
+ witness_t *w, *w1;
+ int i;
+
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ if (w->w_children[i] == child)
+ goto found;
+ return;
+found:
+ for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
+ continue;
+ w->w_children[i] = w1->w_children[--w1->w_childcnt];
+ ASS(w->w_children[i] != NULL);
+
+ if (w1->w_childcnt != 0)
+ return;
+
+ if (w1 == parent)
+ return;
+ for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
+ continue;
+ w->w_morechildren = 0;
+ witness_free(w1);
+}
+
+static int
+isitmychild(witness_t *parent, witness_t *child)
+{
+ witness_t *w;
+ int i;
+
+ for (w = parent; w != NULL; w = w->w_morechildren) {
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (w->w_children[i] == child)
+ return (1);
+ }
+ }
+ return (0);
+}
+
+static int
+isitmydescendant(witness_t *parent, witness_t *child)
+{
+ witness_t *w;
+ int i;
+ int j;
+
+ for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
+ ASS(j < 1000);
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (w->w_children[i] == child)
+ return (1);
+ }
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (isitmydescendant(w->w_children[i], child))
+ return (1);
+ }
+ }
+ return (0);
+}
+
+void
+witness_levelall (void)
+{
+ witness_t *w, *w1;
+
+ for (w = w_all; w; w = w->w_next)
+ if (!w->w_spin)
+ w->w_level = 0;
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_spin)
+ continue;
+ for (w1 = w_all; w1; w1 = w1->w_next) {
+ if (isitmychild(w1, w))
+ break;
+ }
+ if (w1 != NULL)
+ continue;
+ witness_leveldescendents(w, 0);
+ }
+}
+
+static void
+witness_leveldescendents(witness_t *parent, int level)
+{
+ int i;
+ witness_t *w;
+
+ if (parent->w_level < level)
+ parent->w_level = level;
+ level++;
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ witness_leveldescendents(w->w_children[i], level);
+}
+
+static void
+witness_displaydescendants(void(*prnt)(const char *fmt, ...), witness_t *parent)
+{
+ witness_t *w;
+ int i;
+ int level = parent->w_level;
+
+ prnt("%d", level);
+ if (level < 10)
+ prnt(" ");
+ for (i = 0; i < level; i++)
+ prnt(" ");
+ prnt("%s", parent->w_description);
+ if (parent->w_file != NULL) {
+ prnt(" -- last acquired @ %s", parent->w_file);
+#ifndef W_USE_WHERE
+ prnt(":%d", parent->w_line);
+#endif
+ prnt("\n");
+ }
+
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ witness_displaydescendants(prnt, w->w_children[i]);
+ }
+
+static int
+dup_ok(witness_t *w)
+{
+ char **dup;
+
+ for (dup = dup_list; *dup!= NULL; dup++)
+ if (strcmp(w->w_description, *dup) == 0)
+ return (1);
+ return (0);
+}
+
+static int
+blessed(witness_t *w1, witness_t *w2)
+{
+ int i;
+ witness_blessed_t *b;
+
+ for (i = 0; i < blessed_count; i++) {
+ b = &blessed_list[i];
+ if (strcmp(w1->w_description, b->b_lock1) == 0) {
+ if (strcmp(w2->w_description, b->b_lock2) == 0)
+ return (1);
+ continue;
+ }
+ if (strcmp(w1->w_description, b->b_lock2) == 0)
+ if (strcmp(w2->w_description, b->b_lock1) == 0)
+ return (1);
+ }
+ return (0);
+}
+
+static witness_t *
+witness_get()
+{
+ witness_t *w;
+
+ if ((w = w_free) == NULL) {
+ witness_dead = 1;
+ mtx_exit(&w_mtx, MTX_SPIN);
+ printf("witness exhausted\n");
+ return (NULL);
+ }
+ w_free = w->w_next;
+ bzero(w, sizeof (*w));
+ return (w);
+}
+
+static void
+witness_free(witness_t *w)
+{
+ w->w_next = w_free;
+ w_free = w;
+}
+
+void
+witness_list(struct proc *p)
+{
+ mtx_t *m;
+
+ for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
+ m = LIST_NEXT(m, mtx_held)) {
+ printf("\t\"%s\" (%p) locked at %s:%d\n",
+ m->mtx_description, m,
+ m->mtx_witness->w_file, m->mtx_witness->w_line);
+ }
+}
+
+void
+witness_save(mtx_t *m, char **filep, int *linep)
+{
+ *filep = m->mtx_witness->w_file;
+ *linep = m->mtx_witness->w_line;
+}
+
+void
+witness_restore(mtx_t *m, char *file, int line)
+{
+ m->mtx_witness->w_file = file;
+ m->mtx_witness->w_line = line;
+}
+
+#endif /* (defined(SMP_DEBUG) && defined(WITNESS)) */
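
The witness code above records, the first time two sleep mutexes are held together, which one is the "parent"; any later path that acquires them in the opposite order triggers the "lock order reversal" report printed by witness_enter(). From a consumer's point of view the discipline looks like the following sketch, which assumes only the mtx_init()/mtx_enter()/mtx_exit() interface from this commit (the two locks and their names are illustrative):

#include <machine/mutex.h>

static mtx_t map_lock;			/* illustrative locks, not real ones */
static mtx_t object_lock;

static void
example_lock_setup(void)
{
	mtx_init(&map_lock, "map lock", MTX_DEF);
	mtx_init(&object_lock, "object lock", MTX_DEF);
}

static void
example_ordered_use(void)
{
	/*
	 * Witness learns this order on first use; a later path that
	 * enters object_lock before map_lock is reported as a reversal.
	 */
	mtx_enter(&map_lock, MTX_DEF);
	mtx_enter(&object_lock, MTX_DEF);
	/* ... work done under both locks ... */
	mtx_exit(&object_lock, MTX_DEF);
	mtx_exit(&map_lock, MTX_DEF);
}
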
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
new file mode 100644
index 0000000..1ac3f58
--- /dev/null
+++ b/sys/kern/subr_witness.c
@@ -0,0 +1,799 @@
+/*-
+ * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
+ * $FreeBSD$
+ */
+
+/*
+ * Main Entry: witness
+ * Pronunciation: 'wit-n&s
+ * Function: noun
+ * Etymology: Middle English witnesse, from Old English witnes knowledge,
+ * testimony, witness, from 2wit
+ * Date: before 12th century
+ * 1 : attestation of a fact or event : TESTIMONY
+ * 2 : one that gives evidence; specifically : one who testifies in
+ * a cause or before a judicial tribunal
+ * 3 : one asked to be present at a transaction so as to be able to
+ * testify to its having taken place
+ * 4 : one who has personal knowledge of something
+ * 5 a : something serving as evidence or proof : SIGN
+ * b : public affirmation by word or example of usually
+ * religious faith or conviction <the heroic witness to divine
+ * life -- Pilot>
+ * 6 capitalized : a member of the Jehovah's Witnesses
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/ktr.h>
+
+#include <machine/cpu.h>
+#define _KERN_MUTEX_C_ /* Cause non-inlined mtx_*() to be compiled. */
+#include <machine/mutex.h>
+
+/*
+ * The non-inlined versions of the mtx_*() functions are always built (above),
+ * but the witness code depends on the SMP_DEBUG and WITNESS kernel options
+ * being specified.
+ */
+#if (defined(SMP_DEBUG) && defined(WITNESS))
+
+#define WITNESS_COUNT 200
+#define WITNESS_NCHILDREN 2
+
+#ifndef WITNESS
+#define WITNESS 0 /* default off */
+#endif
+
+#ifndef SMP
+extern int witness_spin_check;
+#endif
+
+int witness_watch;
+
+typedef struct witness {
+ struct witness *w_next;
+ char *w_description;
+ char *w_file;
+ int w_line;
+ struct witness *w_morechildren;
+ u_char w_childcnt;
+ u_char w_Giant_squawked:1;
+ u_char w_other_squawked:1;
+ u_char w_same_squawked:1;
+ u_char w_sleep:1;
+ u_char w_spin:1; /* this is a spin mutex */
+ u_int w_level;
+ struct witness *w_children[WITNESS_NCHILDREN];
+} witness_t;
+
+typedef struct witness_blessed {
+ char *b_lock1;
+ char *b_lock2;
+} witness_blessed_t;
+
+#ifdef KDEBUG
+/*
+ * When WITNESS_KDEBUG is set to 1, it will cause the system to
+ * drop into kdebug() when:
+ * - a lock hierarchy violation occurs
+ * - locks are held when going to sleep.
+ */
+#ifndef WITNESS_KDEBUG
+#define WITNESS_KDEBUG 0
+#endif
+int witness_kdebug = WITNESS_KDEBUG;
+#endif /* KDEBUG */
+
+#ifndef WITNESS_SKIPSPIN
+#define WITNESS_SKIPSPIN 0
+#endif
+int witness_skipspin = WITNESS_SKIPSPIN;
+
+
+static mtx_t w_mtx;
+static witness_t *w_free;
+static witness_t *w_all;
+static int w_inited;
+static int witness_dead; /* fatal error, probably no memory */
+
+static witness_t w_data[WITNESS_COUNT];
+
+static witness_t *enroll __P((char *description, int flag));
+static int itismychild __P((witness_t *parent, witness_t *child));
+static void removechild __P((witness_t *parent, witness_t *child));
+static int isitmychild __P((witness_t *parent, witness_t *child));
+static int isitmydescendant __P((witness_t *parent, witness_t *child));
+static int dup_ok __P((witness_t *));
+static int blessed __P((witness_t *, witness_t *));
+static void witness_displaydescendants
+ __P((void(*)(const char *fmt, ...), witness_t *));
+static void witness_leveldescendents __P((witness_t *parent, int level));
+static void witness_levelall __P((void));
+static witness_t * witness_get __P((void));
+static void witness_free __P((witness_t *m));
+
+
+static char *ignore_list[] = {
+ "witness lock",
+ "Kdebug", /* breaks rules and may or may not work */
+ "Page Alias", /* sparc only, witness lock won't block intr */
+ NULL
+};
+
+static char *spin_order_list[] = {
+ "sched lock",
+ "log mtx",
+ "zslock", /* sparc only above log, this one is a real hack */
+ "time lock", /* above callout */
+ "callout mtx", /* above wayout */
+ /*
+ * leaf locks
+ */
+ "wayout mtx",
+ "kernel_pmap", /* sparc only, logically equal "pmap" below */
+ "pmap", /* sparc only */
+ NULL
+};
+
+static char *order_list[] = {
+ "tcb", "inp", "so_snd", "so_rcv", "Giant lock", NULL,
+ "udb", "inp", NULL,
+ "unp head", "unp", "so_snd", NULL,
+ "de0", "Giant lock", NULL,
+ "ifnet", "Giant lock", NULL,
+ "fifo", "so_snd", NULL,
+ "hme0", "Giant lock", NULL,
+ "esp0", "Giant lock", NULL,
+ "hfa0", "Giant lock", NULL,
+ "so_rcv", "atm_global", NULL,
+ "so_snd", "atm_global", NULL,
+ "NFS", "Giant lock", NULL,
+ NULL
+};
+
+static char *dup_list[] = {
+ "inp",
+ "process group",
+ "session",
+ "unp",
+ "rtentry",
+ "rawcb",
+ NULL
+};
+
+static char *sleep_list[] = {
+ "Giant lock",
+ NULL
+};
+
+/*
+ * Pairs of locks which have been blessed
+ * Don't complain about order problems with blessed locks
+ */
+static witness_blessed_t blessed_list[] = {
+};
+static int blessed_count = sizeof (blessed_list) / sizeof (witness_blessed_t);
+
+void
+witness_init(mtx_t *m, int flag)
+{
+ m->mtx_witness = enroll(m->mtx_description, flag);
+}
+
+void
+witness_destroy(mtx_t *m)
+{
+ mtx_t *m1;
+ struct proc *p;
+ p = CURPROC;
+ for ((m1 = LIST_FIRST(&p->p_heldmtx)); m1 != NULL;
+ m1 = LIST_NEXT(m1, mtx_held)) {
+ if (m1 == m) {
+ LIST_REMOVE(m, mtx_held);
+ break;
+ }
+ }
+ return;
+
+}
+
+void
+witness_enter(mtx_t *m, int flags, char *file, int line)
+{
+ witness_t *w, *w1;
+ mtx_t *m1;
+ struct proc *p;
+ int i;
+#ifdef KDEBUG
+ int go_into_kdebug = 0;
+#endif /* KDEBUG */
+
+ w = m->mtx_witness;
+ p = CURPROC;
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_enter: MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ i = witness_spin_check;
+ if (i != 0 && w->w_level < i) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ panic("mutex_enter(%s:%x, MTX_SPIN) out of order @ %s:%d"
+ " already holding %s:%x",
+ m->mtx_description, w->w_level, file, line,
+ spin_order_list[ffs(i)-1], i);
+ }
+ PCPU_SET(witness_spin_check, i | w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+ if (w->w_spin)
+ panic("mutex_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+ if (witness_dead)
+ goto out;
+ if (cold)
+ goto out;
+
+ if (!mtx_legal2block())
+ panic("blockable mtx_enter() of %s when not legal @ %s:%d",
+ m->mtx_description, file, line);
+ /*
+ * Is this the first mutex acquired
+ */
+ if ((m1 = LIST_FIRST(&p->p_heldmtx)) == NULL)
+ goto out;
+
+
+ if ((w1 = m1->mtx_witness) == w) {
+ if (w->w_same_squawked || dup_ok(w))
+ goto out;
+ w->w_same_squawked = 1;
+		printf("acquiring duplicate lock of same type: \"%s\"\n",
+ m->mtx_description);
+ printf(" 1st @ %s:%d\n", w->w_file, w->w_line);
+ printf(" 2nd @ %s:%d\n", file, line);
+#ifdef KDEBUG
+ go_into_kdebug = 1;
+#endif /* KDEBUG */
+ goto out;
+ }
+ MPASS(!mtx_owned(&w_mtx));
+ mtx_enter(&w_mtx, MTX_SPIN);
+ /*
+ * If we have a known higher number just say ok
+ */
+ if (witness_watch > 1 && w->w_level > w1->w_level) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ goto out;
+ }
+ if (isitmydescendant(m1->mtx_witness, w)) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ goto out;
+ }
+ for (i = 0; m1 != NULL; m1 = LIST_NEXT(m1, mtx_held), i++) {
+
+ ASS(i < 200);
+ w1 = m1->mtx_witness;
+ if (isitmydescendant(w, w1)) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ if (blessed(w, w1))
+ goto out;
+ if (m1 == &Giant) {
+ if (w1->w_Giant_squawked)
+ goto out;
+ else
+ w1->w_Giant_squawked = 1;
+ } else {
+ if (w1->w_other_squawked)
+ goto out;
+ else
+ w1->w_other_squawked = 1;
+ }
+ printf("lock order reversal\n");
+ printf(" 1st %s last acquired @ %s:%d\n",
+ w->w_description, w->w_file, w->w_line);
+ printf(" 2nd %p %s @ %s:%d\n",
+ m1, w1->w_description, w1->w_file, w1->w_line);
+ printf(" 3rd %p %s @ %s:%d\n",
+ m, w->w_description, file, line);
+#ifdef KDEBUG
+ go_into_kdebug = 1;
+#endif /* KDEBUG */
+ goto out;
+ }
+ }
+ m1 = LIST_FIRST(&p->p_heldmtx);
+ if (!itismychild(m1->mtx_witness, w))
+ mtx_exit(&w_mtx, MTX_SPIN);
+
+out:
+#ifdef KDEBUG
+ if (witness_kdebug && go_into_kdebug)
+ kdebug();
+#endif /* KDEBUG */
+ w->w_file = file;
+ w->w_line = line;
+ m->mtx_line = line;
+ m->mtx_file = file;
+
+ /*
+ * If this pays off it likely means that a mutex being witnessed
+ * is acquired in hardclock. Put it in the ignore list. It is
+ * likely not the mutex this assert fails on.
+ */
+ ASS(m->mtx_held.le_prev == NULL);
+ LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
+}
+
+void
+witness_exit(mtx_t *m, int flags, char *file, int line)
+{
+ witness_t *w;
+
+ w = m->mtx_witness;
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_exit: MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ PCPU_SET(witness_spin_check, witness_spin_check & ~w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+ if (w->w_spin)
+ panic("mutex_exit: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+
+ if ((flags & MTX_NOSWITCH) == 0 && !mtx_legal2block() && !cold)
+ panic("switchable mtx_exit() of %s when not legal @ %s:%d",
+ m->mtx_description, file, line);
+ LIST_REMOVE(m, mtx_held);
+ m->mtx_held.le_prev = NULL;
+}
+
+void
+witness_try_enter(mtx_t *m, int flags, char *file, int line)
+{
+ struct proc *p;
+ witness_t *w = m->mtx_witness;
+
+
+ if (flags & MTX_SPIN) {
+ if (!w->w_spin)
+ panic("mutex_try_enter: "
+ "MTX_SPIN on MTX_DEF mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+ if (m->mtx_recurse != 0)
+ return;
+ mtx_enter(&w_mtx, MTX_SPIN);
+ PCPU_SET(witness_spin_check, witness_spin_check | w->w_level);
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return;
+ }
+
+ if (w->w_spin)
+ panic("mutex_try_enter: MTX_DEF on MTX_SPIN mutex %s @ %s:%d",
+ m->mtx_description, file, line);
+
+ if (m->mtx_recurse != 0)
+ return;
+
+ w->w_file = file;
+ w->w_line = line;
+ m->mtx_line = line;
+ m->mtx_file = file;
+ p = CURPROC;
+ ASS(m->mtx_held.le_prev == NULL);
+ LIST_INSERT_HEAD(&p->p_heldmtx, (struct mtx*)m, mtx_held);
+}
+
+void
+witness_display(void(*prnt)(const char *fmt, ...))
+{
+ witness_t *w, *w1;
+
+ witness_levelall();
+
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_file == NULL)
+ continue;
+ for (w1 = w_all; w1; w1 = w1->w_next) {
+ if (isitmychild(w1, w))
+ break;
+ }
+ if (w1 != NULL)
+ continue;
+ /*
+		 * This lock has no ancestors, display its descendants.
+ */
+ witness_displaydescendants(prnt, w);
+ }
+	prnt("\nMutexes which were never acquired\n");
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_file != NULL)
+ continue;
+ prnt("%s\n", w->w_description);
+ }
+}
+
+int
+witness_sleep(int check_only, mtx_t *mtx, char *file, int line)
+{
+ mtx_t *m;
+ struct proc *p;
+ char **sleep;
+ int n = 0;
+
+ p = CURPROC;
+ for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
+ m = LIST_NEXT(m, mtx_held)) {
+ if (m == mtx)
+ continue;
+ for (sleep = sleep_list; *sleep!= NULL; sleep++)
+ if (strcmp(m->mtx_description, *sleep) == 0)
+ goto next;
+ printf("%s:%d: %s with \"%s\" locked from %s:%d\n",
+ file, line, check_only ? "could sleep" : "sleeping",
+ m->mtx_description,
+ m->mtx_witness->w_file, m->mtx_witness->w_line);
+ n++;
+ next:
+ }
+#ifdef KDEBUG
+ if (witness_kdebug && n)
+ kdebug();
+#endif /* KDEBUG */
+ return (n);
+}
+
+static witness_t *
+enroll(char *description, int flag)
+{
+ int i;
+ witness_t *w, *w1;
+ char **ignore;
+ char **order;
+
+ if (!witness_watch)
+ return (NULL);
+ for (ignore = ignore_list; *ignore != NULL; ignore++)
+ if (strcmp(description, *ignore) == 0)
+ return (NULL);
+
+ if (w_inited == 0) {
+ mtx_init(&w_mtx, "witness lock", MTX_DEF);
+ for (i = 0; i < WITNESS_COUNT; i++) {
+ w = &w_data[i];
+ witness_free(w);
+ }
+ w_inited = 1;
+ for (order = order_list; *order != NULL; order++) {
+ w = enroll(*order, MTX_DEF);
+ w->w_file = "order list";
+ for (order++; *order != NULL; order++) {
+ w1 = enroll(*order, MTX_DEF);
+ w1->w_file = "order list";
+ itismychild(w, w1);
+ w = w1;
+ }
+ }
+ }
+ if ((flag & MTX_SPIN) && witness_skipspin)
+ return (NULL);
+ mtx_enter(&w_mtx, MTX_SPIN);
+ for (w = w_all; w; w = w->w_next) {
+ if (strcmp(description, w->w_description) == 0) {
+ mtx_exit(&w_mtx, MTX_SPIN);
+ return (w);
+ }
+ }
+ if ((w = witness_get()) == NULL)
+ return (NULL);
+ w->w_next = w_all;
+ w_all = w;
+ w->w_description = description;
+ mtx_exit(&w_mtx, MTX_SPIN);
+ if (flag & MTX_SPIN) {
+ w->w_spin = 1;
+
+ i = 1;
+ for (order = spin_order_list; *order != NULL; order++) {
+ if (strcmp(description, *order) == 0)
+ break;
+ i <<= 1;
+ }
+ if (*order == NULL)
+ panic("spin lock %s not in order list", description);
+ w->w_level = i;
+ }
+ return (w);
+}
+
+static int
+itismychild(witness_t *parent, witness_t *child)
+{
+ static int recursed;
+
+ /*
+ * Insert "child" after "parent"
+ */
+ while (parent->w_morechildren)
+ parent = parent->w_morechildren;
+
+ if (parent->w_childcnt == WITNESS_NCHILDREN) {
+ if ((parent->w_morechildren = witness_get()) == NULL)
+ return (1);
+ parent = parent->w_morechildren;
+ }
+ ASS(child != NULL);
+ parent->w_children[parent->w_childcnt++] = child;
+ /*
+ * now prune whole tree
+ */
+ if (recursed)
+ return (0);
+ recursed = 1;
+ for (child = w_all; child != NULL; child = child->w_next) {
+ for (parent = w_all; parent != NULL;
+ parent = parent->w_next) {
+ if (!isitmychild(parent, child))
+ continue;
+ removechild(parent, child);
+ if (isitmydescendant(parent, child))
+ continue;
+ itismychild(parent, child);
+ }
+ }
+ recursed = 0;
+ witness_levelall();
+ return (0);
+}
+
+static void
+removechild(witness_t *parent, witness_t *child)
+{
+ witness_t *w, *w1;
+ int i;
+
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ if (w->w_children[i] == child)
+ goto found;
+ return;
+found:
+ for (w1 = w; w1->w_morechildren != NULL; w1 = w1->w_morechildren)
+ continue;
+ w->w_children[i] = w1->w_children[--w1->w_childcnt];
+ ASS(w->w_children[i] != NULL);
+
+ if (w1->w_childcnt != 0)
+ return;
+
+ if (w1 == parent)
+ return;
+ for (w = parent; w->w_morechildren != w1; w = w->w_morechildren)
+ continue;
+ w->w_morechildren = 0;
+ witness_free(w1);
+}
+
+static int
+isitmychild(witness_t *parent, witness_t *child)
+{
+ witness_t *w;
+ int i;
+
+ for (w = parent; w != NULL; w = w->w_morechildren) {
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (w->w_children[i] == child)
+ return (1);
+ }
+ }
+ return (0);
+}
+
+static int
+isitmydescendant(witness_t *parent, witness_t *child)
+{
+ witness_t *w;
+ int i;
+ int j;
+
+ for (j = 0, w = parent; w != NULL; w = w->w_morechildren, j++) {
+ ASS(j < 1000);
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (w->w_children[i] == child)
+ return (1);
+ }
+ for (i = 0; i < w->w_childcnt; i++) {
+ if (isitmydescendant(w->w_children[i], child))
+ return (1);
+ }
+ }
+ return (0);
+}
+
+void
+witness_levelall (void)
+{
+ witness_t *w, *w1;
+
+ for (w = w_all; w; w = w->w_next)
+ if (!w->w_spin)
+ w->w_level = 0;
+ for (w = w_all; w; w = w->w_next) {
+ if (w->w_spin)
+ continue;
+ for (w1 = w_all; w1; w1 = w1->w_next) {
+ if (isitmychild(w1, w))
+ break;
+ }
+ if (w1 != NULL)
+ continue;
+ witness_leveldescendents(w, 0);
+ }
+}
+
+static void
+witness_leveldescendents(witness_t *parent, int level)
+{
+ int i;
+ witness_t *w;
+
+ if (parent->w_level < level)
+ parent->w_level = level;
+ level++;
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ witness_leveldescendents(w->w_children[i], level);
+}
+
+static void
+witness_displaydescendants(void(*prnt)(const char *fmt, ...), witness_t *parent)
+{
+ witness_t *w;
+ int i;
+ int level = parent->w_level;
+
+ prnt("%d", level);
+ if (level < 10)
+ prnt(" ");
+ for (i = 0; i < level; i++)
+ prnt(" ");
+ prnt("%s", parent->w_description);
+ if (parent->w_file != NULL) {
+ prnt(" -- last acquired @ %s", parent->w_file);
+#ifndef W_USE_WHERE
+ prnt(":%d", parent->w_line);
+#endif
+ prnt("\n");
+ }
+
+ for (w = parent; w != NULL; w = w->w_morechildren)
+ for (i = 0; i < w->w_childcnt; i++)
+ witness_displaydescendants(prnt, w->w_children[i]);
+ }
+
+static int
+dup_ok(witness_t *w)
+{
+ char **dup;
+
+ for (dup = dup_list; *dup!= NULL; dup++)
+ if (strcmp(w->w_description, *dup) == 0)
+ return (1);
+ return (0);
+}
+
+static int
+blessed(witness_t *w1, witness_t *w2)
+{
+ int i;
+ witness_blessed_t *b;
+
+ for (i = 0; i < blessed_count; i++) {
+ b = &blessed_list[i];
+ if (strcmp(w1->w_description, b->b_lock1) == 0) {
+ if (strcmp(w2->w_description, b->b_lock2) == 0)
+ return (1);
+ continue;
+ }
+ if (strcmp(w1->w_description, b->b_lock2) == 0)
+ if (strcmp(w2->w_description, b->b_lock1) == 0)
+ return (1);
+ }
+ return (0);
+}
+
+static witness_t *
+witness_get()
+{
+ witness_t *w;
+
+ if ((w = w_free) == NULL) {
+ witness_dead = 1;
+ mtx_exit(&w_mtx, MTX_SPIN);
+ printf("witness exhausted\n");
+ return (NULL);
+ }
+ w_free = w->w_next;
+ bzero(w, sizeof (*w));
+ return (w);
+}
+
+static void
+witness_free(witness_t *w)
+{
+ w->w_next = w_free;
+ w_free = w;
+}
+
+void
+witness_list(struct proc *p)
+{
+ mtx_t *m;
+
+ for ((m = LIST_FIRST(&p->p_heldmtx)); m != NULL;
+ m = LIST_NEXT(m, mtx_held)) {
+ printf("\t\"%s\" (%p) locked at %s:%d\n",
+ m->mtx_description, m,
+ m->mtx_witness->w_file, m->mtx_witness->w_line);
+ }
+}
+
+void
+witness_save(mtx_t *m, char **filep, int *linep)
+{
+ *filep = m->mtx_witness->w_file;
+ *linep = m->mtx_witness->w_line;
+}
+
+void
+witness_restore(mtx_t *m, char *file, int line)
+{
+ m->mtx_witness->w_file = file;
+ m->mtx_witness->w_line = line;
+}
+
+#endif /* (defined(SMP_DEBUG) && defined(WITNESS)) */
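
witness_sleep() above is the check behind the "sleeping with ... locked" diagnostics: it walks the current process's p_heldmtx list and reports every held mutex that is neither on sleep_list (Giant) nor the interlock passed in. A caller-side sketch, assuming tsleep() and the WITNESS_SLEEP() macro from the new machine/mutex.h (the function, wait channel, and the simplified, non-atomic release/sleep sequence are all illustrative):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <machine/mutex.h>

static void
example_wait(void *chan, mtx_t *interlock)	/* illustrative only */
{
	/*
	 * check == 1: report "could sleep" for any held mutex other
	 * than Giant or the interlock we are about to drop; the
	 * interlock itself is exempt from the check.
	 */
	WITNESS_SLEEP(1, interlock);
	mtx_exit(interlock, MTX_DEF);
	tsleep(chan, PZERO, "examplewt", 0);	/* simplified: not atomic */
	mtx_enter(interlock, MTX_DEF);
}
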
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index 29b6288..87fb980 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -2266,7 +2266,8 @@ ttyinfo(tp)
tmp = (pick->p_pctcpu * 10000 + FSCALE / 2) >> FSHIFT;
ttyprintf(tp, "%d%% %ldk\n",
tmp / 100,
- pick->p_stat == SIDL || pick->p_stat == SZOMB ? 0 :
+ pick->p_stat == SIDL || pick->p_stat == SWAIT ||
+ pick->p_stat == SZOMB ? 0 :
(long)pgtok(vmspace_resident_count(pick->p_vmspace)));
}
tp->t_rocount = 0; /* so pending input will be retyped if BS */
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 99c0754..34cff17 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -37,6 +37,7 @@
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
+#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/resourcevar.h>
@@ -52,6 +53,8 @@
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
+#include <machine/mutex.h>
+
static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer");
struct bio_ops bioops; /* I/O operation notification */
@@ -461,7 +464,7 @@ bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
/* if not found in cache, do some I/O */
if ((bp->b_flags & B_CACHE) == 0) {
- if (curproc != NULL)
+ if (curproc != idleproc)
curproc->p_stats->p_ru.ru_inblock++;
KASSERT(!(bp->b_flags & B_ASYNC), ("bread: illegal async bp %p", bp));
bp->b_iocmd = BIO_READ;
@@ -498,7 +501,7 @@ breadn(struct vnode * vp, daddr_t blkno, int size,
/* if not found in cache, do some I/O */
if ((bp->b_flags & B_CACHE) == 0) {
- if (curproc != NULL)
+ if (curproc != idleproc)
curproc->p_stats->p_ru.ru_inblock++;
bp->b_iocmd = BIO_READ;
bp->b_flags &= ~B_INVAL;
@@ -519,7 +522,7 @@ breadn(struct vnode * vp, daddr_t blkno, int size,
rabp = getblk(vp, *rablkno, *rabsize, 0, 0);
if ((rabp->b_flags & B_CACHE) == 0) {
- if (curproc != NULL)
+ if (curproc != idleproc)
curproc->p_stats->p_ru.ru_inblock++;
rabp->b_flags |= B_ASYNC;
rabp->b_flags &= ~B_INVAL;
@@ -640,7 +643,7 @@ bwrite(struct buf * bp)
bp->b_vp->v_numoutput++;
vfs_busy_pages(bp, 1);
- if (curproc != NULL)
+ if (curproc != idleproc)
curproc->p_stats->p_ru.ru_oublock++;
splx(s);
if (oldflags & B_ASYNC)
@@ -1420,7 +1423,8 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
int isspecial;
static int flushingbufs;
- if (curproc && (curproc->p_flag & (P_COWINPROGRESS|P_BUFEXHAUST)) == 0)
+ if (curproc != idleproc &&
+ (curproc->p_flag & (P_COWINPROGRESS|P_BUFEXHAUST)) == 0)
isspecial = 0;
else
isspecial = 1;
@@ -1745,6 +1749,8 @@ buf_daemon()
{
int s;
+ mtx_enter(&Giant, MTX_DEF);
+
/*
* This process needs to be suspended prior to shutdown sync.
*/
@@ -2070,9 +2076,9 @@ loop:
* move it into the else, when gbincore() fails. At the moment
* it isn't a problem.
*/
- if (!curproc || (curproc->p_flag & P_BUFEXHAUST)) {
+ if (curproc == idleproc || (curproc->p_flag & P_BUFEXHAUST)) {
if (numfreebuffers == 0) {
- if (!curproc)
+ if (curproc == idleproc)
return NULL;
needsbuffer |= VFS_BIO_NEED_ANY;
tsleep(&needsbuffer, (PRIBIO + 4) | slpflag, "newbuf",
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 3e4b17f..52ad0ef 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -56,6 +56,7 @@
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
+#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
@@ -68,6 +69,7 @@
#include <sys/vnode.h>
#include <machine/limits.h>
+#include <machine/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -960,6 +962,8 @@ sched_sync(void)
int s;
struct proc *p = updateproc;
+ mtx_enter(&Giant, MTX_DEF);
+
EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
SHUTDOWN_PRI_LAST);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 3e4b17f..52ad0ef 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -56,6 +56,7 @@
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
+#include <sys/ktr.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
@@ -68,6 +69,7 @@
#include <sys/vnode.h>
#include <machine/limits.h>
+#include <machine/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -960,6 +962,8 @@ sched_sync(void)
int s;
struct proc *p = updateproc;
+ mtx_enter(&Giant, MTX_DEF);
+
EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
SHUTDOWN_PRI_LAST);
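
buf_daemon() and sched_sync() above both gain an explicit mtx_enter(&Giant, MTX_DEF) at the top because kernel threads no longer start out holding the old MP lock; until those subsystems are locked more finely they simply run under Giant. A minimal sketch of that kthread shape, assuming the mutex interface from this commit (the thread body and wait channel are illustrative):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <machine/mutex.h>

static void
example_kthread(void *arg)			/* illustrative kthread body */
{
	mtx_enter(&Giant, MTX_DEF);		/* no longer inherited at start */
	for (;;) {
		/* ... periodic, not-yet-MP-safe housekeeping ... */
		tsleep(arg, PVM, "exmplkt", hz);
	}
}
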
diff --git a/sys/modules/if_ppp/Makefile b/sys/modules/if_ppp/Makefile
index 3e6c9a5..c165a49 100644
--- a/sys/modules/if_ppp/Makefile
+++ b/sys/modules/if_ppp/Makefile
@@ -2,8 +2,8 @@
.PATH: ${.CURDIR}/../../net
KMOD= if_ppp
-SRCS= if_ppp.c ppp_tty.c slcompress.c \
- ppp.h opt_inet.h opt_ipx.h opt_ppp.h vnode_if.h
+SRCS= bus_if.h device_if.h if_ppp.c ppp_tty.c slcompress.c \
+ ppp.h opt_bus.h opt_inet.h opt_ipx.h opt_ppp.h vnode_if.h
NOMAN=
NPPP?= 2
diff --git a/sys/modules/netgraph/tty/Makefile b/sys/modules/netgraph/tty/Makefile
index 824b082..3ee6198 100644
--- a/sys/modules/netgraph/tty/Makefile
+++ b/sys/modules/netgraph/tty/Makefile
@@ -2,7 +2,8 @@
# $Whistle: Makefile,v 1.2 1999/01/19 19:39:22 archie Exp $
KMOD= ng_tty
-SRCS= ng_tty.c
+SRCS= ng_tty.c device_if.h bus_if.h pci_if.h
+MFILES= kern/device_if.m kern/bus_if.m pci/agp_if.m pci/pci_if.m
NOMAN=
.include <bsd.kmod.mk>
diff --git a/sys/net/ppp_tty.c b/sys/net/ppp_tty.c
index 2c4a1cf..906de00 100644
--- a/sys/net/ppp_tty.c
+++ b/sys/net/ppp_tty.c
@@ -89,6 +89,8 @@
#include <sys/vnode.h>
#ifdef __i386__
+#include <sys/bus.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#endif
@@ -160,27 +162,6 @@ void
pppasyncattach(dummy)
void *dummy;
{
-#ifdef __i386__
- int s;
-
- s = splhigh();
-
- /*
- * Make sure that the soft net "engine" cannot run while spltty code is
- * active. The if_ppp.c code can walk down into b_to_q etc, and it is
- * bad if the tty system was in the middle of another b_to_q...
- */
- tty_imask |= softnet_imask; /* spltty() block spl[soft]net() */
- net_imask |= softtty_imask; /* splimp() block splsofttty() */
- net_imask |= tty_imask; /* splimp() block spltty() */
- update_intr_masks();
-
- splx(s);
- if ( bootverbose )
- printf("new masks: bio %x, tty %x, net %x\n",
- bio_imask, tty_imask, net_imask);
-#endif
-
/* register line discipline */
linesw[PPPDISC] = pppdisc;
}
diff --git a/sys/netgraph/ng_tty.c b/sys/netgraph/ng_tty.c
index ef2cc5d..70f9fb3 100644
--- a/sys/netgraph/ng_tty.c
+++ b/sys/netgraph/ng_tty.c
@@ -77,6 +77,7 @@
#include <netgraph/ng_tty.h>
#ifdef __i386__ /* fiddle with the spl locking */
+#include <sys/bus.h>
#include <machine/ipl.h>
#include <i386/isa/intr_machdep.h>
#endif
@@ -660,19 +661,6 @@ ngt_mod_event(module_t mod, int event, void *data)
switch (event) {
case MOD_LOAD:
-#ifdef __i386__
- /* Insure the soft net "engine" can't run during spltty code */
- s = splhigh();
- tty_imask |= softnet_imask; /* spltty() block spl[soft]net() */
- net_imask |= softtty_imask; /* splimp() block splsofttty() */
- net_imask |= tty_imask; /* splimp() block spltty() */
- update_intr_masks();
- splx(s);
-
- if (bootverbose)
- log(LOG_DEBUG, "new masks: bio %x, tty %x, net %x\n",
- bio_imask, tty_imask, net_imask);
-#endif
/* Register line discipline */
s = spltty();
diff --git a/sys/nfs/nfs_srvcache.c b/sys/nfs/nfs_srvcache.c
index 9eb168f..6c4af8e 100644
--- a/sys/nfs/nfs_srvcache.c
+++ b/sys/nfs/nfs_srvcache.c
@@ -44,6 +44,7 @@
*/
#include <sys/param.h>
#include <sys/malloc.h>
+#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
diff --git a/sys/nfsserver/nfs_srvcache.c b/sys/nfsserver/nfs_srvcache.c
index 9eb168f..6c4af8e 100644
--- a/sys/nfsserver/nfs_srvcache.c
+++ b/sys/nfsserver/nfs_srvcache.c
@@ -44,6 +44,7 @@
*/
#include <sys/param.h>
#include <sys/malloc.h>
+#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
diff --git a/sys/pci/pci_compat.c b/sys/pci/pci_compat.c
index bf833b1..2e7eba5 100644
--- a/sys/pci/pci_compat.c
+++ b/sys/pci/pci_compat.c
@@ -54,6 +54,8 @@
#endif
#ifdef __i386__
+#include <sys/proc.h>
+#include <i386/isa/icu.h>
#include <i386/isa/intr_machdep.h>
#endif
@@ -141,7 +143,7 @@ pci_map_int_right(pcici_t cfg, pci_inthand_t *handler, void *arg,
#ifdef INTR_FAST
if (intflags & INTR_FAST)
- flags |= INTR_TYPE_FAST;
+ flags |= INTR_FAST;
if (intflags & INTR_EXCL)
resflags &= ~RF_SHAREABLE;
#endif
diff --git a/sys/powerpc/aim/vm_machdep.c b/sys/powerpc/aim/vm_machdep.c
index 8baea02..3831d67 100644
--- a/sys/powerpc/aim/vm_machdep.c
+++ b/sys/powerpc/aim/vm_machdep.c
@@ -84,6 +84,7 @@
#include <machine/fpu.h>
#include <machine/md_var.h>
#include <machine/prom.h>
+#include <machine/mutex.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -246,8 +247,10 @@ cpu_exit(p)
alpha_fpstate_drop(p);
(void) splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -358,7 +361,7 @@ vunmapbuf(bp)
}
/*
- * Force reset the processor by invalidating the entire address space!
+ * Reset back to firmware.
*/
void
cpu_reset()
@@ -416,7 +419,7 @@ vm_page_zero_idle()
return(0);
#ifdef SMP
- if (try_mplock()) {
+ if (KLOCK_ENTER(M_TRY)) {
#endif
s = splvm();
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
@@ -447,7 +450,7 @@ vm_page_zero_idle()
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
#ifdef SMP
- rel_mplock();
+ KLOCK_EXIT;
#endif
return (1);
#ifdef SMP
diff --git a/sys/powerpc/include/globaldata.h b/sys/powerpc/include/globaldata.h
new file mode 100644
index 0000000..b246bb1
--- /dev/null
+++ b/sys/powerpc/include/globaldata.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 1999 Luoqi Chen <luoqi@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+
+/*
+ * This structure maps out the global data that needs to be kept on a
+ * per-cpu basis. genassym uses this to generate offsets for the assembler
+ * code, which also provides external symbols so that C can get at them as
+ * though they were really globals. This structure is pointed to by
+ * the per-cpu system value (see alpha_pal_rdval() and alpha_pal_wrval()).
+ * Inside the kernel, the globally reserved register t7 is used to
+ * point at the globaldata structure.
+ */
+struct globaldata {
+ struct alpha_pcb gd_idlepcb; /* pcb for idling */
+ struct proc *gd_curproc; /* current process */
+ struct proc *gd_idleproc; /* idle process */
+ struct proc *gd_fpcurproc; /* fp state owner */
+ struct pcb *gd_curpcb; /* current pcb */
+ struct timeval gd_switchtime;
+ int gd_switchticks;
+ u_int gd_cpuno; /* this cpu number */
+ u_int gd_other_cpus; /* all other cpus */
+ int gd_inside_intr;
+ u_int64_t gd_idlepcbphys; /* pa of gd_idlepcb */
+ u_int64_t gd_pending_ipis; /* pending IPI events */
+ u_int32_t gd_next_asn; /* next ASN to allocate */
+ u_int32_t gd_current_asngen; /* ASN rollover check */
+ u_int32_t gd_intr_nesting_level; /* interrupt recursion */
+
+ u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+#ifdef KTR_PERCPU
+ volatile int gd_ktr_idx; /* Index into trace table */
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[0];
+#endif
+};
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
+void globaldata_init(struct globaldata *pcpu, int cpuno, size_t sz);
+struct globaldata *globaldata_find(int cpuno);
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_GLOBALDATA_H_ */
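
Each CPU's struct globaldata above is linked onto the global cpuhead list through gd_allcpu, so per-CPU state can be walked with the standard SLIST macros. A small sketch under that assumption (the reporting function is illustrative):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
#include <machine/globaldata.h>

static void
example_report_cpus(void)
{
	struct globaldata *gd;

	/* Walk every CPU's private data area and print a summary. */
	SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
		printf("cpu%u: curproc %p idleproc %p\n",
		    gd->gd_cpuno, gd->gd_curproc, gd->gd_idleproc);
}
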
diff --git a/sys/powerpc/include/globals.h b/sys/powerpc/include/globals.h
new file mode 100644
index 0000000..303efdf
--- /dev/null
+++ b/sys/powerpc/include/globals.h
@@ -0,0 +1,63 @@
+/*-
+ * Copyright (c) 1999 Luoqi Chen <luoqi@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_GLOBALS_H_
+#define _MACHINE_GLOBALS_H_
+
+#ifdef _KERNEL
+
+register struct globaldata *globalp __asm__("$8");
+
+#if 1
+#define GLOBALP globalp
+#else
+#define GLOBALP ((struct globaldata *) alpha_pal_rdval())
+#endif
+
+#define PCPU_GET(name) (GLOBALP->gd_##name)
+#define PCPU_SET(name,value) (GLOBALP->gd_##name = (value))
+
+/*
+ * The following set of macros works for UP kernels as well, but for maximum
+ * performance we allow the global variables to be accessed directly. On the
+ * other hand, kernel modules should always use these macros to maintain
+ * portability between UP and SMP kernels.
+ */
+#define CURPROC PCPU_GET(curproc)
+#define curproc PCPU_GET(curproc)
+#define idleproc PCPU_GET(idleproc)
+#define curpcb PCPU_GET(curpcb)
+#define fpcurproc PCPU_GET(fpcurproc)
+#define switchtime PCPU_GET(switchtime)
+#define switchticks PCPU_GET(switchticks)
+#define cpuid PCPU_GET(cpuno)
+#define prevproc PCPU_GET(curproc) /* XXX - until ithreads */
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_GLOBALS_H_ */
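
globals.h above makes per-CPU data reachable through a reserved register, so code reads and writes the fields with PCPU_GET()/PCPU_SET(), or the curproc/idleproc/cpuid conveniences defined on top of them, instead of true globals. A brief usage sketch assuming only the macros shown above (the function itself is illustrative):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <machine/globals.h>

static void
example_note_switch(void)
{
	struct proc *p = CURPROC;		/* PCPU_GET(curproc) */

	PCPU_SET(switchticks, ticks);		/* per-CPU field, no lock needed */
	if (p == idleproc)			/* PCPU_GET(idleproc) */
		printf("cpu%u is idle\n", cpuid);
}
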
diff --git a/sys/powerpc/include/mutex.h b/sys/powerpc/include/mutex.h
new file mode 100644
index 0000000..ac13b8c
--- /dev/null
+++ b/sys/powerpc/include/mutex.h
@@ -0,0 +1,563 @@
+/*-
+ * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Berkeley Software Design Inc's name may not be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from BSDI $Id: mutex.h,v 2.7.2.35 2000/04/27 03:10:26 cp Exp $
+ * $FreeBSD$
+ */
+
+
+#ifndef _MACHINE_MUTEX_H_
+#define _MACHINE_MUTEX_H_
+
+#ifndef LOCORE
+
+#include <sys/queue.h>
+#include <machine/atomic.h>
+#include <machine/cpufunc.h>
+#include <machine/globaldata.h>
+
+/*
+ * Mutex flags
+ *
+ * Types
+ */
+#define MTX_DEF 0x1 /* Default (spin/sleep) */
+#define MTX_SPIN 0x2 /* Spin only lock */
+
+/* Options */
+#define MTX_RLIKELY 0x4 /* (opt) Recursion likely */
+#define MTX_NORECURSE 0x8 /* No recursion possible */
+#define MTX_NOSPIN 0x10 /* Don't spin before sleeping */
+#define MTX_NOSWITCH 0x20 /* Do not switch on release */
+#define MTX_FIRST 0x40 /* First spin lock holder */
+#define MTX_TOPHALF 0x80 /* Interrupts not disabled on spin */
+
+/* options that should be passed on to mtx_enter_hard, mtx_exit_hard */
+#define MTX_HARDOPTS (MTX_DEF | MTX_SPIN | MTX_FIRST | MTX_TOPHALF | MTX_NOSWITCH)
+
+/* Flags/value used in mtx_lock */
+#define MTX_RECURSE 0x01 /* (non-spin) lock held recursively */
+#define MTX_CONTESTED 0x02 /* (non-spin) lock contested */
+#define MTX_FLAGMASK ~(MTX_RECURSE | MTX_CONTESTED)
+#define MTX_UNOWNED 0x8 /* Cookie for free mutex */
+
+struct proc; /* XXX */
+
+/*
+ * Sleep/spin mutex
+ */
+struct mtx {
+ volatile u_int64_t mtx_lock; /* lock owner/gate/flags */
+ volatile u_int32_t mtx_recurse; /* number of recursive holds */
+ u_int32_t mtx_saveipl; /* saved ipl (for spin locks) */
+ char *mtx_description;
+ TAILQ_HEAD(, proc) mtx_blocked;
+ LIST_ENTRY(mtx) mtx_contested;
+ struct mtx *mtx_next; /* all locks in system */
+ struct mtx *mtx_prev;
+#ifdef SMP_DEBUG
+ /* If you add anything here, adjust the mtxf_t definition below */
+ struct witness *mtx_witness;
+ LIST_ENTRY(mtx) mtx_held;
+ char *mtx_file;
+ int mtx_line;
+#endif /* SMP_DEBUG */
+};
+
+typedef struct mtx mtx_t;
+
+/*
+ * Filler for structs which need to remain the same size
+ * whether or not SMP_DEBUG is turned on.
+ */
+typedef struct mtxf {
+#ifdef SMP_DEBUG
+ char mtxf_data[0];
+#else
+ char mtxf_data[4*sizeof(void *) + sizeof(int)];
+#endif
+} mtxf_t;
+
+#define mp_fixme(string)
+
+#ifdef _KERNEL
+/* Misc */
+#define CURTHD ((u_int64_t)CURPROC) /* Current thread ID */
+
+/* Prototypes */
+void mtx_init(mtx_t *m, char *description, int flag);
+void mtx_enter_hard(mtx_t *, int type, int ipl);
+void mtx_exit_hard(mtx_t *, int type);
+void mtx_destroy(mtx_t *m);
+
+/* Global locks */
+extern mtx_t sched_lock;
+extern mtx_t Giant;
+
+/*
+ * Used to replace return with an exit Giant and return.
+ */
+
+#define EGAR(a) \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return (a); \
+} while (0)
+
+#define VEGAR \
+do { \
+ mtx_exit(&Giant, MTX_DEF); \
+ return; \
+} while (0)
+
+#define DROP_GIANT() \
+do { \
+ int _giantcnt; \
+ WITNESS_SAVE_DECL(Giant); \
+ \
+ WITNESS_SAVE(&Giant, Giant); \
+ for (_giantcnt = 0; mtx_owned(&Giant); _giantcnt++) \
+ mtx_exit(&Giant, MTX_DEF)
+
+#define PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant); \
+} while (0)
+
+#define PARTIAL_PICKUP_GIANT() \
+ mtx_assert(&Giant, MA_NOTOWNED); \
+ while (_giantcnt--) \
+ mtx_enter(&Giant, MTX_DEF); \
+ WITNESS_RESTORE(&Giant, Giant)
+
+
+/*
+ * Debugging
+ */
+#ifndef SMP_DEBUG
+#define mtx_assert(m, what)
+#else /* SMP_DEBUG */
+
+#define MA_OWNED 1
+#define MA_NOTOWNED 2
+#define mtx_assert(m, what) { \
+ switch ((what)) { \
+ case MA_OWNED: \
+ ASS(mtx_owned((m))); \
+ break; \
+ case MA_NOTOWNED: \
+ ASS(!mtx_owned((m))); \
+ break; \
+ default: \
+ panic("unknown mtx_assert at %s:%d", __FILE__, __LINE__); \
+ } \
+}
+
+#ifdef INVARIANTS
+#define ASS(ex) MPASS(ex)
+#define MPASS(ex) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ #ex, __FILE__, __LINE__)
+#define MPASS2(ex, what) if (!(ex)) panic("Assertion %s failed at %s:%d", \
+ what, __FILE__, __LINE__)
+
+#ifdef MTX_STRS
+char STR_IEN[] = "fl & 0x200";
+char STR_IDIS[] = "!(fl & 0x200)";
+#else /* MTX_STRS */
+extern char STR_IEN[];
+extern char STR_IDIS[];
+#endif /* MTX_STRS */
+#define ASS_IEN MPASS2((alpha_pal_rdps() & ALPHA_PSL_IPL_MASK) \
+ == ALPHA_PSL_IPL_HIGH, STR_IEN)
+#define ASS_IDIS MPASS2((alpha_pal_rdps() & ALPHA_PSL_IPL_MASK) \
+ != ALPHA_PSL_IPL_HIGH, STR_IDIS)
+#endif /* INVARIANTS */
+
+#endif /* SMP_DEBUG */
+
+#if !defined(SMP_DEBUG) || !defined(INVARIANTS)
+#define ASS(ex)
+#define MPASS(ex)
+#define MPASS2(ex, where)
+#define ASS_IEN
+#define ASS_IDIS
+#endif /* !defined(SMP_DEBUG) || !defined(INVARIANTS) */
+
+#ifdef WITNESS
+#ifndef SMP_DEBUG
+#error WITNESS requires SMP_DEBUG
+#endif /* SMP_DEBUG */
+#define WITNESS_ENTER(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_enter((m), (f), __FILE__, __LINE__)
+#define WITNESS_EXIT(m, f) \
+ if ((m)->mtx_witness != NULL) \
+ witness_exit((m), (f), __FILE__, __LINE__)
+
+#define WITNESS_SLEEP(check, m) witness_sleep(check, (m), __FILE__, __LINE__)
+#define WITNESS_SAVE_DECL(n) \
+ char * __CONCAT(n, __wf); \
+ int __CONCAT(n, __wl)
+
+#define WITNESS_SAVE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_save(m, &__CONCAT(n, __wf), &__CONCAT(n, __wl)); \
+} while (0)
+
+#define WITNESS_RESTORE(m, n) \
+do { \
+ if ((m)->mtx_witness != NULL) \
+ witness_restore(m, __CONCAT(n, __wf), __CONCAT(n, __wl)); \
+} while (0)
+
+void witness_init(mtx_t *, int flag);
+void witness_destroy(mtx_t *);
+void witness_enter(mtx_t *, int, char *, int);
+void witness_try_enter(mtx_t *, int, char *, int);
+void witness_exit(mtx_t *, int, char *, int);
+void witness_display(void(*)(const char *fmt, ...));
+void witness_list(struct proc *);
+int witness_sleep(int, mtx_t *, char *, int);
+void witness_save(mtx_t *, char **, int *);
+void witness_restore(mtx_t *, char *, int);
+#else /* WITNESS */
+#define WITNESS_ENTER(m, flag)
+#define WITNESS_EXIT(m, flag)
+#define WITNESS_SLEEP(check, m)
+#define WITNESS_SAVE_DECL(n)
+#define WITNESS_SAVE(m, n)
+#define WITNESS_RESTORE(m, n)
+
+/*
+ * flag++ is a sleazy way of silencing the unused-parameter warning
+ * in mtx_init()
+ */
+#define witness_init(m, flag) flag++
+#define witness_destroy(m)
+#define witness_enter(m, flag, f, l)
+#define witness_try_enter(m, flag, f, l )
+#define witness_exit(m, flag, f, l)
+#endif /* WITNESS */
+
+/*
+ * Assembly macros (for internal use only)
+ *--------------------------------------------------------------------------
+ */
+
+/*
+ * Get a sleep lock, deal with recursion inline
+ */
+
+#define _V(x) __STRING(x)
+
+#define _getlock_sleep(mp, tid, type) do { \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) { \
+ if (((mp)->mtx_lock & MTX_FLAGMASK) != (tid)) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, 0); \
+ else { \
+ if (((mp)->mtx_lock & MTX_RECURSE) == 0) \
+ atomic_set_64(&(mp)->mtx_lock, MTX_RECURSE); \
+ (mp)->mtx_recurse++; \
+ } \
+ } else { \
+ alpha_mb(); \
+ } \
+} while (0)
+
+/*
+ * Get a spin lock, handle recursion inline (as the less common case)
+ */
+
+#define _getlock_spin_block(mp, tid, type) do { \
+ u_int _ipl = alpha_pal_rdps() & ALPHA_PSL_IPL_MASK; \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard(mp, (type) & MTX_HARDOPTS, _ipl); \
+ else { \
+ alpha_mb(); \
+ (mp)->mtx_saveipl = _ipl; \
+ } \
+} while (0)
+
+/*
+ * Get a lock without any recursion handling. Calls the hard enter
+ * function if we can't get it inline.
+ */
+
+#define _getlock_norecurse(mp, tid, type) do { \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) == 0) \
+ mtx_enter_hard((mp), (type) & MTX_HARDOPTS, 0); \
+ else \
+ alpha_mb(); \
+} while (0)
+
+/*
+ * Release a sleep lock, assuming we haven't recursed on it; recursion is
+ * handled in the hard function.
+ */
+
+#define _exitlock_norecurse(mp, tid, type) do { \
+ alpha_mb(); \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+} while (0)
+
+/*
+ * Release a sleep lock when it's likely we recursed (the code to
+ * deal with simple recursion is inline).
+ */
+
+#define _exitlock(mp, tid, type) do { \
+ alpha_mb(); \
+ if (atomic_cmpset_64(&(mp)->mtx_lock, (tid), MTX_UNOWNED) == 0) {\
+ if (((mp)->mtx_lock & MTX_RECURSE) && \
+ (--(mp)->mtx_recurse == 0)) \
+ atomic_clear_64(&(mp)->mtx_lock, MTX_RECURSE); \
+ else \
+ mtx_exit_hard((mp), (type) & MTX_HARDOPTS); \
+ } \
+} while (0)
+
+/*
+ * Release a spin lock (with possible recursion)
+ */
+
+#define _exitlock_spin(mp) do { \
+ int _ipl = (mp)->mtx_saveipl; \
+ alpha_mb(); \
+ if ((mp)->mtx_recurse == 0 || (--(mp)->mtx_recurse) == 0) \
+ atomic_cmpset_64(&(mp)->mtx_lock, (mp)->mtx_lock, \
+ MTX_UNOWNED); \
+ alpha_pal_swpipl(_ipl); \
+} while (0)
+
+/*
+ * Externally visible mutex functions
+ *------------------------------------------------------------------------
+ */
+
+/*
+ * Return non-zero if a mutex is already owned by the current thread
+ */
+#define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == CURTHD)
+
+/* Common strings */
+#ifdef MTX_STRS
+char STR_mtx_enter_fmt[] = "GOT %s [%p] at %s:%d r=%d";
+char STR_mtx_bad_type[] = "((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0";
+char STR_mtx_exit_fmt[] = "REL %s [%p] at %s:%d r=%d";
+char STR_mtx_owned[] = "mtx_owned(_mpp)";
+char STR_mtx_recurse[] = "_mpp->mtx_recurse == 0";
+char STR_mtx_try_enter_fmt[] = "TRY_ENTER %s [%p] at %s:%d result=%d";
+#else /* MTX_STRS */
+extern char STR_mtx_enter_fmt[];
+extern char STR_mtx_bad_type[];
+extern char STR_mtx_exit_fmt[];
+extern char STR_mtx_owned[];
+extern char STR_mtx_recurse[];
+extern char STR_mtx_try_enter_fmt[];
+#endif /* MTX_STRS */
+
+/*
+ * Get lock 'm'; the macro handles the easy (and most common) cases and
+ * leaves the slow path to the mtx_enter_hard() function.
+ *
+ * Note: since 'type' is usually a constant, much of this code is optimized out.
+ */
+#define mtx_enter(mtxp, type) do { \
+ mtx_t * _mpp = mtxp; \
+ \
+ /* bits only valid on mtx_exit() */ \
+ MPASS2(((type) & (MTX_NORECURSE | MTX_NOSWITCH)) == 0, STR_mtx_bad_type); \
+ \
+ do { \
+ if ((type) & MTX_SPIN) { \
+ /* \
+ * Easy cases of spin locks: \
+ * \
+ * 1) We already own the lock and will simply \
+ * recurse on it (if RLIKELY) \
+ * \
+ * 2) The lock is free, we just get it \
+ */ \
+ if ((type) & MTX_RLIKELY) { \
+ /* \
+ * Check for recursion, if we already \
+ * have this lock we just bump the \
+ * recursion count. \
+ */ \
+ if (_mpp->mtx_lock == CURTHD) { \
+ _mpp->mtx_recurse++; \
+ break; /* Done */ \
+ } \
+ } \
+ \
+ if (((type) & MTX_TOPHALF) == 0) \
+ /* \
+ * If an interrupt thread uses this \
+ * we must block interrupts here. \
+ */ \
+ _getlock_spin_block(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ else \
+ _getlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ } else { \
+ /* Sleep locks */ \
+ if ((type) & MTX_RLIKELY) \
+ _getlock_sleep(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ else \
+ _getlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ } \
+ } while (0); \
+ WITNESS_ENTER(_mpp, type); \
+ CTR5(KTR_LOCK, STR_mtx_enter_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ (_mpp)->mtx_recurse); \
+} while (0)
+
+/*
+ * Attempt to get MTX_DEF lock, return non-zero if lock acquired
+ *
+ * XXX DOES NOT HANDLE RECURSION
+ */
+#ifdef SMP_DEBUG
+#define mtx_try_enter(mtxp, type) ({ \
+ mtx_t *const _mpp = mtxp; \
+ int _rval; \
+ \
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);\
+ if (_rval && (_mpp)->mtx_witness != NULL) { \
+ ASS((_mpp)->mtx_recurse == 0); \
+ witness_try_enter(_mpp, type, __FILE__, __LINE__); \
+ } \
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ _rval); \
+ _rval; \
+})
+
+#else /* SMP_DEBUG */
+
+#define mtx_try_enter(mtxp, type) ({ \
+ mtx_t *const _mpp = mtxp; \
+ int _rval; \
+ \
+ _rval = atomic_cmpset_int(&_mpp->mtx_lock, MTX_UNOWNED, CURTHD);\
+ CTR5(KTR_LOCK, STR_mtx_try_enter_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ _rval); \
+ _rval; \
+})
+
+#endif /* SMP_DEBUG */
+
+#if 0
+#define mtx_legal2block() ({ \
+ register int _l2b; \
+ __asm __volatile ( \
+" pushfl;" \
+" popl %%eax;" \
+" andl $0x200, %%eax;" \
+ : "=a" (_l2b) \
+ : \
+ : "cc"); \
+ _l2b; \
+})
+#endif
+
+#define mtx_legal2block() (read_eflags() & 0x200)
+
+/*
+ * Release lock m
+ */
+#define mtx_exit(mtxp, type) do { \
+ mtx_t *const _mpp = mtxp; \
+ \
+ MPASS2(mtx_owned(_mpp), STR_mtx_owned); \
+ WITNESS_EXIT(_mpp, type); \
+ CTR5(KTR_LOCK, STR_mtx_exit_fmt, \
+ (_mpp)->mtx_description, (_mpp), __FILE__, __LINE__, \
+ (_mpp)->mtx_recurse); \
+ if ((type) & MTX_SPIN) { \
+ if ((type) & MTX_NORECURSE) { \
+ MPASS2(_mpp->mtx_recurse == 0, STR_mtx_recurse); \
+ atomic_cmpset_64(&_mpp->mtx_lock, _mpp->mtx_lock, \
+ MTX_UNOWNED); \
+ if (((type) & MTX_TOPHALF) == 0) { \
+ splx(_mpp->mtx_saveipl); \
+ } \
+ } else \
+ if ((type) & MTX_TOPHALF) \
+ _exitlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ else \
+ _exitlock_spin(_mpp); \
+ } else { \
+ /* Handle sleep locks */ \
+ if ((type) & MTX_RLIKELY) \
+ _exitlock(_mpp, CURTHD, (type) & MTX_HARDOPTS); \
+ else \
+ _exitlock_norecurse(_mpp, CURTHD, \
+ (type) & MTX_HARDOPTS); \
+ } \
+} while (0)
+#endif /* _KERNEL */
+
+#else /* !LOCORE */
+
+/*
+ * Simple assembly macros to get and release non-recursive spin locks
+ */
+#define MTX_ENTER(lck) \
+ call_pal PAL_OSF1_rdps; \
+ and v0, ALPHA_PSL_IPL_MASK, v0; \
+1: ldq_l a0, lck+MTX_LOCK; \
+ cmpeq a0, MTX_UNOWNED, a1; \
+ beq a1, 1b; \
+ ldq a0, PC_CURPROC(globalp); \
+ stq_c a0, lck+MTX_LOCK; \
+ beq a0, 1b; \
+ mb; \
+ stl v0, lck+MTX_SAVEIPL; \
+ ldiq a0, ALPHA_PSL_IPL_HIGH; \
+ call_pal PAL_OSF1_swpipl
+
+#define MTX_EXIT(lck) \
+ mb; \
+ ldiq a0, MTX_UNOWNED; \
+ stq a0, lck+MTX_LOCK; \
+ ldl a0, lck+MTX_SAVEIPL; \
+ call_pal PAL_OSF1_swpipl
+
+#endif /* !LOCORE */
+
+#endif /* !_MACHINE_MUTEX_H_ */
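For illustration, a hypothetical consumer of the interface declared above might look like this (a sketch only; the foo_* names are invented and appear nowhere in this commit):

static struct mtx foo_mtx;              /* protects the foo driver's state */

static void
foo_init(void)
{
        mtx_init(&foo_mtx, "foo driver lock", MTX_DEF);
}

static void
foo_poll(void *arg)
{
        mtx_enter(&foo_mtx, MTX_DEF);   /* default (sleep) mutex */
        /* ... touch shared driver state ... */
        mtx_exit(&foo_mtx, MTX_DEF);
}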
diff --git a/sys/powerpc/include/pcpu.h b/sys/powerpc/include/pcpu.h
new file mode 100644
index 0000000..b246bb1
--- /dev/null
+++ b/sys/powerpc/include/pcpu.h
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 1999 Luoqi Chen <luoqi@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _MACHINE_GLOBALDATA_H_
+#define _MACHINE_GLOBALDATA_H_
+
+#ifdef _KERNEL
+
+#include <sys/queue.h>
+
+/*
+ * This structure maps out the global data that needs to be kept on a
+ * per-cpu basis. genassym uses this to generate offsets for the assembler
+ * code, which also provides external symbols so that C can get at them as
+ * though they were really globals. This structure is pointed to by
+ * the per-cpu system value (see alpha_pal_rdval() and alpha_pal_wrval()).
+ * Inside the kernel, the globally reserved register t7 is used to
+ * point at the globaldata structure.
+ */
+struct globaldata {
+ struct alpha_pcb gd_idlepcb; /* pcb for idling */
+ struct proc *gd_curproc; /* current process */
+ struct proc *gd_idleproc; /* idle process */
+ struct proc *gd_fpcurproc; /* fp state owner */
+ struct pcb *gd_curpcb; /* current pcb */
+ struct timeval gd_switchtime;
+ int gd_switchticks;
+ u_int gd_cpuno; /* this cpu number */
+ u_int gd_other_cpus; /* all other cpus */
+ int gd_inside_intr;
+ u_int64_t gd_idlepcbphys; /* pa of gd_idlepcb */
+ u_int64_t gd_pending_ipis; /* pending IPI events */
+ u_int32_t gd_next_asn; /* next ASN to allocate */
+ u_int32_t gd_current_asngen; /* ASN rollover check */
+ u_int32_t gd_intr_nesting_level; /* interrupt recursion */
+
+ u_int gd_astpending;
+ SLIST_ENTRY(globaldata) gd_allcpu;
+#ifdef KTR_PERCPU
+ volatile int gd_ktr_idx; /* Index into trace table */
+ char *gd_ktr_buf;
+ char gd_ktr_buf_data[0];
+#endif
+};
+
+SLIST_HEAD(cpuhead, globaldata);
+extern struct cpuhead cpuhead;
+
+void globaldata_init(struct globaldata *pcpu, int cpuno, size_t sz);
+struct globaldata *globaldata_find(int cpuno);
+
+#endif /* _KERNEL */
+
+#endif /* !_MACHINE_GLOBALDATA_H_ */
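A sketch of how the gd_allcpu linkage might be traversed, assuming only the declarations above plus <sys/queue.h>; the function name is hypothetical:

static void
globaldata_dump(void)
{
        struct globaldata *gd;

        SLIST_FOREACH(gd, &cpuhead, gd_allcpu)
                printf("cpu%u: curproc %p, idleproc %p\n",
                    gd->gd_cpuno, (void *)gd->gd_curproc,
                    (void *)gd->gd_idleproc);
}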
diff --git a/sys/powerpc/powerpc/genassym.c b/sys/powerpc/powerpc/genassym.c
index a67f2d1..066d87b 100644
--- a/sys/powerpc/powerpc/genassym.c
+++ b/sys/powerpc/powerpc/genassym.c
@@ -51,8 +51,11 @@
#include <sys/socket.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
+#include <sys/ktr.h>
#include <machine/frame.h>
#include <machine/chipset.h>
+#include <machine/globaldata.h>
+#include <machine/mutex.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -66,6 +69,21 @@
#include <nfs/nfs.h>
#include <nfs/nfsdiskless.h>
+#include "opt_smp.h"
+
+ASSYM(GD_CURPROC, offsetof(struct globaldata, gd_curproc));
+ASSYM(GD_FPCURPROC, offsetof(struct globaldata, gd_fpcurproc));
+ASSYM(GD_CURPCB, offsetof(struct globaldata, gd_curpcb));
+ASSYM(GD_SWITCHTIME, offsetof(struct globaldata, gd_switchtime));
+ASSYM(GD_CPUNO, offsetof(struct globaldata, gd_cpuno));
+ASSYM(GD_IDLEPCBPHYS, offsetof(struct globaldata, gd_idlepcbphys));
+ASSYM(GD_ASTPENDING, offsetof(struct globaldata, gd_astpending));
+
+ASSYM(MTX_LOCK, offsetof(struct mtx, mtx_lock));
+ASSYM(MTX_RECURSE, offsetof(struct mtx, mtx_recurse));
+ASSYM(MTX_SAVEIPL, offsetof(struct mtx, mtx_saveipl));
+ASSYM(MTX_UNOWNED, MTX_UNOWNED);
+
ASSYM(P_ADDR, offsetof(struct proc, p_addr));
ASSYM(P_MD_FLAGS, offsetof(struct proc, p_md.md_flags));
ASSYM(P_MD_PCBPADDR, offsetof(struct proc, p_md.md_pcbpaddr));
@@ -81,6 +99,7 @@ ASSYM(PTESIZE, PTESIZE);
ASSYM(U_PCB_ONFAULT, offsetof(struct user, u_pcb.pcb_onfault));
ASSYM(U_PCB_HWPCB_KSP, offsetof(struct user, u_pcb.pcb_hw.apcb_ksp));
ASSYM(U_PCB_CONTEXT, offsetof(struct user, u_pcb.pcb_context));
+ASSYM(U_PCB_SCHEDNEST, offsetof(struct user, u_pcb.pcb_schednest));
ASSYM(PCB_HW, offsetof(struct pcb, pcb_hw));
diff --git a/sys/powerpc/powerpc/vm_machdep.c b/sys/powerpc/powerpc/vm_machdep.c
index 8baea02..3831d67 100644
--- a/sys/powerpc/powerpc/vm_machdep.c
+++ b/sys/powerpc/powerpc/vm_machdep.c
@@ -84,6 +84,7 @@
#include <machine/fpu.h>
#include <machine/md_var.h>
#include <machine/prom.h>
+#include <machine/mutex.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -246,8 +247,10 @@ cpu_exit(p)
alpha_fpstate_drop(p);
(void) splhigh();
+ mtx_enter(&sched_lock, MTX_SPIN);
+ mtx_exit(&Giant, MTX_DEF);
cnt.v_swtch++;
- cpu_switch(p);
+ cpu_switch();
panic("cpu_exit");
}
@@ -358,7 +361,7 @@ vunmapbuf(bp)
}
/*
- * Force reset the processor by invalidating the entire address space!
+ * Reset back to firmware.
*/
void
cpu_reset()
@@ -416,7 +419,7 @@ vm_page_zero_idle()
return(0);
#ifdef SMP
- if (try_mplock()) {
+ if (KLOCK_ENTER(M_TRY)) {
#endif
s = splvm();
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
@@ -447,7 +450,7 @@ vm_page_zero_idle()
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
#ifdef SMP
- rel_mplock();
+ KLOCK_EXIT;
#endif
return (1);
#ifdef SMP
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 42424d6..d469a04 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -310,7 +310,7 @@ BUF_KERNPROC(struct buf *bp)
{
struct proc *p = curproc;
- if (p != NULL && bp->b_lock.lk_lockholder == p->p_pid)
+ if (p != idleproc && bp->b_lock.lk_lockholder == p->p_pid)
p->p_locks--;
bp->b_lock.lk_lockholder = LK_KERNPROC;
}
diff --git a/sys/sys/bus.h b/sys/sys/bus.h
index 2eff10c..171728d 100644
--- a/sys/sys/bus.h
+++ b/sys/sys/bus.h
@@ -45,6 +45,14 @@ typedef struct devclass *devclass_t;
typedef void driver_intr_t(void*);
/*
+ * Interrupt type bits. These flags are used both by newbus interrupt
+ * registration (nexus.c) and also in struct intrec, which defines
+ * interrupt properties.
+ *
+ * XXX We should probably revisit this and remove the vestiges of the
+ * spls implicit in names like INTR_TYPE_TTY. In the meantime, don't
+ * confuse things by renaming them (Grog, 18 July 2000).
+ *
* We define this in terms of bits because some devices may belong
* to multiple classes (and therefore need to be included in
* multiple interrupt masks, which is what this really serves to
@@ -57,7 +65,12 @@ enum intr_type {
INTR_TYPE_NET = 4,
INTR_TYPE_CAM = 8,
INTR_TYPE_MISC = 16,
- INTR_TYPE_FAST = 128
+ INTR_HEAVY = 32, /* heavyweight interrupt process */
+ INTR_LIGHT = 64, /* light weight interrupt thread */
+ INTR_THREADED = INTR_LIGHT | INTR_HEAVY, /* any kind of interrupt thread */
+ INTR_FAST = 128,
+ INTR_EXCL = 256, /* exclusive interrupt */
+ INTR_MPSAFE = 512 /* this interrupt is SMP safe */
};
typedef int (*devop_t)(void);
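A sketch of how a driver could combine these bits when registering a handler through the existing bus_setup_intr() interface; the foo_intr handler and softc fields are invented for illustration:

static int
foo_attach(device_t dev)
{
        struct foo_softc *sc = device_get_softc(dev);
        int error;

        error = bus_setup_intr(dev, sc->irq_res,
            INTR_TYPE_NET | INTR_MPSAFE,        /* network class, no Giant */
            foo_intr, sc, &sc->intr_cookie);
        if (error)
                device_printf(dev, "could not set up interrupt\n");
        return (error);
}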
diff --git a/sys/sys/kernel.h b/sys/sys/kernel.h
index f87630d..2f54efe 100644
--- a/sys/sys/kernel.h
+++ b/sys/sys/kernel.h
@@ -119,6 +119,8 @@ enum sysinit_sub_id {
SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/
SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/
SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/
+ SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */
+ SI_SUB_SOFTINTR = 0x2700000, /* start soft interrupt thread */
SI_SUB_DRIVERS = 0x3100000, /* Let Drivers initialize */
SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */
SI_SUB_VFS = 0x4000000, /* virtual file system*/
@@ -150,7 +152,7 @@ enum sysinit_sub_id {
SI_SUB_KTHREAD_BUF = 0xea00000, /* buffer daemon*/
SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/
SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/
- SI_SUB_SMP = 0xf000000, /* idle procs*/
+ SI_SUB_SMP = 0xf000000, /* start the APs*/
SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/
};
diff --git a/sys/sys/kthread.h b/sys/sys/kthread.h
index fb0d3f9..5ca3736 100644
--- a/sys/sys/kthread.h
+++ b/sys/sys/kthread.h
@@ -44,7 +44,7 @@ struct kproc_desc {
void kproc_start __P((const void *));
int kthread_create __P((void (*)(void *), void *, struct proc **,
- const char *, ...)) __printflike(4, 5);
+ int flags, const char *, ...)) __printflike(5, 6);
void kthread_exit __P((int)) __dead2;
int suspend_kproc __P((struct proc *, int));
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 4173fea..900ac5d 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -122,6 +122,10 @@ struct pargs {
struct jail;
+struct mtx;
+
+struct ithd;
+
struct proc {
TAILQ_ENTRY(proc) p_procq; /* run/sleep queue. */
LIST_ENTRY(proc) p_list; /* List of all processes. */
@@ -207,6 +211,9 @@ struct proc {
int p_sig; /* for core dump/debugger XXX */
u_long p_code; /* for core dump/debugger XXX */
struct klist p_klist; /* knotes attached to this process */
+ LIST_HEAD(, mtx) p_heldmtx; /* for debugging code */
+ struct mtx *p_blocked; /* Mutex process is blocked on */
+ LIST_HEAD(, mtx) p_contested; /* contested locks */
/* End area that is zeroed on creation. */
#define p_endzero p_startcopy
@@ -216,8 +223,11 @@ struct proc {
sigset_t p_sigmask; /* Current signal mask. */
stack_t p_sigstk; /* sp & on stack state variable */
+
+ int p_magic; /* Magic number. */
u_char p_priority; /* Process priority. */
u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */
+ u_char p_nativepri; /* Priority before propagation. */
char p_nice; /* Process "nice" value. */
char p_comm[MAXCOMLEN+1];
@@ -244,17 +254,20 @@ struct proc {
struct proc *p_leader;
struct pasleep p_asleep; /* Used by asleep()/await(). */
void *p_emuldata; /* process-specific emulator state data */
+ struct ithd *p_ithd; /* for interrupt threads only */
};
#define p_session p_pgrp->pg_session
#define p_pgid p_pgrp->pg_id
-/* Status values. */
+/* Status values (p_stat) */
#define SIDL 1 /* Process being created by fork. */
#define SRUN 2 /* Currently runnable. */
#define SSLEEP 3 /* Sleeping on an address. */
#define SSTOP 4 /* Process debugging or suspension. */
#define SZOMB 5 /* Awaiting collection by parent. */
+#define SWAIT 6 /* Waiting for interrupt or CPU. */
+#define SMTX 7 /* Blocked on a mutex. */
/* These flags are kept in p_flags. */
#define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */
@@ -293,6 +306,8 @@ struct proc {
#define P_OLDMASK 0x2000000 /* need to restore mask before pause */
#define P_ALTSTACK 0x4000000 /* have alternate signal stack */
+#define P_MAGIC 0xbeefface
+
#define P_CAN_SEE 1
#define P_CAN_KILL 2
#define P_CAN_SCHED 3
@@ -315,6 +330,56 @@ struct pcred {
struct uidinfo *p_uidinfo; /* Per uid resource consumption */
};
+/*
+ * Describe an interrupt thread. There is one of these per irq. BSD/OS makes
+ * this a superset of struct proc, i.e. it_proc is the struct itself and not a
+ * pointer. We point in both directions, because it feels good that way.
+ */
+typedef struct ithd {
+ struct proc *it_proc; /* interrupt process */
+
+ LIST_HEAD(ihhead, intrhand) it_ihhead;
+ LIST_HEAD(srchead, isrc) it_isrchead;
+
+ /* Fields used by all interrupt threads */
+ LIST_ENTRY(ithd) it_list; /* All interrupt threads */
+ int it_need; /* Needs service */
+ int irq; /* irq */
+ struct intrec *it_ih; /* head of handler queue */
+ struct ithd *it_interrupted; /* Who we interrupted */
+
+ /* Fields used only for hard interrupt threads */
+ int it_stray; /* Stray interrupts */
+
+#ifdef APIC_IO
+ /* Used by APIC interrupt sources */
+ int it_needeoi; /* An EOI is needed */
+ int it_blocked; /* at least 1 blocked apic src */
+#endif
+
+ /* stats */
+#ifdef SMP_DEBUG
+ int it_busy; /* failed attempts on runlock */
+ int it_lostneeded; /* Number of it_need races lost */
+ int it_invprio; /* Startup priority inversions */
+#endif
+#ifdef NEEDED
+ /*
+ * These are in the BSD/OS i386 sources only, not in SPARC.
+ * I'm not yet sure we need them.
+ */
+ LIST_HEAD(ihhead, intrhand) it_ihhead;
+ LIST_HEAD(srchead, isrc) it_isrchead;
+
+ /* Fields used by all interrupt threads */
+ LIST_ENTRY(ithd) it_list; /* All interrupt threads */
+
+ /* Fields used only for soft interrupt threads */
+ sifunc_t it_service; /* service routine */
+ int it_cnt; /* number of schedule events */
+
+#endif
+} ithd;
#ifdef _KERNEL
@@ -351,13 +416,13 @@ MALLOC_DECLARE(M_PARGS);
* STOPEVENT is MP SAFE.
*/
extern void stopevent(struct proc*, unsigned int, unsigned int);
-#define STOPEVENT(p,e,v) \
- do { \
- if ((p)->p_stops & (e)) { \
- get_mplock(); \
- stopevent(p,e,v); \
- rel_mplock(); \
- } \
+#define STOPEVENT(p,e,v) \
+ do { \
+ if ((p)->p_stops & (e)) { \
+ mtx_enter(&Giant, MTX_DEF); \
+ stopevent(p,e,v); \
+ mtx_exit(&Giant, MTX_DEF); \
+ } \
} while (0)
/* hold process U-area in memory, normally for ptrace/procfs work */
@@ -381,6 +446,8 @@ extern u_long pgrphash;
#ifndef curproc
extern struct proc *curproc; /* Current running proc. */
+extern struct proc *prevproc; /* Previously running proc. */
+extern struct proc *idleproc; /* Current idle proc. */
extern u_int astpending; /* software interrupt pending */
extern int switchticks; /* `ticks' at last context switch. */
extern struct timeval switchtime; /* Uptime at last context switch */
@@ -398,12 +465,10 @@ extern struct proc *initproc, *pageproc, *updateproc; /* Process slots for init,
#define NQS 32 /* 32 run queues. */
TAILQ_HEAD(rq, proc);
-extern struct rq queues[];
+extern struct rq itqueues[];
extern struct rq rtqueues[];
+extern struct rq queues[];
extern struct rq idqueues[];
-extern int whichqs; /* Bit mask summary of non-empty Q's. */
-extern int whichrtqs; /* Bit mask summary of non-empty Q's. */
-extern int whichidqs; /* Bit mask summary of non-empty Q's. */
/*
* XXX macros for scheduler. Shouldn't be here, but currently needed for
@@ -447,7 +512,8 @@ int suser __P((const struct proc *));
int suser_xxx __P((const struct ucred *cred, const struct proc *proc,
int flag));
void remrunqueue __P((struct proc *));
-void cpu_switch __P((struct proc *));
+void cpu_switch __P((void));
+void cpu_throw __P((void)) __dead2;
void unsleep __P((struct proc *));
void cpu_exit __P((struct proc *)) __dead2;
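As an illustration of the new SMTX state and p_blocked field, debugging code could report a mutex-blocked process along these lines (a sketch only; the helper is hypothetical):

static void
report_blocked(struct proc *p)
{
        if (p->p_stat == SMTX && p->p_blocked != NULL)
                printf("pid %d blocked on mutex %s\n", p->p_pid,
                    p->p_blocked->mtx_description);
}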
diff --git a/sys/sys/rtprio.h b/sys/sys/rtprio.h
index 5178b0e..578afc5 100644
--- a/sys/sys/rtprio.h
+++ b/sys/sys/rtprio.h
@@ -38,11 +38,12 @@
* Process realtime-priority specifications to rtprio.
*/
-/* priority types */
+/* priority types. Start at 1 to catch uninitialized fields. */
-#define RTP_PRIO_REALTIME 0
-#define RTP_PRIO_NORMAL 1
-#define RTP_PRIO_IDLE 2
+#define RTP_PRIO_ITHREAD 1 /* interrupt thread */
+#define RTP_PRIO_REALTIME 2 /* real time process */
+#define RTP_PRIO_NORMAL 3 /* time sharing process */
+#define RTP_PRIO_IDLE 4 /* idle process */
/* RTP_PRIO_FIFO is POSIX.1B SCHED_FIFO.
*/
@@ -64,12 +65,34 @@
#define RTP_SET 1
#ifndef LOCORE
+/*
+ * Scheduling class information. This is strictly speaking not only
+ * for real-time processes. We should replace it with two variables:
+ * class and priority. At the moment we use prio here for real-time
+ * and interrupt processes, and for others we use proc.p_pri. FIXME.
+ */
struct rtprio {
- u_short type;
+ u_short type; /* scheduling class */
u_short prio;
};
#endif
+/*
+ * Interrupt thread priorities, after BSD/OS.
+ */
+#define PI_REALTIME 1 /* very high priority (clock) */
+#define PI_AV 2 /* Audio/video devices */
+#define PI_TTYHIGH 3 /* High priority tty's (small FIFOs) */
+#define PI_TAPE 4 /* Tape devices (high for streaming) */
+#define PI_NET 5 /* Network interfaces */
+#define PI_DISK 6 /* Disks and SCSI */
+#define PI_TTYLOW 7 /* Ttys with big buffers */
+#define PI_DISKLOW 8 /* Disks that do programmed I/O */
+#define PI_DULL 9 /* We don't know or care */
+
+/* Soft interrupt threads */
+#define PI_SOFT 15 /* All soft interrupts */
+
#ifndef _KERNEL
#include <sys/cdefs.h>
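For illustration (a sketch; the helper is hypothetical and not part of this commit), an interrupt thread's scheduling class could be expressed with the new constants as:

static void
ithread_set_class(struct rtprio *rtp)
{
        rtp->type = RTP_PRIO_ITHREAD;   /* scheduling class */
        rtp->prio = PI_NET;             /* e.g. a network interface handler */
}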
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 85ad07b..0d1757f 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -189,6 +189,10 @@ __sigseteq(sigset_t *set1, sigset_t *set2)
#ifdef _KERNEL
+#include <sys/ktr.h>
+#include <sys/systm.h>
+#include <machine/mutex.h>
+
struct pgrp;
struct proc;
struct sigio;
@@ -240,9 +244,9 @@ static __inline int __cursig(struct proc *p)
(!(p->p_flag & P_TRACED) && SIGISEMPTY(tmpset))) {
return(0);
}
- get_mplock();
+ mtx_enter(&Giant, MTX_DEF);
r = issignal(p);
- rel_mplock();
+ mtx_exit(&Giant, MTX_DEF);
return(r);
}
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 69b716b..20d4fa3 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -15,6 +15,9 @@
#ifdef _KERNEL
+#ifdef I386_CPU
+#error SMP not supported with I386_CPU
+#endif
#if defined(SMP) && !defined(APIC_IO)
# error APIC_IO required for SMP, add "options APIC_IO" to your config file.
#endif /* SMP && !APIC_IO */
@@ -57,23 +60,6 @@ extern int bootMP_size;
/* functions in mpboot.s */
void bootMP __P((void));
-/* global data in mplock.s */
-extern u_int mp_lock;
-extern u_int isr_lock;
-#ifdef RECURSIVE_MPINTRLOCK
-extern u_int mpintr_lock;
-#endif /* RECURSIVE_MPINTRLOCK */
-
-/* functions in mplock.s */
-void get_mplock __P((void));
-void rel_mplock __P((void));
-int try_mplock __P((void));
-#ifdef RECURSIVE_MPINTRLOCK
-void get_mpintrlock __P((void));
-void rel_mpintrlock __P((void));
-int try_mpintrlock __P((void));
-#endif /* RECURSIVE_MPINTRLOCK */
-
/* global data in apic_vector.s */
extern volatile u_int stopped_cpus;
extern volatile u_int started_cpus;
@@ -185,23 +171,7 @@ extern int smp_started;
extern volatile int smp_idle_loops;
#endif /* !LOCORE */
-#else /* !SMP && !APIC_IO */
-
-/*
- * Create dummy MP lock empties
- */
-
-static __inline void
-get_mplock(void)
-{
-}
-
-static __inline void
-rel_mplock(void)
-{
-}
-
-#endif
+#endif /* SMP && !APIC_IO */
#endif /* _KERNEL */
#endif /* _MACHINE_SMP_H_ */
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
index 515c69f..83b60d6 100644
--- a/sys/sys/unistd.h
+++ b/sys/sys/unistd.h
@@ -202,7 +202,7 @@
/*
* rfork() options.
*
- * XXX currently, operations without RFPROC set are not supported.
+ * XXX currently, some operations without RFPROC set are not supported.
*/
#define RFNAMEG (1<<0) /* UNIMPL new plan9 `name space' */
#define RFENVG (1<<1) /* UNIMPL copy plan9 `env space' */
@@ -210,14 +210,17 @@
#define RFNOTEG (1<<3) /* UNIMPL create new plan9 `note group' */
#define RFPROC (1<<4) /* change child (else changes curproc) */
#define RFMEM (1<<5) /* share `address space' */
-#define RFNOWAIT (1<<6) /* parent need not wait() on child */
+#define RFNOWAIT (1<<6) /* give child to init */
#define RFCNAMEG (1<<10) /* UNIMPL zero plan9 `name space' */
#define RFCENVG (1<<11) /* UNIMPL zero plan9 `env space' */
-#define RFCFDG (1<<12) /* zero fd table */
+#define RFCFDG (1<<12) /* close all fds, zero fd table */
#define RFTHREAD (1<<13) /* enable kernel thread support */
#define RFSIGSHARE (1<<14) /* share signal handlers */
#define RFLINUXTHPN (1<<16) /* do linux clone exit parent notification */
+#define RFSTOPPED (1<<17) /* leave child in a stopped state */
+#define RFHIGHPID (1<<18) /* use a pid higher than 10 (idleproc) */
#define RFPPWAIT (1<<31) /* parent sleeps until child exits (vfork) */
+#define RFKERNELONLY RFSTOPPED
#endif /* !_POSIX_SOURCE */
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index ba74b31..bbe5f2f 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -57,7 +57,6 @@
#include <ufs/ffs/ffs_extern.h>
#define KERNCRED proc0.p_ucred
-#define CURPROC curproc
#define DEBUG
static int indiracct __P((struct vnode *, struct vnode *, int, ufs_daddr_t,
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index cbc37ad..ad30011 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -133,7 +133,6 @@ static struct malloc_type *memtype[] = {
*/
#define TYPENAME(type) \
((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???")
-#define CURPROC curproc
/*
* End system adaptaion definitions.
*/
diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c
index 906a402..30ce635 100644
--- a/sys/ufs/ufs/ufs_vfsops.c
+++ b/sys/ufs/ufs/ufs_vfsops.c
@@ -43,8 +43,8 @@
#include <sys/param.h>
#include <sys/kernel.h>
-#include <sys/mount.h>
#include <sys/proc.h>
+#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 849a30a..ea39d7f 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -74,9 +74,11 @@
#include <sys/sysctl.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/unistd.h>
#include <machine/limits.h>
+#include <machine/mutex.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -316,8 +318,11 @@ faultin(p)
s = splhigh();
- if (p->p_stat == SRUN)
+ if (p->p_stat == SRUN) {
+ mtx_enter(&sched_lock, MTX_SPIN);
setrunqueue(p);
+ mtx_exit(&sched_lock, MTX_SPIN);
+ }
p->p_flag |= P_INMEM;
@@ -332,6 +337,8 @@ faultin(p)
* This swapin algorithm attempts to swap-in processes only if there
* is enough space for them. Of course, if a process waits for a long
* time, it will be swapped in anyway.
+ *
+ * Giant is still held at this point, to be released in tsleep.
*/
/* ARGSUSED*/
static void
@@ -343,6 +350,8 @@ scheduler(dummy)
struct proc *pp;
int ppri;
+ mtx_assert(&Giant, MA_OWNED);
+
loop:
if (vm_page_count_min()) {
VM_WAIT;
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 66829bb..0f584c8 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -153,6 +153,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
case 0:
continue;
+ case SMTX:
case SSLEEP:
case SSTOP:
if (p->p_flag & P_INMEM) {
@@ -166,6 +167,10 @@ vmtotal(SYSCTL_HANDLER_ARGS)
continue;
break;
+ case SWAIT:
+ totalp->t_sl++;
+ continue;
+
case SRUN:
case SIDL:
if (p->p_flag & P_INMEM)
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 97b221e..d12ecac 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -78,6 +78,7 @@
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/kthread.h>
+#include <sys/ktr.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
@@ -95,6 +96,8 @@
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
+#include <machine/mutex.h>
+
/*
* System initialization
*/
@@ -1280,6 +1283,9 @@ vm_size_t count;
static void
vm_pageout()
{
+
+ mtx_enter(&Giant, MTX_DEF);
+
/*
* Initialize some paging parameters.
*/
@@ -1399,6 +1405,8 @@ vm_daemon()
{
struct proc *p;
+ mtx_enter(&Giant, MTX_DEF);
+
while (TRUE) {
tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
if (vm_pageout_req_swapout) {
diff --git a/usr.bin/top/machine.c b/usr.bin/top/machine.c
index 0b52a70..06b3168 100644
--- a/usr.bin/top/machine.c
+++ b/usr.bin/top/machine.c
@@ -130,7 +130,7 @@ static char up_header[] =
char *state_abbrev[] =
{
- "", "START", "RUN\0\0\0", "SLEEP", "STOP", "ZOMB",
+ "", "START", "RUN\0\0\0", "SLEEP", "STOP", "ZOMB", "WAIT", "MUTEX"
};
@@ -162,10 +162,10 @@ static long cp_diff[CPUSTATES];
/* these are for detailing the process states */
-int process_states[6];
+int process_states[8];
char *procstatenames[] = {
"", " starting, ", " running, ", " sleeping, ", " stopped, ",
- " zombie, ",
+ " zombie, ", " waiting, ", " mutex, ",
NULL
};