summaryrefslogtreecommitdiffstats
path: root/sys/ia64
diff options
context:
space:
mode:
authormarcel <marcel@FreeBSD.org>2009-10-31 22:27:31 +0000
committermarcel <marcel@FreeBSD.org>2009-10-31 22:27:31 +0000
commit943e1b107a9100189f9c88ee1313ce81a7ed1ba5 (patch)
treecbe2460180af94e36765a193aa0c297777924c50 /sys/ia64
parent113d2ed8a6d71af32a5432b6d3892b57eaf4f0cc (diff)
downloadFreeBSD-src-943e1b107a9100189f9c88ee1313ce81a7ed1ba5.zip
FreeBSD-src-943e1b107a9100189f9c88ee1313ce81a7ed1ba5.tar.gz
Reimplement the lazy FP context switching:
o Move all code into a single file for easier maintenance. o Use a single global lock to avoid having to handle either multiple locks or race conditions. o Make sure to disable the high FP registers after saving or dropping them. o Use msleep() to wait for the other CPU to save the high FP registers. This change fixes the high FP inconsistency panics. A single global lock typically serializes too much, which may be noticeable when a lot of threads use the high FP registers, but in that case it's probably better to switch the high FP context synchronously. Put differently: cpu_switch() should switch the high FP registers if the incoming and outgoing threads both use the high FP registers.
Diffstat (limited to 'sys/ia64')
-rw-r--r--sys/ia64/ia64/highfp.c181
-rw-r--r--sys/ia64/ia64/interrupt.c9
-rw-r--r--sys/ia64/ia64/machdep.c75
-rw-r--r--sys/ia64/ia64/trap.c62
-rw-r--r--sys/ia64/ia64/vm_machdep.c5
-rw-r--r--sys/ia64/include/md_var.h2
-rw-r--r--sys/ia64/include/proc.h1
7 files changed, 189 insertions, 146 deletions
diff --git a/sys/ia64/ia64/highfp.c b/sys/ia64/ia64/highfp.c
new file mode 100644
index 0000000..145ee48
--- /dev/null
+++ b/sys/ia64/ia64/highfp.c
@@ -0,0 +1,181 @@
+/*-
+ * Copyright (c) 2009 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/smp.h>
+
+static struct mtx ia64_highfp_mtx; /* single global spin lock; held around every pcb_fpcpu / pc_fpcurthread update in this file */
+
+static void /* SYSINIT hook: initializes the global high FP lock as a spin mutex */
+ia64_highfp_init(void *_) /* the argument is never referenced */
+{
+ mtx_init(&ia64_highfp_mtx, "High FP lock", NULL, MTX_SPIN);
+}
+SYSINIT(ia64_highfp_init, SI_SUB_LOCK, SI_ORDER_ANY, ia64_highfp_init, NULL);
+
+#ifdef SMP
+static int /* returns the msleep_spin() result; both callers in this file ignore it */
+ia64_highfp_ipi(struct pcpu *cpu) /* both callers hold ia64_highfp_mtx when calling */
+{
+ int error;
+
+ ipi_send(cpu, IPI_HIGH_FP); /* ask cpu to save the high FP state it holds */
+ error = msleep_spin(&cpu->pc_fpcurthread, &ia64_highfp_mtx,
+ "High FP", 0); /* sleep on cpu's pc_fpcurthread; presumably woken by wakeup() in ia64_highfp_save_ipi() -- NOTE(review): confirm both sides use the same wait channel */
+ return (error);
+}
+#endif
+
+int /* drops td's live high FP state without saving it; 1 if td had state on some CPU, else 0 */
+ia64_highfp_drop(struct thread *td)
+{
+ struct pcb *pcb;
+ struct pcpu *cpu;
+
+ pcb = td->td_pcb;
+
+ mtx_lock_spin(&ia64_highfp_mtx);
+ cpu = pcb->pcb_fpcpu; /* CPU recorded as holding td's high FP registers, or NULL */
+ if (cpu != NULL) {
+ KASSERT(cpu->pc_fpcurthread == td,
+ ("cpu->pc_fpcurthread != td"));
+ td->td_frame->tf_special.psr |= IA64_PSR_DFH; /* mark the high FP registers disabled for td */
+ pcb->pcb_fpcpu = NULL; /* clear the thread -> CPU link; no save_high_fp() call is made */
+ cpu->pc_fpcurthread = NULL; /* clear the CPU -> thread link */
+ }
+ mtx_unlock_spin(&ia64_highfp_mtx);
+
+ return ((cpu != NULL) ? 1 : 0);
+}
+
+int /* 1 if this CPU already held td's high FP state, 0 if it was restored from td's pcb */
+ia64_highfp_enable(struct thread *td, struct trapframe *tf) /* called from trap() for IA64_VEC_DISABLED_FP taken in user mode */
+{
+ struct pcb *pcb;
+ struct pcpu *cpu;
+ struct thread *td1;
+
+ pcb = td->td_pcb;
+
+ mtx_lock_spin(&ia64_highfp_mtx);
+ KASSERT((tf->tf_special.psr & IA64_PSR_DFH) != 0,
+ ("(tf->tf_special.psr & IA64_PSR_DFH) == 0"));
+ cpu = pcb->pcb_fpcpu; /* CPU recorded as holding td's high FP registers, or NULL */
+#ifdef SMP
+ if (cpu != NULL && cpu != pcpup) { /* td's state is live on another CPU: ask that CPU to save it */
+ KASSERT(cpu->pc_fpcurthread == td,
+ ("cpu->pc_fpcurthread != td"));
+ ia64_highfp_ipi(cpu); /* return value ignored; NOTE(review): per sleep(9) the mutex is presumably released while asleep */
+ }
+#endif
+ td1 = PCPU_GET(fpcurthread); /* thread that owns this CPU's high FP registers, if any */
+ if (td1 != NULL && td1 != td) { /* a different thread owns them: save its state and evict it */
+ KASSERT(td1->td_pcb->pcb_fpcpu == pcpup,
+ ("td1->td_pcb->pcb_fpcpu != pcpup"));
+ save_high_fp(&td1->td_pcb->pcb_high_fp);
+ td1->td_frame->tf_special.psr |= IA64_PSR_DFH; /* mark the high FP registers disabled for the evicted thread */
+ td1->td_pcb->pcb_fpcpu = NULL;
+ PCPU_SET(fpcurthread, NULL);
+ td1 = NULL; /* fall into the restore path below */
+ }
+ if (td1 == NULL) { /* nobody owns the registers now: load td's saved state and record ownership */
+ KASSERT(pcb->pcb_fpcpu == NULL, ("pcb->pcb_fpcpu != NULL"));
+ KASSERT(PCPU_GET(fpcurthread) == NULL,
+ ("PCPU_GET(fpcurthread) != NULL"));
+ restore_high_fp(&pcb->pcb_high_fp);
+ PCPU_SET(fpcurthread, td);
+ pcb->pcb_fpcpu = pcpup;
+ tf->tf_special.psr &= ~IA64_PSR_MFH; /* clear MFH (presumably the "modified" flag) after the restore */
+ }
+ tf->tf_special.psr &= ~IA64_PSR_DFH; /* clear DFH in the trap frame so the high FP registers are enabled on return */
+ mtx_unlock_spin(&ia64_highfp_mtx);
+
+ return ((td1 != NULL) ? 1 : 0); /* td1 is non-NULL here only when it equalled td from the start */
+}
+
+int /* saves td's live high FP state into its pcb; 1 if td had state on some CPU, else 0 */
+ia64_highfp_save(struct thread *td)
+{
+ struct pcb *pcb;
+ struct pcpu *cpu;
+
+ pcb = td->td_pcb;
+
+ mtx_lock_spin(&ia64_highfp_mtx);
+ cpu = pcb->pcb_fpcpu; /* CPU recorded as holding td's high FP registers, or NULL */
+#ifdef SMP
+ if (cpu != NULL && cpu != pcpup) { /* state is live on another CPU: the save is left to that CPU's IPI handler */
+ KASSERT(cpu->pc_fpcurthread == td,
+ ("cpu->pc_fpcurthread != td"));
+ ia64_highfp_ipi(cpu); /* return value ignored */
+ } else
+#endif
+ if (cpu != NULL) { /* state is live on this CPU (the only case without SMP): save it directly */
+ KASSERT(cpu->pc_fpcurthread == td,
+ ("cpu->pc_fpcurthread != td"));
+ save_high_fp(&pcb->pcb_high_fp);
+ td->td_frame->tf_special.psr |= IA64_PSR_DFH; /* mark the high FP registers disabled for td */
+ pcb->pcb_fpcpu = NULL;
+ cpu->pc_fpcurthread = NULL;
+ }
+ mtx_unlock_spin(&ia64_highfp_mtx);
+
+ return ((cpu != NULL) ? 1 : 0); /* based on the value read before any IPI or save */
+}
+
+#ifdef SMP
+int /* 1 if this CPU held some thread's high FP state (now saved), else 0 */
+ia64_highfp_save_ipi(void) /* called from interrupt() when the IPI_HIGH_FP vector arrives */
+{
+ struct thread *td;
+
+ mtx_lock_spin(&ia64_highfp_mtx);
+ td = PCPU_GET(fpcurthread); /* thread that owns this CPU's high FP registers, if any */
+ if (td != NULL) {
+ KASSERT(td->td_pcb->pcb_fpcpu == pcpup,
+ ("td->td_pcb->pcb_fpcpu != pcpup"));
+ save_high_fp(&td->td_pcb->pcb_high_fp);
+ td->td_frame->tf_special.psr |= IA64_PSR_DFH; /* mark the high FP registers disabled for td */
+ td->td_pcb->pcb_fpcpu = NULL;
+ PCPU_SET(fpcurthread, NULL);
+ }
+ mtx_unlock_spin(&ia64_highfp_mtx);
+ wakeup(&PCPU_GET(fpcurthread)); /* issued after unlocking and even when td was NULL; presumably wakes the sleeper in ia64_highfp_ipi() */
+
+ return ((td != NULL) ? 1 : 0);
+}
+#endif
diff --git a/sys/ia64/ia64/interrupt.c b/sys/ia64/ia64/interrupt.c
index b70a807..a2b1ec5 100644
--- a/sys/ia64/ia64/interrupt.c
+++ b/sys/ia64/ia64/interrupt.c
@@ -216,14 +216,7 @@ interrupt(struct trapframe *tf)
asts[PCPU_GET(cpuid)]++;
CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid));
} else if (vector == ipi_vector[IPI_HIGH_FP]) {
- struct thread *thr = PCPU_GET(fpcurthread);
- if (thr != NULL) {
- mtx_lock_spin(&thr->td_md.md_highfp_mtx);
- save_high_fp(&thr->td_pcb->pcb_high_fp);
- thr->td_pcb->pcb_fpcpu = NULL;
- PCPU_SET(fpcurthread, NULL);
- mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
- }
+ ia64_highfp_save_ipi();
} else if (vector == ipi_vector[IPI_RENDEZVOUS]) {
rdvs[PCPU_GET(cpuid)]++;
CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid));
diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c
index 299c9ec..e578e8b 100644
--- a/sys/ia64/ia64/machdep.c
+++ b/sys/ia64/ia64/machdep.c
@@ -1461,81 +1461,6 @@ set_fpregs(struct thread *td, struct fpreg *fpregs)
return (0);
}
-/*
- * High FP register functions.
- */
-
-int
-ia64_highfp_drop(struct thread *td)
-{
- struct pcb *pcb;
- struct pcpu *cpu;
- struct thread *thr;
-
- mtx_lock_spin(&td->td_md.md_highfp_mtx);
- pcb = td->td_pcb;
- cpu = pcb->pcb_fpcpu;
- if (cpu == NULL) {
- mtx_unlock_spin(&td->td_md.md_highfp_mtx);
- return (0);
- }
- pcb->pcb_fpcpu = NULL;
- thr = cpu->pc_fpcurthread;
- cpu->pc_fpcurthread = NULL;
- mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-
- /* Post-mortem sanity checking. */
- KASSERT(thr == td, ("Inconsistent high FP state"));
- return (1);
-}
-
-int
-ia64_highfp_save(struct thread *td)
-{
- struct pcb *pcb;
- struct pcpu *cpu;
- struct thread *thr;
-
- /* Don't save if the high FP registers weren't modified. */
- if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0)
- return (ia64_highfp_drop(td));
-
- mtx_lock_spin(&td->td_md.md_highfp_mtx);
- pcb = td->td_pcb;
- cpu = pcb->pcb_fpcpu;
- if (cpu == NULL) {
- mtx_unlock_spin(&td->td_md.md_highfp_mtx);
- return (0);
- }
-#ifdef SMP
- if (td == curthread)
- sched_pin();
- if (cpu != pcpup) {
- mtx_unlock_spin(&td->td_md.md_highfp_mtx);
- ipi_send(cpu, IPI_HIGH_FP);
- if (td == curthread)
- sched_unpin();
- while (pcb->pcb_fpcpu == cpu)
- DELAY(100);
- return (1);
- } else {
- save_high_fp(&pcb->pcb_high_fp);
- if (td == curthread)
- sched_unpin();
- }
-#else
- save_high_fp(&pcb->pcb_high_fp);
-#endif
- pcb->pcb_fpcpu = NULL;
- thr = cpu->pc_fpcurthread;
- cpu->pc_fpcurthread = NULL;
- mtx_unlock_spin(&td->td_md.md_highfp_mtx);
-
- /* Post-mortem sanity cxhecking. */
- KASSERT(thr == td, ("Inconsistent high FP state"));
- return (1);
-}
-
void
ia64_sync_icache(vm_offset_t va, vm_offset_t sz)
{
diff --git a/sys/ia64/ia64/trap.c b/sys/ia64/ia64/trap.c
index aa31e6c..2633ad2 100644
--- a/sys/ia64/ia64/trap.c
+++ b/sys/ia64/ia64/trap.c
@@ -652,66 +652,10 @@ trap(int vector, struct trapframe *tf)
break;
case IA64_VEC_DISABLED_FP: {
- struct pcpu *pcpu;
- struct pcb *pcb;
- struct thread *thr;
-
- /* Always fatal in kernel. Should never happen. */
- if (!user)
+ if (user)
+ ia64_highfp_enable(td, tf);
+ else
trap_panic(vector, tf);
-
- sched_pin();
- thr = PCPU_GET(fpcurthread);
- if (thr == td) {
- /*
- * Short-circuit handling the trap when this CPU
- * already holds the high FP registers for this
- * thread. We really shouldn't get the trap in the
- * first place, but since it's only a performance
- * issue and not a correctness issue, we emit a
- * message for now, enable the high FP registers and
- * return.
- */
- printf("XXX: bogusly disabled high FP regs\n");
- tf->tf_special.psr &= ~IA64_PSR_DFH;
- sched_unpin();
- goto out;
- } else if (thr != NULL) {
- mtx_lock_spin(&thr->td_md.md_highfp_mtx);
- pcb = thr->td_pcb;
- save_high_fp(&pcb->pcb_high_fp);
- pcb->pcb_fpcpu = NULL;
- PCPU_SET(fpcurthread, NULL);
- mtx_unlock_spin(&thr->td_md.md_highfp_mtx);
- thr = NULL;
- }
-
- mtx_lock_spin(&td->td_md.md_highfp_mtx);
- pcb = td->td_pcb;
- pcpu = pcb->pcb_fpcpu;
-
-#ifdef SMP
- if (pcpu != NULL) {
- mtx_unlock_spin(&td->td_md.md_highfp_mtx);
- ipi_send(pcpu, IPI_HIGH_FP);
- while (pcb->pcb_fpcpu == pcpu)
- DELAY(100);
- mtx_lock_spin(&td->td_md.md_highfp_mtx);
- pcpu = pcb->pcb_fpcpu;
- thr = PCPU_GET(fpcurthread);
- }
-#endif
-
- if (thr == NULL && pcpu == NULL) {
- restore_high_fp(&pcb->pcb_high_fp);
- PCPU_SET(fpcurthread, td);
- pcb->pcb_fpcpu = pcpup;
- tf->tf_special.psr &= ~IA64_PSR_MFH;
- tf->tf_special.psr &= ~IA64_PSR_DFH;
- }
-
- mtx_unlock_spin(&td->td_md.md_highfp_mtx);
- sched_unpin();
goto out;
}
diff --git a/sys/ia64/ia64/vm_machdep.c b/sys/ia64/ia64/vm_machdep.c
index 4259875..e5088a5 100644
--- a/sys/ia64/ia64/vm_machdep.c
+++ b/sys/ia64/ia64/vm_machdep.c
@@ -120,14 +120,11 @@ cpu_thread_alloc(struct thread *td)
sp -= sizeof(struct trapframe);
td->td_frame = (struct trapframe *)sp;
td->td_frame->tf_length = sizeof(struct trapframe);
- mtx_init(&td->td_md.md_highfp_mtx, "High FP lock", NULL, MTX_SPIN);
}
void
cpu_thread_free(struct thread *td)
{
-
- mtx_destroy(&td->td_md.md_highfp_mtx);
}
void
@@ -148,6 +145,8 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
struct pcb *pcb;
struct trapframe *tf;
+ ia64_highfp_save(td0);
+
tf = td->td_frame;
KASSERT(tf != NULL, ("foo"));
bcopy(td0->td_frame, tf, sizeof(*tf));
diff --git a/sys/ia64/include/md_var.h b/sys/ia64/include/md_var.h
index adc4725..6ee4cb4 100644
--- a/sys/ia64/include/md_var.h
+++ b/sys/ia64/include/md_var.h
@@ -86,7 +86,9 @@ int ia64_emulate(struct trapframe *, struct thread *);
int ia64_flush_dirty(struct thread *, struct _special *);
uint64_t ia64_get_hcdp(void);
int ia64_highfp_drop(struct thread *);
+int ia64_highfp_enable(struct thread *, struct trapframe *);
int ia64_highfp_save(struct thread *);
+int ia64_highfp_save_ipi(void);
struct ia64_init_return ia64_init(void);
void ia64_probe_sapics(void);
void ia64_sync_icache(vm_offset_t, vm_size_t);
diff --git a/sys/ia64/include/proc.h b/sys/ia64/include/proc.h
index 5cbc0bf..6bf9c78 100644
--- a/sys/ia64/include/proc.h
+++ b/sys/ia64/include/proc.h
@@ -30,7 +30,6 @@
#define _MACHINE_PROC_H_
struct mdthread {
- struct mtx md_highfp_mtx;
int md_spinlock_count; /* (k) */
int md_saved_intr; /* (k) */
};
OpenPOWER on IntegriCloud