summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorgibbs <gibbs@FreeBSD.org>2013-09-20 05:06:03 +0000
committergibbs <gibbs@FreeBSD.org>2013-09-20 05:06:03 +0000
commita9c07a6f6791cbf5f8350541942b0dab762b0423 (patch)
tree297f8425e6baa2d90b45079c46930ff9d40e9994
parent45812e0f976ce4fd4915dda57316808d8bb5f23e (diff)
downloadFreeBSD-src-a9c07a6f6791cbf5f8350541942b0dab762b0423.zip
FreeBSD-src-a9c07a6f6791cbf5f8350541942b0dab762b0423.tar.gz
Add support for suspend/resume/migration operations when running as a
Xen PVHVM guest. Submitted by: Roger Pau Monné Sponsored by: Citrix Systems R&D Reviewed by: gibbs Approved by: re (blanket Xen) MFC after: 2 weeks sys/amd64/amd64/mp_machdep.c: sys/i386/i386/mp_machdep.c: - Make sure that there are no MMU-related IPIs pending on migration. - Reset pending IPI_BITMAP on resume. - Init vcpu_info on resume. sys/amd64/include/intr_machdep.h: sys/i386/include/intr_machdep.h: sys/x86/acpica/acpi_wakeup.c: sys/x86/x86/intr_machdep.c: sys/x86/isa/atpic.c: sys/x86/x86/io_apic.c: sys/x86/x86/local_apic.c: - Add a "suspend_cancelled" parameter to pic_resume(). For the Xen PIC, restoration of interrupt services differs between the aborted suspend and normal resume cases, so we must provide this information. sys/dev/acpica/acpi_timer.c: sys/dev/xen/timer/timer.c: sys/sys/timetc.h: - Don't swap out "suspend safe" timers across a suspend/resume cycle. This includes the Xen PV and ACPI timers. sys/dev/xen/control/control.c: - Perform proper suspend/resume process for PVHVM: - Suspend all APs before going into suspension; this allows us to reset the vcpu_info on resume for each AP. - Reset shared info page and callback on resume. sys/dev/xen/timer/timer.c: - Implement suspend/resume support for the PV timer. Since FreeBSD doesn't perform a per-cpu resume of the timer, we need to call smp_rendezvous in order to correctly resume the timer on each CPU. sys/dev/xen/xenpci/xenpci.c: - Don't reset the PCI interrupt on each suspend/resume. sys/kern/subr_smp.c: - When suspending a PVHVM domain make sure there are no MMU IPIs in-flight, or we will get a lockup on resume due to the fact that pending event channels are not carried over on migration. - Implement a generic version of restart_cpus that can be used by suspended and stopped cpus. sys/x86/xen/hvm.c: - Implement resume support for the hypercall page and shared info. - Clear vcpu_info so it can be reset by APs when resuming from suspension. 
sys/dev/xen/xenpci/xenpci.c: sys/x86/xen/hvm.c: sys/x86/xen/xen_intr.c: - Support UP kernel configurations. sys/x86/xen/xen_intr.c: - Properly rebind per-CPU VIRQs and IPIs on resume.
-rw-r--r--sys/amd64/amd64/mp_machdep.c16
-rw-r--r--sys/amd64/include/intr_machdep.h4
-rw-r--r--sys/dev/acpica/acpi_timer.c9
-rw-r--r--sys/dev/xen/control/control.c77
-rw-r--r--sys/dev/xen/timer/timer.c88
-rw-r--r--sys/dev/xen/xenpci/xenpci.c18
-rw-r--r--sys/i386/i386/mp_machdep.c16
-rw-r--r--sys/i386/include/intr_machdep.h4
-rw-r--r--sys/kern/subr_smp.c55
-rw-r--r--sys/sys/smp.h1
-rw-r--r--sys/sys/timetc.h4
-rw-r--r--sys/x86/acpica/acpi_wakeup.c2
-rw-r--r--sys/x86/isa/atpic.c4
-rw-r--r--sys/x86/x86/intr_machdep.c4
-rw-r--r--sys/x86/x86/io_apic.c4
-rw-r--r--sys/x86/x86/local_apic.c4
-rw-r--r--sys/x86/xen/hvm.c95
-rw-r--r--sys/x86/xen/xen_intr.c135
-rw-r--r--sys/xen/hvm.h2
19 files changed, 420 insertions, 122 deletions
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 0fdb668..1f02211 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -1468,6 +1468,10 @@ cpususpend_handler(void)
cpu = PCPU_GET(cpuid);
+#ifdef XENHVM
+ mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+#endif
+
if (savectx(susppcbs[cpu])) {
ctx_fpusave(susppcbs[cpu]->pcb_fpususpend);
wbinvd();
@@ -1486,11 +1490,23 @@ cpususpend_handler(void)
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
+#ifdef XENHVM
+ /*
+ * Reset pending bitmap IPIs, because Xen doesn't preserve pending
+ * event channels on migration.
+ */
+ cpu_ipi_pending[cpu] = 0;
+ /* register vcpu_info area */
+ xen_hvm_init_cpu();
+#endif
+
/* Resume MCA and local APIC */
mca_resume();
lapic_setup(0);
CPU_CLR_ATOMIC(cpu, &started_cpus);
+ /* Indicate that we are resumed */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/*
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
index c8b454d..fb71b5a 100644
--- a/sys/amd64/include/intr_machdep.h
+++ b/sys/amd64/include/intr_machdep.h
@@ -102,7 +102,7 @@ struct pic {
int (*pic_vector)(struct intsrc *);
int (*pic_source_pending)(struct intsrc *);
void (*pic_suspend)(struct pic *);
- void (*pic_resume)(struct pic *);
+ void (*pic_resume)(struct pic *, bool suspend_cancelled);
int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
enum intr_polarity);
int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@@ -170,7 +170,7 @@ struct intsrc *intr_lookup_source(int vector);
int intr_register_pic(struct pic *pic);
int intr_register_source(struct intsrc *isrc);
int intr_remove_handler(void *cookie);
-void intr_resume(void);
+void intr_resume(bool suspend_cancelled);
void intr_suspend(void);
void intrcnt_add(const char *name, u_long **countp);
void nexus_add_irq(u_long irq);
diff --git a/sys/dev/acpica/acpi_timer.c b/sys/dev/acpica/acpi_timer.c
index 80e6d18..b974f1f 100644
--- a/sys/dev/acpica/acpi_timer.c
+++ b/sys/dev/acpica/acpi_timer.c
@@ -189,6 +189,7 @@ acpi_timer_probe(device_t dev)
else
acpi_timer_timecounter.tc_counter_mask = 0x00ffffff;
acpi_timer_timecounter.tc_frequency = acpi_timer_frequency;
+ acpi_timer_timecounter.tc_flags = TC_FLAGS_SUSPEND_SAFE;
if (testenv("debug.acpi.timer_test"))
acpi_timer_boot_test();
@@ -285,6 +286,14 @@ acpi_timer_suspend_handler(struct timecounter *newtc)
acpi_timer_eh = NULL;
}
+ if ((timecounter->tc_flags & TC_FLAGS_SUSPEND_SAFE) != 0) {
+ /*
+ * If we are using a suspend safe timecounter, don't
+ * save/restore it across suspend/resume.
+ */
+ return;
+ }
+
KASSERT(newtc == &acpi_timer_timecounter,
("acpi_timer_suspend_handler: wrong timecounter"));
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index 649f281..a74042b 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -119,11 +119,9 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <sys/types.h>
#include <sys/vnode.h>
-
-#ifndef XENHVM
#include <sys/sched.h>
#include <sys/smp.h>
-#endif
+#include <sys/eventhandler.h>
#include <geom/geom.h>
@@ -140,6 +138,10 @@ __FBSDID("$FreeBSD$");
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
+#ifdef XENHVM
+#include <xen/hvm.h>
+#endif
+
#include <xen/interface/event_channel.h>
#include <xen/interface/grant_table.h>
@@ -199,7 +201,7 @@ extern void xencons_resume(void);
static void
xctrl_suspend()
{
- int i, j, k, fpp;
+ int i, j, k, fpp, suspend_cancelled;
unsigned long max_pfn, start_info_mfn;
EVENTHANDLER_INVOKE(power_suspend);
@@ -264,7 +266,7 @@ xctrl_suspend()
*/
start_info_mfn = VTOMFN(xen_start_info);
pmap_suspend();
- HYPERVISOR_suspend(start_info_mfn);
+ suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
pmap_resume();
pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
@@ -287,7 +289,7 @@ xctrl_suspend()
HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
gnttab_resume();
- intr_resume();
+ intr_resume(suspend_cancelled != 0);
local_irq_enable();
xencons_resume();
@@ -331,16 +333,31 @@ xen_pv_shutdown_final(void *arg, int howto)
}
#else
-extern void xenpci_resume(void);
/* HVM mode suspension. */
static void
xctrl_suspend()
{
+#ifdef SMP
+ cpuset_t cpu_suspend_map;
+#endif
int suspend_cancelled;
EVENTHANDLER_INVOKE(power_suspend);
+ if (smp_started) {
+ thread_lock(curthread);
+ sched_bind(curthread, 0);
+ thread_unlock(curthread);
+ }
+ KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
+
+ /*
+ * Clear our XenStore node so the toolstack knows we are
+ * responding to the suspend request.
+ */
+ xs_write(XST_NIL, "control", "shutdown", "");
+
/*
* Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
* drivers need this.
@@ -353,31 +370,67 @@ xctrl_suspend()
}
mtx_unlock(&Giant);
+#ifdef SMP
+ if (smp_started) {
+ /*
+ * Suspend other CPUs. This prevents IPIs while we
+ * are resuming, and will allow us to reset per-cpu
+ * vcpu_info on resume.
+ */
+ cpu_suspend_map = all_cpus;
+ CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
+ if (!CPU_EMPTY(&cpu_suspend_map))
+ suspend_cpus(cpu_suspend_map);
+ }
+#endif
+
/*
* Prevent any races with evtchn_interrupt() handler.
*/
disable_intr();
intr_suspend();
+ xen_hvm_suspend();
suspend_cancelled = HYPERVISOR_suspend(0);
- intr_resume();
+ xen_hvm_resume(suspend_cancelled != 0);
+ intr_resume(suspend_cancelled != 0);
+ enable_intr();
/*
- * Re-enable interrupts and put the scheduler back to normal.
+ * Reset grant table info.
*/
- enable_intr();
+ gnttab_resume();
+
+#ifdef SMP
+ if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
+ /*
+ * Now that event channels have been initialized,
+ * resume CPUs.
+ */
+ resume_cpus(cpu_suspend_map);
+ }
+#endif
/*
* FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
* similar.
*/
mtx_lock(&Giant);
- if (!suspend_cancelled)
- DEVICE_RESUME(root_bus);
+ DEVICE_RESUME(root_bus);
mtx_unlock(&Giant);
+ if (smp_started) {
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
+ }
+
EVENTHANDLER_INVOKE(power_resume);
+
+ if (bootverbose)
+ printf("System resumed after suspension\n");
+
}
#endif
diff --git a/sys/dev/xen/timer/timer.c b/sys/dev/xen/timer/timer.c
index 9c8db59..605e5c5 100644
--- a/sys/dev/xen/timer/timer.c
+++ b/sys/dev/xen/timer/timer.c
@@ -1,4 +1,4 @@
-/**
+/*-
* Copyright (c) 2009 Adrian Chadd
* Copyright (c) 2012 Spectra Logic Corporation
* All rights reserved.
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <machine/clock.h>
#include <machine/_inttypes.h>
+#include <machine/smp.h>
#include "clock_if.h"
@@ -316,7 +317,7 @@ xentimer_settime(device_t dev __unused, struct timespec *ts)
* Don't return EINVAL here; just silently fail if the domain isn't
* privileged enough to set the TOD.
*/
- return(0);
+ return (0);
}
/**
@@ -339,7 +340,7 @@ xentimer_gettime(device_t dev, struct timespec *ts)
xen_fetch_uptime(&u_ts);
timespecadd(ts, &u_ts);
- return(0);
+ return (0);
}
/**
@@ -457,8 +458,9 @@ xentimer_attach(device_t dev)
/* Bind an event channel to a VIRQ on each VCPU. */
CPU_FOREACH(i) {
- struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
+ struct xentimer_pcpu_data *pcpu;
+ pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
if (error) {
device_printf(dev, "Error disabling Xen periodic timer "
@@ -493,6 +495,7 @@ xentimer_attach(device_t dev)
/* Register the timecounter. */
sc->tc.tc_name = "XENTIMER";
sc->tc.tc_quality = XENTIMER_QUALITY;
+ sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE;
/*
* The underlying resolution is in nanoseconds, since the timer info
* scales TSC frequencies using a fraction that represents time in
@@ -523,75 +526,60 @@ xentimer_detach(device_t dev)
return (EBUSY);
}
-/**
- * The following device methods are disabled because they wouldn't work
- * properly.
- */
-#ifdef NOTYET
+static void
+xentimer_percpu_resume(void *arg)
+{
+ device_t dev = (device_t) arg;
+ struct xentimer_softc *sc = device_get_softc(dev);
+
+ xentimer_et_start(&sc->et, sc->et.et_min_period, 0);
+}
+
static int
xentimer_resume(device_t dev)
{
- struct xentimer_softc *sc = device_get_softc(dev);
- int error = 0;
+ int error;
int i;
- device_printf(sc->dev, "%s", __func__);
+ /* Disable the periodic timer */
CPU_FOREACH(i) {
- struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
-
- /* Skip inactive timers. */
- if (pcpu->timer == 0)
- continue;
-
- /*
- * XXX This won't actually work, because Xen requires that
- * singleshot timers be set while running on the given CPU.
- */
- error = xentimer_vcpu_start_timer(i, pcpu->timer);
- if (error == -ETIME) {
- /* Event time has already passed; process. */
- xentimer_intr(sc);
- } else if (error != 0) {
- panic("%s: error %d restarting vcpu %d\n",
- __func__, error, i);
+ error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
+ if (error != 0) {
+ device_printf(dev,
+ "Error disabling Xen periodic timer on CPU %d\n",
+ i);
+ return (error);
}
}
- return (error);
+ /* Reset the last uptime value */
+ xen_timer_last_time = 0;
+
+ /* Reset the RTC clock */
+ inittodr(time_second);
+
+ /* Kick the timers on all CPUs */
+ smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev);
+
+ if (bootverbose)
+ device_printf(dev, "resumed operation after suspension\n");
+
+ return (0);
}
static int
xentimer_suspend(device_t dev)
{
- struct xentimer_softc *sc = device_get_softc(dev);
- int error = 0;
- int i;
-
- device_printf(sc->dev, "%s", __func__);
- CPU_FOREACH(i) {
- struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
-
- /* Skip inactive timers. */
- if (pcpu->timer == 0)
- continue;
- error = xentimer_vcpu_stop_timer(i);
- if (error)
- panic("Error %d stopping VCPU %d timer\n", error, i);
- }
-
- return (error);
+ return (0);
}
-#endif
static device_method_t xentimer_methods[] = {
DEVMETHOD(device_identify, xentimer_identify),
DEVMETHOD(device_probe, xentimer_probe),
DEVMETHOD(device_attach, xentimer_attach),
DEVMETHOD(device_detach, xentimer_detach),
-#ifdef NOTYET
DEVMETHOD(device_suspend, xentimer_suspend),
DEVMETHOD(device_resume, xentimer_resume),
-#endif
/* clock interface */
DEVMETHOD(clock_gettime, xentimer_gettime),
DEVMETHOD(clock_settime, xentimer_settime),
diff --git a/sys/dev/xen/xenpci/xenpci.c b/sys/dev/xen/xenpci/xenpci.c
index 0b1762d..dd2ad92 100644
--- a/sys/dev/xen/xenpci/xenpci.c
+++ b/sys/dev/xen/xenpci/xenpci.c
@@ -77,6 +77,7 @@ xenpci_irq_init(device_t device, struct xenpci_softc *scp)
if (error)
return error;
+#ifdef SMP
/*
* When using the PCI event delivery callback we cannot assign
* events to specific vCPUs, so all events are delivered to vCPU#0 by
@@ -88,6 +89,7 @@ xenpci_irq_init(device_t device, struct xenpci_softc *scp)
scp->res_irq, 0);
if (error)
return error;
+#endif
xen_hvm_set_callback(device);
return (0);
@@ -309,28 +311,12 @@ xenpci_detach(device_t dev)
static int
xenpci_suspend(device_t dev)
{
- struct xenpci_softc *scp = device_get_softc(dev);
- device_t parent = device_get_parent(dev);
-
- if (scp->intr_cookie != NULL) {
- if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq,
- scp->intr_cookie) != 0)
- printf("intr teardown failed.. continuing\n");
- scp->intr_cookie = NULL;
- }
-
return (bus_generic_suspend(dev));
}
static int
xenpci_resume(device_t dev)
{
- struct xenpci_softc *scp = device_get_softc(dev);
- device_t parent = device_get_parent(dev);
-
- BUS_SETUP_INTR(parent, dev, scp->res_irq,
- INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL,
- /*trap_frame*/NULL, &scp->intr_cookie);
xen_hvm_set_callback(dev);
return (bus_generic_resume(dev));
}
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 60b36a2..1d6d58a 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -1529,6 +1529,10 @@ cpususpend_handler(void)
cpu = PCPU_GET(cpuid);
+#ifdef XENHVM
+ mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
+#endif
+
if (savectx(susppcbs[cpu])) {
wbinvd();
CPU_SET_ATOMIC(cpu, &suspended_cpus);
@@ -1545,10 +1549,22 @@ cpususpend_handler(void)
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
+#ifdef XENHVM
+ /*
+ * Reset pending bitmap IPIs, because Xen doesn't preserve pending
+ * event channels on migration.
+ */
+ cpu_ipi_pending[cpu] = 0;
+ /* register vcpu_info area */
+ xen_hvm_init_cpu();
+#endif
+
/* Resume MCA and local APIC */
mca_resume();
lapic_setup(0);
+ /* Indicate that we are resumed */
+ CPU_CLR_ATOMIC(cpu, &suspended_cpus);
CPU_CLR_ATOMIC(cpu, &started_cpus);
}
/*
diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h
index 6bbe378..8fb61a5 100644
--- a/sys/i386/include/intr_machdep.h
+++ b/sys/i386/include/intr_machdep.h
@@ -108,7 +108,7 @@ struct pic {
int (*pic_vector)(struct intsrc *);
int (*pic_source_pending)(struct intsrc *);
void (*pic_suspend)(struct pic *);
- void (*pic_resume)(struct pic *);
+ void (*pic_resume)(struct pic *, bool suspend_cancelled);
int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
enum intr_polarity);
int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@@ -166,7 +166,7 @@ struct intsrc *intr_lookup_source(int vector);
int intr_register_pic(struct pic *pic);
int intr_register_source(struct intsrc *isrc);
int intr_remove_handler(void *cookie);
-void intr_resume(void);
+void intr_resume(bool suspend_cancelled);
void intr_suspend(void);
void intrcnt_add(const char *name, u_long **countp);
void nexus_add_irq(u_long irq);
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 3614798..77d1b2b 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -225,6 +225,18 @@ generic_stop_cpus(cpuset_t map, u_int type)
CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
cpusetobj_strprint(cpusetbuf, &map), type);
+#ifdef XENHVM
+ /*
+ * When migrating a PVHVM domain we need to make sure there are
+ * no IPIs in progress. IPIs that have been issued, but not
+ * yet delivered (not pending on a vCPU) will be lost in the
+ * IPI rebinding process, violating FreeBSD's assumption of
+ * reliable IPI delivery.
+ */
+ if (type == IPI_SUSPEND)
+ mtx_lock_spin(&smp_ipi_mtx);
+#endif
+
if (stopping_cpu != PCPU_GET(cpuid))
while (atomic_cmpset_int(&stopping_cpu, NOCPU,
PCPU_GET(cpuid)) == 0)
@@ -252,6 +264,11 @@ generic_stop_cpus(cpuset_t map, u_int type)
}
}
+#ifdef XENHVM
+ if (type == IPI_SUSPEND)
+ mtx_unlock_spin(&smp_ipi_mtx);
+#endif
+
stopping_cpu = NOCPU;
return (1);
}
@@ -292,28 +309,60 @@ suspend_cpus(cpuset_t map)
* 0: NA
* 1: ok
*/
-int
-restart_cpus(cpuset_t map)
+static int
+generic_restart_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
+ volatile cpuset_t *cpus;
+
+ KASSERT(
+#if defined(__amd64__) || defined(__i386__)
+ type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
+#else
+ type == IPI_STOP || type == IPI_STOP_HARD,
+#endif
+ ("%s: invalid stop type", __func__));
if (!smp_started)
return 0;
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
+#if defined(__amd64__) || defined(__i386__)
+ if (type == IPI_SUSPEND)
+ cpus = &suspended_cpus;
+ else
+#endif
+ cpus = &stopped_cpus;
+
/* signal other cpus to restart */
CPU_COPY_STORE_REL(&map, &started_cpus);
/* wait for each to clear its bit */
- while (CPU_OVERLAP(&stopped_cpus, &map))
+ while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
return 1;
}
+int
+restart_cpus(cpuset_t map)
+{
+
+ return (generic_restart_cpus(map, IPI_STOP));
+}
+
+#if defined(__amd64__) || defined(__i386__)
+int
+resume_cpus(cpuset_t map)
+{
+
+ return (generic_restart_cpus(map, IPI_SUSPEND));
+}
+#endif
+
/*
* All-CPU rendezvous. CPUs are signalled, all execute the setup function
* (if specified), rendezvous, execute the action function (if specified),
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 4abd2ce..fed12bf 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -166,6 +166,7 @@ int stop_cpus(cpuset_t);
int stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int suspend_cpus(cpuset_t);
+int resume_cpus(cpuset_t);
#endif
void smp_rendezvous_action(void);
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
index 4f75c3d..48b962f 100644
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@@ -59,6 +59,10 @@ struct timecounter {
*/
u_int tc_flags;
#define TC_FLAGS_C3STOP 1 /* Timer dies in C3. */
+#define TC_FLAGS_SUSPEND_SAFE 2 /*
+ * Timer functional across
+ * suspend/resume.
+ */
void *tc_priv;
/* Pointer to the timecounter's private parts. */
diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c
index 0c062bf..5008544 100644
--- a/sys/x86/acpica/acpi_wakeup.c
+++ b/sys/x86/acpica/acpi_wakeup.c
@@ -266,7 +266,7 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result,
restart_cpus(suspcpus);
#endif
mca_resume();
- intr_resume();
+ intr_resume(/*suspend_cancelled*/false);
AcpiSetFirmwareWakingVector(0);
} else {
diff --git a/sys/x86/isa/atpic.c b/sys/x86/isa/atpic.c
index ddf3fce..7f6cb14 100644
--- a/sys/x86/isa/atpic.c
+++ b/sys/x86/isa/atpic.c
@@ -123,7 +123,7 @@ static void atpic_eoi_slave(struct intsrc *isrc);
static void atpic_enable_intr(struct intsrc *isrc);
static void atpic_disable_intr(struct intsrc *isrc);
static int atpic_vector(struct intsrc *isrc);
-static void atpic_resume(struct pic *pic);
+static void atpic_resume(struct pic *pic, bool suspend_cancelled);
static int atpic_source_pending(struct intsrc *isrc);
static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
enum intr_polarity pol);
@@ -276,7 +276,7 @@ atpic_source_pending(struct intsrc *isrc)
}
static void
-atpic_resume(struct pic *pic)
+atpic_resume(struct pic *pic, bool suspend_cancelled)
{
struct atpic *ap = (struct atpic *)pic;
diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c
index e21635f..8646637 100644
--- a/sys/x86/x86/intr_machdep.c
+++ b/sys/x86/x86/intr_machdep.c
@@ -279,7 +279,7 @@ intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
}
void
-intr_resume(void)
+intr_resume(bool suspend_cancelled)
{
struct pic *pic;
@@ -289,7 +289,7 @@ intr_resume(void)
mtx_lock(&intr_table_lock);
TAILQ_FOREACH(pic, &pics, pics) {
if (pic->pic_resume != NULL)
- pic->pic_resume(pic);
+ pic->pic_resume(pic, suspend_cancelled);
}
mtx_unlock(&intr_table_lock);
}
diff --git a/sys/x86/x86/io_apic.c b/sys/x86/x86/io_apic.c
index 4df27c2..4467f8f 100644
--- a/sys/x86/x86/io_apic.c
+++ b/sys/x86/x86/io_apic.c
@@ -119,7 +119,7 @@ static int ioapic_vector(struct intsrc *isrc);
static int ioapic_source_pending(struct intsrc *isrc);
static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
enum intr_polarity pol);
-static void ioapic_resume(struct pic *pic);
+static void ioapic_resume(struct pic *pic, bool suspend_cancelled);
static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
static void ioapic_program_intpin(struct ioapic_intsrc *intpin);
@@ -486,7 +486,7 @@ ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
}
static void
-ioapic_resume(struct pic *pic)
+ioapic_resume(struct pic *pic, bool suspend_cancelled)
{
struct ioapic *io = (struct ioapic *)pic;
int i;
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index ac651cd..8c8eef6 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -161,7 +161,7 @@ static u_long lapic_timer_divisor;
static struct eventtimer lapic_et;
static void lapic_enable(void);
-static void lapic_resume(struct pic *pic);
+static void lapic_resume(struct pic *pic, bool suspend_cancelled);
static void lapic_timer_oneshot(struct lapic *,
u_int count, int enable_int);
static void lapic_timer_periodic(struct lapic *,
@@ -566,7 +566,7 @@ lapic_enable(void)
/* Reset the local APIC on the BSP during resume. */
static void
-lapic_resume(struct pic *pic)
+lapic_resume(struct pic *pic, bool suspend_cancelled)
{
lapic_setup(0);
diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c
index 0404fe9..b0c2df6 100644
--- a/sys/x86/xen/hvm.c
+++ b/sys/x86/xen/hvm.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <xen/interface/vcpu.h>
/*--------------------------- Forward Declarations ---------------------------*/
+#ifdef SMP
static driver_filter_t xen_smp_rendezvous_action;
static driver_filter_t xen_invltlb;
static driver_filter_t xen_invlpg;
@@ -70,6 +71,7 @@ static driver_filter_t xen_ipi_bitmap_handler;
static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
static driver_filter_t xen_cpustophard_handler;
+#endif
/*---------------------------- Extern Declarations ---------------------------*/
/* Variables used by mp_machdep to perform the MMU related IPIs */
@@ -93,6 +95,12 @@ extern void pmap_lazyfix_action(void);
#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
/*-------------------------------- Local Types -------------------------------*/
+enum xen_hvm_init_type {
+ XEN_HVM_INIT_COLD,
+ XEN_HVM_INIT_CANCELLED_SUSPEND,
+ XEN_HVM_INIT_RESUME
+};
+
struct xen_ipi_handler
{
driver_filter_t *filter;
@@ -104,6 +112,7 @@ enum xen_domain_type xen_domain_type = XEN_NATIVE;
static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
+#ifdef SMP
static struct xen_ipi_handler xen_ipis[] =
{
[IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" },
@@ -119,6 +128,7 @@ static struct xen_ipi_handler xen_ipis[] =
[IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" },
[IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" },
};
+#endif
/**
* If non-zero, the hypervisor has been configured to use a direct
@@ -129,13 +139,16 @@ int xen_vector_callback_enabled;
/*------------------------------- Per-CPU Data -------------------------------*/
DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
+#ifdef SMP
DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
+#endif
/*------------------ Hypervisor Access Shared Memory Regions -----------------*/
/** Hypercall table accessed via HYPERVISOR_*_op() methods. */
char *hypercall_stubs;
shared_info_t *HYPERVISOR_shared_info;
+#ifdef SMP
/*---------------------------- XEN PV IPI Handlers ---------------------------*/
/*
* This are C clones of the ASM functions found in apic_vector.s
@@ -496,6 +509,7 @@ xen_init_ipis(void)
/* Set the xen pv ipi ops to replace the native ones */
cpu_ops.ipi_vectored = xen_ipi_vectored;
}
+#endif
/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
static uint32_t
@@ -579,6 +593,9 @@ xen_hvm_set_callback(device_t dev)
struct xen_hvm_param xhp;
int irq;
+ if (xen_vector_callback_enabled)
+ return;
+
xhp.domid = DOMID_SELF;
xhp.index = HVM_PARAM_CALLBACK_IRQ;
if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
@@ -637,41 +654,83 @@ xen_hvm_disable_emulated_devices(void)
outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS);
}
+static void
+xen_hvm_init(enum xen_hvm_init_type init_type)
+{
+ int error;
+ int i;
+
+ if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
+ return;
+
+ error = xen_hvm_init_hypercall_stubs();
+
+ switch (init_type) {
+ case XEN_HVM_INIT_COLD:
+ if (error != 0)
+ return;
+
+ setup_xen_features();
+ break;
+ case XEN_HVM_INIT_RESUME:
+ if (error != 0)
+ panic("Unable to init Xen hypercall stubs on resume");
+ break;
+ default:
+ panic("Unsupported HVM initialization type");
+ }
+
+ /* Clear any stale vcpu_info. */
+ CPU_FOREACH(i)
+ DPCPU_ID_SET(i, vcpu_info, NULL);
+
+ xen_vector_callback_enabled = 0;
+ xen_domain_type = XEN_HVM_DOMAIN;
+ xen_hvm_init_shared_info_page();
+ xen_hvm_set_callback(NULL);
+ xen_hvm_disable_emulated_devices();
+}
+
void
xen_hvm_suspend(void)
{
}
void
-xen_hvm_resume(void)
+xen_hvm_resume(bool suspend_cancelled)
{
- xen_hvm_init_hypercall_stubs();
- xen_hvm_init_shared_info_page();
+ xen_hvm_init(suspend_cancelled ?
+ XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
+
+ /* Register vcpu_info area for CPU#0. */
+ xen_hvm_init_cpu();
}
static void
-xen_hvm_init(void *dummy __unused)
+xen_hvm_sysinit(void *arg __unused)
{
+ xen_hvm_init(XEN_HVM_INIT_COLD);
+}
- if (xen_hvm_init_hypercall_stubs() != 0)
- return;
-
- xen_domain_type = XEN_HVM_DOMAIN;
- setup_xen_features();
- xen_hvm_init_shared_info_page();
- xen_hvm_set_callback(NULL);
- xen_hvm_disable_emulated_devices();
-}
-
-void xen_hvm_init_cpu(void)
+void
+xen_hvm_init_cpu(void)
{
struct vcpu_register_vcpu_info info;
struct vcpu_info *vcpu_info;
int cpu, rc;
- cpu = PCPU_GET(acpi_id);
+ if (DPCPU_GET(vcpu_info) != NULL) {
+ /*
+ * vcpu_info is already set. We're resuming
+ * from a failed migration and our pre-suspend
+ * configuration is still valid.
+ */
+ return;
+ }
+
vcpu_info = DPCPU_PTR(vcpu_local_info);
+ cpu = PCPU_GET(acpi_id);
info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));
@@ -682,6 +741,8 @@ void xen_hvm_init_cpu(void)
DPCPU_SET(vcpu_info, vcpu_info);
}
-SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL);
+SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
+#ifdef SMP
SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL);
+#endif
SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL);
diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c
index 54a6be6..b94f8d9 100644
--- a/sys/x86/xen/xen_intr.c
+++ b/sys/x86/xen/xen_intr.c
@@ -120,7 +120,7 @@ struct xenisrc {
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
static void xen_intr_suspend(struct pic *);
-static void xen_intr_resume(struct pic *);
+static void xen_intr_resume(struct pic *, bool suspend_cancelled);
static void xen_intr_enable_source(struct intsrc *isrc);
static void xen_intr_disable_source(struct intsrc *isrc, int eoi);
static void xen_intr_eoi_source(struct intsrc *isrc);
@@ -334,7 +334,7 @@ xen_intr_release_isrc(struct xenisrc *isrc)
evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
evtchn_cpu_unmask_port(0, isrc->xi_port);
- if (isrc->xi_close != 0) {
+ if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) {
struct evtchn_close close = { .port = isrc->xi_port };
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
panic("EVTCHNOP_close failed");
@@ -408,6 +408,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port,
return (error);
}
*isrcp = isrc;
+ evtchn_unmask_port(local_port);
return (0);
}
@@ -571,6 +572,9 @@ xen_intr_init(void *dummy __unused)
struct xen_intr_pcpu_data *pcpu;
int i;
+ if (!xen_domain())
+ return (0);
+
mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF);
/*
@@ -602,20 +606,116 @@ xen_intr_suspend(struct pic *unused)
{
}
+static void
+xen_rebind_ipi(struct xenisrc *isrc)
+{
+#ifdef SMP
+ int cpu = isrc->xi_cpu;
+ int acpi_id = pcpu_find(cpu)->pc_acpi_id;
+ int error;
+ struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
+
+ error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+ &bind_ipi);
+ if (error != 0)
+ panic("unable to rebind xen IPI: %d", error);
+
+ isrc->xi_port = bind_ipi.port;
+ isrc->xi_cpu = 0;
+ xen_intr_port_to_isrc[bind_ipi.port] = isrc;
+
+ error = xen_intr_assign_cpu(&isrc->xi_intsrc,
+ cpu_apic_ids[cpu]);
+ if (error)
+ panic("unable to bind xen IPI to CPU#%d: %d",
+ cpu, error);
+
+ evtchn_unmask_port(bind_ipi.port);
+#else
+ panic("Resume IPI event channel on UP");
+#endif
+}
+
+static void
+xen_rebind_virq(struct xenisrc *isrc)
+{
+ int cpu = isrc->xi_cpu;
+ int acpi_id = pcpu_find(cpu)->pc_acpi_id;
+ int error;
+ struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq,
+ .vcpu = acpi_id };
+
+ error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq);
+ if (error != 0)
+ panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error);
+
+ isrc->xi_port = bind_virq.port;
+ isrc->xi_cpu = 0;
+ xen_intr_port_to_isrc[bind_virq.port] = isrc;
+
+#ifdef SMP
+ error = xen_intr_assign_cpu(&isrc->xi_intsrc,
+ cpu_apic_ids[cpu]);
+ if (error)
+ panic("unable to bind xen VIRQ#%d to CPU#%d: %d",
+ isrc->xi_virq, cpu, error);
+#endif
+
+ evtchn_unmask_port(bind_virq.port);
+}
+
/**
* Return this PIC to service after being suspended.
*/
static void
-xen_intr_resume(struct pic *unused)
+xen_intr_resume(struct pic *unused, bool suspend_cancelled)
{
- u_int port;
+ shared_info_t *s = HYPERVISOR_shared_info;
+ struct xenisrc *isrc;
+ u_int isrc_idx;
+ int i;
- /*
- * Mask events for all ports. They will be unmasked after
- * drivers have re-registered their handlers.
- */
- for (port = 0; port < NR_EVENT_CHANNELS; port++)
- evtchn_mask_port(port);
+ if (suspend_cancelled)
+ return;
+
+ /* Reset the per-CPU masks */
+ CPU_FOREACH(i) {
+ struct xen_intr_pcpu_data *pcpu;
+
+ pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
+ memset(pcpu->evtchn_enabled,
+ i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled));
+ }
+
+ /* Mask all event channels. */
+ for (i = 0; i < nitems(s->evtchn_mask); i++)
+ atomic_store_rel_long(&s->evtchn_mask[i], ~0);
+
+ /* Remove port -> isrc mappings */
+ memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc));
+
+ /* Free unused isrcs and rebind VIRQs and IPIs */
+ for (isrc_idx = 0; isrc_idx < xen_intr_isrc_count; isrc_idx++) {
+ u_int vector;
+
+ vector = FIRST_EVTCHN_INT + isrc_idx;
+ isrc = (struct xenisrc *)intr_lookup_source(vector);
+ if (isrc != NULL) {
+ isrc->xi_port = 0;
+ switch (isrc->xi_type) {
+ case EVTCHN_TYPE_IPI:
+ xen_rebind_ipi(isrc);
+ break;
+ case EVTCHN_TYPE_VIRQ:
+ xen_rebind_virq(isrc);
+ break;
+ default:
+ isrc->xi_cpu = 0;
+ break;
+ }
+ }
+ }
}
/**
@@ -693,6 +793,7 @@ xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig,
static int
xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
{
+#ifdef SMP
struct evtchn_bind_vcpu bind_vcpu;
struct xenisrc *isrc;
u_int to_cpu, acpi_id;
@@ -749,6 +850,9 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
}
mtx_unlock(&xen_intr_isrc_lock);
return (0);
+#else
+ return (EOPNOTSUPP);
+#endif
}
/*------------------- Virtual Interrupt Source PIC Functions -----------------*/
@@ -979,8 +1083,11 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev,
filter, handler, arg, flags, port_handlep);
+
+#ifdef SMP
if (error == 0)
error = intr_event_bind(isrc->xi_intsrc.is_event, cpu);
+#endif
if (error != 0) {
evtchn_close_t close = { .port = bind_virq.port };
@@ -991,6 +1098,7 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
return (error);
}
+#ifdef SMP
if (isrc->xi_cpu != cpu) {
/*
* Too early in the boot process for the generic interrupt
@@ -1000,12 +1108,15 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
*/
xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]);
}
+#endif
/*
* The Event Channel API opened this port, so it is
* responsible for closing it automatically on unbind.
*/
isrc->xi_close = 1;
+ isrc->xi_virq = virq;
+
return (0);
}
@@ -1014,6 +1125,7 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu,
driver_filter_t filter, enum intr_type flags,
xen_intr_handle_t *port_handlep)
{
+#ifdef SMP
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
struct xenisrc *isrc;
struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
@@ -1063,6 +1175,9 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu,
*/
isrc->xi_close = 1;
return (0);
+#else
+ return (EOPNOTSUPP);
+#endif
}
int
diff --git a/sys/xen/hvm.h b/sys/xen/hvm.h
index 562aaf9..c7d40cb 100644
--- a/sys/xen/hvm.h
+++ b/sys/xen/hvm.h
@@ -93,6 +93,6 @@ enum {
void xen_hvm_set_callback(device_t);
void xen_hvm_suspend(void);
-void xen_hvm_resume(void);
+void xen_hvm_resume(bool suspend_cancelled);
void xen_hvm_init_cpu(void);
#endif /* __XEN_HVM_H__ */
OpenPOWER on IntegriCloud