From a9c07a6f6791cbf5f8350541942b0dab762b0423 Mon Sep 17 00:00:00 2001 From: gibbs Date: Fri, 20 Sep 2013 05:06:03 +0000 Subject: Add support for suspend/resume/migration operations when running as a Xen PVHVM guest. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Submitted by: Roger Pau Monné Sponsored by: Citrix Systems R&D Reviewed by: gibbs Approved by: re (blanket Xen) MFC after: 2 weeks sys/amd64/amd64/mp_machdep.c: sys/i386/i386/mp_machdep.c: - Make sure that there are no MMU related IPIs pending on migration. - Reset pending IPI_BITMAP on resume. - Init vcpu_info on resume. sys/amd64/include/intr_machdep.h: sys/i386/include/intr_machdep.h: sys/x86/acpica/acpi_wakeup.c: sys/x86/x86/intr_machdep.c: sys/x86/isa/atpic.c: sys/x86/x86/io_apic.c: sys/x86/x86/local_apic.c: - Add a "suspend_cancelled" parameter to pic_resume(). For the Xen PIC, restoration of interrupt services differs between the aborted suspend and normal resume cases, so we must provide this information. sys/dev/acpica/acpi_timer.c: sys/dev/xen/timer/timer.c: sys/sys/timetc.h: - Don't swap out "suspend safe" timers across a suspend/resume cycle. This includes the Xen PV and ACPI timers. sys/dev/xen/control/control.c: - Perform proper suspend/resume process for PVHVM: - Suspend all APs before going into suspension, this allows us to reset the vcpu_info on resume for each AP. - Reset shared info page and callback on resume. sys/dev/xen/timer/timer.c: - Implement suspend/resume support for the PV timer. Since FreeBSD doesn't perform a per-cpu resume of the timer, we need to call smp_rendezvous in order to correctly resume the timer on each CPU. sys/dev/xen/xenpci/xenpci.c: - Don't reset the PCI interrupt on each suspend/resume. sys/kern/subr_smp.c: - When suspending a PVHVM domain make sure there are no MMU IPIs in-flight, or we will get a lockup on resume due to the fact that pending event channels are not carried over on migration. 
- Implement a generic version of restart_cpus that can be used by suspended and stopped cpus. sys/x86/xen/hvm.c: - Implement resume support for the hypercall page and shared info. - Clear vcpu_info so it can be reset by APs when resuming from suspension. sys/dev/xen/xenpci/xenpci.c: sys/x86/xen/hvm.c: sys/x86/xen/xen_intr.c: - Support UP kernel configurations. sys/x86/xen/xen_intr.c: - Properly rebind per-cpus VIRQs and IPIs on resume. --- sys/amd64/amd64/mp_machdep.c | 16 +++++ sys/amd64/include/intr_machdep.h | 4 +- sys/dev/acpica/acpi_timer.c | 9 +++ sys/dev/xen/control/control.c | 77 ++++++++++++++++++---- sys/dev/xen/timer/timer.c | 88 +++++++++++-------------- sys/dev/xen/xenpci/xenpci.c | 18 +----- sys/i386/i386/mp_machdep.c | 16 +++++ sys/i386/include/intr_machdep.h | 4 +- sys/kern/subr_smp.c | 55 +++++++++++++++- sys/sys/smp.h | 1 + sys/sys/timetc.h | 4 ++ sys/x86/acpica/acpi_wakeup.c | 2 +- sys/x86/isa/atpic.c | 4 +- sys/x86/x86/intr_machdep.c | 4 +- sys/x86/x86/io_apic.c | 4 +- sys/x86/x86/local_apic.c | 4 +- sys/x86/xen/hvm.c | 95 ++++++++++++++++++++++----- sys/x86/xen/xen_intr.c | 135 ++++++++++++++++++++++++++++++++++++--- sys/xen/hvm.h | 2 +- 19 files changed, 420 insertions(+), 122 deletions(-) (limited to 'sys') diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 0fdb668..1f02211 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -1468,6 +1468,10 @@ cpususpend_handler(void) cpu = PCPU_GET(cpuid); +#ifdef XENHVM + mtx_assert(&smp_ipi_mtx, MA_NOTOWNED); +#endif + if (savectx(susppcbs[cpu])) { ctx_fpusave(susppcbs[cpu]->pcb_fpususpend); wbinvd(); @@ -1486,11 +1490,23 @@ cpususpend_handler(void) while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); +#ifdef XENHVM + /* + * Reset pending bitmap IPIs, because Xen doesn't preserve pending + * event channels on migration. 
+ */ + cpu_ipi_pending[cpu] = 0; + /* register vcpu_info area */ + xen_hvm_init_cpu(); +#endif + /* Resume MCA and local APIC */ mca_resume(); lapic_setup(0); CPU_CLR_ATOMIC(cpu, &started_cpus); + /* Indicate that we are resumed */ + CPU_CLR_ATOMIC(cpu, &suspended_cpus); } /* diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h index c8b454d..fb71b5a 100644 --- a/sys/amd64/include/intr_machdep.h +++ b/sys/amd64/include/intr_machdep.h @@ -102,7 +102,7 @@ struct pic { int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); - void (*pic_resume)(struct pic *); + void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); @@ -170,7 +170,7 @@ struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); -void intr_resume(void); +void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); diff --git a/sys/dev/acpica/acpi_timer.c b/sys/dev/acpica/acpi_timer.c index 80e6d18..b974f1f 100644 --- a/sys/dev/acpica/acpi_timer.c +++ b/sys/dev/acpica/acpi_timer.c @@ -189,6 +189,7 @@ acpi_timer_probe(device_t dev) else acpi_timer_timecounter.tc_counter_mask = 0x00ffffff; acpi_timer_timecounter.tc_frequency = acpi_timer_frequency; + acpi_timer_timecounter.tc_flags = TC_FLAGS_SUSPEND_SAFE; if (testenv("debug.acpi.timer_test")) acpi_timer_boot_test(); @@ -285,6 +286,14 @@ acpi_timer_suspend_handler(struct timecounter *newtc) acpi_timer_eh = NULL; } + if ((timecounter->tc_flags & TC_FLAGS_SUSPEND_SAFE) != 0) { + /* + * If we are using a suspend safe timecounter, don't + * save/restore it across suspend/resume. 
+ */ + return; + } + KASSERT(newtc == &acpi_timer_timecounter, ("acpi_timer_suspend_handler: wrong timecounter")); diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index 649f281..a74042b 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -119,11 +119,9 @@ __FBSDID("$FreeBSD$"); #include #include #include - -#ifndef XENHVM #include #include -#endif +#include #include @@ -140,6 +138,10 @@ __FBSDID("$FreeBSD$"); #include #include +#ifdef XENHVM +#include +#endif + #include #include @@ -199,7 +201,7 @@ extern void xencons_resume(void); static void xctrl_suspend() { - int i, j, k, fpp; + int i, j, k, fpp, suspend_cancelled; unsigned long max_pfn, start_info_mfn; EVENTHANDLER_INVOKE(power_suspend); @@ -264,7 +266,7 @@ xctrl_suspend() */ start_info_mfn = VTOMFN(xen_start_info); pmap_suspend(); - HYPERVISOR_suspend(start_info_mfn); + suspend_cancelled = HYPERVISOR_suspend(start_info_mfn); pmap_resume(); pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info); @@ -287,7 +289,7 @@ xctrl_suspend() HYPERVISOR_shared_info->arch.max_pfn = max_pfn; gnttab_resume(); - intr_resume(); + intr_resume(suspend_cancelled != 0); local_irq_enable(); xencons_resume(); @@ -331,16 +333,31 @@ xen_pv_shutdown_final(void *arg, int howto) } #else -extern void xenpci_resume(void); /* HVM mode suspension. */ static void xctrl_suspend() { +#ifdef SMP + cpuset_t cpu_suspend_map; +#endif int suspend_cancelled; EVENTHANDLER_INVOKE(power_suspend); + if (smp_started) { + thread_lock(curthread); + sched_bind(curthread, 0); + thread_unlock(curthread); + } + KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0")); + + /* + * Clear our XenStore node so the toolstack knows we are + * responding to the suspend request. + */ + xs_write(XST_NIL, "control", "shutdown", ""); + /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. 
@@ -353,31 +370,67 @@ xctrl_suspend() } mtx_unlock(&Giant); +#ifdef SMP + if (smp_started) { + /* + * Suspend other CPUs. This prevents IPIs while we + * are resuming, and will allow us to reset per-cpu + * vcpu_info on resume. + */ + cpu_suspend_map = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map); + if (!CPU_EMPTY(&cpu_suspend_map)) + suspend_cpus(cpu_suspend_map); + } +#endif + /* * Prevent any races with evtchn_interrupt() handler. */ disable_intr(); intr_suspend(); + xen_hvm_suspend(); suspend_cancelled = HYPERVISOR_suspend(0); - intr_resume(); + xen_hvm_resume(suspend_cancelled != 0); + intr_resume(suspend_cancelled != 0); + enable_intr(); /* - * Re-enable interrupts and put the scheduler back to normal. + * Reset grant table info. */ - enable_intr(); + gnttab_resume(); + +#ifdef SMP + if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) { + /* + * Now that event channels have been initialized, + * resume CPUs. + */ + resume_cpus(cpu_suspend_map); + } +#endif /* * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or * similar. */ mtx_lock(&Giant); - if (!suspend_cancelled) - DEVICE_RESUME(root_bus); + DEVICE_RESUME(root_bus); mtx_unlock(&Giant); + if (smp_started) { + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + } + EVENTHANDLER_INVOKE(power_resume); + + if (bootverbose) + printf("System resumed after suspension\n"); + } #endif diff --git a/sys/dev/xen/timer/timer.c b/sys/dev/xen/timer/timer.c index 9c8db59..605e5c5 100644 --- a/sys/dev/xen/timer/timer.c +++ b/sys/dev/xen/timer/timer.c @@ -1,4 +1,4 @@ -/** +/*- * Copyright (c) 2009 Adrian Chadd * Copyright (c) 2012 Spectra Logic Corporation * All rights reserved. @@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include "clock_if.h" @@ -316,7 +317,7 @@ xentimer_settime(device_t dev __unused, struct timespec *ts) * Don't return EINVAL here; just silently fail if the domain isn't * privileged enough to set the TOD. 
*/ - return(0); + return (0); } /** @@ -339,7 +340,7 @@ xentimer_gettime(device_t dev, struct timespec *ts) xen_fetch_uptime(&u_ts); timespecadd(ts, &u_ts); - return(0); + return (0); } /** @@ -457,8 +458,9 @@ xentimer_attach(device_t dev) /* Bind an event channel to a VIRQ on each VCPU. */ CPU_FOREACH(i) { - struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu); + struct xentimer_pcpu_data *pcpu; + pcpu = DPCPU_ID_PTR(i, xentimer_pcpu); error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL); if (error) { device_printf(dev, "Error disabling Xen periodic timer " @@ -493,6 +495,7 @@ xentimer_attach(device_t dev) /* Register the timecounter. */ sc->tc.tc_name = "XENTIMER"; sc->tc.tc_quality = XENTIMER_QUALITY; + sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE; /* * The underlying resolution is in nanoseconds, since the timer info * scales TSC frequencies using a fraction that represents time in @@ -523,75 +526,60 @@ xentimer_detach(device_t dev) return (EBUSY); } -/** - * The following device methods are disabled because they wouldn't work - * properly. - */ -#ifdef NOTYET +static void +xentimer_percpu_resume(void *arg) +{ + device_t dev = (device_t) arg; + struct xentimer_softc *sc = device_get_softc(dev); + + xentimer_et_start(&sc->et, sc->et.et_min_period, 0); +} + static int xentimer_resume(device_t dev) { - struct xentimer_softc *sc = device_get_softc(dev); - int error = 0; + int error; int i; - device_printf(sc->dev, "%s", __func__); + /* Disable the periodic timer */ CPU_FOREACH(i) { - struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu); - - /* Skip inactive timers. */ - if (pcpu->timer == 0) - continue; - - /* - * XXX This won't actually work, because Xen requires that - * singleshot timers be set while running on the given CPU. - */ - error = xentimer_vcpu_start_timer(i, pcpu->timer); - if (error == -ETIME) { - /* Event time has already passed; process. 
*/ - xentimer_intr(sc); - } else if (error != 0) { - panic("%s: error %d restarting vcpu %d\n", - __func__, error, i); + error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL); + if (error != 0) { + device_printf(dev, + "Error disabling Xen periodic timer on CPU %d\n", + i); + return (error); } } - return (error); + /* Reset the last uptime value */ + xen_timer_last_time = 0; + + /* Reset the RTC clock */ + inittodr(time_second); + + /* Kick the timers on all CPUs */ + smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev); + + if (bootverbose) + device_printf(dev, "resumed operation after suspension\n"); + + return (0); } static int xentimer_suspend(device_t dev) { - struct xentimer_softc *sc = device_get_softc(dev); - int error = 0; - int i; - - device_printf(sc->dev, "%s", __func__); - CPU_FOREACH(i) { - struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu); - - /* Skip inactive timers. */ - if (pcpu->timer == 0) - continue; - error = xentimer_vcpu_stop_timer(i); - if (error) - panic("Error %d stopping VCPU %d timer\n", error, i); - } - - return (error); + return (0); } -#endif static device_method_t xentimer_methods[] = { DEVMETHOD(device_identify, xentimer_identify), DEVMETHOD(device_probe, xentimer_probe), DEVMETHOD(device_attach, xentimer_attach), DEVMETHOD(device_detach, xentimer_detach), -#ifdef NOTYET DEVMETHOD(device_suspend, xentimer_suspend), DEVMETHOD(device_resume, xentimer_resume), -#endif /* clock interface */ DEVMETHOD(clock_gettime, xentimer_gettime), DEVMETHOD(clock_settime, xentimer_settime), diff --git a/sys/dev/xen/xenpci/xenpci.c b/sys/dev/xen/xenpci/xenpci.c index 0b1762d..dd2ad92 100644 --- a/sys/dev/xen/xenpci/xenpci.c +++ b/sys/dev/xen/xenpci/xenpci.c @@ -77,6 +77,7 @@ xenpci_irq_init(device_t device, struct xenpci_softc *scp) if (error) return error; +#ifdef SMP /* * When using the PCI event delivery callback we cannot assign * events to specific vCPUs, so all events are delivered to vCPU#0 by @@ -88,6 +89,7 @@ 
xenpci_irq_init(device_t device, struct xenpci_softc *scp) scp->res_irq, 0); if (error) return error; +#endif xen_hvm_set_callback(device); return (0); @@ -309,28 +311,12 @@ xenpci_detach(device_t dev) static int xenpci_suspend(device_t dev) { - struct xenpci_softc *scp = device_get_softc(dev); - device_t parent = device_get_parent(dev); - - if (scp->intr_cookie != NULL) { - if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq, - scp->intr_cookie) != 0) - printf("intr teardown failed.. continuing\n"); - scp->intr_cookie = NULL; - } - return (bus_generic_suspend(dev)); } static int xenpci_resume(device_t dev) { - struct xenpci_softc *scp = device_get_softc(dev); - device_t parent = device_get_parent(dev); - - BUS_SETUP_INTR(parent, dev, scp->res_irq, - INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL, - /*trap_frame*/NULL, &scp->intr_cookie); xen_hvm_set_callback(dev); return (bus_generic_resume(dev)); } diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 60b36a2..1d6d58a 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -1529,6 +1529,10 @@ cpususpend_handler(void) cpu = PCPU_GET(cpuid); +#ifdef XENHVM + mtx_assert(&smp_ipi_mtx, MA_NOTOWNED); +#endif + if (savectx(susppcbs[cpu])) { wbinvd(); CPU_SET_ATOMIC(cpu, &suspended_cpus); @@ -1545,10 +1549,22 @@ cpususpend_handler(void) while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); +#ifdef XENHVM + /* + * Reset pending bitmap IPIs, because Xen doesn't preserve pending + * event channels on migration. 
+ */ + cpu_ipi_pending[cpu] = 0; + /* register vcpu_info area */ + xen_hvm_init_cpu(); +#endif + /* Resume MCA and local APIC */ mca_resume(); lapic_setup(0); + /* Indicate that we are resumed */ + CPU_CLR_ATOMIC(cpu, &suspended_cpus); CPU_CLR_ATOMIC(cpu, &started_cpus); } /* diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h index 6bbe378..8fb61a5 100644 --- a/sys/i386/include/intr_machdep.h +++ b/sys/i386/include/intr_machdep.h @@ -108,7 +108,7 @@ struct pic { int (*pic_vector)(struct intsrc *); int (*pic_source_pending)(struct intsrc *); void (*pic_suspend)(struct pic *); - void (*pic_resume)(struct pic *); + void (*pic_resume)(struct pic *, bool suspend_cancelled); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); @@ -166,7 +166,7 @@ struct intsrc *intr_lookup_source(int vector); int intr_register_pic(struct pic *pic); int intr_register_source(struct intsrc *isrc); int intr_remove_handler(void *cookie); -void intr_resume(void); +void intr_resume(bool suspend_cancelled); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); void nexus_add_irq(u_long irq); diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 3614798..77d1b2b 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -225,6 +225,18 @@ generic_stop_cpus(cpuset_t map, u_int type) CTR2(KTR_SMP, "stop_cpus(%s) with %u type", cpusetobj_strprint(cpusetbuf, &map), type); +#ifdef XENHVM + /* + * When migrating a PVHVM domain we need to make sure there are + * no IPIs in progress. IPIs that have been issued, but not + * yet delivered (not pending on a vCPU) will be lost in the + * IPI rebinding process, violating FreeBSD's assumption of + * reliable IPI delivery. 
+ */ + if (type == IPI_SUSPEND) + mtx_lock_spin(&smp_ipi_mtx); +#endif + if (stopping_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&stopping_cpu, NOCPU, PCPU_GET(cpuid)) == 0) @@ -252,6 +264,11 @@ generic_stop_cpus(cpuset_t map, u_int type) } } +#ifdef XENHVM + if (type == IPI_SUSPEND) + mtx_unlock_spin(&smp_ipi_mtx); +#endif + stopping_cpu = NOCPU; return (1); } @@ -292,28 +309,60 @@ suspend_cpus(cpuset_t map) * 0: NA * 1: ok */ -int -restart_cpus(cpuset_t map) +static int +generic_restart_cpus(cpuset_t map, u_int type) { #ifdef KTR char cpusetbuf[CPUSETBUFSIZ]; #endif + volatile cpuset_t *cpus; + + KASSERT( +#if defined(__amd64__) || defined(__i386__) + type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND, +#else + type == IPI_STOP || type == IPI_STOP_HARD, +#endif + ("%s: invalid stop type", __func__)); if (!smp_started) return 0; CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); +#if defined(__amd64__) || defined(__i386__) + if (type == IPI_SUSPEND) + cpus = &suspended_cpus; + else +#endif + cpus = &stopped_cpus; + /* signal other cpus to restart */ CPU_COPY_STORE_REL(&map, &started_cpus); /* wait for each to clear its bit */ - while (CPU_OVERLAP(&stopped_cpus, &map)) + while (CPU_OVERLAP(cpus, &map)) cpu_spinwait(); return 1; } +int +restart_cpus(cpuset_t map) +{ + + return (generic_restart_cpus(map, IPI_STOP)); +} + +#if defined(__amd64__) || defined(__i386__) +int +resume_cpus(cpuset_t map) +{ + + return (generic_restart_cpus(map, IPI_SUSPEND)); +} +#endif + /* * All-CPU rendezvous. 
CPUs are signalled, all execute the setup function * (if specified), rendezvous, execute the action function (if specified), diff --git a/sys/sys/smp.h b/sys/sys/smp.h index 4abd2ce..fed12bf 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -166,6 +166,7 @@ int stop_cpus(cpuset_t); int stop_cpus_hard(cpuset_t); #if defined(__amd64__) || defined(__i386__) int suspend_cpus(cpuset_t); +int resume_cpus(cpuset_t); #endif void smp_rendezvous_action(void); diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h index 4f75c3d..48b962f 100644 --- a/sys/sys/timetc.h +++ b/sys/sys/timetc.h @@ -59,6 +59,10 @@ struct timecounter { */ u_int tc_flags; #define TC_FLAGS_C3STOP 1 /* Timer dies in C3. */ +#define TC_FLAGS_SUSPEND_SAFE 2 /* + * Timer functional across + * suspend/resume. + */ void *tc_priv; /* Pointer to the timecounter's private parts. */ diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c index 0c062bf..5008544 100644 --- a/sys/x86/acpica/acpi_wakeup.c +++ b/sys/x86/acpica/acpi_wakeup.c @@ -266,7 +266,7 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, restart_cpus(suspcpus); #endif mca_resume(); - intr_resume(); + intr_resume(/*suspend_cancelled*/false); AcpiSetFirmwareWakingVector(0); } else { diff --git a/sys/x86/isa/atpic.c b/sys/x86/isa/atpic.c index ddf3fce..7f6cb14 100644 --- a/sys/x86/isa/atpic.c +++ b/sys/x86/isa/atpic.c @@ -123,7 +123,7 @@ static void atpic_eoi_slave(struct intsrc *isrc); static void atpic_enable_intr(struct intsrc *isrc); static void atpic_disable_intr(struct intsrc *isrc); static int atpic_vector(struct intsrc *isrc); -static void atpic_resume(struct pic *pic); +static void atpic_resume(struct pic *pic, bool suspend_cancelled); static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); @@ -276,7 +276,7 @@ atpic_source_pending(struct intsrc *isrc) } static void -atpic_resume(struct pic *pic) 
+atpic_resume(struct pic *pic, bool suspend_cancelled) { struct atpic *ap = (struct atpic *)pic; diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c index e21635f..8646637 100644 --- a/sys/x86/x86/intr_machdep.c +++ b/sys/x86/x86/intr_machdep.c @@ -279,7 +279,7 @@ intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) } void -intr_resume(void) +intr_resume(bool suspend_cancelled) { struct pic *pic; @@ -289,7 +289,7 @@ intr_resume(void) mtx_lock(&intr_table_lock); TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) - pic->pic_resume(pic); + pic->pic_resume(pic, suspend_cancelled); } mtx_unlock(&intr_table_lock); } diff --git a/sys/x86/x86/io_apic.c b/sys/x86/x86/io_apic.c index 4df27c2..4467f8f 100644 --- a/sys/x86/x86/io_apic.c +++ b/sys/x86/x86/io_apic.c @@ -119,7 +119,7 @@ static int ioapic_vector(struct intsrc *isrc); static int ioapic_source_pending(struct intsrc *isrc); static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void ioapic_resume(struct pic *pic); +static void ioapic_resume(struct pic *pic, bool suspend_cancelled); static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); @@ -486,7 +486,7 @@ ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, } static void -ioapic_resume(struct pic *pic) +ioapic_resume(struct pic *pic, bool suspend_cancelled) { struct ioapic *io = (struct ioapic *)pic; int i; diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index ac651cd..8c8eef6 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -161,7 +161,7 @@ static u_long lapic_timer_divisor; static struct eventtimer lapic_et; static void lapic_enable(void); -static void lapic_resume(struct pic *pic); +static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void lapic_timer_oneshot(struct lapic *, u_int count, int enable_int); static void 
lapic_timer_periodic(struct lapic *, @@ -566,7 +566,7 @@ lapic_enable(void) /* Reset the local APIC on the BSP during resume. */ static void -lapic_resume(struct pic *pic) +lapic_resume(struct pic *pic, bool suspend_cancelled) { lapic_setup(0); diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c index 0404fe9..b0c2df6 100644 --- a/sys/x86/xen/hvm.c +++ b/sys/x86/xen/hvm.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include /*--------------------------- Forward Declarations ---------------------------*/ +#ifdef SMP static driver_filter_t xen_smp_rendezvous_action; static driver_filter_t xen_invltlb; static driver_filter_t xen_invlpg; @@ -70,6 +71,7 @@ static driver_filter_t xen_ipi_bitmap_handler; static driver_filter_t xen_cpustop_handler; static driver_filter_t xen_cpususpend_handler; static driver_filter_t xen_cpustophard_handler; +#endif /*---------------------------- Extern Declarations ---------------------------*/ /* Variables used by mp_machdep to perform the MMU related IPIs */ @@ -93,6 +95,12 @@ extern void pmap_lazyfix_action(void); #define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) /*-------------------------------- Local Types -------------------------------*/ +enum xen_hvm_init_type { + XEN_HVM_INIT_COLD, + XEN_HVM_INIT_CANCELLED_SUSPEND, + XEN_HVM_INIT_RESUME +}; + struct xen_ipi_handler { driver_filter_t *filter; @@ -104,6 +112,7 @@ enum xen_domain_type xen_domain_type = XEN_NATIVE; static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); +#ifdef SMP static struct xen_ipi_handler xen_ipis[] = { [IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" }, @@ -119,6 +128,7 @@ static struct xen_ipi_handler xen_ipis[] = [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, [IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" }, }; +#endif /** * If non-zero, the hypervisor has been configured to use a direct @@ -129,13 +139,16 @@ int xen_vector_callback_enabled; /*------------------------------- Per-CPU Data 
-------------------------------*/ DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); DPCPU_DEFINE(struct vcpu_info *, vcpu_info); +#ifdef SMP DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); +#endif /*------------------ Hypervisor Access Shared Memory Regions -----------------*/ /** Hypercall table accessed via HYPERVISOR_*_op() methods. */ char *hypercall_stubs; shared_info_t *HYPERVISOR_shared_info; +#ifdef SMP /*---------------------------- XEN PV IPI Handlers ---------------------------*/ /* * This are C clones of the ASM functions found in apic_vector.s @@ -496,6 +509,7 @@ xen_init_ipis(void) /* Set the xen pv ipi ops to replace the native ones */ cpu_ops.ipi_vectored = xen_ipi_vectored; } +#endif /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ static uint32_t @@ -579,6 +593,9 @@ xen_hvm_set_callback(device_t dev) struct xen_hvm_param xhp; int irq; + if (xen_vector_callback_enabled) + return; + xhp.domid = DOMID_SELF; xhp.index = HVM_PARAM_CALLBACK_IRQ; if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { @@ -637,41 +654,83 @@ xen_hvm_disable_emulated_devices(void) outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS); } +static void +xen_hvm_init(enum xen_hvm_init_type init_type) +{ + int error; + int i; + + if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) + return; + + error = xen_hvm_init_hypercall_stubs(); + + switch (init_type) { + case XEN_HVM_INIT_COLD: + if (error != 0) + return; + + setup_xen_features(); + break; + case XEN_HVM_INIT_RESUME: + if (error != 0) + panic("Unable to init Xen hypercall stubs on resume"); + break; + default: + panic("Unsupported HVM initialization type"); + } + + /* Clear any stale vcpu_info. 
*/ + CPU_FOREACH(i) + DPCPU_ID_SET(i, vcpu_info, NULL); + + xen_vector_callback_enabled = 0; + xen_domain_type = XEN_HVM_DOMAIN; + xen_hvm_init_shared_info_page(); + xen_hvm_set_callback(NULL); + xen_hvm_disable_emulated_devices(); +} + void xen_hvm_suspend(void) { } void -xen_hvm_resume(void) +xen_hvm_resume(bool suspend_cancelled) { - xen_hvm_init_hypercall_stubs(); - xen_hvm_init_shared_info_page(); + xen_hvm_init(suspend_cancelled ? + XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); + + /* Register vcpu_info area for CPU#0. */ + xen_hvm_init_cpu(); } static void -xen_hvm_init(void *dummy __unused) +xen_hvm_sysinit(void *arg __unused) { + xen_hvm_init(XEN_HVM_INIT_COLD); +} - if (xen_hvm_init_hypercall_stubs() != 0) - return; - - xen_domain_type = XEN_HVM_DOMAIN; - setup_xen_features(); - xen_hvm_init_shared_info_page(); - xen_hvm_set_callback(NULL); - xen_hvm_disable_emulated_devices(); -} - -void xen_hvm_init_cpu(void) +void +xen_hvm_init_cpu(void) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpu_info; int cpu, rc; - cpu = PCPU_GET(acpi_id); + if (DPCPU_GET(vcpu_info) != NULL) { + /* + * vcpu_info is already set. We're resuming + * from a failed migration and our pre-suspend + * configuration is still valid. 
+ */ + return; + } + vcpu_info = DPCPU_PTR(vcpu_local_info); + cpu = PCPU_GET(acpi_id); info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); @@ -682,6 +741,8 @@ void xen_hvm_init_cpu(void) DPCPU_SET(vcpu_info, vcpu_info); } -SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL); +SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); +#ifdef SMP SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL); +#endif SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL); diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index 54a6be6..b94f8d9 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -120,7 +120,7 @@ struct xenisrc { #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) static void xen_intr_suspend(struct pic *); -static void xen_intr_resume(struct pic *); +static void xen_intr_resume(struct pic *, bool suspend_cancelled); static void xen_intr_enable_source(struct intsrc *isrc); static void xen_intr_disable_source(struct intsrc *isrc, int eoi); static void xen_intr_eoi_source(struct intsrc *isrc); @@ -334,7 +334,7 @@ xen_intr_release_isrc(struct xenisrc *isrc) evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); evtchn_cpu_unmask_port(0, isrc->xi_port); - if (isrc->xi_close != 0) { + if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) { struct evtchn_close close = { .port = isrc->xi_port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); @@ -408,6 +408,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port, return (error); } *isrcp = isrc; + evtchn_unmask_port(local_port); return (0); } @@ -571,6 +572,9 @@ xen_intr_init(void *dummy __unused) struct xen_intr_pcpu_data *pcpu; int i; + if (!xen_domain()) + return (0); + mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); /* @@ -602,20 +606,116 @@ xen_intr_suspend(struct pic 
*unused) { } +static void +xen_rebind_ipi(struct xenisrc *isrc) +{ +#ifdef SMP + int cpu = isrc->xi_cpu; + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + int error; + struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id }; + + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi); + if (error != 0) + panic("unable to rebind xen IPI: %d", error); + + isrc->xi_port = bind_ipi.port; + isrc->xi_cpu = 0; + xen_intr_port_to_isrc[bind_ipi.port] = isrc; + + error = xen_intr_assign_cpu(&isrc->xi_intsrc, + cpu_apic_ids[cpu]); + if (error) + panic("unable to bind xen IPI to CPU#%d: %d", + cpu, error); + + evtchn_unmask_port(bind_ipi.port); +#else + panic("Resume IPI event channel on UP"); +#endif +} + +static void +xen_rebind_virq(struct xenisrc *isrc) +{ + int cpu = isrc->xi_cpu; + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + int error; + struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq, + .vcpu = acpi_id }; + + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (error != 0) + panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error); + + isrc->xi_port = bind_virq.port; + isrc->xi_cpu = 0; + xen_intr_port_to_isrc[bind_virq.port] = isrc; + +#ifdef SMP + error = xen_intr_assign_cpu(&isrc->xi_intsrc, + cpu_apic_ids[cpu]); + if (error) + panic("unable to bind xen VIRQ#%d to CPU#%d: %d", + isrc->xi_virq, cpu, error); +#endif + + evtchn_unmask_port(bind_virq.port); +} + /** * Return this PIC to service after being suspended. */ static void -xen_intr_resume(struct pic *unused) +xen_intr_resume(struct pic *unused, bool suspend_cancelled) { - u_int port; + shared_info_t *s = HYPERVISOR_shared_info; + struct xenisrc *isrc; + u_int isrc_idx; + int i; - /* - * Mask events for all ports. They will be unmasked after - * drivers have re-registered their handlers. 
- */ - for (port = 0; port < NR_EVENT_CHANNELS; port++) - evtchn_mask_port(port); + if (suspend_cancelled) + return; + + /* Reset the per-CPU masks */ + CPU_FOREACH(i) { + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); + memset(pcpu->evtchn_enabled, + i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled)); + } + + /* Mask all event channels. */ + for (i = 0; i < nitems(s->evtchn_mask); i++) + atomic_store_rel_long(&s->evtchn_mask[i], ~0); + + /* Remove port -> isrc mappings */ + memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc)); + + /* Free unused isrcs and rebind VIRQs and IPIs */ + for (isrc_idx = 0; isrc_idx < xen_intr_isrc_count; isrc_idx++) { + u_int vector; + + vector = FIRST_EVTCHN_INT + isrc_idx; + isrc = (struct xenisrc *)intr_lookup_source(vector); + if (isrc != NULL) { + isrc->xi_port = 0; + switch (isrc->xi_type) { + case EVTCHN_TYPE_IPI: + xen_rebind_ipi(isrc); + break; + case EVTCHN_TYPE_VIRQ: + xen_rebind_virq(isrc); + break; + default: + isrc->xi_cpu = 0; + break; + } + } + } } /** @@ -693,6 +793,7 @@ xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, static int xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) { +#ifdef SMP struct evtchn_bind_vcpu bind_vcpu; struct xenisrc *isrc; u_int to_cpu, acpi_id; @@ -749,6 +850,9 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) } mtx_unlock(&xen_intr_isrc_lock); return (0); +#else + return (EOPNOTSUPP); +#endif } /*------------------- Virtual Interrupt Source PIC Functions -----------------*/ @@ -979,8 +1083,11 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev, filter, handler, arg, flags, port_handlep); + +#ifdef SMP if (error == 0) error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); +#endif if (error != 0) { evtchn_close_t close = { .port = bind_virq.port }; @@ -991,6 +1098,7 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, return (error); 
} +#ifdef SMP if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt @@ -1000,12 +1108,15 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } +#endif /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; + isrc->xi_virq = virq; + return (0); } @@ -1014,6 +1125,7 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, driver_filter_t filter, enum intr_type flags, xen_intr_handle_t *port_handlep) { +#ifdef SMP int acpi_id = pcpu_find(cpu)->pc_acpi_id; struct xenisrc *isrc; struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id }; @@ -1063,6 +1175,9 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, */ isrc->xi_close = 1; return (0); +#else + return (EOPNOTSUPP); +#endif } int diff --git a/sys/xen/hvm.h b/sys/xen/hvm.h index 562aaf9..c7d40cb 100644 --- a/sys/xen/hvm.h +++ b/sys/xen/hvm.h @@ -93,6 +93,6 @@ enum { void xen_hvm_set_callback(device_t); void xen_hvm_suspend(void); -void xen_hvm_resume(void); +void xen_hvm_resume(bool suspend_cancelled); void xen_hvm_init_cpu(void); #endif /* __XEN_HVM_H__ */ -- cgit v1.1