diff options
author | gibbs <gibbs@FreeBSD.org> | 2013-09-20 05:06:03 +0000 |
---|---|---|
committer | gibbs <gibbs@FreeBSD.org> | 2013-09-20 05:06:03 +0000 |
commit | a9c07a6f6791cbf5f8350541942b0dab762b0423 (patch) | |
tree | 297f8425e6baa2d90b45079c46930ff9d40e9994 /sys/x86 | |
parent | 45812e0f976ce4fd4915dda57316808d8bb5f23e (diff) | |
download | FreeBSD-src-a9c07a6f6791cbf5f8350541942b0dab762b0423.zip FreeBSD-src-a9c07a6f6791cbf5f8350541942b0dab762b0423.tar.gz |
Add support for suspend/resume/migration operations when running as a
Xen PVHVM guest.
Submitted by: Roger Pau Monné
Sponsored by: Citrix Systems R&D
Reviewed by: gibbs
Approved by: re (blanket Xen)
MFC after: 2 weeks
sys/amd64/amd64/mp_machdep.c:
sys/i386/i386/mp_machdep.c:
- Make sure that there are no MMU related IPIs pending on migration.
- Reset pending IPI_BITMAP on resume.
- Init vcpu_info on resume.
sys/amd64/include/intr_machdep.h:
sys/i386/include/intr_machdep.h:
sys/x86/acpica/acpi_wakeup.c:
sys/x86/x86/intr_machdep.c:
sys/x86/isa/atpic.c:
sys/x86/x86/io_apic.c:
sys/x86/x86/local_apic.c:
- Add a "suspend_cancelled" parameter to pic_resume(). For the
Xen PIC, restoration of interrupt services differs between
the aborted suspend and normal resume cases, so we must provide
this information.
sys/dev/acpica/acpi_timer.c:
sys/dev/xen/timer/timer.c:
sys/timetc.h:
- Don't swap out "suspend safe" timers across a suspend/resume
cycle. This includes the Xen PV and ACPI timers.
sys/dev/xen/control/control.c:
- Perform proper suspend/resume process for PVHVM:
- Suspend all APs before going into suspension, this allows us
to reset the vcpu_info on resume for each AP.
- Reset shared info page and callback on resume.
sys/dev/xen/timer/timer.c:
- Implement suspend/resume support for the PV timer. Since FreeBSD
doesn't perform a per-cpu resume of the timer, we need to call
smp_rendezvous in order to correctly resume the timer on each CPU.
sys/dev/xen/xenpci/xenpci.c:
- Don't reset the PCI interrupt on each suspend/resume.
sys/kern/subr_smp.c:
- When suspending a PVHVM domain make sure there are no MMU IPIs
in-flight, or we will get a lockup on resume due to the fact that
pending event channels are not carried over on migration.
- Implement a generic version of restart_cpus that can be used by
suspended and stopped cpus.
sys/x86/xen/hvm.c:
- Implement resume support for the hypercall page and shared info.
- Clear vcpu_info so it can be reset by APs when resuming from
suspension.
sys/dev/xen/xenpci/xenpci.c:
sys/x86/xen/hvm.c:
sys/x86/xen/xen_intr.c:
- Support UP kernel configurations.
sys/x86/xen/xen_intr.c:
- Properly rebind per-CPU VIRQs and IPIs on resume.
Diffstat (limited to 'sys/x86')
-rw-r--r-- | sys/x86/acpica/acpi_wakeup.c | 2 | ||||
-rw-r--r-- | sys/x86/isa/atpic.c | 4 | ||||
-rw-r--r-- | sys/x86/x86/intr_machdep.c | 4 | ||||
-rw-r--r-- | sys/x86/x86/io_apic.c | 4 | ||||
-rw-r--r-- | sys/x86/x86/local_apic.c | 4 | ||||
-rw-r--r-- | sys/x86/xen/hvm.c | 95 | ||||
-rw-r--r-- | sys/x86/xen/xen_intr.c | 135 |
7 files changed, 212 insertions, 36 deletions
diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c index 0c062bf..5008544 100644 --- a/sys/x86/acpica/acpi_wakeup.c +++ b/sys/x86/acpica/acpi_wakeup.c @@ -266,7 +266,7 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result, restart_cpus(suspcpus); #endif mca_resume(); - intr_resume(); + intr_resume(/*suspend_cancelled*/false); AcpiSetFirmwareWakingVector(0); } else { diff --git a/sys/x86/isa/atpic.c b/sys/x86/isa/atpic.c index ddf3fce..7f6cb14 100644 --- a/sys/x86/isa/atpic.c +++ b/sys/x86/isa/atpic.c @@ -123,7 +123,7 @@ static void atpic_eoi_slave(struct intsrc *isrc); static void atpic_enable_intr(struct intsrc *isrc); static void atpic_disable_intr(struct intsrc *isrc); static int atpic_vector(struct intsrc *isrc); -static void atpic_resume(struct pic *pic); +static void atpic_resume(struct pic *pic, bool suspend_cancelled); static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); @@ -276,7 +276,7 @@ atpic_source_pending(struct intsrc *isrc) } static void -atpic_resume(struct pic *pic) +atpic_resume(struct pic *pic, bool suspend_cancelled) { struct atpic *ap = (struct atpic *)pic; diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c index e21635f..8646637 100644 --- a/sys/x86/x86/intr_machdep.c +++ b/sys/x86/x86/intr_machdep.c @@ -279,7 +279,7 @@ intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame) } void -intr_resume(void) +intr_resume(bool suspend_cancelled) { struct pic *pic; @@ -289,7 +289,7 @@ intr_resume(void) mtx_lock(&intr_table_lock); TAILQ_FOREACH(pic, &pics, pics) { if (pic->pic_resume != NULL) - pic->pic_resume(pic); + pic->pic_resume(pic, suspend_cancelled); } mtx_unlock(&intr_table_lock); } diff --git a/sys/x86/x86/io_apic.c b/sys/x86/x86/io_apic.c index 4df27c2..4467f8f 100644 --- a/sys/x86/x86/io_apic.c +++ b/sys/x86/x86/io_apic.c @@ -119,7 +119,7 @@ static int ioapic_vector(struct 
intsrc *isrc); static int ioapic_source_pending(struct intsrc *isrc); static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void ioapic_resume(struct pic *pic); +static void ioapic_resume(struct pic *pic, bool suspend_cancelled); static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); @@ -486,7 +486,7 @@ ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, } static void -ioapic_resume(struct pic *pic) +ioapic_resume(struct pic *pic, bool suspend_cancelled) { struct ioapic *io = (struct ioapic *)pic; int i; diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index ac651cd..8c8eef6 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -161,7 +161,7 @@ static u_long lapic_timer_divisor; static struct eventtimer lapic_et; static void lapic_enable(void); -static void lapic_resume(struct pic *pic); +static void lapic_resume(struct pic *pic, bool suspend_cancelled); static void lapic_timer_oneshot(struct lapic *, u_int count, int enable_int); static void lapic_timer_periodic(struct lapic *, @@ -566,7 +566,7 @@ lapic_enable(void) /* Reset the local APIC on the BSP during resume. 
*/ static void -lapic_resume(struct pic *pic) +lapic_resume(struct pic *pic, bool suspend_cancelled) { lapic_setup(0); diff --git a/sys/x86/xen/hvm.c b/sys/x86/xen/hvm.c index 0404fe9..b0c2df6 100644 --- a/sys/x86/xen/hvm.c +++ b/sys/x86/xen/hvm.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include <xen/interface/vcpu.h> /*--------------------------- Forward Declarations ---------------------------*/ +#ifdef SMP static driver_filter_t xen_smp_rendezvous_action; static driver_filter_t xen_invltlb; static driver_filter_t xen_invlpg; @@ -70,6 +71,7 @@ static driver_filter_t xen_ipi_bitmap_handler; static driver_filter_t xen_cpustop_handler; static driver_filter_t xen_cpususpend_handler; static driver_filter_t xen_cpustophard_handler; +#endif /*---------------------------- Extern Declarations ---------------------------*/ /* Variables used by mp_machdep to perform the MMU related IPIs */ @@ -93,6 +95,12 @@ extern void pmap_lazyfix_action(void); #define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) /*-------------------------------- Local Types -------------------------------*/ +enum xen_hvm_init_type { + XEN_HVM_INIT_COLD, + XEN_HVM_INIT_CANCELLED_SUSPEND, + XEN_HVM_INIT_RESUME +}; + struct xen_ipi_handler { driver_filter_t *filter; @@ -104,6 +112,7 @@ enum xen_domain_type xen_domain_type = XEN_NATIVE; static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); +#ifdef SMP static struct xen_ipi_handler xen_ipis[] = { [IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" }, @@ -119,6 +128,7 @@ static struct xen_ipi_handler xen_ipis[] = [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, [IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" }, }; +#endif /** * If non-zero, the hypervisor has been configured to use a direct @@ -129,13 +139,16 @@ int xen_vector_callback_enabled; /*------------------------------- Per-CPU Data -------------------------------*/ DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); DPCPU_DEFINE(struct vcpu_info *, 
vcpu_info); +#ifdef SMP DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); +#endif /*------------------ Hypervisor Access Shared Memory Regions -----------------*/ /** Hypercall table accessed via HYPERVISOR_*_op() methods. */ char *hypercall_stubs; shared_info_t *HYPERVISOR_shared_info; +#ifdef SMP /*---------------------------- XEN PV IPI Handlers ---------------------------*/ /* * This are C clones of the ASM functions found in apic_vector.s @@ -496,6 +509,7 @@ xen_init_ipis(void) /* Set the xen pv ipi ops to replace the native ones */ cpu_ops.ipi_vectored = xen_ipi_vectored; } +#endif /*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ static uint32_t @@ -579,6 +593,9 @@ xen_hvm_set_callback(device_t dev) struct xen_hvm_param xhp; int irq; + if (xen_vector_callback_enabled) + return; + xhp.domid = DOMID_SELF; xhp.index = HVM_PARAM_CALLBACK_IRQ; if (xen_feature(XENFEAT_hvm_callback_vector) != 0) { @@ -637,41 +654,83 @@ xen_hvm_disable_emulated_devices(void) outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS); } +static void +xen_hvm_init(enum xen_hvm_init_type init_type) +{ + int error; + int i; + + if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND) + return; + + error = xen_hvm_init_hypercall_stubs(); + + switch (init_type) { + case XEN_HVM_INIT_COLD: + if (error != 0) + return; + + setup_xen_features(); + break; + case XEN_HVM_INIT_RESUME: + if (error != 0) + panic("Unable to init Xen hypercall stubs on resume"); + break; + default: + panic("Unsupported HVM initialization type"); + } + + /* Clear any stale vcpu_info. 
*/ + CPU_FOREACH(i) + DPCPU_ID_SET(i, vcpu_info, NULL); + + xen_vector_callback_enabled = 0; + xen_domain_type = XEN_HVM_DOMAIN; + xen_hvm_init_shared_info_page(); + xen_hvm_set_callback(NULL); + xen_hvm_disable_emulated_devices(); +} + void xen_hvm_suspend(void) { } void -xen_hvm_resume(void) +xen_hvm_resume(bool suspend_cancelled) { - xen_hvm_init_hypercall_stubs(); - xen_hvm_init_shared_info_page(); + xen_hvm_init(suspend_cancelled ? + XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME); + + /* Register vcpu_info area for CPU#0. */ + xen_hvm_init_cpu(); } static void -xen_hvm_init(void *dummy __unused) +xen_hvm_sysinit(void *arg __unused) { + xen_hvm_init(XEN_HVM_INIT_COLD); +} - if (xen_hvm_init_hypercall_stubs() != 0) - return; - - xen_domain_type = XEN_HVM_DOMAIN; - setup_xen_features(); - xen_hvm_init_shared_info_page(); - xen_hvm_set_callback(NULL); - xen_hvm_disable_emulated_devices(); -} - -void xen_hvm_init_cpu(void) +void +xen_hvm_init_cpu(void) { struct vcpu_register_vcpu_info info; struct vcpu_info *vcpu_info; int cpu, rc; - cpu = PCPU_GET(acpi_id); + if (DPCPU_GET(vcpu_info) != NULL) { + /* + * vcpu_info is already set. We're resuming + * from a failed migration and our pre-suspend + * configuration is still valid. 
+ */ + return; + } + vcpu_info = DPCPU_PTR(vcpu_local_info); + cpu = PCPU_GET(acpi_id); info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT; info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info)); @@ -682,6 +741,8 @@ void xen_hvm_init_cpu(void) DPCPU_SET(vcpu_info, vcpu_info); } -SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL); +SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL); +#ifdef SMP SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL); +#endif SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL); diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index 54a6be6..b94f8d9 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -120,7 +120,7 @@ struct xenisrc { #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) static void xen_intr_suspend(struct pic *); -static void xen_intr_resume(struct pic *); +static void xen_intr_resume(struct pic *, bool suspend_cancelled); static void xen_intr_enable_source(struct intsrc *isrc); static void xen_intr_disable_source(struct intsrc *isrc, int eoi); static void xen_intr_eoi_source(struct intsrc *isrc); @@ -334,7 +334,7 @@ xen_intr_release_isrc(struct xenisrc *isrc) evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); evtchn_cpu_unmask_port(0, isrc->xi_port); - if (isrc->xi_close != 0) { + if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) { struct evtchn_close close = { .port = isrc->xi_port }; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) panic("EVTCHNOP_close failed"); @@ -408,6 +408,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port, return (error); } *isrcp = isrc; + evtchn_unmask_port(local_port); return (0); } @@ -571,6 +572,9 @@ xen_intr_init(void *dummy __unused) struct xen_intr_pcpu_data *pcpu; int i; + if (!xen_domain()) + return (0); + mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); /* @@ -602,20 +606,116 @@ xen_intr_suspend(struct pic 
*unused) { } +static void +xen_rebind_ipi(struct xenisrc *isrc) +{ +#ifdef SMP + int cpu = isrc->xi_cpu; + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + int error; + struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id }; + + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi); + if (error != 0) + panic("unable to rebind xen IPI: %d", error); + + isrc->xi_port = bind_ipi.port; + isrc->xi_cpu = 0; + xen_intr_port_to_isrc[bind_ipi.port] = isrc; + + error = xen_intr_assign_cpu(&isrc->xi_intsrc, + cpu_apic_ids[cpu]); + if (error) + panic("unable to bind xen IPI to CPU#%d: %d", + cpu, error); + + evtchn_unmask_port(bind_ipi.port); +#else + panic("Resume IPI event channel on UP"); +#endif +} + +static void +xen_rebind_virq(struct xenisrc *isrc) +{ + int cpu = isrc->xi_cpu; + int acpi_id = pcpu_find(cpu)->pc_acpi_id; + int error; + struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq, + .vcpu = acpi_id }; + + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (error != 0) + panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error); + + isrc->xi_port = bind_virq.port; + isrc->xi_cpu = 0; + xen_intr_port_to_isrc[bind_virq.port] = isrc; + +#ifdef SMP + error = xen_intr_assign_cpu(&isrc->xi_intsrc, + cpu_apic_ids[cpu]); + if (error) + panic("unable to bind xen VIRQ#%d to CPU#%d: %d", + isrc->xi_virq, cpu, error); +#endif + + evtchn_unmask_port(bind_virq.port); +} + /** * Return this PIC to service after being suspended. */ static void -xen_intr_resume(struct pic *unused) +xen_intr_resume(struct pic *unused, bool suspend_cancelled) { - u_int port; + shared_info_t *s = HYPERVISOR_shared_info; + struct xenisrc *isrc; + u_int isrc_idx; + int i; - /* - * Mask events for all ports. They will be unmasked after - * drivers have re-registered their handlers. 
- */ - for (port = 0; port < NR_EVENT_CHANNELS; port++) - evtchn_mask_port(port); + if (suspend_cancelled) + return; + + /* Reset the per-CPU masks */ + CPU_FOREACH(i) { + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); + memset(pcpu->evtchn_enabled, + i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled)); + } + + /* Mask all event channels. */ + for (i = 0; i < nitems(s->evtchn_mask); i++) + atomic_store_rel_long(&s->evtchn_mask[i], ~0); + + /* Remove port -> isrc mappings */ + memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc)); + + /* Free unused isrcs and rebind VIRQs and IPIs */ + for (isrc_idx = 0; isrc_idx < xen_intr_isrc_count; isrc_idx++) { + u_int vector; + + vector = FIRST_EVTCHN_INT + isrc_idx; + isrc = (struct xenisrc *)intr_lookup_source(vector); + if (isrc != NULL) { + isrc->xi_port = 0; + switch (isrc->xi_type) { + case EVTCHN_TYPE_IPI: + xen_rebind_ipi(isrc); + break; + case EVTCHN_TYPE_VIRQ: + xen_rebind_virq(isrc); + break; + default: + isrc->xi_cpu = 0; + break; + } + } + } } /** @@ -693,6 +793,7 @@ xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, static int xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) { +#ifdef SMP struct evtchn_bind_vcpu bind_vcpu; struct xenisrc *isrc; u_int to_cpu, acpi_id; @@ -749,6 +850,9 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) } mtx_unlock(&xen_intr_isrc_lock); return (0); +#else + return (EOPNOTSUPP); +#endif } /*------------------- Virtual Interrupt Source PIC Functions -----------------*/ @@ -979,8 +1083,11 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev, filter, handler, arg, flags, port_handlep); + +#ifdef SMP if (error == 0) error = intr_event_bind(isrc->xi_intsrc.is_event, cpu); +#endif if (error != 0) { evtchn_close_t close = { .port = bind_virq.port }; @@ -991,6 +1098,7 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, return (error); 
} +#ifdef SMP if (isrc->xi_cpu != cpu) { /* * Too early in the boot process for the generic interrupt @@ -1000,12 +1108,15 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, */ xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); } +#endif /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. */ isrc->xi_close = 1; + isrc->xi_virq = virq; + return (0); } @@ -1014,6 +1125,7 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, driver_filter_t filter, enum intr_type flags, xen_intr_handle_t *port_handlep) { +#ifdef SMP int acpi_id = pcpu_find(cpu)->pc_acpi_id; struct xenisrc *isrc; struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id }; @@ -1063,6 +1175,9 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, */ isrc->xi_close = 1; return (0); +#else + return (EOPNOTSUPP); +#endif } int |