From 2a442c9c6453d3d043dfd89f2e03a1deff8a6f06 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 25 Feb 2015 11:42:15 -0800 Subject: x86: Use common outgoing-CPU-notification code This commit removes the open-coded CPU-offline notification with new common code. Among other things, this change avoids calling scheduler code using RCU from an offline CPU that RCU is ignoring. It also allows Xen to notice at online time that the CPU did not go offline correctly. Note that Xen has the surviving CPU carry out some cleanup operations, so if the surviving CPU times out, these cleanup operations might have been carried out while the outgoing CPU was still running. It might therefore be unwise to bring this CPU back online, and this commit avoids doing so. Signed-off-by: Boris Ostrovsky Signed-off-by: Paul E. McKenney Cc: Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Cc: --- arch/x86/xen/smp.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 08e8489..1c5e760 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -90,14 +90,10 @@ static void cpu_bringup(void) set_cpu_online(cpu, true); - this_cpu_write(cpu_state, CPU_ONLINE); - - wmb(); + cpu_set_state_online(cpu); /* Implies full memory barrier. */ /* We can take interrupts now: we're officially "up". */ local_irq_enable(); - - wmb(); /* make sure everything is out */ } /* @@ -459,7 +455,13 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) xen_setup_timer(cpu); xen_init_lock_cpu(cpu); - per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* + * PV VCPUs are always successfully taken down (see 'while' loop + * in xen_cpu_die()), so -EBUSY is an error. + */ + rc = cpu_check_up_prepare(cpu); + if (rc) + return rc; /* make sure interrupts start blocked */ per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; @@ -479,10 +481,8 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); BUG_ON(rc); - while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { + while (cpu_report_state(cpu) != CPU_ONLINE) HYPERVISOR_sched_op(SCHEDOP_yield, NULL); - barrier(); - } return 0; } @@ -511,11 +511,11 @@ static void xen_cpu_die(unsigned int cpu) schedule_timeout(HZ/10); } - cpu_die_common(cpu); - - xen_smp_intr_free(cpu); - xen_uninit_lock_cpu(cpu); - xen_teardown_timer(cpu); + if (common_cpu_die(cpu) == 0) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + xen_teardown_timer(cpu); + } } static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ @@ -747,6 +747,16 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) { int rc; + + /* + * This can happen if CPU was offlined earlier and + * offlining timed out in common_cpu_die(). + */ + if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) { + xen_smp_intr_free(cpu); + xen_uninit_lock_cpu(cpu); + } + /* * xen_smp_intr_init() needs to run before native_cpu_up() * so that IPI vectors are set up on the booting CPU before @@ -768,12 +778,6 @@ static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle) return rc; } -static void xen_hvm_cpu_die(unsigned int cpu) -{ - xen_cpu_die(cpu); - native_cpu_die(cpu); -} - void __init xen_hvm_smp_init(void) { if (!xen_have_vector_callback) @@ -781,7 +785,7 @@ void __init xen_hvm_smp_init(void) smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; smp_ops.cpu_up = xen_hvm_cpu_up; - smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.cpu_die = xen_cpu_die; smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; smp_ops.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu; -- cgit v1.1 From 16b12d605795e775cd80b5901c85dd2e320a6ec2 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 21 Jan 2015 08:49:23 +0100 Subject: xen: synchronize include/xen/interface/xen.h with xen The header include/xen/interface/xen.h doesn't contain all definitions from Xen's version of that header. Update it accordingly. Signed-off-by: Juergen Gross Reviewed-by: David Vrabel Signed-off-by: David Vrabel --- arch/x86/xen/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c index 520022d..8296cdf 100644 --- a/arch/x86/xen/trace.c +++ b/arch/x86/xen/trace.c @@ -29,7 +29,7 @@ static const char *xen_hypercall_names[] = { N(vcpu_op), N(set_segment_base), N(mmuext_op), - N(acm_op), + N(xsm_op), N(nmi_op), N(sched_op), N(callback_op), -- cgit v1.1 From fc903f87364d1b00890fa991a6f116e2bb38ec1e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 21 Jan 2015 08:49:24 +0100 Subject: xen: use generated hypervisor symbols in arch/x86/xen/trace.c Instead of manually list all hypervisor calls in arch/x86/xen/trace.c use the auto generated list. Signed-off-by: Juergen Gross Reviewed-by: David Vrabel Signed-off-by: David Vrabel --- arch/x86/xen/trace.c | 50 ++++---------------------------------------------- 1 file changed, 4 insertions(+), 46 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c index 8296cdf..a702ec2 100644 --- a/arch/x86/xen/trace.c +++ b/arch/x86/xen/trace.c @@ -1,54 +1,12 @@ #include #include +#include -#define N(x) [__HYPERVISOR_##x] = "("#x")" +#define HYPERCALL(x) [__HYPERVISOR_##x] = "("#x")", static const char *xen_hypercall_names[] = { - N(set_trap_table), - N(mmu_update), - N(set_gdt), - N(stack_switch), - N(set_callbacks), - N(fpu_taskswitch), - N(sched_op_compat), - N(dom0_op), - N(set_debugreg), - N(get_debugreg), - N(update_descriptor), - N(memory_op), - N(multicall), - N(update_va_mapping), - N(set_timer_op), - N(event_channel_op_compat), - N(xen_version), - N(console_io), - N(physdev_op_compat), - N(grant_table_op), - N(vm_assist), - N(update_va_mapping_otherdomain), - N(iret), - N(vcpu_op), - N(set_segment_base), - N(mmuext_op), - N(xsm_op), - N(nmi_op), - N(sched_op), - N(callback_op), - N(xenoprof_op), - N(event_channel_op), - N(physdev_op), - N(hvm_op), - -/* Architecture-specific hypercall definitions. */ - N(arch_0), - N(arch_1), - N(arch_2), - N(arch_3), - N(arch_4), - N(arch_5), - N(arch_6), - N(arch_7), +#include }; -#undef N +#undef HYPERCALL static const char *xen_hypercall_name(unsigned op) { -- cgit v1.1 From 526abeaed4971def462f209632b88fd7b8c4a09c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 21 Jan 2015 08:49:25 +0100 Subject: xen: use generated hypercall symbols in arch/x86/xen/xen-head.S Instead of manually list each hypercall in arch/x86/xen/xen-head.S use the auto generated symbol list. This also corrects the wrong address of xen_hypercall_mca which was located 32 bytes higher than it should. Symbol addresses have been verified to match the correct ones via objdump output. Based-on-patch-by: Jan Beulich Signed-off-by: Juergen Gross Reviewed-by: David Vrabel Signed-off-by: David Vrabel --- arch/x86/xen/xen-head.S | 63 ++++++++----------------------------------------- 1 file changed, 10 insertions(+), 53 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 674b2225..8afdfcc 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -12,6 +12,8 @@ #include #include +#include +#include #include #ifdef CONFIG_XEN_PVH @@ -85,59 +87,14 @@ ENTRY(xen_pvh_early_cpu_init) .pushsection .text .balign PAGE_SIZE ENTRY(hypercall_page) -#define NEXT_HYPERCALL(x) \ - ENTRY(xen_hypercall_##x) \ - .skip 32 - -NEXT_HYPERCALL(set_trap_table) -NEXT_HYPERCALL(mmu_update) -NEXT_HYPERCALL(set_gdt) -NEXT_HYPERCALL(stack_switch) -NEXT_HYPERCALL(set_callbacks) -NEXT_HYPERCALL(fpu_taskswitch) -NEXT_HYPERCALL(sched_op_compat) -NEXT_HYPERCALL(platform_op) -NEXT_HYPERCALL(set_debugreg) -NEXT_HYPERCALL(get_debugreg) -NEXT_HYPERCALL(update_descriptor) -NEXT_HYPERCALL(ni) -NEXT_HYPERCALL(memory_op) -NEXT_HYPERCALL(multicall) -NEXT_HYPERCALL(update_va_mapping) -NEXT_HYPERCALL(set_timer_op) -NEXT_HYPERCALL(event_channel_op_compat) -NEXT_HYPERCALL(xen_version) -NEXT_HYPERCALL(console_io) -NEXT_HYPERCALL(physdev_op_compat) -NEXT_HYPERCALL(grant_table_op) -NEXT_HYPERCALL(vm_assist) -NEXT_HYPERCALL(update_va_mapping_otherdomain) -NEXT_HYPERCALL(iret) -NEXT_HYPERCALL(vcpu_op) -NEXT_HYPERCALL(set_segment_base) -NEXT_HYPERCALL(mmuext_op) -NEXT_HYPERCALL(xsm_op) -NEXT_HYPERCALL(nmi_op) -NEXT_HYPERCALL(sched_op) -NEXT_HYPERCALL(callback_op) -NEXT_HYPERCALL(xenoprof_op) -NEXT_HYPERCALL(event_channel_op) -NEXT_HYPERCALL(physdev_op) -NEXT_HYPERCALL(hvm_op) -NEXT_HYPERCALL(sysctl) -NEXT_HYPERCALL(domctl) -NEXT_HYPERCALL(kexec_op) -NEXT_HYPERCALL(tmem_op) /* 38 */ -ENTRY(xen_hypercall_rsvr) - .skip 320 -NEXT_HYPERCALL(mca) /* 48 */ -NEXT_HYPERCALL(arch_1) -NEXT_HYPERCALL(arch_2) -NEXT_HYPERCALL(arch_3) -NEXT_HYPERCALL(arch_4) -NEXT_HYPERCALL(arch_5) -NEXT_HYPERCALL(arch_6) - .balign PAGE_SIZE + .skip PAGE_SIZE + +#define HYPERCALL(n) \ + .equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \ + .type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32 +#include +#undef HYPERCALL + .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") -- cgit v1.1 From feb44f1f7a4ac299d1ab1c3606860e70b9b89d69 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 2 Mar 2015 12:06:23 -0500 Subject: x86/xen: Provide a "Xen PV" APIC driver to support >255 VCPUs Instead of mangling the default APIC driver, provide a Xen PV guest specific one that explicitly provides appropriate methods. This allows use to report that all APIC IDs are valid, allowing dom0 to boot with more than 255 VCPUs. Since the probe order of APIC drivers is link dependent, we add in an late probe function to change to the Xen PV if it hadn't been done during bootup. Suggested-by: David Vrabel Reported-by: Cathy Avery Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: David Vrabel --- arch/x86/xen/apic.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/xen/enlighten.c | 90 +----------------------- 2 files changed, 181 insertions(+), 89 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 7005ced..5e0ecaf 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -7,6 +7,7 @@ #include #include #include "xen-ops.h" +#include "smp.h" static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) { @@ -28,7 +29,186 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } +static unsigned long xen_set_apic_id(unsigned int x) +{ + WARN_ON(1); + return x; +} + +static unsigned int xen_get_apic_id(unsigned long x) +{ + return ((x)>>24) & 0xFFu; +} + +static u32 xen_apic_read(u32 reg) +{ + struct xen_platform_op op = { + .cmd = XENPF_get_cpuinfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.pcpu_info.xen_cpuid = 0, + }; + int ret = 0; + + /* Shouldn't need this as APIC is turned off for PV, and we only + * get called on the bootup processor. But just in case. */ + if (!xen_initial_domain() || smp_processor_id()) + return 0; + + if (reg == APIC_LVR) + return 0x10; +#ifdef CONFIG_X86_32 + if (reg == APIC_LDR) + return SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); +#endif + if (reg != APIC_ID) + return 0; + + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return 0; + + return op.u.pcpu_info.apic_id << 24; +} + +static void xen_apic_write(u32 reg, u32 val) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u64 xen_apic_icr_read(void) +{ + return 0; +} + +static void xen_apic_icr_write(u32 low, u32 id) +{ + /* Warn to see if there's any stray references */ + WARN_ON(1); +} + +static u32 xen_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static int xen_apic_probe_pv(void) +{ + if (xen_pv_domain()) + return 1; + + return 0; +} + +static int xen_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return xen_pv_domain(); +} + +static int xen_id_always_valid(int apicid) +{ + return 1; +} + +static int xen_id_always_registered(void) +{ + return 1; +} + +static int xen_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return initial_apic_id >> index_msb; +} + +#ifdef CONFIG_X86_32 +static int xen_x86_32_early_logical_apicid(int cpu) +{ + /* Match with APIC_LDR read. Otherwise setup_local_APIC complains. */ + return 1 << cpu; +} +#endif + +static void xen_noop(void) +{ +} + +static void xen_silent_inquire(int apicid) +{ +} + +static struct apic xen_pv_apic = { + .name = "Xen PV", + .probe = xen_apic_probe_pv, + .acpi_madt_oem_check = xen_madt_oem_check, + .apic_id_valid = xen_id_always_valid, + .apic_id_registered = xen_id_always_registered, + + /* .irq_delivery_mode - used in native_compose_msi_msg only */ + /* .irq_dest_mode - used in native_compose_msi_msg only */ + + .target_cpus = default_target_cpus, + .disable_esr = 0, + /* .dest_logical - default_send_IPI_ use it but we use our own. */ + .check_apicid_used = default_check_apicid_used, /* Used on 32-bit */ + + .vector_allocation_domain = flat_vector_allocation_domain, + .init_apic_ldr = xen_noop, /* setup_local_APIC calls it */ + + .ioapic_phys_id_map = default_ioapic_phys_id_map, /* Used on 32-bit */ + .setup_apic_routing = NULL, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = physid_set_mask_of_physid, /* Used on 32-bit */ + .check_phys_apicid_present = default_check_phys_apicid_present, /* smp_sanity_check needs it */ + .phys_pkg_id = xen_phys_pkg_id, /* detect_ht */ + + .get_apic_id = xen_get_apic_id, + .set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */ + .apic_id_mask = 0xFF << 24, /* Used by verify_local_APIC. Match with what xen_get_apic_id does. */ + + .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, + +#ifdef CONFIG_SMP + .send_IPI_mask = xen_send_IPI_mask, + .send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself, + .send_IPI_allbutself = xen_send_IPI_allbutself, + .send_IPI_all = xen_send_IPI_all, + .send_IPI_self = xen_send_IPI_self, +#endif + /* .wait_for_init_deassert- used by AP bootup - smp_callin which we don't use */ + .inquire_remote_apic = xen_silent_inquire, + + .read = xen_apic_read, + .write = xen_apic_write, + .eoi_write = xen_apic_write, + + .icr_read = xen_apic_icr_read, + .icr_write = xen_apic_icr_write, + .wait_icr_idle = xen_noop, + .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, + +#ifdef CONFIG_X86_32 + /* generic_processor_info and setup_local_APIC. */ + .x86_32_early_logical_apicid = xen_x86_32_early_logical_apicid, +#endif +}; + +static void __init xen_apic_check(void) +{ + if (apic == &xen_pv_apic) + return; + + pr_info("Switched APIC routing from %s to %s.\n", apic->name, + xen_pv_apic.name); + apic = &xen_pv_apic; +} void __init xen_init_apic(void) { x86_io_apic_ops.read = xen_io_apic_read; + /* On PV guests the APIC CPUID bit is disabled so none of the + * routines end up executing. */ + if (!xen_initial_domain()) + apic = &xen_pv_apic; + + x86_platform.apic_post_init = xen_apic_check; } +apic_driver(xen_pv_apic); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5240f56..b9a2272 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -927,92 +927,6 @@ static void xen_io_delay(void) { } -#ifdef CONFIG_X86_LOCAL_APIC -static unsigned long xen_set_apic_id(unsigned int x) -{ - WARN_ON(1); - return x; -} -static unsigned int xen_get_apic_id(unsigned long x) -{ - return ((x)>>24) & 0xFFu; -} -static u32 xen_apic_read(u32 reg) -{ - struct xen_platform_op op = { - .cmd = XENPF_get_cpuinfo, - .interface_version = XENPF_INTERFACE_VERSION, - .u.pcpu_info.xen_cpuid = 0, - }; - int ret = 0; - - /* Shouldn't need this as APIC is turned off for PV, and we only - * get called on the bootup processor. But just in case. */ - if (!xen_initial_domain() || smp_processor_id()) - return 0; - - if (reg == APIC_LVR) - return 0x10; - - if (reg != APIC_ID) - return 0; - - ret = HYPERVISOR_dom0_op(&op); - if (ret) - return 0; - - return op.u.pcpu_info.apic_id << 24; -} - -static void xen_apic_write(u32 reg, u32 val) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static u64 xen_apic_icr_read(void) -{ - return 0; -} - -static void xen_apic_icr_write(u32 low, u32 id) -{ - /* Warn to see if there's any stray references */ - WARN_ON(1); -} - -static void xen_apic_wait_icr_idle(void) -{ - return; -} - -static u32 xen_safe_apic_wait_icr_idle(void) -{ - return 0; -} - -static void set_xen_basic_apic_ops(void) -{ - apic->read = xen_apic_read; - apic->write = xen_apic_write; - apic->icr_read = xen_apic_icr_read; - apic->icr_write = xen_apic_icr_write; - apic->wait_icr_idle = xen_apic_wait_icr_idle; - apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; - apic->set_apic_id = xen_set_apic_id; - apic->get_apic_id = xen_get_apic_id; - -#ifdef CONFIG_SMP - apic->send_IPI_allbutself = xen_send_IPI_allbutself; - apic->send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself; - apic->send_IPI_mask = xen_send_IPI_mask; - apic->send_IPI_all = xen_send_IPI_all; - apic->send_IPI_self = xen_send_IPI_self; -#endif -} - -#endif - static void xen_clts(void) { struct multicall_space mcs; @@ -1618,7 +1532,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* * set up the basic apic ops. */ - set_xen_basic_apic_ops(); + xen_init_apic(); #endif if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { @@ -1731,8 +1645,6 @@ asmlinkage __visible void __init xen_start_kernel(void) if (HYPERVISOR_dom0_op(&op) == 0) boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; - xen_init_apic(); - /* Make sure ACS will be enabled */ pci_request_acs(); -- cgit v1.1 From b3b06c7eb7820cea5c15f9faa4964044284c5399 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 3 Mar 2015 16:12:12 -0500 Subject: x86/xen/apic: WARN with details. We should not be writting to the APIC registers under Xen PV. But if we do instead of just giving an blanket warning - include some details to help troubleshoot. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 5e0ecaf..70e060a 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -73,7 +73,7 @@ static u32 xen_apic_read(u32 reg) static void xen_apic_write(u32 reg, u32 val) { /* Warn to see if there's any stray references */ - WARN_ON(1); + WARN(1,"register: %x, value: %x\n", reg, val); } static u64 xen_apic_icr_read(void) -- cgit v1.1 From 628c28eefd6f2cef03b212081b466ae43fd093a3 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 11 Mar 2015 14:49:56 +0000 Subject: xen: unify foreign GFN map/unmap for auto-xlated physmap guests Auto-translated physmap guests (arm, arm64 and x86 PVHVM/PVH) map and unmap foreign GFNs using the same method (updating the physmap). Unify the two arm and x86 implementations into one commont one. Note that on arm and arm64, the correct error code will be returned (instead of always -EFAULT) and map/unmap failure warnings are no longer printed. These changes are required if the foreign domain is paging (-ENOENT failures are expected and must be propagated up to the caller). Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- arch/x86/xen/mmu.c | 110 ++--------------------------------------------------- 1 file changed, 3 insertions(+), 107 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index adca9e2..3d536a5 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2436,95 +2436,6 @@ void __init xen_hvm_init_mmu_ops(void) } #endif -#ifdef CONFIG_XEN_PVH -/* - * Map foreign gfn (fgfn), to local pfn (lpfn). This for the user - * space creating new guest on pvh dom0 and needing to map domU pages. - */ -static int xlate_add_to_p2m(unsigned long lpfn, unsigned long fgfn, - unsigned int domid) -{ - int rc, err = 0; - xen_pfn_t gpfn = lpfn; - xen_ulong_t idx = fgfn; - - struct xen_add_to_physmap_range xatp = { - .domid = DOMID_SELF, - .foreign_domid = domid, - .size = 1, - .space = XENMAPSPACE_gmfn_foreign, - }; - set_xen_guest_handle(xatp.idxs, &idx); - set_xen_guest_handle(xatp.gpfns, &gpfn); - set_xen_guest_handle(xatp.errs, &err); - - rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); - if (rc < 0) - return rc; - return err; -} - -static int xlate_remove_from_p2m(unsigned long spfn, int count) -{ - struct xen_remove_from_physmap xrp; - int i, rc; - - for (i = 0; i < count; i++) { - xrp.domid = DOMID_SELF; - xrp.gpfn = spfn+i; - rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, &xrp); - if (rc) - break; - } - return rc; -} - -struct xlate_remap_data { - unsigned long fgfn; /* foreign domain's gfn */ - pgprot_t prot; - domid_t domid; - int index; - struct page **pages; -}; - -static int xlate_map_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, - void *data) -{ - int rc; - struct xlate_remap_data *remap = data; - unsigned long pfn = page_to_pfn(remap->pages[remap->index++]); - pte_t pteval = pte_mkspecial(pfn_pte(pfn, remap->prot)); - - rc = xlate_add_to_p2m(pfn, remap->fgfn, remap->domid); - if (rc) - return rc; - native_set_pte(ptep, pteval); - - return 0; -} - -static int xlate_remap_gfn_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long mfn, - int nr, pgprot_t prot, unsigned domid, - struct page **pages) -{ - int err; - struct xlate_remap_data pvhdata; - - BUG_ON(!pages); - - pvhdata.fgfn = mfn; - pvhdata.prot = prot; - pvhdata.domid = domid; - pvhdata.index = 0; - pvhdata.pages = pages; - err = apply_to_page_range(vma->vm_mm, addr, nr << PAGE_SHIFT, - xlate_map_pte_fn, &pvhdata); - flush_tlb_all(); - return err; -} -#endif - #define REMAP_BATCH_SIZE 16 struct remap_data { @@ -2564,8 +2475,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (xen_feature(XENFEAT_auto_translated_physmap)) { #ifdef CONFIG_XEN_PVH /* We need to update the local page tables and the xen HAP */ - return xlate_remap_gfn_range(vma, addr, mfn, nr, prot, - domid, pages); + return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot, + domid, pages); #else return -EINVAL; #endif @@ -2609,22 +2520,7 @@ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, return 0; #ifdef CONFIG_XEN_PVH - while (numpgs--) { - /* - * The mmu has already cleaned up the process mmu - * resources at this point (lookup_address will return - * NULL). - */ - unsigned long pfn = page_to_pfn(pages[numpgs]); - - xlate_remove_from_p2m(pfn, 1); - } - /* - * We don't need to flush tlbs because as part of - * xlate_remove_from_p2m, the hypervisor will do tlb flushes - * after removing the p2m entries from the EPT/NPT - */ - return 0; + return xen_xlate_unmap_gfn_range(vma, numpgs, pages); #else return -EINVAL; #endif -- cgit v1.1 From 4e8c0c8c4bf3a5b5c98046e146ab3884bf7a7d0e Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 11 Mar 2015 14:49:57 +0000 Subject: xen/privcmd: improve performance of MMAPBATCH_V2 Make the IOCTL_PRIVCMD_MMAPBATCH_V2 (and older V1 version) map multiple frames at a time rather than one at a time, despite the pages being non-consecutive GFNs. xen_remap_foreign_mfn_array() is added which maps an array of GFNs (instead of a consecutive range of GFNs). Since per-frame errors are returned in an array, privcmd must set the MMAPBATCH_V1 error bits as part of the "report errors" phase, after all the frames are mapped. Migrate times are significantly improved (when using a PV toolstack domain). For example, for an idle 12 GiB PV guest: Before After real 0m38.179s 0m26.868s user 0m15.096s 0m13.652s sys 0m28.988s 0m18.732s Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- arch/x86/xen/mmu.c | 101 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 82 insertions(+), 19 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3d536a5..29b3be2 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2439,7 +2439,8 @@ void __init xen_hvm_init_mmu_ops(void) #define REMAP_BATCH_SIZE 16 struct remap_data { - unsigned long mfn; + xen_pfn_t *mfn; + bool contiguous; pgprot_t prot; struct mmu_update *mmu_update; }; @@ -2448,7 +2449,14 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, unsigned long addr, void *data) { struct remap_data *rmd = data; - pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot)); + pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot)); + + /* If we have a contigious range, just update the mfn itself, + else update pointer to be "next mfn". */ + if (rmd->contiguous) + (*rmd->mfn)++; + else + rmd->mfn++; rmd->mmu_update->ptr = virt_to_machine(ptep).maddr; rmd->mmu_update->val = pte_val_ma(pte); @@ -2457,26 +2465,26 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token, return 0; } -int xen_remap_domain_mfn_range(struct vm_area_struct *vma, - unsigned long addr, - xen_pfn_t mfn, int nr, - pgprot_t prot, unsigned domid, - struct page **pages) - +static int do_remap_mfn(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, + struct page **pages) { + int err = 0; struct remap_data rmd; struct mmu_update mmu_update[REMAP_BATCH_SIZE]; - int batch; unsigned long range; - int err = 0; + int mapped = 0; BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); if (xen_feature(XENFEAT_auto_translated_physmap)) { #ifdef CONFIG_XEN_PVH /* We need to update the local page tables and the xen HAP */ - return xen_xlate_remap_gfn_range(vma, addr, mfn, nr, prot, - domid, pages); + return xen_xlate_remap_gfn_array(vma, addr, mfn, nr, err_ptr, + prot, domid, pages); #else return -EINVAL; #endif @@ -2484,9 +2492,15 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, rmd.mfn = mfn; rmd.prot = prot; + /* We use the err_ptr to indicate if there we are doing a contigious + * mapping or a discontigious mapping. */ + rmd.contiguous = !err_ptr; while (nr) { - batch = min(REMAP_BATCH_SIZE, nr); + int index = 0; + int done = 0; + int batch = min(REMAP_BATCH_SIZE, nr); + int batch_left = batch; range = (unsigned long)batch << PAGE_SHIFT; rmd.mmu_update = mmu_update; @@ -2495,23 +2509,72 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, if (err) goto out; - err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid); - if (err < 0) - goto out; + /* We record the error for each page that gives an error, but + * continue mapping until the whole set is done */ + do { + int i; + + err = HYPERVISOR_mmu_update(&mmu_update[index], + batch_left, &done, domid); + + /* + * @err_ptr may be the same buffer as @mfn, so + * only clear it after each chunk of @mfn is + * used. + */ + if (err_ptr) { + for (i = index; i < index + done; i++) + err_ptr[i] = 0; + } + if (err < 0) { + if (!err_ptr) + goto out; + err_ptr[i] = err; + done++; /* Skip failed frame. */ + } else + mapped += done; + batch_left -= done; + index += done; + } while (batch_left); nr -= batch; addr += range; + if (err_ptr) + err_ptr += batch; } - - err = 0; out: xen_flush_tlb_all(); - return err; + return err < 0 ? err : mapped; +} + +int xen_remap_domain_mfn_range(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t mfn, int nr, + pgprot_t prot, unsigned domid, + struct page **pages) +{ + return do_remap_mfn(vma, addr, &mfn, nr, NULL, prot, domid, pages); } EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); +int xen_remap_domain_mfn_array(struct vm_area_struct *vma, + unsigned long addr, + xen_pfn_t *mfn, int nr, + int *err_ptr, pgprot_t prot, + unsigned domid, struct page **pages) +{ + /* We BUG_ON because it's a programmer error to pass a NULL err_ptr, + * and the consequences later is quite hard to detect what the actual + * cause of "wrong memory was mapped in". + */ + BUG_ON(err_ptr == NULL); + return do_remap_mfn(vma, addr, mfn, nr, err_ptr, prot, domid, pages); +} +EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_array); + + /* Returns: 0 success */ int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, int numpgs, struct page **pages) -- cgit v1.1 From 982333683385343d8d2db9a1df69c98406f42687 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 14 Apr 2015 15:46:14 -0700 Subject: x86: expose number of page table levels on Kconfig level We would want to use number of page table level to define mm_struct. Let's expose it as CONFIG_PGTABLE_LEVELS. Signed-off-by: Kirill A. Shutemov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/xen/mmu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index adca9e2..65083ad 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -502,7 +502,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 __visible pudval_t xen_pud_val(pud_t pud) { return pte_mfn_to_pfn(pud.pud); @@ -589,7 +589,7 @@ static void xen_set_pgd(pgd_t *ptr, pgd_t val) xen_mc_issue(PARAVIRT_LAZY_MMU); } -#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ /* * (Yet another) pagetable walker. This one is intended for pinning a @@ -1628,7 +1628,7 @@ static void xen_release_pmd(unsigned long pfn) xen_release_ptpage(pfn, PT_PMD); } -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); @@ -2046,7 +2046,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 pv_mmu_ops.set_pgd = xen_set_pgd; #endif @@ -2056,7 +2056,7 @@ static void __init xen_post_allocator_init(void) pv_mmu_ops.alloc_pmd = xen_alloc_pmd; pv_mmu_ops.release_pte = xen_release_pte; pv_mmu_ops.release_pmd = xen_release_pmd; -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 pv_mmu_ops.alloc_pud = xen_alloc_pud; pv_mmu_ops.release_pud = xen_release_pud; #endif @@ -2122,14 +2122,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), -#if PAGETABLE_LEVELS == 4 +#if CONFIG_PGTABLE_LEVELS == 4 .pud_val = PV_CALLEE_SAVE(xen_pud_val), .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, .alloc_pud = xen_alloc_pmd_init, .release_pud = xen_release_pmd_init, -#endif /* PAGETABLE_LEVELS == 4 */ +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, -- cgit v1.1 From 2b953a5e994ce279904ec70220f7d4f31d380a0a Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Apr 2015 18:46:20 -0400 Subject: xen: Suspend ticks on all CPUs during suspend Commit 77e32c89a711 ("clockevents: Manage device's state separately for the core") decouples clockevent device's modes from states. With this change when a Xen guest tries to resume, it won't be calling its set_mode op which needs to be done on each VCPU in order to make the hypervisor aware that we are in oneshot mode. This happens because clockevents_tick_resume() (which is an intermediate step of resuming ticks on a processor) doesn't call clockevents_set_state() anymore and because during suspend clockevent devices on all VCPUs (except for the one doing the suspend) are left in ONESHOT state. As result, during resume the clockevents state machine will assume that device is already where it should be and doesn't need to be updated. To avoid this problem we should suspend ticks on all VCPUs during suspend. Signed-off-by: Boris Ostrovsky Signed-off-by: David Vrabel --- arch/x86/xen/suspend.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index d949769..53b4c08 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -88,7 +88,17 @@ static void xen_vcpu_notify_restore(void *data) tick_resume_local(); } +static void xen_vcpu_notify_suspend(void *data) +{ + tick_suspend_local(); +} + void xen_arch_resume(void) { on_each_cpu(xen_vcpu_notify_restore, NULL, 1); } + +void xen_arch_suspend(void) +{ + on_each_cpu(xen_vcpu_notify_suspend, NULL, 1); +} -- cgit v1.1 From a71dbdaa8ca2933391b08e0ae5567083e3af0892 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 4 May 2015 11:02:15 -0400 Subject: hypervisor/x86/xen: Unset X86_BUG_SYSRET_SS_ATTRS on Xen PV guests Commit 61f01dd941ba ("x86_64, asm: Work around AMD SYSRET SS descriptor attribute issue") makes AMD processors set SS to __KERNEL_DS in __switch_to() to deal with cases when SS is NULL. This breaks Xen PV guests who do not want to load SS with__KERNEL_DS. Since the problem that the commit is trying to address would have to be fixed in the hypervisor (if it in fact exists under Xen) there is no reason to set X86_BUG_SYSRET_SS_ATTRS flag for PV VPCUs here. This can be easily achieved by adding x86_hyper_xen_hvm.set_cpu_features op which will clear this flag. (And since this structure is no longer HVM-specific we should do some renaming). Signed-off-by: Boris Ostrovsky Reported-by: Sander Eikelenboom Signed-off-by: David Vrabel --- arch/x86/xen/enlighten.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'arch/x86/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 94578ef..46957ea 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1760,6 +1760,9 @@ static struct notifier_block xen_hvm_cpu_notifier = { static void __init xen_hvm_guest_init(void) { + if (xen_pv_domain()) + return; + init_hvm_pv_info(); xen_hvm_init_shared_info(); @@ -1775,6 +1778,7 @@ static void __init xen_hvm_guest_init(void) xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); } +#endif static bool xen_nopv = false; static __init int xen_parse_nopv(char *arg) @@ -1784,14 +1788,11 @@ static __init int xen_parse_nopv(char *arg) } early_param("xen_nopv", xen_parse_nopv); -static uint32_t __init xen_hvm_platform(void) +static uint32_t __init xen_platform(void) { if (xen_nopv) return 0; - if (xen_pv_domain()) - return 0; - return xen_cpuid_base(); } @@ -1809,11 +1810,19 @@ bool xen_hvm_need_lapic(void) } EXPORT_SYMBOL_GPL(xen_hvm_need_lapic); -const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { - .name = "Xen HVM", - .detect = xen_hvm_platform, +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ + if (xen_pv_domain()) + clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); +} + +const struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, +#ifdef CONFIG_XEN_PVHVM .init_platform = xen_hvm_guest_init, +#endif .x2apic_available = xen_x2apic_para_available, + .set_cpu_features = xen_set_cpu_features, }; -EXPORT_SYMBOL(x86_hyper_xen_hvm); -#endif +EXPORT_SYMBOL(x86_hyper_xen); -- cgit v1.1