summaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/Makefile3
-rw-r--r--arch/x86/xen/apic.c2
-rw-r--r--arch/x86/xen/efi.c45
-rw-r--r--arch/x86/xen/enlighten.c154
-rw-r--r--arch/x86/xen/enlighten_hvm.c64
-rw-r--r--arch/x86/xen/enlighten_pv.c89
-rw-r--r--arch/x86/xen/mmu_pv.c87
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c14
-rw-r--r--arch/x86/xen/setup.c7
-rw-r--r--arch/x86/xen/smp.c31
-rw-r--r--arch/x86/xen/smp.h2
-rw-r--r--arch/x86/xen/smp_hvm.c14
-rw-r--r--arch/x86/xen/smp_pv.c9
-rw-r--r--arch/x86/xen/suspend_hvm.c11
-rw-r--r--arch/x86/xen/time.c1
-rw-r--r--arch/x86/xen/xen-ops.h3
-rw-r--r--arch/x86/xen/xen-pvh.S2
17 files changed, 302 insertions, 236 deletions
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index fffb0a1..bced7a3 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,3 +1,6 @@
+OBJECT_FILES_NON_STANDARD_xen-asm_$(BITS).o := y
+OBJECT_FILES_NON_STANDARD_xen-pvh.o := y
+
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_spinlock.o = -pg
diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index bcea81f..b5e48da 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -178,7 +178,7 @@ static struct apic xen_pv_apic = {
.get_apic_id = xen_get_apic_id,
.set_apic_id = xen_set_apic_id, /* Can be NULL on 32-bit. */
- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
+ .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
#ifdef CONFIG_SMP
.send_IPI_mask = xen_send_IPI_mask,
diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c
index 30bb2e8..a18703b 100644
--- a/arch/x86/xen/efi.c
+++ b/arch/x86/xen/efi.c
@@ -54,38 +54,6 @@ static efi_system_table_t efi_systab_xen __initdata = {
.tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */
};
-static const struct efi efi_xen __initconst = {
- .systab = NULL, /* Initialized later. */
- .runtime_version = 0, /* Initialized later. */
- .mps = EFI_INVALID_TABLE_ADDR,
- .acpi = EFI_INVALID_TABLE_ADDR,
- .acpi20 = EFI_INVALID_TABLE_ADDR,
- .smbios = EFI_INVALID_TABLE_ADDR,
- .smbios3 = EFI_INVALID_TABLE_ADDR,
- .sal_systab = EFI_INVALID_TABLE_ADDR,
- .boot_info = EFI_INVALID_TABLE_ADDR,
- .hcdp = EFI_INVALID_TABLE_ADDR,
- .uga = EFI_INVALID_TABLE_ADDR,
- .uv_systab = EFI_INVALID_TABLE_ADDR,
- .fw_vendor = EFI_INVALID_TABLE_ADDR,
- .runtime = EFI_INVALID_TABLE_ADDR,
- .config_table = EFI_INVALID_TABLE_ADDR,
- .get_time = xen_efi_get_time,
- .set_time = xen_efi_set_time,
- .get_wakeup_time = xen_efi_get_wakeup_time,
- .set_wakeup_time = xen_efi_set_wakeup_time,
- .get_variable = xen_efi_get_variable,
- .get_next_variable = xen_efi_get_next_variable,
- .set_variable = xen_efi_set_variable,
- .query_variable_info = xen_efi_query_variable_info,
- .update_capsule = xen_efi_update_capsule,
- .query_capsule_caps = xen_efi_query_capsule_caps,
- .get_next_high_mono_count = xen_efi_get_next_high_mono_count,
- .reset_system = xen_efi_reset_system,
- .set_virtual_address_map = NULL, /* Not used under Xen. */
- .flags = 0 /* Initialized later. */
-};
-
static efi_system_table_t __init *xen_efi_probe(void)
{
struct xen_platform_op op = {
@@ -102,7 +70,18 @@ static efi_system_table_t __init *xen_efi_probe(void)
/* Here we know that Xen runs on EFI platform. */
- efi = efi_xen;
+ efi.get_time = xen_efi_get_time;
+ efi.set_time = xen_efi_set_time;
+ efi.get_wakeup_time = xen_efi_get_wakeup_time;
+ efi.set_wakeup_time = xen_efi_set_wakeup_time;
+ efi.get_variable = xen_efi_get_variable;
+ efi.get_next_variable = xen_efi_get_next_variable;
+ efi.set_variable = xen_efi_set_variable;
+ efi.query_variable_info = xen_efi_query_variable_info;
+ efi.update_capsule = xen_efi_update_capsule;
+ efi.query_capsule_caps = xen_efi_query_capsule_caps;
+ efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
+ efi.reset_system = xen_efi_reset_system;
efi_systab_xen.tables = info->cfg.addr;
efi_systab_xen.nr_tables = info->cfg.nent;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index a5ffcbb..0e7ef69 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -106,15 +106,83 @@ int xen_cpuhp_setup(int (*cpu_up_prepare_cb)(unsigned int),
return rc >= 0 ? 0 : rc;
}
-static void clamp_max_cpus(void)
+static int xen_vcpu_setup_restore(int cpu)
{
-#ifdef CONFIG_SMP
- if (setup_max_cpus > MAX_VIRT_CPUS)
- setup_max_cpus = MAX_VIRT_CPUS;
-#endif
+ int rc = 0;
+
+ /* Any per_cpu(xen_vcpu) is stale, so reset it */
+ xen_vcpu_info_reset(cpu);
+
+ /*
+ * For PVH and PVHVM, setup online VCPUs only. The rest will
+ * be handled by hotplug.
+ */
+ if (xen_pv_domain() ||
+ (xen_hvm_domain() && cpu_online(cpu))) {
+ rc = xen_vcpu_setup(cpu);
+ }
+
+ return rc;
+}
+
+/*
+ * On restore, set the vcpu placement up again.
+ * If it fails, then we're in a bad state, since
+ * we can't back out from using it...
+ */
+void xen_vcpu_restore(void)
+{
+ int cpu, rc;
+
+ for_each_possible_cpu(cpu) {
+ bool other_cpu = (cpu != smp_processor_id());
+ bool is_up;
+
+ if (xen_vcpu_nr(cpu) == XEN_VCPU_ID_INVALID)
+ continue;
+
+ /* Only Xen 4.5 and higher support this. */
+ is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up,
+ xen_vcpu_nr(cpu), NULL) > 0;
+
+ if (other_cpu && is_up &&
+ HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
+ BUG();
+
+ if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_setup_runstate_info(cpu);
+
+ rc = xen_vcpu_setup_restore(cpu);
+ if (rc)
+ pr_emerg_once("vcpu restore failed for cpu=%d err=%d. "
+ "System will hang.\n", cpu, rc);
+ /*
+ * In case xen_vcpu_setup_restore() fails, do not bring up the
+ * VCPU. This helps us avoid the resulting OOPS when the VCPU
+ * accesses pvclock_vcpu_time via xen_vcpu (which is NULL.)
+ * Note that this does not improve the situation much -- now the
+ * VM hangs instead of OOPSing -- with the VCPUs that did not
+ * fail, spinning in stop_machine(), waiting for the failed
+ * VCPUs to come up.
+ */
+ if (other_cpu && is_up && (rc == 0) &&
+ HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
+ BUG();
+ }
}
-void xen_vcpu_setup(int cpu)
+void xen_vcpu_info_reset(int cpu)
+{
+ if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) {
+ per_cpu(xen_vcpu, cpu) =
+ &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
+ } else {
+ /* Set to NULL so that if somebody accesses it we get an OOPS */
+ per_cpu(xen_vcpu, cpu) = NULL;
+ }
+}
+
+int xen_vcpu_setup(int cpu)
{
struct vcpu_register_vcpu_info info;
int err;
@@ -123,11 +191,11 @@ void xen_vcpu_setup(int cpu)
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
/*
- * This path is called twice on PVHVM - first during bootup via
- * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
- * hotplugged: cpu_up -> xen_hvm_cpu_notify.
- * As we can only do the VCPUOP_register_vcpu_info once lets
- * not over-write its result.
+ * This path is called on PVHVM at bootup (xen_hvm_smp_prepare_boot_cpu)
+ * and at restore (xen_vcpu_restore). Also called for hotplugged
+ * VCPUs (cpu_init -> xen_hvm_cpu_prepare_hvm).
+ * However, the hypercall can only be done once (see below) so if a VCPU
+ * is offlined and comes back online then let's not redo the hypercall.
*
* For PV it is called during restore (xen_vcpu_restore) and bootup
* (xen_setup_vcpu_info_placement). The hotplug mechanism does not
@@ -135,42 +203,44 @@ void xen_vcpu_setup(int cpu)
*/
if (xen_hvm_domain()) {
if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
- return;
+ return 0;
}
- if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
- per_cpu(xen_vcpu, cpu) =
- &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
- if (!xen_have_vcpu_info_placement) {
- if (cpu >= MAX_VIRT_CPUS)
- clamp_max_cpus();
- return;
+ if (xen_have_vcpu_info_placement) {
+ vcpup = &per_cpu(xen_vcpu_info, cpu);
+ info.mfn = arbitrary_virt_to_mfn(vcpup);
+ info.offset = offset_in_page(vcpup);
+
+ /*
+ * Check to see if the hypervisor will put the vcpu_info
+ * structure where we want it, which allows direct access via
+ * a percpu-variable.
+ * N.B. This hypercall can _only_ be called once per CPU.
+ * Subsequent calls will error out with -EINVAL. This is due to
+ * the fact that hypervisor has no unregister variant and this
+ * hypercall does not allow to over-write info.mfn and
+ * info.offset.
+ */
+ err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info,
+ xen_vcpu_nr(cpu), &info);
+
+ if (err) {
+ pr_warn_once("register_vcpu_info failed: cpu=%d err=%d\n",
+ cpu, err);
+ xen_have_vcpu_info_placement = 0;
+ } else {
+ /*
+ * This cpu is using the registered vcpu info, even if
+ * later ones fail to.
+ */
+ per_cpu(xen_vcpu, cpu) = vcpup;
+ }
}
- vcpup = &per_cpu(xen_vcpu_info, cpu);
- info.mfn = arbitrary_virt_to_mfn(vcpup);
- info.offset = offset_in_page(vcpup);
-
- /* Check to see if the hypervisor will put the vcpu_info
- structure where we want it, which allows direct access via
- a percpu-variable.
- N.B. This hypercall can _only_ be called once per CPU. Subsequent
- calls will error out with -EINVAL. This is due to the fact that
- hypervisor has no unregister variant and this hypercall does not
- allow to over-write info.mfn and info.offset.
- */
- err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
- &info);
+ if (!xen_have_vcpu_info_placement)
+ xen_vcpu_info_reset(cpu);
- if (err) {
- printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
- xen_have_vcpu_info_placement = 0;
- clamp_max_cpus();
- } else {
- /* This cpu is using the registered vcpu info, even if
- later ones fail to. */
- per_cpu(xen_vcpu, cpu) = vcpup;
- }
+ return ((per_cpu(xen_vcpu, cpu) == NULL) ? -ENODEV : 0);
}
void xen_reboot(int reason)
diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c
index a6d014f..87d7913 100644
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -1,5 +1,6 @@
#include <linux/cpu.h>
#include <linux/kexec.h>
+#include <linux/memblock.h>
#include <xen/features.h>
#include <xen/events.h>
@@ -10,9 +11,11 @@
#include <asm/reboot.h>
#include <asm/setup.h>
#include <asm/hypervisor.h>
+#include <asm/e820/api.h>
#include <asm/xen/cpuid.h>
#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -20,37 +23,34 @@
void __ref xen_hvm_init_shared_info(void)
{
- int cpu;
struct xen_add_to_physmap xatp;
- static struct shared_info *shared_info_page;
+ u64 pa;
+
+ if (HYPERVISOR_shared_info == &xen_dummy_shared_info) {
+ /*
+ * Search for a free page starting at 4kB physical address.
+ * Low memory is preferred to avoid an EPT large page split up
+ * by the mapping.
+ * Starting below X86_RESERVE_LOW (usually 64kB) is fine as
+ * the BIOS used for HVM guests is well behaved and won't
+ * clobber memory other than the first 4kB.
+ */
+ for (pa = PAGE_SIZE;
+ !e820__mapped_all(pa, pa + PAGE_SIZE, E820_TYPE_RAM) ||
+ memblock_is_reserved(pa);
+ pa += PAGE_SIZE)
+ ;
+
+ memblock_reserve(pa, PAGE_SIZE);
+ HYPERVISOR_shared_info = __va(pa);
+ }
- if (!shared_info_page)
- shared_info_page = (struct shared_info *)
- extend_brk(PAGE_SIZE, PAGE_SIZE);
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+ xatp.gpfn = virt_to_pfn(HYPERVISOR_shared_info);
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
-
- HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
-
- /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
- * page, we use it in the event channel upcall and in some pvclock
- * related functions. We don't need the vcpu_info placement
- * optimizations because we don't use any pv_mmu or pv_irq op on
- * HVM.
- * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
- * online but xen_hvm_init_shared_info is run at resume time too and
- * in that case multiple vcpus might be online. */
- for_each_online_cpu(cpu) {
- /* Leave it to be NULL. */
- if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
- continue;
- per_cpu(xen_vcpu, cpu) =
- &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
- }
}
static void __init init_hvm_pv_info(void)
@@ -106,7 +106,7 @@ static void xen_hvm_crash_shutdown(struct pt_regs *regs)
static int xen_cpu_up_prepare_hvm(unsigned int cpu)
{
- int rc;
+ int rc = 0;
/*
* This can happen if CPU was offlined earlier and
@@ -121,7 +121,9 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
else
per_cpu(xen_vcpu_id, cpu) = cpu;
- xen_vcpu_setup(cpu);
+ rc = xen_vcpu_setup(cpu);
+ if (rc)
+ return rc;
if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock))
xen_setup_timer(cpu);
@@ -130,9 +132,8 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu)
if (rc) {
WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
cpu, rc);
- return rc;
}
- return 0;
+ return rc;
}
static int xen_cpu_dead_hvm(unsigned int cpu)
@@ -154,6 +155,13 @@ static void __init xen_hvm_guest_init(void)
xen_hvm_init_shared_info();
+ /*
+ * xen_vcpu is a pointer to the vcpu_info struct in the shared_info
+ * page, we use it in the event channel upcall and in some pvclock
+ * related functions.
+ */
+ xen_vcpu_info_reset(0);
+
xen_panic_handler_init();
if (xen_feature(XENFEAT_hvm_callback_vector))
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f33eef4..811e4dd 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -89,8 +89,6 @@
void *xen_initial_gdt;
-RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
-
static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);
@@ -107,35 +105,6 @@ struct tls_descs {
*/
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
-/*
- * On restore, set the vcpu placement up again.
- * If it fails, then we're in a bad state, since
- * we can't back out from using it...
- */
-void xen_vcpu_restore(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- bool other_cpu = (cpu != smp_processor_id());
- bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
- NULL);
-
- if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
- BUG();
-
- xen_setup_runstate_info(cpu);
-
- if (xen_have_vcpu_info_placement)
- xen_vcpu_setup(cpu);
-
- if (other_cpu && is_up &&
- HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
- BUG();
- }
-}
-
static void __init xen_banner(void)
{
unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
@@ -960,30 +929,43 @@ void xen_setup_shared_info(void)
HYPERVISOR_shared_info =
(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
-#ifndef CONFIG_SMP
- /* In UP this is as good a place as any to set up shared info */
- xen_setup_vcpu_info_placement();
-#endif
-
xen_setup_mfn_list_list();
- /*
- * Now that shared info is set up we can start using routines that
- * point to pvclock area.
- */
- if (system_state == SYSTEM_BOOTING)
+ if (system_state == SYSTEM_BOOTING) {
+#ifndef CONFIG_SMP
+ /*
+ * In UP this is as good a place as any to set up shared info.
+ * Limit this to boot only, at restore vcpu setup is done via
+ * xen_vcpu_restore().
+ */
+ xen_setup_vcpu_info_placement();
+#endif
+ /*
+ * Now that shared info is set up we can start using routines
+ * that point to pvclock area.
+ */
xen_init_time_ops();
+ }
}
/* This is called once we have the cpu_possible_mask */
-void xen_setup_vcpu_info_placement(void)
+void __ref xen_setup_vcpu_info_placement(void)
{
int cpu;
for_each_possible_cpu(cpu) {
/* Set up direct vCPU id mapping for PV guests. */
per_cpu(xen_vcpu_id, cpu) = cpu;
- xen_vcpu_setup(cpu);
+
+ /*
+ * xen_vcpu_setup(cpu) can fail -- in which case it
+ * falls back to the shared_info version for cpus
+ * where xen_vcpu_nr(cpu) < MAX_VIRT_CPUS.
+ *
+ * xen_cpu_up_prepare_pv() handles the rest by failing
+ * them in hotplug.
+ */
+ (void) xen_vcpu_setup(cpu);
}
/*
@@ -1332,9 +1314,17 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
acpi_numa = -1;
#endif
- /* Don't do the full vcpu_info placement stuff until we have a
- possible map and a non-dummy shared_info. */
- per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
+ /* Let's presume PV guests always boot on vCPU with id 0. */
+ per_cpu(xen_vcpu_id, 0) = 0;
+
+ /*
+ * Setup xen_vcpu early because start_kernel needs it for
+ * local_irq_disable(), irqs_disabled().
+ *
+ * Don't do the full vcpu_info placement stuff until we have
+ * the cpu_possible_mask and a non-dummy shared_info.
+ */
+ xen_vcpu_info_reset(0);
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));
@@ -1431,9 +1421,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
#endif
xen_raw_console_write("about to get started...\n");
- /* Let's presume PV guests always boot on vCPU with id 0. */
- per_cpu(xen_vcpu_id, 0) = 0;
-
+ /* We need this for printk timestamps */
xen_setup_runstate_info(0);
xen_efi_init();
@@ -1451,6 +1439,9 @@ static int xen_cpu_up_prepare_pv(unsigned int cpu)
{
int rc;
+ if (per_cpu(xen_vcpu, cpu) == NULL)
+ return -ENODEV;
+
xen_setup_timer(cpu);
rc = xen_smp_intr_init(cpu);
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1f386d7..cab28cf 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -975,37 +975,32 @@ static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
spin_unlock(&mm->page_table_lock);
}
-
-#ifdef CONFIG_SMP
-/* Another cpu may still have their %cr3 pointing at the pagetable, so
- we need to repoint it somewhere else before we can unpin it. */
-static void drop_other_mm_ref(void *info)
+static void drop_mm_ref_this_cpu(void *info)
{
struct mm_struct *mm = info;
- struct mm_struct *active_mm;
-
- active_mm = this_cpu_read(cpu_tlbstate.active_mm);
- if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
+ if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
leave_mm(smp_processor_id());
- /* If this cpu still has a stale cr3 reference, then make sure
- it has been flushed. */
+ /*
+ * If this cpu still has a stale cr3 reference, then make sure
+ * it has been flushed.
+ */
if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
- load_cr3(swapper_pg_dir);
+ xen_mc_flush();
}
+#ifdef CONFIG_SMP
+/*
+ * Another cpu may still have their %cr3 pointing at the pagetable, so
+ * we need to repoint it somewhere else before we can unpin it.
+ */
static void xen_drop_mm_ref(struct mm_struct *mm)
{
cpumask_var_t mask;
unsigned cpu;
- if (current->active_mm == mm) {
- if (current->mm == mm)
- load_cr3(swapper_pg_dir);
- else
- leave_mm(smp_processor_id());
- }
+ drop_mm_ref_this_cpu(mm);
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
@@ -1013,31 +1008,31 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
&& per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
- smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
+ smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
}
return;
}
cpumask_copy(mask, mm_cpumask(mm));
- /* It's possible that a vcpu may have a stale reference to our
- cr3, because its in lazy mode, and it hasn't yet flushed
- its set of pending hypercalls yet. In this case, we can
- look at its actual current cr3 value, and force it to flush
- if needed. */
+ /*
+ * It's possible that a vcpu may have a stale reference to our
+ * cr3, because its in lazy mode, and it hasn't yet flushed
+ * its set of pending hypercalls yet. In this case, we can
+ * look at its actual current cr3 value, and force it to flush
+ * if needed.
+ */
for_each_online_cpu(cpu) {
if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
cpumask_set_cpu(cpu, mask);
}
- if (!cpumask_empty(mask))
- smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
+ smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
free_cpumask_var(mask);
}
#else
static void xen_drop_mm_ref(struct mm_struct *mm)
{
- if (current->active_mm == mm)
- load_cr3(swapper_pg_dir);
+ drop_mm_ref_this_cpu(mm);
}
#endif
@@ -1366,8 +1361,7 @@ static void xen_flush_tlb_single(unsigned long addr)
}
static void xen_flush_tlb_others(const struct cpumask *cpus,
- struct mm_struct *mm, unsigned long start,
- unsigned long end)
+ const struct flush_tlb_info *info)
{
struct {
struct mmuext_op op;
@@ -1379,7 +1373,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
} *args;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
+ trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
if (cpumask_empty(cpus))
return; /* nothing to do */
@@ -1393,9 +1387,10 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
- if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
+ if (info->end != TLB_FLUSH_ALL &&
+ (info->end - info->start) <= PAGE_SIZE) {
args->op.cmd = MMUEXT_INVLPG_MULTI;
- args->op.arg1.linear_addr = start;
+ args->op.arg1.linear_addr = info->start;
}
MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
@@ -1470,8 +1465,8 @@ static void xen_write_cr3(unsigned long cr3)
* At the start of the day - when Xen launches a guest, it has already
* built pagetables for the guest. We diligently look over them
* in xen_setup_kernel_pagetable and graft as appropriate them in the
- * init_level4_pgt and its friends. Then when we are happy we load
- * the new init_level4_pgt - and continue on.
+ * init_top_pgt and its friends. Then when we are happy we load
+ * the new init_top_pgt - and continue on.
*
* The generic code starts (start_kernel) and 'init_mem_mapping' sets
* up the rest of the pagetables. When it has completed it loads the cr3.
@@ -1914,12 +1909,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
pt_end = pt_base + xen_start_info->nr_pt_frames;
/* Zap identity mapping */
- init_level4_pgt[0] = __pgd(0);
+ init_top_pgt[0] = __pgd(0);
/* Pre-constructed entries are in pfn, so convert to mfn */
/* L4[272] -> level3_ident_pgt */
/* L4[511] -> level3_kernel_pgt */
- convert_pfn_mfn(init_level4_pgt);
+ convert_pfn_mfn(init_top_pgt);
/* L3_i[0] -> level2_ident_pgt */
convert_pfn_mfn(level3_ident_pgt);
@@ -1950,10 +1945,10 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Copy the initial P->M table mappings if necessary. */
i = pgd_index(xen_start_info->mfn_list);
if (i && i < pgd_index(__START_KERNEL_map))
- init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
+ init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
/* Make pagetable pieces RO */
- set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+ set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
@@ -1964,7 +1959,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
- PFN_DOWN(__pa_symbol(init_level4_pgt)));
+ PFN_DOWN(__pa_symbol(init_top_pgt)));
/* Unpin Xen-provided one */
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
@@ -1974,7 +1969,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
* attach it to, so make sure we just set kernel pgd.
*/
xen_mc_batch();
- __xen_write_cr3(true, __pa(init_level4_pgt));
+ __xen_write_cr3(true, __pa(init_top_pgt));
xen_mc_issue(PARAVIRT_LAZY_CPU);
/* We can't that easily rip out L3 and L2, as the Xen pagetables are
@@ -2022,7 +2017,7 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
pmd_t pmd;
pte_t pte;
- pa = read_cr3();
+ pa = read_cr3_pa();
pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
sizeof(pgd)));
if (!pgd_present(pgd))
@@ -2102,7 +2097,7 @@ void __init xen_relocate_p2m(void)
pt_phys = pmd_phys + PFN_PHYS(n_pmd);
p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
- pgd = __va(read_cr3());
+ pgd = __va(read_cr3_pa());
new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
idx_p4d = 0;
save_pud = n_pud;
@@ -2209,7 +2204,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
{
unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
- BUG_ON(read_cr3() != __pa(initial_page_table));
+ BUG_ON(read_cr3_pa() != __pa(initial_page_table));
BUG_ON(cr3 != __pa(swapper_pg_dir));
/*
@@ -2698,8 +2693,8 @@ EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
phys_addr_t paddr_vmcoreinfo_note(void)
{
if (xen_pv_domain())
- return virt_to_machine(&vmcoreinfo_note).maddr;
+ return virt_to_machine(vmcoreinfo_note).maddr;
else
- return __pa_symbol(&vmcoreinfo_note);
+ return __pa(vmcoreinfo_note);
}
#endif /* CONFIG_KEXEC_CORE */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 42b08f8..37c6056 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -18,20 +18,6 @@
int xen_swiotlb __read_mostly;
-static const struct dma_map_ops xen_swiotlb_dma_ops = {
- .alloc = xen_swiotlb_alloc_coherent,
- .free = xen_swiotlb_free_coherent,
- .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
- .sync_single_for_device = xen_swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = xen_swiotlb_sync_sg_for_device,
- .map_sg = xen_swiotlb_map_sg_attrs,
- .unmap_sg = xen_swiotlb_unmap_sg_attrs,
- .map_page = xen_swiotlb_map_page,
- .unmap_page = xen_swiotlb_unmap_page,
- .dma_supported = xen_swiotlb_dma_supported,
-};
-
/*
* pci_xen_swiotlb_detect - set xen_swiotlb to 1 if necessary
*
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a5bf7c4..c810463 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -499,7 +499,7 @@ static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
void __init xen_remap_memory(void)
{
unsigned long buf = (unsigned long)&xen_remap_buf;
- unsigned long mfn_save, mfn, pfn;
+ unsigned long mfn_save, pfn;
unsigned long remapped = 0;
unsigned int i;
unsigned long pfn_s = ~0UL;
@@ -515,8 +515,7 @@ void __init xen_remap_memory(void)
pfn = xen_remap_buf.target_pfn;
for (i = 0; i < xen_remap_buf.size; i++) {
- mfn = xen_remap_buf.mfns[i];
- xen_update_mem_tables(pfn, mfn);
+ xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
remapped++;
pfn++;
}
@@ -530,8 +529,6 @@ void __init xen_remap_memory(void)
pfn_s = xen_remap_buf.target_pfn;
len = xen_remap_buf.size;
}
-
- mfn = xen_remap_mfn;
xen_remap_mfn = xen_remap_buf.next_area_mfn;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 82ac611..e7f02eb 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -1,4 +1,5 @@
#include <linux/smp.h>
+#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
@@ -114,6 +115,36 @@ int xen_smp_intr_init(unsigned int cpu)
return rc;
}
+void __init xen_smp_cpus_done(unsigned int max_cpus)
+{
+ int cpu, rc, count = 0;
+
+ if (xen_hvm_domain())
+ native_smp_cpus_done(max_cpus);
+
+ if (xen_have_vcpu_info_placement)
+ return;
+
+ for_each_online_cpu(cpu) {
+ if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
+ continue;
+
+ rc = cpu_down(cpu);
+
+ if (rc == 0) {
+ /*
+ * Reset vcpu_info so this cpu cannot be onlined again.
+ */
+ xen_vcpu_info_reset(cpu);
+ count++;
+ } else {
+ pr_warn("%s: failed to bring CPU %d down, error %d\n",
+ __func__, cpu, rc);
+ }
+ }
+ WARN(count, "%s: brought %d CPUs offline\n", __func__, count);
+}
+
void xen_smp_send_reschedule(int cpu)
{
xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index 8ebb6ac..87d3c76 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -14,6 +14,8 @@ extern void xen_smp_intr_free(unsigned int cpu);
int xen_smp_intr_init_pv(unsigned int cpu);
void xen_smp_intr_free_pv(unsigned int cpu);
+void xen_smp_cpus_done(unsigned int max_cpus);
+
void xen_smp_send_reschedule(int cpu);
void xen_smp_send_call_function_ipi(const struct cpumask *mask);
void xen_smp_send_call_function_single_ipi(int cpu);
diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c
index f18561b..fd60abe 100644
--- a/arch/x86/xen/smp_hvm.c
+++ b/arch/x86/xen/smp_hvm.c
@@ -12,7 +12,8 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
native_smp_prepare_boot_cpu();
/*
- * Setup vcpu_info for boot CPU.
+ * Setup vcpu_info for boot CPU. Secondary CPUs get their vcpu_info
+ * in xen_cpu_up_prepare_hvm().
*/
xen_vcpu_setup(0);
@@ -27,10 +28,20 @@ static void __init xen_hvm_smp_prepare_boot_cpu(void)
static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
{
+ int cpu;
+
native_smp_prepare_cpus(max_cpus);
WARN_ON(xen_smp_intr_init(0));
xen_init_lock_cpu(0);
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == 0)
+ continue;
+
+ /* Set default vcpu_id to make sure that we don't use cpu-0's */
+ per_cpu(xen_vcpu_id, cpu) = XEN_VCPU_ID_INVALID;
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -60,4 +71,5 @@ void __init xen_hvm_smp_init(void)
smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi;
smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi;
smp_ops.smp_prepare_boot_cpu = xen_hvm_smp_prepare_boot_cpu;
+ smp_ops.smp_cpus_done = xen_smp_cpus_done;
}
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index aae3253..5147140 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -19,6 +19,7 @@
#include <linux/irq_work.h>
#include <linux/tick.h>
#include <linux/nmi.h>
+#include <linux/cpuhotplug.h>
#include <asm/paravirt.h>
#include <asm/desc.h>
@@ -371,10 +372,6 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
return 0;
}
-static void xen_pv_smp_cpus_done(unsigned int max_cpus)
-{
-}
-
#ifdef CONFIG_HOTPLUG_CPU
static int xen_pv_cpu_disable(void)
{
@@ -417,7 +414,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
*/
tick_nohz_idle_enter();
- cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+ cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
}
#else /* !CONFIG_HOTPLUG_CPU */
@@ -469,7 +466,7 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
static const struct smp_ops xen_smp_ops __initconst = {
.smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_pv_smp_prepare_cpus,
- .smp_cpus_done = xen_pv_smp_cpus_done,
+ .smp_cpus_done = xen_smp_cpus_done,
.cpu_up = xen_pv_cpu_up,
.cpu_die = xen_pv_cpu_die,
diff --git a/arch/x86/xen/suspend_hvm.c b/arch/x86/xen/suspend_hvm.c
index 01afcad..4849994 100644
--- a/arch/x86/xen/suspend_hvm.c
+++ b/arch/x86/xen/suspend_hvm.c
@@ -8,15 +8,10 @@
void xen_hvm_post_suspend(int suspend_cancelled)
{
- int cpu;
-
- if (!suspend_cancelled)
+ if (!suspend_cancelled) {
xen_hvm_init_shared_info();
+ xen_vcpu_restore();
+ }
xen_callback_vector();
xen_unplug_emulated_devices();
- if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
- for_each_online_cpu(cpu) {
- xen_setup_runstate_info(cpu);
- }
- }
}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index a1895a8..1ecb05d 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -309,7 +309,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
void xen_teardown_timer(int cpu)
{
struct clock_event_device *evt;
- BUG_ON(cpu == 0);
evt = &per_cpu(xen_clock_events, cpu).evt;
if (evt->irq >= 0) {
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a440a4..0d50044 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -78,7 +78,8 @@ bool xen_vcpu_stolen(int vcpu);
extern int xen_have_vcpu_info_placement;
-void xen_vcpu_setup(int cpu);
+int xen_vcpu_setup(int cpu);
+void xen_vcpu_info_reset(int cpu);
void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index 5e24671..e1a5fbe 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -87,7 +87,7 @@ ENTRY(pvh_start_xen)
wrmsr
/* Enable pre-constructed page tables. */
- mov $_pa(init_level4_pgt), %eax
+ mov $_pa(init_top_pgt), %eax
mov %eax, %cr3
mov $(X86_CR0_PG | X86_CR0_PE), %eax
mov %eax, %cr0
OpenPOWER on IntegriCloud