diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 11:35:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 11:35:37 -0700 |
commit | 08fd8c17686c6b09fa410a26d516548dd80ff147 (patch) | |
tree | 0d8c17e70a94518a301e85fc7b23fbc09311068c | |
parent | e831101a73fbc8339ef1d1909dad3ef64f089e70 (diff) | |
parent | d34c30cc1fa80f509500ff192ea6bc7d30671061 (diff) | |
download | op-kernel-dev-08fd8c17686c6b09fa410a26d516548dd80ff147.zip op-kernel-dev-08fd8c17686c6b09fa410a26d516548dd80ff147.tar.gz |
Merge tag 'for-linus-4.8-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from David Vrabel:
"Features and fixes for 4.8-rc0:
- ACPI support for guests on ARM platforms.
- Generic steal time support for arm and x86.
- Support cases where kernel cpu is not Xen VCPU number (e.g., if
in-guest kexec is used).
- Use the system workqueue instead of a custom workqueue in various
places"
* tag 'for-linus-4.8-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (47 commits)
xen: add static initialization of steal_clock op to xen_time_ops
xen/pvhvm: run xen_vcpu_setup() for the boot CPU
xen/evtchn: use xen_vcpu_id mapping
xen/events: fifo: use xen_vcpu_id mapping
xen/events: use xen_vcpu_id mapping in events_base
x86/xen: use xen_vcpu_id mapping when pointing vcpu_info to shared_info
x86/xen: use xen_vcpu_id mapping for HYPERVISOR_vcpu_op
xen: introduce xen_vcpu_id mapping
x86/acpi: store ACPI ids from MADT for future usage
x86/xen: update cpuid.h from Xen-4.7
xen/evtchn: add IOCTL_EVTCHN_RESTRICT
xen-blkback: really don't leak mode property
xen-blkback: constify instance of "struct attribute_group"
xen-blkfront: prefer xenbus_scanf() over xenbus_gather()
xen-blkback: prefer xenbus_scanf() over xenbus_gather()
xen: support runqueue steal time on xen
arm/xen: add support for vm_assist hypercall
xen: update xen headers
xen-pciback: drop superfluous variables
xen-pciback: short-circuit read path used for merging write values
...
59 files changed, 1150 insertions, 493 deletions
diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt index 0f7b9c2..c9b9321 100644 --- a/Documentation/devicetree/bindings/arm/xen.txt +++ b/Documentation/devicetree/bindings/arm/xen.txt @@ -11,10 +11,32 @@ the following properties: memory where the grant table should be mapped to, using an HYPERVISOR_memory_op hypercall. The memory region is large enough to map the whole grant table (it is larger or equal to gnttab_max_grant_frames()). + This property is unnecessary when booting Dom0 using ACPI. - interrupts: the interrupt used by Xen to inject event notifications. A GIC node is also required. + This property is unnecessary when booting Dom0 using ACPI. +To support UEFI on Xen ARM virtual platforms, Xen populates the FDT "uefi" node +under /hypervisor with following parameters: + +________________________________________________________________________________ +Name | Size | Description +================================================================================ +xen,uefi-system-table | 64-bit | Guest physical address of the UEFI System + | | Table. +-------------------------------------------------------------------------------- +xen,uefi-mmap-start | 64-bit | Guest physical address of the UEFI memory + | | map. +-------------------------------------------------------------------------------- +xen,uefi-mmap-size | 32-bit | Size in bytes of the UEFI memory map + | | pointed to in previous entry. +-------------------------------------------------------------------------------- +xen,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI + | | memory map. +-------------------------------------------------------------------------------- +xen,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format. +-------------------------------------------------------------------------------- Example (assuming #address-cells = <2> and #size-cells = <2>): @@ -22,4 +44,17 @@ hypervisor { compatible = "xen,xen-4.3", "xen,xen"; reg = <0 0xb0000000 0 0x20000>; interrupts = <1 15 0xf08>; + uefi { + xen,uefi-system-table = <0xXXXXXXXX>; + xen,uefi-mmap-start = <0xXXXXXXXX>; + xen,uefi-mmap-size = <0xXXXXXXXX>; + xen,uefi-mmap-desc-size = <0xXXXXXXXX>; + xen,uefi-mmap-desc-ver = <0xXXXXXXXX>; + }; }; + +The format and meaning of the "xen,uefi-*" parameters are similar to those in +Documentation/arm/uefi.txt, which are provided by the regular UEFI stub. However +they differ because they are provided by the Xen hypervisor, together with a set +of UEFI runtime services implemented via hypercalls, see +http://xenbits.xen.org/docs/unstable/hypercall/x86_64/include,public,platform.h.html. diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index b6b962d..9d874db 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -52,6 +52,7 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { diff --git a/arch/arm/include/asm/xen/xen-ops.h b/arch/arm/include/asm/xen/xen-ops.h new file mode 100644 index 0000000..ec154e7 --- /dev/null +++ b/arch/arm/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7b53500..261dae6 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1064,6 +1064,7 @@ void __init setup_arch(char **cmdline_p) early_paging_init(mdesc); #endif setup_dma_zone(mdesc); + xen_early_init(); efi_init(); sanity_check_meminfo(); arm_memblock_init(mdesc); @@ -1080,7 +1081,6 @@ void __init setup_arch(char **cmdline_p) arm_dt_init_cpu_maps(); psci_dt_init(); - xen_early_init(); #ifdef CONFIG_SMP if (is_smp()) { if (!mdesc->smp_init || !mdesc->smp_init()) { diff --git a/arch/arm/xen/Makefile b/arch/arm/xen/Makefile index 1296952..2279521 100644 --- a/arch/arm/xen/Makefile +++ b/arch/arm/xen/Makefile @@ -1 +1,2 @@ obj-y := enlighten.o hypercall.o grant-table.o p2m.o mm.o +obj-$(CONFIG_XEN_EFI) += efi.o diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c new file mode 100644 index 0000000..16db419 --- /dev/null +++ b/arch/arm/xen/efi.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015, Linaro Limited, Shannon Zhao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/efi.h> +#include <xen/xen-ops.h> +#include <asm/xen/xen-ops.h> + +/* Set XEN EFI runtime services function pointers. Other fields of struct efi, + * e.g. efi.systab, will be set like normal EFI. + */ +void __init xen_efi_runtime_setup(void) +{ + efi.get_time = xen_efi_get_time; + efi.set_time = xen_efi_set_time; + efi.get_wakeup_time = xen_efi_get_wakeup_time; + efi.set_wakeup_time = xen_efi_set_wakeup_time; + efi.get_variable = xen_efi_get_variable; + efi.get_next_variable = xen_efi_get_next_variable; + efi.set_variable = xen_efi_set_variable; + efi.query_variable_info = xen_efi_query_variable_info; + efi.update_capsule = xen_efi_update_capsule; + efi.query_capsule_caps = xen_efi_query_capsule_caps; + efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; + efi.reset_system = NULL; /* Functionality provided by Xen. */ +} +EXPORT_SYMBOL_GPL(xen_efi_runtime_setup); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 75cd734..0bea3d2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -12,14 +12,16 @@ #include <xen/page.h> #include <xen/interface/sched.h> #include <xen/xen-ops.h> -#include <asm/paravirt.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <asm/xen/xen-ops.h> #include <asm/system_misc.h> +#include <asm/efi.h> #include <linux/interrupt.h> #include <linux/irqreturn.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_fdt.h> #include <linux/of_irq.h> #include <linux/of_address.h> #include <linux/cpuidle.h> @@ -30,6 +32,7 @@ #include <linux/time64.h> #include <linux/timekeeping.h> #include <linux/timekeeper_internal.h> +#include <linux/acpi.h> #include <linux/mm.h> @@ -46,14 +49,16 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info; DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); static struct vcpu_info __percpu *xen_vcpu_info; +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + /* These are unused until we support booting "pre-ballooned" */ unsigned long xen_released_pages; struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; static __read_mostly unsigned int xen_events_irq; -static __initdata struct device_node *xen_node; - int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *gfn, int nr, @@ -84,19 +89,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); -static unsigned long long xen_stolen_accounting(int cpu) -{ - struct vcpu_runstate_info state; - - BUG_ON(cpu != smp_processor_id()); - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; -} - static void xen_read_wallclock(struct timespec64 *ts) { u32 version; @@ -179,10 +171,14 @@ static void xen_percpu_init(void) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); + /* Direct vCPU id mapping for ARM guests. */ + per_cpu(xen_vcpu_id, cpu) = cpu; + info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); BUG_ON(err); per_cpu(xen_vcpu, cpu) = vcpup; @@ -237,6 +233,46 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) return IRQ_HANDLED; } +static __initdata struct { + const char *compat; + const char *prefix; + const char *version; + bool found; +} hyper_node = {"xen,xen", "xen,xen-", NULL, false}; + +static int __init fdt_find_hyper_node(unsigned long node, const char *uname, + int depth, void *data) +{ + const void *s = NULL; + int len; + + if (depth != 1 || strcmp(uname, "hypervisor") != 0) + return 0; + + if (of_flat_dt_is_compatible(node, hyper_node.compat)) + hyper_node.found = true; + + s = of_get_flat_dt_prop(node, "compatible", &len); + if (strlen(hyper_node.prefix) + 3 < len && + !strncmp(hyper_node.prefix, s, strlen(hyper_node.prefix))) + hyper_node.version = s + strlen(hyper_node.prefix); + + /* + * Check if Xen supports EFI by checking whether there is the + * "/hypervisor/uefi" node in DT. If so, runtime services are available + * through proxy functions (e.g. in case of Xen dom0 EFI implementation + * they call special hypercall which executes relevant EFI functions) + * and that is why they are always enabled. + */ + if (IS_ENABLED(CONFIG_XEN_EFI)) { + if ((of_get_flat_dt_subnode_by_name(node, "uefi") > 0) && + !efi_runtime_disabled()) + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + } + + return 0; +} + /* * see Documentation/devicetree/bindings/arm/xen.txt for the * documentation of the Xen Device Tree format. @@ -244,26 +280,18 @@ static irqreturn_t xen_arm_callback(int irq, void *arg) #define GRANT_TABLE_PHYSADDR 0 void __init xen_early_init(void) { - int len; - const char *s = NULL; - const char *version = NULL; - const char *xen_prefix = "xen,xen-"; - - xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); - if (!xen_node) { + of_scan_flat_dt(fdt_find_hyper_node, NULL); + if (!hyper_node.found) { pr_debug("No Xen support\n"); return; } - s = of_get_property(xen_node, "compatible", &len); - if (strlen(xen_prefix) + 3 < len && - !strncmp(xen_prefix, s, strlen(xen_prefix))) - version = s + strlen(xen_prefix); - if (version == NULL) { + + if (hyper_node.version == NULL) { pr_debug("Xen version not found\n"); return; } - pr_info("Xen %s support found\n", version); + pr_info("Xen %s support found\n", hyper_node.version); xen_domain_type = XEN_HVM_DOMAIN; @@ -278,28 +306,68 @@ void __init xen_early_init(void) add_preferred_console("hvc", 0, NULL); } +static void __init xen_acpi_guest_init(void) +{ +#ifdef CONFIG_ACPI + struct xen_hvm_param a; + int interrupt, trigger, polarity; + + a.domid = DOMID_SELF; + a.index = HVM_PARAM_CALLBACK_IRQ; + + if (HYPERVISOR_hvm_op(HVMOP_get_param, &a) + || (a.value >> 56) != HVM_PARAM_CALLBACK_TYPE_PPI) { + xen_events_irq = 0; + return; + } + + interrupt = a.value & 0xff; + trigger = ((a.value >> 8) & 0x1) ? ACPI_EDGE_SENSITIVE + : ACPI_LEVEL_SENSITIVE; + polarity = ((a.value >> 8) & 0x2) ? ACPI_ACTIVE_LOW + : ACPI_ACTIVE_HIGH; + xen_events_irq = acpi_register_gsi(NULL, interrupt, trigger, polarity); +#endif +} + +static void __init xen_dt_guest_init(void) +{ + struct device_node *xen_node; + + xen_node = of_find_compatible_node(NULL, NULL, "xen,xen"); + if (!xen_node) { + pr_err("Xen support was detected before, but it has disappeared\n"); + return; + } + + xen_events_irq = irq_of_parse_and_map(xen_node, 0); +} + static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; - struct resource res; - phys_addr_t grant_frames; if (!xen_domain()) return 0; - if (of_address_to_resource(xen_node, GRANT_TABLE_PHYSADDR, &res)) { - pr_err("Xen grant table base address not found\n"); - return -ENODEV; - } - grant_frames = res.start; + if (!acpi_disabled) + xen_acpi_guest_init(); + else + xen_dt_guest_init(); - xen_events_irq = irq_of_parse_and_map(xen_node, 0); if (!xen_events_irq) { pr_err("Xen event channel interrupt not found\n"); return -ENODEV; } + /* + * The fdt parsing codes have set EFI_RUNTIME_SERVICES if Xen EFI + * parameters are found. Force enable runtime services. + */ + if (efi_enabled(EFI_RUNTIME_SERVICES)) + xen_efi_runtime_setup(); + shared_info_page = (struct shared_info *)get_zeroed_page(GFP_KERNEL); if (!shared_info_page) { @@ -328,7 +396,13 @@ static int __init xen_guest_init(void) if (xen_vcpu_info == NULL) return -ENOMEM; - if (gnttab_setup_auto_xlat_frames(grant_frames)) { + /* Direct vCPU id mapping for ARM guests. */ + per_cpu(xen_vcpu_id, 0) = 0; + + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count)) { free_percpu(xen_vcpu_info); return -ENOMEM; } @@ -355,8 +429,8 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); - pv_time_ops.steal_clock = xen_stolen_accounting; - static_key_slow_inc(¶virt_steal_enabled); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); @@ -403,4 +477,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); +EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 9a36f4f..a648dfc 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -91,6 +91,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/include/asm/xen/xen-ops.h b/arch/arm64/include/asm/xen/xen-ops.h new file mode 100644 index 0000000..ec154e7 --- /dev/null +++ b/arch/arm64/include/asm/xen/xen-ops.h @@ -0,0 +1,6 @@ +#ifndef _ASM_XEN_OPS_H +#define _ASM_XEN_OPS_H + +void xen_efi_runtime_setup(void); + +#endif /* _ASM_XEN_OPS_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 5b82567..2981f1b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -257,6 +257,7 @@ void __init setup_arch(char **cmdline_p) */ cpu_uninstall_idmap(); + xen_early_init(); efi_init(); arm64_memblock_init(); @@ -283,8 +284,6 @@ void __init setup_arch(char **cmdline_p) else psci_acpi_init(); - xen_early_init(); - cpu_read_bootcpu_ops(); smp_init_cpus(); smp_build_mpidr_hash(); diff --git a/arch/arm64/xen/Makefile b/arch/arm64/xen/Makefile index 74a8d87..8ff8aa9 100644 --- a/arch/arm64/xen/Makefile +++ b/arch/arm64/xen/Makefile @@ -1,2 +1,3 @@ xen-arm-y += $(addprefix ../../arm/xen/, enlighten.o grant-table.o p2m.o mm.o) obj-y := xen-arm.o hypercall.o +obj-$(CONFIG_XEN_EFI) += $(addprefix ../../arm/xen/, efi.o) diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 70df80e..329c802 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -82,6 +82,7 @@ HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); +HYPERCALL2(vm_assist); ENTRY(privcmd_call) mov x16, x0 diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 59d34c5..9b7fa63 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -16,6 +16,7 @@ extern void prefill_possible_map(void); static inline void prefill_possible_map(void) {} #define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define cpu_acpi_id(cpu) 0 #define safe_smp_processor_id() 0 #endif /* CONFIG_SMP */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index c9734dc..ebd0c16 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -33,6 +33,7 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) } DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); +DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid); @@ -148,6 +149,7 @@ void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle); void smp_store_boot_cpu_info(void); void smp_store_cpu_info(int id); #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) +#define cpu_acpi_id(cpu) per_cpu(x86_cpu_to_acpiid, cpu) #else /* !CONFIG_SMP */ #define wbinvd_on_cpu(cpu) wbinvd() diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index 0d809e9..3bdd10d 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -76,15 +76,18 @@ /* * Leaf 5 (0x40000x04) * HVM-specific features + * EAX: Features + * EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag) */ -/* EAX Features */ /* Virtualized APIC registers */ #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized x2APIC accesses */ #define XEN_HVM_CPUID_X2APIC_VIRT (1u << 1) /* Memory mapped from other domains has valid IOMMU entries */ #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) +/* vcpu id is present in EBX */ +#define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) #define XEN_CPUID_MAX_NUM_LEAVES 4 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 9414f84..6738e5c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -161,13 +161,15 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) /** * acpi_register_lapic - register a local apic and generates a logic cpu number * @id: local apic id to register + * @acpiid: ACPI id to register * @enabled: this cpu is enabled or not * * Returns the logic cpu number which maps to the local apic */ -static int acpi_register_lapic(int id, u8 enabled) +static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) { unsigned int ver = 0; + int cpu; if (id >= MAX_LOCAL_APIC) { printk(KERN_INFO PREFIX "skipped apicid that is too big\n"); @@ -182,7 +184,11 @@ static int acpi_register_lapic(int id, u8 enabled) if (boot_cpu_physical_apicid != -1U) ver = apic_version[boot_cpu_physical_apicid]; - return generic_processor_info(id, ver); + cpu = generic_processor_info(id, ver); + if (cpu >= 0) + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; + + return cpu; } static int __init @@ -212,7 +218,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (!apic->apic_id_valid(apic_id) && enabled) printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); else - acpi_register_lapic(apic_id, enabled); + acpi_register_lapic(apic_id, processor->uid, enabled); #else printk(KERN_WARNING PREFIX "x2apic entry ignored\n"); #endif @@ -240,6 +246,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) * when we use CPU hotplug. */ acpi_register_lapic(processor->id, /* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -258,6 +265,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) acpi_table_print_madt_entry(header); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->processor_id, /* ACPI ID */ processor->lapic_flags & ACPI_MADT_ENABLED); return 0; @@ -714,7 +722,7 @@ int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) { int cpu; - cpu = acpi_register_lapic(physid, ACPI_MADT_ENABLED); + cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); return cpu; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f943d2f..ac8d8ad 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -92,8 +92,10 @@ static int apic_extnmi = APIC_EXTNMI_BSP; */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e4fcb87..7a40e06 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -236,6 +236,8 @@ void __init setup_per_cpu_areas(void) early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_bios_cpu_apicid, cpu) = early_per_cpu_map(x86_bios_cpu_apicid, cpu); + per_cpu(x86_cpu_to_acpiid, cpu) = + early_per_cpu_map(x86_cpu_to_acpiid, cpu); #endif #ifdef CONFIG_X86_32 per_cpu(x86_cpu_to_logical_apicid, cpu) = @@ -271,6 +273,7 @@ void __init setup_per_cpu_areas(void) #ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; + early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL; #endif #ifdef CONFIG_X86_32 early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index be14cc3..3be0121 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -20,10 +20,121 @@ #include <linux/init.h> #include <linux/string.h> +#include <xen/xen.h> #include <xen/xen-ops.h> +#include <xen/interface/platform.h> #include <asm/page.h> #include <asm/setup.h> +#include <asm/xen/hypercall.h> + +static efi_char16_t vendor[100] __initdata; + +static efi_system_table_t efi_systab_xen __initdata = { + .hdr = { + .signature = EFI_SYSTEM_TABLE_SIGNATURE, + .revision = 0, /* Initialized later. */ + .headersize = 0, /* Ignored by Linux Kernel. */ + .crc32 = 0, /* Ignored by Linux Kernel. */ + .reserved = 0 + }, + .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */ + .fw_revision = 0, /* Initialized later. */ + .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .boottime = (efi_boot_services_t *)EFI_INVALID_TABLE_ADDR, + /* Not used under Xen. */ + .nr_tables = 0, /* Initialized later. */ + .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */ +}; + +static const struct efi efi_xen __initconst = { + .systab = NULL, /* Initialized later. */ + .runtime_version = 0, /* Initialized later. */ + .mps = EFI_INVALID_TABLE_ADDR, + .acpi = EFI_INVALID_TABLE_ADDR, + .acpi20 = EFI_INVALID_TABLE_ADDR, + .smbios = EFI_INVALID_TABLE_ADDR, + .smbios3 = EFI_INVALID_TABLE_ADDR, + .sal_systab = EFI_INVALID_TABLE_ADDR, + .boot_info = EFI_INVALID_TABLE_ADDR, + .hcdp = EFI_INVALID_TABLE_ADDR, + .uga = EFI_INVALID_TABLE_ADDR, + .uv_systab = EFI_INVALID_TABLE_ADDR, + .fw_vendor = EFI_INVALID_TABLE_ADDR, + .runtime = EFI_INVALID_TABLE_ADDR, + .config_table = EFI_INVALID_TABLE_ADDR, + .get_time = xen_efi_get_time, + .set_time = xen_efi_set_time, + .get_wakeup_time = xen_efi_get_wakeup_time, + .set_wakeup_time = xen_efi_set_wakeup_time, + .get_variable = xen_efi_get_variable, + .get_next_variable = xen_efi_get_next_variable, + .set_variable = xen_efi_set_variable, + .query_variable_info = xen_efi_query_variable_info, + .update_capsule = xen_efi_update_capsule, + .query_capsule_caps = xen_efi_query_capsule_caps, + .get_next_high_mono_count = xen_efi_get_next_high_mono_count, + .reset_system = NULL, /* Functionality provided by Xen. */ + .set_virtual_address_map = NULL, /* Not used under Xen. */ + .flags = 0 /* Initialized later. */ +}; + +static efi_system_table_t __init *xen_efi_probe(void) +{ + struct xen_platform_op op = { + .cmd = XENPF_firmware_info, + .u.firmware_info = { + .type = XEN_FW_EFI_INFO, + .index = XEN_FW_EFI_CONFIG_TABLE + } + }; + union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; + + if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) + return NULL; + + /* Here we know that Xen runs on EFI platform. */ + + efi = efi_xen; + + efi_systab_xen.tables = info->cfg.addr; + efi_systab_xen.nr_tables = info->cfg.nent; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VENDOR; + info->vendor.bufsz = sizeof(vendor); + set_xen_guest_handle(info->vendor.name, vendor); + + if (HYPERVISOR_platform_op(&op) == 0) { + efi_systab_xen.fw_vendor = __pa_symbol(vendor); + efi_systab_xen.fw_revision = info->vendor.revision; + } else + efi_systab_xen.fw_vendor = __pa_symbol(L"UNKNOWN"); + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi_systab_xen.hdr.revision = info->version; + + op.cmd = XENPF_firmware_info; + op.u.firmware_info.type = XEN_FW_EFI_INFO; + op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; + + if (HYPERVISOR_platform_op(&op) == 0) + efi.runtime_version = info->version; + + return &efi_systab_xen; +} void __init xen_efi_init(void) { diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0f87db2..69b4b6d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -59,6 +59,7 @@ #include <asm/xen/pci.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> +#include <asm/xen/cpuid.h> #include <asm/fixmap.h> #include <asm/processor.h> #include <asm/proto.h> @@ -118,6 +119,10 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); */ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +/* Linux <-> Xen vCPU id mapping */ +DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); + enum xen_domain_type xen_domain_type = XEN_NATIVE; EXPORT_SYMBOL_GPL(xen_domain_type); @@ -179,7 +184,7 @@ static void clamp_max_cpus(void) #endif } -static void xen_vcpu_setup(int cpu) +void xen_vcpu_setup(int cpu) { struct vcpu_register_vcpu_info info; int err; @@ -202,8 +207,9 @@ static void xen_vcpu_setup(int cpu) if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu)) return; } - if (cpu < MAX_VIRT_CPUS) - per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS) + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; if (!have_vcpu_info_placement) { if (cpu >= MAX_VIRT_CPUS) @@ -223,7 +229,8 @@ static void xen_vcpu_setup(int cpu) hypervisor has no unregister variant and this hypercall does not allow to over-write info.mfn and info.offset. */ - err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info); + err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), + &info); if (err) { printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err); @@ -247,10 +254,11 @@ void xen_vcpu_restore(void) for_each_possible_cpu(cpu) { bool other_cpu = (cpu != smp_processor_id()); - bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL); + bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), + NULL); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL)) BUG(); xen_setup_runstate_info(cpu); @@ -259,7 +267,7 @@ void xen_vcpu_restore(void) xen_vcpu_setup(cpu); if (other_cpu && is_up && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL)) BUG(); } } @@ -588,7 +596,7 @@ static void xen_load_gdt(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; @@ -637,7 +645,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); unsigned long frames[pages]; int f; @@ -1135,8 +1143,11 @@ void xen_setup_vcpu_info_placement(void) { int cpu; - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + /* Set up direct vCPU id mapping for PV guests. */ + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); + } /* xen_vcpu_setup managed to place the vcpu_info within the * percpu area for all cpus, so make use of it. Note that for @@ -1727,6 +1738,9 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif xen_raw_console_write("about to get started...\n"); + /* Let's presume PV guests always boot on vCPU with id 0. */ + per_cpu(xen_vcpu_id, 0) = 0; + xen_setup_runstate_info(0); xen_efi_init(); @@ -1768,9 +1782,10 @@ void __ref xen_hvm_init_shared_info(void) * in that case multiple vcpus might be online. */ for_each_online_cpu(cpu) { /* Leave it to be NULL. */ - if (cpu >= MAX_VIRT_CPUS) + if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS) continue; - per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + per_cpu(xen_vcpu, cpu) = + &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)]; } } @@ -1795,6 +1810,12 @@ static void __init init_hvm_pv_info(void) xen_setup_features(); + cpuid(base + 4, &eax, &ebx, &ecx, &edx); + if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT) + this_cpu_write(xen_vcpu_id, ebx); + else + this_cpu_write(xen_vcpu_id, smp_processor_id()); + pv_info.name = "Xen HVM"; xen_domain_type = XEN_HVM_DOMAIN; @@ -1806,6 +1827,10 @@ static int xen_hvm_cpu_notify(struct notifier_block *self, unsigned long action, int cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: + if (cpu_acpi_id(cpu) != U32_MAX) + per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); + else + per_cpu(xen_vcpu_id, cpu) = cpu; xen_vcpu_setup(cpu); if (xen_have_vector_callback) { if (xen_feature(XENFEAT_hvm_safe_pvclock)) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index e079500..de4144c 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -111,63 +111,18 @@ int arch_gnttab_init(unsigned long nr_shared) } #ifdef CONFIG_XEN_PVH -#include <xen/balloon.h> #include <xen/events.h> -#include <linux/slab.h> -static int __init xlated_setup_gnttab_pages(void) -{ - struct page **pages; - xen_pfn_t *pfns; - void *vaddr; - int rc; - unsigned int i; - unsigned long nr_grant_frames = gnttab_max_grant_frames(); - - BUG_ON(nr_grant_frames == 0); - pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); - if (!pages) - return -ENOMEM; - - pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); - if (!pfns) { - kfree(pages); - return -ENOMEM; - } - rc = alloc_xenballooned_pages(nr_grant_frames, pages); - if (rc) { - pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - kfree(pages); - kfree(pfns); - return rc; - } - for (i = 0; i < nr_grant_frames; i++) - pfns[i] = page_to_pfn(pages[i]); - - vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL); - if (!vaddr) { - pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, - nr_grant_frames, rc); - free_xenballooned_pages(nr_grant_frames, pages); - kfree(pages); - kfree(pfns); - return -ENOMEM; - } - kfree(pages); - - xen_auto_xlat_grant_frames.pfn = pfns; - xen_auto_xlat_grant_frames.count = nr_grant_frames; - xen_auto_xlat_grant_frames.vaddr = vaddr; - - return 0; -} - +#include <xen/xen-ops.h> static int __init xen_pvh_gnttab_setup(void) { if (!xen_pvh_domain()) return -ENODEV; - return xlated_setup_gnttab_pages(); + xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); + + return xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, + &xen_auto_xlat_grant_frames.vaddr, + xen_auto_xlat_grant_frames.count); } /* Call it _before_ __gnttab_init as we need to initialize the * xen_auto_xlat_grant_frames first. */ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index a1207cb..33e9295 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -109,7 +109,8 @@ static void xen_safe_halt(void) static void xen_halt(void) { if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, + xen_vcpu_nr(smp_processor_id()), NULL); else xen_safe_halt(); } diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 9466354..32bdc2c 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -547,7 +547,7 @@ void xen_pmu_init(int cpu) return; fail: - pr_warn_once("Could not initialize VPMU for cpu %d, error %d\n", + pr_info_once("Could not initialize VPMU for cpu %d, error %d\n", cpu, err); free_pages((unsigned long)xenpmu_data, 0); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 719cf29..0b4d04c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -322,6 +322,13 @@ static void __init xen_smp_prepare_boot_cpu(void) xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); } + + /* + * Setup vcpu_info for boot CPU. + */ + if (xen_hvm_domain()) + xen_vcpu_setup(0); + /* * The alternative logic (which patches the unlock/lock) runs before * the smp bootup up code is activated. Hence we need to set this up @@ -454,7 +461,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) #endif ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); - if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) + if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) BUG(); kfree(ctxt); @@ -492,7 +499,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle) if (rc) return rc; - rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + rc = HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL); BUG_ON(rc); while (cpu_report_state(cpu) != CPU_ONLINE) @@ -520,7 +527,8 @@ static int xen_cpu_disable(void) static void xen_cpu_die(unsigned int cpu) { - while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) { + while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, + xen_vcpu_nr(cpu), NULL)) { __set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(HZ/10); } @@ -536,7 +544,7 @@ static void xen_cpu_die(unsigned int cpu) static void xen_play_dead(void) /* used only with HOTPLUG_CPU */ { play_dead_common(); - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL); cpu_bringup(); /* * commit 4b0c0f294 (tick: Cleanup NOHZ per cpu data on cpu down) @@ -576,7 +584,7 @@ static void stop_self(void *v) set_cpu_online(cpu, false); - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL); + HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL); BUG(); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 6deba5b..67356d2 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -11,8 +11,6 @@ #include <linux/interrupt.h> #include <linux/clocksource.h> #include <linux/clockchips.h> -#include <linux/kernel_stat.h> -#include <linux/math64.h> #include <linux/gfp.h> #include <linux/slab.h> #include <linux/pvclock_gtod.h> @@ -31,44 +29,6 @@ /* Xen may fire a timer up to this many ns early */ #define TIMER_SLOP 100000 -#define NS_PER_TICK (1000000000LL / HZ) - -/* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); - -/* unused ns of stolen time */ -static DEFINE_PER_CPU(u64, xen_residual_stolen); - -static void do_stolen_accounting(void) -{ - struct vcpu_runstate_info state; - struct vcpu_runstate_info *snap; - s64 runnable, offline, stolen; - cputime_t ticks; - - xen_get_runstate_snapshot(&state); - - WARN_ON(state.state != RUNSTATE_running); - - snap = this_cpu_ptr(&xen_runstate_snapshot); - - /* work out how much time the VCPU has not been runn*ing* */ - runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable]; - offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline]; - - *snap = state; - - /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. */ - stolen = runnable + offline + __this_cpu_read(xen_residual_stolen); - - if (stolen < 0) - stolen = 0; - - ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __this_cpu_write(xen_residual_stolen, stolen); - account_steal_ticks(ticks); -} /* Get the TSC speed from Xen */ static unsigned long xen_tsc_khz(void) @@ -263,8 +223,10 @@ static int xen_vcpuop_shutdown(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) || - HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu), + NULL) || + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -274,7 +236,8 @@ static int xen_vcpuop_set_oneshot(struct clock_event_device *evt) { int cpu = smp_processor_id(); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL)) BUG(); return 0; @@ -293,7 +256,8 @@ static int xen_vcpuop_set_next_event(unsigned long delta, /* Get an event anyway, even if the timeout is already expired */ single.flags = 0; - ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single); + ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu), + &single); BUG_ON(ret != 0); return ret; @@ -335,8 +299,6 @@ static irqreturn_t xen_timer_interrupt(int irq, void *dev_id) ret = IRQ_HANDLED; } - do_stolen_accounting(); - return ret; } @@ -394,13 +356,15 @@ void xen_timer_resume(void) return; for_each_online_cpu(cpu) { - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL)) + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, + xen_vcpu_nr(cpu), NULL)) BUG(); } } static const struct pv_time_ops xen_time_ops __initconst = { .sched_clock = xen_clocksource_read, + .steal_clock = xen_steal_clock, }; static void __init xen_time_init(void) @@ -414,7 +378,8 @@ static void __init xen_time_init(void) clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC); - if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { + if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu), + NULL) == 0) { /* Successfully turned off 100Hz tick, so we have the vcpuop-based timer interface */ printk(KERN_DEBUG "Xen: using vcpuop timer interface\n"); @@ -431,6 +396,8 @@ static void __init xen_time_init(void) xen_setup_timer(cpu); xen_setup_cpu_clockevents(); + xen_time_setup_guest(); + if (xen_initial_domain()) pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 4140b07..3cbce3b 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -76,6 +76,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); +void xen_vcpu_setup(int cpu); void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 405056b..ad9fc84 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -46,6 +46,13 @@ DEFINE_MUTEX(acpi_device_lock); LIST_HEAD(acpi_wakeup_device_list); static DEFINE_MUTEX(acpi_hp_context_lock); +/* + * The UART device described by the SPCR table is the only object which needs + * special-casing. Everything else is covered by ACPI namespace paths in STAO + * table. + */ +static u64 spcr_uart_addr; + struct acpi_dep_data { struct list_head node; acpi_handle master; @@ -1458,6 +1465,41 @@ static int acpi_add_single_object(struct acpi_device **child, return 0; } +static acpi_status acpi_get_resource_memory(struct acpi_resource *ares, + void *context) +{ + struct resource *res = context; + + if (acpi_dev_resource_memory(ares, res)) + return AE_CTRL_TERMINATE; + + return AE_OK; +} + +static bool acpi_device_should_be_hidden(acpi_handle handle) +{ + acpi_status status; + struct resource res; + + /* Check if it should ignore the UART device */ + if (!(spcr_uart_addr && acpi_has_method(handle, METHOD_NAME__CRS))) + return false; + + /* + * The UART device described in SPCR table is assumed to have only one + * memory resource present. So we only look for the first one here. + */ + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + acpi_get_resource_memory, &res); + if (ACPI_FAILURE(status) || res.start != spcr_uart_addr) + return false; + + acpi_handle_info(handle, "The UART device @%pa in SPCR table will be hidden\n", + &res.start); + + return true; +} + static int acpi_bus_type_and_status(acpi_handle handle, int *type, unsigned long long *sta) { @@ -1471,6 +1513,9 @@ static int acpi_bus_type_and_status(acpi_handle handle, int *type, switch (acpi_type) { case ACPI_TYPE_ANY: /* for ACPI_ROOT_OBJECT */ case ACPI_TYPE_DEVICE: + if (acpi_device_should_be_hidden(handle)) + return -ENODEV; + *type = ACPI_BUS_TYPE_DEVICE; status = acpi_bus_get_status_handle(handle, sta); if (ACPI_FAILURE(status)) @@ -1925,11 +1970,26 @@ static int acpi_bus_scan_fixed(void) return result < 0 ? result : 0; } +static void __init acpi_get_spcr_uart_addr(void) +{ + acpi_status status; + struct acpi_table_spcr *spcr_ptr; + + status = acpi_get_table(ACPI_SIG_SPCR, 0, + (struct acpi_table_header **)&spcr_ptr); + if (ACPI_SUCCESS(status)) + spcr_uart_addr = spcr_ptr->serial_port.address; + else + printk(KERN_WARNING PREFIX "STAO table present, but SPCR is missing\n"); +} + static bool acpi_scan_initialized; int __init acpi_scan_init(void) { int result; + acpi_status status; + struct acpi_table_stao *stao_ptr; acpi_pci_root_init(); acpi_pci_link_init(); @@ -1945,6 +2005,20 @@ int __init acpi_scan_init(void) acpi_scan_add_handler(&generic_device_handler); + /* + * If there is STAO table, check whether it needs to ignore the UART + * device in SPCR table. + */ + status = acpi_get_table(ACPI_SIG_STAO, 0, + (struct acpi_table_header **)&stao_ptr); + if (ACPI_SUCCESS(status)) { + if (stao_ptr->header.length > sizeof(struct acpi_table_stao)) + printk(KERN_INFO PREFIX "STAO Name List not yet supported."); + + if (stao_ptr->ignore_uart) + acpi_get_spcr_uart_addr(); + } + mutex_lock(&acpi_scan_lock); /* * Enumerate devices in the ACPI namespace. diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 2994cfa..3cc6d1d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -379,7 +379,7 @@ static struct attribute *xen_vbdstat_attrs[] = { NULL }; -static struct attribute_group xen_vbdstat_group = { +static const struct attribute_group xen_vbdstat_group = { .name = "statistics", .attrs = xen_vbdstat_attrs, }; @@ -715,8 +715,11 @@ static void backend_changed(struct xenbus_watch *watch, /* Front end dir is a number, which is used as the handle. */ err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); - if (err) + if (err) { + kfree(be->mode); + be->mode = NULL; return; + } be->major = major; be->minor = minor; @@ -1022,9 +1025,9 @@ static int connect_ring(struct backend_info *be) pr_debug("%s %s\n", __func__, dev->otherend); be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT; - err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", - "%63s", protocol, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol); + if (err <= 0) strcpy(protocol, "unspecified, assuming default"); else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; @@ -1036,10 +1039,9 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -ENOSYS; } - err = xenbus_gather(XBT_NIL, dev->otherend, - "feature-persistent", "%u", - &pers_grants, NULL); - if (err) + err = xenbus_scanf(XBT_NIL, dev->otherend, + "feature-persistent", "%u", &pers_grants); + if (err <= 0) pers_grants = 0; be->blkif->vbd.feature_gnt_persistent = pers_grants; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 0b6682a..be4fea6 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2197,10 +2197,9 @@ static void blkfront_setup_discard(struct blkfront_info *info) info->discard_granularity = discard_granularity; info->discard_alignment = discard_alignment; } - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "discard-secure", "%d", &discard_secure, - NULL); - if (!err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "discard-secure", "%u", &discard_secure); + if (err > 0) info->feature_secdiscard = !!discard_secure; } @@ -2300,9 +2299,8 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) info->feature_flush = 0; info->feature_fua = 0; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%d", &barrier, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-barrier", "%d", &barrier); /* * If there's no "feature-barrier" defined, then it means @@ -2311,7 +2309,7 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) * * If there are barriers, then we use flush. */ - if (!err && barrier) { + if (err > 0 && barrier) { info->feature_flush = 1; info->feature_fua = 1; } @@ -2320,34 +2318,31 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) * And if there is "feature-flush-cache" use that above * barriers. */ - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-flush-cache", "%d", &flush, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-flush-cache", "%d", &flush); - if (!err && flush) { + if (err > 0 && flush) { info->feature_flush = 1; info->feature_fua = 0; } - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-discard", "%d", &discard, - NULL); + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-discard", "%d", &discard); - if (!err && discard) + if (err > 0 && discard) blkfront_setup_discard(info); - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-persistent", "%u", &persistent, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-persistent", "%d", &persistent); + if (err <= 0) info->feature_persistent = 0; else info->feature_persistent = persistent; - err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-max-indirect-segments", "%u", &indirect_segments, - NULL); - if (err) + err = xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "feature-max-indirect-segments", "%u", + &indirect_segments); + if (err <= 0) info->max_indirect_segments = 0; else info->max_indirect_segments = min(indirect_segments, diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 17ccf0a..c394b81 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,6 +107,11 @@ static int __init arm_enable_runtime_services(void) return 0; } + if (efi_enabled(EFI_RUNTIME_SERVICES)) { + pr_info("EFI runtime services access via paravirt.\n"); + return 0; + } + pr_info("Remapping and enabling EFI services.\n"); mapsize = efi.memmap.map_end - efi.memmap.map; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 8730fd4..5a2631a 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -568,12 +568,14 @@ device_initcall(efi_load_efivars); FIELD_SIZEOF(struct efi_fdt_params, field) \ } -static __initdata struct { +struct params { const char name[32]; const char propname[32]; int offset; int size; -} dt_params[] = { +}; + +static __initdata struct params fdt_params[] = { UEFI_PARAM("System Table", "linux,uefi-system-table", system_table), UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap), UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size), @@ -581,44 +583,91 @@ static __initdata struct { UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver) }; +static __initdata struct params xen_fdt_params[] = { + UEFI_PARAM("System Table", "xen,uefi-system-table", system_table), + UEFI_PARAM("MemMap Address", "xen,uefi-mmap-start", mmap), + UEFI_PARAM("MemMap Size", "xen,uefi-mmap-size", mmap_size), + UEFI_PARAM("MemMap Desc. Size", "xen,uefi-mmap-desc-size", desc_size), + UEFI_PARAM("MemMap Desc. Version", "xen,uefi-mmap-desc-ver", desc_ver) +}; + +#define EFI_FDT_PARAMS_SIZE ARRAY_SIZE(fdt_params) + +static __initdata struct { + const char *uname; + const char *subnode; + struct params *params; +} dt_params[] = { + { "hypervisor", "uefi", xen_fdt_params }, + { "chosen", NULL, fdt_params }, +}; + struct param_info { int found; void *params; + const char *missing; }; -static int __init fdt_find_uefi_params(unsigned long node, const char *uname, - int depth, void *data) +static int __init __find_uefi_params(unsigned long node, + struct param_info *info, + struct params *params) { - struct param_info *info = data; const void *prop; void *dest; u64 val; int i, len; - if (depth != 1 || strcmp(uname, "chosen") != 0) - return 0; - - for (i = 0; i < ARRAY_SIZE(dt_params); i++) { - prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len); - if (!prop) + for (i = 0; i < EFI_FDT_PARAMS_SIZE; i++) { + prop = of_get_flat_dt_prop(node, params[i].propname, &len); + if (!prop) { + info->missing = params[i].name; return 0; - dest = info->params + dt_params[i].offset; + } + + dest = info->params + params[i].offset; info->found++; val = of_read_number(prop, len / sizeof(u32)); - if (dt_params[i].size == sizeof(u32)) + if (params[i].size == sizeof(u32)) *(u32 *)dest = val; else *(u64 *)dest = val; if (efi_enabled(EFI_DBG)) - pr_info(" %s: 0x%0*llx\n", dt_params[i].name, - dt_params[i].size * 2, val); + pr_info(" %s: 0x%0*llx\n", params[i].name, + params[i].size * 2, val); } + return 1; } +static int __init fdt_find_uefi_params(unsigned long node, const char *uname, + int depth, void *data) +{ + struct param_info *info = data; + int i; + + for (i = 0; i < ARRAY_SIZE(dt_params); i++) { + const char *subnode = dt_params[i].subnode; + + if (depth != 1 || strcmp(uname, dt_params[i].uname) != 0) { + info->missing = dt_params[i].params[0].name; + continue; + } + + if (subnode) { + node = of_get_flat_dt_subnode_by_name(node, subnode); + if (node < 0) + return 0; + } + + return __find_uefi_params(node, info, dt_params[i].params); + } + + return 0; +} + int __init efi_get_fdt_params(struct efi_fdt_params *params) { struct param_info info; @@ -634,7 +683,7 @@ int __init efi_get_fdt_params(struct efi_fdt_params *params) pr_info("UEFI not found.\n"); else if (!ret) pr_err("Can't find '%s' in device tree!\n", - dt_params[info.found].name); + info.missing); return ret; } diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 33daffc..0e02947 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -744,6 +744,19 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node, } /** + * of_get_flat_dt_subnode_by_name - get the subnode by given name + * + * @node: the parent node + * @uname: the name of subnode + * @return offset of the subnode, or -FDT_ERR_NOTFOUND if there is none + */ + +int of_get_flat_dt_subnode_by_name(unsigned long node, const char *uname) +{ + return fdt_subnode_offset(initial_boot_params, node, uname); +} + +/** * of_get_flat_dt_root - find the root node in the flat blob */ unsigned long __init of_get_flat_dt_root(void) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 979a831..f15bb3b7 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -275,7 +275,7 @@ config XEN_HAVE_PVMMU config XEN_EFI def_bool y - depends on X86_64 && EFI + depends on (ARM || ARM64 || X86_64) && EFI config XEN_AUTO_XLATE def_bool y diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 030e91b..8feab810 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -10,6 +10,7 @@ CFLAGS_features.o := $(nostackp) CFLAGS_efi.o += -fshort-wchar LDFLAGS += $(call ld-option, --no-wchar-size-warning) +dom0-$(CONFIG_ARM64) += arm-device.o dom0-$(CONFIG_PCI) += pci.o dom0-$(CONFIG_USB_SUPPORT) += dbgp.o dom0-$(CONFIG_XEN_ACPI) += acpi.o $(xen-pad-y) diff --git a/drivers/xen/arm-device.c b/drivers/xen/arm-device.c new file mode 100644 index 0000000..778acf8 --- /dev/null +++ b/drivers/xen/arm-device.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2015, Linaro Limited, Shannon Zhao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/platform_device.h> +#include <linux/acpi.h> +#include <xen/xen.h> +#include <xen/page.h> +#include <xen/interface/memory.h> +#include <asm/xen/hypervisor.h> +#include <asm/xen/hypercall.h> + +static int xen_unmap_device_mmio(const struct resource *resources, + unsigned int count) +{ + unsigned int i, j, nr; + int rc = 0; + const struct resource *r; + struct xen_remove_from_physmap xrp; + + for (i = 0; i < count; i++) { + r = &resources[i]; + nr = DIV_ROUND_UP(resource_size(r), XEN_PAGE_SIZE); + if ((resource_type(r) != IORESOURCE_MEM) || (nr == 0)) + continue; + + for (j = 0; j < nr; j++) { + xrp.domid = DOMID_SELF; + xrp.gpfn = XEN_PFN_DOWN(r->start) + j; + rc = HYPERVISOR_memory_op(XENMEM_remove_from_physmap, + &xrp); + if (rc) + return rc; + } + } + + return rc; +} + +static int xen_map_device_mmio(const struct resource *resources, + unsigned int count) +{ + unsigned int i, j, nr; + int rc = 0; + const struct resource *r; + xen_pfn_t *gpfns; + xen_ulong_t *idxs; + int *errs; + struct xen_add_to_physmap_range xatp; + + for (i = 0; i < count; i++) { + r = &resources[i]; + nr = DIV_ROUND_UP(resource_size(r), XEN_PAGE_SIZE); + if ((resource_type(r) != IORESOURCE_MEM) || (nr == 0)) + continue; + + gpfns = kzalloc(sizeof(xen_pfn_t) * nr, GFP_KERNEL); + idxs = kzalloc(sizeof(xen_ulong_t) * nr, GFP_KERNEL); + errs = kzalloc(sizeof(int) * nr, GFP_KERNEL); + if (!gpfns || !idxs || !errs) { + kfree(gpfns); + kfree(idxs); + kfree(errs); + rc = -ENOMEM; + goto unmap; + } + + for (j = 0; j < nr; j++) { + /* + * The regions are always mapped 1:1 to DOM0 and this is + * fine because the memory map for DOM0 is the same as + * the host (except for the RAM). + */ + gpfns[j] = XEN_PFN_DOWN(r->start) + j; + idxs[j] = XEN_PFN_DOWN(r->start) + j; + } + + xatp.domid = DOMID_SELF; + xatp.size = nr; + xatp.space = XENMAPSPACE_dev_mmio; + + set_xen_guest_handle(xatp.gpfns, gpfns); + set_xen_guest_handle(xatp.idxs, idxs); + set_xen_guest_handle(xatp.errs, errs); + + rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap_range, &xatp); + kfree(gpfns); + kfree(idxs); + kfree(errs); + if (rc) + goto unmap; + } + + return rc; + +unmap: + xen_unmap_device_mmio(resources, i); + return rc; +} + +static int xen_platform_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct platform_device *pdev = to_platform_device(data); + int r = 0; + + if (pdev->num_resources == 0 || pdev->resource == NULL) + return NOTIFY_OK; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_map_device_mmio(pdev->resource, pdev->num_resources); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_unmap_device_mmio(pdev->resource, pdev->num_resources); + break; + default: + return NOTIFY_DONE; + } + if (r) + dev_err(&pdev->dev, "Platform: Failed to %s device %s MMIO!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "map" : + (action == BUS_NOTIFY_DEL_DEVICE ? "unmap" : "?"), + pdev->name); + + return NOTIFY_OK; +} + +static struct notifier_block platform_device_nb = { + .notifier_call = xen_platform_notifier, +}; + +static int __init register_xen_platform_notifier(void) +{ + if (!xen_initial_domain() || acpi_disabled) + return 0; + + return bus_register_notifier(&platform_bus_type, &platform_device_nb); +} + +arch_initcall(register_xen_platform_notifier); + +#ifdef CONFIG_ARM_AMBA +#include <linux/amba/bus.h> + +static int xen_amba_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct amba_device *adev = to_amba_device(data); + int r = 0; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + r = xen_map_device_mmio(&adev->res, 1); + break; + case BUS_NOTIFY_DEL_DEVICE: + r = xen_unmap_device_mmio(&adev->res, 1); + break; + default: + return NOTIFY_DONE; + } + if (r) + dev_err(&adev->dev, "AMBA: Failed to %s device %s MMIO!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "map" : + (action == BUS_NOTIFY_DEL_DEVICE ? "unmap" : "?"), + adev->dev.init_name); + + return NOTIFY_OK; +} + +static struct notifier_block amba_device_nb = { + .notifier_call = xen_amba_notifier, +}; + +static int __init register_xen_amba_notifier(void) +{ + if (!xen_initial_domain() || acpi_disabled) + return 0; + + return bus_register_notifier(&amba_bustype, &amba_device_nb); +} + +arch_initcall(register_xen_amba_notifier); +#endif diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index e9d2135..22f71ff 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -38,7 +38,7 @@ #define efi_data(op) (op.u.efi_runtime_call) -static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) +efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { struct xen_platform_op op = INIT_EFI_OP(get_time); @@ -59,8 +59,9 @@ static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_time); -static efi_status_t xen_efi_set_time(efi_time_t *tm) +efi_status_t xen_efi_set_time(efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_time); @@ -72,10 +73,10 @@ static efi_status_t xen_efi_set_time(efi_time_t *tm) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_set_time); -static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, - efi_bool_t *pending, - efi_time_t *tm) +efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time); @@ -95,8 +96,9 @@ static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_wakeup_time); -static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) { struct xen_platform_op op = INIT_EFI_OP(set_wakeup_time); @@ -113,12 +115,11 @@ static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_set_wakeup_time); -static efi_status_t xen_efi_get_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 *attr, - unsigned long *data_size, - void *data) +efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(get_variable); @@ -138,10 +139,11 @@ static efi_status_t xen_efi_get_variable(efi_char16_t *name, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_variable); -static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, - efi_char16_t *name, - efi_guid_t *vendor) +efi_status_t xen_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) { struct xen_platform_op op = INIT_EFI_OP(get_next_variable_name); @@ -161,12 +163,11 @@ static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_next_variable); -static efi_status_t xen_efi_set_variable(efi_char16_t *name, - efi_guid_t *vendor, - u32 attr, - unsigned long data_size, - void *data) +efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data) { struct xen_platform_op op = INIT_EFI_OP(set_variable); @@ -183,11 +184,11 @@ static efi_status_t xen_efi_set_variable(efi_char16_t *name, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_set_variable); -static efi_status_t xen_efi_query_variable_info(u32 attr, - u64 *storage_space, - u64 *remaining_space, - u64 *max_variable_size) +efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) { struct xen_platform_op op = INIT_EFI_OP(query_variable_info); @@ -205,8 +206,9 @@ static efi_status_t xen_efi_query_variable_info(u32 attr, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_query_variable_info); -static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) +efi_status_t xen_efi_get_next_high_mono_count(u32 *count) { struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count); @@ -217,10 +219,10 @@ static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_get_next_high_mono_count); -static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, - unsigned long count, - unsigned long sg_list) +efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) { struct xen_platform_op op = INIT_EFI_OP(update_capsule); @@ -237,11 +239,11 @@ static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, return efi_data(op).status; } +EXPORT_SYMBOL_GPL(xen_efi_update_capsule); -static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, - unsigned long count, - u64 *max_size, - int *reset_type) +efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type) { struct xen_platform_op op = INIT_EFI_OP(query_capsule_capabilities); @@ -260,111 +262,4 @@ static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, return efi_data(op).status; } - -static efi_char16_t vendor[100] __initdata; - -static efi_system_table_t efi_systab_xen __initdata = { - .hdr = { - .signature = EFI_SYSTEM_TABLE_SIGNATURE, - .revision = 0, /* Initialized later. */ - .headersize = 0, /* Ignored by Linux Kernel. */ - .crc32 = 0, /* Ignored by Linux Kernel. */ - .reserved = 0 - }, - .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */ - .fw_revision = 0, /* Initialized later. */ - .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_out = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .stderr = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .runtime = (efi_runtime_services_t *)EFI_INVALID_TABLE_ADDR, - /* Not used under Xen. */ - .boottime = (efi_boot_services_t *)EFI_INVALID_TABLE_ADDR, - /* Not used under Xen. */ - .nr_tables = 0, /* Initialized later. */ - .tables = EFI_INVALID_TABLE_ADDR /* Initialized later. */ -}; - -static const struct efi efi_xen __initconst = { - .systab = NULL, /* Initialized later. */ - .runtime_version = 0, /* Initialized later. */ - .mps = EFI_INVALID_TABLE_ADDR, - .acpi = EFI_INVALID_TABLE_ADDR, - .acpi20 = EFI_INVALID_TABLE_ADDR, - .smbios = EFI_INVALID_TABLE_ADDR, - .smbios3 = EFI_INVALID_TABLE_ADDR, - .sal_systab = EFI_INVALID_TABLE_ADDR, - .boot_info = EFI_INVALID_TABLE_ADDR, - .hcdp = EFI_INVALID_TABLE_ADDR, - .uga = EFI_INVALID_TABLE_ADDR, - .uv_systab = EFI_INVALID_TABLE_ADDR, - .fw_vendor = EFI_INVALID_TABLE_ADDR, - .runtime = EFI_INVALID_TABLE_ADDR, - .config_table = EFI_INVALID_TABLE_ADDR, - .get_time = xen_efi_get_time, - .set_time = xen_efi_set_time, - .get_wakeup_time = xen_efi_get_wakeup_time, - .set_wakeup_time = xen_efi_set_wakeup_time, - .get_variable = xen_efi_get_variable, - .get_next_variable = xen_efi_get_next_variable, - .set_variable = xen_efi_set_variable, - .query_variable_info = xen_efi_query_variable_info, - .update_capsule = xen_efi_update_capsule, - .query_capsule_caps = xen_efi_query_capsule_caps, - .get_next_high_mono_count = xen_efi_get_next_high_mono_count, - .reset_system = NULL, /* Functionality provided by Xen. */ - .set_virtual_address_map = NULL, /* Not used under Xen. */ - .flags = 0 /* Initialized later. */ -}; - -efi_system_table_t __init *xen_efi_probe(void) -{ - struct xen_platform_op op = { - .cmd = XENPF_firmware_info, - .u.firmware_info = { - .type = XEN_FW_EFI_INFO, - .index = XEN_FW_EFI_CONFIG_TABLE - } - }; - union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; - - if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) - return NULL; - - /* Here we know that Xen runs on EFI platform. */ - - efi = efi_xen; - - efi_systab_xen.tables = info->cfg.addr; - efi_systab_xen.nr_tables = info->cfg.nent; - - op.cmd = XENPF_firmware_info; - op.u.firmware_info.type = XEN_FW_EFI_INFO; - op.u.firmware_info.index = XEN_FW_EFI_VENDOR; - info->vendor.bufsz = sizeof(vendor); - set_xen_guest_handle(info->vendor.name, vendor); - - if (HYPERVISOR_platform_op(&op) == 0) { - efi_systab_xen.fw_vendor = __pa_symbol(vendor); - efi_systab_xen.fw_revision = info->vendor.revision; - } else - efi_systab_xen.fw_vendor = __pa_symbol(L"UNKNOWN"); - - op.cmd = XENPF_firmware_info; - op.u.firmware_info.type = XEN_FW_EFI_INFO; - op.u.firmware_info.index = XEN_FW_EFI_VERSION; - - if (HYPERVISOR_platform_op(&op) == 0) - efi_systab_xen.hdr.revision = info->version; - - op.cmd = XENPF_firmware_info; - op.u.firmware_info.type = XEN_FW_EFI_INFO; - op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; - - if (HYPERVISOR_platform_op(&op) == 0) - efi.runtime_version = info->version; - - return &efi_systab_xen; -} +EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps); diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 71d49a9..d5dbdb9 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -895,7 +895,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) irq_set_chip_and_handler_name(irq, &xen_percpu_chip, handle_percpu_irq, "ipi"); - bind_ipi.vcpu = cpu; + bind_ipi.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0) BUG(); @@ -991,7 +991,7 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) handle_edge_irq, "virq"); bind_virq.virq = virq; - bind_virq.vcpu = cpu; + bind_virq.vcpu = xen_vcpu_nr(cpu); ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (ret == 0) @@ -1211,7 +1211,8 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) #ifdef CONFIG_X86 if (unlikely(vector == XEN_NMI_VECTOR)) { - int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, cpu, NULL); + int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu), + NULL); if (rc < 0) printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc); return; @@ -1318,7 +1319,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) /* Send future instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; - bind_vcpu.vcpu = tcpu; + bind_vcpu.vcpu = xen_vcpu_nr(tcpu); /* * Mask the event while changing the VCPU binding to prevent @@ -1458,7 +1459,7 @@ static void restore_cpu_virqs(unsigned int cpu) /* Get a new binding from Xen. */ bind_virq.virq = virq; - bind_virq.vcpu = cpu; + bind_virq.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq) != 0) BUG(); @@ -1482,7 +1483,7 @@ static void restore_cpu_ipis(unsigned int cpu) BUG_ON(ipi_from_irq(irq) != ipi); /* Get a new binding from Xen. */ - bind_ipi.vcpu = cpu; + bind_ipi.vcpu = xen_vcpu_nr(cpu); if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0) BUG(); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 9289a17..266c2c7 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -113,7 +113,7 @@ static int init_control_block(int cpu, init_control.control_gfn = virt_to_gfn(control_block); init_control.offset = 0; - init_control.vcpu = cpu; + init_control.vcpu = xen_vcpu_nr(cpu); return HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control); } diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index f4edd6d..e8c7f09 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -55,6 +55,7 @@ #include <xen/xen.h> #include <xen/events.h> #include <xen/evtchn.h> +#include <xen/xen-ops.h> #include <asm/xen/hypervisor.h> struct per_user_data { @@ -73,8 +74,12 @@ struct per_user_data { wait_queue_head_t evtchn_wait; struct fasync_struct *evtchn_async_queue; const char *name; + + domid_t restrict_domid; }; +#define UNRESTRICTED_DOMID ((domid_t)-1) + struct user_evtchn { struct rb_node node; struct per_user_data *user; @@ -443,12 +448,16 @@ static long evtchn_ioctl(struct file *file, struct ioctl_evtchn_bind_virq bind; struct evtchn_bind_virq bind_virq; + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID) + break; + rc = -EFAULT; if (copy_from_user(&bind, uarg, sizeof(bind))) break; bind_virq.virq = bind.virq; - bind_virq.vcpu = 0; + bind_virq.vcpu = xen_vcpu_nr(0); rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (rc != 0) @@ -468,6 +477,11 @@ static long evtchn_ioctl(struct file *file, if (copy_from_user(&bind, uarg, sizeof(bind))) break; + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID && + u->restrict_domid != bind.remote_domain) + break; + bind_interdomain.remote_dom = bind.remote_domain; bind_interdomain.remote_port = bind.remote_port; rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, @@ -485,6 +499,10 @@ static long evtchn_ioctl(struct file *file, struct ioctl_evtchn_bind_unbound_port bind; struct evtchn_alloc_unbound alloc_unbound; + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID) + break; + rc = -EFAULT; if (copy_from_user(&bind, uarg, sizeof(bind))) break; @@ -553,6 +571,27 @@ static long evtchn_ioctl(struct file *file, break; } + case IOCTL_EVTCHN_RESTRICT_DOMID: { + struct ioctl_evtchn_restrict_domid ierd; + + rc = -EACCES; + if (u->restrict_domid != UNRESTRICTED_DOMID) + break; + + rc = -EFAULT; + if (copy_from_user(&ierd, uarg, sizeof(ierd))) + break; + + rc = -EINVAL; + if (ierd.domid == 0 || ierd.domid >= DOMID_FIRST_RESERVED) + break; + + u->restrict_domid = ierd.domid; + rc = 0; + + break; + } + default: rc = -ENOSYS; break; @@ -601,6 +640,8 @@ static int evtchn_open(struct inode *inode, struct file *filp) mutex_init(&u->ring_cons_mutex); spin_lock_init(&u->ring_prod_lock); + u->restrict_domid = UNRESTRICTED_DOMID; + filp->private_data = u; return nonseekable_open(inode, filp); diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 4547a91..7a47c4c 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -504,7 +504,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) struct gntalloc_file_private_data *priv = filp->private_data; struct gntalloc_vma_private_data *vm_priv; struct gntalloc_gref *gref; - int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int count = vma_pages(vma); int rv, i; if (!(vma->vm_flags & VM_SHARED)) { diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 6793957..bb95212 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -982,7 +982,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) { struct gntdev_priv *priv = flip->private_data; int index = vma->vm_pgoff; - int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int count = vma_pages(vma); struct grant_map *map; int i, err = -EINVAL; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index df2e6f7..702040f 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -582,7 +582,7 @@ static long privcmd_ioctl(struct file *file, static void privcmd_close(struct vm_area_struct *vma) { struct page **pages = vma->vm_private_data; - int numpgs = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int numpgs = vma_pages(vma); int numgfns = (vma->vm_end - vma->vm_start) >> XEN_PAGE_SHIFT; int rc; diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 7107842..ac5f23f 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -6,6 +6,7 @@ #include <linux/math64.h> #include <linux/gfp.h> +#include <asm/paravirt.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -46,27 +47,31 @@ static u64 get64(const u64 *p) return ret; } -/* - * Runstate accounting - */ -void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +static void xen_get_runstate_snapshot_cpu(struct vcpu_runstate_info *res, + unsigned int cpu) { u64 state_time; struct vcpu_runstate_info *state; BUG_ON(preemptible()); - state = this_cpu_ptr(&xen_runstate); + state = per_cpu_ptr(&xen_runstate, cpu); - /* - * The runstate info is always updated by the hypervisor on - * the current CPU, so there's no need to use anything - * stronger than a compiler barrier when fetching it. - */ do { state_time = get64(&state->state_entry_time); + rmb(); /* Hypervisor might update data. */ *res = READ_ONCE(*state); - } while (get64(&state->state_entry_time) != state_time); + rmb(); /* Hypervisor might update data. */ + } while (get64(&state->state_entry_time) != state_time || + (state_time & XEN_RUNSTATE_UPDATE)); +} + +/* + * Runstate accounting + */ +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +{ + xen_get_runstate_snapshot_cpu(res, smp_processor_id()); } /* return true when a vcpu could run but has no real cpu to run on */ @@ -75,6 +80,14 @@ bool xen_vcpu_stolen(int vcpu) return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } +u64 xen_steal_clock(int cpu) +{ + struct vcpu_runstate_info state; + + xen_get_runstate_snapshot_cpu(&state, cpu); + return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; +} + void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; @@ -82,7 +95,20 @@ void xen_setup_runstate_info(int cpu) area.addr.v = &per_cpu(xen_runstate, cpu); if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, - cpu, &area)) + xen_vcpu_nr(cpu), &area)) BUG(); } +void __init xen_time_setup_guest(void) +{ + bool xen_runstate_remote; + + xen_runstate_remote = !HYPERVISOR_vm_assist(VMASST_CMD_enable, + VMASST_TYPE_runstate_update_flag); + + pv_time_ops.steal_clock = xen_steal_clock; + + static_key_slow_inc(¶virt_steal_enabled); + if (xen_runstate_remote) + static_key_slow_inc(¶virt_steal_rq_enabled); +} diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 6a25533..9e9286d 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -148,7 +148,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); const struct config_field_entry *cfg_entry; const struct config_field *field; - int req_start, req_end, field_start, field_end; + int field_start, field_end; /* if read fails for any reason, return 0 * (as if device didn't respond) */ u32 value = 0, tmp_val; @@ -178,12 +178,10 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { field = cfg_entry->field; - req_start = offset; - req_end = offset + size; field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if (req_end > field_start && field_end > req_start) { + if (offset + size > field_start && field_end > offset) { err = conf_space_read(dev, cfg_entry, field_start, &tmp_val); if (err) @@ -191,7 +189,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size, value = merge_value(value, tmp_val, get_mask(field->size), - field_start - req_start); + field_start - offset); } } @@ -211,7 +209,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) const struct config_field_entry *cfg_entry; const struct config_field *field; u32 tmp_val; - int req_start, req_end, field_start, field_end; + int field_start, field_end; if (unlikely(verbose_request)) printk(KERN_DEBUG @@ -224,21 +222,17 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value) list_for_each_entry(cfg_entry, &dev_data->config_fields, list) { field = cfg_entry->field; - req_start = offset; - req_end = offset + size; field_start = OFFSET(cfg_entry); field_end = OFFSET(cfg_entry) + field->size; - if (req_end > field_start && field_end > req_start) { - tmp_val = 0; - - err = xen_pcibk_config_read(dev, field_start, - field->size, &tmp_val); + if (offset + size > field_start && field_end > offset) { + err = conf_space_read(dev, cfg_entry, field_start, + &tmp_val); if (err) break; tmp_val = merge_value(tmp_val, value, get_mask(size), - req_start - field_start); + offset - field_start); err = conf_space_write(dev, cfg_entry, field_start, tmp_val); diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 9ead1c2..5fbfd9c 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -209,58 +209,35 @@ static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data) return 0; } -static inline void read_dev_bar(struct pci_dev *dev, - struct pci_bar_info *bar_info, int offset, - u32 len_mask) +static void *bar_init(struct pci_dev *dev, int offset) { - int pos; - struct resource *res = dev->resource; + unsigned int pos; + const struct resource *res = dev->resource; + struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); + + if (!bar) + return ERR_PTR(-ENOMEM); if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1) pos = PCI_ROM_RESOURCE; else { pos = (offset - PCI_BASE_ADDRESS_0) / 4; - if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE | - PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == - (PCI_BASE_ADDRESS_SPACE_MEMORY | - PCI_BASE_ADDRESS_MEM_TYPE_64))) { - bar_info->val = res[pos - 1].start >> 32; - bar_info->len_val = -resource_size(&res[pos - 1]) >> 32; - return; + if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64)) { + bar->val = res[pos - 1].start >> 32; + bar->len_val = -resource_size(&res[pos - 1]) >> 32; + return bar; } } if (!res[pos].flags || (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET | IORESOURCE_BUSY))) - return; - - bar_info->val = res[pos].start | - (res[pos].flags & PCI_REGION_FLAG_MASK); - bar_info->len_val = -resource_size(&res[pos]) | - (res[pos].flags & PCI_REGION_FLAG_MASK); -} + return bar; -static void *bar_init(struct pci_dev *dev, int offset) -{ - struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); - - if (!bar) - return ERR_PTR(-ENOMEM); - - read_dev_bar(dev, bar, offset, ~0); - - return bar; -} - -static void *rom_init(struct pci_dev *dev, int offset) -{ - struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL); - - if (!bar) - return ERR_PTR(-ENOMEM); - - read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE); + bar->val = res[pos].start | + (res[pos].flags & PCI_REGION_FLAG_MASK); + bar->len_val = -resource_size(&res[pos]) | + (res[pos].flags & PCI_REGION_FLAG_MASK); return bar; } @@ -383,7 +360,7 @@ static const struct config_field header_common[] = { { \ .offset = reg_offset, \ .size = 4, \ - .init = rom_init, \ + .init = bar_init, \ .reset = bar_reset, \ .release = bar_release, \ .u.dw.read = bar_read, \ diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index 4d529f3..7af369b6 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -55,7 +55,6 @@ struct xen_pcibk_dev_data { /* Used by XenBus and xen_pcibk_ops.c */ extern wait_queue_head_t xen_pcibk_aer_wait_queue; -extern struct workqueue_struct *xen_pcibk_wq; /* Used by pcistub.c and conf_space_quirks.c */ extern struct list_head xen_pcibk_quirks; diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 2f19dd7..f8c7775 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -310,7 +310,7 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) * already processing a request */ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { - queue_work(xen_pcibk_wq, &pdev->op_work); + schedule_work(&pdev->op_work); } /*_XEN_PCIB_active should have been cleared by pcifront. And also make sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index c252eb3..5ce878c 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -17,7 +17,6 @@ #include "pciback.h" #define INVALID_EVTCHN_IRQ (-1) -struct workqueue_struct *xen_pcibk_wq; static bool __read_mostly passthrough; module_param(passthrough, bool, S_IRUGO); @@ -76,8 +75,7 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) /* If the driver domain started an op, make sure we complete it * before releasing the shared memory */ - /* Note, the workqueue does not use spinlocks at all.*/ - flush_workqueue(xen_pcibk_wq); + flush_work(&pdev->op_work); if (pdev->sh_info != NULL) { xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); @@ -733,11 +731,6 @@ const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; int __init xen_pcibk_xenbus_register(void) { - xen_pcibk_wq = create_workqueue("xen_pciback_workqueue"); - if (!xen_pcibk_wq) { - pr_err("%s: create xen_pciback_workqueue failed\n", __func__); - return -EFAULT; - } xen_pcibk_backend = &xen_pcibk_vpci_backend; if (passthrough) xen_pcibk_backend = &xen_pcibk_passthrough_backend; @@ -747,6 +740,5 @@ int __init xen_pcibk_xenbus_register(void) void __exit xen_pcibk_xenbus_unregister(void) { - destroy_workqueue(xen_pcibk_wq); xenbus_unregister_driver(&xen_pcibk_driver); } diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index bcb53bd..611a231 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -31,7 +31,6 @@ #include "xenbus_probe.h" -static struct workqueue_struct *xenbus_frontend_wq; /* device/<type>/<id> => <type>-<id> */ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) @@ -109,13 +108,7 @@ static int xenbus_frontend_dev_resume(struct device *dev) if (xen_store_domain_type == XS_LOCAL) { struct xenbus_device *xdev = to_xenbus_device(dev); - if (!xenbus_frontend_wq) { - pr_err("%s: no workqueue to process delayed resume\n", - xdev->nodename); - return -EFAULT; - } - - queue_work(xenbus_frontend_wq, &xdev->work); + schedule_work(&xdev->work); return 0; } @@ -485,12 +478,6 @@ static int __init xenbus_probe_frontend_init(void) register_xenstore_notifier(&xenstore_notifier); - if (xen_store_domain_type == XS_LOCAL) { - xenbus_frontend_wq = create_workqueue("xenbus_frontend"); - if (!xenbus_frontend_wq) - pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n"); - } - return 0; } subsys_initcall(xenbus_probe_frontend_init); diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c index 5063c5e..23f1387 100644 --- a/drivers/xen/xlate_mmu.c +++ b/drivers/xen/xlate_mmu.c @@ -29,6 +29,8 @@ */ #include <linux/kernel.h> #include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> #include <asm/xen/hypercall.h> #include <asm/xen/hypervisor.h> @@ -37,6 +39,7 @@ #include <xen/page.h> #include <xen/interface/xen.h> #include <xen/interface/memory.h> +#include <xen/balloon.h> typedef void (*xen_gfn_fn_t)(unsigned long gfn, void *data); @@ -185,3 +188,77 @@ int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, return 0; } EXPORT_SYMBOL_GPL(xen_xlate_unmap_gfn_range); + +struct map_balloon_pages { + xen_pfn_t *pfns; + unsigned int idx; +}; + +static void setup_balloon_gfn(unsigned long gfn, void *data) +{ + struct map_balloon_pages *info = data; + + info->pfns[info->idx++] = gfn; +} + +/** + * xen_xlate_map_ballooned_pages - map a new set of ballooned pages + * @gfns: returns the array of corresponding GFNs + * @virt: returns the virtual address of the mapped region + * @nr_grant_frames: number of GFNs + * @return 0 on success, error otherwise + * + * This allocates a set of ballooned pages and maps them into the + * kernel's address space. + */ +int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt, + unsigned long nr_grant_frames) +{ + struct page **pages; + xen_pfn_t *pfns; + void *vaddr; + struct map_balloon_pages data; + int rc; + unsigned long nr_pages; + + BUG_ON(nr_grant_frames == 0); + nr_pages = DIV_ROUND_UP(nr_grant_frames, XEN_PFN_PER_PAGE); + pages = kcalloc(nr_pages, sizeof(pages[0]), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); + if (!pfns) { + kfree(pages); + return -ENOMEM; + } + rc = alloc_xenballooned_pages(nr_pages, pages); + if (rc) { + pr_warn("%s Couldn't balloon alloc %ld pages rc:%d\n", __func__, + nr_pages, rc); + kfree(pages); + kfree(pfns); + return rc; + } + + data.pfns = pfns; + data.idx = 0; + xen_for_each_gfn(pages, nr_grant_frames, setup_balloon_gfn, &data); + + vaddr = vmap(pages, nr_pages, 0, PAGE_KERNEL); + if (!vaddr) { + pr_warn("%s Couldn't map %ld pages rc:%d\n", __func__, + nr_pages, rc); + free_xenballooned_pages(nr_pages, pages); + kfree(pages); + kfree(pfns); + return -ENOMEM; + } + kfree(pages); + + *gfns = pfns; + *virt = vaddr; + + return 0; +} +EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 25a822f..44fda64 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -92,7 +92,6 @@ static inline void account_process_tick(struct task_struct *tsk, int user) extern void account_process_tick(struct task_struct *, int user); #endif -extern void account_steal_ticks(unsigned long ticks); extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 901ec01..26c3302 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -53,6 +53,8 @@ extern char __dtb_end[]; extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, void *data), void *data); +extern int of_get_flat_dt_subnode_by_name(unsigned long node, + const char *uname); extern const void *of_get_flat_dt_prop(unsigned long node, const char *name, int *size); extern int of_flat_dt_is_compatible(unsigned long node, const char *name); diff --git a/include/uapi/xen/evtchn.h b/include/uapi/xen/evtchn.h index 14e833ee4..cb4aa4b 100644 --- a/include/uapi/xen/evtchn.h +++ b/include/uapi/xen/evtchn.h @@ -85,4 +85,19 @@ struct ioctl_evtchn_notify { #define IOCTL_EVTCHN_RESET \ _IOC(_IOC_NONE, 'E', 5, 0) +/* + * Restrict this file descriptor so that it can only be used to bind + * new interdomain events from one domain. + * + * Once a file descriptor has been restricted it cannot be + * de-restricted, and must be closed and re-opened. Event channels + * which were bound before restricting remain bound afterwards, and + * can be notified as usual. + */ +#define IOCTL_EVTCHN_RESTRICT_DOMID \ + _IOC(_IOC_NONE, 'E', 6, sizeof(struct ioctl_evtchn_restrict_domid)) +struct ioctl_evtchn_restrict_domid { + domid_t domid; +}; + #endif /* __LINUX_PUBLIC_EVTCHN_H__ */ diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h index a6c7991..4d61fc58 100644 --- a/include/xen/interface/hvm/params.h +++ b/include/xen/interface/hvm/params.h @@ -27,16 +27,44 @@ * Parameter space for HVMOP_{set,get}_param. */ +#define HVM_PARAM_CALLBACK_IRQ 0 /* * How should CPU0 event-channel notifications be delivered? - * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). - * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: - * Domain = val[47:32], Bus = val[31:16], - * DevFn = val[15: 8], IntX = val[ 1: 0] - * val[63:56] == 2: val[7:0] is a vector number. + * * If val == 0 then CPU0 event-channel notifications are not delivered. + * If val != 0, val[63:56] encodes the type, as follows: */ -#define HVM_PARAM_CALLBACK_IRQ 0 + +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 +/* + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, + * and disables all notifications. + */ + +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 +/* + * val[55:0] is a delivery PCI INTx line: + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] + */ + +#if defined(__i386__) || defined(__x86_64__) +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 +/* + * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know + * if this delivery method is available. + */ +#elif defined(__arm__) || defined(__aarch64__) +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 +/* + * val[55:16] needs to be zero. + * val[15:8] is interrupt flag of the PPI used by event-channel: + * bit 8: the PPI is edge(1) or level(0) triggered + * bit 9: the PPI is active low(1) or high(0) + * val[7:0] is a PPI number used by event-channel. + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to + * the notification is handled by the interrupt controller. + */ +#endif #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index 2ecfe4f..9aa8988 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -160,6 +160,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); #define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, * XENMEM_add_to_physmap_range only. */ +#define XENMAPSPACE_dev_mmio 5 /* device mmio region */ /* * Sets the GPFN at which a particular page appears in the specified guest's diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index b05288c..98188c8 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h @@ -75,15 +75,21 @@ */ #define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { - /* VCPU's current state (RUNSTATE_*). */ - int state; - /* When was current state entered (system time, ns)? */ - uint64_t state_entry_time; - /* - * Time spent in each RUNSTATE_* (ns). The sum of these times is - * guaranteed not to drift from system time. - */ - uint64_t time[4]; + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Update indicator set in state_entry_time: + * When activated via VMASST_TYPE_runstate_update_flag, set during + * updates in guest memory mapped copy of vcpu_runstate_info. + */ +#define XEN_RUNSTATE_UPDATE (1ULL << 63) + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info); diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index d133112..1b0d189 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -413,7 +413,22 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); /* x86/PAE guests: support PDPTs above 4GB. */ #define VMASST_TYPE_pae_extended_cr3 3 -#define MAX_VMASST_TYPE 3 +/* + * x86 guests: Sane behaviour for virtual iopl + * - virtual iopl updated from do_iret() hypercalls. + * - virtual iopl reported in bounce frames. + * - guest kernels assumed to be level 0 for the purpose of iopl checks. + */ +#define VMASST_TYPE_architectural_iopl 4 + +/* + * All guests: activate update indicator in vcpu_runstate_info + * Enable setting the XEN_RUNSTATE_UPDATE flag in guest memory mapped + * vcpu_runstate_info during updates of the runstate information. + */ +#define VMASST_TYPE_runstate_update_flag 5 + +#define MAX_VMASST_TYPE 5 #ifndef __ASSEMBLY__ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 86abe07..9a37c541 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -9,6 +9,12 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +DECLARE_PER_CPU(int, xen_vcpu_id); +static inline int xen_vcpu_nr(int cpu) +{ + return per_cpu(xen_vcpu_id, cpu); +} + void xen_arch_pre_suspend(void); void xen_arch_post_suspend(int suspend_cancelled); @@ -21,7 +27,9 @@ void xen_resume_notifier_unregister(struct notifier_block *nb); bool xen_vcpu_stolen(int vcpu); void xen_setup_runstate_info(int cpu); +void xen_time_setup_guest(void); void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); +u64 xen_steal_clock(int cpu); int xen_setup_shutdown_event(void); @@ -85,17 +93,33 @@ int xen_xlate_remap_gfn_array(struct vm_area_struct *vma, struct page **pages); int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma, int nr, struct page **pages); +int xen_xlate_map_ballooned_pages(xen_pfn_t **pfns, void **vaddr, + unsigned long nr_grant_frames); bool xen_running_on_version_or_later(unsigned int major, unsigned int minor); -#ifdef CONFIG_XEN_EFI -extern efi_system_table_t *xen_efi_probe(void); -#else -static inline efi_system_table_t __init *xen_efi_probe(void) -{ - return NULL; -} -#endif +efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc); +efi_status_t xen_efi_set_time(efi_time_t *tm); +efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm); +efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm); +efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, + void *data); +efi_status_t xen_efi_get_next_variable(unsigned long *name_size, + efi_char16_t *name, efi_guid_t *vendor); +efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, + void *data); +efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size); +efi_status_t xen_efi_get_next_high_mono_count(u32 *count); +efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list); +efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type); #ifdef CONFIG_PREEMPT diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index ea0f6f3..1934f65 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -503,16 +503,6 @@ void account_process_tick(struct task_struct *p, int user_tick) } /* - * Account multiple ticks of steal time. - * @p: the process from which the cpu time has been stolen - * @ticks: number of stolen ticks - */ -void account_steal_ticks(unsigned long ticks) -{ - account_steal_time(jiffies_to_cputime(ticks)); -} - -/* * Account multiple ticks of idle time. * @ticks: number of stolen ticks */ |