diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-12 13:05:36 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-12 13:05:36 -0800 |
commit | c9bed1cf51011c815d88288b774865d013ca78a8 (patch) | |
tree | 77e205b9442338a3d11567d4abcdb8df616979cf | |
parent | 75777c1855e10c010c9c3a21611d95dd9be01ab1 (diff) | |
parent | a4cdb556cae05cd3e7b602b3a44c01420c4e2258 (diff) | |
download | op-kernel-dev-c9bed1cf51011c815d88288b774865d013ca78a8.zip op-kernel-dev-c9bed1cf51011c815d88288b774865d013ca78a8.tar.gz |
Merge tag 'for-linus-4.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from David Vrabel:
"Xen features and fixes for 4.5-rc0:
- Stolen ticks and PV wallclock support for arm/arm64
- Add grant copy ioctl to gntdev device"
* tag 'for-linus-4.5-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen/gntdev: add ioctl for grant copy
x86/xen: don't reset vcpu_info on a cancelled suspend
xen/gntdev: constify mmu_notifier_ops structures
xen/grant-table: constify gnttab_ops structure
xen/time: use READ_ONCE
xen/x86: convert remaining timespec to timespec64 in xen_pvclock_gtod_notify
xen/x86: support XENPF_settime64
xen/arm: set the system time in Xen via the XENPF_settime64 hypercall
xen/arm: introduce xen_read_wallclock
arm: extend pvclock_wall_clock with sec_hi
xen: introduce XENPF_settime64
xen/arm: introduce HYPERVISOR_platform_op on arm and arm64
xen: rename dom0_op to platform_op
xen/arm: account for stolen ticks
arm64: introduce CONFIG_PARAVIRT, PARAVIRT_TIME_ACCOUNTING and pv_time_ops
arm: introduce CONFIG_PARAVIRT, PARAVIRT_TIME_ACCOUNTING and pv_time_ops
missing include asm/paravirt.h in cputime.c
xen: move xen_setup_runstate_info and get_runstate_snapshot to drivers/xen/time.c
34 files changed, 677 insertions, 131 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b0e7ff1..426115f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1824,6 +1824,25 @@ config SWIOTLB config IOMMU_HELPER def_bool SWIOTLB +config PARAVIRT + bool "Enable paravirtualization code" + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + select PARAVIRT + default n + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + config XEN_DOM0 def_bool y depends on XEN @@ -1837,6 +1856,7 @@ config XEN select ARCH_DMA_ADDR_T_64BIT select ARM_PSCI select SWIOTLB_XEN + select PARAVIRT help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM. diff --git a/arch/arm/include/asm/paravirt.h b/arch/arm/include/asm/paravirt.h new file mode 100644 index 0000000..8435ff5 --- /dev/null +++ b/arch/arm/include/asm/paravirt.h @@ -0,0 +1,20 @@ +#ifndef _ASM_ARM_PARAVIRT_H +#define _ASM_ARM_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops { + unsigned long long (*steal_clock)(int cpu); +}; +extern struct pv_time_ops pv_time_ops; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pv_time_ops.steal_clock(cpu); +} +#endif + +#endif diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h index 712b50e..d769972 100644 --- a/arch/arm/include/asm/xen/hypercall.h +++ b/arch/arm/include/asm/xen/hypercall.h @@ -35,6 +35,7 @@ #include <xen/interface/xen.h> #include <xen/interface/sched.h> +#include <xen/interface/platform.h> long privcmd_call(unsigned call, unsigned long a1, unsigned long a2, unsigned long a3, @@ -49,6 +50,12 @@ int HYPERVISOR_memory_op(unsigned int cmd, void *arg); int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); +int HYPERVISOR_platform_op_raw(void *arg); +static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) +{ + op->interface_version = XENPF_INTERFACE_VERSION; + return HYPERVISOR_platform_op_raw(op); +} int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr); static inline int diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h index 5006600..75d5968 100644 --- a/arch/arm/include/asm/xen/interface.h +++ b/arch/arm/include/asm/xen/interface.h @@ -27,6 +27,8 @@ (hnd).p = val; \ } while (0) +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op + #ifndef __ASSEMBLY__ /* Explicitly size integers that represent pfns in the interface with * Xen so that we can have one ABI that works for 32 and 64 bit guests. @@ -76,6 +78,7 @@ struct pvclock_wall_clock { u32 version; u32 sec; u32 nsec; + u32 sec_hi; } __attribute__((__packed__)); #endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index f729085..2c5f160 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_EFI) += efi.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o endif +obj-$(CONFIG_PARAVIRT) += paravirt.o head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o diff --git a/arch/arm/kernel/paravirt.c b/arch/arm/kernel/paravirt.c new file mode 100644 index 0000000..53f371e --- /dev/null +++ b/arch/arm/kernel/paravirt.c @@ -0,0 +1,25 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2013 Citrix Systems + * + * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com> + */ + +#include <linux/export.h> +#include <linux/jump_label.h> +#include <linux/types.h> +#include <asm/paravirt.h> + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops pv_time_ops; +EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index fc7ea52..75cd734 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -12,6 +12,7 @@ #include <xen/page.h> #include <xen/interface/sched.h> #include <xen/xen-ops.h> +#include <asm/paravirt.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> #include <asm/system_misc.h> @@ -25,6 +26,10 @@ #include <linux/cpufreq.h> #include <linux/cpu.h> #include <linux/console.h> +#include <linux/pvclock_gtod.h> +#include <linux/time64.h> +#include <linux/timekeeping.h> +#include <linux/timekeeper_internal.h> #include <linux/mm.h> @@ -79,6 +84,83 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); +static unsigned long long xen_stolen_accounting(int cpu) +{ + struct vcpu_runstate_info state; + + BUG_ON(cpu != smp_processor_id()); + + xen_get_runstate_snapshot(&state); + + WARN_ON(state.state != RUNSTATE_running); + + return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline]; +} + +static void xen_read_wallclock(struct timespec64 *ts) +{ + u32 version; + struct timespec64 now, ts_monotonic; + struct shared_info *s = HYPERVISOR_shared_info; + struct pvclock_wall_clock *wall_clock = &(s->wc); + + /* get wallclock at system boot */ + do { + version = wall_clock->version; + rmb(); /* fetch version before time */ + now.tv_sec = ((uint64_t)wall_clock->sec_hi << 32) | wall_clock->sec; + now.tv_nsec = wall_clock->nsec; + rmb(); /* fetch time before checking version */ + } while ((wall_clock->version & 1) || (version != wall_clock->version)); + + /* time since system boot */ + ktime_get_ts64(&ts_monotonic); + *ts = timespec64_add(now, ts_monotonic); +} + +static int xen_pvclock_gtod_notify(struct notifier_block *nb, + unsigned long was_set, void *priv) +{ + /* Protected by the calling core code serialization */ + static struct timespec64 next_sync; + + struct xen_platform_op op; + struct timespec64 now, system_time; + struct timekeeper *tk = priv; + + now.tv_sec = tk->xtime_sec; + now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); + system_time = timespec64_add(now, tk->wall_to_monotonic); + + /* + * We only take the expensive HV call when the clock was set + * or when the 11 minutes RTC synchronization time elapsed. + */ + if (!was_set && timespec64_compare(&now, &next_sync) < 0) + return NOTIFY_OK; + + op.cmd = XENPF_settime64; + op.u.settime64.mbz = 0; + op.u.settime64.secs = now.tv_sec; + op.u.settime64.nsecs = now.tv_nsec; + op.u.settime64.system_time = timespec64_to_ns(&system_time); + (void)HYPERVISOR_platform_op(&op); + + /* + * Move the next drift compensation time 11 minutes + * ahead. That's emulating the sync_cmos_clock() update for + * the hardware RTC. + */ + next_sync = now; + next_sync.tv_sec += 11 * 60; + + return NOTIFY_OK; +} + +static struct notifier_block xen_pvclock_gtod_notifier = { + .notifier_call = xen_pvclock_gtod_notify, +}; + static void xen_percpu_init(void) { struct vcpu_register_vcpu_info info; @@ -104,6 +186,8 @@ static void xen_percpu_init(void) BUG_ON(err); per_cpu(xen_vcpu, cpu) = vcpup; + xen_setup_runstate_info(cpu); + after_register_vcpu_info: enable_percpu_irq(xen_events_irq, 0); put_cpu(); @@ -271,6 +355,11 @@ static int __init xen_guest_init(void) register_cpu_notifier(&xen_cpu_notifier); + pv_time_ops.steal_clock = xen_stolen_accounting; + static_key_slow_inc(¶virt_steal_enabled); + if (xen_initial_domain()) + pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier); + return 0; } early_initcall(xen_guest_init); @@ -282,6 +371,11 @@ static int __init xen_pm_init(void) pm_power_off = xen_power_off; arm_pm_restart = xen_restart; + if (!xen_initial_domain()) { + struct timespec64 ts; + xen_read_wallclock(&ts); + do_settimeofday64(&ts); + } return 0; } @@ -307,5 +401,6 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_memory_op); EXPORT_SYMBOL_GPL(HYPERVISOR_physdev_op); EXPORT_SYMBOL_GPL(HYPERVISOR_vcpu_op); EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); +EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 10fd99c..9a36f4f 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -89,6 +89,7 @@ HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); +HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); ENTRY(privcmd_call) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bf82ffe..d6ebffd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -555,6 +555,25 @@ config SECCOMP and the task is only allowed to execute a few safe syscalls defined by each seccomp mode. +config PARAVIRT + bool "Enable paravirtualization code" + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. + +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + select PARAVIRT + default n + help + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + config XEN_DOM0 def_bool y depends on XEN @@ -563,6 +582,7 @@ config XEN bool "Xen guest support on ARM64" depends on ARM64 && OF select SWIOTLB_XEN + select PARAVIRT help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h new file mode 100644 index 0000000..fd5f428 --- /dev/null +++ b/arch/arm64/include/asm/paravirt.h @@ -0,0 +1,20 @@ +#ifndef _ASM_ARM64_PARAVIRT_H +#define _ASM_ARM64_PARAVIRT_H + +#ifdef CONFIG_PARAVIRT +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops { + unsigned long long (*steal_clock)(int cpu); +}; +extern struct pv_time_ops pv_time_ops; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return pv_time_ops.steal_clock(cpu); +} +#endif + +#endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 27bf1e5..83cd7e6 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -41,6 +41,7 @@ arm64-obj-$(CONFIG_EFI) += efi.o efi-entry.stub.o arm64-obj-$(CONFIG_PCI) += pci.o arm64-obj-$(CONFIG_ARMV8_DEPRECATED) += armv8_deprecated.o arm64-obj-$(CONFIG_ACPI) += acpi.o +arm64-obj-$(CONFIG_PARAVIRT) += paravirt.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c new file mode 100644 index 0000000..53f371e --- /dev/null +++ b/arch/arm64/kernel/paravirt.c @@ -0,0 +1,25 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2013 Citrix Systems + * + * Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com> + */ + +#include <linux/export.h> +#include <linux/jump_label.h> +#include <linux/types.h> +#include <asm/paravirt.h> + +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; + +struct pv_time_ops pv_time_ops; +EXPORT_SYMBOL_GPL(pv_time_ops); diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 8bbe940..70df80e 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -80,6 +80,7 @@ HYPERCALL2(memory_op); HYPERCALL2(physdev_op); HYPERCALL3(vcpu_op); HYPERCALL1(tmem_op); +HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); ENTRY(privcmd_call) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 4c20dd3..3bcdcc8 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -310,10 +310,10 @@ HYPERVISOR_mca(struct xen_mc *mc_op) } static inline int -HYPERVISOR_dom0_op(struct xen_platform_op *platform_op) +HYPERVISOR_platform_op(struct xen_platform_op *op) { - platform_op->interface_version = XENPF_INTERFACE_VERSION; - return _hypercall1(int, dom0_op, platform_op); + op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, op); } static inline int diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index acda713..abf4901 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -64,7 +64,7 @@ static u32 xen_apic_read(u32 reg) if (reg != APIC_ID) return 0; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) return 0; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 2306392..d09e4c9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -415,7 +415,7 @@ static bool __init xen_check_mwait(void) set_xen_guest_handle(op.u.set_pminfo.pdc, buf); - if ((HYPERVISOR_dom0_op(&op) == 0) && + if ((HYPERVISOR_platform_op(&op) == 0) && (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { cpuid_leaf5_ecx_val = cx; cpuid_leaf5_edx_val = dx; @@ -1365,7 +1365,7 @@ static void __init xen_boot_params_init_edd(void) info->params.length = sizeof(info->params); set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params, &info->params); - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) break; @@ -1383,7 +1383,7 @@ static void __init xen_boot_params_init_edd(void) op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE; for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) { op.u.firmware_info.index = nr; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) break; mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature; @@ -1690,7 +1690,7 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_start_info->console.domU.mfn = 0; xen_start_info->console.domU.evtchn = 0; - if (HYPERVISOR_dom0_op(&op) == 0) + if (HYPERVISOR_platform_op(&op) == 0) boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags; /* Make sure ACS will be enabled */ diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index df0c405..7f664c4 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -34,7 +34,8 @@ static void xen_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_init_shared_info(); + if (!suspend_cancelled) + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index f1ba6a0..a0a4e55 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -16,6 +16,7 @@ #include <linux/gfp.h> #include <linux/slab.h> #include <linux/pvclock_gtod.h> +#include <linux/timekeeper_internal.h> #include <asm/pvclock.h> #include <asm/xen/hypervisor.h> @@ -32,86 +33,12 @@ #define TIMER_SLOP 100000 #define NS_PER_TICK (1000000000LL / HZ) -/* runstate info updated by Xen */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); - /* snapshots of runstate info */ static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); /* unused ns of stolen time */ static DEFINE_PER_CPU(u64, xen_residual_stolen); -/* return an consistent snapshot of 64-bit time/counter value */ -static u64 get64(const u64 *p) -{ - u64 ret; - - if (BITS_PER_LONG < 64) { - u32 *p32 = (u32 *)p; - u32 h, l; - - /* - * Read high then low, and then make sure high is - * still the same; this will only loop if low wraps - * and carries into high. - * XXX some clean way to make this endian-proof? - */ - do { - h = p32[1]; - barrier(); - l = p32[0]; - barrier(); - } while (p32[1] != h); - - ret = (((u64)h) << 32) | l; - } else - ret = *p; - - return ret; -} - -/* - * Runstate accounting - */ -static void get_runstate_snapshot(struct vcpu_runstate_info *res) -{ - u64 state_time; - struct vcpu_runstate_info *state; - - BUG_ON(preemptible()); - - state = this_cpu_ptr(&xen_runstate); - - /* - * The runstate info is always updated by the hypervisor on - * the current CPU, so there's no need to use anything - * stronger than a compiler barrier when fetching it. - */ - do { - state_time = get64(&state->state_entry_time); - barrier(); - *res = *state; - barrier(); - } while (get64(&state->state_entry_time) != state_time); -} - -/* return true when a vcpu could run but has no real cpu to run on */ -bool xen_vcpu_stolen(int vcpu) -{ - return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; -} - -void xen_setup_runstate_info(int cpu) -{ - struct vcpu_register_runstate_memory_area area; - - area.addr.v = &per_cpu(xen_runstate, cpu); - - if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, - cpu, &area)) - BUG(); -} - static void do_stolen_accounting(void) { struct vcpu_runstate_info state; @@ -119,7 +46,7 @@ static void do_stolen_accounting(void) s64 runnable, offline, stolen; cputime_t ticks; - get_runstate_snapshot(&state); + xen_get_runstate_snapshot(&state); WARN_ON(state.state != RUNSTATE_running); @@ -194,26 +121,46 @@ static int xen_pvclock_gtod_notify(struct notifier_block *nb, unsigned long was_set, void *priv) { /* Protected by the calling core code serialization */ - static struct timespec next_sync; + static struct timespec64 next_sync; struct xen_platform_op op; - struct timespec now; + struct timespec64 now; + struct timekeeper *tk = priv; + static bool settime64_supported = true; + int ret; - now = __current_kernel_time(); + now.tv_sec = tk->xtime_sec; + now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); /* * We only take the expensive HV call when the clock was set * or when the 11 minutes RTC synchronization time elapsed. */ - if (!was_set && timespec_compare(&now, &next_sync) < 0) + if (!was_set && timespec64_compare(&now, &next_sync) < 0) return NOTIFY_OK; - op.cmd = XENPF_settime; - op.u.settime.secs = now.tv_sec; - op.u.settime.nsecs = now.tv_nsec; - op.u.settime.system_time = xen_clocksource_read(); +again: + if (settime64_supported) { + op.cmd = XENPF_settime64; + op.u.settime64.mbz = 0; + op.u.settime64.secs = now.tv_sec; + op.u.settime64.nsecs = now.tv_nsec; + op.u.settime64.system_time = xen_clocksource_read(); + } else { + op.cmd = XENPF_settime32; + op.u.settime32.secs = now.tv_sec; + op.u.settime32.nsecs = now.tv_nsec; + op.u.settime32.system_time = xen_clocksource_read(); + } + + ret = HYPERVISOR_platform_op(&op); - (void)HYPERVISOR_dom0_op(&op); + if (ret == -ENOSYS && settime64_supported) { + settime64_supported = false; + goto again; + } + if (ret < 0) + return NOTIFY_BAD; /* * Move the next drift compensation time 11 minutes diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa8a7f7..9b7a35c 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,6 +1,6 @@ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_X86) += fallback.o -obj-y += grant-table.o features.o balloon.o manage.o preempt.o +obj-y += grant-table.o features.o balloon.o manage.o preempt.o time.o obj-y += events/ obj-y += xenbus/ diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c index 90307c0..6893c79 100644 --- a/drivers/xen/acpi.c +++ b/drivers/xen/acpi.c @@ -58,7 +58,7 @@ static int xen_acpi_notify_hypervisor_state(u8 sleep_state, bits, val_a, val_b)) return -1; - HYPERVISOR_dom0_op(&op); + HYPERVISOR_platform_op(&op); return 1; } diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c index f745db2..be7e56a 100644 --- a/drivers/xen/efi.c +++ b/drivers/xen/efi.c @@ -42,7 +42,7 @@ static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { struct xen_platform_op op = INIT_EFI_OP(get_time); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; if (tm) { @@ -67,7 +67,7 @@ static efi_status_t xen_efi_set_time(efi_time_t *tm) BUILD_BUG_ON(sizeof(*tm) != sizeof(efi_data(op).u.set_time)); memcpy(&efi_data(op).u.set_time, tm, sizeof(*tm)); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -79,7 +79,7 @@ static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, { struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; if (tm) { @@ -108,7 +108,7 @@ static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) else efi_data(op).misc |= XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -129,7 +129,7 @@ static efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_data(op).u.get_variable.size = *data_size; set_xen_guest_handle(efi_data(op).u.get_variable.data, data); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *data_size = efi_data(op).u.get_variable.size; @@ -152,7 +152,7 @@ static efi_status_t xen_efi_get_next_variable(unsigned long *name_size, memcpy(&efi_data(op).u.get_next_variable_name.vendor_guid, vendor, sizeof(*vendor)); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *name_size = efi_data(op).u.get_next_variable_name.size; @@ -178,7 +178,7 @@ static efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_data(op).u.set_variable.size = data_size; set_xen_guest_handle(efi_data(op).u.set_variable.data, data); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -196,7 +196,7 @@ static efi_status_t xen_efi_query_variable_info(u32 attr, efi_data(op).u.query_variable_info.attr = attr; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *storage_space = efi_data(op).u.query_variable_info.max_store_size; @@ -210,7 +210,7 @@ static efi_status_t xen_efi_get_next_high_mono_count(u32 *count) { struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count); - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *count = efi_data(op).misc; @@ -232,7 +232,7 @@ static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules, efi_data(op).u.update_capsule.capsule_count = count; efi_data(op).u.update_capsule.sg_list = sg_list; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; return efi_data(op).status; @@ -252,7 +252,7 @@ static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules, capsules); efi_data(op).u.query_capsule_capabilities.capsule_count = count; - if (HYPERVISOR_dom0_op(&op) < 0) + if (HYPERVISOR_platform_op(&op) < 0) return EFI_UNSUPPORTED; *max_size = efi_data(op).u.query_capsule_capabilities.max_capsule_size; @@ -331,7 +331,7 @@ efi_system_table_t __init *xen_efi_probe(void) }; union xenpf_efi_info *info = &op.u.firmware_info.u.efi_info; - if (!xen_initial_domain() || HYPERVISOR_dom0_op(&op) < 0) + if (!xen_initial_domain() || HYPERVISOR_platform_op(&op) < 0) return NULL; /* Here we know that Xen runs on EFI platform. */ @@ -347,7 +347,7 @@ efi_system_table_t __init *xen_efi_probe(void) info->vendor.bufsz = sizeof(vendor); set_xen_guest_handle(info->vendor.name, vendor); - if (HYPERVISOR_dom0_op(&op) == 0) { + if (HYPERVISOR_platform_op(&op) == 0) { efi_systab_xen.fw_vendor = __pa_symbol(vendor); efi_systab_xen.fw_revision = info->vendor.revision; } else @@ -357,14 +357,14 @@ efi_system_table_t __init *xen_efi_probe(void) op.u.firmware_info.type = XEN_FW_EFI_INFO; op.u.firmware_info.index = XEN_FW_EFI_VERSION; - if (HYPERVISOR_dom0_op(&op) == 0) + if (HYPERVISOR_platform_op(&op) == 0) efi_systab_xen.hdr.revision = info->version; op.cmd = XENPF_firmware_info; op.u.firmware_info.type = XEN_FW_EFI_INFO; op.u.firmware_info.index = XEN_FW_EFI_RT_VERSION; - if (HYPERVISOR_dom0_op(&op) == 0) + if (HYPERVISOR_platform_op(&op) == 0) efi.runtime_version = info->version; return &efi_systab_xen; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 1be5dd0..dc49538 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -518,7 +518,7 @@ static void mn_release(struct mmu_notifier *mn, mutex_unlock(&priv->lock); } -static struct mmu_notifier_ops gntdev_mmu_ops = { +static const struct mmu_notifier_ops gntdev_mmu_ops = { .release = mn_release, .invalidate_page = mn_invl_page, .invalidate_range_start = mn_invl_range_start, @@ -748,6 +748,206 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) return rc; } +#define GNTDEV_COPY_BATCH 24 + +struct gntdev_copy_batch { + struct gnttab_copy ops[GNTDEV_COPY_BATCH]; + struct page *pages[GNTDEV_COPY_BATCH]; + s16 __user *status[GNTDEV_COPY_BATCH]; + unsigned int nr_ops; + unsigned int nr_pages; +}; + +static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt, + bool writeable, unsigned long *gfn) +{ + unsigned long addr = (unsigned long)virt; + struct page *page; + unsigned long xen_pfn; + int ret; + + ret = get_user_pages_fast(addr, 1, writeable, &page); + if (ret < 0) + return ret; + + batch->pages[batch->nr_pages++] = page; + + xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK); + *gfn = pfn_to_gfn(xen_pfn); + + return 0; +} + +static void gntdev_put_pages(struct gntdev_copy_batch *batch) +{ + unsigned int i; + + for (i = 0; i < batch->nr_pages; i++) + put_page(batch->pages[i]); + batch->nr_pages = 0; +} + +static int gntdev_copy(struct gntdev_copy_batch *batch) +{ + unsigned int i; + + gnttab_batch_copy(batch->ops, batch->nr_ops); + gntdev_put_pages(batch); + + /* + * For each completed op, update the status if the op failed + * and all previous ops for the segment were successful. + */ + for (i = 0; i < batch->nr_ops; i++) { + s16 status = batch->ops[i].status; + s16 old_status; + + if (status == GNTST_okay) + continue; + + if (__get_user(old_status, batch->status[i])) + return -EFAULT; + + if (old_status != GNTST_okay) + continue; + + if (__put_user(status, batch->status[i])) + return -EFAULT; + } + + batch->nr_ops = 0; + return 0; +} + +static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch, + struct gntdev_grant_copy_segment *seg, + s16 __user *status) +{ + uint16_t copied = 0; + + /* + * Disallow local -> local copies since there is only space in + * batch->pages for one page per-op and this would be a very + * expensive memcpy(). + */ + if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref))) + return -EINVAL; + + /* Can't cross page if source/dest is a grant ref. */ + if (seg->flags & GNTCOPY_source_gref) { + if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE) + return -EINVAL; + } + if (seg->flags & GNTCOPY_dest_gref) { + if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE) + return -EINVAL; + } + + if (put_user(GNTST_okay, status)) + return -EFAULT; + + while (copied < seg->len) { + struct gnttab_copy *op; + void __user *virt; + size_t len, off; + unsigned long gfn; + int ret; + + if (batch->nr_ops >= GNTDEV_COPY_BATCH) { + ret = gntdev_copy(batch); + if (ret < 0) + return ret; + } + + len = seg->len - copied; + + op = &batch->ops[batch->nr_ops]; + op->flags = 0; + + if (seg->flags & GNTCOPY_source_gref) { + op->source.u.ref = seg->source.foreign.ref; + op->source.domid = seg->source.foreign.domid; + op->source.offset = seg->source.foreign.offset + copied; + op->flags |= GNTCOPY_source_gref; + } else { + virt = seg->source.virt + copied; + off = (unsigned long)virt & ~XEN_PAGE_MASK; + len = min(len, (size_t)XEN_PAGE_SIZE - off); + + ret = gntdev_get_page(batch, virt, false, &gfn); + if (ret < 0) + return ret; + + op->source.u.gmfn = gfn; + op->source.domid = DOMID_SELF; + op->source.offset = off; + } + + if (seg->flags & GNTCOPY_dest_gref) { + op->dest.u.ref = seg->dest.foreign.ref; + op->dest.domid = seg->dest.foreign.domid; + op->dest.offset = seg->dest.foreign.offset + copied; + op->flags |= GNTCOPY_dest_gref; + } else { + virt = seg->dest.virt + copied; + off = (unsigned long)virt & ~XEN_PAGE_MASK; + len = min(len, (size_t)XEN_PAGE_SIZE - off); + + ret = gntdev_get_page(batch, virt, true, &gfn); + if (ret < 0) + return ret; + + op->dest.u.gmfn = gfn; + op->dest.domid = DOMID_SELF; + op->dest.offset = off; + } + + op->len = len; + copied += len; + + batch->status[batch->nr_ops] = status; + batch->nr_ops++; + } + + return 0; +} + +static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u) +{ + struct ioctl_gntdev_grant_copy copy; + struct gntdev_copy_batch batch; + unsigned int i; + int ret = 0; + + if (copy_from_user(©, u, sizeof(copy))) + return -EFAULT; + + batch.nr_ops = 0; + batch.nr_pages = 0; + + for (i = 0; i < copy.count; i++) { + struct gntdev_grant_copy_segment seg; + + if (copy_from_user(&seg, ©.segments[i], sizeof(seg))) { + ret = -EFAULT; + goto out; + } + + ret = gntdev_grant_copy_seg(&batch, &seg, ©.segments[i].status); + if (ret < 0) + goto out; + + cond_resched(); + } + if (batch.nr_ops) + ret = gntdev_copy(&batch); + return ret; + + out: + gntdev_put_pages(&batch); + return ret; +} + static long gntdev_ioctl(struct file *flip, unsigned int cmd, unsigned long arg) { @@ -767,6 +967,9 @@ static long gntdev_ioctl(struct file *flip, case IOCTL_GNTDEV_SET_UNMAP_NOTIFY: return gntdev_ioctl_notify(priv, ptr); + case IOCTL_GNTDEV_GRANT_COPY: + return gntdev_ioctl_grant_copy(priv, ptr); + default: pr_debug("priv %p, unknown cmd %x\n", priv, cmd); return -ENOIOCTLCMD; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index c49f79e..effbaf9 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -128,7 +128,7 @@ struct unmap_refs_callback_data { int result; }; -static struct gnttab_ops *gnttab_interface; +static const struct gnttab_ops *gnttab_interface; static int grant_table_version; static int grefs_per_grant_frame; @@ -1013,7 +1013,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return rc; } -static struct gnttab_ops gnttab_v1_ops = { +static const struct gnttab_ops gnttab_v1_ops = { .map_frames = gnttab_map_frames_v1, .unmap_frames = gnttab_unmap_frames_v1, .update_entry = gnttab_update_entry_v1, diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index 49e88f2..cdc6daa 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -78,7 +78,7 @@ static int xen_pcpu_down(uint32_t cpu_id) .u.cpu_ol.cpuid = cpu_id, }; - return HYPERVISOR_dom0_op(&op); + return HYPERVISOR_platform_op(&op); } static int xen_pcpu_up(uint32_t cpu_id) @@ -89,7 +89,7 @@ static int xen_pcpu_up(uint32_t cpu_id) .u.cpu_ol.cpuid = cpu_id, }; - return HYPERVISOR_dom0_op(&op); + return HYPERVISOR_platform_op(&op); } static ssize_t show_online(struct device *dev, @@ -277,7 +277,7 @@ static int sync_pcpu(uint32_t cpu, uint32_t *max_cpu) .u.pcpu_info.xen_cpuid = cpu, }; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) return ret; @@ -364,7 +364,7 @@ int xen_pcpu_id(uint32_t acpi_id) op.cmd = XENPF_get_cpuinfo; while (cpu_id <= max_id) { op.u.pcpu_info.xen_cpuid = cpu_id; - if (HYPERVISOR_dom0_op(&op)) { + if (HYPERVISOR_platform_op(&op)) { cpu_id++; continue; } diff --git a/drivers/xen/time.c b/drivers/xen/time.c new file mode 100644 index 0000000..7107842 --- /dev/null +++ b/drivers/xen/time.c @@ -0,0 +1,88 @@ +/* + * Xen stolen ticks accounting. + */ +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/math64.h> +#include <linux/gfp.h> + +#include <asm/xen/hypervisor.h> +#include <asm/xen/hypercall.h> + +#include <xen/events.h> +#include <xen/features.h> +#include <xen/interface/xen.h> +#include <xen/interface/vcpu.h> +#include <xen/xen-ops.h> + +/* runstate info updated by Xen */ +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); + +/* return an consistent snapshot of 64-bit time/counter value */ +static u64 get64(const u64 *p) +{ + u64 ret; + + if (BITS_PER_LONG < 64) { + u32 *p32 = (u32 *)p; + u32 h, l, h2; + + /* + * Read high then low, and then make sure high is + * still the same; this will only loop if low wraps + * and carries into high. + * XXX some clean way to make this endian-proof? + */ + do { + h = READ_ONCE(p32[1]); + l = READ_ONCE(p32[0]); + h2 = READ_ONCE(p32[1]); + } while(h2 != h); + + ret = (((u64)h) << 32) | l; + } else + ret = READ_ONCE(*p); + + return ret; +} + +/* + * Runstate accounting + */ +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res) +{ + u64 state_time; + struct vcpu_runstate_info *state; + + BUG_ON(preemptible()); + + state = this_cpu_ptr(&xen_runstate); + + /* + * The runstate info is always updated by the hypervisor on + * the current CPU, so there's no need to use anything + * stronger than a compiler barrier when fetching it. + */ + do { + state_time = get64(&state->state_entry_time); + *res = READ_ONCE(*state); + } while (get64(&state->state_entry_time) != state_time); +} + +/* return true when a vcpu could run but has no real cpu to run on */ +bool xen_vcpu_stolen(int vcpu) +{ + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; +} + +void xen_setup_runstate_info(int cpu) +{ + struct vcpu_register_runstate_memory_area area; + + area.addr.v = &per_cpu(xen_runstate, cpu); + + if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, + cpu, &area)) + BUG(); +} + diff --git a/drivers/xen/xen-acpi-cpuhotplug.c b/drivers/xen/xen-acpi-cpuhotplug.c index f4a3694..fdc9e67 100644 --- a/drivers/xen/xen-acpi-cpuhotplug.c +++ b/drivers/xen/xen-acpi-cpuhotplug.c @@ -206,7 +206,7 @@ static int xen_hotadd_cpu(struct acpi_processor *pr) op.u.cpu_add.acpi_id = pr->acpi_id; op.u.cpu_add.pxm = pxm; - cpu_id = HYPERVISOR_dom0_op(&op); + cpu_id = HYPERVISOR_platform_op(&op); if (cpu_id < 0) pr_err(PREFIX "Failed to hotadd CPU for acpi_id %d\n", pr->acpi_id); diff --git a/drivers/xen/xen-acpi-pad.c b/drivers/xen/xen-acpi-pad.c index f83b754..23d1808 100644 --- a/drivers/xen/xen-acpi-pad.c +++ b/drivers/xen/xen-acpi-pad.c @@ -36,7 +36,7 @@ static int xen_acpi_pad_idle_cpus(unsigned int idle_nums) op.u.core_parking.type = XEN_CORE_PARKING_SET; op.u.core_parking.idle_nums = idle_nums; - return HYPERVISOR_dom0_op(&op); + return HYPERVISOR_platform_op(&op); } static int xen_acpi_pad_idle_cpus_num(void) @@ -46,7 +46,7 @@ static int xen_acpi_pad_idle_cpus_num(void) op.cmd = XENPF_core_parking; op.u.core_parking.type = XEN_CORE_PARKING_GET; - return HYPERVISOR_dom0_op(&op) + return HYPERVISOR_platform_op(&op) ?: op.u.core_parking.idle_nums; } diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 70fa438..076970a 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -116,7 +116,7 @@ static int push_cxx_to_hypervisor(struct acpi_processor *_pr) set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states); if (!no_hypercall) - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (!ret) { pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id); @@ -244,7 +244,7 @@ static int push_pxx_to_hypervisor(struct acpi_processor *_pr) } if (!no_hypercall) - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (!ret) { struct acpi_processor_performance *perf; @@ -302,7 +302,7 @@ static unsigned int __init get_max_acpi_id(void) info = &op.u.pcpu_info; info->xen_cpuid = 0; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) return NR_CPUS; @@ -310,7 +310,7 @@ static unsigned int __init get_max_acpi_id(void) last_cpu = op.u.pcpu_info.max_present; for (i = 0; i <= last_cpu; i++) { info->xen_cpuid = i; - ret = HYPERVISOR_dom0_op(&op); + ret = HYPERVISOR_platform_op(&op); if (ret) continue; max_acpi_id = max(info->acpi_id, max_acpi_id); diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c index f8b1285..a03f261 100644 --- a/drivers/xen/xenfs/xensyms.c +++ b/drivers/xen/xenfs/xensyms.c @@ -31,7 +31,7 @@ static int xensyms_next_sym(struct xensyms *xs) symnum = symdata->symnum; - ret = HYPERVISOR_dom0_op(&xs->op); + ret = HYPERVISOR_platform_op(&xs->op); if (ret < 0) return ret; @@ -50,7 +50,7 @@ static int xensyms_next_sym(struct xensyms *xs) set_xen_guest_handle(symdata->name, xs->name); symdata->symnum--; /* Rewind */ - ret = HYPERVISOR_dom0_op(&xs->op); + ret = HYPERVISOR_platform_op(&xs->op); if (ret < 0) return ret; } diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h index aa7610a..d066197 100644 --- a/include/uapi/xen/gntdev.h +++ b/include/uapi/xen/gntdev.h @@ -144,6 +144,56 @@ struct ioctl_gntdev_unmap_notify { __u32 event_channel_port; }; +struct gntdev_grant_copy_segment { + union { + void __user *virt; + struct { + grant_ref_t ref; + __u16 offset; + domid_t domid; + } foreign; + } source, dest; + __u16 len; + + __u16 flags; /* GNTCOPY_* */ + __s16 status; /* GNTST_* */ +}; + +/* + * Copy between grant references and local buffers. + * + * The copy is split into @count @segments, each of which can copy + * to/from one grant reference. + * + * Each segment is similar to struct gnttab_copy in the hypervisor ABI + * except the local buffer is specified using a virtual address + * (instead of a GFN and offset). + * + * The local buffer may cross a Xen page boundary -- the driver will + * split segments into multiple ops if required. + * + * Returns 0 if all segments have been processed and @status in each + * segment is valid. Note that one or more segments may have failed + * (status != GNTST_okay). + * + * If the driver had to split a segment into two or more ops, @status + * includes the status of the first failed op for that segment (or + * GNTST_okay if all ops were successful). + * + * If -1 is returned, the status of all segments is undefined. + * + * EINVAL: A segment has local buffers for both source and + * destination. + * EINVAL: A segment crosses the boundary of a foreign page. + * EFAULT: A segment's local buffer is not accessible. + */ +#define IOCTL_GNTDEV_GRANT_COPY \ + _IOC(_IOC_NONE, 'G', 8, sizeof(struct ioctl_gntdev_grant_copy)) +struct ioctl_gntdev_grant_copy { + unsigned int count; + struct gntdev_grant_copy_segment __user *segments; +}; + /* Clear (set to zero) the byte specified by index */ #define UNMAP_NOTIFY_CLEAR_BYTE 0x1 /* Send an interrupt on the indicated event channel */ diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index 8e03587..732efb0 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -35,14 +35,23 @@ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, * 1 January, 1970 if the current system time was <system_time>. */ -#define XENPF_settime 17 -struct xenpf_settime { +#define XENPF_settime32 17 +struct xenpf_settime32 { /* IN variables. */ uint32_t secs; uint32_t nsecs; uint64_t system_time; }; -DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t); +DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime32_t); +#define XENPF_settime64 62 +struct xenpf_settime64 { + /* IN variables. */ + uint64_t secs; + uint32_t nsecs; + uint32_t mbz; + uint64_t system_time; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime64_t); /* * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. @@ -495,7 +504,8 @@ struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ union { - struct xenpf_settime settime; + struct xenpf_settime32 settime32; + struct xenpf_settime64 settime64; struct xenpf_add_memtype add_memtype; struct xenpf_del_memtype del_memtype; struct xenpf_read_memtype read_memtype; diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 167071c..d133112 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -48,7 +48,7 @@ #define __HYPERVISOR_set_callbacks 4 #define __HYPERVISOR_fpu_taskswitch 5 #define __HYPERVISOR_sched_op_compat 6 -#define __HYPERVISOR_dom0_op 7 +#define __HYPERVISOR_platform_op 7 #define __HYPERVISOR_set_debugreg 8 #define __HYPERVISOR_get_debugreg 9 #define __HYPERVISOR_update_descriptor 10 diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index e4e214a..86abe07 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,6 +5,7 @@ #include <linux/notifier.h> #include <linux/efi.h> #include <asm/xen/interface.h> +#include <xen/interface/vcpu.h> DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); @@ -18,6 +19,10 @@ void xen_arch_suspend(void); void xen_resume_notifier_register(struct notifier_block *nb); void xen_resume_notifier_unregister(struct notifier_block *nb); +bool xen_vcpu_stolen(int vcpu); +void xen_setup_runstate_info(int cpu); +void xen_get_runstate_snapshot(struct vcpu_runstate_info *res); + int xen_setup_shutdown_event(void); extern unsigned long *xen_contiguous_bitmap; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index d5ff5c6..b2ab2ff 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -5,6 +5,9 @@ #include <linux/static_key.h> #include <linux/context_tracking.h> #include "sched.h" +#ifdef CONFIG_PARAVIRT +#include <asm/paravirt.h> +#endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING |