diff options
29 files changed, 1264 insertions, 80 deletions
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h index fbb5198..09d5f7f 100644 --- a/arch/ia64/include/asm/xen/interface.h +++ b/arch/ia64/include/asm/xen/interface.h @@ -77,6 +77,7 @@ DEFINE_GUEST_HANDLE(int); DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); +DEFINE_GUEST_HANDLE(uint32_t); typedef unsigned long xen_pfn_t; DEFINE_GUEST_HANDLE(xen_pfn_t); diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index a1f2db5..cbf0c9d 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h @@ -56,6 +56,7 @@ DEFINE_GUEST_HANDLE(int); DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); DEFINE_GUEST_HANDLE(uint64_t); +DEFINE_GUEST_HANDLE(uint32_t); #endif #ifndef HYPERVISOR_VIRT_START diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index d99346e..7415aa9 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) out: return ret; } + +static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq) +{ + int ret = 0; + + if (pci_seg_supported) { + struct physdev_pci_device restore_ext; + + restore_ext.seg = pci_domain_nr(dev->bus); + restore_ext.bus = dev->bus->number; + restore_ext.devfn = dev->devfn; + ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext, + &restore_ext); + if (ret == -ENOSYS) + pci_seg_supported = false; + WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret); + } + if (!pci_seg_supported) { + struct physdev_restore_msi restore; + + restore.bus = dev->bus->number; + restore.devfn = dev->devfn; + ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore); + WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret); + } +} #endif static void xen_teardown_msi_irqs(struct pci_dev *dev) @@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void) #ifdef CONFIG_PCI_MSI x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs; x86_msi.teardown_msi_irq = xen_teardown_msi_irq; + x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs; #endif xen_setup_acpi_sci(); __acpi_register_gsi = acpi_register_gsi_xen; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4172af8..b132ade 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -62,6 +62,15 @@ #include <asm/reboot.h> #include <asm/stackprotector.h> #include <asm/hypervisor.h> +#include <asm/mwait.h> + +#ifdef CONFIG_ACPI +#include <linux/acpi.h> +#include <asm/acpi.h> +#include <acpi/pdc_intel.h> +#include <acpi/processor.h> +#include <xen/interface/platform.h> +#endif #include "xen-ops.h" #include "mmu.h" @@ -200,13 +209,17 @@ static void __init xen_banner(void) static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask; +static __read_mostly unsigned int cpuid_leaf5_ecx_val; +static __read_mostly unsigned int cpuid_leaf5_edx_val; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { unsigned maskebx = ~0; unsigned maskecx = ~0; unsigned maskedx = ~0; - + unsigned setecx = 0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. @@ -214,9 +227,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, switch (*ax) { case 1: maskecx = cpuid_leaf1_ecx_mask; + setecx = cpuid_leaf1_ecx_set_mask; maskedx = cpuid_leaf1_edx_mask; break; + case CPUID_MWAIT_LEAF: + /* Synthesize the values.. */ + *ax = 0; + *bx = 0; + *cx = cpuid_leaf5_ecx_val; + *dx = cpuid_leaf5_edx_val; + return; + case 0xb: /* Suppress extended topology stuff */ maskebx = 0; @@ -232,9 +254,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, *bx &= maskebx; *cx &= maskecx; + *cx |= setecx; *dx &= maskedx; + } +static bool __init xen_check_mwait(void) +{ +#ifdef CONFIG_ACPI + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .u.set_pminfo.id = -1, + .u.set_pminfo.type = XEN_PM_PDC, + }; + uint32_t buf[3]; + unsigned int ax, bx, cx, dx; + unsigned int mwait_mask; + + /* We need to determine whether it is OK to expose the MWAIT + * capability to the kernel to harvest deeper than C3 states from ACPI + * _CST using the processor_harvest_xen.c module. For this to work, we + * need to gather the MWAIT_LEAF values (which the cstate.c code + * checks against). The hypervisor won't expose the MWAIT flag because + * it would break backwards compatibility; so we will find out directly + * from the hardware and hypercall. + */ + if (!xen_initial_domain()) + return false; + + ax = 1; + cx = 0; + + native_cpuid(&ax, &bx, &cx, &dx); + + mwait_mask = (1 << (X86_FEATURE_EST % 32)) | + (1 << (X86_FEATURE_MWAIT % 32)); + + if ((cx & mwait_mask) != mwait_mask) + return false; + + /* We need to emulate the MWAIT_LEAF and for that we need both + * ecx and edx. The hypercall provides only partial information. + */ + + ax = CPUID_MWAIT_LEAF; + bx = 0; + cx = 0; + dx = 0; + + native_cpuid(&ax, &bx, &cx, &dx); + + /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, + * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. + */ + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); + + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + + if ((HYPERVISOR_dom0_op(&op) == 0) && + (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { + cpuid_leaf5_ecx_val = cx; + cpuid_leaf5_edx_val = dx; + } + return true; +#else + return false; +#endif +} static void __init xen_init_cpuid_mask(void) { unsigned int ax, bx, cx, dx; @@ -261,6 +349,9 @@ static void __init xen_init_cpuid_mask(void) /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ if ((cx & xsave_mask) != xsave_mask) cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ + + if (xen_check_mwait()) + cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } static void xen_set_debugreg(int reg, unsigned long val) @@ -777,11 +868,11 @@ static DEFINE_PER_CPU(unsigned long, xen_cr0_value); static unsigned long xen_read_cr0(void) { - unsigned long cr0 = percpu_read(xen_cr0_value); + unsigned long cr0 = this_cpu_read(xen_cr0_value); if (unlikely(cr0 == 0)) { cr0 = native_read_cr0(); - percpu_write(xen_cr0_value, cr0); + this_cpu_write(xen_cr0_value, cr0); } return cr0; @@ -791,7 +882,7 @@ static void xen_write_cr0(unsigned long cr0) { struct multicall_space mcs; - percpu_write(xen_cr0_value, cr0); + this_cpu_write(xen_cr0_value, cr0); /* Only pay attention to cr0.TS; everything else is ignored. */ diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 8bbb465..1573376 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -26,7 +26,7 @@ static unsigned long xen_save_fl(void) struct vcpu_info *vcpu; unsigned long flags; - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); /* flag has opposite sense of mask */ flags = !vcpu->evtchn_upcall_mask; @@ -50,7 +50,7 @@ static void xen_restore_fl(unsigned long flags) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = flags; preempt_enable_no_resched(); @@ -72,7 +72,7 @@ static void xen_irq_disable(void) make sure we're don't switch CPUs between getting the vcpu pointer and updating the mask. */ preempt_disable(); - percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; + this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1; preempt_enable_no_resched(); } PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); @@ -86,7 +86,7 @@ static void xen_irq_enable(void) the caller is confused and is trying to re-enable interrupts on an indeterminate processor. */ - vcpu = percpu_read(xen_vcpu); + vcpu = this_cpu_read(xen_vcpu); vcpu->evtchn_upcall_mask = 0; /* Doesn't matter if we get preempted here, because any diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 95c1cf6..988828b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1071,14 +1071,14 @@ static void drop_other_mm_ref(void *info) struct mm_struct *mm = info; struct mm_struct *active_mm; - active_mm = percpu_read(cpu_tlbstate.active_mm); + active_mm = this_cpu_read(cpu_tlbstate.active_mm); - if (active_mm == mm && percpu_read(cpu_tlbstate.state) != TLBSTATE_OK) + if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) leave_mm(smp_processor_id()); /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) + if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd)) load_cr3(swapper_pg_dir); } @@ -1185,17 +1185,17 @@ static void __init xen_pagetable_setup_done(pgd_t *base) static void xen_write_cr2(unsigned long cr2) { - percpu_read(xen_vcpu)->arch.cr2 = cr2; + this_cpu_read(xen_vcpu)->arch.cr2 = cr2; } static unsigned long xen_read_cr2(void) { - return percpu_read(xen_vcpu)->arch.cr2; + return this_cpu_read(xen_vcpu)->arch.cr2; } unsigned long xen_read_cr2_direct(void) { - return percpu_read(xen_vcpu_info.arch.cr2); + return this_cpu_read(xen_vcpu_info.arch.cr2); } static void xen_flush_tlb(void) @@ -1278,12 +1278,12 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, static unsigned long xen_read_cr3(void) { - return percpu_read(xen_cr3); + return this_cpu_read(xen_cr3); } static void set_current_cr3(void *v) { - percpu_write(xen_current_cr3, (unsigned long)v); + this_cpu_write(xen_current_cr3, (unsigned long)v); } static void __xen_write_cr3(bool kernel, unsigned long cr3) @@ -1306,7 +1306,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) xen_extend_mmuext_op(&op); if (kernel) { - percpu_write(xen_cr3, cr3); + this_cpu_write(xen_cr3, cr3); /* Update xen_current_cr3 once the batch has actually been submitted. */ @@ -1322,7 +1322,7 @@ static void xen_write_cr3(unsigned long cr3) /* Update while interrupts are disabled, so its atomic with respect to ipis */ - percpu_write(xen_cr3, cr3); + this_cpu_write(xen_cr3, cr3); __xen_write_cr3(true, cr3); diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index dee79b7..9c2e74f 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -47,7 +47,7 @@ static inline void xen_mc_issue(unsigned mode) xen_mc_flush(); /* restore flags saved in xen_mc_batch */ - local_irq_restore(percpu_read(xen_mc_irq_flags)); + local_irq_restore(this_cpu_read(xen_mc_irq_flags)); } /* Set up a callback to be called when the current batch is flushed */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e03c636..1236623 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -420,7 +420,6 @@ void __init xen_arch_setup(void) boot_cpu_data.hlt_works_ok = 1; #endif disable_cpuidle(); - boot_option_idle_override = IDLE_HALT; WARN_ON(set_pm_idle_to_default()); fiddle_vdso(); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 501d4e0..315d8fa 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -76,7 +76,7 @@ static void __cpuinit cpu_bringup(void) xen_setup_cpu_clockevents(); set_cpu_online(cpu, true); - percpu_write(cpu_state, CPU_ONLINE); + this_cpu_write(cpu_state, CPU_ONLINE); wmb(); /* We can take interrupts now: we're officially "up". */ diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 7b46781..8f675ae 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -558,7 +558,7 @@ config INPUT_CMA3000_I2C config INPUT_XEN_KBDDEV_FRONTEND tristate "Xen virtual keyboard and mouse support" - depends on XEN_FBDEV_FRONTEND + depends on XEN default y select XEN_XENBUS_FRONTEND help diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index af295bb..053670e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3163,6 +3163,31 @@ int __pci_reset_function(struct pci_dev *dev) EXPORT_SYMBOL_GPL(__pci_reset_function); /** + * __pci_reset_function_locked - reset a PCI device function while holding + * the @dev mutex lock. + * @dev: PCI device to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. The PCI device must be responsive + * to PCI config space in order to use this function. + * + * The device function is presumed to be unused and the caller is holding + * the device mutex lock when this function is called. + * Resetting the device will make the contents of PCI configuration space + * random, so any caller of this must be prepared to reinitialise the + * device including MSI, bus mastering, BARs, decoding IO and memory spaces, + * etc. + * + * Returns 0 if the device function was successfully reset or negative if the + * device doesn't support resetting a single function. + */ +int __pci_reset_function_locked(struct pci_dev *dev) +{ + return pci_dev_reset(dev, 1); +} +EXPORT_SYMBOL_GPL(__pci_reset_function_locked); + +/** * pci_probe_reset_function - check whether the device can be safely reset * @dev: PCI device to reset * diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig index 48cb8d3..0282a83 100644 --- a/drivers/tty/hvc/Kconfig +++ b/drivers/tty/hvc/Kconfig @@ -66,6 +66,14 @@ config HVC_XEN help Xen virtual console device driver +config HVC_XEN_FRONTEND + bool "Xen Hypervisor Multiple Consoles support" + depends on HVC_XEN + select XEN_XENBUS_FRONTEND + default y + help + Xen driver for secondary virtual consoles + config HVC_UDBG bool "udbg based fake hypervisor console" depends on PPC && EXPERIMENTAL diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index a1b0a75..83d5c88 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -23,44 +23,74 @@ #include <linux/err.h> #include <linux/init.h> #include <linux/types.h> +#include <linux/list.h> +#include <asm/io.h> #include <asm/xen/hypervisor.h> #include <xen/xen.h> +#include <xen/interface/xen.h> +#include <xen/hvm.h> +#include <xen/grant_table.h> #include <xen/page.h> #include <xen/events.h> #include <xen/interface/io/console.h> #include <xen/hvc-console.h> +#include <xen/xenbus.h> #include "hvc_console.h" #define HVC_COOKIE 0x58656e /* "Xen" in hex */ -static struct hvc_struct *hvc; -static int xencons_irq; +struct xencons_info { + struct list_head list; + struct xenbus_device *xbdev; + struct xencons_interface *intf; + unsigned int evtchn; + struct hvc_struct *hvc; + int irq; + int vtermno; + grant_ref_t gntref; +}; + +static LIST_HEAD(xenconsoles); +static DEFINE_SPINLOCK(xencons_lock); /* ------------------------------------------------------------------ */ -static unsigned long console_pfn = ~0ul; +static struct xencons_info *vtermno_to_xencons(int vtermno) +{ + struct xencons_info *entry, *n, *ret = NULL; + + if (list_empty(&xenconsoles)) + return NULL; + + list_for_each_entry_safe(entry, n, &xenconsoles, list) { + if (entry->vtermno == vtermno) { + ret = entry; + break; + } + } + + return ret; +} -static inline struct xencons_interface *xencons_interface(void) +static inline int xenbus_devid_to_vtermno(int devid) { - if (console_pfn == ~0ul) - return mfn_to_virt(xen_start_info->console.domU.mfn); - else - return __va(console_pfn << PAGE_SHIFT); + return devid + HVC_COOKIE; } -static inline void notify_daemon(void) +static inline void notify_daemon(struct xencons_info *cons) { /* Use evtchn: this is called early, before irq is set up. */ - notify_remote_via_evtchn(xen_start_info->console.domU.evtchn); + notify_remote_via_evtchn(cons->evtchn); } -static int __write_console(const char *data, int len) +static int __write_console(struct xencons_info *xencons, + const char *data, int len) { - struct xencons_interface *intf = xencons_interface(); XENCONS_RING_IDX cons, prod; + struct xencons_interface *intf = xencons->intf; int sent = 0; cons = intf->out_cons; @@ -75,13 +105,16 @@ static int __write_console(const char *data, int len) intf->out_prod = prod; if (sent) - notify_daemon(); + notify_daemon(xencons); return sent; } static int domU_write_console(uint32_t vtermno, const char *data, int len) { int ret = len; + struct xencons_info *cons = vtermno_to_xencons(vtermno); + if (cons == NULL) + return -EINVAL; /* * Make sure the whole buffer is emitted, polling if @@ -90,7 +123,7 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len) * kernel is crippled. */ while (len) { - int sent = __write_console(data, len); + int sent = __write_console(cons, data, len); data += sent; len -= sent; @@ -104,9 +137,13 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len) static int domU_read_console(uint32_t vtermno, char *buf, int len) { - struct xencons_interface *intf = xencons_interface(); + struct xencons_interface *intf; XENCONS_RING_IDX cons, prod; int recv = 0; + struct xencons_info *xencons = vtermno_to_xencons(vtermno); + if (xencons == NULL) + return -EINVAL; + intf = xencons->intf; cons = intf->in_cons; prod = intf->in_prod; @@ -119,7 +156,7 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; - notify_daemon(); + notify_daemon(xencons); return recv; } @@ -157,68 +194,407 @@ static struct hv_ops dom0_hvc_ops = { .notifier_hangup = notifier_hangup_irq, }; -static int __init xen_hvc_init(void) +static int xen_hvm_console_init(void) +{ + int r; + uint64_t v = 0; + unsigned long mfn; + struct xencons_info *info; + + if (!xen_hvm_domain()) + return -ENODEV; + + info = vtermno_to_xencons(HVC_COOKIE); + if (!info) { + info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); + if (!info) + return -ENOMEM; + } + + /* already configured */ + if (info->intf != NULL) + return 0; + + r = hvm_get_parameter(HVM_PARAM_CONSOLE_EVTCHN, &v); + if (r < 0) { + kfree(info); + return -ENODEV; + } + info->evtchn = v; + hvm_get_parameter(HVM_PARAM_CONSOLE_PFN, &v); + if (r < 0) { + kfree(info); + return -ENODEV; + } + mfn = v; + info->intf = ioremap(mfn << PAGE_SHIFT, PAGE_SIZE); + if (info->intf == NULL) { + kfree(info); + return -ENODEV; + } + info->vtermno = HVC_COOKIE; + + spin_lock(&xencons_lock); + list_add_tail(&info->list, &xenconsoles); + spin_unlock(&xencons_lock); + + return 0; +} + +static int xen_pv_console_init(void) { - struct hvc_struct *hp; - struct hv_ops *ops; + struct xencons_info *info; if (!xen_pv_domain()) return -ENODEV; - if (xen_initial_domain()) { - ops = &dom0_hvc_ops; - xencons_irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); - } else { - if (!xen_start_info->console.domU.evtchn) - return -ENODEV; + if (!xen_start_info->console.domU.evtchn) + return -ENODEV; - ops = &domU_hvc_ops; - xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); + info = vtermno_to_xencons(HVC_COOKIE); + if (!info) { + info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); + if (!info) + return -ENOMEM; } - if (xencons_irq < 0) - xencons_irq = 0; - else - irq_set_noprobe(xencons_irq); - hp = hvc_alloc(HVC_COOKIE, xencons_irq, ops, 256); - if (IS_ERR(hp)) - return PTR_ERR(hp); + /* already configured */ + if (info->intf != NULL) + return 0; + + info->evtchn = xen_start_info->console.domU.evtchn; + info->intf = mfn_to_virt(xen_start_info->console.domU.mfn); + info->vtermno = HVC_COOKIE; + + spin_lock(&xencons_lock); + list_add_tail(&info->list, &xenconsoles); + spin_unlock(&xencons_lock); + + return 0; +} + +static int xen_initial_domain_console_init(void) +{ + struct xencons_info *info; + + if (!xen_initial_domain()) + return -ENODEV; + + info = vtermno_to_xencons(HVC_COOKIE); + if (!info) { + info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); + if (!info) + return -ENOMEM; + } - hvc = hp; + info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0); + info->vtermno = HVC_COOKIE; - console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn); + spin_lock(&xencons_lock); + list_add_tail(&info->list, &xenconsoles); + spin_unlock(&xencons_lock); return 0; } void xen_console_resume(void) { - if (xencons_irq) - rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); + struct xencons_info *info = vtermno_to_xencons(HVC_COOKIE); + if (info != NULL && info->irq) + rebind_evtchn_irq(info->evtchn, info->irq); +} + +static void xencons_disconnect_backend(struct xencons_info *info) +{ + if (info->irq > 0) + unbind_from_irqhandler(info->irq, NULL); + info->irq = 0; + if (info->evtchn > 0) + xenbus_free_evtchn(info->xbdev, info->evtchn); + info->evtchn = 0; + if (info->gntref > 0) + gnttab_free_grant_references(info->gntref); + info->gntref = 0; + if (info->hvc != NULL) + hvc_remove(info->hvc); + info->hvc = NULL; +} + +static void xencons_free(struct xencons_info *info) +{ + free_page((unsigned long)info->intf); + info->intf = NULL; + info->vtermno = 0; + kfree(info); +} + +static int xen_console_remove(struct xencons_info *info) +{ + xencons_disconnect_backend(info); + spin_lock(&xencons_lock); + list_del(&info->list); + spin_unlock(&xencons_lock); + if (info->xbdev != NULL) + xencons_free(info); + else { + if (xen_hvm_domain()) + iounmap(info->intf); + kfree(info); + } + return 0; +} + +#ifdef CONFIG_HVC_XEN_FRONTEND +static struct xenbus_driver xencons_driver; + +static int xencons_remove(struct xenbus_device *dev) +{ + return xen_console_remove(dev_get_drvdata(&dev->dev)); +} + +static int xencons_connect_backend(struct xenbus_device *dev, + struct xencons_info *info) +{ + int ret, evtchn, devid, ref, irq; + struct xenbus_transaction xbt; + grant_ref_t gref_head; + unsigned long mfn; + + ret = xenbus_alloc_evtchn(dev, &evtchn); + if (ret) + return ret; + info->evtchn = evtchn; + irq = bind_evtchn_to_irq(evtchn); + if (irq < 0) + return irq; + info->irq = irq; + devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; + info->hvc = hvc_alloc(xenbus_devid_to_vtermno(devid), + irq, &domU_hvc_ops, 256); + if (IS_ERR(info->hvc)) + return PTR_ERR(info->hvc); + if (xen_pv_domain()) + mfn = virt_to_mfn(info->intf); + else + mfn = __pa(info->intf) >> PAGE_SHIFT; + ret = gnttab_alloc_grant_references(1, &gref_head); + if (ret < 0) + return ret; + info->gntref = gref_head; + ref = gnttab_claim_grant_reference(&gref_head); + if (ref < 0) + return ref; + gnttab_grant_foreign_access_ref(ref, info->xbdev->otherend_id, + mfn, 0); + + again: + ret = xenbus_transaction_start(&xbt); + if (ret) { + xenbus_dev_fatal(dev, ret, "starting transaction"); + return ret; + } + ret = xenbus_printf(xbt, dev->nodename, "ring-ref", "%d", ref); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "port", "%u", + evtchn); + if (ret) + goto error_xenbus; + ret = xenbus_printf(xbt, dev->nodename, "type", "ioemu"); + if (ret) + goto error_xenbus; + ret = xenbus_transaction_end(xbt, 0); + if (ret) { + if (ret == -EAGAIN) + goto again; + xenbus_dev_fatal(dev, ret, "completing transaction"); + return ret; + } + + xenbus_switch_state(dev, XenbusStateInitialised); + return 0; + + error_xenbus: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, ret, "writing xenstore"); + return ret; +} + +static int __devinit xencons_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int ret, devid; + struct xencons_info *info; + + devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; + if (devid == 0) + return -ENODEV; + + info = kzalloc(sizeof(struct xencons_info), GFP_KERNEL | __GFP_ZERO); + if (!info) + goto error_nomem; + dev_set_drvdata(&dev->dev, info); + info->xbdev = dev; + info->vtermno = xenbus_devid_to_vtermno(devid); + info->intf = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!info->intf) + goto error_nomem; + + ret = xencons_connect_backend(dev, info); + if (ret < 0) + goto error; + spin_lock(&xencons_lock); + list_add_tail(&info->list, &xenconsoles); + spin_unlock(&xencons_lock); + + return 0; + + error_nomem: + ret = -ENOMEM; + xenbus_dev_fatal(dev, ret, "allocating device memory"); + error: + xencons_disconnect_backend(info); + xencons_free(info); + return ret; +} + +static int xencons_resume(struct xenbus_device *dev) +{ + struct xencons_info *info = dev_get_drvdata(&dev->dev); + + xencons_disconnect_backend(info); + memset(info->intf, 0, PAGE_SIZE); + return xencons_connect_backend(dev, info); +} + +static void xencons_backend_changed(struct xenbus_device *dev, + enum xenbus_state backend_state) +{ + switch (backend_state) { + case XenbusStateReconfiguring: + case XenbusStateReconfigured: + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateUnknown: + case XenbusStateClosed: + break; + + case XenbusStateInitWait: + break; + + case XenbusStateConnected: + xenbus_switch_state(dev, XenbusStateConnected); + break; + + case XenbusStateClosing: + xenbus_frontend_closed(dev); + break; + } +} + +static const struct xenbus_device_id xencons_ids[] = { + { "console" }, + { "" } +}; + + +static DEFINE_XENBUS_DRIVER(xencons, "xenconsole", + .probe = xencons_probe, + .remove = xencons_remove, + .resume = xencons_resume, + .otherend_changed = xencons_backend_changed, +); +#endif /* CONFIG_HVC_XEN_FRONTEND */ + +static int __init xen_hvc_init(void) +{ + int r; + struct xencons_info *info; + const struct hv_ops *ops; + + if (!xen_domain()) + return -ENODEV; + + if (xen_initial_domain()) { + ops = &dom0_hvc_ops; + r = xen_initial_domain_console_init(); + if (r < 0) + return r; + info = vtermno_to_xencons(HVC_COOKIE); + } else { + ops = &domU_hvc_ops; + if (xen_hvm_domain()) + r = xen_hvm_console_init(); + else + r = xen_pv_console_init(); + if (r < 0) + return r; + + info = vtermno_to_xencons(HVC_COOKIE); + info->irq = bind_evtchn_to_irq(info->evtchn); + } + if (info->irq < 0) + info->irq = 0; /* NO_IRQ */ + else + irq_set_noprobe(info->irq); + + info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); + if (IS_ERR(info->hvc)) { + r = PTR_ERR(info->hvc); + spin_lock(&xencons_lock); + list_del(&info->list); + spin_unlock(&xencons_lock); + if (info->irq) + unbind_from_irqhandler(info->irq, NULL); + kfree(info); + return r; + } + + r = 0; +#ifdef CONFIG_HVC_XEN_FRONTEND + r = xenbus_register_frontend(&xencons_driver); +#endif + return r; } static void __exit xen_hvc_fini(void) { - if (hvc) - hvc_remove(hvc); + struct xencons_info *entry, *next; + + if (list_empty(&xenconsoles)) + return; + + list_for_each_entry_safe(entry, next, &xenconsoles, list) { + xen_console_remove(entry); + } } static int xen_cons_init(void) { - struct hv_ops *ops; + const struct hv_ops *ops; - if (!xen_pv_domain()) + if (!xen_domain()) return 0; if (xen_initial_domain()) ops = &dom0_hvc_ops; - else + else { + int r; ops = &domU_hvc_ops; + if (xen_hvm_domain()) + r = xen_hvm_console_init(); + else + r = xen_pv_console_init(); + if (r < 0) + return r; + } + hvc_instantiate(HVC_COOKIE, 0, ops); return 0; } + module_init(xen_hvc_init); module_exit(xen_hvc_fini); console_initcall(xen_cons_init); @@ -230,6 +606,9 @@ static void xenboot_write_console(struct console *console, const char *string, unsigned int linelen, off = 0; const char *pos; + if (!xen_pv_domain()) + return; + dom0_write_console(0, string, len); if (xen_initial_domain()) diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 6ca0c40..eca60c7 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -2269,6 +2269,7 @@ config XEN_FBDEV_FRONTEND select FB_SYS_IMAGEBLIT select FB_SYS_FOPS select FB_DEFERRED_IO + select INPUT_XEN_KBDDEV_FRONTEND select XEN_XENBUS_FRONTEND default y help diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a1ced52..648bcd4 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -178,4 +178,21 @@ config XEN_PRIVCMD depends on XEN default m +config XEN_ACPI_PROCESSOR + tristate "Xen ACPI processor" + depends on XEN && X86 && ACPI_PROCESSOR + default y if (X86_ACPI_CPUFREQ = y || X86_POWERNOW_K8 = y) + default m if (X86_ACPI_CPUFREQ = m || X86_POWERNOW_K8 = m) + help + This ACPI processor uploads Power Management information to the Xen hypervisor. + + To do that the driver parses the Power Management data and uploads said + information to the Xen hypervisor. Then the Xen hypervisor can select the + proper Cx and Pxx states. It also registers itslef as the SMM so that + other drivers (such as ACPI cpufreq scaling driver) will not load. + + To compile this driver as a module, choose M here: the + module will be called xen_acpi_processor If you do not know what to choose, + select M here. If the CPUFREQ drivers are built in, select Y here. + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index aa31337..9adc5be 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -20,7 +20,7 @@ obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o - +obj-$(CONFIG_XEN_ACPI_PROCESSOR) += xen-acpi-processor.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 1e0fe01..fdb6d22 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -97,7 +97,7 @@ static struct attribute *version_attrs[] = { NULL }; -static struct attribute_group version_group = { +static const struct attribute_group version_group = { .name = "version", .attrs = version_attrs, }; @@ -210,7 +210,7 @@ static struct attribute *xen_compile_attrs[] = { NULL }; -static struct attribute_group xen_compilation_group = { +static const struct attribute_group xen_compilation_group = { .name = "compilation", .attrs = xen_compile_attrs, }; @@ -340,7 +340,7 @@ static struct attribute *xen_properties_attrs[] = { NULL }; -static struct attribute_group xen_properties_group = { +static const struct attribute_group xen_properties_group = { .name = "properties", .attrs = xen_properties_attrs, }; diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c new file mode 100644 index 0000000..5c2be96 --- /dev/null +++ b/drivers/xen/xen-acpi-processor.c @@ -0,0 +1,562 @@ +/* + * Copyright 2012 by Oracle Inc + * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> + * + * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249 + * so many thanks go to Kevin Tian <kevin.tian@intel.com> + * and Yu Ke <ke.yu@intel.com>. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <linux/cpumask.h> +#include <linux/cpufreq.h> +#include <linux/freezer.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <acpi/acpi_bus.h> +#include <acpi/acpi_drivers.h> +#include <acpi/processor.h> + +#include <xen/interface/platform.h> +#include <asm/xen/hypercall.h> + +#define DRV_NAME "xen-acpi-processor: " + +static int no_hypercall; +MODULE_PARM_DESC(off, "Inhibit the hypercall."); +module_param_named(off, no_hypercall, int, 0400); + +/* + * Note: Do not convert the acpi_id* below to cpumask_var_t or use cpumask_bit + * - as those shrink to nr_cpu_bits (which is dependent on possible_cpu), which + * can be less than what we want to put in. Instead use the 'nr_acpi_bits' + * which is dynamically computed based on the MADT or x2APIC table. + */ +static unsigned int nr_acpi_bits; +/* Mutex to protect the acpi_ids_done - for CPU hotplug use. */ +static DEFINE_MUTEX(acpi_ids_mutex); +/* Which ACPI ID we have processed from 'struct acpi_processor'. */ +static unsigned long *acpi_ids_done; +/* Which ACPI ID exist in the SSDT/DSDT processor definitions. */ +static unsigned long __initdata *acpi_id_present; +/* And if there is an _CST definition (or a PBLK) for the ACPI IDs */ +static unsigned long __initdata *acpi_id_cst_present; + +static int push_cxx_to_hypervisor(struct acpi_processor *_pr) +{ + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.set_pminfo.id = _pr->acpi_id, + .u.set_pminfo.type = XEN_PM_CX, + }; + struct xen_processor_cx *dst_cx, *dst_cx_states = NULL; + struct acpi_processor_cx *cx; + unsigned int i, ok; + int ret = 0; + + dst_cx_states = kcalloc(_pr->power.count, + sizeof(struct xen_processor_cx), GFP_KERNEL); + if (!dst_cx_states) + return -ENOMEM; + + for (ok = 0, i = 1; i <= _pr->power.count; i++) { + cx = &_pr->power.states[i]; + if (!cx->valid) + continue; + + dst_cx = &(dst_cx_states[ok++]); + + dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO; + if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) { + dst_cx->reg.bit_width = 8; + dst_cx->reg.bit_offset = 0; + dst_cx->reg.access_size = 1; + } else { + dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE; + if (cx->entry_method == ACPI_CSTATE_FFH) { + /* NATIVE_CSTATE_BEYOND_HALT */ + dst_cx->reg.bit_offset = 2; + dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */ + } + dst_cx->reg.access_size = 0; + } + dst_cx->reg.address = cx->address; + + dst_cx->type = cx->type; + dst_cx->latency = cx->latency; + dst_cx->power = cx->power; + + dst_cx->dpcnt = 0; + set_xen_guest_handle(dst_cx->dp, NULL); + } + if (!ok) { + pr_debug(DRV_NAME "No _Cx for ACPI CPU %u\n", _pr->acpi_id); + kfree(dst_cx_states); + return -EINVAL; + } + op.u.set_pminfo.power.count = ok; + op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control; + op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check; + op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst; + op.u.set_pminfo.power.flags.power_setup_done = + _pr->flags.power_setup_done; + + set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states); + + if (!no_hypercall) + ret = HYPERVISOR_dom0_op(&op); + + if (!ret) { + pr_debug("ACPI CPU%u - C-states uploaded.\n", _pr->acpi_id); + for (i = 1; i <= _pr->power.count; i++) { + cx = &_pr->power.states[i]; + if (!cx->valid) + continue; + pr_debug(" C%d: %s %d uS\n", + cx->type, cx->desc, (u32)cx->latency); + } + } else + pr_err(DRV_NAME "(CX): Hypervisor error (%d) for ACPI CPU%u\n", + ret, _pr->acpi_id); + + kfree(dst_cx_states); + + return ret; +} +static struct xen_processor_px * +xen_copy_pss_data(struct acpi_processor *_pr, + struct xen_processor_performance *dst_perf) +{ + struct xen_processor_px *dst_states = NULL; + unsigned int i; + + BUILD_BUG_ON(sizeof(struct xen_processor_px) != + sizeof(struct acpi_processor_px)); + + dst_states = kcalloc(_pr->performance->state_count, + sizeof(struct xen_processor_px), GFP_KERNEL); + if (!dst_states) + return ERR_PTR(-ENOMEM); + + dst_perf->state_count = _pr->performance->state_count; + for (i = 0; i < _pr->performance->state_count; i++) { + /* Fortunatly for us, they are both the same size */ + memcpy(&(dst_states[i]), &(_pr->performance->states[i]), + sizeof(struct acpi_processor_px)); + } + return dst_states; +} +static int xen_copy_psd_data(struct acpi_processor *_pr, + struct xen_processor_performance *dst) +{ + struct acpi_psd_package *pdomain; + + BUILD_BUG_ON(sizeof(struct xen_psd_package) != + sizeof(struct acpi_psd_package)); + + /* This information is enumerated only if acpi_processor_preregister_performance + * has been called. + */ + dst->shared_type = _pr->performance->shared_type; + + pdomain = &(_pr->performance->domain_info); + + /* 'acpi_processor_preregister_performance' does not parse if the + * num_processors <= 1, but Xen still requires it. Do it manually here. + */ + if (pdomain->num_processors <= 1) { + if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ALL) + dst->shared_type = CPUFREQ_SHARED_TYPE_ALL; + else if (pdomain->coord_type == DOMAIN_COORD_TYPE_HW_ALL) + dst->shared_type = CPUFREQ_SHARED_TYPE_HW; + else if (pdomain->coord_type == DOMAIN_COORD_TYPE_SW_ANY) + dst->shared_type = CPUFREQ_SHARED_TYPE_ANY; + + } + memcpy(&(dst->domain_info), pdomain, sizeof(struct acpi_psd_package)); + return 0; +} +static int xen_copy_pct_data(struct acpi_pct_register *pct, + struct xen_pct_register *dst_pct) +{ + /* It would be nice if you could just do 'memcpy(pct, dst_pct') but + * sadly the Xen structure did not have the proper padding so the + * descriptor field takes two (dst_pct) bytes instead of one (pct). + */ + dst_pct->descriptor = pct->descriptor; + dst_pct->length = pct->length; + dst_pct->space_id = pct->space_id; + dst_pct->bit_width = pct->bit_width; + dst_pct->bit_offset = pct->bit_offset; + dst_pct->reserved = pct->reserved; + dst_pct->address = pct->address; + return 0; +} +static int push_pxx_to_hypervisor(struct acpi_processor *_pr) +{ + int ret = 0; + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .interface_version = XENPF_INTERFACE_VERSION, + .u.set_pminfo.id = _pr->acpi_id, + .u.set_pminfo.type = XEN_PM_PX, + }; + struct xen_processor_performance *dst_perf; + struct xen_processor_px *dst_states = NULL; + + dst_perf = &op.u.set_pminfo.perf; + + dst_perf->platform_limit = _pr->performance_platform_limit; + dst_perf->flags |= XEN_PX_PPC; + xen_copy_pct_data(&(_pr->performance->control_register), + &dst_perf->control_register); + xen_copy_pct_data(&(_pr->performance->status_register), + &dst_perf->status_register); + dst_perf->flags |= XEN_PX_PCT; + dst_states = xen_copy_pss_data(_pr, dst_perf); + if (!IS_ERR_OR_NULL(dst_states)) { + set_xen_guest_handle(dst_perf->states, dst_states); + dst_perf->flags |= XEN_PX_PSS; + } + if (!xen_copy_psd_data(_pr, dst_perf)) + dst_perf->flags |= XEN_PX_PSD; + + if (dst_perf->flags != (XEN_PX_PSD | XEN_PX_PSS | XEN_PX_PCT | XEN_PX_PPC)) { + pr_warn(DRV_NAME "ACPI CPU%u missing some P-state data (%x), skipping.\n", + _pr->acpi_id, dst_perf->flags); + ret = -ENODEV; + goto err_free; + } + + if (!no_hypercall) + ret = HYPERVISOR_dom0_op(&op); + + if (!ret) { + struct acpi_processor_performance *perf; + unsigned int i; + + perf = _pr->performance; + pr_debug("ACPI CPU%u - P-states uploaded.\n", _pr->acpi_id); + for (i = 0; i < perf->state_count; i++) { + pr_debug(" %cP%d: %d MHz, %d mW, %d uS\n", + (i == perf->state ? '*' : ' '), i, + (u32) perf->states[i].core_frequency, + (u32) perf->states[i].power, + (u32) perf->states[i].transition_latency); + } + } else if (ret != -EINVAL) + /* EINVAL means the ACPI ID is incorrect - meaning the ACPI + * table is referencing a non-existing CPU - which can happen + * with broken ACPI tables. */ + pr_warn(DRV_NAME "(_PXX): Hypervisor error (%d) for ACPI CPU%u\n", + ret, _pr->acpi_id); +err_free: + if (!IS_ERR_OR_NULL(dst_states)) + kfree(dst_states); + + return ret; +} +static int upload_pm_data(struct acpi_processor *_pr) +{ + int err = 0; + + mutex_lock(&acpi_ids_mutex); + if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done)) { + mutex_unlock(&acpi_ids_mutex); + return -EBUSY; + } + if (_pr->flags.power) + err = push_cxx_to_hypervisor(_pr); + + if (_pr->performance && _pr->performance->states) + err |= push_pxx_to_hypervisor(_pr); + + mutex_unlock(&acpi_ids_mutex); + return err; +} +static unsigned int __init get_max_acpi_id(void) +{ + struct xenpf_pcpuinfo *info; + struct xen_platform_op op = { + .cmd = XENPF_get_cpuinfo, + .interface_version = XENPF_INTERFACE_VERSION, + }; + int ret = 0; + unsigned int i, last_cpu, max_acpi_id = 0; + + info = &op.u.pcpu_info; + info->xen_cpuid = 0; + + ret = HYPERVISOR_dom0_op(&op); + if (ret) + return NR_CPUS; + + /* The max_present is the same irregardless of the xen_cpuid */ + last_cpu = op.u.pcpu_info.max_present; + for (i = 0; i <= last_cpu; i++) { + info->xen_cpuid = i; + ret = HYPERVISOR_dom0_op(&op); + if (ret) + continue; + max_acpi_id = max(info->acpi_id, max_acpi_id); + } + max_acpi_id *= 2; /* Slack for CPU hotplug support. */ + pr_debug(DRV_NAME "Max ACPI ID: %u\n", max_acpi_id); + return max_acpi_id; +} +/* + * The read_acpi_id and check_acpi_ids are there to support the Xen + * oddity of virtual CPUs != physical CPUs in the initial domain. + * The user can supply 'xen_max_vcpus=X' on the Xen hypervisor line + * which will band the amount of CPUs the initial domain can see. + * In general that is OK, except it plays havoc with any of the + * for_each_[present|online]_cpu macros which are banded to the virtual + * CPU amount. + */ +static acpi_status __init +read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + u32 acpi_id; + acpi_status status; + acpi_object_type acpi_type; + unsigned long long tmp; + union acpi_object object = { 0 }; + struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; + acpi_io_address pblk = 0; + + status = acpi_get_type(handle, &acpi_type); + if (ACPI_FAILURE(status)) + return AE_OK; + + switch (acpi_type) { + case ACPI_TYPE_PROCESSOR: + status = acpi_evaluate_object(handle, NULL, NULL, &buffer); + if (ACPI_FAILURE(status)) + return AE_OK; + acpi_id = object.processor.proc_id; + pblk = object.processor.pblk_address; + break; + case ACPI_TYPE_DEVICE: + status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); + if (ACPI_FAILURE(status)) + return AE_OK; + acpi_id = tmp; + break; + default: + return AE_OK; + } + /* There are more ACPI Processor objects than in x2APIC or MADT. + * This can happen with incorrect ACPI SSDT declerations. */ + if (acpi_id > nr_acpi_bits) { + pr_debug(DRV_NAME "We only have %u, trying to set %u\n", + nr_acpi_bits, acpi_id); + return AE_OK; + } + /* OK, There is a ACPI Processor object */ + __set_bit(acpi_id, acpi_id_present); + + pr_debug(DRV_NAME "ACPI CPU%u w/ PBLK:0x%lx\n", acpi_id, + (unsigned long)pblk); + + status = acpi_evaluate_object(handle, "_CST", NULL, &buffer); + if (ACPI_FAILURE(status)) { + if (!pblk) + return AE_OK; + } + /* .. and it has a C-state */ + __set_bit(acpi_id, acpi_id_cst_present); + + return AE_OK; +} +static int __init check_acpi_ids(struct acpi_processor *pr_backup) +{ + + if (!pr_backup) + return -ENODEV; + + /* All online CPUs have been processed at this stage. Now verify + * whether in fact "online CPUs" == physical CPUs. + */ + acpi_id_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); + if (!acpi_id_present) + return -ENOMEM; + + acpi_id_cst_present = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); + if (!acpi_id_cst_present) { + kfree(acpi_id_present); + return -ENOMEM; + } + + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, + read_acpi_id, NULL, NULL, NULL); + acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL); + + if (!bitmap_equal(acpi_id_present, acpi_ids_done, nr_acpi_bits)) { + unsigned int i; + for_each_set_bit(i, acpi_id_present, nr_acpi_bits) { + pr_backup->acpi_id = i; + /* Mask out C-states if there are no _CST or PBLK */ + pr_backup->flags.power = test_bit(i, acpi_id_cst_present); + (void)upload_pm_data(pr_backup); + } + } + kfree(acpi_id_present); + acpi_id_present = NULL; + kfree(acpi_id_cst_present); + acpi_id_cst_present = NULL; + return 0; +} +static int __init check_prereq(void) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + if (!xen_initial_domain()) + return -ENODEV; + + if (!acpi_gbl_FADT.smi_command) + return -ENODEV; + + if (c->x86_vendor == X86_VENDOR_INTEL) { + if (!cpu_has(c, X86_FEATURE_EST)) + return -ENODEV; + + return 0; + } + if (c->x86_vendor == X86_VENDOR_AMD) { + /* Copied from powernow-k8.h, can't include ../cpufreq/powernow + * as we get compile warnings for the static functions. + */ +#define CPUID_FREQ_VOLT_CAPABILITIES 0x80000007 +#define USE_HW_PSTATE 0x00000080 + u32 eax, ebx, ecx, edx; + cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); + if ((edx & USE_HW_PSTATE) != USE_HW_PSTATE) + return -ENODEV; + return 0; + } + return -ENODEV; +} +/* acpi_perf_data is a pointer to percpu data. */ +static struct acpi_processor_performance __percpu *acpi_perf_data; + +static void free_acpi_perf_data(void) +{ + unsigned int i; + + /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ + for_each_possible_cpu(i) + free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map); + free_percpu(acpi_perf_data); +} + +static int __init xen_acpi_processor_init(void) +{ + struct acpi_processor *pr_backup = NULL; + unsigned int i; + int rc = check_prereq(); + + if (rc) + return rc; + + nr_acpi_bits = get_max_acpi_id() + 1; + acpi_ids_done = kcalloc(BITS_TO_LONGS(nr_acpi_bits), sizeof(unsigned long), GFP_KERNEL); + if (!acpi_ids_done) + return -ENOMEM; + + acpi_perf_data = alloc_percpu(struct acpi_processor_performance); + if (!acpi_perf_data) { + pr_debug(DRV_NAME "Memory allocation error for acpi_perf_data.\n"); + kfree(acpi_ids_done); + return -ENOMEM; + } + for_each_possible_cpu(i) { + if (!zalloc_cpumask_var_node( + &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, + GFP_KERNEL, cpu_to_node(i))) { + rc = -ENOMEM; + goto err_out; + } + } + + /* Do initialization in ACPI core. It is OK to fail here. */ + (void)acpi_processor_preregister_performance(acpi_perf_data); + + for_each_possible_cpu(i) { + struct acpi_processor_performance *perf; + + perf = per_cpu_ptr(acpi_perf_data, i); + rc = acpi_processor_register_performance(perf, i); + if (WARN_ON(rc)) + goto err_out; + } + rc = acpi_processor_notify_smm(THIS_MODULE); + if (WARN_ON(rc)) + goto err_unregister; + + for_each_possible_cpu(i) { + struct acpi_processor *_pr; + _pr = per_cpu(processors, i /* APIC ID */); + if (!_pr) + continue; + + if (!pr_backup) { + pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL); + memcpy(pr_backup, _pr, sizeof(struct acpi_processor)); + } + (void)upload_pm_data(_pr); + } + rc = check_acpi_ids(pr_backup); + if (rc) + goto err_unregister; + + kfree(pr_backup); + + return 0; +err_unregister: + for_each_possible_cpu(i) { + struct acpi_processor_performance *perf; + perf = per_cpu_ptr(acpi_perf_data, i); + acpi_processor_unregister_performance(perf, i); + } +err_out: + /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ + free_acpi_perf_data(); + kfree(acpi_ids_done); + return rc; +} +static void __exit xen_acpi_processor_exit(void) +{ + int i; + + kfree(acpi_ids_done); + for_each_possible_cpu(i) { + struct acpi_processor_performance *perf; + perf = per_cpu_ptr(acpi_perf_data, i); + acpi_processor_unregister_performance(perf, i); + } + free_acpi_perf_data(); +} + +MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>"); +MODULE_DESCRIPTION("Xen ACPI Processor P-states (and Cx) driver which uploads PM data to Xen hypervisor"); +MODULE_LICENSE("GPL"); + +/* We want to be loaded before the CPU freq scaling drivers are loaded. + * They are loaded in late_initcall. */ +device_initcall(xen_acpi_processor_init); +module_exit(xen_acpi_processor_exit); diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index 596e6a7..8f37e23 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -207,7 +207,7 @@ static struct attribute *balloon_info_attrs[] = { NULL }; -static struct attribute_group balloon_info_group = { +static const struct attribute_group balloon_info_group = { .name = "info", .attrs = balloon_info_attrs }; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 19834d1..097e536 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -85,19 +85,34 @@ static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev) static void pcistub_device_release(struct kref *kref) { struct pcistub_device *psdev; + struct xen_pcibk_dev_data *dev_data; psdev = container_of(kref, struct pcistub_device, kref); + dev_data = pci_get_drvdata(psdev->dev); dev_dbg(&psdev->dev->dev, "pcistub_device_release\n"); xen_unregister_device_domain_owner(psdev->dev); - /* Clean-up the device */ + /* Call the reset function which does not take lock as this + * is called from "unbind" which takes a device_lock mutex. + */ + __pci_reset_function_locked(psdev->dev); + if (pci_load_and_free_saved_state(psdev->dev, + &dev_data->pci_saved_state)) { + dev_dbg(&psdev->dev->dev, "Could not reload PCI state\n"); + } else + pci_restore_state(psdev->dev); + + /* Disable the device */ xen_pcibk_reset_device(psdev->dev); + + kfree(dev_data); + pci_set_drvdata(psdev->dev, NULL); + + /* Clean-up the device */ xen_pcibk_config_free_dyn_fields(psdev->dev); xen_pcibk_config_free_dev(psdev->dev); - kfree(pci_get_drvdata(psdev->dev)); - pci_set_drvdata(psdev->dev, NULL); psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; pci_dev_put(psdev->dev); @@ -231,7 +246,17 @@ void pcistub_put_pci_dev(struct pci_dev *dev) /* Cleanup our device * (so it's ready for the next domain) */ + + /* This is OK - we are running from workqueue context + * and want to inhibit the user from fiddling with 'reset' + */ + pci_reset_function(dev); + pci_restore_state(psdev->dev); + + /* This disables the device. */ xen_pcibk_reset_device(found_psdev->dev); + + /* And cleanup up our emulated fields. */ xen_pcibk_config_free_dyn_fields(found_psdev->dev); xen_pcibk_config_reset_dev(found_psdev->dev); @@ -328,6 +353,16 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) if (err) goto config_release; + dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); + __pci_reset_function_locked(dev); + + /* We need the device active to save the state. */ + dev_dbg(&dev->dev, "save state of device\n"); + pci_save_state(dev); + dev_data->pci_saved_state = pci_store_saved_state(dev); + if (!dev_data->pci_saved_state) + dev_err(&dev->dev, "Could not store PCI conf saved state!\n"); + /* Now disable the device (this also ensures some private device * data is setup before we export) */ diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index e9b4011..a7def01 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -41,6 +41,7 @@ struct xen_pcibk_device { struct xen_pcibk_dev_data { struct list_head config_fields; + struct pci_saved_state *pci_saved_state; unsigned int permissive:1; unsigned int warned_on_write:1; unsigned int enable_intx:1; diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 767ff65..146c948 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -488,7 +488,7 @@ static struct attribute *selfballoon_attrs[] = { NULL }; -static struct attribute_group selfballoon_group = { +static const struct attribute_group selfballoon_group = { .name = "selfballoon", .attrs = selfballoon_attrs }; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 566d2ad..b3e146e 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -569,7 +569,7 @@ int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, { struct gnttab_map_grant_ref op; - gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, + gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, dev->otherend_id); if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) @@ -662,7 +662,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) goto found; } } - node = NULL; + node = addr = NULL; found: spin_unlock(&xenbus_valloc_lock); @@ -698,7 +698,7 @@ int xenbus_unmap_ring(struct xenbus_device *dev, { struct gnttab_unmap_grant_ref op; - gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); + gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map, handle); if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 3864967..b793723 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -257,11 +257,12 @@ int xenbus_dev_remove(struct device *_dev) DPRINTK("%s", dev->nodename); free_otherend_watch(dev); - free_otherend_details(dev); if (drv->remove) drv->remove(dev); + free_otherend_details(dev); + xenbus_switch_state(dev, XenbusStateClosed); return 0; } diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 9c57819..f20c5f1 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -53,6 +53,12 @@ static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, char *nodename; int err; + /* ignore console/0 */ + if (!strncmp(type, "console", 7) && !strncmp(name, "0", 1)) { + DPRINTK("Ignoring buggy device entry console/0"); + return 0; + } + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); if (!nodename) return -ENOMEM; diff --git a/include/linux/pci.h b/include/linux/pci.h index 27bf521..900da5d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -817,6 +817,7 @@ int pcie_set_readrq(struct pci_dev *dev, int rq); int pcie_get_mps(struct pci_dev *dev); int pcie_set_mps(struct pci_dev *dev, int mps); int __pci_reset_function(struct pci_dev *dev); +int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h index 1888d8c..1b4f923 100644 --- a/include/xen/interface/hvm/params.h +++ b/include/xen/interface/hvm/params.h @@ -90,6 +90,10 @@ /* Boolean: Enable aligning all periodic vpts to reduce interrupts */ #define HVM_PARAM_VPT_ALIGN 16 -#define HVM_NR_PARAMS 17 +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +#define HVM_NR_PARAMS 19 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index c1080d9..0c28989 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -145,6 +145,13 @@ struct physdev_manage_pci { uint8_t devfn; }; +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + #define PHYSDEVOP_manage_pci_add_ext 20 struct physdev_manage_pci_ext { /* IN */ diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h index c168468..486653f 100644 --- a/include/xen/interface/platform.h +++ b/include/xen/interface/platform.h @@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t); #define XEN_PM_CX 0 #define XEN_PM_PX 1 #define XEN_PM_TX 2 - +#define XEN_PM_PDC 3 /* Px sub info type */ #define XEN_PX_PCT 1 #define XEN_PX_PSS 2 @@ -293,10 +293,27 @@ struct xenpf_set_processor_pminfo { union { struct xen_processor_power power;/* Cx: _CST/_CSD */ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + GUEST_HANDLE(uint32_t) pdc; }; }; DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maxium cpu_id that is present */ + uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE 1 + /* Correponding xen_cpuid is not present*/ +#define XEN_PCPU_FLAGS_INVALID 2 + uint32_t flags; + uint32_t apic_id; + uint32_t acpi_id; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); + struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -312,6 +329,7 @@ struct xen_platform_op { struct xenpf_change_freq change_freq; struct xenpf_getidletime getidletime; struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; uint8_t pad[128]; } u; }; |