diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 22:09:10 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-02 22:09:10 -0700 |
commit | 56d92aa5cf7c96c70f81d0350c94faf46a9fb76d (patch) | |
tree | 2fb5d5b891903cada4dff9c581c70d33340a3769 /drivers | |
parent | 33c2a174120b2c1baec9d1dac513f9d4b761b26a (diff) | |
parent | c341ca45ce56143804ef5a8f4db753e554e640b4 (diff) | |
download | op-kernel-dev-56d92aa5cf7c96c70f81d0350c94faf46a9fb76d.zip op-kernel-dev-56d92aa5cf7c96c70f81d0350c94faf46a9fb76d.tar.gz |
Merge tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
Pull Xen update from Konrad Rzeszutek Wilk:
"Features:
- When hotplugging PCI devices in a PV guest we can allocate
Xen-SWIOTLB later.
- Cleanup Xen SWIOTLB.
- Support paged-out grants from HVM domains in the backends.
- Support wild cards in xen-pciback.hide=(BDF) arguments.
- Update grant status updates with upstream hypervisor.
- Boot PV guests with more than 128GB.
- Cleanup Xen MMU code/add comments.
- Obtain XENVERS using a preferred method.
- Lay out generic changes to support Xen ARM.
- Allow privcmd ioctl for HVM (used to do only PV).
- Do v2 of mmap_batch for privcmd ioctls.
- If the hypervisor saves the keyboard LED state, we now inform
the kernel of that state.
Fixes:
- More fixes to Xen PCI backend for various calls/FLR/etc.
- With more than 4GB in a 64-bit PV guest disable native SWIOTLB.
- Fix up smatch warnings.
- Fix up various return values in privcmd and mm."
* tag 'stable/for-linus-3.7-x86-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: (48 commits)
xen/pciback: Restore the PCI config space after an FLR.
xen-pciback: properly clean up after calling pcistub_device_find()
xen/vga: add the xen EFI video mode support
xen/x86: retrieve keyboard shift status flags from hypervisor.
xen/gndev: Xen backend support for paged out grant targets V4.
xen-pciback: support wild cards in slot specifications
xen/swiotlb: Fix compile warnings when using plain integer instead of NULL pointer.
xen/swiotlb: Remove functions not needed anymore.
xen/pcifront: Use Xen-SWIOTLB when initting if required.
xen/swiotlb: For early initialization, return zero on success.
xen/swiotlb: Use the swiotlb_late_init_with_tbl to init Xen-SWIOTLB late when PV PCI is used.
xen/swiotlb: Move the error strings to its own function.
xen/swiotlb: Move the nr_tbl determination in its own function.
xen/arm: compile and run xenbus
xen: resynchronise grant table status codes with upstream
xen/privcmd: return -EFAULT on error
xen/privcmd: Fix mmap batch ioctl error status copy back.
xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
xen/mm: return more precise error from xen_remap_domain_range()
xen/mmu: If the revector fails, don't attempt to revector anything else.
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/xen-netback/netback.c | 11 | ||||
-rw-r--r-- | drivers/pci/xen-pcifront.c | 15 | ||||
-rw-r--r-- | drivers/tty/hvc/hvc_xen.c | 2 | ||||
-rw-r--r-- | drivers/xen/events.c | 18 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 2 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 67 | ||||
-rw-r--r-- | drivers/xen/privcmd.c | 135 | ||||
-rw-r--r-- | drivers/xen/swiotlb-xen.c | 119 | ||||
-rw-r--r-- | drivers/xen/sys-hypervisor.c | 13 | ||||
-rw-r--r-- | drivers/xen/tmem.c | 1 | ||||
-rw-r--r-- | drivers/xen/xen-pciback/pci_stub.c | 136 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_client.c | 6 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_comms.c | 2 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_dev_backend.c | 2 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe.c | 56 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_probe_frontend.c | 1 | ||||
-rw-r--r-- | drivers/xen/xenbus/xenbus_xs.c | 3 |
17 files changed, 435 insertions, 154 deletions
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 682633bf..05593d8 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -635,9 +635,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) return; BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op)); - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op, - npo.copy_prod); - BUG_ON(ret != 0); + gnttab_batch_copy(netbk->grant_copy_op, npo.copy_prod); while ((skb = __skb_dequeue(&rxq)) != NULL) { sco = (struct skb_cb_overlay *)skb->cb; @@ -1460,18 +1458,15 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) static void xen_netbk_tx_action(struct xen_netbk *netbk) { unsigned nr_gops; - int ret; nr_gops = xen_netbk_tx_build_gops(netbk); if (nr_gops == 0) return; - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, - netbk->tx_copy_ops, nr_gops); - BUG_ON(ret); - xen_netbk_tx_submit(netbk); + gnttab_batch_copy(netbk->tx_copy_ops, nr_gops); + xen_netbk_tx_submit(netbk); } static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index def8d0b..0aab85a 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -21,6 +21,7 @@ #include <linux/bitops.h> #include <linux/time.h> +#include <asm/xen/swiotlb-xen.h> #define INVALID_GRANT_REF (0) #define INVALID_EVTCHN (-1) @@ -236,7 +237,7 @@ static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, return errno_to_pcibios_err(do_pci_op(pdev, &op)); } -struct pci_ops pcifront_bus_ops = { +static struct pci_ops pcifront_bus_ops = { .read = pcifront_bus_read, .write = pcifront_bus_write, }; @@ -668,7 +669,7 @@ static irqreturn_t pcifront_handler_aer(int irq, void *dev) schedule_pcifront_aer_op(pdev); return IRQ_HANDLED; } -static int pcifront_connect(struct pcifront_device *pdev) +static int pcifront_connect_and_init_dma(struct pcifront_device *pdev) { int err = 
0; @@ -681,9 +682,13 @@ static int pcifront_connect(struct pcifront_device *pdev) dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); err = -EEXIST; } - spin_unlock(&pcifront_dev_lock); + if (!err && !swiotlb_nr_tbl()) { + err = pci_xen_swiotlb_init_late(); + if (err) + dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n"); + } return err; } @@ -842,10 +847,10 @@ static int __devinit pcifront_try_connect(struct pcifront_device *pdev) XenbusStateInitialised) goto out; - err = pcifront_connect(pdev); + err = pcifront_connect_and_init_dma(pdev); if (err) { xenbus_dev_fatal(pdev->xdev, err, - "Error connecting PCI Frontend"); + "Error setting up PCI Frontend"); goto out; } diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 1e456dc..2944ff8 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -21,6 +21,7 @@ #include <linux/console.h> #include <linux/delay.h> #include <linux/err.h> +#include <linux/irq.h> #include <linux/init.h> #include <linux/types.h> #include <linux/list.h> @@ -35,6 +36,7 @@ #include <xen/page.h> #include <xen/events.h> #include <xen/interface/io/console.h> +#include <xen/interface/sched.h> #include <xen/hvc-console.h> #include <xen/xenbus.h> diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 7595581..c60d162 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -373,11 +373,22 @@ static void unmask_evtchn(int port) { struct shared_info *s = HYPERVISOR_shared_info; unsigned int cpu = get_cpu(); + int do_hypercall = 0, evtchn_pending = 0; BUG_ON(!irqs_disabled()); - /* Slow path (hypercall) if this is a non-local port. 
*/ - if (unlikely(cpu != cpu_from_evtchn(port))) { + if (unlikely((cpu != cpu_from_evtchn(port)))) + do_hypercall = 1; + else + evtchn_pending = sync_test_bit(port, &s->evtchn_pending[0]); + + if (unlikely(evtchn_pending && xen_hvm_domain())) + do_hypercall = 1; + + /* Slow path (hypercall) if this is a non-local port or if this is + * an hvm domain and an event is pending (hvm domains don't have + * their own implementation of irq_enable). */ + if (do_hypercall) { struct evtchn_unmask unmask = { .port = port }; (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); } else { @@ -390,7 +401,7 @@ static void unmask_evtchn(int port) * 'hw_resend_irq'. Just like a real IO-APIC we 'lose * the interrupt edge' if the channel is masked. */ - if (sync_test_bit(port, &s->evtchn_pending[0]) && + if (evtchn_pending && !sync_test_and_set_bit(port / BITS_PER_LONG, &vcpu_info->evtchn_pending_sel)) vcpu_info->evtchn_upcall_pending = 1; @@ -831,6 +842,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) struct irq_info *info = info_for_irq(irq); WARN_ON(info == NULL || info->type != IRQT_EVTCHN); } + irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN); out: mutex_unlock(&irq_mapping_update_lock); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 7f12416..5df9fd8 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -446,7 +446,7 @@ static void mn_release(struct mmu_notifier *mn, spin_unlock(&priv->lock); } -struct mmu_notifier_ops gntdev_mmu_ops = { +static struct mmu_notifier_ops gntdev_mmu_ops = { .release = mn_release, .invalidate_page = mn_invl_page, .invalidate_range_start = mn_invl_range_start, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 0067266..b2b0a37 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -38,6 +38,7 @@ #include <linux/vmalloc.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/delay.h> #include <linux/hardirq.h> #include <xen/xen.h> @@ -47,6 +48,7 @@ #include 
<xen/interface/memory.h> #include <xen/hvc-console.h> #include <asm/xen/hypercall.h> +#include <asm/xen/interface.h> #include <asm/pgtable.h> #include <asm/sync_bitops.h> @@ -285,10 +287,9 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, - unsigned long frame, int flags, - unsigned page_off, - unsigned length) +static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, unsigned length) { gnttab_shared.v2[ref].sub_page.frame = frame; gnttab_shared.v2[ref].sub_page.page_off = page_off; @@ -345,9 +346,9 @@ bool gnttab_subpage_grants_available(void) } EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); -void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, - int flags, domid_t trans_domid, - grant_ref_t trans_gref) +static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, + int flags, domid_t trans_domid, + grant_ref_t trans_gref) { gnttab_shared.v2[ref].transitive.trans_domid = trans_domid; gnttab_shared.v2[ref].transitive.gref = trans_gref; @@ -823,6 +824,52 @@ unsigned int gnttab_max_grant_frames(void) } EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); +/* Handling of paged out grant targets (GNTST_eagain) */ +#define MAX_DELAY 256 +static inline void +gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status, + const char *func) +{ + unsigned delay = 1; + + do { + BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1)); + if (*status == GNTST_eagain) + msleep(delay++); + } while ((*status == GNTST_eagain) && (delay < MAX_DELAY)); + + if (delay >= MAX_DELAY) { + printk(KERN_ERR "%s: %s eagain grant\n", func, current->comm); + *status = GNTST_bad_page; + } +} + +void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count) +{ + struct gnttab_map_grant_ref *op; + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 
batch, count)) + BUG(); + for (op = batch; op < batch + count; op++) + if (op->status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op, + &op->status, __func__); +} +EXPORT_SYMBOL_GPL(gnttab_batch_map); + +void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) +{ + struct gnttab_copy *op; + + if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count)) + BUG(); + for (op = batch; op < batch + count; op++) + if (op->status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_copy, op, + &op->status, __func__); +} +EXPORT_SYMBOL_GPL(gnttab_batch_copy); + int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) @@ -836,6 +883,12 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (ret) return ret; + /* Retry eagain maps */ + for (i = 0; i < count; i++) + if (map_ops[i].status == GNTST_eagain) + gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, + &map_ops[i].status, __func__); + if (xen_feature(XENFEAT_auto_translated_physmap)) return ret; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index ccee0f1..ef63895 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages) */ static int gather_array(struct list_head *pagelist, unsigned nelem, size_t size, - void __user *data) + const void __user *data) { unsigned pageidx; void *pagedata; @@ -246,61 +246,117 @@ struct mmap_batch_state { domid_t domain; unsigned long va; struct vm_area_struct *vma; - int err; - - xen_pfn_t __user *user; + /* A tristate: + * 0 for no errors + * 1 if at least one error has happened (and no + * -ENOENT errors have happened) + * -ENOENT if at least 1 -ENOENT has happened. + */ + int global_error; + /* An array for individual errors */ + int *err; + + /* User-space mfn array to store errors in the second pass for V1. 
*/ + xen_pfn_t __user *user_mfn; }; static int mmap_batch_fn(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; + int ret; + + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain); - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { - *mfnp |= 0xf0000000U; - st->err++; + /* Store error code for second pass. */ + *(st->err++) = ret; + + /* And see if it affects the global_error. */ + if (ret < 0) { + if (ret == -ENOENT) + st->global_error = -ENOENT; + else { + /* Record that at least one error has happened. */ + if (st->global_error == 0) + st->global_error = 1; + } } st->va += PAGE_SIZE; return 0; } -static int mmap_return_errors(void *data, void *state) +static int mmap_return_errors_v1(void *data, void *state) { xen_pfn_t *mfnp = data; struct mmap_batch_state *st = state; - - return put_user(*mfnp, st->user++); + int err = *(st->err++); + + /* + * V1 encodes the error codes in the 32bit top nibble of the + * mfn (with its known limitations vis-a-vis 64 bit callers). + */ + *mfnp |= (err == -ENOENT) ? + PRIVCMD_MMAPBATCH_PAGED_ERROR : + PRIVCMD_MMAPBATCH_MFN_ERROR; + return __put_user(*mfnp, st->user_mfn++); } static struct vm_operations_struct privcmd_vm_ops; -static long privcmd_ioctl_mmap_batch(void __user *udata) +static long privcmd_ioctl_mmap_batch(void __user *udata, int version) { int ret; - struct privcmd_mmapbatch m; + struct privcmd_mmapbatch_v2 m; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long nr_pages; LIST_HEAD(pagelist); + int *err_array = NULL; struct mmap_batch_state state; if (!xen_initial_domain()) return -EPERM; - if (copy_from_user(&m, udata, sizeof(m))) - return -EFAULT; + switch (version) { + case 1: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch))) + return -EFAULT; + /* Returns per-frame error in m.arr. 
*/ + m.err = NULL; + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr))) + return -EFAULT; + break; + case 2: + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2))) + return -EFAULT; + /* Returns per-frame error code in m.err. */ + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err)))) + return -EFAULT; + break; + default: + return -EINVAL; + } nr_pages = m.num; if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) return -EINVAL; - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), - m.arr); + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr); - if (ret || list_empty(&pagelist)) + if (ret) goto out; + if (list_empty(&pagelist)) { + ret = -EINVAL; + goto out; + } + + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL); + if (err_array == NULL) { + ret = -ENOMEM; + goto out; + } down_write(&mm->mmap_sem); @@ -315,24 +371,37 @@ static long privcmd_ioctl_mmap_batch(void __user *udata) goto out; } - state.domain = m.dom; - state.vma = vma; - state.va = m.addr; - state.err = 0; + state.domain = m.dom; + state.vma = vma; + state.va = m.addr; + state.global_error = 0; + state.err = err_array; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_batch_fn, &state); + /* mmap_batch_fn guarantees ret == 0 */ + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state)); up_write(&mm->mmap_sem); - if (state.err > 0) { - state.user = m.arr; + if (state.global_error && (version == 1)) { + /* Write back errors in second pass. */ + state.user_mfn = (xen_pfn_t *)m.arr; + state.err = err_array; ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, - mmap_return_errors, &state); + &pagelist, mmap_return_errors_v1, &state); + } else if (version == 2) { + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int)); + if (ret) + ret = -EFAULT; } + /* If we have not had any EFAULT-like global errors then set the global + * error to -ENOENT if necessary. 
*/ + if ((ret == 0) && (state.global_error == -ENOENT)) + ret = -ENOENT; + out: + kfree(err_array); free_page_list(&pagelist); return ret; @@ -354,7 +423,11 @@ static long privcmd_ioctl(struct file *file, break; case IOCTL_PRIVCMD_MMAPBATCH: - ret = privcmd_ioctl_mmap_batch(udata); + ret = privcmd_ioctl_mmap_batch(udata, 1); + break; + + case IOCTL_PRIVCMD_MMAPBATCH_V2: + ret = privcmd_ioctl_mmap_batch(udata, 2); break; default: @@ -380,10 +453,6 @@ static struct vm_operations_struct privcmd_vm_ops = { static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) { - /* Unsupported for auto-translate guests. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -ENOSYS; - /* DONTCOPY is essential for Xen because copy_page_range doesn't know * how to recreate these mappings */ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 4d51948..58db6df 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -52,7 +52,7 @@ static unsigned long xen_io_tlb_nslabs; * Quick lookup value of the bus address of the IOTLB. 
*/ -u64 start_dma_addr; +static u64 start_dma_addr; static dma_addr_t xen_phys_to_bus(phys_addr_t paddr) { @@ -144,31 +144,72 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) } while (i < nslabs); return 0; } +static unsigned long xen_set_nslabs(unsigned long nr_tbl) +{ + if (!nr_tbl) { + xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); + xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); + } else + xen_io_tlb_nslabs = nr_tbl; -void __init xen_swiotlb_init(int verbose) + return xen_io_tlb_nslabs << IO_TLB_SHIFT; +} + +enum xen_swiotlb_err { + XEN_SWIOTLB_UNKNOWN = 0, + XEN_SWIOTLB_ENOMEM, + XEN_SWIOTLB_EFIXUP +}; + +static const char *xen_swiotlb_error(enum xen_swiotlb_err err) +{ + switch (err) { + case XEN_SWIOTLB_ENOMEM: + return "Cannot allocate Xen-SWIOTLB buffer\n"; + case XEN_SWIOTLB_EFIXUP: + return "Failed to get contiguous memory for DMA from Xen!\n"\ + "You either: don't have the permissions, do not have"\ + " enough free memory under 4GB, or the hypervisor memory"\ + " is too fragmented!"; + default: + break; + } + return ""; +} +int __ref xen_swiotlb_init(int verbose, bool early) { - unsigned long bytes; + unsigned long bytes, order; int rc = -ENOMEM; - unsigned long nr_tbl; - char *m = NULL; + enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN; unsigned int repeat = 3; - nr_tbl = swiotlb_nr_tbl(); - if (nr_tbl) - xen_io_tlb_nslabs = nr_tbl; - else { - xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); - xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); - } + xen_io_tlb_nslabs = swiotlb_nr_tbl(); retry: - bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; - + bytes = xen_set_nslabs(xen_io_tlb_nslabs); + order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT); /* * Get IO TLB memory from any location. 
*/ - xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); + if (early) + xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); + else { +#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) +#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) + while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { + xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order); + if (xen_io_tlb_start) + break; + order--; + } + if (order != get_order(bytes)) { + pr_warn("Warning: only able to allocate %ld MB " + "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + xen_io_tlb_nslabs = SLABS_PER_PAGE << order; + bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; + } + } if (!xen_io_tlb_start) { - m = "Cannot allocate Xen-SWIOTLB buffer!\n"; + m_ret = XEN_SWIOTLB_ENOMEM; goto error; } xen_io_tlb_end = xen_io_tlb_start + bytes; @@ -179,17 +220,22 @@ retry: bytes, xen_io_tlb_nslabs); if (rc) { - free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); - m = "Failed to get contiguous memory for DMA from Xen!\n"\ - "You either: don't have the permissions, do not have"\ - " enough free memory under 4GB, or the hypervisor memory"\ - "is too fragmented!"; + if (early) + free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); + else { + free_pages((unsigned long)xen_io_tlb_start, order); + xen_io_tlb_start = NULL; + } + m_ret = XEN_SWIOTLB_EFIXUP; goto error; } start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); - swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); - - return; + if (early) { + swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); + rc = 0; + } else + rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs); + return rc; error: if (repeat--) { xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ @@ -198,10 +244,13 @@ error: (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); goto retry; } - xen_raw_printk("%s (rc:%d)", m, rc); - panic("%s (rc:%d)", m, rc); + pr_err("%s (rc:%d)", xen_swiotlb_error(m_ret), rc); + if (early) + 
panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc); + else + free_pages((unsigned long)xen_io_tlb_start, order); + return rc; } - void * xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags, @@ -466,14 +515,6 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, } EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg_attrs); -int -xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - return xen_swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); -} -EXPORT_SYMBOL_GPL(xen_swiotlb_map_sg); - /* * Unmap a set of streaming mode DMA translations. Again, cpu read rules * concerning calls here are the same as for swiotlb_unmap_page() above. @@ -494,14 +535,6 @@ xen_swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, } EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg_attrs); -void -xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - return xen_swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); -} -EXPORT_SYMBOL_GPL(xen_swiotlb_unmap_sg); - /* * Make physical memory consistent for a set of streaming mode DMA translations * after a transfer. 
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index fdb6d22..5e5ad7e2 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -114,7 +114,7 @@ static void xen_sysfs_version_destroy(void) /* UUID */ -static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +static ssize_t uuid_show_fallback(struct hyp_sysfs_attr *attr, char *buffer) { char *vm, *val; int ret; @@ -135,6 +135,17 @@ static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) return ret; } +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + xen_domain_handle_t uuid; + int ret; + ret = HYPERVISOR_xen_version(XENVER_guest_handle, uuid); + if (ret) + return uuid_show_fallback(attr, buffer); + ret = sprintf(buffer, "%pU\n", uuid); + return ret; +} + HYPERVISOR_ATTR_RO(uuid); static int __init xen_sysfs_uuid_init(void) diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 89f264c..144564e 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -21,6 +21,7 @@ #include <asm/xen/hypercall.h> #include <asm/xen/page.h> #include <asm/xen/hypervisor.h> +#include <xen/tmem.h> #define TMEM_CONTROL 0 #define TMEM_NEW_POOL 1 diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 92ff01d..961d664 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -362,6 +362,7 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) else { dev_dbg(&dev->dev, "reseting (FLR, D3, etc) the device\n"); __pci_reset_function_locked(dev); + pci_restore_state(dev); } /* Now disable the device (this also ensures some private device * data is setup before we export) @@ -681,14 +682,14 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } if (!test_bit(_XEN_PCIB_AERHANDLER, (unsigned long 
*)&psdev->pdev->sh_info->flags)) { dev_err(&dev->dev, "guest with no AER driver should have been killed\n"); - goto release; + goto end; } result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result); @@ -698,9 +699,9 @@ static pci_ers_result_t xen_pcibk_slot_reset(struct pci_dev *dev) "No AER slot_reset service or disconnected!\n"); kill_domain_by_device(psdev); } -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return result; @@ -739,14 +740,14 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } if (!test_bit(_XEN_PCIB_AERHANDLER, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_err(&dev->dev, "guest with no AER driver should have been killed\n"); - goto release; + goto end; } result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result); @@ -756,9 +757,9 @@ static pci_ers_result_t xen_pcibk_mmio_enabled(struct pci_dev *dev) "No AER mmio_enabled service or disconnected!\n"); kill_domain_by_device(psdev); } -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return result; } @@ -797,7 +798,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } /*Guest owns the device yet no aer handler regiested, kill guest*/ @@ -805,7 +806,7 @@ static pci_ers_result_t xen_pcibk_error_detected(struct pci_dev *dev, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result); @@ -815,9 +816,9 @@ static pci_ers_result_t 
xen_pcibk_error_detected(struct pci_dev *dev, "No AER error_detected service or disconnected!\n"); kill_domain_by_device(psdev); } -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return result; } @@ -851,7 +852,7 @@ static void xen_pcibk_error_resume(struct pci_dev *dev) dev_err(&dev->dev, DRV_NAME " device is not connected or owned" " by HVM, kill it\n"); kill_domain_by_device(psdev); - goto release; + goto end; } if (!test_bit(_XEN_PCIB_AERHANDLER, @@ -859,13 +860,13 @@ static void xen_pcibk_error_resume(struct pci_dev *dev) dev_err(&dev->dev, "guest with no AER driver should have been killed\n"); kill_domain_by_device(psdev); - goto release; + goto end; } common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED); -release: - pcistub_device_put(psdev); end: + if (psdev) + pcistub_device_put(psdev); up_write(&pcistub_sem); return; } @@ -897,17 +898,41 @@ static inline int str_to_slot(const char *buf, int *domain, int *bus, int *slot, int *func) { int err; + char wc = '*'; err = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func); - if (err == 4) + switch (err) { + case 3: + *func = -1; + err = sscanf(buf, " %x:%x:%x.%c", domain, bus, slot, &wc); + break; + case 2: + *slot = *func = -1; + err = sscanf(buf, " %x:%x:*.%c", domain, bus, &wc); + if (err >= 2) + ++err; + break; + } + if (err == 4 && wc == '*') return 0; else if (err < 0) return -EINVAL; /* try again without domain */ *domain = 0; + wc = '*'; err = sscanf(buf, " %x:%x.%x", bus, slot, func); - if (err == 3) + switch (err) { + case 2: + *func = -1; + err = sscanf(buf, " %x:%x.%c", bus, slot, &wc); + break; + case 1: + *slot = *func = -1; + err = sscanf(buf, " %x:*.%c", bus, &wc) + 1; + break; + } + if (err == 3 && wc == '*') return 0; return -EINVAL; @@ -930,6 +955,19 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) { struct pcistub_device_id *pci_dev_id; unsigned long flags; + int rc = 0; + + if 
(slot < 0) { + for (slot = 0; !rc && slot < 32; ++slot) + rc = pcistub_device_id_add(domain, bus, slot, func); + return rc; + } + + if (func < 0) { + for (func = 0; !rc && func < 8; ++func) + rc = pcistub_device_id_add(domain, bus, slot, func); + return rc; + } pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL); if (!pci_dev_id) @@ -952,15 +990,15 @@ static int pcistub_device_id_add(int domain, int bus, int slot, int func) static int pcistub_device_id_remove(int domain, int bus, int slot, int func) { struct pcistub_device_id *pci_dev_id, *t; - int devfn = PCI_DEVFN(slot, func); int err = -ENOENT; unsigned long flags; spin_lock_irqsave(&device_ids_lock, flags); list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) { - if (pci_dev_id->domain == domain - && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) { + if (pci_dev_id->domain == domain && pci_dev_id->bus == bus + && (slot < 0 || PCI_SLOT(pci_dev_id->devfn) == slot) + && (func < 0 || PCI_FUNC(pci_dev_id->devfn) == func)) { /* Don't break; here because it's possible the same * slot could be in the list more than once */ @@ -987,7 +1025,7 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, struct config_field *field; psdev = pcistub_device_find(domain, bus, slot, func); - if (!psdev || !psdev->dev) { + if (!psdev) { err = -ENODEV; goto out; } @@ -1011,6 +1049,8 @@ static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg, if (err) kfree(field); out: + if (psdev) + pcistub_device_put(psdev); return err; } @@ -1115,10 +1155,9 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, err = str_to_slot(buf, &domain, &bus, &slot, &func); if (err) - goto out; + return err; psdev = pcistub_device_find(domain, bus, slot, func); - if (!psdev) goto out; @@ -1134,6 +1173,8 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, if (dev_data->isr_on) dev_data->ack_intr = 1; out: + if (psdev) + pcistub_device_put(psdev); if 
(!err) err = count; return err; @@ -1216,15 +1257,16 @@ static ssize_t permissive_add(struct device_driver *drv, const char *buf, err = str_to_slot(buf, &domain, &bus, &slot, &func); if (err) goto out; + if (slot < 0 || func < 0) { + err = -EINVAL; + goto out; + } psdev = pcistub_device_find(domain, bus, slot, func); if (!psdev) { err = -ENODEV; goto out; } - if (!psdev->dev) { - err = -ENODEV; - goto release; - } + dev_data = pci_get_drvdata(psdev->dev); /* the driver data for a device should never be null at this point */ if (!dev_data) { @@ -1297,17 +1339,51 @@ static int __init pcistub_init(void) if (pci_devs_to_hide && *pci_devs_to_hide) { do { + char wc = '*'; + parsed = 0; err = sscanf(pci_devs_to_hide + pos, " (%x:%x:%x.%x) %n", &domain, &bus, &slot, &func, &parsed); - if (err != 4) { + switch (err) { + case 3: + func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x:%x.%c) %n", + &domain, &bus, &slot, &wc, + &parsed); + break; + case 2: + slot = func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x:*.%c) %n", + &domain, &bus, &wc, &parsed) + 1; + break; + } + + if (err != 4 || wc != '*') { domain = 0; + wc = '*'; err = sscanf(pci_devs_to_hide + pos, " (%x:%x.%x) %n", &bus, &slot, &func, &parsed); - if (err != 3) + switch (err) { + case 2: + func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:%x.%c) %n", + &bus, &slot, &wc, + &parsed); + break; + case 1: + slot = func = -1; + err = sscanf(pci_devs_to_hide + pos, + " (%x:*.%c) %n", + &bus, &wc, &parsed) + 1; + break; + } + if (err != 3 || wc != '*') goto parse_error; } diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index b3e146e..bcf3ba4 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -490,8 +490,7 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, op.host_addr = arbitrary_virt_to_machine(pte).maddr; - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + 
gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { free_vm_area(area); @@ -572,8 +571,7 @@ int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map, gnt_ref, dev->otherend_id); - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); + gnttab_batch_map(&op, 1); if (op.status != GNTST_okay) { xenbus_dev_fatal(dev, op.status, diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 52fe7ad..c5aa55c 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -224,7 +224,7 @@ int xb_init_comms(void) int err; err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); - if (err <= 0) { + if (err < 0) { printk(KERN_ERR "XENBUS request irq failed %i\n", err); return err; } diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c index be738c4..d730008 100644 --- a/drivers/xen/xenbus/xenbus_dev_backend.c +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -107,7 +107,7 @@ static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -const struct file_operations xenbus_backend_fops = { +static const struct file_operations xenbus_backend_fops = { .open = xenbus_backend_open, .mmap = xenbus_backend_mmap, .unlocked_ioctl = xenbus_backend_ioctl, diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index b793723..038b71d 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -324,8 +324,8 @@ static int cmp_dev(struct device *dev, void *data) return 0; } -struct xenbus_device *xenbus_device_find(const char *nodename, - struct bus_type *bus) +static struct xenbus_device *xenbus_device_find(const char *nodename, + struct bus_type *bus) { struct xb_find_info info = { .dev = NULL, .nodename = nodename }; @@ -719,17 +719,47 @@ static int __init xenstored_local_init(void) return err; 
} +enum xenstore_init { + UNKNOWN, + PV, + HVM, + LOCAL, +}; static int __init xenbus_init(void) { int err = 0; + enum xenstore_init usage = UNKNOWN; + uint64_t v = 0; if (!xen_domain()) return -ENODEV; xenbus_ring_ops_init(); - if (xen_hvm_domain()) { - uint64_t v = 0; + if (xen_pv_domain()) + usage = PV; + if (xen_hvm_domain()) + usage = HVM; + if (xen_hvm_domain() && xen_initial_domain()) + usage = LOCAL; + if (xen_pv_domain() && !xen_start_info->store_evtchn) + usage = LOCAL; + if (xen_pv_domain() && xen_start_info->store_evtchn) + xenstored_ready = 1; + + switch (usage) { + case LOCAL: + err = xenstored_local_init(); + if (err) + goto out_error; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case PV: + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); + break; + case HVM: err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); if (err) goto out_error; @@ -738,18 +768,12 @@ static int __init xenbus_init(void) if (err) goto out_error; xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - if (xen_store_evtchn) - xenstored_ready = 1; - else { - err = xenstored_local_init(); - if (err) - goto out_error; - } - xen_store_interface = mfn_to_virt(xen_store_mfn); + xen_store_interface = + ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); + break; + default: + pr_warn("Xenstore state unknown\n"); + break; } /* Initialize the interface to xenstore. 
*/ diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index a31b54d..3159a37 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -21,6 +21,7 @@ #include <xen/xenbus.h> #include <xen/events.h> #include <xen/page.h> +#include <xen/xen.h> #include <xen/platform_pci.h> diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index bce15cf..131dec0 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -44,6 +44,7 @@ #include <linux/rwsem.h> #include <linux/module.h> #include <linux/mutex.h> +#include <asm/xen/hypervisor.h> #include <xen/xenbus.h> #include <xen/xen.h> #include "xenbus_comms.h" @@ -622,7 +623,7 @@ static void xs_reset_watches(void) { int err, supported = 0; - if (!xen_hvm_domain()) + if (!xen_hvm_domain() || xen_initial_domain()) return; err = xenbus_scanf(XBT_NIL, "control", |