diff options
author | Stefano Stabellini <stefano.stabellini@eu.citrix.com> | 2010-10-22 21:24:06 +0100 |
---|---|---|
committer | Stefano Stabellini <stefano.stabellini@eu.citrix.com> | 2010-10-22 21:24:06 +0100 |
commit | 67ba37293e938208795d6a3562201bdb0cf43393 (patch) | |
tree | 3522e949ff19f3809583bfb3fa3973ddf264689d /drivers/pci | |
parent | cd07202cc8262e1669edff0d97715f3dd9260917 (diff) | |
parent | 5bba6c56dc99ff88f79a79572e29ecf445710878 (diff) | |
download | op-kernel-dev-67ba37293e938208795d6a3562201bdb0cf43393.zip op-kernel-dev-67ba37293e938208795d6a3562201bdb0cf43393.tar.gz |
Merge commit 'konrad/stable/xen-pcifront-0.8.2' into 2.6.36-rc8-initial-domain-v6
Diffstat (limited to 'drivers/pci')
-rw-r--r-- | drivers/pci/Kconfig | 21 | ||||
-rw-r--r-- | drivers/pci/Makefile | 2 | ||||
-rw-r--r-- | drivers/pci/bus.c | 1 | ||||
-rw-r--r-- | drivers/pci/dmar.c | 8 | ||||
-rw-r--r-- | drivers/pci/htirq.c | 22 | ||||
-rw-r--r-- | drivers/pci/intr_remapping.c | 212 | ||||
-rw-r--r-- | drivers/pci/msi.c | 52 | ||||
-rw-r--r-- | drivers/pci/xen-pcifront.c | 1148 |
8 files changed, 1239 insertions, 227 deletions
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 34ef70d..5b1630e 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -40,6 +40,27 @@ config PCI_STUB When in doubt, say N. +config XEN_PCIDEV_FRONTEND + tristate "Xen PCI Frontend" + depends on PCI && X86 && XEN + select HOTPLUG + select PCI_XEN + default y + help + The PCI device frontend driver allows the kernel to import arbitrary + PCI devices from a PCI backend to support PCI driver domains. + +config XEN_PCIDEV_FE_DEBUG + bool "Xen PCI Frontend debugging" + depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG + help + Say Y here if you want the Xen PCI frontend to produce a bunch of debug + messages to the system log. Select this if you are having a + problem with Xen PCI frontend support and want to see more of what is + going on. + + When in doubt, say N. + config HT_IRQ bool "Interrupts on hypertransport devices" default y diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index dc1aa09..d5e2705 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -65,6 +65,8 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o obj-$(CONFIG_PCI_STUB) += pci-stub.o +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o + ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 7f0af0e..69546e9 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -299,6 +299,7 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), } up_read(&pci_bus_sem); } +EXPORT_SYMBOL_GPL(pci_walk_bus); EXPORT_SYMBOL(pci_bus_alloc_resource); EXPORT_SYMBOL_GPL(pci_bus_add_device); diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 0a19708..3de3a43 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -1221,9 +1221,9 @@ const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) } } -void dmar_msi_unmask(unsigned int irq) +void dmar_msi_unmask(struct irq_data *data) { - struct intel_iommu *iommu = get_irq_data(irq); + struct intel_iommu *iommu = irq_data_get_irq_data(data); unsigned long flag; /* unmask it */ @@ -1234,10 +1234,10 @@ void dmar_msi_unmask(unsigned int irq) spin_unlock_irqrestore(&iommu->register_lock, flag); } -void dmar_msi_mask(unsigned int irq) +void dmar_msi_mask(struct irq_data *data) { unsigned long flag; - struct intel_iommu *iommu = get_irq_data(irq); + struct intel_iommu *iommu = irq_data_get_irq_data(data); /* mask it */ spin_lock_irqsave(&iommu->register_lock, flag); diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index 98abf8b..834842a 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c @@ -57,28 +57,22 @@ void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) *msg = cfg->msg; } -void mask_ht_irq(unsigned int irq) +void mask_ht_irq(struct irq_data *data) { - struct ht_irq_cfg *cfg; - struct ht_irq_msg msg; - - cfg = get_irq_data(irq); + struct ht_irq_cfg *cfg = irq_data_get_irq_data(data); + struct ht_irq_msg msg = cfg->msg; - msg = cfg->msg; msg.address_lo |= 1; - write_ht_irq_msg(irq, &msg); + write_ht_irq_msg(data->irq, &msg); } -void unmask_ht_irq(unsigned int irq) +void unmask_ht_irq(struct irq_data *data) { - struct ht_irq_cfg *cfg; - struct ht_irq_msg msg; - - cfg = get_irq_data(irq); + struct ht_irq_cfg *cfg = irq_data_get_irq_data(data); + struct ht_irq_msg msg = cfg->msg; - msg = cfg->msg; msg.address_lo &= ~1; - write_ht_irq_msg(irq, &msg); + write_ht_irq_msg(data->irq, &msg); } /** diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index fd1d286..ec87cd6 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -46,109 +46,24 @@ static __init int setup_intremap(char *str) } early_param("intremap", setup_intremap); -struct irq_2_iommu { - struct intel_iommu *iommu; - u16 irte_index; - u16 sub_handle; - u8 irte_mask; -}; - -#ifdef CONFIG_GENERIC_HARDIRQS -static struct irq_2_iommu *get_one_free_irq_2_iommu(int node) -{ - struct irq_2_iommu *iommu; - - iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); - printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node); - - return iommu; -} - -static struct irq_2_iommu *irq_2_iommu(unsigned int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - if (WARN_ON_ONCE(!desc)) - return NULL; - - return desc->irq_2_iommu; -} - -static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) -{ - struct irq_desc *desc; - struct irq_2_iommu *irq_iommu; - - desc = irq_to_desc(irq); - if (!desc) { - printk(KERN_INFO "can not get irq_desc for %d\n", irq); - return NULL; - } - - irq_iommu = desc->irq_2_iommu; - - if (!irq_iommu) - desc->irq_2_iommu = get_one_free_irq_2_iommu(irq_node(irq)); - - return desc->irq_2_iommu; -} - -#else /* !CONFIG_SPARSE_IRQ */ - -static struct irq_2_iommu irq_2_iommuX[NR_IRQS]; - -static struct irq_2_iommu *irq_2_iommu(unsigned int irq) -{ - if (irq < nr_irqs) - return &irq_2_iommuX[irq]; - - return NULL; -} -static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) -{ - return irq_2_iommu(irq); -} -#endif - static DEFINE_SPINLOCK(irq_2_ir_lock); -static struct irq_2_iommu *valid_irq_2_iommu(unsigned int irq) -{ - struct irq_2_iommu *irq_iommu; - - irq_iommu = irq_2_iommu(irq); - - if (!irq_iommu) - return NULL; - - if (!irq_iommu->iommu) - return NULL; - - return irq_iommu; -} - -int irq_remapped(int irq) +static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { - return valid_irq_2_iommu(irq) != NULL; + struct irq_cfg *cfg = get_irq_chip_data(irq); + return cfg ? &cfg->irq_2_iommu : NULL; } int get_irte(int irq, struct irte *entry) { - int index; - struct irq_2_iommu *irq_iommu; + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; + int index; - if (!entry) + if (!entry || !irq_iommu) return -1; spin_lock_irqsave(&irq_2_ir_lock, flags); - irq_iommu = valid_irq_2_iommu(irq); - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); - return -1; - } index = irq_iommu->irte_index + irq_iommu->sub_handle; *entry = *(irq_iommu->iommu->ir_table->base + index); @@ -160,20 +75,14 @@ int get_irte(int irq, struct irte *entry) int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) { struct ir_table *table = iommu->ir_table; - struct irq_2_iommu *irq_iommu; + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); u16 index, start_index; unsigned int mask = 0; unsigned long flags; int i; - if (!count) - return -1; - -#ifndef CONFIG_SPARSE_IRQ - /* protect irq_2_iommu_alloc later */ - if (irq >= nr_irqs) + if (!count || !irq_iommu) return -1; -#endif /* * start the IRTE search from index 0. @@ -214,13 +123,6 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) for (i = index; i < index + count; i++) table->base[i].present = 1; - irq_iommu = irq_2_iommu_alloc(irq); - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); - printk(KERN_ERR "can't allocate irq_2_iommu\n"); - return -1; - } - irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -244,17 +146,14 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) int map_irq_to_irte_handle(int irq, u16 *sub_handle) { - int index; - struct irq_2_iommu *irq_iommu; + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; + int index; - spin_lock_irqsave(&irq_2_ir_lock, flags); - irq_iommu = valid_irq_2_iommu(irq); - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + if (!irq_iommu) return -1; - } + spin_lock_irqsave(&irq_2_ir_lock, flags); *sub_handle = irq_iommu->sub_handle; index = irq_iommu->irte_index; spin_unlock_irqrestore(&irq_2_ir_lock, flags); @@ -263,18 +162,13 @@ int map_irq_to_irte_handle(int irq, u16 *sub_handle) int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) { - struct irq_2_iommu *irq_iommu; + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; - spin_lock_irqsave(&irq_2_ir_lock, flags); - - irq_iommu = irq_2_iommu_alloc(irq); - - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); - printk(KERN_ERR "can't allocate irq_2_iommu\n"); + if (!irq_iommu) return -1; - } + + spin_lock_irqsave(&irq_2_ir_lock, flags); irq_iommu->iommu = iommu; irq_iommu->irte_index = index; @@ -286,43 +180,18 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) return 0; } -int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) -{ - struct irq_2_iommu *irq_iommu; - unsigned long flags; - - spin_lock_irqsave(&irq_2_ir_lock, flags); - irq_iommu = valid_irq_2_iommu(irq); - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); - return -1; - } - - irq_iommu->iommu = NULL; - irq_iommu->irte_index = 0; - irq_iommu->sub_handle = 0; - irq_2_iommu(irq)->irte_mask = 0; - - spin_unlock_irqrestore(&irq_2_ir_lock, flags); - - return 0; -} - int modify_irte(int irq, struct irte *irte_modified) { - int rc; - int index; - struct irte *irte; + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); struct intel_iommu *iommu; - struct irq_2_iommu *irq_iommu; unsigned long flags; + struct irte *irte; + int rc, index; - spin_lock_irqsave(&irq_2_ir_lock, flags); - irq_iommu = valid_irq_2_iommu(irq); - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + if (!irq_iommu) return -1; - } + + spin_lock_irqsave(&irq_2_ir_lock, flags); iommu = irq_iommu->iommu; @@ -339,31 +208,6 @@ int modify_irte(int irq, struct irte *irte_modified) return rc; } -int flush_irte(int irq) -{ - int rc; - int index; - struct intel_iommu *iommu; - struct irq_2_iommu *irq_iommu; - unsigned long flags; - - spin_lock_irqsave(&irq_2_ir_lock, flags); - irq_iommu = valid_irq_2_iommu(irq); - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); - return -1; - } - - iommu = irq_iommu->iommu; - - index = irq_iommu->irte_index + irq_iommu->sub_handle; - - rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); - spin_unlock_irqrestore(&irq_2_ir_lock, flags); - - return rc; -} - struct intel_iommu *map_hpet_to_ir(u8 hpet_id) { int i; @@ -420,16 +264,14 @@ static int clear_entries(struct irq_2_iommu *irq_iommu) int free_irte(int irq) { - int rc = 0; - struct irq_2_iommu *irq_iommu; + struct irq_2_iommu *irq_iommu = irq_2_iommu(irq); unsigned long flags; + int rc; - spin_lock_irqsave(&irq_2_ir_lock, flags); - irq_iommu = valid_irq_2_iommu(irq); - if (!irq_iommu) { - spin_unlock_irqrestore(&irq_2_ir_lock, flags); + if (!irq_iommu) return -1; - } + + spin_lock_irqsave(&irq_2_ir_lock, flags); rc = clear_entries(irq_iommu); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 69b7be3..7c24dce 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -35,7 +35,12 @@ int arch_msi_check_device(struct pci_dev *dev, int nvec, int type) #endif #ifndef arch_setup_msi_irqs -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +# define arch_setup_msi_irqs default_setup_msi_irqs +# define HAVE_DEFAULT_MSI_SETUP_IRQS +#endif + +#ifdef HAVE_DEFAULT_MSI_SETUP_IRQS +int default_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { struct msi_desc *entry; int ret; @@ -60,7 +65,12 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) #endif #ifndef arch_teardown_msi_irqs -void arch_teardown_msi_irqs(struct pci_dev *dev) +# define arch_teardown_msi_irqs default_teardown_msi_irqs +# define HAVE_DEFAULT_MSI_TEARDOWN_IRQS +#endif + +#ifdef HAVE_DEFAULT_MSI_TEARDOWN_IRQS +void default_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; @@ -170,33 +180,31 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) desc->masked = __msix_mask_irq(desc, flag); } -static void msi_set_mask_bit(unsigned irq, u32 flag) +static void msi_set_mask_bit(struct irq_data *data, u32 flag) { - struct msi_desc *desc = get_irq_msi(irq); + struct msi_desc *desc = irq_data_get_msi(data); if (desc->msi_attrib.is_msix) { msix_mask_irq(desc, flag); readl(desc->mask_base); /* Flush write to device */ } else { - unsigned offset = irq - desc->dev->irq; + unsigned offset = data->irq - desc->dev->irq; msi_mask_irq(desc, 1 << offset, flag << offset); } } -void mask_msi_irq(unsigned int irq) +void mask_msi_irq(struct irq_data *data) { - msi_set_mask_bit(irq, 1); + msi_set_mask_bit(data, 1); } -void unmask_msi_irq(unsigned int irq) +void unmask_msi_irq(struct irq_data *data) { - msi_set_mask_bit(irq, 0); + msi_set_mask_bit(data, 0); } -void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_desc_msi(desc); - BUG_ON(entry->dev->current_state != PCI_D0); if (entry->msi_attrib.is_msix) { @@ -227,15 +235,13 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) void read_msi_msg(unsigned int irq, struct msi_msg *msg) { - struct irq_desc *desc = irq_to_desc(irq); + struct msi_desc *entry = get_irq_msi(irq); - read_msi_msg_desc(desc, msg); + __read_msi_msg(entry, msg); } -void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_desc_msi(desc); - /* Assert that the cache is valid, assuming that * valid messages are not all-zeroes. */ BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | @@ -246,15 +252,13 @@ void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) { - struct irq_desc *desc = irq_to_desc(irq); + struct msi_desc *entry = get_irq_msi(irq); - get_cached_msi_msg_desc(desc, msg); + __get_cached_msi_msg(entry, msg); } -void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_desc_msi(desc); - if (entry->dev->current_state != PCI_D0) { /* Don't touch the hardware now */ } else if (entry->msi_attrib.is_msix) { @@ -292,9 +296,9 @@ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) void write_msi_msg(unsigned int irq, struct msi_msg *msg) { - struct irq_desc *desc = irq_to_desc(irq); + struct msi_desc *entry = get_irq_msi(irq); - write_msi_msg_desc(desc, msg); + __write_msi_msg(entry, msg); } static void free_msi_irqs(struct pci_dev *dev) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c new file mode 100644 index 0000000..a87c498 --- /dev/null +++ b/drivers/pci/xen-pcifront.c @@ -0,0 +1,1148 @@ +/* + * Xen PCI Frontend. + * + * Author: Ryan Wilson <hap9@epoch.ncsc.mil> + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/grant_table.h> +#include <xen/page.h> +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/msi.h> +#include <xen/xenbus.h> +#include <xen/interface/io/pciif.h> +#include <asm/xen/pci.h> +#include <linux/interrupt.h> +#include <asm/atomic.h> +#include <linux/workqueue.h> +#include <linux/bitops.h> +#include <linux/time.h> + +#define INVALID_GRANT_REF (0) +#define INVALID_EVTCHN (-1) + +struct pci_bus_entry { + struct list_head list; + struct pci_bus *bus; +}; + +#define _PDEVB_op_active (0) +#define PDEVB_op_active (1 << (_PDEVB_op_active)) + +struct pcifront_device { + struct xenbus_device *xdev; + struct list_head root_buses; + + int evtchn; + int gnt_ref; + + int irq; + + /* Lock this when doing any operations in sh_info */ + spinlock_t sh_info_lock; + struct xen_pci_sharedinfo *sh_info; + struct work_struct op_work; + unsigned long flags; + +}; + +struct pcifront_sd { + int domain; + struct pcifront_device *pdev; +}; + +static inline struct pcifront_device * +pcifront_get_pdev(struct pcifront_sd *sd) +{ + return sd->pdev; +} + +static inline void pcifront_init_sd(struct pcifront_sd *sd, + unsigned int domain, unsigned int bus, + struct pcifront_device *pdev) +{ + sd->domain = domain; + sd->pdev = pdev; +} + +static DEFINE_SPINLOCK(pcifront_dev_lock); +static struct pcifront_device *pcifront_dev; + +static int verbose_request; +module_param(verbose_request, int, 0644); + +static int errno_to_pcibios_err(int errno) +{ + switch (errno) { + case XEN_PCI_ERR_success: + return PCIBIOS_SUCCESSFUL; + + case XEN_PCI_ERR_dev_not_found: + return PCIBIOS_DEVICE_NOT_FOUND; + + case XEN_PCI_ERR_invalid_offset: + case XEN_PCI_ERR_op_failed: + return PCIBIOS_BAD_REGISTER_NUMBER; + + case XEN_PCI_ERR_not_implemented: + return PCIBIOS_FUNC_NOT_SUPPORTED; + + case XEN_PCI_ERR_access_denied: + return PCIBIOS_SET_FAILED; + } + return errno; +} + +static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev) +{ + if (test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) + && !test_and_set_bit(_PDEVB_op_active, &pdev->flags)) { + dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n"); + schedule_work(&pdev->op_work); + } +} + +static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) +{ + int err = 0; + struct xen_pci_op *active_op = &pdev->sh_info->op; + unsigned long irq_flags; + evtchn_port_t port = pdev->evtchn; + unsigned irq = pdev->irq; + s64 ns, ns_timeout; + struct timeval tv; + + spin_lock_irqsave(&pdev->sh_info_lock, irq_flags); + + memcpy(active_op, op, sizeof(struct xen_pci_op)); + + /* Go */ + wmb(); + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(port); + + /* + * We set a poll timeout of 3 seconds but give up on return after + * 2 seconds. It is better to time out too late rather than too early + * (in the latter case we end up continually re-executing poll() with a + * timeout in the past). 1s difference gives plenty of slack for error. + */ + do_gettimeofday(&tv); + ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC; + + xen_clear_irq_pending(irq); + + while (test_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags)) { + xen_poll_irq_timeout(irq, jiffies + 3*HZ); + xen_clear_irq_pending(irq); + do_gettimeofday(&tv); + ns = timeval_to_ns(&tv); + if (ns > ns_timeout) { + dev_err(&pdev->xdev->dev, + "pciback not responding!!!\n"); + clear_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags); + err = XEN_PCI_ERR_dev_not_found; + goto out; + } + } + + /* + * We might lose backend service request since we + * reuse same evtchn with pci_conf backend response. So re-schedule + * aer pcifront service. + */ + if (test_bit(_XEN_PCIB_active, + (unsigned long *)&pdev->sh_info->flags)) { + dev_err(&pdev->xdev->dev, + "schedule aer pcifront service\n"); + schedule_pcifront_aer_op(pdev); + } + + memcpy(op, active_op, sizeof(struct xen_pci_op)); + + err = op->err; +out: + spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags); + return err; +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 *val) +{ + int err = 0; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_read, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n", + pci_domain_nr(bus), bus->number, PCI_SLOT(devfn), + PCI_FUNC(devfn), where, size); + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (verbose_request) + dev_info(&pdev->xdev->dev, "read got back value %x\n", + op.value); + + *val = op.value; + } else if (err == -ENODEV) { + /* No device here, pretend that it just returned 0 */ + err = 0; + *val = 0; + } + + return errno_to_pcibios_err(err); +} + +/* Access to this function is spinlocked in drivers/pci/access.c */ +static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn, + int where, int size, u32 val) +{ + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_write, + .domain = pci_domain_nr(bus), + .bus = bus->number, + .devfn = devfn, + .offset = where, + .size = size, + .value = val, + }; + struct pcifront_sd *sd = bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + if (verbose_request) + dev_info(&pdev->xdev->dev, + "write dev=%04x:%02x:%02x.%01x - " + "offset %x size %d val %x\n", + pci_domain_nr(bus), bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); + + return errno_to_pcibios_err(do_pci_op(pdev, &op)); +} + +struct pci_ops pcifront_bus_ops = { + .read = pcifront_bus_read, + .write = pcifront_bus_write, +}; + +#ifdef CONFIG_PCI_MSI +static int pci_frontend_enable_msix(struct pci_dev *dev, + int **vector, int nvec) +{ + int err; + int i; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + .value = nvec, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + struct msi_desc *entry; + + if (nvec > SH_INFO_MAX_VEC) { + dev_err(&dev->dev, "too much vector for pci frontend: %x." + " Increase SH_INFO_MAX_VEC.\n", nvec); + return -EINVAL; + } + + i = 0; + list_for_each_entry(entry, &dev->msi_list, list) { + op.msix_entries[i].entry = entry->msi_attrib.entry_nr; + /* Vector is useless at this point. */ + op.msix_entries[i].vector = -1; + i++; + } + + err = do_pci_op(pdev, &op); + + if (likely(!err)) { + if (likely(!op.value)) { + /* we get the result */ + for (i = 0; i < nvec; i++) + *(*vector+i) = op.msix_entries[i].vector; + return 0; + } else { + printk(KERN_DEBUG "enable msix get value %x\n", + op.value); + return op.value; + } + } else { + dev_err(&dev->dev, "enable msix get err %x\n", err); + return err; + } +} + +static void pci_frontend_disable_msix(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msix, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + + /* What should do for error ? */ + if (err) + dev_err(&dev->dev, "pci_disable_msix get err %x\n", err); +} + +static int pci_frontend_enable_msi(struct pci_dev *dev, int **vector) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_enable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (likely(!err)) { + *(*vector) = op.value; + } else { + dev_err(&dev->dev, "pci frontend enable msi failed for dev " + "%x:%x\n", op.bus, op.devfn); + err = -EINVAL; + } + return err; +} + +static void pci_frontend_disable_msi(struct pci_dev *dev) +{ + int err; + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_disable_msi, + .domain = pci_domain_nr(dev->bus), + .bus = dev->bus->number, + .devfn = dev->devfn, + }; + struct pcifront_sd *sd = dev->bus->sysdata; + struct pcifront_device *pdev = pcifront_get_pdev(sd); + + err = do_pci_op(pdev, &op); + if (err == XEN_PCI_ERR_dev_not_found) { + /* XXX No response from backend, what shall we do? */ + printk(KERN_DEBUG "get no response from backend for disable MSI\n"); + return; + } + if (err) + /* how can pciback notify us fail? */ + printk(KERN_DEBUG "get fake response frombackend\n"); +} + +static struct xen_pci_frontend_ops pci_frontend_ops = { + .enable_msi = pci_frontend_enable_msi, + .disable_msi = pci_frontend_disable_msi, + .enable_msix = pci_frontend_enable_msix, + .disable_msix = pci_frontend_disable_msix, +}; + +static void pci_frontend_registrar(int enable) +{ + if (enable) + xen_pci_frontend = &pci_frontend_ops; + else + xen_pci_frontend = NULL; +}; +#else +static inline void pci_frontend_registrar(int enable) { }; +#endif /* CONFIG_PCI_MSI */ + +/* Claim resources for the PCI frontend as-is, backend won't allow changes */ +static int pcifront_claim_resource(struct pci_dev *dev, void *data) +{ + struct pcifront_device *pdev = data; + int i; + struct resource *r; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + r = &dev->resource[i]; + + if (!r->parent && r->start && r->flags) { + dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", + pci_name(dev), i); + if (pci_claim_resource(dev, i)) { + dev_err(&pdev->xdev->dev, "Could not claim " + "resource %s/%d! Device offline. Try " + "giving less than 4GB to domain.\n", + pci_name(dev), i); + } + } + } + + return 0; +} + +static int __devinit pcifront_scan_bus(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus, + struct pci_bus *b) +{ + struct pci_dev *d; + unsigned int devfn; + + /* Scan the bus for functions and add. + * We omit handling of PCI bridge attachment because pciback prevents + * bridges from being exported. + */ + for (devfn = 0; devfn < 0x100; devfn++) { + d = pci_get_slot(b, devfn); + if (d) { + /* Device is already known. */ + pci_dev_put(d); + continue; + } + + d = pci_scan_single_device(b, devfn); + if (d) + dev_info(&pdev->xdev->dev, "New device on " + "%04x:%02x:%02x.%02x found.\n", domain, bus, + PCI_SLOT(devfn), PCI_FUNC(devfn)); + } + + return 0; +} + +static int __devinit pcifront_scan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + struct pci_bus *b; + struct pcifront_sd *sd = NULL; + struct pci_bus_entry *bus_entry = NULL; + int err = 0; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + err = -EINVAL; + goto err_out; + } +#endif + + dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", + domain, bus); + + bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); + sd = kmalloc(sizeof(*sd), GFP_KERNEL); + if (!bus_entry || !sd) { + err = -ENOMEM; + goto err_out; + } + pcifront_init_sd(sd, domain, bus, pdev); + + b = pci_scan_bus_parented(&pdev->xdev->dev, bus, + &pcifront_bus_ops, sd); + if (!b) { + dev_err(&pdev->xdev->dev, + "Error creating PCI Frontend Bus!\n"); + err = -ENOMEM; + goto err_out; + } + + bus_entry->bus = b; + + list_add(&bus_entry->list, &pdev->root_buses); + + /* pci_scan_bus_parented skips devices which do not have a have + * devfn==0. The pcifront_scan_bus enumerates all devfn. */ + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; + +err_out: + kfree(bus_entry); + kfree(sd); + + return err; +} + +static int __devinit pcifront_rescan_root(struct pcifront_device *pdev, + unsigned int domain, unsigned int bus) +{ + int err; + struct pci_bus *b; + +#ifndef CONFIG_PCI_DOMAINS + if (domain != 0) { + dev_err(&pdev->xdev->dev, + "PCI Root in non-zero PCI Domain! domain=%d\n", domain); + dev_err(&pdev->xdev->dev, + "Please compile with CONFIG_PCI_DOMAINS\n"); + return -EINVAL; + } +#endif + + dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n", + domain, bus); + + b = pci_find_bus(domain, bus); + if (!b) + /* If the bus is unknown, create it. */ + return pcifront_scan_root(pdev, domain, bus); + + err = pcifront_scan_bus(pdev, domain, bus, b); + + /* Claim resources before going "live" with our devices */ + pci_walk_bus(b, pcifront_claim_resource, pdev); + + /* Create SysFS and notify udev of the devices. Aka: "going live" */ + pci_bus_add_devices(b); + + return err; +} + +static void free_root_bus_devs(struct pci_bus *bus) +{ + struct pci_dev *dev; + + while (!list_empty(&bus->devices)) { + dev = container_of(bus->devices.next, struct pci_dev, + bus_list); + dev_dbg(&dev->dev, "removing device\n"); + pci_remove_bus_device(dev); + } +} + +static void pcifront_free_roots(struct pcifront_device *pdev) +{ + struct pci_bus_entry *bus_entry, *t; + + dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n"); + + list_for_each_entry_safe(bus_entry, t, &pdev->root_buses, list) { + list_del(&bus_entry->list); + + free_root_bus_devs(bus_entry->bus); + + kfree(bus_entry->bus->sysdata); + + device_unregister(bus_entry->bus->bridge); + pci_remove_bus(bus_entry->bus); + + kfree(bus_entry); + } +} + +static pci_ers_result_t pcifront_common_process(int cmd, + struct pcifront_device *pdev, + pci_channel_state_t state) +{ + pci_ers_result_t result; + struct pci_driver *pdrv; + int bus = pdev->sh_info->aer_op.bus; + int devfn = pdev->sh_info->aer_op.devfn; + struct pci_dev *pcidev; + int flag = 0; + + dev_dbg(&pdev->xdev->dev, + "pcifront AER process: cmd %x (bus:%x, devfn%x)", + cmd, bus, devfn); + result = PCI_ERS_RESULT_NONE; + + pcidev = pci_get_bus_and_slot(bus, devfn); + if (!pcidev || !pcidev->driver) { + dev_err(&pcidev->dev, + "device or driver is NULL\n"); + return result; + } + pdrv = pcidev->driver; + + if (get_driver(&pdrv->driver)) { + if (pdrv->err_handler && pdrv->err_handler->error_detected) { + dev_dbg(&pcidev->dev, + "trying to call AER service\n"); + if (pcidev) { + flag = 1; + switch (cmd) { + case XEN_PCI_OP_aer_detected: + result = pdrv->err_handler-> + error_detected(pcidev, state); + break; + case XEN_PCI_OP_aer_mmio: + result = pdrv->err_handler-> + mmio_enabled(pcidev); + break; + case XEN_PCI_OP_aer_slotreset: + result = pdrv->err_handler-> + slot_reset(pcidev); + break; + case XEN_PCI_OP_aer_resume: + pdrv->err_handler->resume(pcidev); + break; + default: + dev_err(&pdev->xdev->dev, + "bad request in aer recovery " + "operation!\n"); + + } + } + } + put_driver(&pdrv->driver); + } + if (!flag) + result = PCI_ERS_RESULT_NONE; + + return result; +} + + +static void pcifront_do_aer(struct work_struct *data) +{ + struct pcifront_device *pdev = + container_of(data, struct pcifront_device, op_work); + int cmd = pdev->sh_info->aer_op.cmd; + pci_channel_state_t state = + (pci_channel_state_t)pdev->sh_info->aer_op.err; + + /*If a pci_conf op is in progress, + we have to wait until it is done before service aer op*/ + dev_dbg(&pdev->xdev->dev, + "pcifront service aer bus %x devfn %x\n", + pdev->sh_info->aer_op.bus, pdev->sh_info->aer_op.devfn); + + pdev->sh_info->aer_op.err = pcifront_common_process(cmd, pdev, state); + + /* Post the operation to the guest. */ + wmb(); + clear_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(pdev->evtchn); + + /*in case of we lost an aer request in four lines time_window*/ + smp_mb__before_clear_bit(); + clear_bit(_PDEVB_op_active, &pdev->flags); + smp_mb__after_clear_bit(); + + schedule_pcifront_aer_op(pdev); + +} + +static irqreturn_t pcifront_handler_aer(int irq, void *dev) +{ + struct pcifront_device *pdev = dev; + schedule_pcifront_aer_op(pdev); + return IRQ_HANDLED; +} +static int pcifront_connect(struct pcifront_device *pdev) +{ + int err = 0; + + spin_lock(&pcifront_dev_lock); + + if (!pcifront_dev) { + dev_info(&pdev->xdev->dev, "Installing PCI frontend\n"); + pcifront_dev = pdev; + } else { + dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n"); + err = -EEXIST; + } + + spin_unlock(&pcifront_dev_lock); + + return err; +} + +static void pcifront_disconnect(struct pcifront_device *pdev) +{ + spin_lock(&pcifront_dev_lock); + + if (pdev == pcifront_dev) { + dev_info(&pdev->xdev->dev, + "Disconnecting PCI Frontend Buses\n"); + pcifront_dev = NULL; + } + + spin_unlock(&pcifront_dev_lock); +} +static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev; + + pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL); + if (pdev == NULL) + goto out; + + pdev->sh_info = + (struct xen_pci_sharedinfo *)__get_free_page(GFP_KERNEL); + if (pdev->sh_info == NULL) { + kfree(pdev); + pdev = NULL; + goto out; + } + pdev->sh_info->flags = 0; + + /*Flag for registering PV AER handler*/ + set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags); + + dev_set_drvdata(&xdev->dev, pdev); + pdev->xdev = xdev; + + INIT_LIST_HEAD(&pdev->root_buses); + + spin_lock_init(&pdev->sh_info_lock); + + pdev->evtchn = INVALID_EVTCHN; + pdev->gnt_ref = INVALID_GRANT_REF; + pdev->irq = -1; + + INIT_WORK(&pdev->op_work, pcifront_do_aer); + + dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n", + pdev, pdev->sh_info); +out: + return pdev; +} + +static void free_pdev(struct pcifront_device *pdev) +{ + dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev); + + pcifront_free_roots(pdev); + + /*For PCIE_AER error handling job*/ + flush_scheduled_work(); + + if (pdev->irq >= 0) + unbind_from_irqhandler(pdev->irq, pdev); + + if (pdev->evtchn != INVALID_EVTCHN) + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); + + if (pdev->gnt_ref != INVALID_GRANT_REF) + gnttab_end_foreign_access(pdev->gnt_ref, 0 /* r/w page */, + (unsigned long)pdev->sh_info); + else + free_page((unsigned long)pdev->sh_info); + + dev_set_drvdata(&pdev->xdev->dev, NULL); + + kfree(pdev); +} + +static int pcifront_publish_info(struct pcifront_device *pdev) +{ + int err = 0; + struct xenbus_transaction trans; + + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + if (err < 0) + goto out; + + pdev->gnt_ref = err; + + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); + if (err) + goto out; + + err = bind_evtchn_to_irqhandler(pdev->evtchn, pcifront_handler_aer, + 0, "pcifront", pdev); + + if (err < 0) + return err; + + pdev->irq = err; + +do_publish: + err = xenbus_transaction_start(&trans); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend " + "(start transaction)"); + goto out; + } + + err = xenbus_printf(trans, pdev->xdev->nodename, + "pci-op-ref", "%u", pdev->gnt_ref); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "event-channel", "%u", pdev->evtchn); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "magic", XEN_PCI_MAGIC); + + if (err) { + xenbus_transaction_end(trans, 1); + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend"); + goto out; + } else { + err = xenbus_transaction_end(trans, 0); + if (err == -EAGAIN) + goto do_publish; + else if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error completing transaction " + "for backend"); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateInitialised); + + dev_dbg(&pdev->xdev->dev, "publishing successful!\n"); + +out: + return err; +} + +static int __devinit pcifront_try_connect(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + char str[64]; + unsigned int domain, bus; + + + /* Only connect once */ + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateInitialised) + goto out; + + err = pcifront_connect(pdev); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error connecting PCI Frontend"); + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_scan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_scan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_try_disconnect(struct pcifront_device *pdev) +{ + int err = 0; + enum xenbus_state prev_state; + + + prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + + if (prev_state >= XenbusStateClosing) + goto out; + + if (prev_state == XenbusStateConnected) { + pcifront_free_roots(pdev); + pcifront_disconnect(pdev); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateClosed); + +out: + + return err; +} + +static int __devinit pcifront_attach_devices(struct pcifront_device *pdev) +{ + int err = -EFAULT; + int i, num_roots, len; + unsigned int domain, bus; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateReconfiguring) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err == -ENOENT) { + xenbus_dev_error(pdev->xdev, err, + "No PCI Roots found, trying 0000:00"); + err = pcifront_rescan_root(pdev, 0, 0); + num_roots = 0; + } else if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + for (i = 0; i < num_roots; i++) { + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + + err = pcifront_rescan_root(pdev, domain, bus); + if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error scanning PCI root %04x:%02x", + domain, bus); + goto out; + } + } + + xenbus_switch_state(pdev->xdev, XenbusStateConnected); + +out: + return err; +} + +static int pcifront_detach_devices(struct pcifront_device *pdev) +{ + int err = 0; + int i, num_devs; + unsigned int domain, bus, slot, func; + struct pci_bus *pci_bus; + struct pci_dev *pci_dev; + char str[64]; + + if (xenbus_read_driver_state(pdev->xdev->nodename) != + XenbusStateConnected) + goto out; + + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d", + &num_devs); + if (err != 1) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI devices"); + goto out; + } + + /* Find devices being detached and remove them. */ + for (i = 0; i < num_devs; i++) { + int l, state; + l = snprintf(str, sizeof(str), "state-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%d", + &state); + if (err != 1) + state = XenbusStateUnknown; + + if (state != XenbusStateClosing) + continue; + + /* Remove device. */ + l = snprintf(str, sizeof(str), "vdev-%d", i); + if (unlikely(l >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, + "%x:%x:%x.%x", &domain, &bus, &slot, &func); + if (err != 4) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI device %d", i); + goto out; + } + + pci_bus = pci_find_bus(domain, bus); + if (!pci_bus) { + dev_dbg(&pdev->xdev->dev, "Cannot get bus %04x:%02x\n", + domain, bus); + continue; + } + pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func)); + if (!pci_dev) { + dev_dbg(&pdev->xdev->dev, + "Cannot get PCI device %04x:%02x:%02x.%02x\n", + domain, bus, slot, func); + continue; + } + pci_remove_bus_device(pci_dev); + pci_dev_put(pci_dev); + + dev_dbg(&pdev->xdev->dev, + "PCI device %04x:%02x:%02x.%02x removed.\n", + domain, bus, slot, func); + } + + err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring); + +out: + return err; +} + +static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev, + enum xenbus_state be_state) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + + switch (be_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateClosed: + break; + + case XenbusStateConnected: + pcifront_try_connect(pdev); + break; + + case XenbusStateClosing: + dev_warn(&xdev->dev, "backend going away!\n"); + pcifront_try_disconnect(pdev); + break; + + case XenbusStateReconfiguring: + pcifront_detach_devices(pdev); + break; + + case XenbusStateReconfigured: + pcifront_attach_devices(pdev); + break; + } +} + +static int pcifront_xenbus_probe(struct xenbus_device *xdev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct pcifront_device *pdev = alloc_pdev(xdev); + + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, + "Error allocating pcifront_device struct"); + goto out; + } + + err = pcifront_publish_info(pdev); + if (err) + free_pdev(pdev); + +out: + return err; +} + +static int pcifront_xenbus_remove(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev); + if (pdev) + free_pdev(pdev); + + return 0; +} + +static const struct xenbus_device_id xenpci_ids[] = { + {"pci"}, + {""}, +}; + +static struct xenbus_driver xenbus_pcifront_driver = { + .name = "pcifront", + .owner = THIS_MODULE, + .ids = xenpci_ids, + .probe = pcifront_xenbus_probe, + .remove = pcifront_xenbus_remove, + .otherend_changed = pcifront_backend_changed, +}; + +static int __init pcifront_init(void) +{ + if (!xen_pv_domain() || xen_initial_domain()) + return -ENODEV; + + pci_frontend_registrar(1 /* enable */); + + return xenbus_register_frontend(&xenbus_pcifront_driver); +} + +static void __exit pcifront_cleanup(void) +{ + xenbus_unregister_driver(&xenbus_pcifront_driver); + pci_frontend_registrar(0 /* disable */); +} +module_init(pcifront_init); +module_exit(pcifront_cleanup); + +MODULE_DESCRIPTION("Xen PCI passthrough frontend."); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:pci"); |