From cb3d37a93cc59da400d27361fcda024d49210abd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 1 Jun 2015 21:03:59 +0200 Subject: acpi: add missing ssdt commit 5cb18b3d7bff2a83275ee98af2a14eb9e21c93ab TPM2 ACPI table support was missing a file, so build with iasl fails (build without iasl works since it uses the generated hex files). Reported-by: "Daniel P. Berrange" Signed-off-by: Michael S. Tsirkin --- hw/i386/ssdt-tpm-common.dsl | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 hw/i386/ssdt-tpm-common.dsl diff --git a/hw/i386/ssdt-tpm-common.dsl b/hw/i386/ssdt-tpm-common.dsl new file mode 100644 index 0000000..9da4970 --- /dev/null +++ b/hw/i386/ssdt-tpm-common.dsl @@ -0,0 +1,36 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +/* + * Common parts for TPM 1.2 and TPM 2 (with slight differences for PPI) + * to be #included + */ + + + External(\_SB.PCI0.ISA, DeviceObj) + Scope(\_SB.PCI0.ISA) { + /* TPM with emulated TPM TIS interface */ + Device (TPM) { + Name (_HID, EisaID ("PNP0C31")) + Name (_CRS, ResourceTemplate () + { + Memory32Fixed (ReadWrite, TPM_TIS_ADDR_BASE, TPM_TIS_ADDR_SIZE) + IRQNoFlags () {TPM_TIS_IRQ} + }) + Method (_STA, 0, NotSerialized) { + Return (0x0F) + } + } + } -- cgit v1.1 From 557772f26b361ade84acecb366fe6fdd2d55a6d9 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Mon, 1 Jun 2015 17:09:12 +0300 Subject: hw/q35: fix floppy controller definition in ich9 In DSDT FDC0 declares the IO region as IO(Decode16, 0x03F2, 0x03F2, 0x00, 0x04). Use the same in lpc_ich9 initialization code. Now the floppy drive is detected correctly on Windows. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/isa/lpc_ich9.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index dba7585..bc9afc6 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -494,7 +494,7 @@ static void ich9_lpc_machine_ready(Notifier *n, void *opaque) /* lpt */ pci_conf[0x82] |= 0x04; } - if (memory_region_present(io_as, 0x3f0)) { + if (memory_region_present(io_as, 0x3f2)) { /* floppy */ pci_conf[0x82] |= 0x08; } -- cgit v1.1 From 6652d0811c9463fbfb2d2d1cb2ec03f388145c5f Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 May 2015 16:26:07 +0800 Subject: virtio-pci: don't try to mask or unmask vqs without notifiers We should validate the vq index against nvqs_with_notifiers. Otherwise we may try to mask or unmask vector for vqs without notifiers (e.g control vq). This will lead qemu abort on kvm_irqchip_commit_routes() when trying to boot win8.1 guest. Fixes 851c2a75a6e80c8aa5e713864d98cfb512e7229b ("virtio-pci: speedup MSI-X masking and unmasking") Reported-by: Alex Williamson Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index d1ddc39..6d4f64e 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -632,21 +632,26 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, if (!virtio_queue_get_num(vdev, index)) { break; } - ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); - if (ret < 0) { - goto undo; + if (index < proxy->nvqs_with_notifiers) { + ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg); + if (ret < 0) { + goto undo; + } + ++unmasked; } vq = virtio_vector_next_queue(vq); - ++unmasked; } return 0; undo: vq = virtio_vector_first_queue(vdev, vector); - while (vq && --unmasked >= 0) { + while (vq && unmasked >= 0) { index = virtio_get_queue_index(vq); - virtio_pci_vq_vector_mask(proxy, index, vector); + if (index < proxy->nvqs_with_notifiers) { + virtio_pci_vq_vector_mask(proxy, index, vector); + --unmasked; + } vq = virtio_vector_next_queue(vq); } return ret; @@ -664,7 +669,9 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) if (!virtio_queue_get_num(vdev, index)) { break; } - virtio_pci_vq_vector_mask(proxy, index, vector); + if (index < proxy->nvqs_with_notifiers) { + virtio_pci_vq_vector_mask(proxy, index, vector); + } vq = virtio_vector_next_queue(vq); } } -- cgit v1.1 From 977ad992f14b29a7d4f18eaba42a705004545a64 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 2 Jun 2015 15:47:20 +0200 Subject: TPM: fix build with tpm disabled Failure was included on commit Signed-off-by: Juan Quintela Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/sysemu/tpm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h index c143890..c8afa17 100644 --- a/include/sysemu/tpm.h +++ b/include/sysemu/tpm.h @@ -32,11 +32,13 @@ TPMVersion tpm_tis_get_tpm_version(Object *obj); static inline TPMVersion tpm_get_version(void) { +#ifdef CONFIG_TPM Object *obj = object_resolve_path_type("", TYPE_TPM_TIS, NULL); if (obj) { return tpm_tis_get_tpm_version(obj); } +#endif return TPM_VERSION_UNSPEC; } -- cgit v1.1 From d5aaa1b0456033fc9ff723ac881ebe1b61360cca Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 3 Jun 2015 14:47:19 +0200 Subject: virtio: 64bit features fixups. Commit "019a3ed virtio: make features 64bit wide" missed a few changes, as I've noticed while trying to rebase the virtio-1 branch to latest master. This patch adds them. Signed-off-by: Gerd Hoffmann Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 2 +- hw/virtio/virtio.c | 2 +- include/hw/virtio/virtio.h | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 012ab7f..0d3bf0f 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -511,7 +511,7 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) return virtio_net_guest_offloads_by_features(vdev->guest_features); } -static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features) +static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) { VirtIONet *n = VIRTIO_NET(vdev); int i; diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 596e3d8..8ac6156 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1003,7 +1003,7 @@ void virtio_save(VirtIODevice *vdev, QEMUFile *f) vmstate_save_state(f, &vmstate_virtio, vdev, NULL); } -int virtio_set_features(VirtIODevice *vdev, uint32_t val) +int virtio_set_features(VirtIODevice *vdev, uint64_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); bool bad = (val & ~(vdev->host_features)) != 0; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 7222a90..2bb7c1a 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -98,7 +98,7 @@ typedef struct VirtioDeviceClass { DeviceUnrealize unrealize; uint64_t (*get_features)(VirtIODevice *vdev, uint64_t requested_features); uint64_t (*bad_features)(VirtIODevice *vdev); - void (*set_features)(VirtIODevice *vdev, uint32_t val); + void (*set_features)(VirtIODevice *vdev, uint64_t val); void (*get_config)(VirtIODevice *vdev, uint8_t *config); void (*set_config)(VirtIODevice *vdev, const uint8_t *config); void (*reset)(VirtIODevice *vdev); @@ -184,7 +184,7 @@ void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector); void virtio_set_status(VirtIODevice *vdev, uint8_t val); void virtio_reset(void *opaque); void virtio_update_irq(VirtIODevice *vdev); -int virtio_set_features(VirtIODevice *vdev, uint32_t val); +int virtio_set_features(VirtIODevice *vdev, uint64_t val); /* Base devices. */ typedef struct VirtIOBlkConf VirtIOBlkConf; @@ -230,19 +230,19 @@ VirtQueue *virtio_vector_next_queue(VirtQueue *vq); static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) { assert(fbit < 64); - *features |= (1 << fbit); + *features |= (1ULL << fbit); } static inline void virtio_clear_feature(uint64_t *features, unsigned int fbit) { assert(fbit < 64); - *features &= ~(1 << fbit); + *features &= ~(1ULL << fbit); } static inline bool __virtio_has_feature(uint64_t features, unsigned int fbit) { assert(fbit < 64); - return !!(features & (1 << fbit)); + return !!(features & (1ULL << fbit)); } static inline bool virtio_has_feature(VirtIODevice *vdev, unsigned int fbit) -- cgit v1.1 From ca9b46bcecc0f06882eec1b152b71f93a066da79 Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Wed, 13 May 2015 17:21:36 +0800 Subject: acpi: add acpi_send_gpe_event() to rise sci for hotplug Add a new API named acpi_send_gpe_event() to send hotplug SCI. This API can be used by pci, cpu and memory hotplug. This patch is rebased on master. Signed-off-by: Zhu Guihua Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Igor Mammedov --- hw/acpi/core.c | 7 +++++++ hw/acpi/cpu_hotplug.c | 3 +-- hw/acpi/memory_hotplug.c | 6 ++---- hw/acpi/pcihp.c | 7 ++----- include/hw/acpi/acpi.h | 10 ++++++++++ include/hw/acpi/memory_hotplug.h | 2 -- include/hw/acpi/pc-hotplug.h | 1 - 7 files changed, 22 insertions(+), 14 deletions(-) diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 51913d6..8623993 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -666,6 +666,13 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr) return val; } +void acpi_send_gpe_event(ACPIREGS *ar, qemu_irq irq, + AcpiGPEStatusBits status) +{ + ar->gpe.sts[0] |= status; + acpi_update_sci(ar, irq); +} + void acpi_update_sci(ACPIREGS *regs, qemu_irq irq) { int sci_level, pm1a_sts; diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c index b8ebfad..f5b9972 100644 --- a/hw/acpi/cpu_hotplug.c +++ b/hw/acpi/cpu_hotplug.c @@ -59,8 +59,7 @@ void acpi_cpu_plug_cb(ACPIREGS *ar, qemu_irq irq, return; } - ar->gpe.sts[0] |= ACPI_CPU_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_CPU_HOTPLUG_STATUS); } void acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index 34cef1e..2ff0d5c 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -241,8 +241,7 @@ void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st, mdev->is_inserting = true; /* do ACPI magic */ - ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS); return; } @@ -260,8 +259,7 @@ void acpi_memory_unplug_request_cb(ACPIREGS *ar, qemu_irq irq, mdev->is_removing = true; /* Do ACPI magic */ - ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS); } void acpi_memory_unplug_cb(MemHotplugState *mem_st, diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index 1e11af9..fbbc4dd 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -45,7 +45,6 @@ # define ACPI_PCIHP_DPRINTF(format, ...) do { } while (0) #endif -#define ACPI_PCI_HOTPLUG_STATUS 2 #define ACPI_PCIHP_ADDR 0xae00 #define ACPI_PCIHP_SIZE 0x0014 #define ACPI_PCIHP_LEGACY_SIZE 0x000f @@ -202,8 +201,7 @@ void acpi_pcihp_device_plug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s, s->acpi_pcihp_pci_status[bsel].up |= (1U << slot); - ar->gpe.sts[0] |= ACPI_PCI_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_PCI_HOTPLUG_STATUS); } void acpi_pcihp_device_unplug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s, @@ -220,8 +218,7 @@ void acpi_pcihp_device_unplug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s, s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); - ar->gpe.sts[0] |= ACPI_PCI_HOTPLUG_STATUS; - acpi_update_sci(ar, irq); + acpi_send_gpe_event(ar, irq, ACPI_PCI_HOTPLUG_STATUS); } static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size) diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index 1f678b4..9390997 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -91,6 +91,13 @@ /* PM2_CNT */ #define ACPI_BITMASK_ARB_DISABLE 0x0001 +/* These values are part of guest ABI, and can not be changed */ +typedef enum { + ACPI_PCI_HOTPLUG_STATUS = 2, + ACPI_CPU_HOTPLUG_STATUS = 4, + ACPI_MEMORY_HOTPLUG_STATUS = 8, +} AcpiGPEStatusBits; + /* structs */ typedef struct ACPIPMTimer ACPIPMTimer; typedef struct ACPIPM1EVT ACPIPM1EVT; @@ -172,6 +179,9 @@ void acpi_gpe_reset(ACPIREGS *ar); void acpi_gpe_ioport_writeb(ACPIREGS *ar, uint32_t addr, uint32_t val); uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr); +void acpi_send_gpe_event(ACPIREGS *ar, qemu_irq irq, + AcpiGPEStatusBits status); + void acpi_update_sci(ACPIREGS *acpi_regs, qemu_irq irq); /* acpi.c */ diff --git a/include/hw/acpi/memory_hotplug.h b/include/hw/acpi/memory_hotplug.h index 986223b..1342adb 100644 --- a/include/hw/acpi/memory_hotplug.h +++ b/include/hw/acpi/memory_hotplug.h @@ -5,8 +5,6 @@ #include "hw/acpi/acpi.h" #include "migration/vmstate.h" -#define ACPI_MEMORY_HOTPLUG_STATUS 8 - /** * MemStatus: * @is_removing: the memory device in slot has been requested to be ejected. diff --git a/include/hw/acpi/pc-hotplug.h b/include/hw/acpi/pc-hotplug.h index 0513c1c..77b1569 100644 --- a/include/hw/acpi/pc-hotplug.h +++ b/include/hw/acpi/pc-hotplug.h @@ -16,7 +16,6 @@ * ONLY DEFINEs are permited in this file since it's shared * between C and ASL code. */ -#define ACPI_CPU_HOTPLUG_STATUS 4 /* Limit for CPU arch IDs for CPU hotplug. All hotpluggable CPUs should * have CPUClass.get_arch_id() < ACPI_CPU_HOTPLUG_ID_LIMIT. -- cgit v1.1 From 32d9ca15bac63e8a7bad6dc1a4ab624e6d6d3b0f Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:22:56 +0300 Subject: acpi: add implementation of aml_while() term Commit 68e6b0af7 (acpi: add aml_while() term) added the definition of aml_while without the actual implementation. Implement the term. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/acpi/aml-build.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c index 2bebf23..0d4b324 100644 --- a/hw/acpi/aml-build.c +++ b/hw/acpi/aml-build.c @@ -687,6 +687,14 @@ Aml *aml_else(void) return var; } +/* ACPI 1.0b: 16.2.5.3 Type 1 Opcodes Encoding: DefWhile */ +Aml *aml_while(Aml *predicate) +{ + Aml *var = aml_bundle(0xA2 /* WhileOp */, AML_PACKAGE); + aml_append(var, predicate); + return var; +} + /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefMethod */ Aml *aml_method(const char *name, int arg_count) { -- cgit v1.1 From ce6a28ee057da3e4a587dada369e33a8486b0066 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:22:57 +0300 Subject: hw/pci: made pci_bus_is_root a PCIBusClass method Refactoring it as a method of PCIBusClass will allow different implementations for subclasses. Removed the assumption that the root bus does not have a parent device because is specific only to the default class implementation. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/pci/pci.c | 17 ++++++++++++++--- include/hw/pci/pci.h | 2 ++ include/hw/pci/pci_bus.h | 8 ++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 48f19a3..132d19e 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -88,9 +88,15 @@ static void pci_bus_unrealize(BusState *qbus, Error **errp) vmstate_unregister(NULL, &vmstate_pcibus, bus); } +static bool pcibus_is_root(PCIBus *bus) +{ + return !bus->parent_dev; +} + static void pci_bus_class_init(ObjectClass *klass, void *data) { BusClass *k = BUS_CLASS(klass); + PCIBusClass *pbc = PCI_BUS_CLASS(klass); k->print_dev = pcibus_dev_print; k->get_dev_path = pcibus_get_dev_path; @@ -98,12 +104,15 @@ static void pci_bus_class_init(ObjectClass *klass, void *data) k->realize = pci_bus_realize; k->unrealize = pci_bus_unrealize; k->reset = pcibus_reset; + + pbc->is_root = pcibus_is_root; } static const TypeInfo pci_bus_info = { .name = TYPE_PCI_BUS, .parent = TYPE_BUS, .instance_size = sizeof(PCIBus), + .class_size = sizeof(PCIBusClass), .class_init = pci_bus_class_init, }; @@ -278,7 +287,10 @@ PCIBus *pci_device_root_bus(const PCIDevice *d) { PCIBus *bus = d->bus; - while ((d = bus->parent_dev) != NULL) { + while (!pci_bus_is_root(bus)) { + d = bus->parent_dev; + assert(d != NULL); + bus = d->bus; } @@ -291,7 +303,6 @@ const char *pci_root_bus_path(PCIDevice *dev) PCIHostState *host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent); PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_GET_CLASS(host_bridge); - assert(!rootbus->parent_dev); assert(host_bridge->bus == rootbus); if (hc->root_bus_path) { @@ -325,7 +336,7 @@ bool pci_bus_is_express(PCIBus *bus) bool pci_bus_is_root(PCIBus *bus) { - return !bus->parent_dev; + return PCI_BUS_GET_CLASS(bus)->is_root(bus); } void pci_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent, diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 5d050c8..df05c96 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -340,6 +340,8 @@ typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); #define TYPE_PCI_BUS "PCI" #define PCI_BUS(obj) OBJECT_CHECK(PCIBus, (obj), TYPE_PCI_BUS) +#define PCI_BUS_CLASS(klass) OBJECT_CLASS_CHECK(PCIBusClass, (klass), TYPE_PCI_BUS) +#define PCI_BUS_GET_CLASS(obj) OBJECT_GET_CLASS(PCIBusClass, (obj), TYPE_PCI_BUS) #define TYPE_PCIE_BUS "PCIE" bool pci_bus_is_express(PCIBus *bus); diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index fabaeee..b5ba9c4 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -8,6 +8,14 @@ * use accessor functions in pci.h, pci_bridge.h */ +typedef struct PCIBusClass { + /*< private >*/ + BusClass parent_class; + /*< public >*/ + + bool (*is_root)(PCIBus *bus); +} PCIBusClass; + struct PCIBus { BusState qbus; PCIIOMMUFunc iommu_fn; -- cgit v1.1 From 602141d9974d726063907851528c89d617730156 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:22:58 +0300 Subject: hw/pci: made pci_bus_num a PCIBusClass method Refactoring it as a method of PCIBusClass will allow different implementations for subclasses. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/pci/pci.c | 13 ++++++++++--- include/hw/pci/pci_bus.h | 1 + 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 132d19e..2f24f74 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -93,6 +93,14 @@ static bool pcibus_is_root(PCIBus *bus) return !bus->parent_dev; } +static int pcibus_num(PCIBus *bus) +{ + if (pcibus_is_root(bus)) { + return 0; /* pci host bridge */ + } + return bus->parent_dev->config[PCI_SECONDARY_BUS]; +} + static void pci_bus_class_init(ObjectClass *klass, void *data) { BusClass *k = BUS_CLASS(klass); @@ -106,6 +114,7 @@ static void pci_bus_class_init(ObjectClass *klass, void *data) k->reset = pcibus_reset; pbc->is_root = pcibus_is_root; + pbc->bus_num = pcibus_num; } static const TypeInfo pci_bus_info = { @@ -390,9 +399,7 @@ PCIBus *pci_register_bus(DeviceState *parent, const char *name, int pci_bus_num(PCIBus *s) { - if (pci_bus_is_root(s)) - return 0; /* pci host bridge */ - return s->parent_dev->config[PCI_SECONDARY_BUS]; + return PCI_BUS_GET_CLASS(s)->bus_num(s); } static int get_pci_config_device(QEMUFile *f, void *pv, size_t size) diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index b5ba9c4..7b9939e 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -14,6 +14,7 @@ typedef struct PCIBusClass { /*< public >*/ bool (*is_root)(PCIBus *bus); + int (*bus_num)(PCIBus *bus); } PCIBusClass; struct PCIBus { -- cgit v1.1 From ca6c18556c5e9c4aac12489b960c3e4601e183bf Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:22:59 +0300 Subject: hw/i386: query only for q35/pc when looking for pci host bridge Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE anymore. On i386 arch we only have two pci hosts, so we can look only for them. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/acpi-build.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 2c7399b..50b93bd 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -240,13 +240,32 @@ static void acpi_get_misc_info(AcpiMiscInfo *info) info->applesmc_io_base = applesmc_port(); } +/* + * Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE. + * On i386 arch we only have two pci hosts, so we can look only for them. + */ +static Object *acpi_get_i386_pci_host(void) +{ + PCIHostState *host; + + host = OBJECT_CHECK(PCIHostState, + object_resolve_path("/machine/i440fx", NULL), + TYPE_PCI_HOST_BRIDGE); + if (!host) { + host = OBJECT_CHECK(PCIHostState, + object_resolve_path("/machine/q35", NULL), + TYPE_PCI_HOST_BRIDGE); + } + + return OBJECT(host); +} + static void acpi_get_pci_info(PcPciInfo *info) { Object *pci_host; - bool ambiguous; - pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous); - g_assert(!ambiguous); + + pci_host = acpi_get_i386_pci_host(); g_assert(pci_host); info->w32.begin = object_property_get_int(pci_host, @@ -957,10 +976,9 @@ build_ssdt(GArray *table_data, GArray *linker, { Object *pci_host; PCIBus *bus = NULL; - bool ambiguous; - pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous); - if (!ambiguous && pci_host) { + pci_host = acpi_get_i386_pci_host(); + if (pci_host) { bus = PCI_HOST_BRIDGE(pci_host)->bus; } @@ -1272,10 +1290,8 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg) { Object *pci_host; QObject *o; - bool ambiguous; - pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous); - g_assert(!ambiguous); + pci_host = acpi_get_i386_pci_host(); g_assert(pci_host); o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL); -- cgit v1.1 From 09e5b81922179b6c52b42fd27587e64b474036c7 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:00 +0300 Subject: hw/pci: extend PCI config access to support devices behind PXB PXB buses are assumed to be children of bus 0. Look for them while scanning the buses. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/pci/pci.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 2f24f74..3361d85 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1699,10 +1699,28 @@ static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num) { return !(pci_get_word(dev->config + PCI_BRIDGE_CONTROL) & PCI_BRIDGE_CTL_BUS_RESET) /* Don't walk the bus if it's reset. */ && - dev->config[PCI_SECONDARY_BUS] < bus_num && + dev->config[PCI_SECONDARY_BUS] <= bus_num && bus_num <= dev->config[PCI_SUBORDINATE_BUS]; } +/* Whether a given bus number is in a range of a root bus */ +static bool pci_root_bus_in_range(PCIBus *bus, int bus_num) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) { + PCIDevice *dev = bus->devices[i]; + + if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) { + if (pci_secondary_bus_in_range(dev, bus_num)) { + return true; + } + } + } + + return false; +} + static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) { PCIBus *sec; @@ -1724,12 +1742,18 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num) /* try child bus */ for (; bus; bus = sec) { QLIST_FOREACH(sec, &bus->child, sibling) { - assert(!pci_bus_is_root(sec)); - if (sec->parent_dev->config[PCI_SECONDARY_BUS] == bus_num) { + if (pci_bus_num(sec) == bus_num) { return sec; } - if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) { - break; + /* PXB buses assumed to be children of bus 0 */ + if (pci_bus_is_root(sec)) { + if (pci_root_bus_in_range(sec, bus_num)) { + break; + } + } else { + if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) { + break; + } } } } -- cgit v1.1 From a4894206e3672f8a5e5443d72b705495e022b638 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:01 +0300 Subject: hw/acpi: add support for i440fx 'snooping' root busses If the machine has extra root busses that are snooping to the i440fx host bridge, we need to add them to acpi in order to be properly detected by guests. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/acpi-build.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 50b93bd..a7e248d 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -626,6 +626,7 @@ build_ssdt(GArray *table_data, GArray *linker, uint32_t nr_mem = machine->ram_slots; unsigned acpi_cpus = guest_info->apic_id_limit; Aml *ssdt, *sb_scope, *scope, *pkg, *dev, *method, *crs, *field, *ifctx; + PCIBus *bus = NULL; int i; ssdt = init_aml_allocator(); @@ -637,6 +638,28 @@ build_ssdt(GArray *table_data, GArray *linker, /* Reserve space for header */ acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader)); + /* Extra PCI root buses are implemented only for i440fx */ + bus = find_i440fx(); + if (bus) { + QLIST_FOREACH(bus, &bus->child, sibling) { + uint8_t bus_num = pci_bus_num(bus); + + /* look only for expander root buses */ + if (!pci_bus_is_root(bus)) { + continue; + } + + scope = aml_scope("\\_SB"); + dev = aml_device("PC%.02X", bus_num); + aml_append(dev, + aml_name_decl("_UID", aml_string("PC%.02X", bus_num))); + aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A03"))); + aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + aml_append(scope, dev); + aml_append(ssdt, scope); + } + } + scope = aml_scope("\\_SB.PCI0"); /* build PCI0._CRS */ crs = aml_resource_template(); -- cgit v1.1 From 0d8935e3370e07f57651e43d2de9011d75c2a066 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:02 +0300 Subject: hw/apci: add _PRT method for extra PCI root busses Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/acpi-build.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a7e248d..451566f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -617,6 +617,86 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus, aml_append(parent_scope, method); } +/* + * initialize_route - Initialize the interrupt routing rule + * through a specific LINK: + * if (lnk_idx == idx) + * route using link 'link_name' + */ +static Aml *initialize_route(Aml *route, const char *link_name, + Aml *lnk_idx, int idx) +{ + Aml *if_ctx = aml_if(aml_equal(lnk_idx, aml_int(idx))); + Aml *pkg = aml_package(4); + + aml_append(pkg, aml_int(0)); + aml_append(pkg, aml_int(0)); + aml_append(pkg, aml_name("%s", link_name)); + aml_append(pkg, aml_int(0)); + aml_append(if_ctx, aml_store(pkg, route)); + + return if_ctx; +} + +/* + * build_prt - Define interrupt rounting rules + * + * Returns an array of 128 routes, one for each device, + * based on device location. + * The main goal is to equaly distribute the interrupts + * over the 4 existing ACPI links (works only for i440fx). + * The hash function is (slot + pin) & 3 -> "LNK[D|A|B|C]". + * + */ +static Aml *build_prt(void) +{ + Aml *method, *while_ctx, *pin, *res; + + method = aml_method("_PRT", 0); + res = aml_local(0); + pin = aml_local(1); + aml_append(method, aml_store(aml_package(128), res)); + aml_append(method, aml_store(aml_int(0), pin)); + + /* while (pin < 128) */ + while_ctx = aml_while(aml_lless(pin, aml_int(128))); + { + Aml *slot = aml_local(2); + Aml *lnk_idx = aml_local(3); + Aml *route = aml_local(4); + + /* slot = pin >> 2 */ + aml_append(while_ctx, + aml_store(aml_shiftright(pin, aml_int(2)), slot)); + /* lnk_idx = (slot + pin) & 3 */ + aml_append(while_ctx, + aml_store(aml_and(aml_add(pin, slot), aml_int(3)), lnk_idx)); + + /* route[2] = "LNK[D|A|B|C]", selection based on pin % 3 */ + aml_append(while_ctx, initialize_route(route, "LNKD", lnk_idx, 0)); + aml_append(while_ctx, initialize_route(route, "LNKA", lnk_idx, 1)); + aml_append(while_ctx, initialize_route(route, "LNKB", lnk_idx, 2)); + aml_append(while_ctx, initialize_route(route, "LNKC", lnk_idx, 3)); + + /* route[0] = 0x[slot]FFFF */ + aml_append(while_ctx, + aml_store(aml_or(aml_shiftleft(slot, aml_int(16)), aml_int(0xFFFF)), + aml_index(route, aml_int(0)))); + /* route[1] = pin & 3 */ + aml_append(while_ctx, + aml_store(aml_and(pin, aml_int(3)), aml_index(route, aml_int(1)))); + /* res[pin] = route */ + aml_append(while_ctx, aml_store(route, aml_index(res, pin))); + /* pin++ */ + aml_append(while_ctx, aml_increment(pin)); + } + aml_append(method, while_ctx); + /* return res*/ + aml_append(method, aml_return(res)); + + return method; +} + static void build_ssdt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc, @@ -655,6 +735,7 @@ build_ssdt(GArray *table_data, GArray *linker, aml_name_decl("_UID", aml_string("PC%.02X", bus_num))); aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A03"))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + aml_append(dev, build_prt()); aml_append(scope, dev); aml_append(ssdt, scope); } -- cgit v1.1 From a43c6e276231e8040203940cb07be00387686e87 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:03 +0300 Subject: hw/acpi: add _CRS method for extra root busses Save the IO/mem/bus numbers ranges assigned to the extra root busses to be removed from the root bus 0 range. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/acpi-build.c | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 451566f..8b1e6b1 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -697,6 +697,137 @@ static Aml *build_prt(void) return method; } +typedef struct CrsRangeEntry { + uint64_t base; + uint64_t limit; +} CrsRangeEntry; + +static void crs_range_insert(GPtrArray *ranges, uint64_t base, uint64_t limit) +{ + CrsRangeEntry *entry; + + entry = g_malloc(sizeof(*entry)); + entry->base = base; + entry->limit = limit; + + g_ptr_array_add(ranges, entry); +} + +static void crs_range_free(gpointer data) +{ + CrsRangeEntry *entry = (CrsRangeEntry *)data; + g_free(entry); +} + +static Aml *build_crs(PCIHostState *host, + GPtrArray *io_ranges, GPtrArray *mem_ranges) +{ + Aml *crs = aml_resource_template(); + uint8_t max_bus = pci_bus_num(host->bus); + uint8_t type; + int devfn; + + for (devfn = 0; devfn < ARRAY_SIZE(host->bus->devices); devfn++) { + int i; + uint64_t range_base, range_limit; + PCIDevice *dev = host->bus->devices[devfn]; + + if (!dev) { + continue; + } + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + PCIIORegion *r = &dev->io_regions[i]; + + range_base = r->addr; + range_limit = r->addr + r->size - 1; + + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { + aml_append(crs, + aml_word_io(aml_min_fixed, aml_max_fixed, + aml_pos_decode, aml_entire_range, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(io_ranges, range_base, range_limit); + } else { /* "memory" */ + aml_append(crs, + aml_dword_memory(aml_pos_decode, aml_min_fixed, + aml_max_fixed, aml_non_cacheable, + aml_ReadWrite, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + } + + type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION; + if (type == PCI_HEADER_TYPE_BRIDGE) { + uint8_t subordinate = dev->config[PCI_SUBORDINATE_BUS]; + if (subordinate > max_bus) { + max_bus = subordinate; + } + + range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); + range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); + aml_append(crs, + aml_word_io(aml_min_fixed, aml_max_fixed, + aml_pos_decode, aml_entire_range, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(io_ranges, range_base, range_limit); + + range_base = + pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + range_limit = + pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); + aml_append(crs, + aml_dword_memory(aml_pos_decode, aml_min_fixed, + aml_max_fixed, aml_non_cacheable, + aml_ReadWrite, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + + range_base = + pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + range_limit = + pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); + aml_append(crs, + aml_dword_memory(aml_pos_decode, aml_min_fixed, + aml_max_fixed, aml_non_cacheable, + aml_ReadWrite, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } + } + + aml_append(crs, + aml_word_bus_number(aml_min_fixed, aml_max_fixed, aml_pos_decode, + 0, + pci_bus_num(host->bus), + max_bus, + 0, + max_bus - pci_bus_num(host->bus) + 1)); + + return crs; +} + static void build_ssdt(GArray *table_data, GArray *linker, AcpiCpuInfo *cpu, AcpiPmInfo *pm, AcpiMiscInfo *misc, @@ -707,6 +838,8 @@ build_ssdt(GArray *table_data, GArray *linker, unsigned acpi_cpus = guest_info->apic_id_limit; Aml *ssdt, *sb_scope, *scope, *pkg, *dev, *method, *crs, *field, *ifctx; PCIBus *bus = NULL; + GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); + GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); int i; ssdt = init_aml_allocator(); @@ -736,9 +869,15 @@ build_ssdt(GArray *table_data, GArray *linker, aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A03"))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); aml_append(dev, build_prt()); + crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), + io_ranges, mem_ranges); + aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); aml_append(ssdt, scope); } + + g_ptr_array_free(io_ranges, true); + g_ptr_array_free(mem_ranges, true); } scope = aml_scope("\\_SB.PCI0"); -- cgit v1.1 From dcdca29655f774568f30a82b7fe0190b4bd38802 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:04 +0300 Subject: hw/acpi: remove from root bus 0 the crs resources used by other buses. If multiple root buses are used, root bus 0 cannot use all the pci holes ranges. Remove the IO/mem ranges used by the other primary buses. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/acpi-build.c | 118 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 91 insertions(+), 27 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 8b1e6b1..1290983 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -719,6 +719,50 @@ static void crs_range_free(gpointer data) g_free(entry); } +static gint crs_range_compare(gconstpointer a, gconstpointer b) +{ + CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; + CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; + + return (int64_t)entry_a->base - (int64_t)entry_b->base; +} + +/* + * crs_replace_with_free_ranges - given the 'used' ranges within [start - end] + * interval, computes the 'free' ranges from the same interval. + * Example: If the input array is { [a1 - a2],[b1 - b2] }, the function + * will return { [base - a1], [a2 - b1], [b2 - limit] }. + */ +static void crs_replace_with_free_ranges(GPtrArray *ranges, + uint64_t start, uint64_t end) +{ + GPtrArray *free_ranges = g_ptr_array_new_with_free_func(crs_range_free); + uint64_t free_base = start; + int i; + + g_ptr_array_sort(ranges, crs_range_compare); + for (i = 0; i < ranges->len; i++) { + CrsRangeEntry *used = g_ptr_array_index(ranges, i); + + if (free_base < used->base) { + crs_range_insert(free_ranges, free_base, used->base - 1); + } + + free_base = used->limit + 1; + } + + if (free_base < end) { + crs_range_insert(free_ranges, free_base, end); + } + + g_ptr_array_set_size(ranges, 0); + for (i = 0; i < free_ranges->len; i++) { + g_ptr_array_add(ranges, g_ptr_array_index(free_ranges, i)); + } + + g_ptr_array_free(free_ranges, false); +} + static Aml *build_crs(PCIHostState *host, GPtrArray *io_ranges, GPtrArray *mem_ranges) { @@ -744,8 +788,8 @@ static Aml *build_crs(PCIHostState *host, if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { aml_append(crs, - aml_word_io(aml_min_fixed, aml_max_fixed, - aml_pos_decode, aml_entire_range, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, 0, range_base, range_limit, @@ -754,9 +798,9 @@ static Aml *build_crs(PCIHostState *host, crs_range_insert(io_ranges, range_base, range_limit); } else { /* "memory" */ aml_append(crs, - aml_dword_memory(aml_pos_decode, aml_min_fixed, - aml_max_fixed, aml_non_cacheable, - aml_ReadWrite, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, 0, range_base, range_limit, @@ -776,8 +820,8 @@ static Aml *build_crs(PCIHostState *host, range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); aml_append(crs, - aml_word_io(aml_min_fixed, aml_max_fixed, - aml_pos_decode, aml_entire_range, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, 0, range_base, range_limit, @@ -790,9 +834,9 @@ static Aml *build_crs(PCIHostState *host, range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); aml_append(crs, - aml_dword_memory(aml_pos_decode, aml_min_fixed, - aml_max_fixed, aml_non_cacheable, - aml_ReadWrite, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, 0, range_base, range_limit, @@ -805,9 +849,9 @@ static Aml *build_crs(PCIHostState *host, range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); aml_append(crs, - aml_dword_memory(aml_pos_decode, aml_min_fixed, - aml_max_fixed, aml_non_cacheable, - aml_ReadWrite, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, 0, range_base, range_limit, @@ -818,7 +862,7 @@ static Aml *build_crs(PCIHostState *host, } aml_append(crs, - aml_word_bus_number(aml_min_fixed, aml_max_fixed, aml_pos_decode, + aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, 0, pci_bus_num(host->bus), max_bus, @@ -840,6 +884,8 @@ build_ssdt(GArray *table_data, GArray *linker, PCIBus *bus = NULL; GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free); GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free); + CrsRangeEntry *entry; + int root_bus_limit = 0xFF; int i; ssdt = init_aml_allocator(); @@ -862,6 +908,10 @@ build_ssdt(GArray *table_data, GArray *linker, continue; } + if (bus_num < root_bus_limit) { + root_bus_limit = bus_num - 1; + } + scope = aml_scope("\\_SB"); dev = aml_device("PC%.02X", bus_num); aml_append(dev, @@ -875,9 +925,6 @@ build_ssdt(GArray *table_data, GArray *linker, aml_append(scope, dev); aml_append(ssdt, scope); } - - g_ptr_array_free(io_ranges, true); - g_ptr_array_free(mem_ranges, true); } scope = aml_scope("\\_SB.PCI0"); @@ -885,26 +932,40 @@ build_ssdt(GArray *table_data, GArray *linker, crs = aml_resource_template(); aml_append(crs, aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, - 0x0000, 0x0000, 0x00FF, 0x0000, 0x0100)); + 0x0000, 0x0, root_bus_limit, + 0x0000, root_bus_limit + 1)); aml_append(crs, aml_io(AML_DECODE16, 0x0CF8, 0x0CF8, 0x01, 0x08)); aml_append(crs, aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE, AML_ENTIRE_RANGE, 0x0000, 0x0000, 0x0CF7, 0x0000, 0x0CF8)); - aml_append(crs, - aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, - AML_POS_DECODE, AML_ENTIRE_RANGE, - 0x0000, 0x0D00, 0xFFFF, 0x0000, 0xF300)); + + crs_replace_with_free_ranges(io_ranges, 0x0D00, 0xFFFF); + for (i = 0; i < io_ranges->len; i++) { + entry = g_ptr_array_index(io_ranges, i); + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0x0000, entry->base, entry->limit, + 0x0000, entry->limit - entry->base + 1)); + } + aml_append(crs, aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, AML_CACHEABLE, AML_READ_WRITE, 0, 0x000A0000, 0x000BFFFF, 0, 0x00020000)); - aml_append(crs, - aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, - AML_NON_CACHEABLE, AML_READ_WRITE, - 0, pci->w32.begin, pci->w32.end - 1, 0, - pci->w32.end - pci->w32.begin)); + + crs_replace_with_free_ranges(mem_ranges, pci->w32.begin, pci->w32.end - 1); + for (i = 0; i < mem_ranges->len; i++) { + entry = g_ptr_array_index(mem_ranges, i); + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, + AML_NON_CACHEABLE, AML_READ_WRITE, + 0, entry->base, entry->limit, + 0, entry->limit - entry->base + 1)); + } + if (pci->w64.begin) { aml_append(crs, aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED, @@ -927,6 +988,9 @@ build_ssdt(GArray *table_data, GArray *linker, aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); + g_ptr_array_free(io_ranges, true); + g_ptr_array_free(mem_ranges, true); + /* reserve PCIHP resources */ if (pm->pcihp_io_len) { dev = aml_device("PHPR"); -- cgit v1.1 From cb2ed8b3c66284f226c523231e2c09e60bbb34bb Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:05 +0300 Subject: hw/pci: removed 'rootbus nr is 0' assumption from qmp_pci_query Use the newer pci_bus_num to correctly get the root bus number. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/pci/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 3361d85..a956640 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1594,7 +1594,8 @@ PciInfoList *qmp_query_pci(Error **errp) QLIST_FOREACH(host_bridge, &pci_host_bridges, next) { info = g_malloc0(sizeof(*info)); - info->value = qmp_query_pci_bus(host_bridge->bus, 0); + info->value = qmp_query_pci_bus(host_bridge->bus, + pci_bus_num(host_bridge->bus)); /* XXX: waiting for the qapi to support GSList */ if (!cur_item) { -- cgit v1.1 From 40d14bef8012087ade60f254487d31db822a1a44 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:06 +0300 Subject: hw/pci: introduce PCI Expander Bridge (PXB) PXB is a "light-weight" host bridge whose purpose is to enable the main host bridge to support multiple PCI root buses for pc machines. As oposed to PCI-2-PCI bridge's secondary bus, PXB's bus is a primary bus and can be associated with a NUMA node (different from the main host bridge) allowing the guest OS to recognize the proximity of a pass-through device to other resources as RAM and CPUs. The PXB is composed from: - A primary PCI bus (can be associated with a NUMA node) Acts like a normal pci bus and from the functionality point of view is an "expansion" of the bus behind the main host bridge. - A pci-2-pci bridge behind the primary PCI bus where the actual devices will be attached. - A host-bridge PCI device Situated on the bus behind the main host bridge, allows the BIOS to configure the bus number and IO/mem resources. It does not have its own config/data register for configuration cycles, this being handled by the main host bridge. - A host-bridge sysbus to comply with QEMU current design. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/pci-bridge/Makefile.objs | 1 + hw/pci-bridge/pci_expander_bridge.c | 196 ++++++++++++++++++++++++++++++++++++ include/hw/pci/pci.h | 1 + 3 files changed, 198 insertions(+) create mode 100644 hw/pci-bridge/pci_expander_bridge.c diff --git a/hw/pci-bridge/Makefile.objs b/hw/pci-bridge/Makefile.objs index 96c596e..f2adfe3 100644 --- a/hw/pci-bridge/Makefile.objs +++ b/hw/pci-bridge/Makefile.objs @@ -1,4 +1,5 @@ common-obj-y += pci_bridge_dev.o +common-obj-y += pci_expander_bridge.o common-obj-$(CONFIG_XIO3130) += xio3130_upstream.o xio3130_downstream.o common-obj-$(CONFIG_IOH3420) += ioh3420.o common-obj-$(CONFIG_I82801B11) += i82801b11.o diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c new file mode 100644 index 0000000..88e85c1 --- /dev/null +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -0,0 +1,196 @@ +/* + * PCI Expander Bridge Device Emulation + * + * Copyright (C) 2015 Red Hat Inc + * + * Authors: + * Marcel Apfelbaum + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_host.h" +#include "hw/pci/pci_bus.h" +#include "hw/i386/pc.h" +#include "qemu/range.h" +#include "qemu/error-report.h" + +#define TYPE_PXB_BUS "pxb-bus" +#define PXB_BUS(obj) OBJECT_CHECK(PXBBus, (obj), TYPE_PXB_BUS) + +typedef struct PXBBus { + /*< private >*/ + PCIBus parent_obj; + /*< public >*/ + + char bus_path[8]; +} PXBBus; + +#define TYPE_PXB_DEVICE "pxb" +#define PXB_DEV(obj) OBJECT_CHECK(PXBDev, (obj), TYPE_PXB_DEVICE) + +typedef struct PXBDev { + /*< private >*/ + PCIDevice parent_obj; + /*< public >*/ + + uint8_t bus_nr; +} PXBDev; + +#define TYPE_PXB_HOST "pxb-host" + +static int pxb_bus_num(PCIBus *bus) +{ + PXBDev *pxb = PXB_DEV(bus->parent_dev); + + return pxb->bus_nr; +} + +static bool pxb_is_root(PCIBus *bus) +{ + return true; /* by definition */ +} + +static void pxb_bus_class_init(ObjectClass *class, void *data) +{ + PCIBusClass *pbc = PCI_BUS_CLASS(class); + + pbc->bus_num = pxb_bus_num; + pbc->is_root = pxb_is_root; +} + +static const TypeInfo pxb_bus_info = { + .name = TYPE_PXB_BUS, + .parent = TYPE_PCI_BUS, + .instance_size = sizeof(PXBBus), + .class_init = pxb_bus_class_init, +}; + +static const char *pxb_host_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + PXBBus *bus = PXB_BUS(rootbus); + + snprintf(bus->bus_path, 8, "0000:%02x", pxb_bus_num(rootbus)); + return bus->bus_path; +} + +static void pxb_host_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class); + + dc->fw_name = "pci"; + hc->root_bus_path = pxb_host_root_bus_path; +} + +static const TypeInfo pxb_host_info = { + .name = TYPE_PXB_HOST, + .parent = TYPE_PCI_HOST_BRIDGE, + .class_init = pxb_host_class_init, +}; + +/* + * Registers the PXB bus as a child of the i440fx root bus. + * + * Returns 0 on successs, -1 if i440fx host was not + * found or the bus number is already in use. + */ +static int pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus) +{ + PCIBus *bus = dev->bus; + int pxb_bus_num = pci_bus_num(pxb_bus); + + if (bus->parent_dev) { + error_report("PXB devices can be attached only to root bus."); + return -1; + } + + QLIST_FOREACH(bus, &bus->child, sibling) { + if (pci_bus_num(bus) == pxb_bus_num) { + error_report("Bus %d is already in use.", pxb_bus_num); + return -1; + } + } + QLIST_INSERT_HEAD(&dev->bus->child, pxb_bus, sibling); + + return 0; +} + +static int pxb_dev_initfn(PCIDevice *dev) +{ + PXBDev *pxb = PXB_DEV(dev); + DeviceState *ds, *bds; + PCIBus *bus; + const char *dev_name = NULL; + + if (dev->qdev.id && *dev->qdev.id) { + dev_name = dev->qdev.id; + } + + ds = qdev_create(NULL, TYPE_PXB_HOST); + bus = pci_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS); + + bus->parent_dev = dev; + bus->address_space_mem = dev->bus->address_space_mem; + bus->address_space_io = dev->bus->address_space_io; + bus->map_irq = pci_swizzle_map_irq_fn; + + bds = qdev_create(BUS(bus), "pci-bridge"); + bds->id = dev_name; + qdev_prop_set_uint8(bds, "chassis_nr", pxb->bus_nr); + + PCI_HOST_BRIDGE(ds)->bus = bus; + + if (pxb_register_bus(dev, bus)) { + return -EINVAL; + } + + qdev_init_nofail(ds); + qdev_init_nofail(bds); + + pci_word_test_and_set_mask(dev->config + PCI_STATUS, + PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK); + pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_HOST); + + return 0; +} + +static Property pxb_dev_properties[] = { + /* Note: 0 is not a legal a PXB bus number. */ + DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pxb_dev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->init = pxb_dev_initfn; + k->vendor_id = PCI_VENDOR_ID_REDHAT; + k->device_id = PCI_DEVICE_ID_REDHAT_PXB; + k->class_id = PCI_CLASS_BRIDGE_HOST; + + dc->desc = "PCI Expander Bridge"; + dc->props = pxb_dev_properties; +} + +static const TypeInfo pxb_dev_info = { + .name = TYPE_PXB_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PXBDev), + .class_init = pxb_dev_class_init, +}; + +static void pxb_register_types(void) +{ + type_register_static(&pxb_bus_info); + type_register_static(&pxb_host_info); + type_register_static(&pxb_dev_info); +} + +type_init(pxb_register_types) diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index df05c96..7940700 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -91,6 +91,7 @@ #define PCI_DEVICE_ID_REDHAT_ROCKER 0x0006 #define PCI_DEVICE_ID_REDHAT_SDHCI 0x0007 #define PCI_DEVICE_ID_REDHAT_PCIE_HOST 0x0008 +#define PCI_DEVICE_ID_REDHAT_PXB 0x0009 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100 #define FMT_PCIBUS PRIx64 -- cgit v1.1 From 2118196bb3795a43bf708c37bdcf4b3c33778ccb Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:07 +0300 Subject: hw/pci: inform bios if the system has extra pci root buses The bios looks for 'etc/extra-pci-roots' to decide if is going to scan further buses after bus 0 tree. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/pc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 1eb1db0..a93972f 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -30,6 +30,7 @@ #include "hw/block/fdc.h" #include "hw/ide.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "monitor/monitor.h" #include "hw/nvram/fw_cfg.h" #include "hw/timer/hpet.h" @@ -1119,6 +1120,25 @@ void pc_guest_info_machine_done(Notifier *notifier, void *data) PcGuestInfoState *guest_info_state = container_of(notifier, PcGuestInfoState, machine_done); + PCIBus *bus = find_i440fx(); + + if (bus) { + int extra_hosts = 0; + + QLIST_FOREACH(bus, &bus->child, sibling) { + /* look for expander root buses */ + if (pci_bus_is_root(bus)) { + extra_hosts++; + } + } + if (extra_hosts && guest_info_state->info.fw_cfg) { + uint64_t *val = g_malloc(sizeof(*val)); + *val = cpu_to_le64(extra_hosts); + fw_cfg_add_file(guest_info_state->info.fw_cfg, + "etc/extra-pci-roots", val, sizeof(*val)); + } + } + acpi_setup(&guest_info_state->info); } -- cgit v1.1 From 0639b00d055b313930c23c4d6c9ebfb4af61c00c Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:08 +0300 Subject: hw/pxb: add map_irq func The bios does not index the pxb slot number when it computes the IRQ because it resides on bus 0 and not on the current bus. However Qemu routes the irq through bus 0 and adds the pxb slot to the IRQ computation of the PXB device. Synchronize between bios and Qemu by canceling pxb's effect. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/pci-bridge/pci_expander_bridge.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index 88e85c1..8660a00 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -120,6 +120,24 @@ static int pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus) return 0; } +static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin) +{ + PCIDevice *pxb = pci_dev->bus->parent_dev; + + /* + * The bios does not index the pxb slot number when + * it computes the IRQ because it resides on bus 0 + * and not on the current bus. + * However QEMU routes the irq through bus 0 and adds + * the pxb slot to the IRQ computation of the PXB + * device. + * + * Synchronize between bios and QEMU by canceling + * pxb's effect. + */ + return pin - PCI_SLOT(pxb->devfn); +} + static int pxb_dev_initfn(PCIDevice *dev) { PXBDev *pxb = PXB_DEV(dev); @@ -137,7 +155,7 @@ static int pxb_dev_initfn(PCIDevice *dev) bus->parent_dev = dev; bus->address_space_mem = dev->bus->address_space_mem; bus->address_space_io = dev->bus->address_space_io; - bus->map_irq = pci_swizzle_map_irq_fn; + bus->map_irq = pxb_map_irq_fn; bds = qdev_create(BUS(bus), "pci-bridge"); bds->id = dev_name; -- cgit v1.1 From 6a3042b23bbb1fa92c00ea9267c830e7f2e99313 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:09 +0300 Subject: hw/pci: add support for NUMA nodes PCI root buses can be attached to a specific NUMA node. PCI buses are not attached by default to a NUMA node. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/pci/pci.c | 11 +++++++++++ include/hw/pci/pci.h | 1 + include/hw/pci/pci_bus.h | 1 + include/sysemu/sysemu.h | 1 + 4 files changed, 14 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index a956640..4989408 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -101,6 +101,11 @@ static int pcibus_num(PCIBus *bus) return bus->parent_dev->config[PCI_SECONDARY_BUS]; } +static uint16_t pcibus_numa_node(PCIBus *bus) +{ + return NUMA_NODE_UNASSIGNED; +} + static void pci_bus_class_init(ObjectClass *klass, void *data) { BusClass *k = BUS_CLASS(klass); @@ -115,6 +120,7 @@ static void pci_bus_class_init(ObjectClass *klass, void *data) pbc->is_root = pcibus_is_root; pbc->bus_num = pcibus_num; + pbc->numa_node = pcibus_numa_node; } static const TypeInfo pci_bus_info = { @@ -402,6 +408,11 @@ int pci_bus_num(PCIBus *s) return PCI_BUS_GET_CLASS(s)->bus_num(s); } +int pci_bus_numa_node(PCIBus *bus) +{ + return PCI_BUS_GET_CLASS(bus)->numa_node(bus); +} + static int get_pci_config_device(QEMUFile *f, void *pv, size_t size) { PCIDevice *s = container_of(pv, PCIDevice, config); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 7940700..c2a427f 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -382,6 +382,7 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, PCIDevice *pci_vga_init(PCIBus *bus); int pci_bus_num(PCIBus *s); +int pci_bus_numa_node(PCIBus *bus); void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque), void *opaque); diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h index 7b9939e..403fec6 100644 --- a/include/hw/pci/pci_bus.h +++ b/include/hw/pci/pci_bus.h @@ -15,6 +15,7 @@ typedef struct PCIBusClass { bool (*is_root)(PCIBus *bus); int (*bus_num)(PCIBus *bus); + uint16_t (*numa_node)(PCIBus *bus); } PCIBusClass; struct PCIBus { diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 8a52934..4fcc20e 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -137,6 +137,7 @@ extern const char *mem_path; extern int mem_prealloc; #define MAX_NODES 128 +#define NUMA_NODE_UNASSIGNED MAX_NODES /* The following shall be true for all CPUs: * cpu->cpu_index < max_cpus <= MAX_CPUMASK_BITS -- cgit v1.1 From 0e79e51a7dcbd4fde5738d713b60f0fb0321f1af Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:10 +0300 Subject: hw/pxb: add numa_node parameter The pxb can be attach to and existing numa node by specifying numa_node option that equals the desired numa nodeid. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/acpi-build.c | 6 ++++++ hw/pci-bridge/pci_expander_bridge.c | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1290983..45c36a8 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -902,6 +902,7 @@ build_ssdt(GArray *table_data, GArray *linker, if (bus) { QLIST_FOREACH(bus, &bus->child, sibling) { uint8_t bus_num = pci_bus_num(bus); + uint8_t numa_node = pci_bus_numa_node(bus); /* look only for expander root buses */ if (!pci_bus_is_root(bus)) { @@ -918,6 +919,11 @@ build_ssdt(GArray *table_data, GArray *linker, aml_name_decl("_UID", aml_string("PC%.02X", bus_num))); aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A03"))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); + + if (numa_node != NUMA_NODE_UNASSIGNED) { + aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node))); + } + aml_append(dev, build_prt()); crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent), io_ranges, mem_ranges); diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index 8660a00..ec2bb45 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -17,6 +17,7 @@ #include "hw/i386/pc.h" #include "qemu/range.h" #include "qemu/error-report.h" +#include "sysemu/numa.h" #define TYPE_PXB_BUS "pxb-bus" #define PXB_BUS(obj) OBJECT_CHECK(PXBBus, (obj), TYPE_PXB_BUS) @@ -38,6 +39,7 @@ typedef struct PXBDev { /*< public >*/ uint8_t bus_nr; + uint16_t numa_node; } PXBDev; #define TYPE_PXB_HOST "pxb-host" @@ -54,12 +56,20 @@ static bool pxb_is_root(PCIBus *bus) return true; /* by definition */ } +static uint16_t pxb_bus_numa_node(PCIBus *bus) +{ + PXBDev *pxb = PXB_DEV(bus->parent_dev); + + return pxb->numa_node; +} + static void pxb_bus_class_init(ObjectClass *class, void *data) { PCIBusClass *pbc = PCI_BUS_CLASS(class); pbc->bus_num = pxb_bus_num; pbc->is_root = pxb_is_root; + pbc->numa_node = pxb_bus_numa_node; } static const TypeInfo pxb_bus_info = { @@ -145,6 +155,12 @@ static int pxb_dev_initfn(PCIDevice *dev) PCIBus *bus; const char *dev_name = NULL; + if (pxb->numa_node != NUMA_NODE_UNASSIGNED && + pxb->numa_node >= nb_numa_nodes) { + error_report("Illegal numa node %d.", pxb->numa_node); + return -EINVAL; + } + if (dev->qdev.id && *dev->qdev.id) { dev_name = dev->qdev.id; } @@ -180,6 +196,7 @@ static int pxb_dev_initfn(PCIDevice *dev) static Property pxb_dev_properties[] = { /* Note: 0 is not a legal a PXB bus number. */ DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), + DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), DEFINE_PROP_END_OF_LIST(), }; -- cgit v1.1 From 0f6dd8e1d514b8c24689499ed72ea89fd0d967f3 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:11 +0300 Subject: apci: fix PXB behaviour if used with unsupported BIOS PXB does not work with unsupported bioses, but should not interfere with normal OS operation. We don't ship them anymore, but it's reasonable to keep the work-around until we update the bios in qemu. Fix this by not adding PXB mem/IO chunks to _CRS if they weren't configured by BIOS. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- hw/i386/acpi-build.c | 87 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 29 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 45c36a8..db32fd1 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -786,6 +786,14 @@ static Aml *build_crs(PCIHostState *host, range_base = r->addr; range_limit = r->addr + r->size - 1; + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (!range_base || range_base > range_limit) { + continue; + } + if (r->type & PCI_BASE_ADDRESS_SPACE_IO) { aml_append(crs, aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, @@ -819,45 +827,66 @@ static Aml *build_crs(PCIHostState *host, range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO); range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO); - aml_append(crs, - aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, - AML_POS_DECODE, AML_ENTIRE_RANGE, - 0, - range_base, - range_limit, - 0, - range_limit - range_base + 1)); - crs_range_insert(io_ranges, range_base, range_limit); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base || range_base > range_limit) { + aml_append(crs, + aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED, + AML_POS_DECODE, AML_ENTIRE_RANGE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(io_ranges, range_base, range_limit); + } range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY); - aml_append(crs, - aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, - AML_MAX_FIXED, AML_NON_CACHEABLE, - AML_READ_WRITE, - 0, - range_base, - range_limit, - 0, - range_limit - range_base + 1)); - crs_range_insert(mem_ranges, range_base, range_limit); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base || range_base > range_limit) { + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH); - aml_append(crs, - aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, - AML_MAX_FIXED, AML_NON_CACHEABLE, - AML_READ_WRITE, - 0, - range_base, - range_limit, - 0, - range_limit - range_base + 1)); - crs_range_insert(mem_ranges, range_base, range_limit); + + /* + * Work-around for old bioses + * that do not support multiple root buses + */ + if (range_base || range_base > range_limit) { + aml_append(crs, + aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, + AML_MAX_FIXED, AML_NON_CACHEABLE, + AML_READ_WRITE, + 0, + range_base, + range_limit, + 0, + range_limit - range_base + 1)); + crs_range_insert(mem_ranges, range_base, range_limit); + } } } -- cgit v1.1 From 814550d73a94dcf9f2c9f8d2ee280226f1145388 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Tue, 2 Jun 2015 14:23:12 +0300 Subject: docs: Add PXB documentation Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Laszlo Ersek --- docs/pci_expander_bridge.txt | 58 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 docs/pci_expander_bridge.txt diff --git a/docs/pci_expander_bridge.txt b/docs/pci_expander_bridge.txt new file mode 100644 index 0000000..d7913fb --- /dev/null +++ b/docs/pci_expander_bridge.txt @@ -0,0 +1,58 @@ +PCI EXPANDER BRIDGE (PXB) +========================= + +Description +=========== +PXB is a "light-weight" host bridge in the same PCI domain +as the main host bridge whose purpose is to enable +the main host bridge to support multiple PCI root buses. +It is implemented only for i440fx and can be placed only +on bus 0 (pci.0). + +As opposed to PCI-2-PCI bridge's secondary bus, PXB's bus +is a primary bus and can be associated with a NUMA node +(different from the main host bridge) allowing the guest OS +to recognize the proximity of a pass-through device to +other resources as RAM and CPUs. + +Usage +===== +A detailed command line would be: + +[qemu-bin + storage options] +-m 2G +-object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 -numa node,nodeid=0,cpus=0,memdev=ram-node0 +-object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 -numa node,nodeid=1,cpus=1,memdev=ram-node1 +-device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd-device e1000,bus=bridge1,addr=0x4,netdev=nd +-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8,bus=pci.0 -device e1000,bus=bridge2,addr=0x3 +-device pxb,id=bridge3,bus=pci.0,bus_nr=40,bus=pci.0 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1 + +Here you have: + - 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in host, but you can and should put it in different host NUMA nodes) + - a pxb host bridge attached to NUMA 1 with an e1000 behind it + - a pxb host bridge attached to NUMA 0 with an e1000 behind it + - a pxb host bridge not attached to any NUMA with a hard drive behind it. + +Limitations +=========== +Please observe that we specified the bus "pci.0" for the second and third pxb. +This is because when no bus is given, another pxb can be selected by QEMU as default bus, +however, PXBs can be placed only under the root bus. + +Implementation +============== +The PXB is composed by: +- HostBridge (TYPE_PXB_HOST) + The host bridge allows to register and query the PXB's rPCI root bus in QEMU. +- PXBDev(TYPE_PXB_DEVICE) + It is a regular PCI Device that resides on the piix host-bridge bus and its bus uses the same PCI domain. + However, the bus behind is exposed through ACPI as a primary PCI bus and starts a new PCI hierarchy. + The interrupts from devices behind the PXB are routed through this device the same as if it were a + PCI-2-PCI bridge. The _PRT follows the i440fx model. +- PCIBridgeDev(TYPE_PCI_BRIDGE_DEV) + Created automatically as part of init sequence. + When adding a device to PXB it is attached to the bridge for two reasons: + - Using the bridge will enable hotplug support + - All the devices behind the bridge will use bridge's IO/MEM windows compacting + the PCI address space. + -- cgit v1.1 From b5d3b039221f056befb3715471fee1f68214815c Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 3 Jun 2015 17:10:43 +0200 Subject: pc-dimm: don't assert if pc-dimm alignment != hotpluggable mem range size Drop superfluous pc-dimm alignment on hot-pluggable mem range size assert, since it causes QEMU crash during hotplug when hotplugging pc-dimm with alignment bigger than an alignment of hot-pluggable mem range size. Instead allow pc_dimm_get_free_addr() find free address and bail out gracefully later in that function during checking if pc-dimm will fit in hot-pluggable mem range. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/pc-dimm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 39f0c97..e70633d 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -211,7 +211,6 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start, uint64_t address_space_end = address_space_start + address_space_size; g_assert(QEMU_ALIGN_UP(address_space_start, align) == address_space_start); - g_assert(QEMU_ALIGN_UP(address_space_size, align) == address_space_size); if (!address_space_size) { error_setg(errp, "memory hotplug is not enabled, " -- cgit v1.1 From 9a10bbb4e83b184faef6fa744396a6775283c0aa Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 29 Apr 2015 15:20:14 +0200 Subject: hw/acpi: acpi_pm1_cnt_init(): take "disable_s3" and "disable_s4" This patch only modifies the function prototype and updates all chipset code that calls acpi_pm1_cnt_init() to pass in their own disable_s3 and disable_s4 settings. vt82c686 is assumed to be fixed "S3 and S4 enabled". RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1204696 Cc: Amit Shah Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: Aurelien Jarno Cc: Leon Alrae Signed-off-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Laszlo Ersek --- hw/acpi/core.c | 3 ++- hw/acpi/ich9.c | 3 ++- hw/acpi/piix4.c | 2 +- hw/isa/vt82c686.c | 2 +- include/hw/acpi/acpi.h | 3 ++- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/hw/acpi/core.c b/hw/acpi/core.c index 8623993..c165096 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -592,7 +592,8 @@ static const MemoryRegionOps acpi_pm_cnt_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, uint8_t s4_val) +void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, + bool disable_s3, bool disable_s4, uint8_t s4_val) { ar->pm1.cnt.s4_val = s4_val; ar->wakeup.notify = acpi_notify_wakeup; diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 84e5bb8..799351e 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -219,7 +219,8 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm, acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io); acpi_pm1_evt_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io); - acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, pm->s4_val); + acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, pm->disable_s3, pm->disable_s4, + pm->s4_val); acpi_gpe_init(&pm->acpi_regs, ICH9_PMIO_GPE0_LEN); memory_region_init_io(&pm->io_gpe, OBJECT(lpc_pci), &ich9_gpe_ops, pm, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 1b28481..01b304a 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -475,7 +475,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp) acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io); acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io); - acpi_pm1_cnt_init(&s->ar, &s->io, s->s4_val); + acpi_pm1_cnt_init(&s->ar, &s->io, s->disable_s3, s->disable_s4, s->s4_val); acpi_gpe_init(&s->ar, GPE_LEN); s->powerdown_notifier.notify = piix4_pm_powerdown_req; diff --git a/hw/isa/vt82c686.c b/hw/isa/vt82c686.c index b8197b1..b2ba870 100644 --- a/hw/isa/vt82c686.c +++ b/hw/isa/vt82c686.c @@ -356,7 +356,7 @@ static void vt82c686b_pm_realize(PCIDevice *dev, Error **errp) acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io); acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io); - acpi_pm1_cnt_init(&s->ar, &s->io, 2); + acpi_pm1_cnt_init(&s->ar, &s->io, false, false, 2); } I2CBus *vt82c686b_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, diff --git a/include/hw/acpi/acpi.h b/include/hw/acpi/acpi.h index 9390997..b20bd55 100644 --- a/include/hw/acpi/acpi.h +++ b/include/hw/acpi/acpi.h @@ -167,7 +167,8 @@ void acpi_pm1_evt_init(ACPIREGS *ar, acpi_update_sci_fn update_sci, MemoryRegion *parent); /* PM1a_CNT: piix and ich9 don't implement PM1b CNT. */ -void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, uint8_t s4_val); +void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, + bool disable_s3, bool disable_s4, uint8_t s4_val); void acpi_pm1_cnt_update(ACPIREGS *ar, bool sci_enable, bool sci_disable); void acpi_pm1_cnt_reset(ACPIREGS *ar); -- cgit v1.1 From e3845e7c47cc3eaf35305c9c0f9d55ca3840b49b Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 29 Apr 2015 15:20:15 +0200 Subject: hw/acpi: move "etc/system-states" fw_cfg file from PIIX4 to core The acpi_pm1_cnt_init() core function is responsible for setting up the register block that will ultimately react to S3 and S4 requests (see acpi_pm1_cnt_write()). It makes sense to advertise this configuration to the guest firmware via an easy to parse fw_cfg file (ACPI is too complex for firmware to parse), and indeed PIIX4 does that. However, since acpi_pm1_cnt_init() is not specific to PIIX4, neither should be the fw_cfg file. This patch makes "etc/system-states" appear on all chipsets modified in the previous patch, not just PIIX4 (assuming they have fw_cfg at all). RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1204696 Cc: Amit Shah Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: Aurelien Jarno Cc: Leon Alrae Signed-off-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Laszlo Ersek --- hw/acpi/core.c | 12 ++++++++++++ hw/acpi/piix4.c | 8 -------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/hw/acpi/core.c b/hw/acpi/core.c index c165096..0f201d8 100644 --- a/hw/acpi/core.c +++ b/hw/acpi/core.c @@ -22,6 +22,7 @@ #include "hw/hw.h" #include "hw/i386/pc.h" #include "hw/acpi/acpi.h" +#include "hw/nvram/fw_cfg.h" #include "qemu/config-file.h" #include "qapi/opts-visitor.h" #include "qapi/dealloc-visitor.h" @@ -595,12 +596,23 @@ static const MemoryRegionOps acpi_pm_cnt_ops = { void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, bool disable_s3, bool disable_s4, uint8_t s4_val) { + FWCfgState *fw_cfg; + ar->pm1.cnt.s4_val = s4_val; ar->wakeup.notify = acpi_notify_wakeup; qemu_register_wakeup_notifier(&ar->wakeup); memory_region_init_io(&ar->pm1.cnt.io, memory_region_owner(parent), &acpi_pm_cnt_ops, ar, "acpi-cnt", 2); memory_region_add_subregion(parent, 4, &ar->pm1.cnt.io); + + fw_cfg = fw_cfg_find(); + if (fw_cfg) { + uint8_t suspend[6] = {128, 0, 0, 129, 128, 128}; + suspend[3] = 1 | ((!disable_s3) << 7); + suspend[4] = s4_val | ((!disable_s4) << 7); + + fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 6); + } } void acpi_pm1_cnt_reset(ACPIREGS *ar) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 01b304a..13895ad 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -525,14 +525,6 @@ I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qdev_init_nofail(dev); - if (fw_cfg) { - uint8_t suspend[6] = {128, 0, 0, 129, 128, 128}; - suspend[3] = 1 | ((!s->disable_s3) << 7); - suspend[4] = s->s4_val | ((!s->disable_s4) << 7); - - fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 6); - } - return s->smb.smbus; } -- cgit v1.1 From 6e7d82497dc8da7d420c8fa6632d759e08a18bc3 Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 29 Apr 2015 15:20:16 +0200 Subject: hw/acpi: piix4_pm_init(): take fw_cfg object no more This PIIX4 init function has no more reason to receive a pointer to the FwCfg object. Remove the parameter from the prototype, and update callers. As a result, the pc_init1() function no longer needs to save the return value of pc_memory_init() and xen_load_linux(), which makes it more similar to pc_q35_init(). The return type & value of pc_memory_init() and xen_load_linux() are not changed themselves; maybe we'll need their return values sometime later. RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1204696 Cc: Amit Shah Cc: "Michael S. Tsirkin" Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: Aurelien Jarno Cc: Leon Alrae Signed-off-by: Laszlo Ersek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Laszlo Ersek --- hw/acpi/piix4.c | 3 +-- hw/i386/pc_piix.c | 19 +++++++++---------- hw/mips/mips_malta.c | 2 +- include/hw/i386/pc.h | 3 +-- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index 13895ad..b730ca6 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -503,8 +503,7 @@ Object *piix4_pm_find(void) I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg, - DeviceState **piix4_pm) + int kvm_enabled, DeviceState **piix4_pm) { DeviceState *dev; PIIX4PMState *s; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6e7fa42..0688f0c 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -99,7 +99,6 @@ static void pc_init1(MachineState *machine) MemoryRegion *pci_memory; MemoryRegion *rom_memory; DeviceState *icc_bridge; - FWCfgState *fw_cfg = NULL; PcGuestInfo *guest_info; ram_addr_t lowmem; @@ -180,16 +179,16 @@ static void pc_init1(MachineState *machine) /* allocate ram and load rom/bios */ if (!xen_enabled()) { - fw_cfg = pc_memory_init(machine, system_memory, - below_4g_mem_size, above_4g_mem_size, - rom_memory, &ram_memory, guest_info); + pc_memory_init(machine, system_memory, + below_4g_mem_size, above_4g_mem_size, + rom_memory, &ram_memory, guest_info); } else if (machine->kernel_filename != NULL) { /* For xen HVM direct kernel boot, load linux here */ - fw_cfg = xen_load_linux(machine->kernel_filename, - machine->kernel_cmdline, - machine->initrd_filename, - below_4g_mem_size, - guest_info); + xen_load_linux(machine->kernel_filename, + machine->kernel_cmdline, + machine->initrd_filename, + below_4g_mem_size, + guest_info); } gsi_state = g_malloc0(sizeof(*gsi_state)); @@ -288,7 +287,7 @@ static void pc_init1(MachineState *machine) /* TODO: Populate SPD eeprom data. */ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, gsi[9], *smi_irq, - kvm_enabled(), fw_cfg, &piix4_pm); + kvm_enabled(), &piix4_pm); smbus_eeprom_init(smbus, 8, NULL, 0); object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c index 482250d..5140882 100644 --- a/hw/mips/mips_malta.c +++ b/hw/mips/mips_malta.c @@ -1161,7 +1161,7 @@ void mips_malta_init(MachineState *machine) pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1); pci_create_simple(pci_bus, piix4_devfn + 2, "piix4-usb-uhci"); smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100, - isa_get_irq(NULL, 9), NULL, 0, NULL, NULL); + isa_get_irq(NULL, 9), NULL, 0, NULL); smbus_eeprom_init(smbus, 8, smbus_eeprom_buf, smbus_eeprom_size); g_free(smbus_eeprom_buf); pit = pit_init(isa_bus, 0x40, 0, NULL); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 27bd748..0d501c9 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -218,8 +218,7 @@ void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base, qemu_irq sci_irq, qemu_irq smi_irq, - int kvm_enabled, FWCfgState *fw_cfg, - DeviceState **piix4_pm); + int kvm_enabled, DeviceState **piix4_pm); void piix4_smbus_register_device(SMBusDevice *dev, uint8_t addr); /* hpet.c */ -- cgit v1.1 From 309750fad51f17d1ec6195c5d8ad7d741596ddb6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 4 Jun 2015 05:28:46 -0400 Subject: vhost: logs sharing Currently we allocate one vhost log per vhost device. This is sub optimal when: - Guest has several device with vhost as backend - Guest has multiqueue devices In the above cases, we can avoid the memory allocation by sharing a single vhost log among all the vhost devices. This is done through: - Introducing a new vhost_log structure with refcnt inside. - Using a global pointer to vhost_log structure that will be used. And introduce helper to get the log with expected log size and helper to - drop the refcnt to the old log. - Each vhost device still keep track of a pointer to the log that was used. With above, if no resize happens, all vhost device will share a single vhost log. During resize, a new vhost_log structure will be allocated and made for the global pointer. And each vhost devices will drop the refcnt to the old log. Tested by doing scp during migration for a 2 queues virtio-net-pci. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 77 ++++++++++++++++++++++++++++++++++++----------- include/hw/virtio/vhost.h | 8 ++++- 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 54851b7..01f1e04 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -22,15 +22,19 @@ #include "hw/virtio/virtio-bus.h" #include "migration/migration.h" +static struct vhost_log *vhost_log; + static void vhost_dev_sync_region(struct vhost_dev *dev, MemoryRegionSection *section, uint64_t mfirst, uint64_t mlast, uint64_t rfirst, uint64_t rlast) { + vhost_log_chunk_t *log = dev->log->log; + uint64_t start = MAX(mfirst, rfirst); uint64_t end = MIN(mlast, rlast); - vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK; - vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1; + vhost_log_chunk_t *from = log + start / VHOST_LOG_CHUNK; + vhost_log_chunk_t *to = log + end / VHOST_LOG_CHUNK + 1; uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK; if (end < start) { @@ -280,22 +284,57 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) } return log_size; } +static struct vhost_log *vhost_log_alloc(uint64_t size) +{ + struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log))); + + log->size = size; + log->refcnt = 1; + + return log; +} + +static struct vhost_log *vhost_log_get(uint64_t size) +{ + if (!vhost_log || vhost_log->size != size) { + vhost_log = vhost_log_alloc(size); + } else { + ++vhost_log->refcnt; + } + + return vhost_log; +} + +static void vhost_log_put(struct vhost_dev *dev, bool sync) +{ + struct vhost_log *log = dev->log; + + if (!log) { + return; + } + + --log->refcnt; + if (log->refcnt == 0) { + /* Sync only the range covered by the old log */ + if (dev->log_size && sync) { + vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); + } + if (vhost_log == log) { + vhost_log = NULL; + } + g_free(log); + } +} static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) { - vhost_log_chunk_t *log; - uint64_t log_base; + struct vhost_log *log = vhost_log_get(size); + uint64_t log_base = (uintptr_t)log->log; int r; - log = g_malloc0(size * sizeof *log); - log_base = (uintptr_t)log; r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); assert(r >= 0); - /* Sync only the range covered by the old log */ - if (dev->log_size) { - vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); - } - g_free(dev->log); + vhost_log_put(dev, true); dev->log = log; dev->log_size = size; } @@ -601,7 +640,7 @@ static int vhost_migration_log(MemoryListener *listener, int enable) if (r < 0) { return r; } - g_free(dev->log); + vhost_log_put(dev, false); dev->log = NULL; dev->log_size = 0; } else { @@ -1060,10 +1099,10 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = hdev->log_size ? - g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL; - log_base = (uintptr_t)hdev->log; - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, &log_base); + hdev->log = vhost_log_get(hdev->log_size); + log_base = (uintptr_t)hdev->log->log; + r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, + hdev->log_size ? &log_base : NULL); if (r < 0) { r = -errno; goto fail_log; @@ -1072,6 +1111,9 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) return 0; fail_log: + if (hdev->log_size) { + vhost_log_put(hdev, false); + } fail_vq: while (--i >= 0) { vhost_virtqueue_stop(hdev, @@ -1098,10 +1140,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) hdev->vqs + i, hdev->vq_index + i); } - vhost_log_sync_range(hdev, 0, ~0x0ull); + vhost_log_put(hdev, true); hdev->started = false; - g_free(hdev->log); hdev->log = NULL; hdev->log_size = 0; } diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 8f04888..816a2e8 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -28,6 +28,12 @@ typedef unsigned long vhost_log_chunk_t; #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) #define VHOST_INVALID_FEATURE_BIT (0xff) +struct vhost_log { + unsigned long long size; + int refcnt; + vhost_log_chunk_t log[0]; +}; + struct vhost_memory; struct vhost_dev { MemoryListener memory_listener; @@ -43,7 +49,6 @@ struct vhost_dev { unsigned long long backend_features; bool started; bool log_enabled; - vhost_log_chunk_t *log; unsigned long long log_size; Error *migration_blocker; bool force; @@ -52,6 +57,7 @@ struct vhost_dev { hwaddr mem_changed_end_addr; const VhostOps *vhost_ops; void *opaque; + struct vhost_log *log; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, -- cgit v1.1