summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhb <jhb@FreeBSD.org>2016-09-30 01:39:18 +0000
committerjhb <jhb@FreeBSD.org>2016-09-30 01:39:18 +0000
commitc761744d70aec20c011a440e546e233b176dfd64 (patch)
tree9c092465c668c2a3a918408d96a96534fb003870
parentcdee9ae0005aad3526e07ef1a0d3b1ca013066da (diff)
downloadFreeBSD-src-c761744d70aec20c011a440e546e233b176dfd64.zip
FreeBSD-src-c761744d70aec20c011a440e546e233b176dfd64.tar.gz
MFC 304858,305485,305497: Fix various issues with PCI pass through and VT-d.
304858: Enable I/O MMU when PCI pass through is first used. Rather than enabling the I/O MMU when the vmm module is loaded, defer initialization until the first attempt to pass a PCI device through to a guest. If the I/O MMU fails to initialize or is not present, then fail the attempt to pass a PCI device through to a guest. The hw.vmm.force_iommu tunable has been removed since the I/O MMU is no longer enabled during boot. However, the I/O MMU support can be disabled by setting the hw.vmm.iommu.enable tunable to 0 to prevent use of the I/O MMU on any systems where it is buggy. 305485: Leave ppt devices in the host domain when they are not attached to a VM. This allows a pass through device to be reset to a normal device driver on the host and reused on the host. ppt devices are now always active in some I/O MMU domain when the I/O MMU is active, either the host domain or the domain of a VM they are attached to. 305497: Update the I/O MMU in bhyve when PCI devices are added and removed. When the I/O MMU is active in bhyve, all PCI devices need valid entries in the DMAR context tables. The I/O MMU code does a single enumeration of the available PCI devices during initialization to add all existing devices to a domain representing the host. The ppt(4) driver then moves pass through devices in and out of domains for virtual machines as needed. However, when new PCI devices were added at runtime either via SR-IOV or HotPlug, the I/O MMU tables were not updated. This change adds a new set of EVENTHANDLERS that are invoked when PCI devices are added and deleted. The I/O MMU driver in bhyve installs handlers for these events which it uses to add and remove devices to the "host" domain. Sponsored by: Chelsio Communications
-rw-r--r--share/man/man9/pci.925
-rw-r--r--sys/amd64/vmm/io/iommu.c66
-rw-r--r--sys/amd64/vmm/io/iommu.h1
-rw-r--r--sys/amd64/vmm/io/ppt.c2
-rw-r--r--sys/amd64/vmm/vmm.c12
-rw-r--r--sys/dev/pci/pci.c3
-rw-r--r--sys/dev/pci/pcivar.h9
7 files changed, 95 insertions, 23 deletions
diff --git a/share/man/man9/pci.9 b/share/man/man9/pci.9
index 1a4c5de..209986b 100644
--- a/share/man/man9/pci.9
+++ b/share/man/man9/pci.9
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd September 1, 2016
+.Dd September 6, 2016
.Dt PCI 9
.Os
.Sh NAME
@@ -149,6 +149,10 @@
.Fn pcie_read_config "device_t dev" "int reg" "int width"
.Ft void
.Fn pcie_write_config "device_t dev" "int reg" "uint32_t val" "int width"
+.Ft void
+.Fn pci_event_fn "void *arg" "device_t dev"
+.Fn EVENTHANDLER_REGISTER "pci_add_device" "pci_event_fn"
+.Fn EVENTHANDLER_DEREGISTER "pci_delete_device" "pci_event_fn"
.In dev/pci/pci_iov.h
.Ft int
.Fn pci_iov_attach "device_t dev" "nvlist_t *pf_schema" "nvlist_t *vf_schema"
@@ -910,6 +914,24 @@ with one in the new distribution.
The
.Fn pci_remap_msix
function will fail if this condition is not met.
+.Ss Device Events
+The
+.Va pci_add_device
+event handler is invoked every time a new PCI device is added to the system.
+This includes the creation of Virtual Functions via SR-IOV.
+.Pp
+The
+.Va pci_delete_device
+event handler is invoked every time a PCI device is removed from the system.
+.Pp
+Both event handlers pass the
+.Vt device_t
+object of the relevant PCI device as
+.Fa dev
+to each callback function.
+Both event handlers are invoked while
+.Fa dev
+is unattached but with valid instance variables.
.Sh SEE ALSO
.Xr pci 4 ,
.Xr pciconf 8 ,
@@ -921,6 +943,7 @@ function will fail if this condition is not met.
.Xr devclass 9 ,
.Xr device 9 ,
.Xr driver 9 ,
+.Xr eventhandler 9 ,
.Xr rman 9
.Rs
.%B FreeBSD Developers' Handbook
diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c
index 9cfc4c2..75cf1ec 100644
--- a/sys/amd64/vmm/io/iommu.c
+++ b/sys/amd64/vmm/io/iommu.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
+#include <machine/cpu.h>
#include <machine/md_var.h>
#include "vmm_util.h"
@@ -51,8 +52,13 @@ static int iommu_avail;
SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail,
0, "bhyve iommu initialized?");
+static int iommu_enable = 1;
+SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, enable, CTLFLAG_RDTUN, &iommu_enable, 0,
+ "Enable use of I/O MMU (required for PCI passthrough).");
+
static struct iommu_ops *ops;
static void *host_domain;
+static eventhandler_tag add_tag, delete_tag;
static __inline int
IOMMU_INIT(void)
@@ -148,14 +154,31 @@ IOMMU_DISABLE(void)
(*ops->disable)();
}
-void
+static void
+iommu_pci_add(void *arg, device_t dev)
+{
+
+ /* Add new devices to the host domain. */
+ iommu_add_device(host_domain, pci_get_rid(dev));
+}
+
+static void
+iommu_pci_delete(void *arg, device_t dev)
+{
+
+ iommu_remove_device(host_domain, pci_get_rid(dev));
+}
+
+static void
iommu_init(void)
{
int error, bus, slot, func;
vm_paddr_t maxaddr;
- const char *name;
device_t dev;
+ if (!iommu_enable)
+ return;
+
if (vmm_is_intel())
ops = &iommu_ops_intel;
else if (vmm_is_amd())
@@ -174,8 +197,13 @@ iommu_init(void)
*/
maxaddr = vmm_mem_maxaddr();
host_domain = IOMMU_CREATE_DOMAIN(maxaddr);
- if (host_domain == NULL)
- panic("iommu_init: unable to create a host domain");
+ if (host_domain == NULL) {
+ printf("iommu_init: unable to create a host domain");
+ IOMMU_CLEANUP();
+ ops = NULL;
+ iommu_avail = 0;
+ return;
+ }
/*
* Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to
@@ -183,6 +211,9 @@ iommu_init(void)
*/
iommu_create_mapping(host_domain, 0, 0, maxaddr);
+ add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0);
+ delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete,
+ NULL, 0);
for (bus = 0; bus <= PCI_BUSMAX; bus++) {
for (slot = 0; slot <= PCI_SLOTMAX; slot++) {
for (func = 0; func <= PCI_FUNCMAX; func++) {
@@ -190,12 +221,7 @@ iommu_init(void)
if (dev == NULL)
continue;
- /* skip passthrough devices */
- name = device_get_name(dev);
- if (name != NULL && strcmp(name, "ppt") == 0)
- continue;
-
- /* everything else belongs to the host domain */
+ /* Everything belongs to the host domain. */
iommu_add_device(host_domain,
pci_get_rid(dev));
}
@@ -208,6 +234,15 @@ iommu_init(void)
void
iommu_cleanup(void)
{
+
+ if (add_tag != NULL) {
+ EVENTHANDLER_DEREGISTER(pci_add_device, add_tag);
+ add_tag = NULL;
+ }
+ if (delete_tag != NULL) {
+ EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag);
+ delete_tag = NULL;
+ }
IOMMU_DISABLE();
IOMMU_DESTROY_DOMAIN(host_domain);
IOMMU_CLEANUP();
@@ -216,7 +251,16 @@ iommu_cleanup(void)
void *
iommu_create_domain(vm_paddr_t maxaddr)
{
-
+ static volatile int iommu_initted;
+
+ if (iommu_initted < 2) {
+ if (atomic_cmpset_int(&iommu_initted, 0, 1)) {
+ iommu_init();
+ atomic_store_rel_int(&iommu_initted, 2);
+ } else
+ while (iommu_initted == 1)
+ cpu_spinwait();
+ }
return (IOMMU_CREATE_DOMAIN(maxaddr));
}
diff --git a/sys/amd64/vmm/io/iommu.h b/sys/amd64/vmm/io/iommu.h
index 36b44fa..a941c77 100644
--- a/sys/amd64/vmm/io/iommu.h
+++ b/sys/amd64/vmm/io/iommu.h
@@ -61,7 +61,6 @@ struct iommu_ops {
extern struct iommu_ops iommu_ops_intel;
extern struct iommu_ops iommu_ops_amd;
-void iommu_init(void);
void iommu_cleanup(void);
void *iommu_host_domain(void);
void *iommu_create_domain(vm_paddr_t maxaddr);
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index 692190a..6541d7d 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -363,6 +363,7 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func)
return (EBUSY);
ppt->vm = vm;
+ iommu_remove_device(iommu_host_domain(), pci_get_rid(ppt->dev));
iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
return (0);
}
@@ -385,6 +386,7 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
ppt_teardown_msi(ppt);
ppt_teardown_msix(ppt);
iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
+ iommu_add_device(iommu_host_domain(), pci_get_rid(ppt->dev));
ppt->vm = NULL;
return (0);
}
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index ebd6360..537454a 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -224,11 +224,6 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN,
&trace_guest_exceptions, 0,
"Trap into hypervisor on all guest exceptions and reflect them back");
-static int vmm_force_iommu = 0;
-TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
-SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
- "Force use of I/O MMU even if no passthrough devices were found.");
-
static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
@@ -358,8 +353,6 @@ vmm_handler(module_t mod, int what, void *arg)
switch (what) {
case MOD_LOAD:
vmmdev_init();
- if (vmm_force_iommu || ppt_avail_devices() > 0)
- iommu_init();
error = vmm_init();
if (error == 0)
vmm_initialized = 1;
@@ -396,9 +389,6 @@ static moduledata_t vmm_kmod = {
/*
* vmm initialization has the following dependencies:
*
- * - iommu initialization must happen after the pci passthru driver has had
- * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE).
- *
* - VT-x initialization requires smp_rendezvous() and therefore must happen
* after SMP is fully functional (after SI_SUB_SMP).
*/
@@ -893,6 +883,8 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
("vm_assign_pptdev: iommu must be NULL"));
maxaddr = sysmem_maxaddr(vm);
vm->iommu = iommu_create_domain(maxaddr);
+ if (vm->iommu == NULL)
+ return (ENXIO);
vm_iommu_map(vm);
}
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 3a5a938..e8f9f84 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -4070,6 +4070,7 @@ pci_add_child(device_t bus, struct pci_devinfo *dinfo)
pci_print_verbose(dinfo);
pci_add_resources(bus, dinfo->cfg.dev, 0, 0);
pci_child_added(dinfo->cfg.dev);
+ EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev);
}
void
@@ -5311,6 +5312,8 @@ pci_child_deleted(device_t dev, device_t child)
dinfo = device_get_ivars(child);
rl = &dinfo->resources;
+ EVENTHANDLER_INVOKE(pci_delete_device, child);
+
/* Turn off access to resources we're about to free */
if (bus_child_present(child) != 0) {
pci_write_config(child, PCIR_COMMAND, pci_read_config(child,
diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h
index 3180b4e..84501bd 100644
--- a/sys/dev/pci/pcivar.h
+++ b/sys/dev/pci/pcivar.h
@@ -31,6 +31,7 @@
#define _PCIVAR_H_
#include <sys/queue.h>
+#include <sys/eventhandler.h>
/* some PCI bus constants */
#define PCI_MAXMAPS_0 6 /* max. no. of memory/port maps */
@@ -631,4 +632,12 @@ void * vga_pci_map_bios(device_t dev, size_t *size);
void vga_pci_unmap_bios(device_t dev, void *bios);
int vga_pci_repost(device_t dev);
+/**
+ * Global eventhandlers invoked when PCI devices are added or removed
+ * from the system.
+ */
+typedef void (*pci_event_fn)(void *arg, device_t dev);
+EVENTHANDLER_DECLARE(pci_add_device, pci_event_fn);
+EVENTHANDLER_DECLARE(pci_delete_device, pci_event_fn);
+
#endif /* _PCIVAR_H_ */
OpenPOWER on IntegriCloud