diff options
-rw-r--r-- | share/man/man9/pci.9 | 25 | ||||
-rw-r--r-- | sys/amd64/vmm/io/iommu.c | 66 | ||||
-rw-r--r-- | sys/amd64/vmm/io/iommu.h | 1 | ||||
-rw-r--r-- | sys/amd64/vmm/io/ppt.c | 2 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm.c | 12 | ||||
-rw-r--r-- | sys/dev/pci/pci.c | 3 | ||||
-rw-r--r-- | sys/dev/pci/pcivar.h | 9 |
7 files changed, 95 insertions, 23 deletions
diff --git a/share/man/man9/pci.9 b/share/man/man9/pci.9 index 1a4c5de..209986b 100644 --- a/share/man/man9/pci.9 +++ b/share/man/man9/pci.9 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 1, 2016 +.Dd September 6, 2016 .Dt PCI 9 .Os .Sh NAME @@ -149,6 +149,10 @@ .Fn pcie_read_config "device_t dev" "int reg" "int width" .Ft void .Fn pcie_write_config "device_t dev" "int reg" "uint32_t val" "int width" +.Ft void +.Fn pci_event_fn "void *arg" "device_t dev" +.Fn EVENTHANDLER_REGISTER "pci_add_device" "pci_event_fn" +.Fn EVENTHANDLER_DEREGISTER "pci_delete_resource" "pci_event_fn" .In dev/pci/pci_iov.h .Ft int .Fn pci_iov_attach "device_t dev" "nvlist_t *pf_schema" "nvlist_t *vf_schema" @@ -910,6 +914,24 @@ with one in the new distribution. The .Fn pci_remap_msix function will fail if this condition is not met. +.Ss Device Events +The +.Va pci_add_device +event handler is invoked every time a new PCI device is added to the system. +This includes the creation of Virtual Functions via SR-IOV. +.Pp +The +.Va pci_delete_device +event handler is invoked every time a PCI device is removed from the system. +.Pp +Both event handlers pass the +.Vt device_t +object of the relevant PCI device as +.Fa dev +to each callback function. +Both event handlers are invoked while +.Fa dev +is unattached but with valid instance variables. .Sh SEE ALSO .Xr pci 4 , .Xr pciconf 8 , @@ -921,6 +943,7 @@ function will fail if this condition is not met. .Xr devclass 9 , .Xr device 9 , .Xr driver 9 , +.Xr eventhandler 9 , .Xr rman 9 .Rs .%B FreeBSD Developers' Handbook diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c index 9cfc4c2..75cf1ec 100644 --- a/sys/amd64/vmm/io/iommu.c +++ b/sys/amd64/vmm/io/iommu.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> +#include <machine/cpu.h> #include <machine/md_var.h> #include "vmm_util.h" @@ -51,8 +52,13 @@ static int iommu_avail; SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail, 0, "bhyve iommu initialized?"); +static int iommu_enable = 1; +SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, enable, CTLFLAG_RDTUN, &iommu_enable, 0, + "Enable use of I/O MMU (required for PCI passthrough)."); + static struct iommu_ops *ops; static void *host_domain; +static eventhandler_tag add_tag, delete_tag; static __inline int IOMMU_INIT(void) @@ -148,14 +154,31 @@ IOMMU_DISABLE(void) (*ops->disable)(); } -void +static void +iommu_pci_add(void *arg, device_t dev) +{ + + /* Add new devices to the host domain. */ + iommu_add_device(host_domain, pci_get_rid(dev)); +} + +static void +iommu_pci_delete(void *arg, device_t dev) +{ + + iommu_remove_device(host_domain, pci_get_rid(dev)); +} + +static void iommu_init(void) { int error, bus, slot, func; vm_paddr_t maxaddr; - const char *name; device_t dev; + if (!iommu_enable) + return; + if (vmm_is_intel()) ops = &iommu_ops_intel; else if (vmm_is_amd()) @@ -174,8 +197,13 @@ iommu_init(void) */ maxaddr = vmm_mem_maxaddr(); host_domain = IOMMU_CREATE_DOMAIN(maxaddr); - if (host_domain == NULL) - panic("iommu_init: unable to create a host domain"); + if (host_domain == NULL) { + printf("iommu_init: unable to create a host domain"); + IOMMU_CLEANUP(); + ops = NULL; + iommu_avail = 0; + return; + } /* * Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to @@ -183,6 +211,9 @@ iommu_init(void) */ iommu_create_mapping(host_domain, 0, 0, maxaddr); + add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0); + delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete, + NULL, 0); for (bus = 0; bus <= PCI_BUSMAX; bus++) { for (slot = 0; slot <= PCI_SLOTMAX; slot++) { for (func = 0; func <= PCI_FUNCMAX; func++) { @@ -190,12 +221,7 @@ iommu_init(void) if (dev == NULL) continue; - /* skip passthrough devices */ - name = device_get_name(dev); - if (name != NULL && strcmp(name, "ppt") == 0) - continue; - - /* everything else belongs to the host domain */ + /* Everything belongs to the host domain. */ iommu_add_device(host_domain, pci_get_rid(dev)); } @@ -208,6 +234,15 @@ iommu_init(void) void iommu_cleanup(void) { + + if (add_tag != NULL) { + EVENTHANDLER_DEREGISTER(pci_add_device, add_tag); + add_tag = NULL; + } + if (delete_tag != NULL) { + EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag); + delete_tag = NULL; + } IOMMU_DISABLE(); IOMMU_DESTROY_DOMAIN(host_domain); IOMMU_CLEANUP(); @@ -216,7 +251,16 @@ iommu_cleanup(void) void * iommu_create_domain(vm_paddr_t maxaddr) { - + static volatile int iommu_initted; + + if (iommu_initted < 2) { + if (atomic_cmpset_int(&iommu_initted, 0, 1)) { + iommu_init(); + atomic_store_rel_int(&iommu_initted, 2); + } else + while (iommu_initted == 1) + cpu_spinwait(); + } return (IOMMU_CREATE_DOMAIN(maxaddr)); } diff --git a/sys/amd64/vmm/io/iommu.h b/sys/amd64/vmm/io/iommu.h index 36b44fa..a941c77 100644 --- a/sys/amd64/vmm/io/iommu.h +++ b/sys/amd64/vmm/io/iommu.h @@ -61,7 +61,6 @@ struct iommu_ops { extern struct iommu_ops iommu_ops_intel; extern struct iommu_ops iommu_ops_amd; -void iommu_init(void); void iommu_cleanup(void); void *iommu_host_domain(void); void *iommu_create_domain(vm_paddr_t maxaddr); diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 692190a..6541d7d 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -363,6 +363,7 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func) return (EBUSY); ppt->vm = vm; + iommu_remove_device(iommu_host_domain(), pci_get_rid(ppt->dev)); iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); return (0); } @@ -385,6 +386,7 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func) ppt_teardown_msi(ppt); ppt_teardown_msix(ppt); iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + iommu_add_device(iommu_host_domain(), pci_get_rid(ppt->dev)); ppt->vm = NULL; return (0); } diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index ebd6360..537454a 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -224,11 +224,6 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, &trace_guest_exceptions, 0, "Trap into hypervisor on all guest exceptions and reflect them back"); -static int vmm_force_iommu = 0; -TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu); -SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0, - "Force use of I/O MMU even if no passthrough devices were found."); - static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); @@ -358,8 +353,6 @@ vmm_handler(module_t mod, int what, void *arg) switch (what) { case MOD_LOAD: vmmdev_init(); - if (vmm_force_iommu || ppt_avail_devices() > 0) - iommu_init(); error = vmm_init(); if (error == 0) vmm_initialized = 1; @@ -396,9 +389,6 @@ static moduledata_t vmm_kmod = { /* * vmm initialization has the following dependencies: * - * - iommu initialization must happen after the pci passthru driver has had - * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). - * * - VT-x initialization requires smp_rendezvous() and therefore must happen * after SMP is fully functional (after SI_SUB_SMP). */ @@ -893,6 +883,8 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) ("vm_assign_pptdev: iommu must be NULL")); maxaddr = sysmem_maxaddr(vm); vm->iommu = iommu_create_domain(maxaddr); + if (vm->iommu == NULL) + return (ENXIO); vm_iommu_map(vm); } diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 3a5a938..e8f9f84 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -4070,6 +4070,7 @@ pci_add_child(device_t bus, struct pci_devinfo *dinfo) pci_print_verbose(dinfo); pci_add_resources(bus, dinfo->cfg.dev, 0, 0); pci_child_added(dinfo->cfg.dev); + EVENTHANDLER_INVOKE(pci_add_device, dinfo->cfg.dev); } void @@ -5311,6 +5312,8 @@ pci_child_deleted(device_t dev, device_t child) dinfo = device_get_ivars(child); rl = &dinfo->resources; + EVENTHANDLER_INVOKE(pci_delete_device, child); + /* Turn off access to resources we're about to free */ if (bus_child_present(child) != 0) { pci_write_config(child, PCIR_COMMAND, pci_read_config(child, diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h index 3180b4e..84501bd 100644 --- a/sys/dev/pci/pcivar.h +++ b/sys/dev/pci/pcivar.h @@ -31,6 +31,7 @@ #define _PCIVAR_H_ #include <sys/queue.h> +#include <sys/eventhandler.h> /* some PCI bus constants */ #define PCI_MAXMAPS_0 6 /* max. no. of memory/port maps */ @@ -631,4 +632,12 @@ void * vga_pci_map_bios(device_t dev, size_t *size); void vga_pci_unmap_bios(device_t dev, void *bios); int vga_pci_repost(device_t dev); +/** + * Global eventhandlers invoked when PCI devices are added or removed + * from the system. + */ +typedef void (*pci_event_fn)(void *arg, device_t dev); +EVENTHANDLER_DECLARE(pci_add_device, pci_event_fn); +EVENTHANDLER_DECLARE(pci_delete_device, pci_event_fn); + #endif /* _PCIVAR_H_ */ |