diff options
author | Renato Botelho <renato@netgate.com> | 2016-10-06 07:51:36 -0300 |
---|---|---|
committer | Renato Botelho <renato@netgate.com> | 2016-10-06 07:51:36 -0300 |
commit | 5dd7ab172435dc99e1abc1f7d73fd5e720050bbc (patch) | |
tree | 19b1dffa928be3fe0fb3b046f47c39e471c592bc /sys/amd64 | |
parent | 32988844e41ba2dafefd4b0ca819b8c48ecbbb81 (diff) | |
parent | 22e3935b200286024203db77aec100fca35e3e95 (diff) | |
download | FreeBSD-src-5dd7ab172435dc99e1abc1f7d73fd5e720050bbc.zip FreeBSD-src-5dd7ab172435dc99e1abc1f7d73fd5e720050bbc.tar.gz |
Merge remote-tracking branch 'origin/stable/11' into devel-11
Diffstat (limited to 'sys/amd64')
-rw-r--r-- | sys/amd64/amd64/efirt.c | 609 | ||||
-rw-r--r-- | sys/amd64/amd64/machdep.c | 4 | ||||
-rw-r--r-- | sys/amd64/amd64/minidump_machdep.c | 10 | ||||
-rw-r--r-- | sys/amd64/amd64/pmap.c | 123 | ||||
-rw-r--r-- | sys/amd64/conf/NOTES | 3 | ||||
-rw-r--r-- | sys/amd64/include/cpufunc.h | 33 | ||||
-rw-r--r-- | sys/amd64/include/efi.h | 59 | ||||
-rw-r--r-- | sys/amd64/include/pmap.h | 31 | ||||
-rw-r--r-- | sys/amd64/vmm/io/iommu.c | 66 | ||||
-rw-r--r-- | sys/amd64/vmm/io/iommu.h | 1 | ||||
-rw-r--r-- | sys/amd64/vmm/io/ppt.c | 13 | ||||
-rw-r--r-- | sys/amd64/vmm/vmm.c | 12 |
12 files changed, 889 insertions, 75 deletions
diff --git a/sys/amd64/amd64/efirt.c b/sys/amd64/amd64/efirt.c new file mode 100644 index 0000000..8db768b --- /dev/null +++ b/sys/amd64/amd64/efirt.c @@ -0,0 +1,609 @@ +/*- + * Copyright (c) 2004 Marcel Moolenaar + * Copyright (c) 2001 Doug Rabson + * Copyright (c) 2016 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/efi.h> +#include <sys/kernel.h> +#include <sys/linker.h> +#include <sys/lock.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/clock.h> +#include <sys/proc.h> +#include <sys/rwlock.h> +#include <sys/sched.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <machine/fpu.h> +#include <machine/efi.h> +#include <machine/metadata.h> +#include <machine/md_var.h> +#include <machine/smp.h> +#include <machine/vmparam.h> +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> + +static struct efi_systbl *efi_systbl; +static struct efi_cfgtbl *efi_cfgtbl; +static struct efi_rt *efi_runtime; + +static int efi_status2err[25] = { + 0, /* EFI_SUCCESS */ + ENOEXEC, /* EFI_LOAD_ERROR */ + EINVAL, /* EFI_INVALID_PARAMETER */ + ENOSYS, /* EFI_UNSUPPORTED */ + EMSGSIZE, /* EFI_BAD_BUFFER_SIZE */ + EOVERFLOW, /* EFI_BUFFER_TOO_SMALL */ + EBUSY, /* EFI_NOT_READY */ + EIO, /* EFI_DEVICE_ERROR */ + EROFS, /* EFI_WRITE_PROTECTED */ + EAGAIN, /* EFI_OUT_OF_RESOURCES */ + EIO, /* EFI_VOLUME_CORRUPTED */ + ENOSPC, /* EFI_VOLUME_FULL */ + ENXIO, /* EFI_NO_MEDIA */ + ESTALE, /* EFI_MEDIA_CHANGED */ + ENOENT, /* EFI_NOT_FOUND */ + EACCES, /* EFI_ACCESS_DENIED */ + ETIMEDOUT, /* EFI_NO_RESPONSE */ + EADDRNOTAVAIL, /* EFI_NO_MAPPING */ + ETIMEDOUT, /* EFI_TIMEOUT */ + EDOOFUS, /* EFI_NOT_STARTED */ + EALREADY, /* EFI_ALREADY_STARTED */ + ECANCELED, /* EFI_ABORTED */ + EPROTO, /* EFI_ICMP_ERROR */ + EPROTO, /* EFI_TFTP_ERROR */ + EPROTO /* EFI_PROTOCOL_ERROR */ +}; + +static int +efi_status_to_errno(efi_status status) +{ + u_long code; + + code = status & 0x3ffffffffffffffful; + return (code < nitems(efi_status2err) ? 
efi_status2err[code] : EDOOFUS); +} + +static struct mtx efi_lock; +static pml4_entry_t *efi_pml4; +static vm_object_t obj_1t1_pt; +static vm_page_t efi_pml4_page; + +static void +efi_destroy_1t1_map(void) +{ + vm_page_t m; + + if (obj_1t1_pt != NULL) { + VM_OBJECT_RLOCK(obj_1t1_pt); + TAILQ_FOREACH(m, &obj_1t1_pt->memq, listq) + m->wire_count = 0; + atomic_subtract_int(&vm_cnt.v_wire_count, + obj_1t1_pt->resident_page_count); + VM_OBJECT_RUNLOCK(obj_1t1_pt); + vm_object_deallocate(obj_1t1_pt); + } + + obj_1t1_pt = NULL; + efi_pml4 = NULL; + efi_pml4_page = NULL; +} + +static vm_page_t +efi_1t1_page(vm_pindex_t idx) +{ + + return (vm_page_grab(obj_1t1_pt, idx, VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED | VM_ALLOC_ZERO)); +} + +static pt_entry_t * +efi_1t1_pte(vm_offset_t va) +{ + pml4_entry_t *pml4e; + pdp_entry_t *pdpe; + pd_entry_t *pde; + pt_entry_t *pte; + vm_page_t m; + vm_pindex_t pml4_idx, pdp_idx, pd_idx; + vm_paddr_t mphys; + + pml4_idx = pmap_pml4e_index(va); + pml4e = &efi_pml4[pml4_idx]; + if (*pml4e == 0) { + m = efi_1t1_page(1 + pml4_idx); + mphys = VM_PAGE_TO_PHYS(m); + *pml4e = mphys | X86_PG_RW | X86_PG_V; + } else { + mphys = *pml4e & ~PAGE_MASK; + } + + pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys); + pdp_idx = pmap_pdpe_index(va); + pdpe += pdp_idx; + if (*pdpe == 0) { + m = efi_1t1_page(1 + NPML4EPG + (pml4_idx + 1) * (pdp_idx + 1)); + mphys = VM_PAGE_TO_PHYS(m); + *pdpe = mphys | X86_PG_RW | X86_PG_V; + } else { + mphys = *pdpe & ~PAGE_MASK; + } + + pde = (pd_entry_t *)PHYS_TO_DMAP(mphys); + pd_idx = pmap_pde_index(va); + pde += pd_idx; + if (*pde == 0) { + m = efi_1t1_page(1 + NPML4EPG + NPML4EPG * NPDPEPG + + (pml4_idx + 1) * (pdp_idx + 1) * (pd_idx + 1)); + mphys = VM_PAGE_TO_PHYS(m); + *pde = mphys | X86_PG_RW | X86_PG_V; + } else { + mphys = *pde & ~PAGE_MASK; + } + + pte = (pt_entry_t *)PHYS_TO_DMAP(mphys); + pte += pmap_pte_index(va); + KASSERT(*pte == 0, ("va %#jx *pt %#jx", va, *pte)); + + return (pte); +} + +static bool 
+efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) +{ + struct efi_md *p; + pt_entry_t *pte; + vm_offset_t va; + uint64_t idx; + int bits, i, mode; + + obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, 1 + NPML4EPG + + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG, + VM_PROT_ALL, 0, NULL); + VM_OBJECT_WLOCK(obj_1t1_pt); + efi_pml4_page = efi_1t1_page(0); + VM_OBJECT_WUNLOCK(obj_1t1_pt); + efi_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_pml4_page)); + pmap_pinit_pml4(efi_pml4_page); + + for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, + descsz)) { + if ((p->md_attr & EFI_MD_ATTR_RT) == 0) + continue; + if (p->md_virt != NULL) { + if (bootverbose) + printf("EFI Runtime entry %d is mapped\n", i); + goto fail; + } + if ((p->md_phys & EFI_PAGE_MASK) != 0) { + if (bootverbose) + printf("EFI Runtime entry %d is not aligned\n", + i); + goto fail; + } + if (p->md_phys + p->md_pages * EFI_PAGE_SIZE < p->md_phys || + p->md_phys + p->md_pages * EFI_PAGE_SIZE >= + VM_MAXUSER_ADDRESS) { + printf("EFI Runtime entry %d is not in mappable for RT:" + "base %#016jx %#jx pages\n", + i, (uintmax_t)p->md_phys, + (uintmax_t)p->md_pages); + goto fail; + } + if ((p->md_attr & EFI_MD_ATTR_WB) != 0) + mode = VM_MEMATTR_WRITE_BACK; + else if ((p->md_attr & EFI_MD_ATTR_WT) != 0) + mode = VM_MEMATTR_WRITE_THROUGH; + else if ((p->md_attr & EFI_MD_ATTR_WC) != 0) + mode = VM_MEMATTR_WRITE_COMBINING; + else if ((p->md_attr & EFI_MD_ATTR_WP) != 0) + mode = VM_MEMATTR_WRITE_PROTECTED; + else if ((p->md_attr & EFI_MD_ATTR_UC) != 0) + mode = VM_MEMATTR_UNCACHEABLE; + else { + if (bootverbose) + printf("EFI Runtime entry %d mapping " + "attributes unsupported\n", i); + mode = VM_MEMATTR_UNCACHEABLE; + } + bits = pmap_cache_bits(kernel_pmap, mode, FALSE) | X86_PG_RW | + X86_PG_V; + VM_OBJECT_WLOCK(obj_1t1_pt); + for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++, + va += PAGE_SIZE) { + pte = efi_1t1_pte(va); + pte_store(pte, va | bits); + } + 
VM_OBJECT_WUNLOCK(obj_1t1_pt); + } + + return (true); + +fail: + efi_destroy_1t1_map(); + return (false); +} + +/* + * Create an environment for the EFI runtime code call. The most + * important part is creating the required 1:1 physical->virtual + * mappings for the runtime segments. To do that, we manually create + * page table which unmap userspace but gives correct kernel mapping. + * The 1:1 mappings for runtime segments usually occupy low 4G of the + * physical address map. + * + * The 1:1 mappings were chosen over the SetVirtualAddressMap() EFI RT + * service, because there are some BIOSes which fail to correctly + * relocate themselves on the call, requiring both 1:1 and virtual + * mapping. As a result, we must provide 1:1 mapping anyway, so no + * reason to bother with the virtual map, and no need to add a + * complexity into loader. + * + * The fpu_kern_enter() call allows firmware to use FPU, as mandated + * by the specification. In particular, CR0.TS bit is cleared. Also + * it enters critical section, giving us necessary protection against + * context switch. + * + * There is no need to disable interrupts around the change of %cr3, + * the kernel mappings are correct, while we only grabbed the + * userspace portion of VA. Interrupt handlers must not access + * userspace. Having interrupts enabled fixes the issue with + * firmware/SMM long operation, which would negatively affect IPIs, + * esp. TLB shootdown requests. + */ +static int +efi_enter(void) +{ + pmap_t curpmap; + int error; + + if (efi_runtime == NULL) + return (ENXIO); + curpmap = PCPU_GET(curpmap); + PMAP_LOCK(curpmap); + mtx_lock(&efi_lock); + error = fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX); + if (error != 0) { + PMAP_UNLOCK(curpmap); + return (error); + } + + /* + * IPI TLB shootdown handler invltlb_pcid_handler() reloads + * %cr3 from the curpmap->pm_cr3, which would disable runtime + * segments mappings. Block the handler's action by setting + * curpmap to an impossible value. 
See also comment in + * pmap.c:pmap_activate_sw(). + */ + if (pmap_pcid_enabled && !invpcid_works) + PCPU_SET(curpmap, NULL); + + load_cr3(VM_PAGE_TO_PHYS(efi_pml4_page) | (pmap_pcid_enabled ? + curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0)); + /* + * If PCID is enabled, the clear CR3_PCID_SAVE bit in the loaded %cr3 + * causes TLB invalidation. + */ + if (!pmap_pcid_enabled) + invltlb(); + return (0); +} + +static void +efi_leave(void) +{ + pmap_t curpmap; + + curpmap = &curproc->p_vmspace->vm_pmap; + if (pmap_pcid_enabled && !invpcid_works) + PCPU_SET(curpmap, curpmap); + load_cr3(curpmap->pm_cr3 | (pmap_pcid_enabled ? + curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0)); + if (!pmap_pcid_enabled) + invltlb(); + + fpu_kern_leave(curthread, NULL); + mtx_unlock(&efi_lock); + PMAP_UNLOCK(curpmap); +} + +static int +efi_init(void) +{ + struct efi_map_header *efihdr; + struct efi_md *map; + caddr_t kmdp; + size_t efisz; + + mtx_init(&efi_lock, "efi", NULL, MTX_DEF); + + if (efi_systbl_phys == 0) { + if (bootverbose) + printf("EFI systbl not available\n"); + return (ENXIO); + } + efi_systbl = (struct efi_systbl *)PHYS_TO_DMAP(efi_systbl_phys); + if (efi_systbl->st_hdr.th_sig != EFI_SYSTBL_SIG) { + efi_systbl = NULL; + if (bootverbose) + printf("EFI systbl signature invalid\n"); + return (ENXIO); + } + efi_cfgtbl = (efi_systbl->st_cfgtbl == 0) ? 
NULL : + (struct efi_cfgtbl *)efi_systbl->st_cfgtbl; + if (efi_cfgtbl == NULL) { + if (bootverbose) + printf("EFI config table is not present\n"); + } + + kmdp = preload_search_by_type("elf kernel"); + if (kmdp == NULL) + kmdp = preload_search_by_type("elf64 kernel"); + efihdr = (struct efi_map_header *)preload_search_info(kmdp, + MODINFO_METADATA | MODINFOMD_EFI_MAP); + if (efihdr == NULL) { + if (bootverbose) + printf("EFI map is not present\n"); + return (ENXIO); + } + efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; + map = (struct efi_md *)((uint8_t *)efihdr + efisz); + if (efihdr->descriptor_size == 0) + return (ENOMEM); + + if (!efi_create_1t1_map(map, efihdr->memory_size / + efihdr->descriptor_size, efihdr->descriptor_size)) { + if (bootverbose) + printf("EFI cannot create runtime map\n"); + return (ENOMEM); + } + + efi_runtime = (efi_systbl->st_rt == 0) ? NULL : + (struct efi_rt *)efi_systbl->st_rt; + if (efi_runtime == NULL) { + if (bootverbose) + printf("EFI runtime services table is not present\n"); + efi_destroy_1t1_map(); + return (ENXIO); + } + + return (0); +} + +static void +efi_uninit(void) +{ + + efi_destroy_1t1_map(); + + efi_systbl = NULL; + efi_cfgtbl = NULL; + efi_runtime = NULL; + + mtx_destroy(&efi_lock); +} + +int +efi_get_table(struct uuid *uuid, void **ptr) +{ + struct efi_cfgtbl *ct; + u_long count; + + if (efi_cfgtbl == NULL) + return (ENXIO); + count = efi_systbl->st_entries; + ct = efi_cfgtbl; + while (count--) { + if (!bcmp(&ct->ct_uuid, uuid, sizeof(*uuid))) { + *ptr = (void *)PHYS_TO_DMAP(ct->ct_data); + return (0); + } + ct++; + } + return (ENOENT); +} + +int +efi_get_time_locked(struct efi_tm *tm) +{ + efi_status status; + int error; + + mtx_assert(&resettodr_lock, MA_OWNED); + error = efi_enter(); + if (error != 0) + return (error); + status = efi_runtime->rt_gettime(tm, NULL); + efi_leave(); + error = efi_status_to_errno(status); + return (error); +} + +int +efi_get_time(struct efi_tm *tm) +{ + int error; + + if 
(efi_runtime == NULL) + return (ENXIO); + mtx_lock(&resettodr_lock); + error = efi_get_time_locked(tm); + mtx_unlock(&resettodr_lock); + return (error); +} + +int +efi_reset_system(void) +{ + int error; + + error = efi_enter(); + if (error != 0) + return (error); + efi_runtime->rt_reset(EFI_RESET_WARM, 0, 0, NULL); + efi_leave(); + return (EIO); +} + +int +efi_set_time_locked(struct efi_tm *tm) +{ + efi_status status; + int error; + + mtx_assert(&resettodr_lock, MA_OWNED); + error = efi_enter(); + if (error != 0) + return (error); + status = efi_runtime->rt_settime(tm); + efi_leave(); + error = efi_status_to_errno(status); + return (error); +} + +int +efi_set_time(struct efi_tm *tm) +{ + int error; + + if (efi_runtime == NULL) + return (ENXIO); + mtx_lock(&resettodr_lock); + error = efi_set_time_locked(tm); + mtx_unlock(&resettodr_lock); + return (error); +} + +int +efi_var_get(efi_char *name, struct uuid *vendor, uint32_t *attrib, + size_t *datasize, void *data) +{ + efi_status status; + int error; + + error = efi_enter(); + if (error != 0) + return (error); + status = efi_runtime->rt_getvar(name, vendor, attrib, datasize, data); + efi_leave(); + error = efi_status_to_errno(status); + return (error); +} + +int +efi_var_nextname(size_t *namesize, efi_char *name, struct uuid *vendor) +{ + efi_status status; + int error; + + error = efi_enter(); + if (error != 0) + return (error); + status = efi_runtime->rt_scanvar(namesize, name, vendor); + efi_leave(); + error = efi_status_to_errno(status); + return (error); +} + +int +efi_var_set(efi_char *name, struct uuid *vendor, uint32_t attrib, + size_t datasize, void *data) +{ + efi_status status; + int error; + + error = efi_enter(); + if (error != 0) + return (error); + status = efi_runtime->rt_setvar(name, vendor, attrib, datasize, data); + efi_leave(); + error = efi_status_to_errno(status); + return (error); +} + +static int +efirt_modevents(module_t m, int event, void *arg __unused) +{ + + switch (event) { + case 
MOD_LOAD: + return (efi_init()); + break; + + case MOD_UNLOAD: + efi_uninit(); + return (0); + + case MOD_SHUTDOWN: + return (0); + + default: + return (EOPNOTSUPP); + } +} + +static moduledata_t efirt_moddata = { + .name = "efirt", + .evhand = efirt_modevents, + .priv = NULL, +}; +DECLARE_MODULE(efirt, efirt_moddata, SI_SUB_VM_CONF, SI_ORDER_ANY); +MODULE_VERSION(efirt, 1); + +/* XXX debug stuff */ +static int +efi_time_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + struct efi_tm tm; + int error, val; + + val = 0; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + error = efi_get_time(&tm); + if (error == 0) { + uprintf("EFI reports: Year %d Month %d Day %d Hour %d Min %d " + "Sec %d\n", tm.tm_year, tm.tm_mon, tm.tm_mday, tm.tm_hour, + tm.tm_min, tm.tm_sec); + } + return (error); +} + +SYSCTL_PROC(_debug, OID_AUTO, efi_time, CTLTYPE_INT | CTLFLAG_RW, NULL, 0, + efi_time_sysctl_handler, "I", ""); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 05b75dd..8aa21bb 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -192,7 +192,7 @@ struct msgbuf *msgbufp; * Physical address of the EFI System Table. Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. 
*/ -vm_paddr_t efi_systbl; +vm_paddr_t efi_systbl_phys; /* Intel ICH registers */ #define ICH_PMBASE 0x400 @@ -1502,7 +1502,7 @@ native_parse_preload_data(u_int64_t modulep) ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif - efi_systbl = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); + efi_systbl_phys = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); return (kmdp); } diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c index df04f42..442819b 100644 --- a/sys/amd64/amd64/minidump_machdep.c +++ b/sys/amd64/amd64/minidump_machdep.c @@ -239,10 +239,10 @@ minidumpsys(struct dumperinfo *di) * page written corresponds to 1GB of space */ pmapsize += PAGE_SIZE; - ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); + ii = pmap_pml4e_index(va); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); - i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); + i = pmap_pdpe_index(va); if ((pdp[i] & PG_V) == 0) { va += NBPDP; continue; @@ -264,7 +264,7 @@ minidumpsys(struct dumperinfo *di) pd = (uint64_t *)PHYS_TO_DMAP(pdp[i] & PG_FRAME); for (n = 0; n < NPDEPG; n++, va += NBPDR) { - j = (va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1); + j = pmap_pde_index(va); if ((pd[j] & PG_V) == 0) continue; @@ -368,10 +368,10 @@ minidumpsys(struct dumperinfo *di) bzero(fakepd, sizeof(fakepd)); for (va = VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NBPDR, kernel_vm_end); va += NBPDP) { - ii = (va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1); + ii = pmap_pml4e_index(va); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); - i = (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1); + i = pmap_pdpe_index(va); /* We always write a page, even if it is zero */ if ((pdp[i] & PG_V) == 0) { diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index f87d3b5..ae525da 100644 --- a/sys/amd64/amd64/pmap.c +++ 
b/sys/amd64/amd64/pmap.c @@ -673,35 +673,6 @@ pmap_pde_pindex(vm_offset_t va) } -/* Return various clipped indexes for a given VA */ -static __inline vm_pindex_t -pmap_pte_index(vm_offset_t va) -{ - - return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); -} - -static __inline vm_pindex_t -pmap_pde_index(vm_offset_t va) -{ - - return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); -} - -static __inline vm_pindex_t -pmap_pdpe_index(vm_offset_t va) -{ - - return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); -} - -static __inline vm_pindex_t -pmap_pml4e_index(vm_offset_t va) -{ - - return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); -} - /* Return a pointer to the PML4 slot that corresponds to a VA */ static __inline pml4_entry_t * pmap_pml4e(pmap_t pmap, vm_offset_t va) @@ -1353,7 +1324,7 @@ pmap_swap_pat(pmap_t pmap, pt_entry_t entry) * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ -static int +int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; @@ -2374,6 +2345,29 @@ pmap_pinit0(pmap_t pmap) CPU_FILL(&kernel_pmap->pm_active); } +void +pmap_pinit_pml4(vm_page_t pml4pg) +{ + pml4_entry_t *pm_pml4; + int i; + + pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); + + /* Wire in kernel global address entries. */ + for (i = 0; i < NKPML4E; i++) { + pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) | X86_PG_RW | + X86_PG_V | PG_U; + } + for (i = 0; i < ndmpdpphys; i++) { + pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | + X86_PG_V | PG_U; + } + + /* install self-referential address mapping entry(s) */ + pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW | + X86_PG_A | X86_PG_M; +} + /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. 
@@ -2410,20 +2404,7 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) */ if ((pmap->pm_type = pm_type) == PT_X86) { pmap->pm_cr3 = pml4phys; - - /* Wire in kernel global address entries. */ - for (i = 0; i < NKPML4E; i++) { - pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) | - X86_PG_RW | X86_PG_V | PG_U; - } - for (i = 0; i < ndmpdpphys; i++) { - pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | - X86_PG_RW | X86_PG_V | PG_U; - } - - /* install self-referential address mapping entry(s) */ - pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | - X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; + pmap_pinit_pml4(pml4pg); } pmap->pm_root.rt_root = 0; @@ -5850,6 +5831,14 @@ safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) * should be tested and standardized at some point in the future for * optimal aging of shared pages. * + * As an optimization, update the page's dirty field if a modified bit is + * found while counting reference bits. This opportunistic update can be + * performed at low cost and can eliminate the need for some future calls + * to pmap_is_modified(). However, since this function stops after + * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some + * dirty pages. Those dirty pages will only be detected by a future call + * to pmap_is_modified(). + * * A DI block is not needed within this function, because * invalidations are performed before the PV list lock is * released. 
@@ -5862,7 +5851,7 @@ pmap_ts_referenced(vm_page_t m) pmap_t pmap; struct rwlock *lock; pd_entry_t oldpde, *pde; - pt_entry_t *pte, PG_A; + pt_entry_t *pte, PG_A, PG_M, PG_RW; vm_offset_t va; vm_paddr_t pa; int cleared, md_gen, not_cleared, pvh_gen; @@ -5897,9 +5886,19 @@ retry: } } PG_A = pmap_accessed_bit(pmap); + PG_M = pmap_modified_bit(pmap); + PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, pv->pv_va); oldpde = *pde; + if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) { + /* + * Although "oldpde" is mapping a 2MB page, because + * this function is called at a 4KB page granularity, + * we only update the 4KB page under test. + */ + vm_page_dirty(m); + } if ((*pde & PG_A) != 0) { /* * Since this reference bit is shared by 512 4KB @@ -5993,11 +5992,15 @@ small_mappings: } } PG_A = pmap_accessed_bit(pmap); + PG_M = pmap_modified_bit(pmap); + PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); + if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) + vm_page_dirty(m); if ((*pte & PG_A) != 0) { if (safe_to_clear_referenced(pmap, *pte)) { atomic_clear_long(pte, PG_A); @@ -6865,6 +6868,7 @@ pmap_activate_sw(struct thread *td) { pmap_t oldpmap, pmap; uint64_t cached, cr3; + register_t rflags; u_int cpuid; oldpmap = PCPU_GET(curpmap); @@ -6888,16 +6892,43 @@ pmap_activate_sw(struct thread *td) pmap == kernel_pmap, ("non-kernel pmap thread %p pmap %p cpu %d pcid %#x", td, pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid)); + + /* + * If the INVPCID instruction is not available, + * invltlb_pcid_handler() is used to handle + * invalidate_all IPI, which checks for curpmap == + * smp_tlb_pmap. Below operations sequence has a + * window where %CR3 is loaded with the new pmap's + * PML4 address, but curpmap value is not yet updated. 
+ * This causes invltlb IPI handler, called between the + * updates, to execute as NOP, which leaves stale TLB + * entries. + * + * Note that the most typical use of + * pmap_activate_sw(), from the context switch, is + * immune to this race, because interrupts are + * disabled (while the thread lock is owned), and IPI + * happens after curpmap is updated. Protect other + * callers in a similar way, by disabling interrupts + * around the %cr3 register reload and curpmap + * assignment. + */ + if (!invpcid_works) + rflags = intr_disable(); + if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) { load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | cached); if (cached) PCPU_INC(pm_save_cnt); } + PCPU_SET(curpmap, pmap); + if (!invpcid_works) + intr_restore(rflags); } else if (cr3 != pmap->pm_cr3) { load_cr3(pmap->pm_cr3); + PCPU_SET(curpmap, pmap); } - PCPU_SET(curpmap, pmap); #ifdef SMP CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); #else diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index 4107ca6..bf10762 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -599,6 +599,9 @@ options ENABLE_ALART # Control alarm on Intel intpm driver # options NKPT=31 +# EFI Runtime Services support (not functional yet). 
+options EFIRT + ##################################################################### # ABI Emulation diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index f234873..4b7df46 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -645,12 +645,36 @@ load_gs(u_short sel) #endif static __inline void +bare_lgdt(struct region_descriptor *addr) +{ + __asm __volatile("lgdt (%0)" : : "r" (addr)); +} + +static __inline void +sgdt(struct region_descriptor *addr) +{ + char *loc; + + loc = (char *)addr; + __asm __volatile("sgdt %0" : "=m" (*loc) : : "memory"); +} + +static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void +sidt(struct region_descriptor *addr) +{ + char *loc; + + loc = (char *)addr; + __asm __volatile("sidt %0" : "=m" (*loc) : : "memory"); +} + +static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); @@ -662,6 +686,15 @@ ltr(u_short sel) __asm __volatile("ltr %0" : : "r" (sel)); } +static __inline uint32_t +read_tr(void) +{ + u_short sel; + + __asm __volatile("str %0" : "=r" (sel)); + return (sel); +} + static __inline uint64_t rdr0(void) { diff --git a/sys/amd64/include/efi.h b/sys/amd64/include/efi.h new file mode 100644 index 0000000..272d5a8 --- /dev/null +++ b/sys/amd64/include/efi.h @@ -0,0 +1,59 @@ +/*- + * Copyright (c) 2016 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __AMD64_INCLUDE_EFI_H_ +#define __AMD64_INCLUDE_EFI_H_ + +/* + * XXX: from gcc 6.2 manual: + * Note, the ms_abi attribute for Microsoft Windows 64-bit targets + * currently requires the -maccumulate-outgoing-args option. 
+ */ +#define EFIABI_ATTR __attribute__((ms_abi)) + +#ifdef _KERNEL +struct uuid; +struct efi_tm; + +int efi_get_table(struct uuid *uuid, void **ptr); +int efi_get_time(struct efi_tm *tm); +int efi_get_time_locked(struct efi_tm *tm); +int efi_reset_system(void); +int efi_set_time(struct efi_tm *tm); +int efi_set_time_locked(struct efi_tm *tm); +int efi_var_get(uint16_t *name, struct uuid *vendor, uint32_t *attrib, + size_t *datasize, void *data); +int efi_var_nextname(size_t *namesize, uint16_t *name, struct uuid *vendor); +int efi_var_set(uint16_t *name, struct uuid *vendor, uint32_t attrib, + size_t datasize, void *data); +#endif + +#endif /* __AMD64_INCLUDE_EFI_H_ */ diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 90546f5..4d924bd 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -391,6 +391,7 @@ struct thread; void pmap_activate_sw(struct thread *); void pmap_bootstrap(vm_paddr_t *); +int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde); int pmap_change_attr(vm_offset_t, vm_size_t, int); void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate); void pmap_init_pat(void); @@ -403,6 +404,7 @@ void *pmap_mapdev(vm_paddr_t, vm_size_t); void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int); boolean_t pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); +void pmap_pinit_pml4(vm_page_t); void pmap_unmapdev(vm_offset_t, vm_size_t); void pmap_invalidate_page(pmap_t, vm_offset_t); void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); @@ -416,6 +418,35 @@ boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); void pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t); #endif /* _KERNEL */ +/* Return various clipped indexes for a given VA */ +static __inline vm_pindex_t +pmap_pte_index(vm_offset_t va) +{ + + return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); +} + +static __inline vm_pindex_t 
+pmap_pde_index(vm_offset_t va) +{ + + return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); +} + +static __inline vm_pindex_t +pmap_pdpe_index(vm_offset_t va) +{ + + return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); +} + +static __inline vm_pindex_t +pmap_pml4e_index(vm_offset_t va) +{ + + return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); +} + #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c index 9cfc4c2..75cf1ec 100644 --- a/sys/amd64/vmm/io/iommu.c +++ b/sys/amd64/vmm/io/iommu.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <dev/pci/pcivar.h> #include <dev/pci/pcireg.h> +#include <machine/cpu.h> #include <machine/md_var.h> #include "vmm_util.h" @@ -51,8 +52,13 @@ static int iommu_avail; SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail, 0, "bhyve iommu initialized?"); +static int iommu_enable = 1; +SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, enable, CTLFLAG_RDTUN, &iommu_enable, 0, + "Enable use of I/O MMU (required for PCI passthrough)."); + static struct iommu_ops *ops; static void *host_domain; +static eventhandler_tag add_tag, delete_tag; static __inline int IOMMU_INIT(void) @@ -148,14 +154,31 @@ IOMMU_DISABLE(void) (*ops->disable)(); } -void +static void +iommu_pci_add(void *arg, device_t dev) +{ + + /* Add new devices to the host domain. 
*/ + iommu_add_device(host_domain, pci_get_rid(dev)); +} + +static void +iommu_pci_delete(void *arg, device_t dev) +{ + + iommu_remove_device(host_domain, pci_get_rid(dev)); +} + +static void iommu_init(void) { int error, bus, slot, func; vm_paddr_t maxaddr; - const char *name; device_t dev; + if (!iommu_enable) + return; + if (vmm_is_intel()) ops = &iommu_ops_intel; else if (vmm_is_amd()) @@ -174,8 +197,13 @@ iommu_init(void) */ maxaddr = vmm_mem_maxaddr(); host_domain = IOMMU_CREATE_DOMAIN(maxaddr); - if (host_domain == NULL) - panic("iommu_init: unable to create a host domain"); + if (host_domain == NULL) { + printf("iommu_init: unable to create a host domain"); + IOMMU_CLEANUP(); + ops = NULL; + iommu_avail = 0; + return; + } /* * Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to @@ -183,6 +211,9 @@ iommu_init(void) */ iommu_create_mapping(host_domain, 0, 0, maxaddr); + add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0); + delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete, + NULL, 0); for (bus = 0; bus <= PCI_BUSMAX; bus++) { for (slot = 0; slot <= PCI_SLOTMAX; slot++) { for (func = 0; func <= PCI_FUNCMAX; func++) { @@ -190,12 +221,7 @@ iommu_init(void) if (dev == NULL) continue; - /* skip passthrough devices */ - name = device_get_name(dev); - if (name != NULL && strcmp(name, "ppt") == 0) - continue; - - /* everything else belongs to the host domain */ + /* Everything belongs to the host domain. 
*/ iommu_add_device(host_domain, pci_get_rid(dev)); } @@ -208,6 +234,15 @@ iommu_init(void) void iommu_cleanup(void) { + + if (add_tag != NULL) { + EVENTHANDLER_DEREGISTER(pci_add_device, add_tag); + add_tag = NULL; + } + if (delete_tag != NULL) { + EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag); + delete_tag = NULL; + } IOMMU_DISABLE(); IOMMU_DESTROY_DOMAIN(host_domain); IOMMU_CLEANUP(); @@ -216,7 +251,16 @@ iommu_cleanup(void) void * iommu_create_domain(vm_paddr_t maxaddr) { - + static volatile int iommu_initted; + + if (iommu_initted < 2) { + if (atomic_cmpset_int(&iommu_initted, 0, 1)) { + iommu_init(); + atomic_store_rel_int(&iommu_initted, 2); + } else + while (iommu_initted == 1) + cpu_spinwait(); + } return (IOMMU_CREATE_DOMAIN(maxaddr)); } diff --git a/sys/amd64/vmm/io/iommu.h b/sys/amd64/vmm/io/iommu.h index 36b44fa..a941c77 100644 --- a/sys/amd64/vmm/io/iommu.h +++ b/sys/amd64/vmm/io/iommu.h @@ -61,7 +61,6 @@ struct iommu_ops { extern struct iommu_ops iommu_ops_intel; extern struct iommu_ops iommu_ops_amd; -void iommu_init(void); void iommu_cleanup(void); void *iommu_host_domain(void); void *iommu_create_domain(vm_paddr_t maxaddr); diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index 692190a..4c9ff47 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -362,7 +362,13 @@ ppt_assign_device(struct vm *vm, int bus, int slot, int func) if (ppt->vm != NULL && ppt->vm != vm) return (EBUSY); + pci_save_state(ppt->dev); + pcie_flr(ppt->dev, + max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10), + true); + pci_restore_state(ppt->dev); ppt->vm = vm; + iommu_remove_device(iommu_host_domain(), pci_get_rid(ppt->dev)); iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); return (0); } @@ -381,10 +387,17 @@ ppt_unassign_device(struct vm *vm, int bus, int slot, int func) */ if (ppt->vm != vm) return (EBUSY); + + pci_save_state(ppt->dev); + pcie_flr(ppt->dev, + max(pcie_get_max_completion_timeout(ppt->dev) / 1000, 10), + 
true); + pci_restore_state(ppt->dev); ppt_unmap_mmio(vm, ppt); ppt_teardown_msi(ppt); ppt_teardown_msix(ppt); iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + iommu_add_device(iommu_host_domain(), pci_get_rid(ppt->dev)); ppt->vm = NULL; return (0); } diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index ebd6360..537454a 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -224,11 +224,6 @@ SYSCTL_INT(_hw_vmm, OID_AUTO, trace_guest_exceptions, CTLFLAG_RDTUN, &trace_guest_exceptions, 0, "Trap into hypervisor on all guest exceptions and reflect them back"); -static int vmm_force_iommu = 0; -TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu); -SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0, - "Force use of I/O MMU even if no passthrough devices were found."); - static void vm_free_memmap(struct vm *vm, int ident); static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); @@ -358,8 +353,6 @@ vmm_handler(module_t mod, int what, void *arg) switch (what) { case MOD_LOAD: vmmdev_init(); - if (vmm_force_iommu || ppt_avail_devices() > 0) - iommu_init(); error = vmm_init(); if (error == 0) vmm_initialized = 1; @@ -396,9 +389,6 @@ static moduledata_t vmm_kmod = { /* * vmm initialization has the following dependencies: * - * - iommu initialization must happen after the pci passthru driver has had - * a chance to attach to any passthru devices (after SI_SUB_CONFIGURE). - * * - VT-x initialization requires smp_rendezvous() and therefore must happen * after SMP is fully functional (after SI_SUB_SMP). */ @@ -893,6 +883,8 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) ("vm_assign_pptdev: iommu must be NULL")); maxaddr = sysmem_maxaddr(vm); vm->iommu = iommu_create_domain(maxaddr); + if (vm->iommu == NULL) + return (ENXIO); vm_iommu_map(vm); } |