path: root/sys/x86/iommu
author		kib <kib@FreeBSD.org>	2013-10-28 13:33:29 +0000
committer	kib <kib@FreeBSD.org>	2013-10-28 13:33:29 +0000
commit		74b8996ebefacbf869105ddeb0bd74e0c79859d1 (patch)
tree		c58e8615de663a385a4b33efbb947ccf0106671f /sys/x86/iommu
parent		de4d214d0421a4179a8e4b872e203547b330cb19 (diff)
download	FreeBSD-src-74b8996ebefacbf869105ddeb0bd74e0c79859d1.zip
		FreeBSD-src-74b8996ebefacbf869105ddeb0bd74e0c79859d1.tar.gz
Import the driver for VT-d DMAR hardware, as specified in revision 1.3
of the Intel Virtualization Technology for Directed I/O Architecture
Specification. The Extended Context and PASIDs from rev. 2.2 are not
supported, but I am not aware of any released hardware which implements
them. The code does not use queued invalidation (see the comments for
the reason) and does not provide interrupt remapping services. The code
implements the management of the guest address space per domain and
allows arbitrary mappings to be established and torn down, but not
partial unmapping. Superpages are created as needed, but not promoted.
Faults are recorded; fault records can be obtained programmatically and
are printed on the console.

Implement busdma(9) using DMARs. This busdma backend avoids bouncing
and provides security against misbehaving hardware and bad driver
programming, preventing leaks and corruption of memory by wild DMA
accesses.

By default, the implementation is compiled into the amd64 GENERIC
kernel but is disabled; to enable it, set the hw.dmar.enable=1 loader
tunable. The code is written to work on i386, but testing there was a
low priority, and the driver is not enabled in GENERIC. Even with the
DMAR turned on, individual devices can be directed to use the bounce
busdma with the hw.busdma.pci<domain>.<bus>.<device>.<function>.bounce=1
tunable. If the DMAR is capable of pass-through translation, it is
used; otherwise, an identity-mapping page table is constructed.

The driver was tested on a Xeon 5400/5500 chipset legacy machine, a
Haswell desktop and E5 SandyBridge dual-socket boxes, with ahci(4),
ata(4), bce(4), ehci(4), mfi(4), uhci(4) and xhci(4) devices. It also
works with em(4) and igb(4), but those drivers need fixes which are not
committed yet. Intel GPUs do not work with DMAR (yet).

Many thanks to John Baldwin, who explained the newbus integration to
me; to Peter Holm, who did all the testing and helped me discover and
understand several incredible bugs; and to Jim Harris for access to the
EDS and BWG and for listening when I had to explain my findings to
somebody.

Sponsored by:	The FreeBSD Foundation
MFC after:	1 month
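As a minimal illustration of the tunables mentioned above, a loader.conf(5)
fragment might look like the following; the PCI address used for the
per-device override is only a hypothetical example:

    # Enable the DMAR-backed busdma globally.
    hw.dmar.enable=1
    # Keep one (hypothetical) device at pci0:3:0:0 on the bounce busdma.
    hw.busdma.pci0.3.0.0.bounce=1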
Diffstat (limited to 'sys/x86/iommu')
-rw-r--r--	sys/x86/iommu/busdma_dmar.c	 754
-rw-r--r--	sys/x86/iommu/busdma_dmar.h	  65
-rw-r--r--	sys/x86/iommu/intel_ctx.c	 553
-rw-r--r--	sys/x86/iommu/intel_dmar.h	 374
-rw-r--r--	sys/x86/iommu/intel_drv.c	1098
-rw-r--r--	sys/x86/iommu/intel_fault.c	 289
-rw-r--r--	sys/x86/iommu/intel_gas.c	 722
-rw-r--r--	sys/x86/iommu/intel_idpgtbl.c	 799
-rw-r--r--	sys/x86/iommu/intel_quirks.c	 195
-rw-r--r--	sys/x86/iommu/intel_reg.h	 294
-rw-r--r--	sys/x86/iommu/intel_utils.c	 562
11 files changed, 5705 insertions, 0 deletions
diff --git a/sys/x86/iommu/busdma_dmar.c b/sys/x86/iommu/busdma_dmar.c
new file mode 100644
index 0000000..5249f30
--- /dev/null
+++ b/sys/x86/iommu/busdma_dmar.c
@@ -0,0 +1,754 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <dev/pci/pcivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+/*
+ * busdma_dmar.c, the implementation of the busdma(9) interface using
+ * DMAR units from Intel VT-d.
+ */
+
+static bool
+dmar_bus_dma_is_dev_disabled(device_t dev)
+{
+ char str[128], *env;
+ int domain, bus, slot, func;
+
+ domain = pci_get_domain(dev);
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d.bounce",
+ domain, bus, slot, func);
+ env = getenv(str);
+ if (env == NULL)
+ return (false);
+ freeenv(env);
+ return (true);
+}
+
+struct dmar_ctx *
+dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
+{
+ struct dmar_ctx *ctx;
+ bool disabled;
+
+ /*
+	 * If the user requested that the IOMMU be disabled for the
+	 * device, we cannot disable the whole DMAR, due to the
+	 * possibility of other devices on the same DMAR still
+	 * requiring translation. Instead, provide the identity
+	 * mapping for the device context.
+ */
+ disabled = dmar_bus_dma_is_dev_disabled(dev);
+ ctx = dmar_get_ctx(dmar, dev, disabled, rmrr);
+ if (ctx == NULL)
+ return (NULL);
+ ctx->ctx_tag.owner = dev;
+ if (disabled) {
+ /*
+ * Keep the first reference on context, release the
+ * later refs.
+ */
+ DMAR_LOCK(dmar);
+ if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
+ ctx->flags |= DMAR_CTX_DISABLED;
+ DMAR_UNLOCK(dmar);
+ } else {
+ dmar_free_ctx_locked(dmar, ctx);
+ }
+ ctx = NULL;
+ }
+ return (ctx);
+}
+
+bus_dma_tag_t
+dmar_get_dma_tag(device_t dev, device_t child)
+{
+ struct dmar_unit *dmar;
+ struct dmar_ctx *ctx;
+ bus_dma_tag_t res;
+
+ dmar = dmar_find(child);
+ /* Not in scope of any DMAR ? */
+ if (dmar == NULL)
+ return (NULL);
+ dmar_quirks_pre_use(dmar);
+ dmar_instantiate_rmrr_ctxs(dmar);
+
+ ctx = dmar_instantiate_ctx(dmar, child, false);
+ res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
+ return (res);
+}
+
+static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");
+
+static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
+ struct bus_dmamap_dmar *map);
+
+static int
+dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
+ bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
+ bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
+ int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
+ void *lockfuncarg, bus_dma_tag_t *dmat)
+{
+ struct bus_dma_tag_dmar *newtag, *oldtag;
+ int error;
+
+ *dmat = NULL;
+ error = common_bus_dma_tag_create(parent != NULL ?
+ &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
+ boundary, lowaddr, highaddr, filter, filterarg, maxsize,
+ nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
+ sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
+ if (error != 0)
+ return (error);
+
+ oldtag = (struct bus_dma_tag_dmar *)parent;
+ newtag->common.impl = &bus_dma_dmar_impl;
+ newtag->ctx = oldtag->ctx;
+ newtag->owner = oldtag->owner;
+ error = 0;
+
+ if (error != 0)
+ free(newtag, M_DEVBUF);
+ else
+ *dmat = (bus_dma_tag_t)newtag;
+ CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
+ __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
+ error);
+ return (error);
+}
+
+static int
+dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
+{
+ struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
+ int error;
+
+ error = 0;
+ dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;
+
+ if (dmat != NULL) {
+ if (dmat->map_count != 0) {
+ error = EBUSY;
+ goto out;
+ }
+ while (dmat != NULL) {
+ parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
+ if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
+ 1) {
+ if (dmat == &dmat->ctx->ctx_tag)
+ dmar_free_ctx(dmat->ctx);
+ free(dmat->segments, M_DMAR_DMAMAP);
+ free(dmat, M_DEVBUF);
+ dmat = parent;
+ } else
+ dmat = NULL;
+ }
+ }
+out:
+ CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO);
+ if (map == NULL) {
+ *mapp = NULL;
+ return (ENOMEM);
+ }
+ if (tag->segments == NULL) {
+ tag->segments = malloc(sizeof(bus_dma_segment_t) *
+ tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT);
+ if (tag->segments == NULL) {
+ free(map, M_DMAR_DMAMAP);
+ *mapp = NULL;
+ return (ENOMEM);
+ }
+ }
+ TAILQ_INIT(&map->map_entries);
+ map->tag = tag;
+ map->locked = true;
+ map->cansleep = false;
+ tag->map_count++;
+ *mapp = (bus_dmamap_t)map;
+
+ return (0);
+}
+
+static int
+dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ if (map != NULL) {
+ DMAR_CTX_LOCK(tag->ctx);
+ if (!TAILQ_EMPTY(&map->map_entries)) {
+ DMAR_CTX_UNLOCK(tag->ctx);
+ return (EBUSY);
+ }
+ DMAR_CTX_UNLOCK(tag->ctx);
+ free(map, M_DMAR_DMAMAP);
+ }
+ tag->map_count--;
+ return (0);
+}
+
+
+static int
+dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
+ bus_dmamap_t *mapp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ int error, mflags;
+ vm_memattr_t attr;
+
+ error = dmar_bus_dmamap_create(dmat, flags, mapp);
+ if (error != 0)
+ return (error);
+
+ mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
+ mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
+ attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
+ VM_MEMATTR_DEFAULT;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)*mapp;
+
+ if (tag->common.maxsize < PAGE_SIZE &&
+ tag->common.alignment <= tag->common.maxsize &&
+ attr == VM_MEMATTR_DEFAULT) {
+ *vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags);
+ map->flags |= BUS_DMAMAP_DMAR_MALLOC;
+ } else {
+ *vaddr = (void *)kmem_alloc_attr(kernel_arena,
+ tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
+ attr);
+ map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
+ }
+ if (*vaddr == NULL) {
+ dmar_bus_dmamap_destroy(dmat, *mapp);
+ *mapp = NULL;
+ return (ENOMEM);
+ }
+ return (0);
+}
+
+static void
+dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+
+ if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
+ free(vaddr, M_DEVBUF);
+ map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
+ } else {
+ KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
+ ("dmar_bus_dmamem_free for non alloced map %p", map));
+ kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize);
+ map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
+ }
+
+ dmar_bus_dmamap_destroy(dmat, map1);
+}
+
+static int
+dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
+ struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
+ int flags, bus_dma_segment_t *segs, int *segp,
+ struct dmar_map_entries_tailq *unroll_list)
+{
+ struct dmar_ctx *ctx;
+ struct dmar_map_entry *entry;
+ dmar_gaddr_t size;
+ bus_size_t buflen1;
+ int error, idx, gas_flags, seg;
+
+ if (segs == NULL)
+ segs = tag->segments;
+ ctx = tag->ctx;
+ seg = *segp;
+ idx = 0;
+ while (buflen > 0) {
+ seg++;
+ if (seg >= tag->common.nsegments) {
+ error = EFBIG;
+ break;
+ }
+ buflen1 = buflen > tag->common.maxsegsz ?
+ tag->common.maxsegsz : buflen;
+ buflen -= buflen1;
+ size = round_page(offset + buflen1);
+
+ /*
+		 * (Too) optimistically allow split if more than one
+		 * segment is left.
+ */
+ gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
+ if (seg + 1 < tag->common.nsegments)
+ gas_flags |= DMAR_GM_CANSPLIT;
+
+ error = dmar_gas_map(ctx, &tag->common, size,
+ DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
+ gas_flags, ma + idx, &entry);
+ if (error != 0)
+ break;
+ if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
+ KASSERT(size >= entry->end - entry->start,
+ ("split increased entry size %jx %jx %jx",
+ (uintmax_t)size, (uintmax_t)entry->start,
+ (uintmax_t)entry->end));
+ size = entry->end - entry->start;
+ if (buflen1 > size)
+ buflen1 = size;
+ } else {
+ KASSERT(entry->end - entry->start == size,
+ ("no split allowed %jx %jx %jx",
+ (uintmax_t)size, (uintmax_t)entry->start,
+ (uintmax_t)entry->end));
+ }
+
+ KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
+ == 0,
+ ("alignment failed: ctx %p start 0x%jx offset %x "
+ "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
+ (uintmax_t)tag->common.alignment));
+ KASSERT(entry->end <= tag->common.lowaddr ||
+ entry->start >= tag->common.highaddr,
+ ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
+ "lowaddr 0x%jx highaddr 0x%jx", ctx,
+ (uintmax_t)entry->start, (uintmax_t)entry->end,
+ (uintmax_t)tag->common.lowaddr,
+ (uintmax_t)tag->common.highaddr));
+ KASSERT(dmar_test_boundary(entry->start, entry->end -
+ entry->start, tag->common.boundary),
+ ("boundary failed: ctx %p start 0x%jx end 0x%jx "
+ "boundary 0x%jx", ctx, (uintmax_t)entry->start,
+ (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
+ KASSERT(buflen1 <= tag->common.maxsegsz,
+ ("segment too large: ctx %p start 0x%jx end 0x%jx "
+ "maxsegsz 0x%jx", ctx, (uintmax_t)entry->start,
+ (uintmax_t)entry->end, (uintmax_t)tag->common.maxsegsz));
+
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
+ entry->flags |= DMAR_MAP_ENTRY_MAP;
+ DMAR_CTX_UNLOCK(ctx);
+ TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
+
+ segs[seg].ds_addr = entry->start + offset;
+ segs[seg].ds_len = buflen1;
+
+ idx += OFF_TO_IDX(trunc_page(offset + buflen1));
+ offset += buflen1;
+ offset &= DMAR_PAGE_MASK;
+ }
+ if (error == 0)
+ *segp = seg;
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
+ struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
+ int flags, bus_dma_segment_t *segs, int *segp)
+{
+ struct dmar_ctx *ctx;
+ struct dmar_map_entry *entry, *entry1;
+ struct dmar_map_entries_tailq unroll_list;
+ int error;
+
+ ctx = tag->ctx;
+ atomic_add_long(&ctx->loads, 1);
+
+ TAILQ_INIT(&unroll_list);
+ error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
+ buflen, flags, segs, segp, &unroll_list);
+ if (error != 0) {
+ /*
+ * The busdma interface does not allow us to report
+ * partial buffer load, so unfortunately we have to
+ * revert all work done.
+ */
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
+ entry1) {
+ /*
+			 * No entries other than the ones we created
+			 * during the failed run could have been
+			 * inserted in between, since we own the ctx
+			 * pglock.
+ */
+ TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
+ TAILQ_REMOVE(&unroll_list, entry, unroll_link);
+ TAILQ_INSERT_TAIL(&ctx->unload_entries, entry,
+ dmamap_link);
+ }
+ DMAR_CTX_UNLOCK(ctx);
+ taskqueue_enqueue(ctx->dmar->delayed_taskqueue,
+ &ctx->unload_task);
+ }
+
+ if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
+ !map->cansleep)
+ error = EINPROGRESS;
+ if (error == EINPROGRESS)
+ dmar_bus_schedule_dmamap(ctx->dmar, map);
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
+ bus_dma_segment_t *segs, int *segp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
+ flags, segs, segp));
+}
+
+static int
+dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
+ int *segp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ vm_page_t *ma;
+ vm_paddr_t pstart, pend;
+ int error, i, ma_cnt, offset;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ pstart = trunc_page(buf);
+ pend = round_page(buf + buflen);
+ offset = buf & PAGE_MASK;
+ ma_cnt = OFF_TO_IDX(pend - pstart);
+ ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
+ M_WAITOK : M_NOWAIT);
+ if (ma == NULL)
+ return (ENOMEM);
+ for (i = 0; i < ma_cnt; i++)
+ ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
+ error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
+ flags, segs, segp);
+ free(ma, M_DEVBUF);
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
+ bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
+ int *segp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ vm_page_t *ma, fma;
+ vm_paddr_t pstart, pend, paddr;
+ int error, i, ma_cnt, offset;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ pstart = trunc_page((vm_offset_t)buf);
+ pend = round_page((vm_offset_t)buf + buflen);
+ offset = (vm_offset_t)buf & PAGE_MASK;
+ ma_cnt = OFF_TO_IDX(pend - pstart);
+ ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
+ M_WAITOK : M_NOWAIT);
+ if (ma == NULL)
+ return (ENOMEM);
+ if (dumping) {
+ /*
+ * If dumping, do not attempt to call
+ * PHYS_TO_VM_PAGE() at all. It may return non-NULL
+		 * but the returned vm_page might not be initialized,
+ * e.g. for the kernel itself.
+ */
+ KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
+ fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
+ M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
+ if (fma == NULL) {
+ free(ma, M_DEVBUF);
+ return (ENOMEM);
+ }
+ for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
+ paddr = pmap_kextract(pstart);
+ vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
+ ma[i] = &fma[i];
+ }
+ } else {
+ fma = NULL;
+ for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
+ if (pmap == kernel_pmap)
+ paddr = pmap_kextract(pstart);
+ else
+ paddr = pmap_extract(pmap, pstart);
+ ma[i] = PHYS_TO_VM_PAGE(paddr);
+ KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
+ ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
+ (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
+ ma[i]));
+ }
+ }
+ error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
+ flags, segs, segp);
+ free(ma, M_DEVBUF);
+ free(fma, M_DEVBUF);
+ return (error);
+}
+
+static void
+dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
+{
+ struct bus_dmamap_dmar *map;
+
+ if (map1 == NULL)
+ return;
+ map = (struct bus_dmamap_dmar *)map1;
+ map->mem = *mem;
+ map->tag = (struct bus_dma_tag_dmar *)dmat;
+ map->callback = callback;
+ map->callback_arg = callback_arg;
+}
+
+static bus_dma_segment_t *
+dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ bus_dma_segment_t *segs, int nsegs, int error)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+
+ if (!map->locked) {
+ KASSERT(map->cansleep,
+ ("map not locked and not sleepable context %p", map));
+
+ /*
+ * We are called from the delayed context. Relock the
+ * driver.
+ */
+ (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
+ map->locked = true;
+ }
+
+ if (segs == NULL)
+ segs = tag->segments;
+ return (segs);
+}
+
+/*
+ * The limitations of the busdma KPI force the dmar to perform the
+ * actual unload, which consists of unmapping the map entries from the
+ * page tables, from the delayed context on i386, since mapping a page
+ * table page might require a sleep to be successful. The unfortunate
+ * consequence is that DMA requests can still be served for some time
+ * after the bus_dmamap_unload() call has returned.
+ *
+ * On amd64, we assume that sf allocation cannot fail.
+ */
+static void
+dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ struct dmar_ctx *ctx;
+#if defined(__amd64__)
+ struct dmar_map_entries_tailq entries;
+#endif
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ ctx = tag->ctx;
+ atomic_add_long(&ctx->unloads, 1);
+
+#if defined(__i386__)
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_CONCAT(&ctx->unload_entries, &map->map_entries, dmamap_link);
+ DMAR_CTX_UNLOCK(ctx);
+ taskqueue_enqueue(ctx->dmar->delayed_taskqueue, &ctx->unload_task);
+#else /* defined(__amd64__) */
+ TAILQ_INIT(&entries);
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
+ DMAR_CTX_UNLOCK(ctx);
+ THREAD_NO_SLEEPING();
+ dmar_ctx_unload(ctx, &entries, false);
+ THREAD_SLEEPING_OK();
+ KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
+#endif
+}
+
+static void
+dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
+ bus_dmasync_op_t op)
+{
+}
+
+struct bus_dma_impl bus_dma_dmar_impl = {
+ .tag_create = dmar_bus_dma_tag_create,
+ .tag_destroy = dmar_bus_dma_tag_destroy,
+ .map_create = dmar_bus_dmamap_create,
+ .map_destroy = dmar_bus_dmamap_destroy,
+ .mem_alloc = dmar_bus_dmamem_alloc,
+ .mem_free = dmar_bus_dmamem_free,
+ .load_phys = dmar_bus_dmamap_load_phys,
+ .load_buffer = dmar_bus_dmamap_load_buffer,
+ .load_ma = dmar_bus_dmamap_load_ma,
+ .map_waitok = dmar_bus_dmamap_waitok,
+ .map_complete = dmar_bus_dmamap_complete,
+ .map_unload = dmar_bus_dmamap_unload,
+ .map_sync = dmar_bus_dmamap_sync
+};
+
+static void
+dmar_bus_task_dmamap(void *arg, int pending)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+
+ unit = arg;
+ DMAR_LOCK(unit);
+ while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
+ TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
+ DMAR_UNLOCK(unit);
+ tag = map->tag;
+ ctx = map->tag->ctx;
+ map->cansleep = true;
+ map->locked = false;
+ bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
+ &map->mem, map->callback, map->callback_arg,
+ BUS_DMA_WAITOK);
+ map->cansleep = false;
+ if (map->locked) {
+ (tag->common.lockfunc)(tag->common.lockfuncarg,
+ BUS_DMA_UNLOCK);
+ } else
+ map->locked = true;
+ map->cansleep = false;
+ DMAR_LOCK(unit);
+ }
+ DMAR_UNLOCK(unit);
+}
+
+static void
+dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
+{
+ struct dmar_ctx *ctx;
+
+ ctx = map->tag->ctx;
+ map->locked = false;
+ DMAR_LOCK(unit);
+ TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
+ DMAR_UNLOCK(unit);
+ taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
+}
+
+int
+dmar_init_busdma(struct dmar_unit *unit)
+{
+
+ TAILQ_INIT(&unit->delayed_maps);
+ TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
+ unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->delayed_taskqueue);
+ taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
+ "dmar%d busdma taskq", unit->unit);
+ return (0);
+}
+
+void
+dmar_fini_busdma(struct dmar_unit *unit)
+{
+
+ if (unit->delayed_taskqueue == NULL)
+ return;
+
+ taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
+ taskqueue_free(unit->delayed_taskqueue);
+ unit->delayed_taskqueue = NULL;
+}
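The delayed-load machinery above (dmar_bus_schedule_dmamap() and
dmar_bus_task_dmamap()) is only visible to drivers through the standard
busdma(9) deferral contract: a sleepable load may return EINPROGRESS and
the callback then runs later from the unit's taskqueue, with the driver
relocked through its busdma lock function. Below is a minimal,
hypothetical consumer sketch, not part of this commit; the foo_ names
are made up for illustration.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/bus.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>
    #include <machine/bus.h>

    struct foo_softc {
    	device_t	dev;
    	struct mtx	mtx;
    	bus_dma_tag_t	tag;
    	bus_dmamap_t	map;
    	bus_addr_t	busaddr;
    };

    static void
    foo_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
    {
    	struct foo_softc *sc;

    	sc = arg;
    	if (error != 0)
    		return;
    	/* nsegments is 1 in the tag, so a successful load has one segment. */
    	sc->busaddr = segs[0].ds_addr;
    }

    static int
    foo_dma_setup(struct foo_softc *sc, void *buf, bus_size_t len)
    {
    	int error;

    	mtx_init(&sc->mtx, "foo dma", NULL, MTX_DEF);	/* normally at attach */
    	/*
    	 * bus_get_dma_tag(9) asks the parent bus for its tag; with
    	 * hw.dmar.enable=1 the platform busdma code hands out a tag
    	 * backed by bus_dma_dmar_impl for devices in a DMAR scope.
    	 */
    	error = bus_dma_tag_create(bus_get_dma_tag(sc->dev),
    	    1, 0,			/* alignment, boundary */
    	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, /* lowaddr, highaddr */
    	    NULL, NULL,			/* filter, filterarg */
    	    len, 1, len,		/* maxsize, nsegments, maxsegsz */
    	    0, busdma_lock_mutex, &sc->mtx, &sc->tag);
    	if (error != 0)
    		return (error);
    	error = bus_dmamap_create(sc->tag, 0, &sc->map);
    	if (error != 0)
    		return (error);
    	error = bus_dmamap_load(sc->tag, sc->map, buf, len,
    	    foo_load_cb, sc, BUS_DMA_WAITOK);
    	if (error == EINPROGRESS) {
    		/*
    		 * The DMAR backend queued the map on its taskqueue;
    		 * foo_load_cb will run later with sc->mtx held.
    		 */
    		error = 0;
    	}
    	return (error);
    }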
diff --git a/sys/x86/iommu/busdma_dmar.h b/sys/x86/iommu/busdma_dmar.h
new file mode 100644
index 0000000..60ea6bc
--- /dev/null
+++ b/sys/x86/iommu/busdma_dmar.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __X86_IOMMU_BUSDMA_DMAR_H
+#define __X86_IOMMU_BUSDMA_DMAR_H
+
+struct dmar_map_entry;
+TAILQ_HEAD(dmar_map_entries_tailq, dmar_map_entry);
+
+struct bus_dma_tag_dmar {
+ struct bus_dma_tag_common common;
+ struct dmar_ctx *ctx;
+ device_t owner;
+ int map_count;
+ bus_dma_segment_t *segments;
+};
+
+struct bus_dmamap_dmar {
+ struct bus_dma_tag_dmar *tag;
+ struct memdesc mem;
+ bus_dmamap_callback_t *callback;
+ void *callback_arg;
+ struct dmar_map_entries_tailq map_entries;
+ TAILQ_ENTRY(bus_dmamap_dmar) delay_link;
+ bool locked;
+ bool cansleep;
+ int flags;
+};
+
+#define BUS_DMAMAP_DMAR_MALLOC 0x0001
+#define BUS_DMAMAP_DMAR_KMEM_ALLOC 0x0002
+
+extern struct bus_dma_impl bus_dma_dmar_impl;
+
+bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child);
+
+#endif
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
new file mode 100644
index 0000000..b8da09d
--- /dev/null
+++ b/sys/x86/iommu/intel_ctx.c
@@ -0,0 +1,553 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+#include <dev/pci/pcivar.h>
+
+static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
+
+static void dmar_ctx_unload_task(void *arg, int pending);
+
+static void
+dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
+{
+ struct sf_buf *sf;
+ dmar_root_entry_t *re;
+ vm_page_t ctxm;
+
+ /*
+	 * A context page, once allocated, is always linked into the
+	 * root entry, so there is nothing to do if it already exists.
+ */
+ ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC);
+ if (ctxm != NULL)
+ return;
+
+ /*
+	 * Page not present, allocate and link. Note that another
+	 * thread might execute this sequence in parallel. This
+ * should be safe, because the context entries written by both
+ * threads are equal.
+ */
+ TD_PREP_PINNED_ASSERT;
+ ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO |
+ DMAR_PGF_WAITOK);
+ re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf);
+ re += bus;
+ dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
+ VM_PAGE_TO_PHYS(ctxm)));
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ TD_PINNED_ASSERT;
+}
+
+static dmar_ctx_entry_t *
+dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
+{
+ dmar_ctx_entry_t *ctxp;
+
+ ctxp = dmar_map_pgtbl(ctx->dmar->ctx_obj, 1 + ctx->bus,
+ DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp);
+ ctxp += ((ctx->slot & 0x1f) << 3) + (ctx->func & 0x7);
+ return (ctxp);
+}
+
+static void
+ctx_tag_init(struct dmar_ctx *ctx)
+{
+ bus_addr_t maxaddr;
+
+ maxaddr = MIN(ctx->end, BUS_SPACE_MAXADDR);
+ ctx->ctx_tag.common.ref_count = 1; /* Prevent free */
+ ctx->ctx_tag.common.impl = &bus_dma_dmar_impl;
+ ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY;
+ ctx->ctx_tag.common.lowaddr = maxaddr;
+ ctx->ctx_tag.common.highaddr = maxaddr;
+ ctx->ctx_tag.common.maxsize = maxaddr;
+ ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED;
+ ctx->ctx_tag.common.maxsegsz = maxaddr;
+ ctx->ctx_tag.ctx = ctx;
+ /* XXXKIB initialize tag further */
+}
+
+static void
+ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp)
+{
+ struct dmar_unit *unit;
+ vm_page_t ctx_root;
+
+ unit = ctx->dmar;
+ KASSERT(ctxp->ctx1 == 0 && ctxp->ctx2 == 0,
+ ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
+ unit->unit, ctx->bus, ctx->slot, ctx->func, ctxp->ctx1,
+ ctxp->ctx2));
+ ctxp->ctx2 = DMAR_CTX2_DID(ctx->domain);
+ ctxp->ctx2 |= ctx->awlvl;
+ if ((ctx->flags & DMAR_CTX_IDMAP) != 0 &&
+ (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
+ KASSERT(ctx->pgtbl_obj == NULL,
+ ("ctx %p non-null pgtbl_obj", ctx));
+ dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
+ } else {
+ ctx_root = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_NOALLOC);
+ dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
+ (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
+ DMAR_CTX1_P);
+ }
+}
+
+static int
+ctx_init_rmrr(struct dmar_ctx *ctx, device_t dev)
+{
+ struct dmar_map_entries_tailq rmrr_entries;
+ struct dmar_map_entry *entry, *entry1;
+ vm_page_t *ma;
+ dmar_gaddr_t start, end;
+ vm_pindex_t size, i;
+ int error, error1;
+
+ error = 0;
+ TAILQ_INIT(&rmrr_entries);
+ dmar_ctx_parse_rmrr(ctx, dev, &rmrr_entries);
+ TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
+ /*
+ * VT-d specification requires that the start of an
+ * RMRR entry is 4k-aligned. Buggy BIOSes put
+ * anything into the start and end fields. Truncate
+		 * and round as necessary.
+		 *
+		 * We also allow overlapping RMRR entries, see
+ * dmar_gas_alloc_region().
+ */
+ start = entry->start;
+ end = entry->end;
+ entry->start = trunc_page(start);
+ entry->end = round_page(end);
+ size = OFF_TO_IDX(entry->end - entry->start);
+ ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
+ for (i = 0; i < size; i++) {
+ ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
+ VM_MEMATTR_DEFAULT);
+ }
+ error1 = dmar_gas_map_region(ctx, entry, DMAR_MAP_ENTRY_READ |
+ DMAR_MAP_ENTRY_WRITE, DMAR_GM_CANWAIT, ma);
+ /*
+		 * Non-failed RMRR entries are owned by the context rb
+		 * tree. Get rid of the failed entry, but do not stop
+		 * the loop. The rest of the parsed RMRR entries are
+ * loaded and removed on the context destruction.
+ */
+ if (error1 == 0 && entry->end != entry->start) {
+ DMAR_LOCK(ctx->dmar);
+ ctx->flags |= DMAR_CTX_RMRR;
+ DMAR_UNLOCK(ctx->dmar);
+ } else {
+ if (error1 != 0) {
+ device_printf(dev,
+ "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
+ ctx->dmar->unit, start, end, error1);
+ error = error1;
+ }
+ TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
+ dmar_gas_free_entry(ctx, entry);
+ }
+ for (i = 0; i < size; i++)
+ vm_page_putfake(ma[i]);
+ free(ma, M_TEMP);
+ }
+ return (error);
+}
+
+static struct dmar_ctx *
+dmar_get_ctx_alloc(struct dmar_unit *dmar, int bus, int slot, int func)
+{
+ struct dmar_ctx *ctx;
+
+ ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
+ RB_INIT(&ctx->rb_root);
+ TAILQ_INIT(&ctx->unload_entries);
+ TASK_INIT(&ctx->unload_task, 0, dmar_ctx_unload_task, ctx);
+ mtx_init(&ctx->lock, "dmarctx", NULL, MTX_DEF);
+ ctx->dmar = dmar;
+ ctx->bus = bus;
+ ctx->slot = slot;
+ ctx->func = func;
+ return (ctx);
+}
+
+static void
+dmar_ctx_dtr(struct dmar_ctx *ctx, bool gas_inited, bool pgtbl_inited)
+{
+
+ if (gas_inited) {
+ DMAR_CTX_LOCK(ctx);
+ dmar_gas_fini_ctx(ctx);
+ DMAR_CTX_UNLOCK(ctx);
+ }
+ if (pgtbl_inited) {
+ if (ctx->pgtbl_obj != NULL)
+ DMAR_CTX_PGLOCK(ctx);
+ ctx_free_pgtbl(ctx);
+ }
+ mtx_destroy(&ctx->lock);
+ free(ctx, M_DMAR_CTX);
+}
+
+struct dmar_ctx *
+dmar_get_ctx(struct dmar_unit *dmar, device_t dev, bool id_mapped, bool rmrr_init)
+{
+ struct dmar_ctx *ctx, *ctx1;
+ dmar_ctx_entry_t *ctxp;
+ struct sf_buf *sf;
+ int bus, slot, func, error, mgaw;
+ bool enable;
+
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ enable = false;
+ TD_PREP_PINNED_ASSERT;
+ DMAR_LOCK(dmar);
+ ctx = dmar_find_ctx_locked(dmar, bus, slot, func);
+ error = 0;
+ if (ctx == NULL) {
+ /*
+		 * Perform the allocations which require sleep or have a
+		 * higher chance to succeed if sleeping is allowed.
+ */
+ DMAR_UNLOCK(dmar);
+ dmar_ensure_ctx_page(dmar, bus);
+ ctx1 = dmar_get_ctx_alloc(dmar, bus, slot, func);
+
+ if (id_mapped) {
+ /*
+ * For now, use the maximal usable physical
+ * address of the installed memory to
+ * calculate the mgaw. It is useful for the
+ * identity mapping, and less so for the
+ * virtualized bus address space.
+ */
+ ctx1->end = ptoa(Maxmem);
+ mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, false);
+ error = ctx_set_agaw(ctx1, mgaw);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, false, false);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ } else {
+ ctx1->end = BUS_SPACE_MAXADDR;
+ mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, true);
+ error = ctx_set_agaw(ctx1, mgaw);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, false, false);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ /* Use all supported address space for remapping. */
+ ctx1->end = 1ULL << (ctx1->agaw - 1);
+ }
+
+
+ dmar_gas_init_ctx(ctx1);
+ if (id_mapped) {
+ if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
+ ctx1->pgtbl_obj = ctx_get_idmap_pgtbl(ctx1,
+ ctx1->end);
+ }
+ ctx1->flags |= DMAR_CTX_IDMAP;
+ } else {
+ error = ctx_alloc_pgtbl(ctx1);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, true, false);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ /* Disable local apic region access */
+ error = dmar_gas_reserve_region(ctx1, 0xfee00000,
+ 0xfeefffff + 1);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, true, true);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ error = ctx_init_rmrr(ctx1, dev);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, true, true);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ }
+ ctxp = dmar_map_ctx_entry(ctx1, &sf);
+ DMAR_LOCK(dmar);
+
+ /*
+		 * Recheck the contexts; another thread might have
+		 * already allocated the needed one.
+ */
+ ctx = dmar_find_ctx_locked(dmar, bus, slot, func);
+ if (ctx == NULL) {
+ ctx = ctx1;
+ ctx->domain = alloc_unrl(dmar->domids);
+ if (ctx->domain == -1) {
+ DMAR_UNLOCK(dmar);
+ dmar_unmap_pgtbl(sf, true);
+ dmar_ctx_dtr(ctx, true, true);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ ctx_tag_init(ctx);
+
+ /*
+ * This is the first activated context for the
+ * DMAR unit. Enable the translation after
+ * everything is set up.
+ */
+ if (LIST_EMPTY(&dmar->contexts))
+ enable = true;
+ LIST_INSERT_HEAD(&dmar->contexts, ctx, link);
+ ctx_id_entry_init(ctx, ctxp);
+ device_printf(dev,
+ "dmar%d pci%d:%d:%d:%d domain %d mgaw %d agaw %d\n",
+ dmar->unit, dmar->segment, bus, slot,
+ func, ctx->domain, ctx->mgaw, ctx->agaw);
+ } else {
+ dmar_ctx_dtr(ctx1, true, true);
+ }
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ }
+ ctx->refs++;
+ if ((ctx->flags & DMAR_CTX_RMRR) != 0)
+ ctx->refs++; /* XXXKIB */
+
+ /*
+ * If dmar declares Caching Mode as Set, follow 11.5 "Caching
+ * Mode Consideration" and do the (global) invalidation of the
+ * negative TLB entries.
+ */
+ if ((dmar->hw_cap & DMAR_CAP_CM) != 0 || enable) {
+ error = dmar_inv_ctx_glob(dmar);
+ if (error == 0 &&
+ (dmar->hw_ecap & DMAR_ECAP_DI) != 0)
+ error = dmar_inv_iotlb_glob(dmar);
+ if (error != 0) {
+ dmar_free_ctx_locked(dmar, ctx);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ }
+ if (enable && !rmrr_init) {
+ error = dmar_enable_translation(dmar);
+ if (error != 0) {
+ dmar_free_ctx_locked(dmar, ctx);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ }
+ DMAR_UNLOCK(dmar);
+ TD_PINNED_ASSERT;
+ return (ctx);
+}
+
+void
+dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
+{
+ struct sf_buf *sf;
+ dmar_ctx_entry_t *ctxp;
+
+ DMAR_ASSERT_LOCKED(dmar);
+ KASSERT(ctx->refs >= 1,
+ ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
+
+ /*
+ * If our reference is not last, only the dereference should
+ * be performed.
+ */
+ if (ctx->refs > 1) {
+ ctx->refs--;
+ DMAR_UNLOCK(dmar);
+ return;
+ }
+
+ KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0,
+ ("lost ref on RMRR ctx %p", ctx));
+ KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
+ ("lost ref on disabled ctx %p", ctx));
+
+ /*
+ * Otherwise, the context entry must be cleared before the
+ * page table is destroyed. The mapping of the context
+	 * entries page could require a sleep, so unlock the dmar.
+ */
+ DMAR_UNLOCK(dmar);
+ TD_PREP_PINNED_ASSERT;
+ ctxp = dmar_map_ctx_entry(ctx, &sf);
+ DMAR_LOCK(dmar);
+ KASSERT(ctx->refs >= 1,
+ ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
+
+ /*
+	 * Another thread might have referenced the context, in which
+ * case again only the dereference should be performed.
+ */
+ if (ctx->refs > 1) {
+ ctx->refs--;
+ DMAR_UNLOCK(dmar);
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ TD_PINNED_ASSERT;
+ return;
+ }
+
+ KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0,
+ ("lost ref on RMRR ctx %p", ctx));
+ KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
+ ("lost ref on disabled ctx %p", ctx));
+
+ /*
+ * Clear the context pointer and flush the caches.
+ * XXXKIB: cannot do this if any RMRR entries are still present.
+ */
+ dmar_pte_clear(&ctxp->ctx1);
+ ctxp->ctx2 = 0;
+ dmar_inv_ctx_glob(dmar);
+ if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0)
+ dmar_inv_iotlb_glob(dmar);
+ LIST_REMOVE(ctx, link);
+ DMAR_UNLOCK(dmar);
+
+ /*
+	 * The rest of the destruction is invisible to other users of
+ * the dmar unit.
+ */
+ taskqueue_drain(dmar->delayed_taskqueue, &ctx->unload_task);
+ KASSERT(TAILQ_EMPTY(&ctx->unload_entries),
+ ("unfinished unloads %p", ctx));
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ free_unr(dmar->domids, ctx->domain);
+ dmar_ctx_dtr(ctx, true, true);
+ TD_PINNED_ASSERT;
+}
+
+void
+dmar_free_ctx(struct dmar_ctx *ctx)
+{
+ struct dmar_unit *dmar;
+
+ dmar = ctx->dmar;
+ DMAR_LOCK(dmar);
+ dmar_free_ctx_locked(dmar, ctx);
+}
+
+struct dmar_ctx *
+dmar_find_ctx_locked(struct dmar_unit *dmar, int bus, int slot, int func)
+{
+ struct dmar_ctx *ctx;
+
+ DMAR_ASSERT_LOCKED(dmar);
+
+ LIST_FOREACH(ctx, &dmar->contexts, link) {
+ if (ctx->bus == bus && ctx->slot == slot && ctx->func == func)
+ return (ctx);
+ }
+ return (NULL);
+}
+
+void
+dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries,
+ bool cansleep)
+{
+ struct dmar_map_entry *entry;
+ int error;
+
+ while ((entry = TAILQ_FIRST(entries)) != NULL) {
+ KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0,
+ ("not mapped entry %p %p", ctx, entry));
+ TAILQ_REMOVE(entries, entry, dmamap_link);
+ error = ctx_unmap_buf(ctx, entry->start, entry->end -
+ entry->start, cansleep ? DMAR_PGF_WAITOK : 0);
+ KASSERT(error == 0, ("unmap %p error %d", ctx, error));
+ DMAR_CTX_LOCK(ctx);
+ dmar_gas_free_space(ctx, entry);
+ DMAR_CTX_UNLOCK(ctx);
+ dmar_gas_free_entry(ctx, entry);
+ }
+}
+
+static void
+dmar_ctx_unload_task(void *arg, int pending)
+{
+ struct dmar_ctx *ctx;
+ struct dmar_map_entries_tailq entries;
+
+ ctx = arg;
+ TAILQ_INIT(&entries);
+
+ for (;;) {
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_SWAP(&ctx->unload_entries, &entries, dmar_map_entry,
+ dmamap_link);
+ DMAR_CTX_UNLOCK(ctx);
+ if (TAILQ_EMPTY(&entries))
+ break;
+ dmar_ctx_unload(ctx, &entries, true);
+ }
+}
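dmar_get_ctx() above performs its sleepable allocations with the DMAR
unlocked, then retakes the lock and rechecks whether another thread
created the context first, discarding its own copy if it lost the race.
A generic sketch of this double-checked allocation pattern follows; the
obj/unit types and helpers are hypothetical, not part of this commit.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>
    #include <sys/malloc.h>
    #include <sys/queue.h>

    struct obj {
    	int		key;
    	u_int		refs;
    	LIST_ENTRY(obj)	link;
    };

    struct unit {
    	struct mtx	lock;		/* Assumed initialized at attach. */
    	LIST_HEAD(, obj) objs;
    };

    static struct obj *
    obj_find_locked(struct unit *u, int key)
    {
    	struct obj *o;

    	mtx_assert(&u->lock, MA_OWNED);
    	LIST_FOREACH(o, &u->objs, link) {
    		if (o->key == key)
    			return (o);
    	}
    	return (NULL);
    }

    static struct obj *
    obj_get(struct unit *u, int key)
    {
    	struct obj *o, *o1;

    	mtx_lock(&u->lock);
    	o = obj_find_locked(u, key);
    	if (o == NULL) {
    		/* Sleepable allocation must not hold the unit lock. */
    		mtx_unlock(&u->lock);
    		o1 = malloc(sizeof(*o1), M_TEMP, M_WAITOK | M_ZERO);
    		o1->key = key;
    		mtx_lock(&u->lock);
    		/* Recheck: another thread may have won the race. */
    		o = obj_find_locked(u, key);
    		if (o == NULL) {
    			o = o1;
    			LIST_INSERT_HEAD(&u->objs, o, link);
    		} else
    			free(o1, M_TEMP);
    	}
    	o->refs++;
    	mtx_unlock(&u->lock);
    	return (o);
    }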
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
new file mode 100644
index 0000000..0c84856
--- /dev/null
+++ b/sys/x86/iommu/intel_dmar.h
@@ -0,0 +1,374 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __X86_IOMMU_INTEL_DMAR_H
+#define __X86_IOMMU_INTEL_DMAR_H
+
+/* Host or physical memory address, after translation. */
+typedef uint64_t dmar_haddr_t;
+/* Guest or bus address, before translation. */
+typedef uint64_t dmar_gaddr_t;
+
+struct dmar_map_entry {
+ dmar_gaddr_t start;
+ dmar_gaddr_t end;
+ dmar_gaddr_t free_after; /* Free space after the entry */
+ dmar_gaddr_t free_down; /* Max free space below the
+ current R/B tree node */
+ u_int flags;
+ TAILQ_ENTRY(dmar_map_entry) dmamap_link; /* Link for dmamap entries */
+ RB_ENTRY(dmar_map_entry) rb_entry; /* Links for ctx entries */
+ TAILQ_ENTRY(dmar_map_entry) unroll_link; /* Link for unroll after
+ dmamap_load failure */
+};
+
+RB_HEAD(dmar_gas_entries_tree, dmar_map_entry);
+RB_PROTOTYPE(dmar_gas_entries_tree, dmar_map_entry, rb_entry,
+ dmar_gas_cmp_entries);
+
+#define DMAR_MAP_ENTRY_PLACE 0x0001 /* Fake entry */
+#define DMAR_MAP_ENTRY_RMRR 0x0002 /* Permanent, not linked by
+ dmamap_link */
+#define DMAR_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by
+ dmamap_link */
+#define DMAR_MAP_ENTRY_UNMAPPED 0x0010 /* No backing pages */
+#define DMAR_MAP_ENTRY_READ 0x1000 /* Read permitted */
+#define DMAR_MAP_ENTRY_WRITE 0x2000 /* Write permitted */
+#define DMAR_MAP_ENTRY_SNOOP 0x4000 /* Snoop */
+#define DMAR_MAP_ENTRY_TM 0x8000 /* Transient */
+
+struct dmar_ctx {
+ int bus; /* pci bus/slot/func */
+ int slot;
+ int func;
+ int domain; /* DID */
+ int mgaw; /* Real max address width */
+ int agaw; /* Adjusted guest address width */
+ int pglvl; /* The pagelevel */
+ int awlvl; /* The pagelevel as the bitmask, to set in
+ context entry */
+ dmar_gaddr_t end;/* Highest address + 1 in the guest AS */
+ u_int refs; /* References to the context, from tags */
+ struct dmar_unit *dmar;
+ struct bus_dma_tag_dmar ctx_tag; /* Root tag */
+ struct mtx lock;
+ LIST_ENTRY(dmar_ctx) link; /* Member in the dmar list */
+ vm_object_t pgtbl_obj; /* Page table pages */
+ u_int flags; /* Protected by dmar lock */
+ uint64_t last_fault_rec[2]; /* Last fault reported */
+ u_int entries_cnt;
+ u_long loads;
+ u_long unloads;
+ struct dmar_gas_entries_tree rb_root;
+ struct dmar_map_entries_tailq unload_entries; /* Entries to unload */
+ struct dmar_map_entry *first_place, *last_place;
+ struct task unload_task;
+};
+
+/* struct dmar_ctx flags */
+#define DMAR_CTX_FAULTED 0x0001 /* Fault was reported,
+ last_fault_rec is valid */
+#define DMAR_CTX_IDMAP 0x0002 /* Context uses identity page table */
+#define DMAR_CTX_RMRR 0x0004 /* Context contains RMRR entry,
+ cannot be turned off */
+#define DMAR_CTX_DISABLED 0x0008 /* Device is disabled, the
+ ephemeral reference is kept
+ to prevent context destruction */
+
+#define DMAR_CTX_PGLOCK(ctx) VM_OBJECT_WLOCK((ctx)->pgtbl_obj)
+#define DMAR_CTX_PGTRYLOCK(ctx) VM_OBJECT_TRYWLOCK((ctx)->pgtbl_obj)
+#define DMAR_CTX_PGUNLOCK(ctx) VM_OBJECT_WUNLOCK((ctx)->pgtbl_obj)
+#define DMAR_CTX_ASSERT_PGLOCKED(ctx) \
+ VM_OBJECT_ASSERT_WLOCKED((ctx)->pgtbl_obj)
+
+#define DMAR_CTX_LOCK(ctx) mtx_lock(&(ctx)->lock)
+#define DMAR_CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->lock)
+#define DMAR_CTX_ASSERT_LOCKED(ctx) mtx_assert(&(ctx)->lock, MA_OWNED)
+
+struct dmar_unit {
+ device_t dev;
+ int unit;
+ uint16_t segment;
+ uint64_t base;
+
+ /* Resources */
+ int reg_rid;
+ struct resource *regs;
+ int irq;
+ int irq_rid;
+ struct resource *irq_res;
+ void *intr_handle;
+
+ /* Hardware registers cache */
+ uint32_t hw_ver;
+ uint64_t hw_cap;
+ uint64_t hw_ecap;
+ uint32_t hw_gcmd;
+
+ /* Data for being a dmar */
+ struct mtx lock;
+ LIST_HEAD(, dmar_ctx) contexts;
+ struct unrhdr *domids;
+ vm_object_t ctx_obj;
+ u_int barrier_flags;
+
+ /* Fault handler data */
+ struct mtx fault_lock;
+ uint64_t *fault_log;
+ int fault_log_head;
+ int fault_log_tail;
+ int fault_log_size;
+ struct task fault_task;
+ struct taskqueue *fault_taskqueue;
+
+ /* Busdma delayed map load */
+ struct task dmamap_load_task;
+ TAILQ_HEAD(, bus_dmamap_dmar) delayed_maps;
+ struct taskqueue *delayed_taskqueue;
+};
+
+#define DMAR_LOCK(dmar) mtx_lock(&(dmar)->lock)
+#define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->lock)
+#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->lock, MA_OWNED)
+
+#define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock)
+#define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock)
+#define DMAR_FAULT_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->fault_lock, MA_OWNED)
+
+#define DMAR_IS_COHERENT(dmar) (((dmar)->hw_ecap & DMAR_ECAP_C) != 0)
+
+/* Barrier ids */
+#define DMAR_BARRIER_RMRR 0
+#define DMAR_BARRIER_USEQ 1
+
+struct dmar_unit *dmar_find(device_t dev);
+
+u_int dmar_nd2mask(u_int nd);
+bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl);
+int ctx_set_agaw(struct dmar_ctx *ctx, int mgaw);
+int dmar_maxaddr2mgaw(struct dmar_unit* unit, dmar_gaddr_t maxaddr,
+ bool allow_less);
+vm_pindex_t pglvl_max_pages(int pglvl);
+int ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl);
+dmar_gaddr_t pglvl_page_size(int total_pglvl, int lvl);
+dmar_gaddr_t ctx_page_size(struct dmar_ctx *ctx, int lvl);
+struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags);
+void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags);
+void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
+ struct sf_buf **sf);
+void dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent);
+int dmar_load_root_entry_ptr(struct dmar_unit *unit);
+int dmar_inv_ctx_glob(struct dmar_unit *unit);
+int dmar_inv_iotlb_glob(struct dmar_unit *unit);
+int dmar_flush_write_bufs(struct dmar_unit *unit);
+int dmar_enable_translation(struct dmar_unit *unit);
+int dmar_disable_translation(struct dmar_unit *unit);
+void dmar_enable_intr(struct dmar_unit *unit);
+void dmar_disable_intr(struct dmar_unit *unit);
+bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id);
+void dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id);
+
+int dmar_intr(void *arg);
+int dmar_init_fault_log(struct dmar_unit *unit);
+void dmar_fini_fault_log(struct dmar_unit *unit);
+
+vm_object_t ctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr);
+void put_idmap_pgtbl(vm_object_t obj);
+int ctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ vm_page_t *ma, uint64_t pflags, int flags);
+int ctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ int flags);
+int ctx_alloc_pgtbl(struct dmar_ctx *ctx);
+void ctx_free_pgtbl(struct dmar_ctx *ctx);
+
+struct dmar_ctx *dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev,
+ bool rmrr);
+struct dmar_ctx *dmar_get_ctx(struct dmar_unit *dmar, device_t dev,
+ bool id_mapped, bool rmrr_init);
+void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
+void dmar_free_ctx(struct dmar_ctx *ctx);
+struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, int bus,
+ int slot, int func);
+void dmar_ctx_unload(struct dmar_ctx *ctx,
+ struct dmar_map_entries_tailq *entries, bool cansleep);
+
+int dmar_init_busdma(struct dmar_unit *unit);
+void dmar_fini_busdma(struct dmar_unit *unit);
+
+void dmar_gas_init_ctx(struct dmar_ctx *ctx);
+void dmar_gas_fini_ctx(struct dmar_ctx *ctx);
+struct dmar_map_entry *dmar_gas_alloc_entry(struct dmar_ctx *ctx, u_int flags);
+void dmar_gas_free_entry(struct dmar_ctx *ctx, struct dmar_map_entry *entry);
+void dmar_gas_free_space(struct dmar_ctx *ctx, struct dmar_map_entry *entry);
+int dmar_gas_map(struct dmar_ctx *ctx, const struct bus_dma_tag_common *common,
+ dmar_gaddr_t size, u_int eflags, u_int flags, vm_page_t *ma,
+ struct dmar_map_entry **res);
+int dmar_gas_map_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+ u_int eflags, u_int flags, vm_page_t *ma);
+int dmar_gas_reserve_region(struct dmar_ctx *ctx, dmar_gaddr_t start,
+ dmar_gaddr_t end);
+
+void dmar_ctx_parse_rmrr(struct dmar_ctx *ctx, device_t dev,
+ struct dmar_map_entries_tailq *rmrr_entries);
+int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar);
+
+void dmar_quirks_post_ident(struct dmar_unit *dmar);
+void dmar_quirks_pre_use(struct dmar_unit *dmar);
+
+#define DMAR_GM_CANWAIT 0x0001
+#define DMAR_GM_CANSPLIT 0x0002
+
+#define DMAR_PGF_WAITOK 0x0001
+#define DMAR_PGF_ZERO 0x0002
+#define DMAR_PGF_ALLOC 0x0004
+#define DMAR_PGF_NOALLOC 0x0008
+#define DMAR_PGF_OBJL 0x0010
+
+extern dmar_haddr_t dmar_high;
+extern int haw;
+extern int dmar_tbl_pagecnt;
+extern int dmar_match_verbose;
+extern int dmar_check_free;
+
+static inline uint32_t
+dmar_read4(const struct dmar_unit *unit, int reg)
+{
+
+ return (bus_read_4(unit->regs, reg));
+}
+
+static inline uint64_t
+dmar_read8(const struct dmar_unit *unit, int reg)
+{
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = bus_read_4(unit->regs, reg);
+ high = bus_read_4(unit->regs, reg + 4);
+ return (low | ((uint64_t)high << 32));
+#else
+ return (bus_read_8(unit->regs, reg));
+#endif
+}
+
+static inline void
+dmar_write4(const struct dmar_unit *unit, int reg, uint32_t val)
+{
+
+ KASSERT(reg != DMAR_GCMD_REG || (val & DMAR_GCMD_TE) ==
+ (unit->hw_gcmd & DMAR_GCMD_TE),
+ ("dmar%d clearing TE 0x%08x 0x%08x", unit->unit,
+ unit->hw_gcmd, val));
+ bus_write_4(unit->regs, reg, val);
+}
+
+static inline void
+dmar_write8(const struct dmar_unit *unit, int reg, uint64_t val)
+{
+
+ KASSERT(reg != DMAR_GCMD_REG, ("8byte GCMD write"));
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = val;
+ high = val >> 32;
+ bus_write_4(unit->regs, reg, low);
+ bus_write_4(unit->regs, reg + 4, high);
+#else
+ bus_write_8(unit->regs, reg, val);
+#endif
+}
+
+/*
+ * dmar_pte_store and dmar_pte_clear ensure that on i386, 32bit writes
+ * are issued in the correct order. For store, the lower word,
+ * containing the P or R and W bits, is set only after the high word
+ * is written. For clear, the P bit is cleared first, then the high
+ * word is cleared.
+ */
+static inline void
+dmar_pte_store(volatile uint64_t *dst, uint64_t val)
+{
+
+ KASSERT(*dst == 0, ("used pte %p oldval %jx newval %jx",
+ dst, (uintmax_t)*dst, (uintmax_t)val));
+#ifdef __i386__
+ volatile uint32_t *p;
+ uint32_t hi, lo;
+
+ hi = val >> 32;
+ lo = val;
+ p = (volatile uint32_t *)dst;
+ *(p + 1) = hi;
+ *p = lo;
+#else
+ *dst = val;
+#endif
+}
+
+static inline void
+dmar_pte_clear(volatile uint64_t *dst)
+{
+#ifdef __i386__
+ volatile uint32_t *p;
+
+ p = (volatile uint32_t *)dst;
+ *p = 0;
+ *(p + 1) = 0;
+#else
+ *dst = 0;
+#endif
+}
+
+static inline bool
+dmar_test_boundary(dmar_gaddr_t start, dmar_gaddr_t size,
+ dmar_gaddr_t boundary)
+{
+
+ if (boundary == 0)
+ return (true);
+ return (start + size <= ((start + boundary) & ~(boundary - 1)));
+}
+
+#ifdef INVARIANTS
+#define TD_PREP_PINNED_ASSERT \
+ int old_td_pinned; \
+ old_td_pinned = curthread->td_pinned
+#define TD_PINNED_ASSERT \
+ KASSERT(curthread->td_pinned == old_td_pinned, \
+ ("pin count leak: %d %d %s:%d", curthread->td_pinned, \
+ old_td_pinned, __FILE__, __LINE__))
+#else
+#define TD_PREP_PINNED_ASSERT
+#define TD_PINNED_ASSERT
+#endif
+
+#endif
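For readers unfamiliar with the masking trick in dmar_test_boundary()
above, here is a standalone userland restatement with a couple of worked
cases; it is an illustration of the arithmetic only, not part of the
commit.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef uint64_t dmar_gaddr_t;

    /* Standalone restatement of dmar_test_boundary() for illustration. */
    static bool
    test_boundary(dmar_gaddr_t start, dmar_gaddr_t size, dmar_gaddr_t boundary)
    {
    	if (boundary == 0)
    		return (true);
    	/* The range must end at or before the next boundary above start. */
    	return (start + size <= ((start + boundary) & ~(boundary - 1)));
    }

    int
    main(void)
    {
    	/* 4 KB ending exactly at the 1 MB line: accepted. */
    	assert(test_boundary(0x0ff000, 0x1000, 0x100000));
    	/* 8 KB crossing the 1 MB line: rejected. */
    	assert(!test_boundary(0x0ff000, 0x2000, 0x100000));
    	/* A boundary of 0 means no restriction. */
    	assert(test_boundary(0x0ff000, 0x200000, 0));
    	return (0);
    }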
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
new file mode 100644
index 0000000..f022174
--- /dev/null
+++ b/sys/x86/iommu/intel_drv.c
@@ -0,0 +1,1098 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+#if defined(__amd64__) /* || defined(__ia64__) */
+#define DEV_APIC
+#else
+#include "opt_apic.h"
+#endif
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <machine/bus.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+#include <dev/pci/pcivar.h>
+
+#ifdef DEV_APIC
+#include "pcib_if.h"
+#endif
+
+#define DMAR_REG_RID 1
+#define DMAR_IRQ_RID 0
+
+static devclass_t dmar_devclass;
+static device_t *dmar_devs;
+static int dmar_devcnt;
+
+typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *);
+
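+/*
+ * Walk the remapping structures that follow the ACPI DMAR table header,
+ * calling the iterator for each one until it returns 0 or the end of
+ * the table is reached.
+ */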
+static void
+dmar_iterate_tbl(dmar_iter_t iter, void *arg)
+{
+ ACPI_TABLE_DMAR *dmartbl;
+ ACPI_DMAR_HEADER *dmarh;
+ char *ptr, *ptrend;
+ ACPI_STATUS status;
+
+ status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
+ if (ACPI_FAILURE(status))
+ return;
+ ptr = (char *)dmartbl + sizeof(*dmartbl);
+ ptrend = (char *)dmartbl + dmartbl->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ dmarh = (ACPI_DMAR_HEADER *)ptr;
+ if (dmarh->Length <= 0) {
+ printf("dmar_identify: corrupted DMAR table, l %d\n",
+ dmarh->Length);
+ break;
+ }
+ ptr += dmarh->Length;
+ if (!iter(dmarh, arg))
+ break;
+ }
+}
+
+struct find_iter_args {
+ int i;
+ ACPI_DMAR_HARDWARE_UNIT *res;
+};
+
+static int
+dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+ struct find_iter_args *fia;
+
+ if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
+ return (1);
+
+ fia = arg;
+ if (fia->i == 0) {
+ fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh;
+ return (0);
+ }
+ fia->i--;
+ return (1);
+}
+
+static ACPI_DMAR_HARDWARE_UNIT *
+dmar_find_by_index(int idx)
+{
+ struct find_iter_args fia;
+
+ fia.i = idx;
+ fia.res = NULL;
+ dmar_iterate_tbl(dmar_find_iter, &fia);
+ return (fia.res);
+}
+
+static int
+dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+
+ if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT)
+ dmar_devcnt++;
+ return (1);
+}
+
+static int dmar_enable = 0;
+
+/*
+ * Highest device-visible address supported by the units, computed from
+ * the host address width reported in the DMAR table.
+ */
+static uint64_t dmar_high;
+
+static void
+dmar_identify(driver_t *driver, device_t parent)
+{
+ ACPI_TABLE_DMAR *dmartbl;
+ ACPI_DMAR_HARDWARE_UNIT *dmarh;
+ ACPI_STATUS status;
+	int i, haw, error;
+
+ if (acpi_disabled("dmar"))
+ return;
+ TUNABLE_INT_FETCH("hw.dmar.enable", &dmar_enable);
+ if (!dmar_enable)
+ return;
+#ifdef INVARIANTS
+ TUNABLE_INT_FETCH("hw.dmar.check_free", &dmar_check_free);
+#endif
+ TUNABLE_INT_FETCH("hw.dmar.match_verbose", &dmar_match_verbose);
+ status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
+ if (ACPI_FAILURE(status))
+ return;
+ haw = dmartbl->Width + 1;
+ if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR)
+ dmar_high = BUS_SPACE_MAXADDR;
+ else
+ dmar_high = 1ULL << (haw + 1);
+ if (bootverbose) {
+ printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width,
+ (unsigned)dmartbl->Flags,
+ "\020\001INTR_REMAP\002X2APIC_OPT_OUT");
+ }
+
+ dmar_iterate_tbl(dmar_count_iter, NULL);
+ if (dmar_devcnt == 0)
+ return;
+ dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF,
+ M_WAITOK | M_ZERO);
+ for (i = 0; i < dmar_devcnt; i++) {
+ dmarh = dmar_find_by_index(i);
+ if (dmarh == NULL) {
+ printf("dmar_identify: cannot find HWUNIT %d\n", i);
+ continue;
+ }
+ dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i);
+ if (dmar_devs[i] == NULL) {
+ printf("dmar_identify: cannot create instance %d\n", i);
+ continue;
+ }
+ error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY,
+ DMAR_REG_RID, dmarh->Address, PAGE_SIZE);
+ if (error != 0) {
+ printf(
+ "dmar%d: unable to alloc register window at 0x%08jx: error %d\n",
+ i, (uintmax_t)dmarh->Address, error);
+ device_delete_child(parent, dmar_devs[i]);
+ dmar_devs[i] = NULL;
+ }
+ }
+}
+
+static int
+dmar_probe(device_t dev)
+{
+
+ if (acpi_get_handle(dev) != NULL)
+ return (ENXIO);
+ device_set_desc(dev, "DMA remap");
+ return (0);
+}
+
+static void
+dmar_release_resources(device_t dev, struct dmar_unit *unit)
+{
+
+ dmar_fini_busdma(unit);
+ dmar_fini_fault_log(unit);
+ if (unit->irq != -1) {
+ bus_teardown_intr(dev, unit->irq_res, unit->intr_handle);
+ bus_release_resource(dev, SYS_RES_IRQ, unit->irq_rid,
+ unit->irq_res);
+ bus_delete_resource(dev, SYS_RES_IRQ, unit->irq_rid);
+ PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
+ dev, unit->irq);
+ unit->irq = -1;
+ }
+ if (unit->regs != NULL) {
+ bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
+ unit->regs);
+ bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
+ unit->regs);
+ unit->regs = NULL;
+ }
+ if (unit->domids != NULL) {
+ delete_unrhdr(unit->domids);
+ unit->domids = NULL;
+ }
+ if (unit->ctx_obj != NULL) {
+ vm_object_deallocate(unit->ctx_obj);
+ unit->ctx_obj = NULL;
+ }
+}
+
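+/*
+ * Allocate and program the fault-reporting interrupt: request a message
+ * through the PCIB_ALLOC_MSIX/PCIB_MAP_MSI methods of the grandparent
+ * device and write its address and data into the fault event registers.
+ */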
+static int
+dmar_alloc_irq(device_t dev, struct dmar_unit *unit)
+{
+ device_t pcib;
+ uint64_t msi_addr;
+ uint32_t msi_data;
+ int error;
+
+ pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
+ error = PCIB_ALLOC_MSIX(pcib, dev, &unit->irq);
+ if (error != 0) {
+ device_printf(dev, "cannot allocate fault interrupt, %d\n",
+ error);
+ goto err1;
+ }
+ unit->irq_rid = DMAR_IRQ_RID;
+ error = bus_set_resource(dev, SYS_RES_IRQ, unit->irq_rid, unit->irq,
+ 1);
+ if (error != 0) {
+ device_printf(dev, "cannot set interrupt resource, %d\n",
+ error);
+ goto err2;
+ }
+ unit->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+ &unit->irq_rid, RF_ACTIVE);
+ if (unit->irq_res == NULL) {
+ device_printf(dev, "cannot map fault interrupt\n");
+ error = ENXIO;
+ goto err3;
+ }
+ error = bus_setup_intr(dev, unit->irq_res, INTR_TYPE_MISC,
+ dmar_intr, NULL, unit, &unit->intr_handle);
+ if (error != 0) {
+ device_printf(dev, "cannot setup fault interrupt, %d\n", error);
+ goto err4;
+ }
+ bus_describe_intr(dev, unit->irq_res, unit->intr_handle, "fault");
+ error = PCIB_MAP_MSI(pcib, dev, unit->irq, &msi_addr, &msi_data);
+ if (error != 0) {
+ device_printf(dev, "cannot map interrupt, %d\n", error);
+ goto err5;
+ }
+ dmar_write4(unit, DMAR_FEDATA_REG, msi_data);
+ dmar_write4(unit, DMAR_FEADDR_REG, msi_addr);
+ /* Only for xAPIC mode */
+ dmar_write4(unit, DMAR_FEUADDR_REG, msi_addr >> 32);
+ return (0);
+
+err5:
+ bus_teardown_intr(dev, unit->irq_res, unit->intr_handle);
+err4:
+ bus_release_resource(dev, SYS_RES_IRQ, unit->irq_rid, unit->irq_res);
+err3:
+ bus_delete_resource(dev, SYS_RES_IRQ, unit->irq_rid);
+err2:
+ PCIB_RELEASE_MSIX(pcib, dev, unit->irq);
+ unit->irq = -1;
+err1:
+ return (error);
+}
+
+#ifdef DEV_APIC
+static int
+dmar_remap_intr(device_t dev, device_t child, u_int irq)
+{
+ struct dmar_unit *unit;
+ uint64_t msi_addr;
+ uint32_t msi_data;
+ int error;
+
+ unit = device_get_softc(dev);
+ if (irq != unit->irq)
+ return (ENOENT);
+ error = PCIB_MAP_MSI(device_get_parent(device_get_parent(dev)), dev,
+ irq, &msi_addr, &msi_data);
+ if (error != 0)
+ return (error);
+ dmar_disable_intr(unit);
+ dmar_write4(unit, DMAR_FEDATA_REG, msi_data);
+ dmar_write4(unit, DMAR_FEADDR_REG, msi_addr);
+ dmar_write4(unit, DMAR_FEUADDR_REG, msi_addr >> 32);
+ dmar_enable_intr(unit);
+ return (0);
+}
+#endif
+
+static void
+dmar_print_caps(device_t dev, struct dmar_unit *unit,
+ ACPI_DMAR_HARDWARE_UNIT *dmaru)
+{
+ uint32_t caphi, ecaphi;
+
+ device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, flags=<%b>\n",
+ (uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver),
+ DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment,
+ dmaru->Flags, "\020\001INCLUDE_ALL_PCI");
+ caphi = unit->hw_cap >> 32;
+ device_printf(dev, "cap=%b,", (u_int)unit->hw_cap,
+ "\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH");
+ printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD");
+ printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d",
+ DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap),
+ DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap),
+ DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap));
+ if ((unit->hw_cap & DMAR_CAP_PSI) != 0)
+ printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap));
+ printf("\n");
+ ecaphi = unit->hw_ecap >> 32;
+ device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap,
+ "\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC");
+ printf("%b, ", ecaphi, "\020");
+ printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap),
+ DMAR_ECAP_IRO(unit->hw_ecap));
+}
+
+static int
+dmar_attach(device_t dev)
+{
+ struct dmar_unit *unit;
+ ACPI_DMAR_HARDWARE_UNIT *dmaru;
+ int error;
+
+ unit = device_get_softc(dev);
+ unit->dev = dev;
+ unit->unit = device_get_unit(dev);
+ dmaru = dmar_find_by_index(unit->unit);
+ if (dmaru == NULL)
+ return (EINVAL);
+ unit->irq = -1;
+ unit->segment = dmaru->Segment;
+ unit->base = dmaru->Address;
+ unit->reg_rid = DMAR_REG_RID;
+ unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+ &unit->reg_rid, RF_ACTIVE);
+ if (unit->regs == NULL) {
+ device_printf(dev, "cannot allocate register window\n");
+ return (ENOMEM);
+ }
+ unit->hw_ver = dmar_read4(unit, DMAR_VER_REG);
+ unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG);
+ unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG);
+ if (bootverbose)
+ dmar_print_caps(dev, unit, dmaru);
+ dmar_quirks_post_ident(unit);
+
+ error = dmar_alloc_irq(dev, unit);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ mtx_init(&unit->lock, "dmarhw", NULL, MTX_DEF);
+ unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)),
+ &unit->lock);
+
+ /*
+ * 9.2 "Context Entry":
+ * When Caching Mode (CM) field is reported as Set, the
+ * domain-id value of zero is architecturally reserved.
+ * Software must not use domain-id value of zero
+ * when CM is Set.
+ */
+ if ((unit->hw_cap & DMAR_CAP_CM) != 0)
+ alloc_unr_specific(unit->domids, 0);
+
+ unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 +
+ DMAR_CTX_CNT), 0, 0, NULL);
+
+ /*
+ * Allocate and load the root entry table pointer. Enable the
+ * address translation after the required invalidations are
+ * done.
+ */
+ dmar_pgalloc(unit->ctx_obj, 0, DMAR_PGF_WAITOK | DMAR_PGF_ZERO);
+ DMAR_LOCK(unit);
+ error = dmar_load_root_entry_ptr(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ error = dmar_inv_ctx_glob(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) {
+ error = dmar_inv_iotlb_glob(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ }
+
+ DMAR_UNLOCK(unit);
+ error = dmar_init_fault_log(unit);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ error = dmar_init_busdma(unit);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+
+#ifdef NOTYET
+ DMAR_LOCK(unit);
+ error = dmar_enable_translation(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ DMAR_UNLOCK(unit);
+#endif
+
+ return (0);
+}
+
+static int
+dmar_detach(device_t dev)
+{
+
+ return (EBUSY);
+}
+
+static int
+dmar_suspend(device_t dev)
+{
+
+ return (0);
+}
+
+static int
+dmar_resume(device_t dev)
+{
+
+ /* XXXKIB */
+ return (0);
+}
+
+static device_method_t dmar_methods[] = {
+ DEVMETHOD(device_identify, dmar_identify),
+ DEVMETHOD(device_probe, dmar_probe),
+ DEVMETHOD(device_attach, dmar_attach),
+ DEVMETHOD(device_detach, dmar_detach),
+ DEVMETHOD(device_suspend, dmar_suspend),
+ DEVMETHOD(device_resume, dmar_resume),
+#ifdef DEV_APIC
+ DEVMETHOD(bus_remap_intr, dmar_remap_intr),
+#endif
+ DEVMETHOD_END
+};
+
+static driver_t dmar_driver = {
+ "dmar",
+ dmar_methods,
+ sizeof(struct dmar_unit),
+};
+
+DRIVER_MODULE(dmar, acpi, dmar_driver, dmar_devclass, 0, 0);
+MODULE_DEPEND(dmar, acpi, 1, 1, 1);
+
+static void
+dmar_print_path(device_t dev, const char *banner, int busno, int depth,
+ const ACPI_DMAR_PCI_PATH *path)
+{
+ int i;
+
+ device_printf(dev, "%s [%d, ", banner, busno);
+ for (i = 0; i < depth; i++) {
+ if (i != 0)
+ printf(", ");
+ printf("(%d, %d)", path[i].Device, path[i].Function);
+ }
+ printf("]\n");
+}
+
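+/*
+ * Compute the length of the PCI path from the host bridge down to the
+ * given device.  dmar_dev_path() below fills in the (device, function)
+ * pairs along that path for matching against DMAR device scopes.
+ */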
+static int
+dmar_dev_depth(device_t child)
+{
+ devclass_t pci_class;
+ device_t bus, pcib;
+ int depth;
+
+ pci_class = devclass_find("pci");
+ for (depth = 1; ; depth++) {
+ bus = device_get_parent(child);
+ pcib = device_get_parent(bus);
+ if (device_get_devclass(device_get_parent(pcib)) !=
+ pci_class)
+ return (depth);
+ child = pcib;
+ }
+}
+
+static void
+dmar_dev_path(device_t child, int *busno, ACPI_DMAR_PCI_PATH *path, int depth)
+{
+ devclass_t pci_class;
+ device_t bus, pcib;
+
+ pci_class = devclass_find("pci");
+ for (depth--; depth != -1; depth--) {
+ path[depth].Device = pci_get_slot(child);
+ path[depth].Function = pci_get_function(child);
+ bus = device_get_parent(child);
+ pcib = device_get_parent(bus);
+ if (device_get_devclass(device_get_parent(pcib)) !=
+ pci_class) {
+ /* reached a host bridge */
+ *busno = pcib_get_bus(bus);
+ return;
+ }
+ child = pcib;
+ }
+ panic("wrong depth");
+}
+
+static int
+dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1,
+ int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2,
+ enum AcpiDmarScopeType scope_type)
+{
+ int i, depth;
+
+ if (busno1 != busno2)
+ return (0);
+ if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2)
+ return (0);
+ depth = depth1;
+ if (depth2 < depth)
+ depth = depth2;
+ for (i = 0; i < depth; i++) {
+ if (path1[i].Device != path2[i].Device ||
+ path1[i].Function != path2[i].Function)
+ return (0);
+ }
+ return (1);
+}
+
+static int
+dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, device_t dev,
+ int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len)
+{
+ ACPI_DMAR_PCI_PATH *path;
+ int path_len;
+
+ if (devscope->Length < sizeof(*devscope)) {
+ printf("dmar_find: corrupted DMAR table, dl %d\n",
+ devscope->Length);
+ return (-1);
+ }
+ if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
+ devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
+ return (0);
+ path_len = devscope->Length - sizeof(*devscope);
+ if (path_len % 2 != 0) {
+ printf("dmar_find_bsf: corrupted DMAR table, dl %d\n",
+ devscope->Length);
+ return (-1);
+ }
+ path_len /= 2;
+ path = (ACPI_DMAR_PCI_PATH *)(devscope + 1);
+ if (path_len == 0) {
+ printf("dmar_find: corrupted DMAR table, dl %d\n",
+ devscope->Length);
+ return (-1);
+ }
+ if (dmar_match_verbose)
+ dmar_print_path(dev, "DMAR", devscope->Bus, path_len, path);
+
+ return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno,
+ dev_path, dev_path_len, devscope->EntryType));
+}
+
+struct dmar_unit *
+dmar_find(device_t dev)
+{
+ device_t dmar_dev;
+ ACPI_DMAR_HARDWARE_UNIT *dmarh;
+ ACPI_DMAR_DEVICE_SCOPE *devscope;
+ char *ptr, *ptrend;
+ int i, match, dev_domain, dev_busno, dev_path_len;
+
+ dmar_dev = NULL;
+ dev_domain = pci_get_domain(dev);
+ dev_path_len = dmar_dev_depth(dev);
+ ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
+ dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
+ if (dmar_match_verbose)
+ dmar_print_path(dev, "PCI", dev_busno, dev_path_len, dev_path);
+
+ for (i = 0; i < dmar_devcnt; i++) {
+ if (dmar_devs[i] == NULL)
+ continue;
+ dmarh = dmar_find_by_index(i);
+ if (dmarh == NULL)
+ continue;
+ if (dmarh->Segment != dev_domain)
+ continue;
+ if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) {
+ dmar_dev = dmar_devs[i];
+ if (dmar_match_verbose) {
+ device_printf(dev,
+ "pci%d:%d:%d:%d matched dmar%d INCLUDE_ALL\n",
+ dev_domain, pci_get_bus(dev),
+ pci_get_slot(dev),
+ pci_get_function(dev),
+ ((struct dmar_unit *)device_get_softc(
+ dmar_dev))->unit);
+ }
+ goto found;
+ }
+ ptr = (char *)dmarh + sizeof(*dmarh);
+ ptrend = (char *)dmarh + dmarh->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
+ ptr += devscope->Length;
+ if (dmar_match_verbose) {
+ device_printf(dev,
+ "pci%d:%d:%d:%d matching dmar%d\n",
+ dev_domain, pci_get_bus(dev),
+ pci_get_slot(dev),
+ pci_get_function(dev),
+ ((struct dmar_unit *)device_get_softc(
+ dmar_devs[i]))->unit);
+ }
+ match = dmar_match_devscope(devscope, dev, dev_busno,
+ dev_path, dev_path_len);
+ if (dmar_match_verbose) {
+ if (match == -1)
+ printf("table error\n");
+ else if (match == 0)
+ printf("not matched\n");
+ else
+ printf("matched\n");
+ }
+ if (match == -1)
+ return (NULL);
+ else if (match == 1) {
+ dmar_dev = dmar_devs[i];
+ goto found;
+ }
+ }
+ }
+ return (NULL);
+found:
+ return (device_get_softc(dmar_dev));
+}
+
+struct rmrr_iter_args {
+ struct dmar_ctx *ctx;
+ device_t dev;
+ int dev_domain;
+ int dev_busno;
+ ACPI_DMAR_PCI_PATH *dev_path;
+ int dev_path_len;
+ struct dmar_map_entries_tailq *rmrr_entries;
+};
+
+static int
+dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+ struct rmrr_iter_args *ria;
+ ACPI_DMAR_RESERVED_MEMORY *resmem;
+ ACPI_DMAR_DEVICE_SCOPE *devscope;
+ struct dmar_map_entry *entry;
+ char *ptr, *ptrend;
+ int match;
+
+ if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
+ return (1);
+
+ ria = arg;
+ resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
+ if (dmar_match_verbose) {
+ printf("RMRR [%jx,%jx] segment %d\n",
+ (uintmax_t)resmem->BaseAddress,
+ (uintmax_t)resmem->EndAddress,
+ resmem->Segment);
+ }
+ if (resmem->Segment != ria->dev_domain)
+ return (1);
+
+ ptr = (char *)resmem + sizeof(*resmem);
+ ptrend = (char *)resmem + resmem->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
+ ptr += devscope->Length;
+ match = dmar_match_devscope(devscope, ria->dev, ria->dev_busno,
+ ria->dev_path, ria->dev_path_len);
+ if (match == 1) {
+ if (dmar_match_verbose)
+ printf("matched\n");
+ entry = dmar_gas_alloc_entry(ria->ctx, DMAR_PGF_WAITOK);
+ entry->start = resmem->BaseAddress;
+ /* The RMRR entry end address is inclusive. */
+ entry->end = resmem->EndAddress;
+ TAILQ_INSERT_TAIL(ria->rmrr_entries, entry,
+ unroll_link);
+ } else if (dmar_match_verbose) {
+ printf("not matched, err %d\n", match);
+ }
+ }
+
+ return (1);
+}
+
+void
+dmar_ctx_parse_rmrr(struct dmar_ctx *ctx, device_t dev,
+ struct dmar_map_entries_tailq *rmrr_entries)
+{
+ struct rmrr_iter_args ria;
+
+ ria.dev_domain = pci_get_domain(dev);
+ ria.dev_path_len = dmar_dev_depth(dev);
+ ACPI_DMAR_PCI_PATH dev_path[ria.dev_path_len];
+ dmar_dev_path(dev, &ria.dev_busno, dev_path, ria.dev_path_len);
+
+ if (dmar_match_verbose) {
+ device_printf(dev, "parsing RMRR entries for ");
+ dmar_print_path(dev, "PCI", ria.dev_busno, ria.dev_path_len,
+ dev_path);
+ }
+
+ ria.ctx = ctx;
+ ria.dev = dev;
+ ria.dev_path = dev_path;
+ ria.rmrr_entries = rmrr_entries;
+ dmar_iterate_tbl(dmar_rmrr_iter, &ria);
+}
+
+struct inst_rmrr_iter_args {
+ struct dmar_unit *dmar;
+};
+
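+/*
+ * Resolve a DMAR device-scope path (a start bus number plus a list of
+ * (device, function) pairs) to the device_t it names, walking down the
+ * PCI hierarchy one level at a time.
+ */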
+static device_t
+dmar_path_dev(int segment, int path_len, int busno,
+ const ACPI_DMAR_PCI_PATH *path)
+{
+ devclass_t pci_class;
+ device_t bus, pcib, dev;
+ int i;
+
+ pci_class = devclass_find("pci");
+ dev = NULL;
+ for (i = 0; i < path_len; i++, path++) {
+ dev = pci_find_dbsf(segment, busno, path->Device,
+ path->Function);
+ if (dev == NULL)
+ break;
+ if (i != path_len - 1) {
+ bus = device_get_parent(dev);
+ pcib = device_get_parent(bus);
+ if (device_get_devclass(device_get_parent(pcib)) !=
+ pci_class)
+ return (NULL);
+ }
+ busno = pcib_get_bus(dev);
+ }
+ return (dev);
+}
+
+static int
+dmar_inst_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+ const ACPI_DMAR_RESERVED_MEMORY *resmem;
+ const ACPI_DMAR_DEVICE_SCOPE *devscope;
+ struct inst_rmrr_iter_args *iria;
+ const char *ptr, *ptrend;
+ struct dmar_unit *dev_dmar;
+ device_t dev;
+
+ if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
+ return (1);
+
+ iria = arg;
+ resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
+ if (resmem->Segment != iria->dmar->segment)
+ return (1);
+ if (dmar_match_verbose) {
+ printf("dmar%d: RMRR [%jx,%jx]\n", iria->dmar->unit,
+ (uintmax_t)resmem->BaseAddress,
+ (uintmax_t)resmem->EndAddress);
+ }
+
+ ptr = (char *)resmem + sizeof(*resmem);
+ ptrend = (char *)resmem + resmem->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
+ ptr += devscope->Length;
+ /* XXXKIB bridge */
+ if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT)
+ continue;
+ if (dmar_match_verbose) {
+ dmar_print_path(iria->dmar->dev, "RMRR scope",
+ devscope->Bus, (devscope->Length -
+ sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2,
+ (ACPI_DMAR_PCI_PATH *)(devscope + 1));
+ }
+ dev = dmar_path_dev(resmem->Segment, (devscope->Length -
+ sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2, devscope->Bus,
+ (ACPI_DMAR_PCI_PATH *)(devscope + 1));
+ if (dev == NULL) {
+ if (dmar_match_verbose)
+ printf("null dev\n");
+ continue;
+ }
+ dev_dmar = dmar_find(dev);
+ if (dev_dmar != iria->dmar) {
+ if (dmar_match_verbose) {
+ printf("dmar%d matched, skipping\n",
+ dev_dmar->unit);
+ }
+ continue;
+ }
+ if (dmar_match_verbose)
+ printf("matched, instantiating RMRR context\n");
+ dmar_instantiate_ctx(iria->dmar, dev, true);
+ }
+
+ return (1);
+}
+
+/*
+ * Pre-create all contexts for the DMAR which have RMRR entries.
+ */
+int
+dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar)
+{
+ struct inst_rmrr_iter_args iria;
+ int error;
+
+ if (!dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR))
+ return (0);
+
+ error = 0;
+ iria.dmar = dmar;
+ if (dmar_match_verbose)
+ printf("dmar%d: instantiating RMRR contexts\n", dmar->unit);
+ dmar_iterate_tbl(dmar_inst_rmrr_iter, &iria);
+ DMAR_LOCK(dmar);
+ if (!LIST_EMPTY(&dmar->contexts)) {
+ KASSERT((dmar->hw_gcmd & DMAR_GCMD_TE) == 0,
+ ("dmar%d: RMRR not handled but translation is already enabled",
+ dmar->unit));
+ error = dmar_enable_translation(dmar);
+ }
+ dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR);
+ return (error);
+}
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+
+static void
+dmar_print_ctx_entry(const struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *l, *r;
+
+ db_printf(
+ " start %jx end %jx free_after %jx free_down %jx flags %x ",
+ entry->start, entry->end, entry->free_after, entry->free_down,
+ entry->flags);
+ db_printf("left ");
+ l = RB_LEFT(entry, rb_entry);
+ if (l == NULL)
+ db_printf("NULL ");
+ else
+ db_printf("%jx ", l->start);
+ db_printf("right ");
+ r = RB_RIGHT(entry, rb_entry);
+ if (r == NULL)
+ db_printf("NULL");
+ else
+ db_printf("%jx", r->start);
+ db_printf("\n");
+}
+
+static void
+dmar_print_ctx(struct dmar_ctx *ctx, bool show_mappings)
+{
+ struct dmar_map_entry *entry;
+
+ db_printf(
+ " @%p pci%d:%d:%d dom %d mgaw %d agaw %d pglvl %d end %jx\n"
+ " refs %d flags %x pgobj %p map_ents %u loads %lu unloads %lu\n",
+ ctx, ctx->bus, ctx->slot, ctx->func, ctx->domain, ctx->mgaw,
+ ctx->agaw, ctx->pglvl, (uintmax_t)ctx->end, ctx->refs,
+ ctx->flags, ctx->pgtbl_obj, ctx->entries_cnt, ctx->loads,
+ ctx->unloads);
+ if (!show_mappings)
+ return;
+ db_printf(" mapped:\n");
+ RB_FOREACH(entry, dmar_gas_entries_tree, &ctx->rb_root) {
+ dmar_print_ctx_entry(entry);
+ if (db_pager_quit)
+ break;
+ }
+ if (db_pager_quit)
+ return;
+ db_printf(" unloading:\n");
+ TAILQ_FOREACH(entry, &ctx->unload_entries, dmamap_link) {
+ dmar_print_ctx_entry(entry);
+ if (db_pager_quit)
+ break;
+ }
+}
+
+DB_FUNC(dmar_ctx, db_dmar_print_ctx, db_show_table, CS_OWN, NULL)
+{
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+ bool show_mappings, valid;
+ int domain, bus, device, function, i, t;
+ db_expr_t radix;
+
+	valid = false;
+	show_mappings = false;
+ radix = db_radix;
+ db_radix = 10;
+ t = db_read_token();
+ if (t == tSLASH) {
+ t = db_read_token();
+ if (t != tIDENT) {
+ db_printf("Bad modifier\n");
+ db_radix = radix;
+ db_skip_to_eol();
+ return;
+ }
+ show_mappings = strchr(db_tok_string, 'm') != NULL;
+ t = db_read_token();
+ }
+ if (t == tNUMBER) {
+ domain = db_tok_number;
+ t = db_read_token();
+ if (t == tNUMBER) {
+ bus = db_tok_number;
+ t = db_read_token();
+ if (t == tNUMBER) {
+ device = db_tok_number;
+ t = db_read_token();
+ if (t == tNUMBER) {
+ function = db_tok_number;
+ valid = true;
+ }
+ }
+ }
+ }
+ db_radix = radix;
+ db_skip_to_eol();
+ if (!valid) {
+ db_printf("usage: show dmar_ctx [/m] "
+ "<domain> <bus> <device> <func>\n");
+ return;
+ }
+ for (i = 0; i < dmar_devcnt; i++) {
+ unit = device_get_softc(dmar_devs[i]);
+ LIST_FOREACH(ctx, &unit->contexts, link) {
+ if (domain == unit->segment && bus == ctx->bus &&
+ device == ctx->slot && function == ctx->func) {
+ dmar_print_ctx(ctx, show_mappings);
+ goto out;
+ }
+ }
+ }
+out:;
+}
+
+static void
+dmar_print_one(int idx, bool show_ctxs, bool show_mappings)
+{
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+ int i, frir;
+
+ unit = device_get_softc(dmar_devs[idx]);
+ db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->unit, unit,
+ dmar_read8(unit, DMAR_RTADDR_REG), dmar_read4(unit, DMAR_VER_REG));
+ db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n",
+ (uintmax_t)dmar_read8(unit, DMAR_CAP_REG),
+ (uintmax_t)dmar_read8(unit, DMAR_ECAP_REG),
+ dmar_read4(unit, DMAR_GSTS_REG),
+ dmar_read4(unit, DMAR_FSTS_REG),
+ dmar_read4(unit, DMAR_FECTL_REG));
+ db_printf("fed 0x%x fea 0x%x feua 0x%x\n",
+ dmar_read4(unit, DMAR_FEDATA_REG),
+ dmar_read4(unit, DMAR_FEADDR_REG),
+ dmar_read4(unit, DMAR_FEUADDR_REG));
+ db_printf("primary fault log:\n");
+ for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) {
+ frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16;
+ db_printf(" %d at 0x%x: %jx %jx\n", i, frir,
+ (uintmax_t)dmar_read8(unit, frir),
+ (uintmax_t)dmar_read8(unit, frir + 8));
+ }
+ if (show_ctxs) {
+ db_printf("contexts:\n");
+ LIST_FOREACH(ctx, &unit->contexts, link) {
+ dmar_print_ctx(ctx, show_mappings);
+ if (db_pager_quit)
+ break;
+ }
+ }
+}
+
+DB_SHOW_COMMAND(dmar, db_dmar_print)
+{
+ bool show_ctxs, show_mappings;
+
+ show_ctxs = strchr(modif, 'c') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+ if (!have_addr) {
+ db_printf("usage: show dmar [/c] [/m] index\n");
+ return;
+ }
+ dmar_print_one((int)addr, show_ctxs, show_mappings);
+}
+
+DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars)
+{
+ int i;
+ bool show_ctxs, show_mappings;
+
+ show_ctxs = strchr(modif, 'c') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+
+ for (i = 0; i < dmar_devcnt; i++) {
+ dmar_print_one(i, show_ctxs, show_mappings);
+ if (db_pager_quit)
+ break;
+ }
+}
+#endif
diff --git a/sys/x86/iommu/intel_fault.c b/sys/x86/iommu/intel_fault.c
new file mode 100644
index 0000000..5c5a94d
--- /dev/null
+++ b/sys/x86/iommu/intel_fault.c
@@ -0,0 +1,289 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <machine/bus.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+/*
+ * Fault interrupt handling for DMARs. If advanced fault logging is
+ * not implemented by the hardware, the code emulates it.  The fast
+ * interrupt handler flushes the fault registers into a circular buffer
+ * at unit->fault_log and schedules a task.
+ *
+ * The fast handler is used since faults usually come in bursts and the
+ * number of fault recording registers is limited, e.g. down to one for
+ * the 5400 MCH.  We try to reduce the latency of clearing the fault
+ * register file.  The task is usually long-running, since printf() is
+ * slow, but this is not a problem because bursts are rare.
+ *
+ * For the same reason, each translation unit task is executed in its
+ * own thread.
+ *
+ * XXXKIB It seems there is no hardware available which implements
+ * advanced fault logging, so the code to handle AFL is not written.
+ */
+
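+/*
+ * Each fault record occupies two 64-bit words in the software fault
+ * log, so the ring index advances by two and wraps at the log size.
+ */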
+static int
+dmar_fault_next(struct dmar_unit *unit, int faultp)
+{
+
+ faultp += 2;
+ if (faultp == unit->fault_log_size)
+ faultp = 0;
+ return (faultp);
+}
+
+static void
+dmar_intr_clear(struct dmar_unit *unit, uint32_t fsts)
+{
+ uint32_t clear;
+
+ clear = 0;
+ if ((fsts & DMAR_FSTS_ITE) != 0) {
+ printf("DMAR%d: Invalidation timed out\n", unit->unit);
+ clear |= DMAR_FSTS_ITE;
+ }
+ if ((fsts & DMAR_FSTS_ICE) != 0) {
+ printf("DMAR%d: Invalidation completion error\n",
+ unit->unit);
+ clear |= DMAR_FSTS_ICE;
+ }
+ if ((fsts & DMAR_FSTS_IQE) != 0) {
+ printf("DMAR%d: Invalidation queue error\n",
+ unit->unit);
+ clear |= DMAR_FSTS_IQE;
+ }
+ if ((fsts & DMAR_FSTS_APF) != 0) {
+ printf("DMAR%d: Advanced pending fault\n", unit->unit);
+ clear |= DMAR_FSTS_APF;
+ }
+ if ((fsts & DMAR_FSTS_AFO) != 0) {
+ printf("DMAR%d: Advanced fault overflow\n", unit->unit);
+ clear |= DMAR_FSTS_AFO;
+ }
+ if (clear != 0)
+ dmar_write4(unit, DMAR_FSTS_REG, clear);
+}
+
+int
+dmar_intr(void *arg)
+{
+ struct dmar_unit *unit;
+ uint64_t fault_rec[2];
+ uint32_t fsts;
+ int fri, frir, faultp;
+ bool enqueue;
+
+ unit = arg;
+ enqueue = false;
+ fsts = dmar_read4(unit, DMAR_FSTS_REG);
+ dmar_intr_clear(unit, fsts);
+
+ if ((fsts & DMAR_FSTS_PPF) == 0)
+ goto done;
+
+ fri = DMAR_FSTS_FRI(fsts);
+ for (;;) {
+ frir = (DMAR_CAP_FRO(unit->hw_cap) + fri) * 16;
+ fault_rec[1] = dmar_read8(unit, frir + 8);
+ if ((fault_rec[1] & DMAR_FRCD2_F) == 0)
+ break;
+ fault_rec[0] = dmar_read8(unit, frir);
+ dmar_write4(unit, frir + 12, DMAR_FRCD2_F32);
+ DMAR_FAULT_LOCK(unit);
+ faultp = unit->fault_log_head;
+ if (dmar_fault_next(unit, faultp) == unit->fault_log_tail) {
+ /* XXXKIB log overflow */
+ } else {
+ unit->fault_log[faultp] = fault_rec[0];
+ unit->fault_log[faultp + 1] = fault_rec[1];
+ unit->fault_log_head = dmar_fault_next(unit, faultp);
+ enqueue = true;
+ }
+ DMAR_FAULT_UNLOCK(unit);
+ fri += 1;
+ if (fri >= DMAR_CAP_NFR(unit->hw_cap))
+ fri = 0;
+ }
+
+done:
+ /*
+	 * On SandyBridge, due to errata BJ124, IvyBridge errata
+	 * BV100, and Haswell errata HSD40, "Spurious Intel VT-d
+	 * Interrupts May Occur When the PFO Bit is Set".  Handle these
+	 * cases by clearing the overflow bit even if no fault is
+	 * reported.
+	 *
+	 * On IvyBridge, errata BV30 states that clearing the
+	 * DMAR_FRCD2_F bit in the fault register causes a spurious
+	 * interrupt.  Do nothing.
+	 */
+ if ((fsts & DMAR_FSTS_PFO) != 0) {
+ printf("DMAR%d: Fault Overflow\n", unit->unit);
+ dmar_write4(unit, DMAR_FSTS_REG, DMAR_FSTS_PFO);
+ }
+
+ if (enqueue) {
+ taskqueue_enqueue_fast(unit->fault_taskqueue,
+ &unit->fault_task);
+ }
+ return (FILTER_HANDLED);
+}
+
+static void
+dmar_fault_task(void *arg, int pending __unused)
+{
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+ uint64_t fault_rec[2];
+ int sid, bus, slot, func, faultp;
+
+ unit = arg;
+ DMAR_FAULT_LOCK(unit);
+ for (;;) {
+ faultp = unit->fault_log_tail;
+ if (faultp == unit->fault_log_head)
+ break;
+
+ fault_rec[0] = unit->fault_log[faultp];
+ fault_rec[1] = unit->fault_log[faultp + 1];
+ unit->fault_log_tail = dmar_fault_next(unit, faultp);
+ DMAR_FAULT_UNLOCK(unit);
+
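+		/*
+		 * The source id is the PCI requester ID of the faulting
+		 * device: bus in bits 15:8, slot in bits 7:3, function
+		 * in bits 2:0.
+		 */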
+ sid = DMAR_FRCD2_SID(fault_rec[1]);
+		bus = (sid >> 8) & 0xff;
+ slot = (sid >> 3) & 0x1f;
+ func = sid & 0x7;
+ printf("DMAR%d: ", unit->unit);
+ DMAR_LOCK(unit);
+ ctx = dmar_find_ctx_locked(unit, bus, slot, func);
+ if (ctx == NULL) {
+ printf("<unknown dev>:");
+ } else {
+ ctx->flags |= DMAR_CTX_FAULTED;
+ ctx->last_fault_rec[0] = fault_rec[0];
+ ctx->last_fault_rec[1] = fault_rec[1];
+ device_printf(ctx->ctx_tag.owner, "");
+ }
+ DMAR_UNLOCK(unit);
+ printf(
+ "pci%d:%d:%d fault acc %x adt 0x%x reason 0x%x addr %jx\n",
+ bus, slot, func, DMAR_FRCD2_T(fault_rec[1]),
+ DMAR_FRCD2_AT(fault_rec[1]), DMAR_FRCD2_FR(fault_rec[1]),
+ (uintmax_t)fault_rec[0]);
+ DMAR_FAULT_LOCK(unit);
+ }
+ DMAR_FAULT_UNLOCK(unit);
+}
+
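+/*
+ * Acknowledge and clear any fault records already latched by the
+ * hardware, then reset the accumulated fault status bits.
+ */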
+static void
+dmar_clear_faults(struct dmar_unit *unit)
+{
+ uint32_t frec, frir, fsts;
+ int i;
+
+ for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) {
+ frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16;
+ frec = dmar_read4(unit, frir + 12);
+ if ((frec & DMAR_FRCD2_F32) == 0)
+ continue;
+ dmar_write4(unit, frir + 12, DMAR_FRCD2_F32);
+ }
+ fsts = dmar_read4(unit, DMAR_FSTS_REG);
+ dmar_write4(unit, DMAR_FSTS_REG, fsts);
+}
+
+int
+dmar_init_fault_log(struct dmar_unit *unit)
+{
+
+ mtx_init(&unit->fault_lock, "dmarflt", NULL, MTX_SPIN);
+ unit->fault_log_size = 256; /* 128 fault log entries */
+ TUNABLE_INT_FETCH("hw.dmar.fault_log_size", &unit->fault_log_size);
+ if (unit->fault_log_size % 2 != 0)
+		panic("hw.dmar.fault_log_size must be even");
+ unit->fault_log = malloc(sizeof(uint64_t) * unit->fault_log_size,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+
+ TASK_INIT(&unit->fault_task, 0, dmar_fault_task, unit);
+ unit->fault_taskqueue = taskqueue_create_fast("dmar", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->fault_taskqueue);
+ taskqueue_start_threads(&unit->fault_taskqueue, 1, PI_AV,
+ "dmar%d fault taskq", unit->unit);
+
+ dmar_disable_intr(unit);
+ dmar_clear_faults(unit);
+ dmar_enable_intr(unit);
+
+ return (0);
+}
+
+void
+dmar_fini_fault_log(struct dmar_unit *unit)
+{
+
+ dmar_disable_intr(unit);
+
+ if (unit->fault_taskqueue == NULL)
+ return;
+
+ taskqueue_drain(unit->fault_taskqueue, &unit->fault_task);
+ taskqueue_free(unit->fault_taskqueue);
+ mtx_destroy(&unit->fault_lock);
+
+ free(unit->fault_log, M_DEVBUF);
+ unit->fault_log = NULL;
+ unit->fault_log_head = unit->fault_log_tail = 0;
+}
diff --git a/sys/x86/iommu/intel_gas.c b/sys/x86/iommu/intel_gas.c
new file mode 100644
index 0000000..bd3c400
--- /dev/null
+++ b/sys/x86/iommu/intel_gas.c
@@ -0,0 +1,722 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define RB_AUGMENT(entry) dmar_gas_augment_entry(entry)
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <dev/pci/pcivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/uma.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+/*
+ * Guest Address Space management.
+ */
+
+static uma_zone_t dmar_map_entry_zone;
+
+static void
+intel_gas_init(void)
+{
+
+ dmar_map_entry_zone = uma_zcreate("DMAR_MAP_ENTRY",
+ sizeof(struct dmar_map_entry), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);
+
+struct dmar_map_entry *
+dmar_gas_alloc_entry(struct dmar_ctx *ctx, u_int flags)
+{
+ struct dmar_map_entry *res;
+
+ KASSERT((flags & ~(DMAR_PGF_WAITOK)) == 0,
+ ("unsupported flags %x", flags));
+
+ res = uma_zalloc(dmar_map_entry_zone, ((flags & DMAR_PGF_WAITOK) !=
+ 0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
+ if (res != NULL)
+ atomic_add_int(&ctx->entries_cnt, 1);
+ return (res);
+}
+
+void
+dmar_gas_free_entry(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+
+ atomic_subtract_int(&ctx->entries_cnt, 1);
+ uma_zfree(dmar_map_entry_zone, entry);
+}
+
+static int
+dmar_gas_cmp_entries(struct dmar_map_entry *a, struct dmar_map_entry *b)
+{
+
+	/* The last entry has zero size, so <= */
+ KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
+ a, (uintmax_t)a->start, (uintmax_t)a->end));
+ KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
+ b, (uintmax_t)b->start, (uintmax_t)b->end));
+ KASSERT(a->end <= b->start || b->end <= a->start ||
+ a->end == a->start || b->end == b->start,
+ ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
+ a, (uintmax_t)a->start, (uintmax_t)a->end,
+ b, (uintmax_t)b->start, (uintmax_t)b->end));
+
+ if (a->end < b->end)
+ return (-1);
+ else if (b->end < a->end)
+ return (1);
+ return (0);
+}
+
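+/*
+ * Maintain the tree augmentation: free_down is the largest free_after
+ * in the subtree rooted at the entry, which lets the allocator skip
+ * subtrees that cannot contain a hole of the requested size.
+ */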
+static void
+dmar_gas_augment_entry(struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *l, *r;
+
+ for (; entry != NULL; entry = RB_PARENT(entry, rb_entry)) {
+ l = RB_LEFT(entry, rb_entry);
+ r = RB_RIGHT(entry, rb_entry);
+ if (l == NULL && r == NULL) {
+ entry->free_down = entry->free_after;
+ } else if (l == NULL && r != NULL) {
+ entry->free_down = MAX(entry->free_after, r->free_down);
+ } else if (/*l != NULL && */ r == NULL) {
+ entry->free_down = MAX(entry->free_after, l->free_down);
+ } else /* if (l != NULL && r != NULL) */ {
+ entry->free_down = MAX(entry->free_after, l->free_down);
+ entry->free_down = MAX(entry->free_down, r->free_down);
+ }
+ }
+}
+
+RB_GENERATE(dmar_gas_entries_tree, dmar_map_entry, rb_entry,
+ dmar_gas_cmp_entries);
+
+static void
+dmar_gas_fix_free(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *next;
+
+ next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ entry->free_after = (next != NULL ? next->start : ctx->end) -
+ entry->end;
+ dmar_gas_augment_entry(entry);
+}
+
+#ifdef INVARIANTS
+static void
+dmar_gas_check_free(struct dmar_ctx *ctx)
+{
+ struct dmar_map_entry *entry, *next, *l, *r;
+ dmar_gaddr_t v;
+
+ RB_FOREACH(entry, dmar_gas_entries_tree, &ctx->rb_root) {
+ next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ if (next == NULL) {
+ MPASS(entry->free_after == ctx->end - entry->end);
+ } else {
+			MPASS(entry->free_after == next->start - entry->end);
+ MPASS(entry->end <= next->start);
+ }
+ l = RB_LEFT(entry, rb_entry);
+ r = RB_RIGHT(entry, rb_entry);
+ if (l == NULL && r == NULL) {
+ MPASS(entry->free_down == entry->free_after);
+ } else if (l == NULL && r != NULL) {
+			MPASS(entry->free_down == MAX(entry->free_after,
+			    r->free_down));
+ } else if (r == NULL) {
+			MPASS(entry->free_down == MAX(entry->free_after,
+			    l->free_down));
+ } else {
+			v = MAX(entry->free_after, l->free_down);
+			v = MAX(v, r->free_down);
+ MPASS(entry->free_down == v);
+ }
+ }
+}
+#endif
+
+static bool
+dmar_gas_rb_insert(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *prev, *found;
+
+ found = RB_INSERT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_fix_free(ctx, entry);
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ if (prev != NULL)
+ dmar_gas_fix_free(ctx, prev);
+ return (found == NULL);
+}
+
+static void
+dmar_gas_rb_remove(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *prev;
+
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ if (prev != NULL)
+ dmar_gas_fix_free(ctx, prev);
+}
+
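+/*
+ * Seed the context address space with two placeholder entries: one
+ * covering the first page, which keeps guest address zero out of the
+ * allocatable range, and a zero-sized entry at the top of the space,
+ * which acts as the upper sentinel for the free-space accounting.
+ */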
+void
+dmar_gas_init_ctx(struct dmar_ctx *ctx)
+{
+ struct dmar_map_entry *begin, *end;
+
+ begin = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK);
+ end = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK);
+
+ DMAR_CTX_LOCK(ctx);
+ KASSERT(ctx->entries_cnt == 2, ("dirty ctx %p", ctx));
+ KASSERT(RB_EMPTY(&ctx->rb_root), ("non-empty entries %p", ctx));
+
+ begin->start = 0;
+ begin->end = DMAR_PAGE_SIZE;
+ begin->free_after = ctx->end - begin->end;
+ begin->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED;
+ dmar_gas_rb_insert(ctx, begin);
+
+ end->start = ctx->end;
+ end->end = ctx->end;
+ end->free_after = 0;
+ end->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED;
+ dmar_gas_rb_insert(ctx, end);
+
+ ctx->first_place = begin;
+ ctx->last_place = end;
+ DMAR_CTX_UNLOCK(ctx);
+}
+
+void
+dmar_gas_fini_ctx(struct dmar_ctx *ctx)
+{
+ struct dmar_map_entry *entry, *entry1;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT(ctx->entries_cnt == 2, ("ctx still in use %p", ctx));
+
+ entry = RB_MIN(dmar_gas_entries_tree, &ctx->rb_root);
+ KASSERT(entry->start == 0, ("start entry start %p", ctx));
+ KASSERT(entry->end == DMAR_PAGE_SIZE, ("start entry end %p", ctx));
+ KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE,
+ ("start entry flags %p", ctx));
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_free_entry(ctx, entry);
+
+ entry = RB_MAX(dmar_gas_entries_tree, &ctx->rb_root);
+ KASSERT(entry->start == ctx->end, ("end entry start %p", ctx));
+ KASSERT(entry->end == ctx->end, ("end entry end %p", ctx));
+	KASSERT(entry->free_after == 0, ("end entry free_after %p", ctx));
+ KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE,
+ ("end entry flags %p", ctx));
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_free_entry(ctx, entry);
+
+ RB_FOREACH_SAFE(entry, dmar_gas_entries_tree, &ctx->rb_root, entry1) {
+ KASSERT((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0,
+ ("non-RMRR entry left %p", ctx));
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_free_entry(ctx, entry);
+ }
+}
+
+struct dmar_gas_match_args {
+ struct dmar_ctx *ctx;
+ dmar_gaddr_t size;
+ const struct bus_dma_tag_common *common;
+ u_int gas_flags;
+ struct dmar_map_entry *entry;
+};
+
+static bool
+dmar_gas_match_one(struct dmar_gas_match_args *a, struct dmar_map_entry *prev,
+ dmar_gaddr_t end)
+{
+ dmar_gaddr_t bs, start;
+
+ if (a->entry->start + a->size > end)
+ return (false);
+
+ /* DMAR_PAGE_SIZE to create gap after new entry. */
+ if (a->entry->start < prev->end + DMAR_PAGE_SIZE ||
+ a->entry->start + a->size + DMAR_PAGE_SIZE > prev->end +
+ prev->free_after)
+ return (false);
+
+ /* No boundary crossing. */
+ if (dmar_test_boundary(a->entry->start, a->size, a->common->boundary))
+ return (true);
+
+ /*
+ * The start to start + size region crosses the boundary.
+ * Check if there is enough space after the next boundary
+ * after the prev->end.
+ */
+ bs = (a->entry->start + a->common->boundary) & ~(a->common->boundary
+ - 1);
+ start = roundup2(bs, a->common->alignment);
+ /* DMAR_PAGE_SIZE to create gap after new entry. */
+ if (start + a->size + DMAR_PAGE_SIZE <= prev->end + prev->free_after &&
+ start + a->size <= end) {
+ a->entry->start = start;
+ return (true);
+ }
+
+ /*
+ * Not enough space to align at boundary, but allowed to split.
+ * We already checked that start + size does not overlap end.
+ *
+ * XXXKIB. It is possible that bs is exactly at the start of
+	 * the next entry, in which case there is no gap.  Ignore for now.
+ */
+ if ((a->gas_flags & DMAR_GM_CANSPLIT) != 0) {
+ a->size = bs - a->entry->start;
+ return (true);
+ }
+
+ return (false);
+}
+
+static void
+dmar_gas_match_insert(struct dmar_gas_match_args *a,
+ struct dmar_map_entry *prev)
+{
+ struct dmar_map_entry *next;
+ bool found;
+
+ /*
+ * The prev->end is always aligned on the page size, which
+	 * causes page alignment for the entry->start too.  The size
+	 * is checked to be a multiple of the page size.
+	 *
+	 * A page-sized gap is left between consecutive allocations
+	 * to ensure that out-of-bounds accesses fault.
+ */
+ a->entry->end = a->entry->start + a->size;
+
+ next = RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, prev);
+ KASSERT(next->start >= a->entry->end &&
+ next->start - a->entry->start >= a->size,
+ ("dmar_gas_match_insert hole failed %p prev (%jx, %jx) "
+ "free_after %jx next (%jx, %jx) entry (%jx, %jx)", a->ctx,
+ (uintmax_t)prev->start, (uintmax_t)prev->end,
+ (uintmax_t)prev->free_after,
+ (uintmax_t)next->start, (uintmax_t)next->end,
+ (uintmax_t)a->entry->start, (uintmax_t)a->entry->end));
+
+ prev->free_after = a->entry->start - prev->end;
+ a->entry->free_after = next->start - a->entry->end;
+
+ found = dmar_gas_rb_insert(a->ctx, a->entry);
+ KASSERT(found, ("found dup %p start %jx size %jx",
+ a->ctx, (uintmax_t)a->entry->start, (uintmax_t)a->size));
+ a->entry->flags = DMAR_MAP_ENTRY_MAP;
+
+ KASSERT(RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root,
+ a->entry) == prev,
+ ("entry %p prev %p inserted prev %p", a->entry, prev,
+ RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root, a->entry)));
+ KASSERT(RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root,
+ a->entry) == next,
+ ("entry %p next %p inserted next %p", a->entry, next,
+ RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, a->entry)));
+}
+
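+/*
+ * Recursively search the entries tree for a hole below lowaddr that is
+ * large enough for the request: try to place the mapping right after
+ * each visited entry, and prune subtrees whose free_down shows that no
+ * suitable hole can be found in them.
+ */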
+static int
+dmar_gas_lowermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *prev)
+{
+ struct dmar_map_entry *l;
+ int ret;
+
+ if (prev->end < a->common->lowaddr) {
+ a->entry->start = roundup2(prev->end + DMAR_PAGE_SIZE,
+ a->common->alignment);
+ if (dmar_gas_match_one(a, prev, a->common->lowaddr)) {
+ dmar_gas_match_insert(a, prev);
+ return (0);
+ }
+ }
+ if (prev->free_down < a->size + DMAR_PAGE_SIZE)
+ return (ENOMEM);
+ l = RB_LEFT(prev, rb_entry);
+ if (l != NULL) {
+ ret = dmar_gas_lowermatch(a, l);
+ if (ret == 0)
+ return (0);
+ }
+ l = RB_RIGHT(prev, rb_entry);
+ if (l != NULL)
+ return (dmar_gas_lowermatch(a, l));
+ return (ENOMEM);
+}
+
+static int
+dmar_gas_uppermatch(struct dmar_gas_match_args *a)
+{
+ struct dmar_map_entry *next, *prev, find_entry;
+
+ find_entry.start = a->common->highaddr;
+ next = RB_NFIND(dmar_gas_entries_tree, &a->ctx->rb_root, &find_entry);
+ if (next == NULL)
+ return (ENOMEM);
+ prev = RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root, next);
+ KASSERT(prev != NULL, ("no prev %p %jx", a->ctx,
+ (uintmax_t)find_entry.start));
+ for (;;) {
+ a->entry->start = prev->start + DMAR_PAGE_SIZE;
+ if (a->entry->start < a->common->highaddr)
+ a->entry->start = a->common->highaddr;
+ a->entry->start = roundup2(a->entry->start,
+ a->common->alignment);
+ if (dmar_gas_match_one(a, prev, a->ctx->end)) {
+ dmar_gas_match_insert(a, prev);
+ return (0);
+ }
+
+ /*
+		 * XXXKIB.  This falls back to a linear iteration over
+		 * the free space in the high region.  But the high
+		 * region is almost never used, so this is enough to
+		 * cover the case, albeit in a non-optimal way.
+ */
+ prev = next;
+ next = RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, prev);
+ KASSERT(next != NULL, ("no next %p %jx", a->ctx,
+ (uintmax_t)find_entry.start));
+ if (next->end >= a->ctx->end)
+ return (ENOMEM);
+ }
+}
+
+static int
+dmar_gas_find_space(struct dmar_ctx *ctx,
+ const struct bus_dma_tag_common *common, dmar_gaddr_t size,
+ u_int flags, struct dmar_map_entry *entry)
+{
+ struct dmar_gas_match_args a;
+ int error;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT(entry->flags == 0, ("dirty entry %p %p", ctx, entry));
+ KASSERT((size & DMAR_PAGE_MASK) == 0, ("size %jx", (uintmax_t)size));
+
+ a.ctx = ctx;
+ a.size = size;
+ a.common = common;
+ a.gas_flags = flags;
+ a.entry = entry;
+
+ /* Handle lower region. */
+ if (common->lowaddr > 0) {
+ error = dmar_gas_lowermatch(&a, RB_ROOT(&ctx->rb_root));
+ if (error == 0)
+ return (0);
+ KASSERT(error == ENOMEM,
+ ("error %d from dmar_gas_lowermatch", error));
+ }
+ /* Handle upper region. */
+ if (common->highaddr >= ctx->end)
+ return (ENOMEM);
+ error = dmar_gas_uppermatch(&a);
+ KASSERT(error == ENOMEM,
+ ("error %d from dmar_gas_uppermatch", error));
+ return (error);
+}
+
+static int
+dmar_gas_alloc_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+ u_int flags)
+{
+ struct dmar_map_entry *next, *prev;
+ bool found;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+
+ if ((entry->start & DMAR_PAGE_MASK) != 0 ||
+ (entry->end & DMAR_PAGE_MASK) != 0)
+ return (EINVAL);
+ if (entry->start >= entry->end)
+ return (EINVAL);
+ if (entry->end >= ctx->end)
+ return (EINVAL);
+
+ next = RB_NFIND(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ KASSERT(next != NULL, ("next must be non-null %p %jx", ctx,
+ (uintmax_t)entry->start));
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, next);
+ /* prev could be NULL */
+
+ /*
+ * Adapt to broken BIOSes which specify overlapping RMRR
+ * entries.
+ *
+ * XXXKIB: this does not handle a case when prev or next
+ * entries are completely covered by the current one, which
+ * extends both ways.
+ */
+ if (prev != NULL && prev->end > entry->start &&
+ (prev->flags & DMAR_MAP_ENTRY_PLACE) == 0) {
+ if ((prev->flags & DMAR_MAP_ENTRY_RMRR) == 0)
+ return (EBUSY);
+ entry->start = prev->end;
+ }
+ if (next != NULL && next->start < entry->end &&
+ (next->flags & DMAR_MAP_ENTRY_PLACE) == 0) {
+ if ((next->flags & DMAR_MAP_ENTRY_RMRR) == 0)
+ return (EBUSY);
+ entry->end = next->start;
+ }
+ if (entry->end == entry->start)
+ return (0);
+
+ if (prev != NULL && prev->end > entry->start) {
+ /* This assumes that prev is the placeholder entry. */
+ dmar_gas_rb_remove(ctx, prev);
+ prev = NULL;
+ }
+ if (next != NULL && next->start < entry->end) {
+ dmar_gas_rb_remove(ctx, next);
+ next = NULL;
+ }
+
+ found = dmar_gas_rb_insert(ctx, entry);
+ KASSERT(found, ("found RMRR dup %p start %jx end %jx",
+ ctx, (uintmax_t)entry->start, (uintmax_t)entry->end));
+ entry->flags = DMAR_MAP_ENTRY_RMRR;
+
+#ifdef INVARIANTS
+ struct dmar_map_entry *ip, *in;
+ ip = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ in = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ KASSERT(prev == NULL || ip == prev,
+ ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
+ entry, entry->start, entry->end, prev,
+ prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
+ ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
+ KASSERT(next == NULL || in == next,
+ ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
+ entry, entry->start, entry->end, next,
+ next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
+ in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
+#endif
+
+ return (0);
+}
+
+void
+dmar_gas_free_space(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR |
+ DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_MAP,
+ ("permanent entry %p %p", ctx, entry));
+
+ dmar_gas_rb_remove(ctx, entry);
+ entry->flags &= ~DMAR_MAP_ENTRY_MAP;
+#ifdef INVARIANTS
+ if (dmar_check_free)
+ dmar_gas_check_free(ctx);
+#endif
+}
+
+static void
+dmar_gas_free_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *next, *prev;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR |
+ DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_RMRR,
+ ("non-RMRR entry %p %p", ctx, entry));
+
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_rb_remove(ctx, entry);
+ entry->flags &= ~DMAR_MAP_ENTRY_RMRR;
+
+ if (prev == NULL)
+ dmar_gas_rb_insert(ctx, ctx->first_place);
+ if (next == NULL)
+ dmar_gas_rb_insert(ctx, ctx->last_place);
+}
+
+int
+dmar_gas_map(struct dmar_ctx *ctx, const struct bus_dma_tag_common *common,
+ dmar_gaddr_t size, u_int eflags, u_int flags, vm_page_t *ma,
+ struct dmar_map_entry **res)
+{
+ struct dmar_map_entry *entry;
+ int error;
+
+ KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_CANSPLIT)) == 0,
+ ("invalid flags 0x%x", flags));
+
+ entry = dmar_gas_alloc_entry(ctx, (flags & DMAR_GM_CANWAIT) != 0 ?
+ DMAR_PGF_WAITOK : 0);
+ if (entry == NULL)
+ return (ENOMEM);
+ DMAR_CTX_LOCK(ctx);
+ error = dmar_gas_find_space(ctx, common, size, flags, entry);
+ if (error == ENOMEM) {
+ DMAR_CTX_UNLOCK(ctx);
+ dmar_gas_free_entry(ctx, entry);
+ return (error);
+ }
+#ifdef INVARIANTS
+ if (dmar_check_free)
+ dmar_gas_check_free(ctx);
+#endif
+ KASSERT(error == 0,
+	    ("unexpected error %d from dmar_gas_find_space", error));
+ KASSERT(entry->end < ctx->end, ("allocated GPA %jx, max GPA %jx",
+ (uintmax_t)entry->end, (uintmax_t)ctx->end));
+ entry->flags |= eflags;
+ DMAR_CTX_UNLOCK(ctx);
+
+ error = ctx_map_buf(ctx, entry->start, size, ma,
+ ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) |
+ ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) |
+ ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) |
+ ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0),
+ (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0);
+ if (error == ENOMEM) {
+ DMAR_CTX_LOCK(ctx);
+ dmar_gas_free_space(ctx, entry);
+ DMAR_CTX_UNLOCK(ctx);
+ dmar_gas_free_entry(ctx, entry);
+ return (error);
+ }
+ KASSERT(error == 0,
+ ("unexpected error %d from ctx_map_buf", error));
+
+ *res = entry;
+ return (0);
+}
+
+int
+dmar_gas_map_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+ u_int eflags, u_int flags, vm_page_t *ma)
+{
+ dmar_gaddr_t start;
+ int error;
+
+ KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", ctx,
+ entry, entry->flags));
+ KASSERT((flags & ~(DMAR_GM_CANWAIT)) == 0,
+ ("invalid flags 0x%x", flags));
+
+ start = entry->start;
+ DMAR_CTX_LOCK(ctx);
+ error = dmar_gas_alloc_region(ctx, entry, flags);
+ if (error != 0) {
+ DMAR_CTX_UNLOCK(ctx);
+ return (error);
+ }
+ entry->flags |= eflags;
+ DMAR_CTX_UNLOCK(ctx);
+ if (entry->end == entry->start)
+ return (0);
+
+ error = ctx_map_buf(ctx, entry->start, entry->end - entry->start,
+ ma + OFF_TO_IDX(start - entry->start),
+ ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) |
+ ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) |
+ ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) |
+ ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0),
+ (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0);
+ if (error == ENOMEM) {
+ DMAR_CTX_LOCK(ctx);
+ dmar_gas_free_region(ctx, entry);
+ DMAR_CTX_UNLOCK(ctx);
+ entry->flags = 0;
+ return (error);
+ }
+ KASSERT(error == 0,
+ ("unexpected error %d from ctx_map_buf", error));
+
+ return (0);
+}
+
+int
+dmar_gas_reserve_region(struct dmar_ctx *ctx, dmar_gaddr_t start,
+ dmar_gaddr_t end)
+{
+ struct dmar_map_entry *entry;
+ int error;
+
+ entry = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK);
+ entry->start = start;
+ entry->end = end;
+ DMAR_CTX_LOCK(ctx);
+ error = dmar_gas_alloc_region(ctx, entry, DMAR_GM_CANWAIT);
+ if (error == 0)
+ entry->flags |= DMAR_MAP_ENTRY_UNMAPPED;
+ DMAR_CTX_UNLOCK(ctx);
+ if (error != 0)
+ dmar_gas_free_entry(ctx, entry);
+ return (error);
+}
diff --git a/sys/x86/iommu/intel_idpgtbl.c b/sys/x86/iommu/intel_idpgtbl.c
new file mode 100644
index 0000000..a1773aa
--- /dev/null
+++ b/sys/x86/iommu/intel_idpgtbl.c
@@ -0,0 +1,799 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+static int ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ dmar_gaddr_t size, int flags);
+static void ctx_flush_iotlb(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ dmar_gaddr_t size, int flags);
+
+/*
+ * The cache of the identity mapping page tables for the DMARs. Using
+ * the cache saves significant amount of memory for page tables by
+ * reusing the page tables, since usually DMARs are identical and have
+ * the same capabilities. Still, cache records the information needed
+ * to match DMAR capabilities and page table format, to correctly
+ * handle different DMARs.
+ */
+
+struct idpgtbl {
+ dmar_gaddr_t maxaddr; /* Page table covers the guest address
+ range [0..maxaddr) */
+ int pglvl; /* Total page table levels ignoring
+ superpages */
+ int leaf; /* The last materialized page table
+ level, it is non-zero if superpages
+ are supported */
+ vm_object_t pgtbl_obj; /* The page table pages */
+ LIST_ENTRY(idpgtbl) link;
+};
+
+static struct sx idpgtbl_lock;
+SX_SYSINIT(idpgtbl, &idpgtbl_lock, "idpgtbl");
+static LIST_HEAD(, idpgtbl) idpgtbls = LIST_HEAD_INITIALIZER(idpgtbls);
+static MALLOC_DEFINE(M_DMAR_IDPGTBL, "dmar_idpgtbl",
+ "Intel DMAR Identity mappings cache elements");
+
+/*
+ * Build the next level of the page tables for the identity mapping.
+ * - lvl is the level to build;
+ * - idx is the index of the page table page in the pgtbl_obj, which is
+ *   being allocated and filled now;
+ * - addr is the starting address in the bus address space which is
+ * mapped by the page table page.
+ */
+static void
+ctx_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx,
+ dmar_gaddr_t addr)
+{
+ vm_page_t m, m1;
+ dmar_pte_t *pte;
+ struct sf_buf *sf;
+ dmar_gaddr_t f, pg_sz;
+ vm_pindex_t base;
+ int i;
+
+ VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj);
+ if (addr >= tbl->maxaddr)
+ return;
+ m = dmar_pgalloc(tbl->pgtbl_obj, idx, DMAR_PGF_OBJL | DMAR_PGF_WAITOK |
+ DMAR_PGF_ZERO);
+ base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */
+ pg_sz = pglvl_page_size(tbl->pglvl, lvl);
+ if (lvl != tbl->leaf) {
+ for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz)
+ ctx_idmap_nextlvl(tbl, lvl + 1, base + i, f);
+ }
+ VM_OBJECT_WUNLOCK(tbl->pgtbl_obj);
+ pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, DMAR_PGF_WAITOK, &sf);
+ if (lvl == tbl->leaf) {
+ for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) {
+ if (f >= tbl->maxaddr)
+ break;
+ pte[i].pte = (DMAR_PTE_ADDR_MASK & f) |
+ DMAR_PTE_R | DMAR_PTE_W;
+ }
+ } else {
+ for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) {
+ if (f >= tbl->maxaddr)
+ break;
+ m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i,
+ DMAR_PGF_NOALLOC);
+ KASSERT(m1 != NULL, ("lost page table page"));
+ pte[i].pte = (DMAR_PTE_ADDR_MASK &
+ VM_PAGE_TO_PHYS(m1)) | DMAR_PTE_R | DMAR_PTE_W;
+ }
+ }
+ /* ctx_get_idmap_pgtbl flushes CPU cache if needed. */
+ dmar_unmap_pgtbl(sf, true);
+ VM_OBJECT_WLOCK(tbl->pgtbl_obj);
+}
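+
+/*
+ * For example, with a 3-level table and no superpage support
+ * (leaf == pglvl - 1 == 2), each level 0 entry covers 1G and points
+ * to a level 1 page, each level 1 entry covers 2M and points to a
+ * leaf page, and the leaf entries install the identity 4K mappings.
+ */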
+
+/*
+ * Find a ready and compatible identity-mapping page table in the
+ * cache. If not found, populate the identity-mapping page table for
+ * the context, up to maxaddr. The byte at maxaddr itself is allowed
+ * to be left unmapped, which matches the definition of Maxmem as the
+ * highest usable physical address plus one. If superpages are used,
+ * maxaddr is typically mapped anyway.
+ */
+vm_object_t
+ctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr)
+{
+ struct dmar_unit *unit;
+ struct idpgtbl *tbl;
+ vm_object_t res;
+ vm_page_t m;
+ int leaf, i;
+
+ /*
+ * First, determine where to stop the paging structures.
+ */
+ for (i = 0; i < ctx->pglvl; i++) {
+ if (i == ctx->pglvl - 1 || ctx_is_sp_lvl(ctx, i)) {
+ leaf = i;
+ break;
+ }
+ }
+
+ /*
+ * Search the cache for a compatible page table. A qualifying
+ * page table must map at least up to maxaddr, its level must be
+ * supported by the DMAR, and its leaf must equal the calculated
+ * value. The latter restriction could be lifted, but I believe
+ * existing hardware cannot produce any deviation.
+ */
+ sx_slock(&idpgtbl_lock);
+ LIST_FOREACH(tbl, &idpgtbls, link) {
+ if (tbl->maxaddr >= maxaddr &&
+ dmar_pglvl_supported(ctx->dmar, tbl->pglvl) &&
+ tbl->leaf == leaf) {
+ res = tbl->pgtbl_obj;
+ vm_object_reference(res);
+ sx_sunlock(&idpgtbl_lock);
+ ctx->pglvl = tbl->pglvl; /* XXXKIB ? */
+ goto end;
+ }
+ }
+
+ /*
+ * Not found in the cache. Relock the cache in exclusive mode so
+ * that an element can be added, and recheck the cache after the
+ * relock.
+ */
+ sx_sunlock(&idpgtbl_lock);
+ sx_xlock(&idpgtbl_lock);
+ LIST_FOREACH(tbl, &idpgtbls, link) {
+ if (tbl->maxaddr >= maxaddr &&
+ dmar_pglvl_supported(ctx->dmar, tbl->pglvl) &&
+ tbl->leaf == leaf) {
+ res = tbl->pgtbl_obj;
+ vm_object_reference(res);
+ sx_xunlock(&idpgtbl_lock);
+ ctx->pglvl = tbl->pglvl; /* XXXKIB ? */
+ return (res);
+ }
+ }
+
+ /*
+ * Still not found, create a new page table.
+ */
+ tbl = malloc(sizeof(*tbl), M_DMAR_IDPGTBL, M_WAITOK);
+ tbl->pglvl = ctx->pglvl;
+ tbl->leaf = leaf;
+ tbl->maxaddr = maxaddr;
+ tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
+ IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL);
+ VM_OBJECT_WLOCK(tbl->pgtbl_obj);
+ ctx_idmap_nextlvl(tbl, 0, 0, 0);
+ VM_OBJECT_WUNLOCK(tbl->pgtbl_obj);
+ LIST_INSERT_HEAD(&idpgtbls, tbl, link);
+ res = tbl->pgtbl_obj;
+ vm_object_reference(res);
+ sx_xunlock(&idpgtbl_lock);
+
+end:
+ /*
+ * Table was found or created.
+ *
+ * If the DMAR does not snoop paging structure accesses, flush the
+ * CPU cache to memory. Note that the coherent argument passed to
+ * dmar_unmap_pgtbl() might have been wrong at the time the identity
+ * page table was created, since the DMAR in use then could be
+ * coherent while the current DMAR is not.
+ *
+ * If DMAR cannot look into the chipset write buffer, flush it
+ * as well.
+ */
+ unit = ctx->dmar;
+ if (!DMAR_IS_COHERENT(unit)) {
+ VM_OBJECT_WLOCK(res);
+ for (m = vm_page_lookup(res, 0); m != NULL;
+ m = vm_page_next(m))
+ pmap_invalidate_cache_pages(&m, 1);
+ VM_OBJECT_WUNLOCK(res);
+ }
+ if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) {
+ DMAR_LOCK(unit);
+ dmar_flush_write_bufs(unit);
+ DMAR_UNLOCK(unit);
+ }
+
+ return (res);
+}
+
+/*
+ * Return a reference to the identity mapping page table to the cache.
+ */
+void
+put_idmap_pgtbl(vm_object_t obj)
+{
+ struct idpgtbl *tbl, *tbl1;
+ vm_object_t rmobj;
+
+ sx_slock(&idpgtbl_lock);
+ KASSERT(obj->ref_count >= 2, ("lost cache reference"));
+ vm_object_deallocate(obj);
+
+ /*
+ * The cache always owns one reference on the page table object.
+ * If there is an additional reference, the object must stay.
+ */
+ if (obj->ref_count > 1) {
+ sx_sunlock(&idpgtbl_lock);
+ return;
+ }
+
+ /*
+ * The cache reference is the last one; remove the cache element
+ * and free the page table object, returning the page table pages
+ * to the system.
+ */
+ sx_sunlock(&idpgtbl_lock);
+ sx_xlock(&idpgtbl_lock);
+ LIST_FOREACH_SAFE(tbl, &idpgtbls, link, tbl1) {
+ rmobj = tbl->pgtbl_obj;
+ if (rmobj->ref_count == 1) {
+ LIST_REMOVE(tbl, link);
+ atomic_subtract_int(&dmar_tbl_pagecnt,
+ rmobj->resident_page_count);
+ vm_object_deallocate(rmobj);
+ free(tbl, M_DMAR_IDPGTBL);
+ }
+ }
+ sx_xunlock(&idpgtbl_lock);
+}
+
+/*
+ * The core routines to map and unmap host pages at the given guest
+ * address. Superpages are supported.
+ */
+
+/*
+ * Index of the pte for the guest address base in the page table at
+ * the level lvl.
+ */
+static int
+ctx_pgtbl_pte_off(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl)
+{
+
+ base >>= DMAR_PAGE_SHIFT + (ctx->pglvl - lvl - 1) * DMAR_NPTEPGSHIFT;
+ return (base & DMAR_PTEMASK);
+}
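+
+/*
+ * For example, with pglvl == 4 the shift applied to base is 39 for
+ * lvl 0, 30 for lvl 1, 21 for lvl 2 and 12 for the leaf lvl 3, so
+ * each level consumes the next 9-bit group of the guest address.
+ */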
+
+/*
+ * Returns the page index of the page table page in the page table
+ * object, which maps the given address base at the page table level
+ * lvl.
+ */
+static vm_pindex_t
+ctx_pgtbl_get_pindex(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl)
+{
+ vm_pindex_t idx, pidx;
+ int i;
+
+ KASSERT(lvl >= 0 && lvl < ctx->pglvl, ("wrong lvl %p %d", ctx, lvl));
+
+ for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx)
+ idx = ctx_pgtbl_pte_off(ctx, base, i) + pidx * DMAR_NPTEPG + 1;
+ return (idx);
+}
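+
+/*
+ * The layout implied above: the root page table page is at pindex 0
+ * and the children of the page at pindex p occupy pindexes
+ * p * DMAR_NPTEPG + 1 .. (p + 1) * DMAR_NPTEPG, the same layout
+ * used by ctx_idmap_nextlvl() for the identity tables.
+ */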
+
+static dmar_pte_t *
+ctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags,
+ vm_pindex_t *idxp, struct sf_buf **sf)
+{
+ vm_page_t m;
+ struct sf_buf *sfp;
+ dmar_pte_t *pte, *ptep;
+ vm_pindex_t idx, idx1;
+
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+ KASSERT((flags & DMAR_PGF_OBJL) != 0, ("lost PGF_OBJL"));
+
+ idx = ctx_pgtbl_get_pindex(ctx, base, lvl);
+ if (*sf != NULL && idx == *idxp) {
+ pte = (dmar_pte_t *)sf_buf_kva(*sf);
+ } else {
+ if (*sf != NULL)
+ dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+ *idxp = idx;
+retry:
+ pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf);
+ if (pte == NULL) {
+ KASSERT(lvl > 0, ("lost root page table page %p", ctx));
+ /*
+ * Page table page does not exist, allocate
+ * it and create the pte at the upper level.
+ */
+ m = dmar_pgalloc(ctx->pgtbl_obj, idx, flags |
+ DMAR_PGF_ZERO);
+ if (m == NULL)
+ return (NULL);
+
+ /*
+ * Prevent a potential free while pgtbl_obj is
+ * unlocked in the recursive call to
+ * ctx_pgtbl_map_pte(), in case another thread
+ * wrote and then cleared the pte while the
+ * lock was dropped.
+ */
+ m->wire_count++;
+
+ sfp = NULL;
+ ptep = ctx_pgtbl_map_pte(ctx, base, lvl - 1, flags,
+ &idx1, &sfp);
+ if (ptep == NULL) {
+ KASSERT(m->pindex != 0,
+ ("loosing root page %p", ctx));
+ m->wire_count--;
+ dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags);
+ return (NULL);
+ }
+ dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W |
+ VM_PAGE_TO_PHYS(m));
+ sf_buf_page(sfp)->wire_count += 1;
+ m->wire_count--;
+ dmar_unmap_pgtbl(sfp, DMAR_IS_COHERENT(ctx->dmar));
+ /* Only executed once. */
+ goto retry;
+ }
+ }
+ pte += ctx_pgtbl_pte_off(ctx, base, lvl);
+ return (pte);
+}
+
+static int
+ctx_map_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ vm_page_t *ma, uint64_t pflags, int flags)
+{
+ struct dmar_unit *unit;
+ dmar_pte_t *pte;
+ struct sf_buf *sf;
+ dmar_gaddr_t pg_sz, base1, size1;
+ vm_pindex_t pi, c, idx, run_sz;
+ int lvl;
+ bool superpage;
+
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+
+ base1 = base;
+ size1 = size;
+ flags |= DMAR_PGF_OBJL;
+ TD_PREP_PINNED_ASSERT;
+
+ for (sf = NULL, pi = 0; size > 0; base += pg_sz, size -= pg_sz,
+ pi += run_sz) {
+ for (lvl = 0, c = 0, superpage = false;; lvl++) {
+ pg_sz = ctx_page_size(ctx, lvl);
+ run_sz = pg_sz >> DMAR_PAGE_SHIFT;
+ if (lvl == ctx->pglvl - 1)
+ break;
+ /*
+ * Check if the current base is suitable for a
+ * superpage mapping. First, verify the level.
+ */
+ if (!ctx_is_sp_lvl(ctx, lvl))
+ continue;
+ /*
+ * Next, look at the size of the mapping and
+ * alignment of both guest and host addresses.
+ */
+ if (size < pg_sz || (base & (pg_sz - 1)) != 0 ||
+ (VM_PAGE_TO_PHYS(ma[pi]) & (pg_sz - 1)) != 0)
+ continue;
+ /* All passed, check host pages contiguity. */
+ if (c == 0) {
+ for (c = 1; c < run_sz; c++) {
+ if (VM_PAGE_TO_PHYS(ma[pi + c]) !=
+ VM_PAGE_TO_PHYS(ma[pi + c - 1]) +
+ PAGE_SIZE)
+ break;
+ }
+ }
+ if (c >= run_sz) {
+ superpage = true;
+ break;
+ }
+ }
+ KASSERT(size >= pg_sz,
+ ("mapping loop overflow %p %jx %jx %jx", ctx,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
+ pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf);
+ if (pte == NULL) {
+ KASSERT((flags & DMAR_PGF_WAITOK) == 0,
+ ("failed waitable pte alloc %p", ctx));
+ if (sf != NULL) {
+ dmar_unmap_pgtbl(sf,
+ DMAR_IS_COHERENT(ctx->dmar));
+ }
+ ctx_unmap_buf_locked(ctx, base1, base - base1, flags);
+ TD_PINNED_ASSERT;
+ return (ENOMEM);
+ }
+ dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags |
+ (superpage ? DMAR_PTE_SP : 0));
+ sf_buf_page(sf)->wire_count += 1;
+ }
+ if (sf != NULL)
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+ DMAR_CTX_PGUNLOCK(ctx);
+ unit = ctx->dmar;
+ if ((unit->hw_cap & DMAR_CAP_CM) != 0)
+ ctx_flush_iotlb(ctx, base1, size1, flags);
+ else if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) {
+ /* See 11.1 Write Buffer Flushing. */
+ DMAR_LOCK(unit);
+ dmar_flush_write_bufs(unit);
+ DMAR_UNLOCK(unit);
+ }
+
+ TD_PINNED_ASSERT;
+ return (0);
+}
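+
+/*
+ * For example, a 2M-aligned, physically contiguous 2M run on
+ * hardware that passes the ctx_is_sp_lvl() check for that level is
+ * installed as a single pte with DMAR_PTE_SP set one level above
+ * the leaf, instead of 512 leaf ptes.
+ */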
+
+int
+ctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ vm_page_t *ma, uint64_t pflags, int flags)
+{
+
+ KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0,
+ ("modifying idmap pagetable ctx %p", ctx));
+ KASSERT((base & DMAR_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & DMAR_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(size > 0, ("zero size %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < (1ULL << ctx->agaw),
+ ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size < (1ULL << ctx->agaw),
+ ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((pflags & (DMAR_PTE_R | DMAR_PTE_W)) != 0,
+ ("neither read nor write %jx", (uintmax_t)pflags));
+ KASSERT((pflags & ~(DMAR_PTE_R | DMAR_PTE_W | DMAR_PTE_SNP |
+ DMAR_PTE_TM)) == 0,
+ ("invalid pte flags %jx", (uintmax_t)pflags));
+ KASSERT((pflags & DMAR_PTE_SNP) == 0 ||
+ (ctx->dmar->hw_ecap & DMAR_ECAP_SC) != 0,
+ ("PTE_SNP for dmar without snoop control %p %jx",
+ ctx, (uintmax_t)pflags));
+ KASSERT((pflags & DMAR_PTE_TM) == 0 ||
+ (ctx->dmar->hw_ecap & DMAR_ECAP_DI) != 0,
+ ("PTE_TM for dmar without DIOTLB %p %jx",
+ ctx, (uintmax_t)pflags));
+ KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ DMAR_CTX_PGLOCK(ctx);
+ return (ctx_map_buf_locked(ctx, base, size, ma, pflags, flags));
+}
+
+static void ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ int lvl, int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_fs);
+
+static void
+ctx_free_pgtbl_pde(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags)
+{
+ struct sf_buf *sf;
+ dmar_pte_t *pde;
+ vm_pindex_t idx;
+
+ sf = NULL;
+ pde = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf);
+ ctx_unmap_clear_pte(ctx, base, lvl, flags, pde, &sf, true);
+}
+
+static void
+ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl,
+ int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf)
+{
+ vm_page_t m;
+
+ dmar_pte_clear(&pte->pte);
+ m = sf_buf_page(*sf);
+ if (free_sf) {
+ dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+ *sf = NULL;
+ }
+ m->wire_count--;
+ if (m->wire_count != 0)
+ return;
+ KASSERT(lvl != 0,
+ ("lost reference (lvl) on root pg ctx %p base %jx lvl %d",
+ ctx, (uintmax_t)base, lvl));
+ KASSERT(m->pindex != 0,
+ ("lost reference (idx) on root pg ctx %p base %jx lvl %d",
+ ctx, (uintmax_t)base, lvl));
+ dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags);
+ ctx_free_pgtbl_pde(ctx, base, lvl - 1, flags);
+}
+
+/*
+ * Assumes that the unmap is never partial.
+ */
+static int
+ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ dmar_gaddr_t size, int flags)
+{
+ dmar_pte_t *pte;
+ struct sf_buf *sf;
+ vm_pindex_t idx;
+ dmar_gaddr_t pg_sz, base1, size1;
+ int lvl;
+
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+ if (size == 0)
+ return (0);
+
+ KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0,
+ ("modifying idmap pagetable ctx %p", ctx));
+ KASSERT((base & DMAR_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & DMAR_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < (1ULL << ctx->agaw),
+ ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size < (1ULL << ctx->agaw),
+ ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ base1 = base;
+ size1 = size;
+ flags |= DMAR_PGF_OBJL;
+ TD_PREP_PINNED_ASSERT;
+
+ for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
+ for (lvl = 0; lvl < ctx->pglvl; lvl++) {
+ if (lvl != ctx->pglvl - 1 && !ctx_is_sp_lvl(ctx, lvl))
+ continue;
+ pg_sz = ctx_page_size(ctx, lvl);
+ if (pg_sz > size)
+ continue;
+ pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags,
+ &idx, &sf);
+ KASSERT(pte != NULL,
+ ("sleeping or page missed %p %jx %d 0x%x",
+ ctx, (uintmax_t)base, lvl, flags));
+ if ((pte->pte & DMAR_PTE_SP) != 0 ||
+ lvl == ctx->pglvl - 1) {
+ ctx_unmap_clear_pte(ctx, base, lvl, flags,
+ pte, &sf, false);
+ break;
+ }
+ }
+ KASSERT(size >= pg_sz,
+ ("unmapping loop overflow %p %jx %jx %jx", ctx,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
+ }
+ if (sf != NULL)
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+ DMAR_CTX_PGUNLOCK(ctx);
+ ctx_flush_iotlb(ctx, base1, size1, flags);
+ /*
+ * See 11.1 Write Buffer Flushing for an explanation why RWBF
+ * can be ignored here.
+ */
+
+ TD_PINNED_ASSERT;
+ return (0);
+}
+
+int
+ctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ int flags)
+{
+
+ DMAR_CTX_PGLOCK(ctx);
+ return (ctx_unmap_buf_locked(ctx, base, size, flags));
+}
+
+int
+ctx_alloc_pgtbl(struct dmar_ctx *ctx)
+{
+ vm_page_t m;
+
+ KASSERT(ctx->pgtbl_obj == NULL, ("already initialized %p", ctx));
+
+ ctx->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
+ IDX_TO_OFF(pglvl_max_pages(ctx->pglvl)), 0, 0, NULL);
+ DMAR_CTX_PGLOCK(ctx);
+ m = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_WAITOK |
+ DMAR_PGF_ZERO | DMAR_PGF_OBJL);
+ /* No implicit free of the top level page table page. */
+ m->wire_count = 1;
+ DMAR_CTX_PGUNLOCK(ctx);
+ return (0);
+}
+
+void
+ctx_free_pgtbl(struct dmar_ctx *ctx)
+{
+ vm_object_t obj;
+ vm_page_t m;
+
+ obj = ctx->pgtbl_obj;
+ if (obj == NULL) {
+ KASSERT((ctx->dmar->hw_ecap & DMAR_ECAP_PT) != 0 &&
+ (ctx->flags & DMAR_CTX_IDMAP) != 0,
+ ("lost pagetable object ctx %p", ctx));
+ return;
+ }
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+ ctx->pgtbl_obj = NULL;
+
+ if ((ctx->flags & DMAR_CTX_IDMAP) != 0) {
+ put_idmap_pgtbl(obj);
+ ctx->flags &= ~DMAR_CTX_IDMAP;
+ return;
+ }
+
+ /* Obliterate wire_counts */
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
+ m->wire_count = 0;
+ VM_OBJECT_WUNLOCK(obj);
+ vm_object_deallocate(obj);
+}
+
+static inline uint64_t
+ctx_wait_iotlb_flush(struct dmar_unit *unit, uint64_t wt, int iro)
+{
+ uint64_t iotlbr;
+
+ dmar_write8(unit, iro + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
+ DMAR_IOTLB_DR | DMAR_IOTLB_DW | wt);
+ for (;;) {
+ iotlbr = dmar_read8(unit, iro + DMAR_IOTLB_REG_OFF);
+ if ((iotlbr & DMAR_IOTLB_IVT) == 0)
+ break;
+ cpu_spinwait();
+ }
+ return (iotlbr);
+}
+
+/*
+ * flags is only intended for PGF_WAITOK, to disallow queued
+ * invalidation.
+ */
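+/*
+ * For example, invalidating a 2M-aligned 2M range on a unit whose
+ * MAMV capability is at least 9 issues a single page-selective
+ * request with am == 9 (2^9 4K pages); larger ranges, or units
+ * without PSI, fall back to domain-selective invalidation.
+ */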
+static void
+ctx_flush_iotlb(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ int flags)
+{
+ struct dmar_unit *unit;
+ dmar_gaddr_t isize;
+ uint64_t iotlbr;
+ int am, iro;
+
+ unit = ctx->dmar;
+#if 0
+ if ((unit->hw_ecap & DMAR_ECAP_QI) != 0 &&
+ (flags & DMAR_PGF_WAITOK) != 0) {
+ /*
+ * XXXKIB: A queued invalidation interface could
+ * be used here. But since the queued and register
+ * interfaces cannot be used simultaneously, and the
+ * sleep-less (i.e. register) interface must be used
+ * when DMAR_PGF_WAITOK is not specified, only the
+ * register interface is suitable.
+ */
+ return;
+ }
+#endif
+ iro = DMAR_ECAP_IRO(unit->hw_ecap) * 16;
+ DMAR_LOCK(unit);
+ if ((unit->hw_cap & DMAR_CAP_PSI) == 0 || size > 2 * 1024 * 1024) {
+ iotlbr = ctx_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_DOM |
+ DMAR_IOTLB_DID(ctx->domain), iro);
+ KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) !=
+ DMAR_IOTLB_IAIG_INVLD,
+ ("dmar%d: invalidation failed %jx", unit->unit,
+ (uintmax_t)iotlbr));
+ } else {
+ for (; size > 0; base += isize, size -= isize) {
+ for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
+ isize = 1ULL << (am + DMAR_PAGE_SHIFT);
+ if ((base & (isize - 1)) == 0 && size >= isize)
+ break;
+ if (am == 0)
+ break;
+ }
+ dmar_write8(unit, iro, base | am);
+ iotlbr = ctx_wait_iotlb_flush(unit,
+ DMAR_IOTLB_IIRG_PAGE | DMAR_IOTLB_DID(ctx->domain),
+ iro);
+ KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) !=
+ DMAR_IOTLB_IAIG_INVLD,
+ ("dmar%d: PSI invalidation failed "
+ "iotlbr 0x%jx base 0x%jx size 0x%jx am %d",
+ unit->unit, (uintmax_t)iotlbr,
+ (uintmax_t)base, (uintmax_t)size, am));
+ /*
+ * Any non-page granularity covers whole guest
+ * address space for the domain.
+ */
+ if ((iotlbr & DMAR_IOTLB_IAIG_MASK) !=
+ DMAR_IOTLB_IAIG_PAGE)
+ break;
+ }
+ }
+ DMAR_UNLOCK(unit);
+}
diff --git a/sys/x86/iommu/intel_quirks.c b/sys/x86/iommu/intel_quirks.c
new file mode 100644
index 0000000..7c35ae6
--- /dev/null
+++ b/sys/x86/iommu/intel_quirks.c
@@ -0,0 +1,195 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <machine/bus.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+#include <dev/pci/pcivar.h>
+
+typedef void (*dmar_quirk_fun)(struct dmar_unit *);
+
+struct intel_dmar_quirk_cpu {
+ u_int ext_family;
+ u_int ext_model;
+ u_int family_code;
+ u_int model;
+ u_int stepping;
+ dmar_quirk_fun quirk;
+ const char *descr;
+};
+
+struct intel_dmar_quirk_nb {
+ u_int dev_id;
+ u_int rev_no;
+ dmar_quirk_fun quirk;
+ const char *descr;
+};
+
+static void
+dmar_match_quirks(struct dmar_unit *dmar,
+ const struct intel_dmar_quirk_nb *nb_quirks, int nb_quirks_len,
+ const struct intel_dmar_quirk_cpu *cpu_quirks, int cpu_quirks_len)
+{
+ device_t nb;
+ const struct intel_dmar_quirk_nb *nb_quirk;
+ const struct intel_dmar_quirk_cpu *cpu_quirk;
+ u_int p[4];
+ u_int dev_id, rev_no;
+ u_int ext_family, ext_model, family_code, model, stepping;
+ int i;
+
+ if (nb_quirks != NULL) {
+ nb = pci_find_bsf(0, 0, 0);
+ if (nb != NULL) {
+ dev_id = pci_get_device(nb);
+ rev_no = pci_get_revid(nb);
+ for (i = 0; i < nb_quirks_len; i++) {
+ nb_quirk = &nb_quirks[i];
+ if (nb_quirk->dev_id == dev_id &&
+ nb_quirk->rev_no == rev_no) {
+ if (bootverbose) {
+ device_printf(dmar->dev,
+ "NB IOMMU quirk %s\n",
+ nb_quirk->descr);
+ }
+ nb_quirk->quirk(dmar);
+ }
+ }
+ } else {
+ device_printf(dmar->dev, "cannot find northbridge\n");
+ }
+ }
+ if (cpu_quirks != NULL) {
+ do_cpuid(1, p);
+ ext_family = (p[0] & CPUID_EXT_FAMILY) >> 20;
+ ext_model = (p[0] & CPUID_EXT_MODEL) >> 16;
+ family_code = (p[0] & CPUID_FAMILY) >> 8;
+ model = (p[0] & CPUID_MODEL) >> 4;
+ stepping = p[0] & CPUID_STEPPING;
+ for (i = 0; i < cpu_quirks_len; i++) {
+ cpu_quirk = &cpu_quirks[i];
+ if (cpu_quirk->ext_family == ext_family &&
+ cpu_quirk->ext_model == ext_model &&
+ cpu_quirk->family_code == family_code &&
+ cpu_quirk->model == model &&
+ (cpu_quirk->stepping == -1 ||
+ cpu_quirk->stepping == stepping)) {
+ if (bootverbose) {
+ device_printf(dmar->dev,
+ "CPU IOMMU quirk %s\n",
+ cpu_quirk->descr);
+ }
+ cpu_quirk->quirk(dmar);
+ }
+ }
+ }
+}
+
+static void
+nb_5400_no_low_high_prot_mem(struct dmar_unit *unit)
+{
+
+ unit->hw_cap &= ~(DMAR_CAP_PHMR | DMAR_CAP_PLMR);
+}
+
+static const struct intel_dmar_quirk_nb pre_use_nb[] = {
+ {
+ .dev_id = 0x4001, .rev_no = 0x20,
+ .quirk = nb_5400_no_low_high_prot_mem,
+ .descr = "5400 E23" /* no low/high protected memory */
+ },
+ {
+ .dev_id = 0x4003, .rev_no = 0x20,
+ .quirk = nb_5400_no_low_high_prot_mem,
+ .descr = "5400 E23" /* no low/high protected memory */
+ },
+};
+
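+/*
+ * Clear the 6-bit MAMV field (bits 53:48 of the capability
+ * register) and force it to 9, i.e. allow at most 2^9 pages per
+ * page-selective IOTLB invalidation.
+ */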
+static void
+cpu_e5_am9(struct dmar_unit *unit)
+{
+
+ unit->hw_cap &= ~(0x3fULL << 48);
+ unit->hw_cap |= (9ULL << 48);
+}
+
+static const struct intel_dmar_quirk_cpu post_ident_cpu[] = {
+ {
+ .ext_family = 0, .ext_model = 2, .family_code = 6, .model = 13,
+ .stepping = 6, .quirk = cpu_e5_am9,
+ .descr = "E5 BT176" /* AM should be at most 9 */
+ },
+};
+
+void
+dmar_quirks_pre_use(struct dmar_unit *dmar)
+{
+
+ if (!dmar_barrier_enter(dmar, DMAR_BARRIER_USEQ))
+ return;
+ DMAR_LOCK(dmar);
+ dmar_match_quirks(dmar, pre_use_nb, nitems(pre_use_nb),
+ NULL, 0);
+ dmar_barrier_exit(dmar, DMAR_BARRIER_USEQ);
+}
+
+void
+dmar_quirks_post_ident(struct dmar_unit *dmar)
+{
+
+ dmar_match_quirks(dmar, NULL, 0, post_ident_cpu,
+ nitems(post_ident_cpu));
+}
diff --git a/sys/x86/iommu/intel_reg.h b/sys/x86/iommu/intel_reg.h
new file mode 100644
index 0000000..1157a9b
--- /dev/null
+++ b/sys/x86/iommu/intel_reg.h
@@ -0,0 +1,294 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __X86_IOMMU_INTEL_REG_H
+#define __X86_IOMMU_INTEL_REG_H
+
+#define DMAR_PAGE_SIZE PAGE_SIZE
+#define DMAR_PAGE_MASK (DMAR_PAGE_SIZE - 1)
+#define DMAR_PAGE_SHIFT PAGE_SHIFT
+#define DMAR_NPTEPG (DMAR_PAGE_SIZE / sizeof(dmar_pte_t))
+#define DMAR_NPTEPGSHIFT 9
+#define DMAR_PTEMASK (DMAR_NPTEPG - 1)
+
+typedef struct dmar_root_entry {
+ uint64_t r1;
+ uint64_t r2;
+} dmar_root_entry_t;
+#define DMAR_ROOT_R1_P 1 /* Present */
+#define DMAR_ROOT_R1_CTP_MASK 0xfffffffffffff000 /* Mask for Context-Entry
+ Table Pointer */
+
+#define DMAR_CTX_CNT (DMAR_PAGE_SIZE / sizeof(dmar_root_entry_t))
+
+typedef struct dmar_ctx_entry {
+ uint64_t ctx1;
+ uint64_t ctx2;
+} dmar_ctx_entry_t;
+#define DMAR_CTX1_P 1 /* Present */
+#define DMAR_CTX1_FPD 2 /* Fault Processing Disable */
+ /* Translation Type: */
+#define DMAR_CTX1_T_UNTR 0 /* only Untranslated */
+#define DMAR_CTX1_T_TR 4 /* both Untranslated
+ and Translated */
+#define DMAR_CTX1_T_PASS 8 /* Pass-Through */
+#define DMAR_CTX1_ASR_MASK 0xfffffffffffff000 /* Mask for the Address
+ Space Root */
+#define DMAR_CTX2_AW_2LVL 0 /* 2-level page tables */
+#define DMAR_CTX2_AW_3LVL 1 /* 3-level page tables */
+#define DMAR_CTX2_AW_4LVL 2 /* 4-level page tables */
+#define DMAR_CTX2_AW_5LVL 3 /* 5-level page tables */
+#define DMAR_CTX2_AW_6LVL 4 /* 6-level page tables */
+#define DMAR_CTX2_DID(x) ((x) << 8) /* Domain Identifier */
+
+typedef struct dmar_pte {
+ uint64_t pte;
+} dmar_pte_t;
+#define DMAR_PTE_R 1 /* Read */
+#define DMAR_PTE_W (1 << 1) /* Write */
+#define DMAR_PTE_SP (1 << 7) /* Super Page */
+#define DMAR_PTE_SNP (1 << 11) /* Snoop Behaviour */
+#define DMAR_PTE_ADDR_MASK 0xffffffffff000 /* Address Mask */
+#define DMAR_PTE_TM (1ULL << 62) /* Transient Mapping */
+
+/* Version register */
+#define DMAR_VER_REG 0
+#define DMAR_MAJOR_VER(x) (((x) >> 4) & 0xf)
+#define DMAR_MINOR_VER(x) ((x) & 0xf)
+
+/* Capabilities register */
+#define DMAR_CAP_REG 0x8
+#define DMAR_CAP_DRD (1ULL << 55) /* DMA Read Draining */
+#define DMAR_CAP_DWD (1ULL << 54) /* DMA Write Draining */
+#define DMAR_CAP_MAMV(x) ((u_int)(((x) >> 48) & 0x3f))
+ /* Maximum Address Mask */
+#define DMAR_CAP_NFR(x) ((u_int)(((x) >> 40) & 0xff) + 1)
+ /* Num of Fault-recording regs */
+#define DMAR_CAP_PSI (1ULL << 39) /* Page Selective Invalidation */
+#define DMAR_CAP_SPS(x) ((u_int)(((x) >> 34) & 0xf)) /* Super-Page Support */
+#define DMAR_CAP_SPS_2M 0x1
+#define DMAR_CAP_SPS_1G 0x2
+#define DMAR_CAP_SPS_512G 0x4
+#define DMAR_CAP_SPS_1T 0x8
+#define DMAR_CAP_FRO(x) ((u_int)(((x) >> 24) & 0x1ff))
+ /* Fault-recording reg offset */
+#define DMAR_CAP_ISOCH (1 << 23) /* Isochrony */
+#define DMAR_CAP_ZLR (1 << 22) /* Zero-length reads */
+#define DMAR_CAP_MGAW(x) ((u_int)(((x) >> 16) & 0x3f))
+ /* Max Guest Address Width */
+#define DMAR_CAP_SAGAW(x) ((u_int)(((x) >> 8) & 0x1f))
+ /* Adjusted Guest Address Width */
+#define DMAR_CAP_SAGAW_2LVL 0x01
+#define DMAR_CAP_SAGAW_3LVL 0x02
+#define DMAR_CAP_SAGAW_4LVL 0x04
+#define DMAR_CAP_SAGAW_5LVL 0x08
+#define DMAR_CAP_SAGAW_6LVL 0x10
+#define DMAR_CAP_CM (1 << 7) /* Caching mode */
+#define DMAR_CAP_PHMR (1 << 6) /* Protected High-mem Region */
+#define DMAR_CAP_PLMR (1 << 5) /* Protected Low-mem Region */
+#define DMAR_CAP_RWBF (1 << 4) /* Required Write-Buffer Flushing */
+#define DMAR_CAP_AFL (1 << 3) /* Advanced Fault Logging */
+#define DMAR_CAP_ND(x) ((u_int)((x) & 0x3)) /* Number of domains */
+
+/* Extended Capabilities register */
+#define DMAR_ECAP_REG 0x10
+#define DMAR_ECAP_MHMV(x) ((u_int)(((x) >> 20) & 0xf))
+ /* Maximum Handle Mask Value */
+#define DMAR_ECAP_IRO(x) ((u_int)(((x) >> 8) & 0x3ff))
+ /* IOTLB Register Offset */
+#define DMAR_ECAP_SC (1 << 7) /* Snoop Control */
+#define DMAR_ECAP_PT (1 << 6) /* Pass Through */
+#define DMAR_ECAP_EIM (1 << 4) /* Extended Interrupt Mode */
+#define DMAR_ECAP_IR (1 << 3) /* Interrupt Remapping */
+#define DMAR_ECAP_DI (1 << 2) /* Device IOTLB */
+#define DMAR_ECAP_QI (1 << 1) /* Queued Invalidation */
+#define DMAR_ECAP_C (1 << 0) /* Coherency */
+
+/* Global Command register */
+#define DMAR_GCMD_REG 0x18
+#define DMAR_GCMD_TE (1 << 31) /* Translation Enable */
+#define DMAR_GCMD_SRTP (1 << 30) /* Set Root Table Pointer */
+#define DMAR_GCMD_SFL (1 << 29) /* Set Fault Log */
+#define DMAR_GCMD_EAFL (1 << 28) /* Enable Advanced Fault Logging */
+#define DMAR_GCMD_WBF (1 << 27) /* Write Buffer Flush */
+#define DMAR_GCMD_QIE (1 << 26) /* Queued Invalidation Enable */
+#define DMAR_GCMD_IRE (1 << 25) /* Interrupt Remapping Enable */
+#define DMAR_GCMD_SIRTP (1 << 24) /* Set Interrupt Remap Table Pointer */
+#define DMAR_GCMD_CFI (1 << 23) /* Compatibility Format Interrupt */
+
+/* Global Status register */
+#define DMAR_GSTS_REG 0x1c
+#define DMAR_GSTS_TES (1 << 31) /* Translation Enable Status */
+#define DMAR_GSTS_RTPS (1 << 30) /* Root Table Pointer Status */
+#define DMAR_GSTS_FLS (1 << 29) /* Fault Log Status */
+#define DMAR_GSTS_AFLS (1 << 28) /* Advanced Fault Logging Status */
+#define DMAR_GSTS_WBFS (1 << 27) /* Write Buffer Flush Status */
+#define DMAR_GSTS_QIES (1 << 26) /* Queued Invalidation Enable Status */
+#define DMAR_GSTS_IRES (1 << 25) /* Interrupt Remapping Enable Status */
+#define DMAR_GSTS_IRTPS (1 << 24) /* Interrupt Remapping Table
+ Pointer Status */
+#define DMAR_GSTS_CFIS (1 << 23) /* Compatibility Format
+ Interrupt Status */
+
+/* Root-Entry Table Address register */
+#define DMAR_RTADDR_REG 0x20
+
+/* Context Command register */
+#define DMAR_CCMD_REG 0x28
+#define DMAR_CCMD_ICC (1ULL << 63) /* Invalidate Context-Cache */
+#define DMAR_CCMD_ICC32 (1 << 31)
+#define DMAR_CCMD_CIRG_MASK (0x3ULL << 61) /* Context Invalidation
+ Request Granularity */
+#define DMAR_CCMD_CIRG_GLOB (0x1ULL << 61) /* Global */
+#define DMAR_CCMD_CIRG_DOM (0x2ULL << 61) /* Domain */
+#define DMAR_CCMD_CIRG_DEV (0x3ULL << 61) /* Device */
+#define DMAR_CCMD_CAIG(x) (((x) >> 59) & 0x3) /* Context Actual
+ Invalidation Granularity */
+#define DMAR_CCMD_CAIG_GLOB 0x1 /* Global */
+#define DMAR_CCMD_CAIG_DOM 0x2 /* Domain */
+#define DMAR_CCMD_CAIG_DEV 0x3 /* Device */
+#define DMAR_CCMD_FM (0x3ULL << 32) /* Function Mask */
+#define DMAR_CCMD_SID(x) (((x) & 0xffff) << 16) /* Source-ID */
+#define DMAR_CCMD_DID(x) ((x) & 0xffff) /* Domain-ID */
+
+/* Invalidate Address register */
+#define DMAR_IVA_REG_OFF 0
+#define DMAR_IVA_IH (1 << 6) /* Invalidation Hint */
+#define DMAR_IVA_AM(x) ((x) & 0x1f) /* Address Mask */
+#define DMAR_IVA_ADDR(x) ((x) & ~0xfffULL) /* Address */
+
+/* IOTLB Invalidate register */
+#define DMAR_IOTLB_REG_OFF 0x8
+#define DMAR_IOTLB_IVT (1ULL << 63) /* Invalidate IOTLB */
+#define DMAR_IOTLB_IVT32 (1 << 31)
+#define DMAR_IOTLB_IIRG_MASK (0x3ULL << 60) /* Invalidation Request
+ Granularity */
+#define DMAR_IOTLB_IIRG_GLB (0x1ULL << 60) /* Global */
+#define DMAR_IOTLB_IIRG_DOM (0x2ULL << 60) /* Domain-selective */
+#define DMAR_IOTLB_IIRG_PAGE (0x3ULL << 60) /* Page-selective */
+#define DMAR_IOTLB_IAIG_MASK (0x3ULL << 57) /* Actual Invalidation
+ Granularity */
+#define DMAR_IOTLB_IAIG_INVLD 0 /* Hw detected error */
+#define DMAR_IOTLB_IAIG_GLB (0x1ULL << 57) /* Global */
+#define DMAR_IOTLB_IAIG_DOM (0x2ULL << 57) /* Domain-selective */
+#define DMAR_IOTLB_IAIG_PAGE (0x3ULL << 57) /* Page-selective */
+#define DMAR_IOTLB_DR (0x1ULL << 49) /* Drain Reads */
+#define DMAR_IOTLB_DW (0x1ULL << 48) /* Drain Writes */
+#define DMAR_IOTLB_DID(x) (((uint64_t)(x) & 0xffff) << 32) /* Domain Id */
+
+/* Fault Status register */
+#define DMAR_FSTS_REG 0x34
+#define DMAR_FSTS_FRI(x) (((x) >> 8) & 0xff) /* Fault Record Index */
+#define DMAR_FSTS_ITE (1 << 6) /* Invalidation Time-out */
+#define DMAR_FSTS_ICE (1 << 5) /* Invalidation Completion */
+#define DMAR_FSTS_IQE (1 << 4) /* Invalidation Queue */
+#define DMAR_FSTS_APF (1 << 3) /* Advanced Pending Fault */
+#define DMAR_FSTS_AFO (1 << 2) /* Advanced Fault Overflow */
+#define DMAR_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMAR_FSTS_PFO 1 /* Fault Overflow */
+
+/* Fault Event Control register */
+#define DMAR_FECTL_REG 0x38
+#define DMAR_FECTL_IM (1 << 31) /* Interrupt Mask */
+#define DMAR_FECTL_IP (1 << 30) /* Interrupt Pending */
+
+/* Fault Event Data register */
+#define DMAR_FEDATA_REG 0x3c
+
+/* Fault Event Address register */
+#define DMAR_FEADDR_REG 0x40
+
+/* Fault Event Upper Address register */
+#define DMAR_FEUADDR_REG 0x44
+
+/* Advanced Fault Log register */
+#define DMAR_AFLOG_REG 0x58
+
+/* Fault Recording Register, also usable for Advanced Fault Log records */
+#define DMAR_FRCD2_F (1ULL << 63) /* Fault */
+#define DMAR_FRCD2_F32 (1 << 31)
+#define DMAR_FRCD2_T(x) ((int)((x >> 62) & 1)) /* Type */
+#define DMAR_FRCD2_T_W 0 /* Write request */
+#define DMAR_FRCD2_T_R 1 /* Read or AtomicOp */
+#define DMAR_FRCD2_AT(x) ((int)((x >> 60) & 0x3)) /* Address Type */
+#define DMAR_FRCD2_FR(x) ((int)((x >> 32) & 0xff)) /* Fault Reason */
+#define DMAR_FRCD2_SID(x) ((int)(x & 0xffff)) /* Source Identifier */
+#define DMAR_FRCS1_FI_MASK 0xffffffffff000 /* Fault Info, Address Mask */
+
+/* Protected Memory Enable register */
+#define DMAR_PMEN_REG 0x64
+#define DMAR_PMEN_EPM (1 << 31) /* Enable Protected Memory */
+#define DMAR_PMEN_PRS 1 /* Protected Region Status */
+
+/* Protected Low-Memory Base register */
+#define DMAR_PLMBASE_REG 0x68
+
+/* Protected Low-Memory Limit register */
+#define DMAR_PLMLIMIT_REG 0x6c
+
+/* Protected High-Memory Base register */
+#define DMAR_PHMBASE_REG 0x70
+
+/* Protected High-Memory Limit register */
+#define DMAR_PHMLIMIT_REG 0x78
+
+/* Invalidation Queue Head register */
+#define DMAR_IQH_REG 0x80
+
+/* Invalidation Queue Tail register */
+#define DMAR_IQT_REG 0x88
+
+/* Invalidation Queue Address register */
+#define DMAR_IQA_REG 0x90
+
+/* Invalidation Completion Status register */
+#define DMAR_ICS_REG 0x9c
+#define DMAR_ICS_IWC 1 /* Invalidation Wait
+ Descriptor Complete */
+
+/* Invalidation Event Control register */
+#define DMAR_IECTL_REG 0xa0
+#define DMAR_IECTL_IM (1 << 31) /* Interrupt Mask */
+#define DMAR_IECTL_IP (1 << 30) /* Interrupt Pending */
+
+/* Invalidation Event Data register */
+#define DMAR_IEDATA_REG 0xa4
+
+/* Invalidation Event Address register */
+#define DMAR_IEADDR_REG 0xa8
+
+/* Invalidation Event Upper Address register */
+#define DMAR_IEUADDR_REG 0xac
+
+/* Interrupt Remapping Table Address register */
+#define DMAR_IRTA_REG 0xb8
+
+#endif
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
new file mode 100644
index 0000000..c9d047b
--- /dev/null
+++ b/sys/x86/iommu/intel_utils.c
@@ -0,0 +1,562 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+u_int
+dmar_nd2mask(u_int nd)
+{
+ static const u_int masks[] = {
+ 0x000f, /* nd == 0 */
+ 0x002f, /* nd == 1 */
+ 0x00ff, /* nd == 2 */
+ 0x02ff, /* nd == 3 */
+ 0x0fff, /* nd == 4 */
+ 0x2fff, /* nd == 5 */
+ 0xffff, /* nd == 6 */
+ 0x0000, /* nd == 7 reserved */
+ };
+
+ KASSERT(nd <= 6, ("number of domains %d", nd));
+ return (masks[nd]);
+}
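+
+/*
+ * For example, a unit reporting nd == 2 supports 2^(2 * 2 + 4) ==
+ * 256 domain ids, hence the 0x00ff mask above.
+ */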
+
+static const struct sagaw_bits_tag {
+ int agaw;
+ int cap;
+ int awlvl;
+ int pglvl;
+} sagaw_bits[] = {
+ {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
+ .pglvl = 2},
+ {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
+ .pglvl = 3},
+ {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
+ .pglvl = 4},
+ {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
+ .pglvl = 5},
+ {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL,
+ .pglvl = 6}
+};
+#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0]))
+
+bool
+dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
+{
+ int i;
+
+ for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+ if (sagaw_bits[i].pglvl != pglvl)
+ continue;
+ if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
+ return (true);
+ }
+ return (false);
+}
+
+int
+ctx_set_agaw(struct dmar_ctx *ctx, int mgaw)
+{
+ int sagaw, i;
+
+ ctx->mgaw = mgaw;
+ sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap);
+ for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+ if (sagaw_bits[i].agaw >= mgaw) {
+ ctx->agaw = sagaw_bits[i].agaw;
+ ctx->pglvl = sagaw_bits[i].pglvl;
+ ctx->awlvl = sagaw_bits[i].awlvl;
+ return (0);
+ }
+ }
+ device_printf(ctx->dmar->dev,
+ "context request mgaw %d for pci%d:%d:%d:%d, "
+ "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus,
+ ctx->slot, ctx->func, sagaw);
+ return (EINVAL);
+}
+
+/*
+ * Find the best-fit mgaw for the given maxaddr:
+ * - if allow_less is false, a sagaw which maps all requested
+ *   addresses must be found (used by identity mappings);
+ * - if allow_less is true and no supported sagaw can map all of the
+ *   requested address space, accept the biggest supported sagaw,
+ *   whatever it is.
+ */
+int
+dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less)
+{
+ int i;
+
+ for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+ if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
+ (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
+ break;
+ }
+ if (allow_less && i == SIZEOF_SAGAW_BITS) {
+ do {
+ i--;
+ } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
+ == 0);
+ }
+ if (i < SIZEOF_SAGAW_BITS)
+ return (sagaw_bits[i].agaw);
+ KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
+ (uintmax_t) maxaddr, allow_less));
+ return (-1);
+}
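+
+/*
+ * For example, on a unit reporting only 3- and 4-level support,
+ * maxaddr == 1ULL << 40 selects agaw 48 (agaw 39 is too small),
+ * while maxaddr == 1ULL << 60 with allow_less == true falls back
+ * to agaw 48, the largest width the unit supports.
+ */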
+
+/*
+ * Calculate the total number of page table pages needed to map the
+ * whole bus address space on the context with the selected agaw.
+ */
+vm_pindex_t
+pglvl_max_pages(int pglvl)
+{
+ vm_pindex_t res;
+ int i;
+
+ for (res = 0, i = pglvl; i > 0; i--) {
+ res *= DMAR_NPTEPG;
+ res++;
+ }
+ return (res);
+}
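+
+/*
+ * For example, pglvl == 4 gives 1 + 512 + 512^2 + 512^3 ==
+ * 134480385 pages, the worst case for a fully populated 48-bit
+ * space; the pages themselves are only allocated as needed.
+ */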
+
+/*
+ * Return true if the page table level lvl supports superpages for
+ * the context ctx.
+ */
+int
+ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl)
+{
+ int alvl, cap_sps;
+ static const int sagaw_sp[] = {
+ DMAR_CAP_SPS_2M,
+ DMAR_CAP_SPS_1G,
+ DMAR_CAP_SPS_512G,
+ DMAR_CAP_SPS_1T
+ };
+
+ alvl = ctx->pglvl - lvl - 1;
+ cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap);
+ return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) &&
+ (sagaw_sp[alvl] & cap_sps) != 0);
+}
+
+dmar_gaddr_t
+pglvl_page_size(int total_pglvl, int lvl)
+{
+ int rlvl;
+ static const dmar_gaddr_t pg_sz[] = {
+ (dmar_gaddr_t)DMAR_PAGE_SIZE,
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
+ };
+
+ KASSERT(lvl >= 0 && lvl < total_pglvl,
+ ("total %d lvl %d", total_pglvl, lvl));
+ rlvl = total_pglvl - lvl - 1;
+ KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]),
+ ("sizeof pg_sz lvl %d", lvl));
+ return (pg_sz[rlvl]);
+}
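+
+/*
+ * For example, with a 4-level table the leaf level 3 maps 4K per
+ * entry, level 2 maps 2M, level 1 maps 1G and level 0 maps 512G.
+ */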
+
+dmar_gaddr_t
+ctx_page_size(struct dmar_ctx *ctx, int lvl)
+{
+
+ return (pglvl_page_size(ctx->pglvl, lvl));
+}
+
+dmar_haddr_t dmar_high;
+int haw;
+int dmar_tbl_pagecnt;
+
+vm_page_t
+dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+ vm_page_t m;
+ int zeroed;
+
+ zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
+ for (;;) {
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) {
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ break;
+ }
+ m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
+ VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
+ dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ if (m != NULL) {
+ if (zeroed && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ atomic_add_int(&dmar_tbl_pagecnt, 1);
+ break;
+ }
+ if ((flags & DMAR_PGF_WAITOK) == 0)
+ break;
+ if ((flags & DMAR_PGF_OBJL) != 0)
+ VM_OBJECT_WUNLOCK(obj);
+ VM_WAIT;
+ if ((flags & DMAR_PGF_OBJL) != 0)
+ VM_OBJECT_WLOCK(obj);
+ }
+ return (m);
+}
+
+void
+dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+ vm_page_t m;
+
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if (m != NULL) {
+ vm_page_free(m);
+ atomic_subtract_int(&dmar_tbl_pagecnt, 1);
+ }
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+}
+
+void *
+dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
+ struct sf_buf **sf)
+{
+ vm_page_t m;
+ bool allocated;
+
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) {
+ m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL);
+ allocated = true;
+ } else
+ allocated = false;
+ if (m == NULL) {
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return (NULL);
+ }
+ /* Sleepable allocations cannot fail. */
+ if ((flags & DMAR_PGF_WAITOK) != 0)
+ VM_OBJECT_WUNLOCK(obj);
+ sched_pin();
+ *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK)
+ == 0 ? SFB_NOWAIT : 0));
+ if (*sf == NULL) {
+ sched_unpin();
+ if (allocated) {
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL);
+ }
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return (NULL);
+ }
+ if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) ==
+ (DMAR_PGF_WAITOK | DMAR_PGF_OBJL))
+ VM_OBJECT_WLOCK(obj);
+ else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return ((void *)sf_buf_kva(*sf));
+}
+
+void
+dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
+{
+ vm_page_t m;
+
+ m = sf_buf_page(sf);
+ sf_buf_free(sf);
+ sched_unpin();
+
+ /*
+ * If DMAR does not snoop paging structures accesses, flush
+ * CPU cache to memory.
+ */
+ if (!coherent)
+ pmap_invalidate_cache_pages(&m, 1);
+}
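+
+/*
+ * The two helpers above are used in pairs, as in ctx_pgtbl_map_pte():
+ *
+ *	pte = dmar_map_pgtbl(obj, idx, flags, &sf);
+ *	... access the page table page through pte ...
+ *	dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(unit));
+ *
+ * The thread stays pinned to its CPU between the calls because the
+ * sf_buf is allocated with SFB_CPUPRIVATE.
+ */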
+
+/*
+ * Load the root entry pointer into the hardware, busily waiting for
+ * the completion.
+ */
+int
+dmar_load_root_entry_ptr(struct dmar_unit *unit)
+{
+ vm_page_t root_entry;
+
+ /*
+ * Access to the GCMD register must be serialized while the
+ * command is submitted.
+ */
+ DMAR_ASSERT_LOCKED(unit);
+
+ /* VM_OBJECT_RLOCK(unit->ctx_obj); */
+ VM_OBJECT_WLOCK(unit->ctx_obj);
+ root_entry = vm_page_lookup(unit->ctx_obj, 0);
+ /* VM_OBJECT_RUNLOCK(unit->ctx_obj); */
+ VM_OBJECT_WUNLOCK(unit->ctx_obj);
+ dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0)
+ cpu_spinwait();
+ return (0);
+}
+
+/*
+ * Globally invalidate the context entries cache, busily waiting for
+ * the completion.
+ */
+int
+dmar_inv_ctx_glob(struct dmar_unit *unit)
+{
+
+ /*
+ * Access to the CCMD register must be serialized while the
+ * command is submitted.
+ */
+ DMAR_ASSERT_LOCKED(unit);
+
+ /*
+ * The DMAR_CCMD_ICC bit in the upper dword should be written
+ * after the low dword write is completed. The amd64
+ * dmar_write8() does not have this issue; the i386 dmar_write8()
+ * writes the upper dword last.
+ */
+ dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0)
+ cpu_spinwait();
+ return (0);
+}
+
+/*
+ * Globally invalidate the IOTLB, busily waiting for the completion.
+ */
+int
+dmar_inv_iotlb_glob(struct dmar_unit *unit)
+{
+ int reg;
+
+ DMAR_ASSERT_LOCKED(unit);
+
+ reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
+ /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
+ dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
+ DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
+ DMAR_IOTLB_IVT32) != 0)
+ cpu_spinwait();
+ return (0);
+}
+
+/*
+ * Flush the chipset write buffers. See 11.1 "Write Buffer Flushing"
+ * in the architecture specification.
+ */
+int
+dmar_flush_write_bufs(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+
+ /*
+ * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
+ */
+ KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
+ ("dmar%d: no RWBF", unit->unit));
+
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0)
+ cpu_spinwait();
+ return (0);
+}
+
+int
+dmar_enable_translation(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ unit->hw_gcmd |= DMAR_GCMD_TE;
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)
+ cpu_spinwait();
+ return (0);
+}
+
+int
+dmar_disable_translation(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ unit->hw_gcmd &= ~DMAR_GCMD_TE;
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)
+ cpu_spinwait();
+ return (0);
+}
+
+void
+dmar_enable_intr(struct dmar_unit *unit)
+{
+ uint32_t fectl;
+
+ fectl = dmar_read4(unit, DMAR_FECTL_REG);
+ fectl &= ~DMAR_FECTL_IM;
+ dmar_write4(unit, DMAR_FECTL_REG, fectl);
+}
+
+void
+dmar_disable_intr(struct dmar_unit *unit)
+{
+ uint32_t fectl;
+
+ fectl = dmar_read4(unit, DMAR_FECTL_REG);
+ dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM);
+}
+
+#define BARRIER_F \
+ u_int f_done, f_inproc, f_wakeup; \
+ \
+ f_done = 1 << (barrier_id * 3); \
+ f_inproc = 1 << (barrier_id * 3 + 1); \
+ f_wakeup = 1 << (barrier_id * 3 + 2)
+
+bool
+dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
+{
+ BARRIER_F;
+
+ DMAR_LOCK(dmar);
+ if ((dmar->barrier_flags & f_done) != 0) {
+ DMAR_UNLOCK(dmar);
+ return (false);
+ }
+
+ if ((dmar->barrier_flags & f_inproc) != 0) {
+ while ((dmar->barrier_flags & f_inproc) != 0) {
+ dmar->barrier_flags |= f_wakeup;
+ msleep(&dmar->barrier_flags, &dmar->lock, 0,
+ "dmarb", 0);
+ }
+ KASSERT((dmar->barrier_flags & f_done) != 0,
+ ("dmar%d barrier %d missing done", dmar->unit, barrier_id));
+ DMAR_UNLOCK(dmar);
+ return (false);
+ }
+
+ dmar->barrier_flags |= f_inproc;
+ DMAR_UNLOCK(dmar);
+ return (true);
+}
+
+void
+dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
+{
+ BARRIER_F;
+
+ DMAR_ASSERT_LOCKED(dmar);
+ KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
+ ("dmar%d barrier %d missed entry", dmar->unit, barrier_id));
+ dmar->barrier_flags |= f_done;
+ if ((dmar->barrier_flags & f_wakeup) != 0)
+ wakeup(&dmar->barrier_flags);
+ dmar->barrier_flags &= ~(f_inproc | f_wakeup);
+ DMAR_UNLOCK(dmar);
+}
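+
+/*
+ * The barrier implements one-time initialization; a typical user
+ * looks like dmar_quirks_pre_use():
+ *
+ *	if (!dmar_barrier_enter(dmar, DMAR_BARRIER_USEQ))
+ *		return;
+ *	DMAR_LOCK(dmar);
+ *	... perform the one-time work ...
+ *	dmar_barrier_exit(dmar, DMAR_BARRIER_USEQ);
+ *
+ * dmar_barrier_exit() drops the DMAR lock.
+ */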
+
+int dmar_match_verbose;
+
+static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL,
+ "");
+SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN,
+ &dmar_tbl_pagecnt, 0,
+ "Count of pages used for DMAR pagetables");
+SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN,
+ &dmar_match_verbose, 0,
+ "Verbose matching of the PCI devices to DMAR paths");
+#ifdef INVARIANTS
+int dmar_check_free;
+SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN,
+ &dmar_check_free, 0,
+ "Check the GPA RBtree for free_down and free_after validity");
+#endif
+