-rw-r--r--  sys/amd64/conf/GENERIC        |    1
-rw-r--r--  sys/conf/files.amd64          |    9
-rw-r--r--  sys/conf/files.i386           |    9
-rw-r--r--  sys/conf/options              |    1
-rw-r--r--  sys/dev/acpica/acpi_pci.c     |   29
-rw-r--r--  sys/i386/conf/NOTES           |    1
-rw-r--r--  sys/x86/include/busdma_impl.h |    1
-rw-r--r--  sys/x86/iommu/busdma_dmar.c   |  752
-rw-r--r--  sys/x86/iommu/busdma_dmar.h   |   65
-rw-r--r--  sys/x86/iommu/intel_ctx.c     |  631
-rw-r--r--  sys/x86/iommu/intel_dmar.h    |  435
-rw-r--r--  sys/x86/iommu/intel_drv.c     | 1182
-rw-r--r--  sys/x86/iommu/intel_fault.c   |  315
-rw-r--r--  sys/x86/iommu/intel_gas.c     |  724
-rw-r--r--  sys/x86/iommu/intel_idpgtbl.c |  783
-rw-r--r--  sys/x86/iommu/intel_qi.c      |  414
-rw-r--r--  sys/x86/iommu/intel_quirks.c  |  195
-rw-r--r--  sys/x86/iommu/intel_reg.h     |  330
-rw-r--r--  sys/x86/iommu/intel_utils.c   |  563
19 files changed, 6439 insertions(+), 1 deletion(-)
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
index 3b48f0f..6b3f648 100644
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -85,6 +85,7 @@ device cpufreq
# Bus support.
device acpi
+options ACPI_DMAR
device pci
# Floppy drives
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index e1d1857..33c4297 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -531,6 +531,15 @@ x86/cpufreq/powernow.c optional cpufreq
x86/cpufreq/est.c optional cpufreq
x86/cpufreq/hwpstate.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
+x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci
+x86/iommu/intel_ctx.c optional acpi acpi_dmar pci
+x86/iommu/intel_drv.c optional acpi acpi_dmar pci
+x86/iommu/intel_fault.c optional acpi acpi_dmar pci
+x86/iommu/intel_gas.c optional acpi acpi_dmar pci
+x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci
+x86/iommu/intel_qi.c optional acpi acpi_dmar pci
+x86/iommu/intel_quirks.c optional acpi acpi_dmar pci
+x86/iommu/intel_utils.c optional acpi acpi_dmar pci
x86/isa/atpic.c optional atpic isa
x86/isa/atrtc.c standard
x86/isa/clock.c standard
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index d946425..7e6e54a 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -566,6 +566,15 @@ x86/cpufreq/hwpstate.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/cpufreq/powernow.c optional cpufreq
x86/cpufreq/smist.c optional cpufreq
+x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci
+x86/iommu/intel_ctx.c optional acpi acpi_dmar pci
+x86/iommu/intel_drv.c optional acpi acpi_dmar pci
+x86/iommu/intel_fault.c optional acpi acpi_dmar pci
+x86/iommu/intel_gas.c optional acpi acpi_dmar pci
+x86/iommu/intel_idpgtbl.c optional acpi acpi_dmar pci
+x86/iommu/intel_qi.c optional acpi acpi_dmar pci
+x86/iommu/intel_quirks.c optional acpi acpi_dmar pci
+x86/iommu/intel_utils.c optional acpi acpi_dmar pci
x86/isa/atpic.c optional atpic
x86/isa/atrtc.c optional native
x86/isa/clock.c optional native
diff --git a/sys/conf/options b/sys/conf/options
index a4c785e..642064d 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -688,6 +688,7 @@ OPENSOLARIS_WITNESS opt_global.h
ACPI_DEBUG opt_acpi.h
ACPI_MAX_TASKS opt_acpi.h
ACPI_MAX_THREADS opt_acpi.h
+ACPI_DMAR opt_acpi.h
# ISA support
DEV_ISA opt_isa.h
diff --git a/sys/dev/acpica/acpi_pci.c b/sys/dev/acpica/acpi_pci.c
index 39fba88..78d8639 100644
--- a/sys/dev/acpica/acpi_pci.c
+++ b/sys/dev/acpica/acpi_pci.c
@@ -29,6 +29,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_acpi.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
@@ -80,6 +82,7 @@ static ACPI_STATUS acpi_pci_save_handle(ACPI_HANDLE handle, UINT32 level,
static int acpi_pci_set_powerstate_method(device_t dev, device_t child,
int state);
static void acpi_pci_update_device(ACPI_HANDLE handle, device_t pci_child);
+static bus_dma_tag_t acpi_pci_get_dma_tag(device_t bus, device_t child);
static device_method_t acpi_pci_methods[] = {
/* Device interface */
@@ -90,6 +93,7 @@ static device_method_t acpi_pci_methods[] = {
DEVMETHOD(bus_read_ivar, acpi_pci_read_ivar),
DEVMETHOD(bus_write_ivar, acpi_pci_write_ivar),
DEVMETHOD(bus_child_location_str, acpi_pci_child_location_str_method),
+ DEVMETHOD(bus_get_dma_tag, acpi_pci_get_dma_tag),
/* PCI interface */
DEVMETHOD(pci_set_powerstate, acpi_pci_set_powerstate_method),
@@ -308,3 +312,28 @@ acpi_pci_attach(device_t dev)
return (bus_generic_attach(dev));
}
+
+#ifdef ACPI_DMAR
+bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child);
+static bus_dma_tag_t
+acpi_pci_get_dma_tag(device_t bus, device_t child)
+{
+ bus_dma_tag_t tag;
+
+ if (device_get_parent(child) == bus) {
+ /* try dmar and return if it works */
+ tag = dmar_get_dma_tag(bus, child);
+ } else
+ tag = NULL;
+ if (tag == NULL)
+ tag = pci_get_dma_tag(bus, child);
+ return (tag);
+}
+#else
+static bus_dma_tag_t
+acpi_pci_get_dma_tag(device_t bus, device_t child)
+{
+
+ return (pci_get_dma_tag(bus, child));
+}
+#endif
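
With the bus_get_dma_tag method installed above, PCI drivers pick up the DMAR-backed tag transparently through the normal busdma(9) path. A minimal sketch of the consumer side, assuming a hypothetical foo_attach() and illustrative tag parameters (not part of this commit):

static int
foo_attach(device_t dev)
{
        bus_dma_tag_t tag;
        int error;

        /*
         * bus_get_dma_tag(dev) resolves through acpi_pci_get_dma_tag()
         * above, which hands back the DMAR context tag for an immediate
         * child covered by a DMAR unit, or the plain PCI tag otherwise.
         */
        error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
            1, 0,                       /* alignment, boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            DFLTPHYS, 1, DFLTPHYS,      /* maxsize, nsegments, maxsegsz */
            0, NULL, NULL, &tag);       /* flags, lockfunc, lockarg, tag */
        return (error);
}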
diff --git a/sys/i386/conf/NOTES b/sys/i386/conf/NOTES
index e236f10..212dbd9 100644
--- a/sys/i386/conf/NOTES
+++ b/sys/i386/conf/NOTES
@@ -491,6 +491,7 @@ device tdfx_linux # Enable Linuxulator support
device acpi
options ACPI_DEBUG
+options ACPI_DMAR
# ACPI WMI Mapping driver
device acpi_wmi
diff --git a/sys/x86/include/busdma_impl.h b/sys/x86/include/busdma_impl.h
index 70c9a6f..b851e7a 100644
--- a/sys/x86/include/busdma_impl.h
+++ b/sys/x86/include/busdma_impl.h
@@ -82,7 +82,6 @@ struct bus_dma_impl {
bus_dmasync_op_t op);
};
-void busdma_lock_mutex(void *arg, bus_dma_lock_op_t op);
void bus_dma_dflt_lock(void *arg, bus_dma_lock_op_t op);
int bus_dma_run_filter(struct bus_dma_tag_common *dmat, bus_addr_t paddr);
int common_bus_dma_tag_create(struct bus_dma_tag_common *parent,
diff --git a/sys/x86/iommu/busdma_dmar.c b/sys/x86/iommu/busdma_dmar.c
new file mode 100644
index 0000000..cada29d
--- /dev/null
+++ b/sys/x86/iommu/busdma_dmar.c
@@ -0,0 +1,752 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <dev/pci/pcivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+/*
+ * busdma_dmar.c, the implementation of the busdma(9) interface using
+ * DMAR units from Intel VT-d.
+ */
+
+static bool
+dmar_bus_dma_is_dev_disabled(device_t dev)
+{
+ char str[128], *env;
+ int domain, bus, slot, func;
+
+ domain = pci_get_domain(dev);
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d.bounce",
+ domain, bus, slot, func);
+ env = getenv(str);
+ if (env == NULL)
+ return (false);
+ freeenv(env);
+ return (true);
+}
+
+struct dmar_ctx *
+dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev, bool rmrr)
+{
+ struct dmar_ctx *ctx;
+ bool disabled;
+
+ /*
+ * If the user requested the IOMMU disabled for the device, we
+ * cannot disable the DMAR, due to the possibility of other
+ * devices on the same DMAR still requiring translation.
+ * Instead, provide the identity mapping for the device
+ * context.
+ */
+ disabled = dmar_bus_dma_is_dev_disabled(dev);
+ ctx = dmar_get_ctx(dmar, dev, disabled, rmrr);
+ if (ctx == NULL)
+ return (NULL);
+ ctx->ctx_tag.owner = dev;
+ if (disabled) {
+ /*
+ * Keep the first reference on the context, release the
+ * later refs.
+ */
+ DMAR_LOCK(dmar);
+ if ((ctx->flags & DMAR_CTX_DISABLED) == 0) {
+ ctx->flags |= DMAR_CTX_DISABLED;
+ DMAR_UNLOCK(dmar);
+ } else {
+ dmar_free_ctx_locked(dmar, ctx);
+ }
+ ctx = NULL;
+ }
+ return (ctx);
+}
+
+bus_dma_tag_t
+dmar_get_dma_tag(device_t dev, device_t child)
+{
+ struct dmar_unit *dmar;
+ struct dmar_ctx *ctx;
+ bus_dma_tag_t res;
+
+ dmar = dmar_find(child);
+ /* Not in scope of any DMAR? */
+ if (dmar == NULL)
+ return (NULL);
+ dmar_quirks_pre_use(dmar);
+ dmar_instantiate_rmrr_ctxs(dmar);
+
+ ctx = dmar_instantiate_ctx(dmar, child, false);
+ res = ctx == NULL ? NULL : (bus_dma_tag_t)&ctx->ctx_tag;
+ return (res);
+}
+
+static MALLOC_DEFINE(M_DMAR_DMAMAP, "dmar_dmamap", "Intel DMAR DMA Map");
+
+static void dmar_bus_schedule_dmamap(struct dmar_unit *unit,
+ struct bus_dmamap_dmar *map);
+
+static int
+dmar_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
+ bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
+ bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
+ int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
+ void *lockfuncarg, bus_dma_tag_t *dmat)
+{
+ struct bus_dma_tag_dmar *newtag, *oldtag;
+ int error;
+
+ *dmat = NULL;
+ error = common_bus_dma_tag_create(parent != NULL ?
+ &((struct bus_dma_tag_dmar *)parent)->common : NULL, alignment,
+ boundary, lowaddr, highaddr, filter, filterarg, maxsize,
+ nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
+ sizeof(struct bus_dma_tag_dmar), (void **)&newtag);
+ if (error != 0)
+ goto out;
+
+ oldtag = (struct bus_dma_tag_dmar *)parent;
+ newtag->common.impl = &bus_dma_dmar_impl;
+ newtag->ctx = oldtag->ctx;
+ newtag->owner = oldtag->owner;
+
+ *dmat = (bus_dma_tag_t)newtag;
+out:
+ CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
+ __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
+ error);
+ return (error);
+}
+
+static int
+dmar_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
+{
+ struct bus_dma_tag_dmar *dmat, *dmat_copy, *parent;
+ int error;
+
+ error = 0;
+ dmat_copy = dmat = (struct bus_dma_tag_dmar *)dmat1;
+
+ if (dmat != NULL) {
+ if (dmat->map_count != 0) {
+ error = EBUSY;
+ goto out;
+ }
+ while (dmat != NULL) {
+ parent = (struct bus_dma_tag_dmar *)dmat->common.parent;
+ if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
+ 1) {
+ if (dmat == &dmat->ctx->ctx_tag)
+ dmar_free_ctx(dmat->ctx);
+ free(dmat->segments, M_DMAR_DMAMAP);
+ free(dmat, M_DEVBUF);
+ dmat = parent;
+ } else
+ dmat = NULL;
+ }
+ }
+out:
+ CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = malloc(sizeof(*map), M_DMAR_DMAMAP, M_NOWAIT | M_ZERO);
+ if (map == NULL) {
+ *mapp = NULL;
+ return (ENOMEM);
+ }
+ if (tag->segments == NULL) {
+ tag->segments = malloc(sizeof(bus_dma_segment_t) *
+ tag->common.nsegments, M_DMAR_DMAMAP, M_NOWAIT);
+ if (tag->segments == NULL) {
+ free(map, M_DMAR_DMAMAP);
+ *mapp = NULL;
+ return (ENOMEM);
+ }
+ }
+ TAILQ_INIT(&map->map_entries);
+ map->tag = tag;
+ map->locked = true;
+ map->cansleep = false;
+ tag->map_count++;
+ *mapp = (bus_dmamap_t)map;
+
+ return (0);
+}
+
+static int
+dmar_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ if (map != NULL) {
+ DMAR_CTX_LOCK(tag->ctx);
+ if (!TAILQ_EMPTY(&map->map_entries)) {
+ DMAR_CTX_UNLOCK(tag->ctx);
+ return (EBUSY);
+ }
+ DMAR_CTX_UNLOCK(tag->ctx);
+ free(map, M_DMAR_DMAMAP);
+ }
+ tag->map_count--;
+ return (0);
+}
+
+
+static int
+dmar_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
+ bus_dmamap_t *mapp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ int error, mflags;
+ vm_memattr_t attr;
+
+ error = dmar_bus_dmamap_create(dmat, flags, mapp);
+ if (error != 0)
+ return (error);
+
+ mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
+ mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
+ attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
+ VM_MEMATTR_DEFAULT;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)*mapp;
+
+ if (tag->common.maxsize < PAGE_SIZE &&
+ tag->common.alignment <= tag->common.maxsize &&
+ attr == VM_MEMATTR_DEFAULT) {
+ *vaddr = malloc(tag->common.maxsize, M_DEVBUF, mflags);
+ map->flags |= BUS_DMAMAP_DMAR_MALLOC;
+ } else {
+ *vaddr = (void *)kmem_alloc_attr(kernel_arena,
+ tag->common.maxsize, mflags, 0ul, BUS_SPACE_MAXADDR,
+ attr);
+ map->flags |= BUS_DMAMAP_DMAR_KMEM_ALLOC;
+ }
+ if (*vaddr == NULL) {
+ dmar_bus_dmamap_destroy(dmat, *mapp);
+ *mapp = NULL;
+ return (ENOMEM);
+ }
+ return (0);
+}
+
+static void
+dmar_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+
+ if ((map->flags & BUS_DMAMAP_DMAR_MALLOC) != 0) {
+ free(vaddr, M_DEVBUF);
+ map->flags &= ~BUS_DMAMAP_DMAR_MALLOC;
+ } else {
+ KASSERT((map->flags & BUS_DMAMAP_DMAR_KMEM_ALLOC) != 0,
+ ("dmar_bus_dmamem_free for non alloced map %p", map));
+ kmem_free(kernel_arena, (vm_offset_t)vaddr, tag->common.maxsize);
+ map->flags &= ~BUS_DMAMAP_DMAR_KMEM_ALLOC;
+ }
+
+ dmar_bus_dmamap_destroy(dmat, map1);
+}
+
+static int
+dmar_bus_dmamap_load_something1(struct bus_dma_tag_dmar *tag,
+ struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
+ int flags, bus_dma_segment_t *segs, int *segp,
+ struct dmar_map_entries_tailq *unroll_list)
+{
+ struct dmar_ctx *ctx;
+ struct dmar_map_entry *entry;
+ dmar_gaddr_t size;
+ bus_size_t buflen1;
+ int error, idx, gas_flags, seg;
+
+ if (segs == NULL)
+ segs = tag->segments;
+ ctx = tag->ctx;
+ seg = *segp;
+ error = 0;
+ idx = 0;
+ while (buflen > 0) {
+ seg++;
+ if (seg >= tag->common.nsegments) {
+ error = EFBIG;
+ break;
+ }
+ buflen1 = buflen > tag->common.maxsegsz ?
+ tag->common.maxsegsz : buflen;
+ buflen -= buflen1;
+ size = round_page(offset + buflen1);
+
+ /*
+ * (Too) optimistically allow a split if there is more
+ * than one segment left.
+ */
+ gas_flags = map->cansleep ? DMAR_GM_CANWAIT : 0;
+ if (seg + 1 < tag->common.nsegments)
+ gas_flags |= DMAR_GM_CANSPLIT;
+
+ error = dmar_gas_map(ctx, &tag->common, size,
+ DMAR_MAP_ENTRY_READ | DMAR_MAP_ENTRY_WRITE,
+ gas_flags, ma + idx, &entry);
+ if (error != 0)
+ break;
+ if ((gas_flags & DMAR_GM_CANSPLIT) != 0) {
+ KASSERT(size >= entry->end - entry->start,
+ ("split increased entry size %jx %jx %jx",
+ (uintmax_t)size, (uintmax_t)entry->start,
+ (uintmax_t)entry->end));
+ size = entry->end - entry->start;
+ if (buflen1 > size)
+ buflen1 = size;
+ } else {
+ KASSERT(entry->end - entry->start == size,
+ ("no split allowed %jx %jx %jx",
+ (uintmax_t)size, (uintmax_t)entry->start,
+ (uintmax_t)entry->end));
+ }
+
+ KASSERT(((entry->start + offset) & (tag->common.alignment - 1))
+ == 0,
+ ("alignment failed: ctx %p start 0x%jx offset %x "
+ "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
+ (uintmax_t)tag->common.alignment));
+ KASSERT(entry->end <= tag->common.lowaddr ||
+ entry->start >= tag->common.highaddr,
+ ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
+ "lowaddr 0x%jx highaddr 0x%jx", ctx,
+ (uintmax_t)entry->start, (uintmax_t)entry->end,
+ (uintmax_t)tag->common.lowaddr,
+ (uintmax_t)tag->common.highaddr));
+ KASSERT(dmar_test_boundary(entry->start, entry->end -
+ entry->start, tag->common.boundary),
+ ("boundary failed: ctx %p start 0x%jx end 0x%jx "
+ "boundary 0x%jx", ctx, (uintmax_t)entry->start,
+ (uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
+ KASSERT(buflen1 <= tag->common.maxsegsz,
+ ("segment too large: ctx %p start 0x%jx end 0x%jx "
+ "maxsegsz 0x%jx", ctx, (uintmax_t)entry->start,
+ (uintmax_t)entry->end, (uintmax_t)tag->common.maxsegsz));
+
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
+ entry->flags |= DMAR_MAP_ENTRY_MAP;
+ DMAR_CTX_UNLOCK(ctx);
+ TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);
+
+ segs[seg].ds_addr = entry->start + offset;
+ segs[seg].ds_len = buflen1;
+
+ idx += OFF_TO_IDX(trunc_page(offset + buflen1));
+ offset += buflen1;
+ offset &= DMAR_PAGE_MASK;
+ }
+ if (error == 0)
+ *segp = seg;
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_load_something(struct bus_dma_tag_dmar *tag,
+ struct bus_dmamap_dmar *map, vm_page_t *ma, int offset, bus_size_t buflen,
+ int flags, bus_dma_segment_t *segs, int *segp)
+{
+ struct dmar_ctx *ctx;
+ struct dmar_map_entry *entry, *entry1;
+ struct dmar_map_entries_tailq unroll_list;
+ int error;
+
+ ctx = tag->ctx;
+ atomic_add_long(&ctx->loads, 1);
+
+ TAILQ_INIT(&unroll_list);
+ error = dmar_bus_dmamap_load_something1(tag, map, ma, offset,
+ buflen, flags, segs, segp, &unroll_list);
+ if (error != 0) {
+ /*
+ * The busdma interface does not allow us to report
+ * a partial buffer load, so unfortunately we have to
+ * revert all the work done.
+ */
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
+ entry1) {
+ /*
+ * No entries other than the ones we created
+ * during the failed run could have been
+ * inserted there in between, since we own the
+ * ctx pglock.
+ */
+ TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
+ TAILQ_REMOVE(&unroll_list, entry, unroll_link);
+ TAILQ_INSERT_TAIL(&ctx->unload_entries, entry,
+ dmamap_link);
+ }
+ DMAR_CTX_UNLOCK(ctx);
+ taskqueue_enqueue(ctx->dmar->delayed_taskqueue,
+ &ctx->unload_task);
+ }
+
+ if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
+ !map->cansleep)
+ error = EINPROGRESS;
+ if (error == EINPROGRESS)
+ dmar_bus_schedule_dmamap(ctx->dmar, map);
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
+ bus_dma_segment_t *segs, int *segp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ return (dmar_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
+ flags, segs, segp));
+}
+
+static int
+dmar_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
+ int *segp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ vm_page_t *ma;
+ vm_paddr_t pstart, pend;
+ int error, i, ma_cnt, offset;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ pstart = trunc_page(buf);
+ pend = round_page(buf + buflen);
+ offset = buf & PAGE_MASK;
+ ma_cnt = OFF_TO_IDX(pend - pstart);
+ ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
+ M_WAITOK : M_NOWAIT);
+ if (ma == NULL)
+ return (ENOMEM);
+ for (i = 0; i < ma_cnt; i++)
+ ma[i] = PHYS_TO_VM_PAGE(pstart + i * PAGE_SIZE);
+ error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
+ flags, segs, segp);
+ free(ma, M_DEVBUF);
+ return (error);
+}
+
+static int
+dmar_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
+ bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
+ int *segp)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ vm_page_t *ma, fma;
+ vm_paddr_t pstart, pend, paddr;
+ int error, i, ma_cnt, offset;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ pstart = trunc_page((vm_offset_t)buf);
+ pend = round_page((vm_offset_t)buf + buflen);
+ offset = (vm_offset_t)buf & PAGE_MASK;
+ ma_cnt = OFF_TO_IDX(pend - pstart);
+ ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, map->cansleep ?
+ M_WAITOK : M_NOWAIT);
+ if (ma == NULL)
+ return (ENOMEM);
+ if (dumping) {
+ /*
+ * If dumping, do not attempt to call
+ * PHYS_TO_VM_PAGE() at all. It may return non-NULL
+ * but the returned vm_page might not be initialized,
+ * e.g. for the kernel itself.
+ */
+ KASSERT(pmap == kernel_pmap, ("non-kernel address write"));
+ fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF,
+ M_ZERO | (map->cansleep ? M_WAITOK : M_NOWAIT));
+ if (fma == NULL) {
+ free(ma, M_DEVBUF);
+ return (ENOMEM);
+ }
+ for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
+ paddr = pmap_kextract(pstart);
+ vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
+ ma[i] = &fma[i];
+ }
+ } else {
+ fma = NULL;
+ for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
+ if (pmap == kernel_pmap)
+ paddr = pmap_kextract(pstart);
+ else
+ paddr = pmap_extract(pmap, pstart);
+ ma[i] = PHYS_TO_VM_PAGE(paddr);
+ KASSERT(VM_PAGE_TO_PHYS(ma[i]) == paddr,
+ ("PHYS_TO_VM_PAGE failed %jx %jx m %p",
+ (uintmax_t)paddr, (uintmax_t)VM_PAGE_TO_PHYS(ma[i]),
+ ma[i]));
+ }
+ }
+ error = dmar_bus_dmamap_load_something(tag, map, ma, offset, buflen,
+ flags, segs, segp);
+ free(ma, M_DEVBUF);
+ free(fma, M_DEVBUF);
+ return (error);
+}
+
+static void
+dmar_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
+{
+ struct bus_dmamap_dmar *map;
+
+ if (map1 == NULL)
+ return;
+ map = (struct bus_dmamap_dmar *)map1;
+ map->mem = *mem;
+ map->tag = (struct bus_dma_tag_dmar *)dmat;
+ map->callback = callback;
+ map->callback_arg = callback_arg;
+}
+
+static bus_dma_segment_t *
+dmar_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
+ bus_dma_segment_t *segs, int nsegs, int error)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+
+ if (!map->locked) {
+ KASSERT(map->cansleep,
+ ("map not locked and not sleepable context %p", map));
+
+ /*
+ * We are called from the delayed context. Relock the
+ * driver.
+ */
+ (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
+ map->locked = true;
+ }
+
+ if (segs == NULL)
+ segs = tag->segments;
+ return (segs);
+}
+
+/*
+ * The limitations of the busdma KPI force the dmar to perform the actual
+ * unload, consisting of unmapping the map entries from the page tables,
+ * from a delayed context on i386, since mapping a page table page
+ * might require a sleep to be successful. The unfortunate
+ * consequence is that DMA requests can still be served for some time after
+ * the bus_dmamap_unload() call has returned.
+ *
+ * On amd64, we assume that sf allocation cannot fail.
+ */
+static void
+dmar_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ struct dmar_ctx *ctx;
+#if defined(__amd64__)
+ struct dmar_map_entries_tailq entries;
+#endif
+
+ tag = (struct bus_dma_tag_dmar *)dmat;
+ map = (struct bus_dmamap_dmar *)map1;
+ ctx = tag->ctx;
+ atomic_add_long(&ctx->unloads, 1);
+
+#if defined(__i386__)
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_CONCAT(&ctx->unload_entries, &map->map_entries, dmamap_link);
+ DMAR_CTX_UNLOCK(ctx);
+ taskqueue_enqueue(ctx->dmar->delayed_taskqueue, &ctx->unload_task);
+#else /* defined(__amd64__) */
+ TAILQ_INIT(&entries);
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
+ DMAR_CTX_UNLOCK(ctx);
+ THREAD_NO_SLEEPING();
+ dmar_ctx_unload(ctx, &entries, false);
+ THREAD_SLEEPING_OK();
+ KASSERT(TAILQ_EMPTY(&entries), ("lazy dmar_ctx_unload %p", ctx));
+#endif
+}
+
+static void
+dmar_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map,
+ bus_dmasync_op_t op)
+{
+}
+
+struct bus_dma_impl bus_dma_dmar_impl = {
+ .tag_create = dmar_bus_dma_tag_create,
+ .tag_destroy = dmar_bus_dma_tag_destroy,
+ .map_create = dmar_bus_dmamap_create,
+ .map_destroy = dmar_bus_dmamap_destroy,
+ .mem_alloc = dmar_bus_dmamem_alloc,
+ .mem_free = dmar_bus_dmamem_free,
+ .load_phys = dmar_bus_dmamap_load_phys,
+ .load_buffer = dmar_bus_dmamap_load_buffer,
+ .load_ma = dmar_bus_dmamap_load_ma,
+ .map_waitok = dmar_bus_dmamap_waitok,
+ .map_complete = dmar_bus_dmamap_complete,
+ .map_unload = dmar_bus_dmamap_unload,
+ .map_sync = dmar_bus_dmamap_sync
+};
+
+static void
+dmar_bus_task_dmamap(void *arg, int pending)
+{
+ struct bus_dma_tag_dmar *tag;
+ struct bus_dmamap_dmar *map;
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+
+ unit = arg;
+ DMAR_LOCK(unit);
+ while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
+ TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
+ DMAR_UNLOCK(unit);
+ tag = map->tag;
+ ctx = map->tag->ctx;
+ map->cansleep = true;
+ map->locked = false;
+ bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
+ &map->mem, map->callback, map->callback_arg,
+ BUS_DMA_WAITOK);
+ map->cansleep = false;
+ if (map->locked) {
+ (tag->common.lockfunc)(tag->common.lockfuncarg,
+ BUS_DMA_UNLOCK);
+ } else
+ map->locked = true;
+ map->cansleep = false;
+ DMAR_LOCK(unit);
+ }
+ DMAR_UNLOCK(unit);
+}
+
+static void
+dmar_bus_schedule_dmamap(struct dmar_unit *unit, struct bus_dmamap_dmar *map)
+{
+ struct dmar_ctx *ctx;
+
+ ctx = map->tag->ctx;
+ map->locked = false;
+ DMAR_LOCK(unit);
+ TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
+ DMAR_UNLOCK(unit);
+ taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
+}
+
+int
+dmar_init_busdma(struct dmar_unit *unit)
+{
+
+ TAILQ_INIT(&unit->delayed_maps);
+ TASK_INIT(&unit->dmamap_load_task, 0, dmar_bus_task_dmamap, unit);
+ unit->delayed_taskqueue = taskqueue_create("dmar", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->delayed_taskqueue);
+ taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
+ "dmar%d busdma taskq", unit->unit);
+ return (0);
+}
+
+void
+dmar_fini_busdma(struct dmar_unit *unit)
+{
+
+ if (unit->delayed_taskqueue == NULL)
+ return;
+
+ taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
+ taskqueue_free(unit->delayed_taskqueue);
+ unit->delayed_taskqueue = NULL;
+}
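
For reference, the dmar_bus_dma_is_dev_disabled() check above keys off a per-device kernel environment variable, so translation can be bypassed for a single function without turning off the whole DMAR unit; the device then gets an identity-mapped context via dmar_instantiate_ctx(). A hypothetical loader.conf(5) line for domain 0, bus 0, slot 2, function 0 follows; only the variable's existence matters, its value is ignored:

hw.busdma.pci0.0.2.0.bounce="1"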
diff --git a/sys/x86/iommu/busdma_dmar.h b/sys/x86/iommu/busdma_dmar.h
new file mode 100644
index 0000000..60ea6bc
--- /dev/null
+++ b/sys/x86/iommu/busdma_dmar.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __X86_IOMMU_BUSDMA_DMAR_H
+#define __X86_IOMMU_BUSDMA_DMAR_H
+
+struct dmar_map_entry;
+TAILQ_HEAD(dmar_map_entries_tailq, dmar_map_entry);
+
+struct bus_dma_tag_dmar {
+ struct bus_dma_tag_common common;
+ struct dmar_ctx *ctx;
+ device_t owner;
+ int map_count;
+ bus_dma_segment_t *segments;
+};
+
+struct bus_dmamap_dmar {
+ struct bus_dma_tag_dmar *tag;
+ struct memdesc mem;
+ bus_dmamap_callback_t *callback;
+ void *callback_arg;
+ struct dmar_map_entries_tailq map_entries;
+ TAILQ_ENTRY(bus_dmamap_dmar) delay_link;
+ bool locked;
+ bool cansleep;
+ int flags;
+};
+
+#define BUS_DMAMAP_DMAR_MALLOC 0x0001
+#define BUS_DMAMAP_DMAR_KMEM_ALLOC 0x0002
+
+extern struct bus_dma_impl bus_dma_dmar_impl;
+
+bus_dma_tag_t dmar_get_dma_tag(device_t dev, device_t child);
+
+#endif
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
new file mode 100644
index 0000000..fc5fe09
--- /dev/null
+++ b/sys/x86/iommu/intel_ctx.c
@@ -0,0 +1,631 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+#include <dev/pci/pcivar.h>
+
+static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
+
+static void dmar_ctx_unload_task(void *arg, int pending);
+
+static void
+dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
+{
+ struct sf_buf *sf;
+ dmar_root_entry_t *re;
+ vm_page_t ctxm;
+
+ /*
+ * An already allocated context page must already be linked.
+ */
+ ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_NOALLOC);
+ if (ctxm != NULL)
+ return;
+
+ /*
+ * Page not present, allocate and link. Note that another
+ * thread might execute this sequence in parallel. This
+ * should be safe, because the context entries written by both
+ * threads are equal.
+ */
+ TD_PREP_PINNED_ASSERT;
+ ctxm = dmar_pgalloc(dmar->ctx_obj, 1 + bus, DMAR_PGF_ZERO |
+ DMAR_PGF_WAITOK);
+ re = dmar_map_pgtbl(dmar->ctx_obj, 0, DMAR_PGF_NOALLOC, &sf);
+ re += bus;
+ dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
+ VM_PAGE_TO_PHYS(ctxm)));
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ TD_PINNED_ASSERT;
+}
+
+static dmar_ctx_entry_t *
+dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
+{
+ dmar_ctx_entry_t *ctxp;
+
+ ctxp = dmar_map_pgtbl(ctx->dmar->ctx_obj, 1 + ctx->bus,
+ DMAR_PGF_NOALLOC | DMAR_PGF_WAITOK, sfp);
+ ctxp += ((ctx->slot & 0x1f) << 3) + (ctx->func & 0x7);
+ return (ctxp);
+}
+
+static void
+ctx_tag_init(struct dmar_ctx *ctx)
+{
+ bus_addr_t maxaddr;
+
+ maxaddr = MIN(ctx->end, BUS_SPACE_MAXADDR);
+ ctx->ctx_tag.common.ref_count = 1; /* Prevent free */
+ ctx->ctx_tag.common.impl = &bus_dma_dmar_impl;
+ ctx->ctx_tag.common.boundary = PCI_DMA_BOUNDARY;
+ ctx->ctx_tag.common.lowaddr = maxaddr;
+ ctx->ctx_tag.common.highaddr = maxaddr;
+ ctx->ctx_tag.common.maxsize = maxaddr;
+ ctx->ctx_tag.common.nsegments = BUS_SPACE_UNRESTRICTED;
+ ctx->ctx_tag.common.maxsegsz = maxaddr;
+ ctx->ctx_tag.ctx = ctx;
+ /* XXXKIB initialize tag further */
+}
+
+static void
+ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp)
+{
+ struct dmar_unit *unit;
+ vm_page_t ctx_root;
+
+ unit = ctx->dmar;
+ KASSERT(ctxp->ctx1 == 0 && ctxp->ctx2 == 0,
+ ("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
+ unit->unit, ctx->bus, ctx->slot, ctx->func, ctxp->ctx1,
+ ctxp->ctx2));
+ ctxp->ctx2 = DMAR_CTX2_DID(ctx->domain);
+ ctxp->ctx2 |= ctx->awlvl;
+ if ((ctx->flags & DMAR_CTX_IDMAP) != 0 &&
+ (unit->hw_ecap & DMAR_ECAP_PT) != 0) {
+ KASSERT(ctx->pgtbl_obj == NULL,
+ ("ctx %p non-null pgtbl_obj", ctx));
+ dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
+ } else {
+ ctx_root = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_NOALLOC);
+ dmar_pte_store(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
+ (DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
+ DMAR_CTX1_P);
+ }
+}
+
+static int
+ctx_init_rmrr(struct dmar_ctx *ctx, device_t dev)
+{
+ struct dmar_map_entries_tailq rmrr_entries;
+ struct dmar_map_entry *entry, *entry1;
+ vm_page_t *ma;
+ dmar_gaddr_t start, end;
+ vm_pindex_t size, i;
+ int error, error1;
+
+ error = 0;
+ TAILQ_INIT(&rmrr_entries);
+ dmar_ctx_parse_rmrr(ctx, dev, &rmrr_entries);
+ TAILQ_FOREACH_SAFE(entry, &rmrr_entries, unroll_link, entry1) {
+ /*
+ * The VT-d specification requires that the start of an
+ * RMRR entry be 4K-aligned. Buggy BIOSes put
+ * anything into the start and end fields. Truncate
+ * and round as necessary.
+ *
+ * We also allow overlapping RMRR entries, see
+ * dmar_gas_alloc_region().
+ */
+ start = entry->start;
+ end = entry->end;
+ entry->start = trunc_page(start);
+ entry->end = round_page(end);
+ size = OFF_TO_IDX(entry->end - entry->start);
+ ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
+ for (i = 0; i < size; i++) {
+ ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
+ VM_MEMATTR_DEFAULT);
+ }
+ error1 = dmar_gas_map_region(ctx, entry, DMAR_MAP_ENTRY_READ |
+ DMAR_MAP_ENTRY_WRITE, DMAR_GM_CANWAIT, ma);
+ /*
+ * Successfully mapped RMRR entries are owned by the context
+ * rb tree. Get rid of the failed entry, but do not stop
+ * the loop. The rest of the parsed RMRR entries are
+ * loaded and removed on context destruction.
+ */
+ if (error1 == 0 && entry->end != entry->start) {
+ DMAR_LOCK(ctx->dmar);
+ ctx->flags |= DMAR_CTX_RMRR;
+ DMAR_UNLOCK(ctx->dmar);
+ } else {
+ if (error1 != 0) {
+ device_printf(dev,
+ "dmar%d failed to map RMRR region (%jx, %jx) %d\n",
+ ctx->dmar->unit, start, end, error1);
+ error = error1;
+ }
+ TAILQ_REMOVE(&rmrr_entries, entry, unroll_link);
+ dmar_gas_free_entry(ctx, entry);
+ }
+ for (i = 0; i < size; i++)
+ vm_page_putfake(ma[i]);
+ free(ma, M_TEMP);
+ }
+ return (error);
+}
+
+static struct dmar_ctx *
+dmar_get_ctx_alloc(struct dmar_unit *dmar, int bus, int slot, int func)
+{
+ struct dmar_ctx *ctx;
+
+ ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
+ RB_INIT(&ctx->rb_root);
+ TAILQ_INIT(&ctx->unload_entries);
+ TASK_INIT(&ctx->unload_task, 0, dmar_ctx_unload_task, ctx);
+ mtx_init(&ctx->lock, "dmarctx", NULL, MTX_DEF);
+ ctx->dmar = dmar;
+ ctx->bus = bus;
+ ctx->slot = slot;
+ ctx->func = func;
+ return (ctx);
+}
+
+static void
+dmar_ctx_dtr(struct dmar_ctx *ctx, bool gas_inited, bool pgtbl_inited)
+{
+
+ if (gas_inited) {
+ DMAR_CTX_LOCK(ctx);
+ dmar_gas_fini_ctx(ctx);
+ DMAR_CTX_UNLOCK(ctx);
+ }
+ if (pgtbl_inited) {
+ if (ctx->pgtbl_obj != NULL)
+ DMAR_CTX_PGLOCK(ctx);
+ ctx_free_pgtbl(ctx);
+ }
+ mtx_destroy(&ctx->lock);
+ free(ctx, M_DMAR_CTX);
+}
+
+struct dmar_ctx *
+dmar_get_ctx(struct dmar_unit *dmar, device_t dev, bool id_mapped, bool rmrr_init)
+{
+ struct dmar_ctx *ctx, *ctx1;
+ dmar_ctx_entry_t *ctxp;
+ struct sf_buf *sf;
+ int bus, slot, func, error, mgaw;
+ bool enable;
+
+ bus = pci_get_bus(dev);
+ slot = pci_get_slot(dev);
+ func = pci_get_function(dev);
+ enable = false;
+ TD_PREP_PINNED_ASSERT;
+ DMAR_LOCK(dmar);
+ ctx = dmar_find_ctx_locked(dmar, bus, slot, func);
+ error = 0;
+ if (ctx == NULL) {
+ /*
+ * Perform the allocations which require sleep or have a
+ * higher chance of succeeding if sleeping is allowed.
+ */
+ DMAR_UNLOCK(dmar);
+ dmar_ensure_ctx_page(dmar, bus);
+ ctx1 = dmar_get_ctx_alloc(dmar, bus, slot, func);
+
+ if (id_mapped) {
+ /*
+ * For now, use the maximal usable physical
+ * address of the installed memory to
+ * calculate the mgaw. It is useful for the
+ * identity mapping, and less so for the
+ * virtualized bus address space.
+ */
+ ctx1->end = ptoa(Maxmem);
+ mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, false);
+ error = ctx_set_agaw(ctx1, mgaw);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, false, false);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ } else {
+ ctx1->end = BUS_SPACE_MAXADDR;
+ mgaw = dmar_maxaddr2mgaw(dmar, ctx1->end, true);
+ error = ctx_set_agaw(ctx1, mgaw);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, false, false);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ /* Use all supported address space for remapping. */
+ ctx1->end = 1ULL << (ctx1->agaw - 1);
+ }
+
+
+ dmar_gas_init_ctx(ctx1);
+ if (id_mapped) {
+ if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
+ ctx1->pgtbl_obj = ctx_get_idmap_pgtbl(ctx1,
+ ctx1->end);
+ }
+ ctx1->flags |= DMAR_CTX_IDMAP;
+ } else {
+ error = ctx_alloc_pgtbl(ctx1);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, true, false);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ /* Disable local apic region access */
+ error = dmar_gas_reserve_region(ctx1, 0xfee00000,
+ 0xfeefffff + 1);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, true, true);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ error = ctx_init_rmrr(ctx1, dev);
+ if (error != 0) {
+ dmar_ctx_dtr(ctx1, true, true);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ }
+ ctxp = dmar_map_ctx_entry(ctx1, &sf);
+ DMAR_LOCK(dmar);
+
+ /*
+ * Recheck the contexts, another thread might have
+ * already allocated the needed one.
+ */
+ ctx = dmar_find_ctx_locked(dmar, bus, slot, func);
+ if (ctx == NULL) {
+ ctx = ctx1;
+ ctx->domain = alloc_unrl(dmar->domids);
+ if (ctx->domain == -1) {
+ DMAR_UNLOCK(dmar);
+ dmar_unmap_pgtbl(sf, true);
+ dmar_ctx_dtr(ctx, true, true);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ ctx_tag_init(ctx);
+
+ /*
+ * This is the first activated context for the
+ * DMAR unit. Enable the translation after
+ * everything is set up.
+ */
+ if (LIST_EMPTY(&dmar->contexts))
+ enable = true;
+ LIST_INSERT_HEAD(&dmar->contexts, ctx, link);
+ ctx_id_entry_init(ctx, ctxp);
+ device_printf(dev,
+ "dmar%d pci%d:%d:%d:%d domain %d mgaw %d agaw %d\n",
+ dmar->unit, dmar->segment, bus, slot,
+ func, ctx->domain, ctx->mgaw, ctx->agaw);
+ } else {
+ dmar_ctx_dtr(ctx1, true, true);
+ }
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ }
+ ctx->refs++;
+ if ((ctx->flags & DMAR_CTX_RMRR) != 0)
+ ctx->refs++; /* XXXKIB */
+
+ /*
+ * If dmar declares Caching Mode as Set, follow 11.5 "Caching
+ * Mode Consideration" and do the (global) invalidation of the
+ * negative TLB entries.
+ */
+ if ((dmar->hw_cap & DMAR_CAP_CM) != 0 || enable) {
+ if (dmar->qi_enabled) {
+ dmar_qi_invalidate_ctx_glob_locked(dmar);
+ if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0)
+ dmar_qi_invalidate_iotlb_glob_locked(dmar);
+ } else {
+ error = dmar_inv_ctx_glob(dmar);
+ if (error == 0 &&
+ (dmar->hw_ecap & DMAR_ECAP_DI) != 0)
+ error = dmar_inv_iotlb_glob(dmar);
+ if (error != 0) {
+ dmar_free_ctx_locked(dmar, ctx);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ }
+ }
+
+ /*
+ * The dmar lock was potentially dropped between the check for the
+ * empty context list and now. Recheck the state of GCMD_TE
+ * to avoid an unneeded command.
+ */
+ if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
+ error = dmar_enable_translation(dmar);
+ if (error != 0) {
+ dmar_free_ctx_locked(dmar, ctx);
+ TD_PINNED_ASSERT;
+ return (NULL);
+ }
+ }
+ DMAR_UNLOCK(dmar);
+ TD_PINNED_ASSERT;
+ return (ctx);
+}
+
+void
+dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
+{
+ struct sf_buf *sf;
+ dmar_ctx_entry_t *ctxp;
+
+ DMAR_ASSERT_LOCKED(dmar);
+ KASSERT(ctx->refs >= 1,
+ ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
+
+ /*
+ * If our reference is not the last one, only the dereference should
+ * be performed.
+ */
+ if (ctx->refs > 1) {
+ ctx->refs--;
+ DMAR_UNLOCK(dmar);
+ return;
+ }
+
+ KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0,
+ ("lost ref on RMRR ctx %p", ctx));
+ KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
+ ("lost ref on disabled ctx %p", ctx));
+
+ /*
+ * Otherwise, the context entry must be cleared before the
+ * page table is destroyed. The mapping of the context
+ * entries page could require a sleep, so unlock the dmar.
+ */
+ DMAR_UNLOCK(dmar);
+ TD_PREP_PINNED_ASSERT;
+ ctxp = dmar_map_ctx_entry(ctx, &sf);
+ DMAR_LOCK(dmar);
+ KASSERT(ctx->refs >= 1,
+ ("dmar %p ctx %p refs %u", dmar, ctx, ctx->refs));
+
+ /*
+ * Another thread might have referenced the context, in which
+ * case again only the dereference should be performed.
+ */
+ if (ctx->refs > 1) {
+ ctx->refs--;
+ DMAR_UNLOCK(dmar);
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ TD_PINNED_ASSERT;
+ return;
+ }
+
+ KASSERT((ctx->flags & DMAR_CTX_RMRR) == 0,
+ ("lost ref on RMRR ctx %p", ctx));
+ KASSERT((ctx->flags & DMAR_CTX_DISABLED) == 0,
+ ("lost ref on disabled ctx %p", ctx));
+
+ /*
+ * Clear the context pointer and flush the caches.
+ * XXXKIB: cannot do this if any RMRR entries are still present.
+ */
+ dmar_pte_clear(&ctxp->ctx1);
+ ctxp->ctx2 = 0;
+ dmar_inv_ctx_glob(dmar);
+ if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
+ if (dmar->qi_enabled)
+ dmar_qi_invalidate_iotlb_glob_locked(dmar);
+ else
+ dmar_inv_iotlb_glob(dmar);
+ }
+ LIST_REMOVE(ctx, link);
+ DMAR_UNLOCK(dmar);
+
+ /*
+ * The rest of the destruction is invisible to other users of
+ * the dmar unit.
+ */
+ taskqueue_drain(dmar->delayed_taskqueue, &ctx->unload_task);
+ KASSERT(TAILQ_EMPTY(&ctx->unload_entries),
+ ("unfinished unloads %p", ctx));
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(dmar));
+ free_unr(dmar->domids, ctx->domain);
+ dmar_ctx_dtr(ctx, true, true);
+ TD_PINNED_ASSERT;
+}
+
+void
+dmar_free_ctx(struct dmar_ctx *ctx)
+{
+ struct dmar_unit *dmar;
+
+ dmar = ctx->dmar;
+ DMAR_LOCK(dmar);
+ dmar_free_ctx_locked(dmar, ctx);
+}
+
+struct dmar_ctx *
+dmar_find_ctx_locked(struct dmar_unit *dmar, int bus, int slot, int func)
+{
+ struct dmar_ctx *ctx;
+
+ DMAR_ASSERT_LOCKED(dmar);
+
+ LIST_FOREACH(ctx, &dmar->contexts, link) {
+ if (ctx->bus == bus && ctx->slot == slot && ctx->func == func)
+ return (ctx);
+ }
+ return (NULL);
+}
+
+void
+dmar_ctx_free_entry(struct dmar_map_entry *entry, bool free)
+{
+ struct dmar_ctx *ctx;
+
+ ctx = entry->ctx;
+ DMAR_CTX_LOCK(ctx);
+ if ((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0)
+ dmar_gas_free_region(ctx, entry);
+ else
+ dmar_gas_free_space(ctx, entry);
+ DMAR_CTX_UNLOCK(ctx);
+ if (free)
+ dmar_gas_free_entry(ctx, entry);
+ else
+ entry->flags = 0;
+}
+
+void
+dmar_ctx_unload_entry(struct dmar_map_entry *entry, bool free)
+{
+ struct dmar_unit *unit;
+
+ unit = entry->ctx->dmar;
+ if (unit->qi_enabled) {
+ DMAR_LOCK(unit);
+ dmar_qi_invalidate_locked(entry->ctx, entry->start,
+ entry->end - entry->start, &entry->gseq);
+ if (!free)
+ entry->flags |= DMAR_MAP_ENTRY_QI_NF;
+ TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
+ DMAR_UNLOCK(unit);
+ } else {
+ ctx_flush_iotlb_sync(entry->ctx, entry->start, entry->end -
+ entry->start);
+ dmar_ctx_free_entry(entry, free);
+ }
+}
+
+void
+dmar_ctx_unload(struct dmar_ctx *ctx, struct dmar_map_entries_tailq *entries,
+ bool cansleep)
+{
+ struct dmar_unit *unit;
+ struct dmar_map_entry *entry, *entry1;
+ struct dmar_qi_genseq gseq;
+ int error;
+
+ unit = ctx->dmar;
+
+ TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
+ KASSERT((entry->flags & DMAR_MAP_ENTRY_MAP) != 0,
+ ("not mapped entry %p %p", ctx, entry));
+ error = ctx_unmap_buf(ctx, entry->start, entry->end -
+ entry->start, cansleep ? DMAR_PGF_WAITOK : 0);
+ KASSERT(error == 0, ("unmap %p error %d", ctx, error));
+ if (!unit->qi_enabled) {
+ ctx_flush_iotlb_sync(ctx, entry->start,
+ entry->end - entry->start);
+ TAILQ_REMOVE(entries, entry, dmamap_link);
+ dmar_ctx_free_entry(entry, true);
+ }
+ }
+ if (TAILQ_EMPTY(entries))
+ return;
+
+ KASSERT(unit->qi_enabled, ("loaded entry left"));
+ DMAR_LOCK(unit);
+ TAILQ_FOREACH(entry, entries, dmamap_link) {
+ entry->gseq.gen = 0;
+ entry->gseq.seq = 0;
+ dmar_qi_invalidate_locked(ctx, entry->start, entry->end -
+ entry->start, TAILQ_NEXT(entry, dmamap_link) == NULL ?
+ &gseq : NULL);
+ }
+ TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
+ entry->gseq = gseq;
+ TAILQ_REMOVE(entries, entry, dmamap_link);
+ TAILQ_INSERT_TAIL(&unit->tlb_flush_entries, entry, dmamap_link);
+ }
+ DMAR_UNLOCK(unit);
+}
+
+static void
+dmar_ctx_unload_task(void *arg, int pending)
+{
+ struct dmar_ctx *ctx;
+ struct dmar_map_entries_tailq entries;
+
+ ctx = arg;
+ TAILQ_INIT(&entries);
+
+ for (;;) {
+ DMAR_CTX_LOCK(ctx);
+ TAILQ_SWAP(&ctx->unload_entries, &entries, dmar_map_entry,
+ dmamap_link);
+ DMAR_CTX_UNLOCK(ctx);
+ if (TAILQ_EMPTY(&entries))
+ break;
+ dmar_ctx_unload(ctx, &entries, true);
+ }
+}
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
new file mode 100644
index 0000000..994e5e1
--- /dev/null
+++ b/sys/x86/iommu/intel_dmar.h
@@ -0,0 +1,435 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __X86_IOMMU_INTEL_DMAR_H
+#define __X86_IOMMU_INTEL_DMAR_H
+
+/* Host or physical memory address, after translation. */
+typedef uint64_t dmar_haddr_t;
+/* Guest or bus address, before translation. */
+typedef uint64_t dmar_gaddr_t;
+
+struct dmar_qi_genseq {
+ u_int gen;
+ uint32_t seq;
+};
+
+struct dmar_map_entry {
+ dmar_gaddr_t start;
+ dmar_gaddr_t end;
+ dmar_gaddr_t free_after; /* Free space after the entry */
+ dmar_gaddr_t free_down; /* Max free space below the
+ current R/B tree node */
+ u_int flags;
+ TAILQ_ENTRY(dmar_map_entry) dmamap_link; /* Link for dmamap entries */
+ RB_ENTRY(dmar_map_entry) rb_entry; /* Links for ctx entries */
+ TAILQ_ENTRY(dmar_map_entry) unroll_link; /* Link for unroll after
+ dmamap_load failure */
+ struct dmar_ctx *ctx;
+ struct dmar_qi_genseq gseq;
+};
+
+RB_HEAD(dmar_gas_entries_tree, dmar_map_entry);
+RB_PROTOTYPE(dmar_gas_entries_tree, dmar_map_entry, rb_entry,
+ dmar_gas_cmp_entries);
+
+#define DMAR_MAP_ENTRY_PLACE 0x0001 /* Fake entry */
+#define DMAR_MAP_ENTRY_RMRR 0x0002 /* Permanent, not linked by
+ dmamap_link */
+#define DMAR_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by
+ dmamap_link */
+#define DMAR_MAP_ENTRY_UNMAPPED 0x0010 /* No backing pages */
+#define DMAR_MAP_ENTRY_QI_NF 0x0020 /* qi task, do not free entry */
+#define DMAR_MAP_ENTRY_READ 0x1000 /* Read permitted */
+#define DMAR_MAP_ENTRY_WRITE 0x2000 /* Write permitted */
+#define DMAR_MAP_ENTRY_SNOOP 0x4000 /* Snoop */
+#define DMAR_MAP_ENTRY_TM 0x8000 /* Transient */
+
+struct dmar_ctx {
+ int bus; /* pci bus/slot/func */
+ int slot;
+ int func;
+ int domain; /* DID */
+ int mgaw; /* Real max address width */
+ int agaw; /* Adjusted guest address width */
+ int pglvl; /* The pagelevel */
+ int awlvl; /* The pagelevel as the bitmask, to set in
+ context entry */
+ dmar_gaddr_t end;/* Highest address + 1 in the guest AS */
+ u_int refs; /* References to the context, from tags */
+ struct dmar_unit *dmar;
+ struct bus_dma_tag_dmar ctx_tag; /* Root tag */
+ struct mtx lock;
+ LIST_ENTRY(dmar_ctx) link; /* Member in the dmar list */
+ vm_object_t pgtbl_obj; /* Page table pages */
+ u_int flags; /* Protected by dmar lock */
+ uint64_t last_fault_rec[2]; /* Last fault reported */
+ u_int entries_cnt;
+ u_long loads;
+ u_long unloads;
+ struct dmar_gas_entries_tree rb_root;
+ struct dmar_map_entries_tailq unload_entries; /* Entries to unload */
+ struct dmar_map_entry *first_place, *last_place;
+ struct task unload_task;
+};
+
+/* struct dmar_ctx flags */
+#define DMAR_CTX_FAULTED 0x0001 /* Fault was reported,
+ last_fault_rec is valid */
+#define DMAR_CTX_IDMAP 0x0002 /* Context uses identity page table */
+#define DMAR_CTX_RMRR 0x0004 /* Context contains RMRR entry,
+ cannot be turned off */
+#define DMAR_CTX_DISABLED 0x0008 /* Device is disabled, the
+ ephemeral reference is kept
+ to prevent context destruction */
+
+#define DMAR_CTX_PGLOCK(ctx) VM_OBJECT_WLOCK((ctx)->pgtbl_obj)
+#define DMAR_CTX_PGTRYLOCK(ctx) VM_OBJECT_TRYWLOCK((ctx)->pgtbl_obj)
+#define DMAR_CTX_PGUNLOCK(ctx) VM_OBJECT_WUNLOCK((ctx)->pgtbl_obj)
+#define DMAR_CTX_ASSERT_PGLOCKED(ctx) \
+ VM_OBJECT_ASSERT_WLOCKED((ctx)->pgtbl_obj)
+
+#define DMAR_CTX_LOCK(ctx) mtx_lock(&(ctx)->lock)
+#define DMAR_CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->lock)
+#define DMAR_CTX_ASSERT_LOCKED(ctx) mtx_assert(&(ctx)->lock, MA_OWNED)
+
+struct dmar_msi_data {
+ int irq;
+ int irq_rid;
+ struct resource *irq_res;
+ void *intr_handle;
+ int (*handler)(void *);
+ int msi_data_reg;
+ int msi_addr_reg;
+ int msi_uaddr_reg;
+ void (*enable_intr)(struct dmar_unit *);
+ void (*disable_intr)(struct dmar_unit *);
+ const char *name;
+};
+
+#define DMAR_INTR_FAULT 0
+#define DMAR_INTR_QI 1
+#define DMAR_INTR_TOTAL 2
+
+struct dmar_unit {
+ device_t dev;
+ int unit;
+ uint16_t segment;
+ uint64_t base;
+
+ /* Resources */
+ int reg_rid;
+ struct resource *regs;
+
+ struct dmar_msi_data intrs[DMAR_INTR_TOTAL];
+
+ /* Hardware registers cache */
+ uint32_t hw_ver;
+ uint64_t hw_cap;
+ uint64_t hw_ecap;
+ uint32_t hw_gcmd;
+
+ /* Data for being a dmar */
+ struct mtx lock;
+ LIST_HEAD(, dmar_ctx) contexts;
+ struct unrhdr *domids;
+ vm_object_t ctx_obj;
+ u_int barrier_flags;
+
+ /* Fault handler data */
+ struct mtx fault_lock;
+ uint64_t *fault_log;
+ int fault_log_head;
+ int fault_log_tail;
+ int fault_log_size;
+ struct task fault_task;
+ struct taskqueue *fault_taskqueue;
+
+ /* QI */
+ int qi_enabled;
+ vm_offset_t inv_queue;
+ vm_size_t inv_queue_size;
+ uint32_t inv_queue_avail;
+ uint32_t inv_queue_tail;
+ volatile uint32_t inv_waitd_seq_hw; /* hw writes there on wait
+ descr completion */
+ uint64_t inv_waitd_seq_hw_phys;
+ uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */
+ u_int inv_waitd_gen; /* seq number generation AKA seq overflows */
+ u_int inv_seq_waiters; /* count of waiters for seq */
+ u_int inv_queue_full; /* informational counter */
+
+ /* Delayed freeing of map entries queue processing */
+ struct dmar_map_entries_tailq tlb_flush_entries;
+ struct task qi_task;
+ struct taskqueue *qi_taskqueue;
+
+ /* Busdma delayed map load */
+ struct task dmamap_load_task;
+ TAILQ_HEAD(, bus_dmamap_dmar) delayed_maps;
+ struct taskqueue *delayed_taskqueue;
+};
+
+#define DMAR_LOCK(dmar) mtx_lock(&(dmar)->lock)
+#define DMAR_UNLOCK(dmar) mtx_unlock(&(dmar)->lock)
+#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->lock, MA_OWNED)
+
+#define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock)
+#define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock)
+#define DMAR_FAULT_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->fault_lock, MA_OWNED)
+
+#define DMAR_IS_COHERENT(dmar) (((dmar)->hw_ecap & DMAR_ECAP_C) != 0)
+#define DMAR_HAS_QI(dmar) (((dmar)->hw_ecap & DMAR_ECAP_QI) != 0)
+
+/* Barrier ids */
+#define DMAR_BARRIER_RMRR 0
+#define DMAR_BARRIER_USEQ 1
+
+struct dmar_unit *dmar_find(device_t dev);
+
+u_int dmar_nd2mask(u_int nd);
+bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl);
+int ctx_set_agaw(struct dmar_ctx *ctx, int mgaw);
+int dmar_maxaddr2mgaw(struct dmar_unit* unit, dmar_gaddr_t maxaddr,
+ bool allow_less);
+vm_pindex_t pglvl_max_pages(int pglvl);
+int ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl);
+dmar_gaddr_t pglvl_page_size(int total_pglvl, int lvl);
+dmar_gaddr_t ctx_page_size(struct dmar_ctx *ctx, int lvl);
+int calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size,
+ dmar_gaddr_t *isizep);
+struct vm_page *dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags);
+void dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags);
+void *dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
+ struct sf_buf **sf);
+void dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent);
+int dmar_load_root_entry_ptr(struct dmar_unit *unit);
+int dmar_inv_ctx_glob(struct dmar_unit *unit);
+int dmar_inv_iotlb_glob(struct dmar_unit *unit);
+int dmar_flush_write_bufs(struct dmar_unit *unit);
+int dmar_enable_translation(struct dmar_unit *unit);
+int dmar_disable_translation(struct dmar_unit *unit);
+bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id);
+void dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id);
+
+int dmar_fault_intr(void *arg);
+void dmar_enable_fault_intr(struct dmar_unit *unit);
+void dmar_disable_fault_intr(struct dmar_unit *unit);
+int dmar_init_fault_log(struct dmar_unit *unit);
+void dmar_fini_fault_log(struct dmar_unit *unit);
+
+int dmar_qi_intr(void *arg);
+void dmar_enable_qi_intr(struct dmar_unit *unit);
+void dmar_disable_qi_intr(struct dmar_unit *unit);
+int dmar_init_qi(struct dmar_unit *unit);
+void dmar_fini_qi(struct dmar_unit *unit);
+void dmar_qi_invalidate_locked(struct dmar_ctx *ctx, dmar_gaddr_t start,
+ dmar_gaddr_t size, struct dmar_qi_genseq *pseq);
+void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit);
+void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit);
+
+vm_object_t ctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr);
+void put_idmap_pgtbl(vm_object_t obj);
+int ctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ vm_page_t *ma, uint64_t pflags, int flags);
+int ctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ int flags);
+void ctx_flush_iotlb_sync(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ dmar_gaddr_t size);
+int ctx_alloc_pgtbl(struct dmar_ctx *ctx);
+void ctx_free_pgtbl(struct dmar_ctx *ctx);
+
+struct dmar_ctx *dmar_instantiate_ctx(struct dmar_unit *dmar, device_t dev,
+ bool rmrr);
+struct dmar_ctx *dmar_get_ctx(struct dmar_unit *dmar, device_t dev,
+ bool id_mapped, bool rmrr_init);
+void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
+void dmar_free_ctx(struct dmar_ctx *ctx);
+struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, int bus,
+ int slot, int func);
+void dmar_ctx_unload_entry(struct dmar_map_entry *entry, bool free);
+void dmar_ctx_unload(struct dmar_ctx *ctx,
+ struct dmar_map_entries_tailq *entries, bool cansleep);
+void dmar_ctx_free_entry(struct dmar_map_entry *entry, bool free);
+
+int dmar_init_busdma(struct dmar_unit *unit);
+void dmar_fini_busdma(struct dmar_unit *unit);
+
+void dmar_gas_init_ctx(struct dmar_ctx *ctx);
+void dmar_gas_fini_ctx(struct dmar_ctx *ctx);
+struct dmar_map_entry *dmar_gas_alloc_entry(struct dmar_ctx *ctx, u_int flags);
+void dmar_gas_free_entry(struct dmar_ctx *ctx, struct dmar_map_entry *entry);
+void dmar_gas_free_space(struct dmar_ctx *ctx, struct dmar_map_entry *entry);
+int dmar_gas_map(struct dmar_ctx *ctx, const struct bus_dma_tag_common *common,
+ dmar_gaddr_t size, u_int eflags, u_int flags, vm_page_t *ma,
+ struct dmar_map_entry **res);
+void dmar_gas_free_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry);
+int dmar_gas_map_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+ u_int eflags, u_int flags, vm_page_t *ma);
+int dmar_gas_reserve_region(struct dmar_ctx *ctx, dmar_gaddr_t start,
+ dmar_gaddr_t end);
+
+void dmar_ctx_parse_rmrr(struct dmar_ctx *ctx, device_t dev,
+ struct dmar_map_entries_tailq *rmrr_entries);
+int dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar);
+
+void dmar_quirks_post_ident(struct dmar_unit *dmar);
+void dmar_quirks_pre_use(struct dmar_unit *dmar);
+
+#define DMAR_GM_CANWAIT 0x0001
+#define DMAR_GM_CANSPLIT 0x0002
+
+#define DMAR_PGF_WAITOK 0x0001
+#define DMAR_PGF_ZERO 0x0002
+#define DMAR_PGF_ALLOC 0x0004
+#define DMAR_PGF_NOALLOC 0x0008
+#define DMAR_PGF_OBJL 0x0010
+
+extern dmar_haddr_t dmar_high;
+extern int haw;
+extern int dmar_tbl_pagecnt;
+extern int dmar_match_verbose;
+extern int dmar_check_free;
+
+static inline uint32_t
+dmar_read4(const struct dmar_unit *unit, int reg)
+{
+
+ return (bus_read_4(unit->regs, reg));
+}
+
+static inline uint64_t
+dmar_read8(const struct dmar_unit *unit, int reg)
+{
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = bus_read_4(unit->regs, reg);
+ high = bus_read_4(unit->regs, reg + 4);
+ return (low | ((uint64_t)high << 32));
+#else
+ return (bus_read_8(unit->regs, reg));
+#endif
+}
+
+static inline void
+dmar_write4(const struct dmar_unit *unit, int reg, uint32_t val)
+{
+
+ KASSERT(reg != DMAR_GCMD_REG || (val & DMAR_GCMD_TE) ==
+ (unit->hw_gcmd & DMAR_GCMD_TE),
+ ("dmar%d clearing TE 0x%08x 0x%08x", unit->unit,
+ unit->hw_gcmd, val));
+ bus_write_4(unit->regs, reg, val);
+}
+
+static inline void
+dmar_write8(const struct dmar_unit *unit, int reg, uint64_t val)
+{
+
+ KASSERT(reg != DMAR_GCMD_REG, ("8byte GCMD write"));
+#ifdef __i386__
+ uint32_t high, low;
+
+ low = val;
+ high = val >> 32;
+ bus_write_4(unit->regs, reg, low);
+ bus_write_4(unit->regs, reg + 4, high);
+#else
+ bus_write_8(unit->regs, reg, val);
+#endif
+}
+
+/*
+ * dmar_pte_store and dmar_pte_clear ensure that on i386, 32bit writes
+ * are issued in the correct order. For store, the lower word,
+ * containing the P or R and W bits, is set only after the high word
+ * is written. For clear, the P bit is cleared first, then the high
+ * word is cleared.
+ */
+static inline void
+dmar_pte_store(volatile uint64_t *dst, uint64_t val)
+{
+
+ KASSERT(*dst == 0, ("used pte %p oldval %jx newval %jx",
+ dst, (uintmax_t)*dst, (uintmax_t)val));
+#ifdef __i386__
+ volatile uint32_t *p;
+ uint32_t hi, lo;
+
+ hi = val >> 32;
+ lo = val;
+ p = (volatile uint32_t *)dst;
+ *(p + 1) = hi;
+ *p = lo;
+#else
+ *dst = val;
+#endif
+}
+
+static inline void
+dmar_pte_clear(volatile uint64_t *dst)
+{
+#ifdef __i386__
+ volatile uint32_t *p;
+
+ p = (volatile uint32_t *)dst;
+ *p = 0;
+ *(p + 1) = 0;
+#else
+ *dst = 0;
+#endif
+}
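
The ordering spelled out in the comment above can be pictured with a small userland sketch.  This is illustrative only: it assumes a little-endian x86 layout and that the permission bits live in the low 32-bit word, as the comment states; union pte_words and the 0x3 mask are hypothetical stand-ins, not the driver's DMAR_PTE_* definitions.

#include <assert.h>
#include <stdint.h>

/* Little-endian assumed: w[0] is the low word, w[1] the high word. */
union pte_words {
	volatile uint64_t pte;
	volatile uint32_t w[2];
};

int
main(void)
{
	union pte_words e = { .pte = 0 };
	uint64_t val = 0x0000000123456000ULL | 0x3; /* addr | hypothetical R/W bits */

	/* Store: high word first, low word carrying the R/W bits last. */
	e.w[1] = val >> 32;
	assert((e.pte & 0x3) == 0);	/* half-written entry still looks absent */
	e.w[0] = (uint32_t)val;
	assert(e.pte == val);

	/* Clear: low word (R/W bits) first, then the high word. */
	e.w[0] = 0;
	assert((e.pte & 0x3) == 0);	/* entry ignored before the address is cleared */
	e.w[1] = 0;
	assert(e.pte == 0);
	return (0);
}
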
+
+static inline bool
+dmar_test_boundary(dmar_gaddr_t start, dmar_gaddr_t size,
+ dmar_gaddr_t boundary)
+{
+
+ if (boundary == 0)
+ return (true);
+ return (start + size <= ((start + boundary) & ~(boundary - 1)));
+}
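
dmar_test_boundary() above passes when [start, start + size) does not extend past the next boundary-aligned address above start (boundary is a power of two; zero means unrestricted).  A minimal userland sketch of the same arithmetic, using a local test_boundary() stand-in rather than the kernel function:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Local stand-in mirroring dmar_test_boundary(). */
static bool
test_boundary(uint64_t start, uint64_t size, uint64_t boundary)
{

	if (boundary == 0)
		return (true);
	return (start + size <= ((start + boundary) & ~(boundary - 1)));
}

int
main(void)
{

	/* The next 16KB boundary above 0x3000 is 0x4000. */
	assert(test_boundary(0x3000, 0x1000, 0x4000));	/* ends exactly at it */
	assert(!test_boundary(0x3000, 0x2000, 0x4000));	/* crosses into 0x4000 */
	assert(test_boundary(0x4000, 0x4000, 0x4000));	/* aligned, full window */
	assert(test_boundary(0x123, 0x456, 0));		/* no boundary restriction */
	return (0);
}
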
+
+#ifdef INVARIANTS
+#define TD_PREP_PINNED_ASSERT \
+ int old_td_pinned; \
+ old_td_pinned = curthread->td_pinned
+#define TD_PINNED_ASSERT \
+ KASSERT(curthread->td_pinned == old_td_pinned, \
+ ("pin count leak: %d %d %s:%d", curthread->td_pinned, \
+ old_td_pinned, __FILE__, __LINE__))
+#else
+#define TD_PREP_PINNED_ASSERT
+#define TD_PINNED_ASSERT
+#endif
+
+#endif
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
new file mode 100644
index 0000000..a846b92
--- /dev/null
+++ b/sys/x86/iommu/intel_drv.c
@@ -0,0 +1,1182 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+#if defined(__amd64__) /* || defined(__ia64__) */
+#define DEV_APIC
+#else
+#include "opt_apic.h"
+#endif
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <machine/bus.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+#include <dev/pci/pcivar.h>
+
+#ifdef DEV_APIC
+#include "pcib_if.h"
+#endif
+
+#define DMAR_FAULT_IRQ_RID 0
+#define DMAR_QI_IRQ_RID 1
+#define DMAR_REG_RID 2
+
+static devclass_t dmar_devclass;
+static device_t *dmar_devs;
+static int dmar_devcnt;
+
+typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *);
+
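+/*
+ * Walk the remapping structures which follow the ACPI DMAR table
+ * header and call iter for each one.  Iteration stops early when the
+ * callback returns zero.
+ */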
+static void
+dmar_iterate_tbl(dmar_iter_t iter, void *arg)
+{
+ ACPI_TABLE_DMAR *dmartbl;
+ ACPI_DMAR_HEADER *dmarh;
+ char *ptr, *ptrend;
+ ACPI_STATUS status;
+
+ status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
+ if (ACPI_FAILURE(status))
+ return;
+ ptr = (char *)dmartbl + sizeof(*dmartbl);
+ ptrend = (char *)dmartbl + dmartbl->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ dmarh = (ACPI_DMAR_HEADER *)ptr;
+ if (dmarh->Length <= 0) {
+ printf("dmar_identify: corrupted DMAR table, l %d\n",
+ dmarh->Length);
+ break;
+ }
+ ptr += dmarh->Length;
+ if (!iter(dmarh, arg))
+ break;
+ }
+}
+
+struct find_iter_args {
+ int i;
+ ACPI_DMAR_HARDWARE_UNIT *res;
+};
+
+static int
+dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+ struct find_iter_args *fia;
+
+ if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
+ return (1);
+
+ fia = arg;
+ if (fia->i == 0) {
+ fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh;
+ return (0);
+ }
+ fia->i--;
+ return (1);
+}
+
+static ACPI_DMAR_HARDWARE_UNIT *
+dmar_find_by_index(int idx)
+{
+ struct find_iter_args fia;
+
+ fia.i = idx;
+ fia.res = NULL;
+ dmar_iterate_tbl(dmar_find_iter, &fia);
+ return (fia.res);
+}
+
+static int
+dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+
+ if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT)
+ dmar_devcnt++;
+ return (1);
+}
+
+static int dmar_enable = 0;
+static void
+dmar_identify(driver_t *driver, device_t parent)
+{
+ ACPI_TABLE_DMAR *dmartbl;
+ ACPI_DMAR_HARDWARE_UNIT *dmarh;
+ ACPI_STATUS status;
+ int i, error;
+
+ if (acpi_disabled("dmar"))
+ return;
+ TUNABLE_INT_FETCH("hw.dmar.enable", &dmar_enable);
+ if (!dmar_enable)
+ return;
+#ifdef INVARIANTS
+ TUNABLE_INT_FETCH("hw.dmar.check_free", &dmar_check_free);
+#endif
+ TUNABLE_INT_FETCH("hw.dmar.match_verbose", &dmar_match_verbose);
+ status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
+ if (ACPI_FAILURE(status))
+ return;
+ haw = dmartbl->Width + 1;
+ if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR)
+ dmar_high = BUS_SPACE_MAXADDR;
+ else
+ dmar_high = 1ULL << (haw + 1);
+ if (bootverbose) {
+ printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width,
+ (unsigned)dmartbl->Flags,
+ "\020\001INTR_REMAP\002X2APIC_OPT_OUT");
+ }
+
+ dmar_iterate_tbl(dmar_count_iter, NULL);
+ if (dmar_devcnt == 0)
+ return;
+ dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF,
+ M_WAITOK | M_ZERO);
+ for (i = 0; i < dmar_devcnt; i++) {
+ dmarh = dmar_find_by_index(i);
+ if (dmarh == NULL) {
+ printf("dmar_identify: cannot find HWUNIT %d\n", i);
+ continue;
+ }
+ dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i);
+ if (dmar_devs[i] == NULL) {
+ printf("dmar_identify: cannot create instance %d\n", i);
+ continue;
+ }
+ error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY,
+ DMAR_REG_RID, dmarh->Address, PAGE_SIZE);
+ if (error != 0) {
+ printf(
+ "dmar%d: unable to alloc register window at 0x%08jx: error %d\n",
+ i, (uintmax_t)dmarh->Address, error);
+ device_delete_child(parent, dmar_devs[i]);
+ dmar_devs[i] = NULL;
+ }
+ }
+}
+
+static int
+dmar_probe(device_t dev)
+{
+
+ if (acpi_get_handle(dev) != NULL)
+ return (ENXIO);
+ device_set_desc(dev, "DMA remap");
+ return (BUS_PROBE_NOWILDCARD);
+}
+
+static void
+dmar_release_intr(device_t dev, struct dmar_unit *unit, int idx)
+{
+ struct dmar_msi_data *dmd;
+
+ dmd = &unit->intrs[idx];
+ if (dmd->irq == -1)
+ return;
+ bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
+ bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
+ bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
+ PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
+ dev, dmd->irq);
+ dmd->irq = -1;
+}
+
+static void
+dmar_release_resources(device_t dev, struct dmar_unit *unit)
+{
+ int i;
+
+ dmar_fini_busdma(unit);
+ dmar_fini_qi(unit);
+ dmar_fini_fault_log(unit);
+ for (i = 0; i < DMAR_INTR_TOTAL; i++)
+ dmar_release_intr(dev, unit, i);
+ if (unit->regs != NULL) {
+ bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
+ unit->regs);
+ bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
+ unit->regs);
+ unit->regs = NULL;
+ }
+ if (unit->domids != NULL) {
+ delete_unrhdr(unit->domids);
+ unit->domids = NULL;
+ }
+ if (unit->ctx_obj != NULL) {
+ vm_object_deallocate(unit->ctx_obj);
+ unit->ctx_obj = NULL;
+ }
+}
+
+static int
+dmar_alloc_irq(device_t dev, struct dmar_unit *unit, int idx)
+{
+ device_t pcib;
+ struct dmar_msi_data *dmd;
+ uint64_t msi_addr;
+ uint32_t msi_data;
+ int error;
+
+ dmd = &unit->intrs[idx];
+ pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
+ error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
+ if (error != 0) {
+ device_printf(dev, "cannot allocate %s interrupt, %d\n",
+ dmd->name, error);
+ goto err1;
+ }
+ error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
+ dmd->irq, 1);
+ if (error != 0) {
+ device_printf(dev, "cannot set %s interrupt resource, %d\n",
+ dmd->name, error);
+ goto err2;
+ }
+ dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+ &dmd->irq_rid, RF_ACTIVE);
+ if (dmd->irq_res == NULL) {
+ device_printf(dev,
+ "cannot allocate resource for %s interrupt\n", dmd->name);
+ error = ENXIO;
+ goto err3;
+ }
+ error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
+ dmd->handler, NULL, unit, &dmd->intr_handle);
+ if (error != 0) {
+ device_printf(dev, "cannot setup %s interrupt, %d\n",
+ dmd->name, error);
+ goto err4;
+ }
+ bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, dmd->name);
+ error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
+ if (error != 0) {
+ device_printf(dev, "cannot map %s interrupt, %d\n",
+ dmd->name, error);
+ goto err5;
+ }
+ dmar_write4(unit, dmd->msi_data_reg, msi_data);
+ dmar_write4(unit, dmd->msi_addr_reg, msi_addr);
+ /* Only for xAPIC mode */
+ dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32);
+ return (0);
+
+err5:
+ bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
+err4:
+ bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
+err3:
+ bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
+err2:
+ PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
+ dmd->irq = -1;
+err1:
+ return (error);
+}
+
+#ifdef DEV_APIC
+static int
+dmar_remap_intr(device_t dev, device_t child, u_int irq)
+{
+ struct dmar_unit *unit;
+ struct dmar_msi_data *dmd;
+ uint64_t msi_addr;
+ uint32_t msi_data;
+ int i, error;
+
+ unit = device_get_softc(dev);
+ for (i = 0; i < DMAR_INTR_TOTAL; i++) {
+ dmd = &unit->intrs[i];
+ if (irq == dmd->irq) {
+ error = PCIB_MAP_MSI(device_get_parent(
+ device_get_parent(dev)),
+ dev, irq, &msi_addr, &msi_data);
+ if (error != 0)
+ return (error);
+ DMAR_LOCK(unit);
+ (dmd->disable_intr)(unit);
+ dmar_write4(unit, dmd->msi_data_reg, msi_data);
+ dmar_write4(unit, dmd->msi_addr_reg, msi_addr);
+ dmar_write4(unit, dmd->msi_uaddr_reg, msi_addr >> 32);
+ (dmd->enable_intr)(unit);
+ DMAR_UNLOCK(unit);
+ return (0);
+ }
+ }
+ return (ENOENT);
+}
+#endif
+
+static void
+dmar_print_caps(device_t dev, struct dmar_unit *unit,
+ ACPI_DMAR_HARDWARE_UNIT *dmaru)
+{
+ uint32_t caphi, ecaphi;
+
+ device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, flags=<%b>\n",
+ (uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver),
+ DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment,
+ dmaru->Flags, "\020\001INCLUDE_ALL_PCI");
+ caphi = unit->hw_cap >> 32;
+ device_printf(dev, "cap=%b,", (u_int)unit->hw_cap,
+ "\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH");
+ printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD");
+ printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d",
+ DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap),
+ DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap),
+ DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap));
+ if ((unit->hw_cap & DMAR_CAP_PSI) != 0)
+ printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap));
+ printf("\n");
+ ecaphi = unit->hw_ecap >> 32;
+ device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap,
+ "\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC");
+ printf("%b, ", ecaphi, "\020");
+ printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap),
+ DMAR_ECAP_IRO(unit->hw_ecap));
+}
+
+static int
+dmar_attach(device_t dev)
+{
+ struct dmar_unit *unit;
+ ACPI_DMAR_HARDWARE_UNIT *dmaru;
+ int i, error;
+
+ unit = device_get_softc(dev);
+ unit->dev = dev;
+ unit->unit = device_get_unit(dev);
+ dmaru = dmar_find_by_index(unit->unit);
+ if (dmaru == NULL)
+ return (EINVAL);
+ unit->segment = dmaru->Segment;
+ unit->base = dmaru->Address;
+ unit->reg_rid = DMAR_REG_RID;
+ unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+ &unit->reg_rid, RF_ACTIVE);
+ if (unit->regs == NULL) {
+ device_printf(dev, "cannot allocate register window\n");
+ return (ENOMEM);
+ }
+ unit->hw_ver = dmar_read4(unit, DMAR_VER_REG);
+ unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG);
+ unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG);
+ if (bootverbose)
+ dmar_print_caps(dev, unit, dmaru);
+ dmar_quirks_post_ident(unit);
+
+ for (i = 0; i < DMAR_INTR_TOTAL; i++)
+ unit->intrs[i].irq = -1;
+
+ unit->intrs[DMAR_INTR_FAULT].name = "fault";
+ unit->intrs[DMAR_INTR_FAULT].irq_rid = DMAR_FAULT_IRQ_RID;
+ unit->intrs[DMAR_INTR_FAULT].handler = dmar_fault_intr;
+ unit->intrs[DMAR_INTR_FAULT].msi_data_reg = DMAR_FEDATA_REG;
+ unit->intrs[DMAR_INTR_FAULT].msi_addr_reg = DMAR_FEADDR_REG;
+ unit->intrs[DMAR_INTR_FAULT].msi_uaddr_reg = DMAR_FEUADDR_REG;
+ unit->intrs[DMAR_INTR_FAULT].enable_intr = dmar_enable_fault_intr;
+ unit->intrs[DMAR_INTR_FAULT].disable_intr = dmar_disable_fault_intr;
+ error = dmar_alloc_irq(dev, unit, DMAR_INTR_FAULT);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ if (DMAR_HAS_QI(unit)) {
+ unit->intrs[DMAR_INTR_QI].name = "qi";
+ unit->intrs[DMAR_INTR_QI].irq_rid = DMAR_QI_IRQ_RID;
+ unit->intrs[DMAR_INTR_QI].handler = dmar_qi_intr;
+ unit->intrs[DMAR_INTR_QI].msi_data_reg = DMAR_IEDATA_REG;
+ unit->intrs[DMAR_INTR_QI].msi_addr_reg = DMAR_IEADDR_REG;
+ unit->intrs[DMAR_INTR_QI].msi_uaddr_reg = DMAR_IEUADDR_REG;
+ unit->intrs[DMAR_INTR_QI].enable_intr = dmar_enable_qi_intr;
+ unit->intrs[DMAR_INTR_QI].disable_intr = dmar_disable_qi_intr;
+ error = dmar_alloc_irq(dev, unit, DMAR_INTR_QI);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ }
+
+ mtx_init(&unit->lock, "dmarhw", NULL, MTX_DEF);
+ unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)),
+ &unit->lock);
+
+ /*
+ * 9.2 "Context Entry":
+ * When Caching Mode (CM) field is reported as Set, the
+ * domain-id value of zero is architecturally reserved.
+ * Software must not use domain-id value of zero
+ * when CM is Set.
+ */
+ if ((unit->hw_cap & DMAR_CAP_CM) != 0)
+ alloc_unr_specific(unit->domids, 0);
+
+ unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 +
+ DMAR_CTX_CNT), 0, 0, NULL);
+
+ /*
+ * Allocate and load the root entry table pointer. Enable the
+ * address translation after the required invalidations are
+ * done.
+ */
+ dmar_pgalloc(unit->ctx_obj, 0, DMAR_PGF_WAITOK | DMAR_PGF_ZERO);
+ DMAR_LOCK(unit);
+ error = dmar_load_root_entry_ptr(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ error = dmar_inv_ctx_glob(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) {
+ error = dmar_inv_iotlb_glob(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ }
+
+ DMAR_UNLOCK(unit);
+ error = dmar_init_fault_log(unit);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ error = dmar_init_qi(unit);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ error = dmar_init_busdma(unit);
+ if (error != 0) {
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+
+#ifdef NOTYET
+ DMAR_LOCK(unit);
+ error = dmar_enable_translation(unit);
+ if (error != 0) {
+ DMAR_UNLOCK(unit);
+ dmar_release_resources(dev, unit);
+ return (error);
+ }
+ DMAR_UNLOCK(unit);
+#endif
+
+ return (0);
+}
+
+static int
+dmar_detach(device_t dev)
+{
+
+ return (EBUSY);
+}
+
+static int
+dmar_suspend(device_t dev)
+{
+
+ return (0);
+}
+
+static int
+dmar_resume(device_t dev)
+{
+
+ /* XXXKIB */
+ return (0);
+}
+
+static device_method_t dmar_methods[] = {
+ DEVMETHOD(device_identify, dmar_identify),
+ DEVMETHOD(device_probe, dmar_probe),
+ DEVMETHOD(device_attach, dmar_attach),
+ DEVMETHOD(device_detach, dmar_detach),
+ DEVMETHOD(device_suspend, dmar_suspend),
+ DEVMETHOD(device_resume, dmar_resume),
+#ifdef DEV_APIC
+ DEVMETHOD(bus_remap_intr, dmar_remap_intr),
+#endif
+ DEVMETHOD_END
+};
+
+static driver_t dmar_driver = {
+ "dmar",
+ dmar_methods,
+ sizeof(struct dmar_unit),
+};
+
+DRIVER_MODULE(dmar, acpi, dmar_driver, dmar_devclass, 0, 0);
+MODULE_DEPEND(dmar, acpi, 1, 1, 1);
+
+static void
+dmar_print_path(device_t dev, const char *banner, int busno, int depth,
+ const ACPI_DMAR_PCI_PATH *path)
+{
+ int i;
+
+ device_printf(dev, "%s [%d, ", banner, busno);
+ for (i = 0; i < depth; i++) {
+ if (i != 0)
+ printf(", ");
+ printf("(%d, %d)", path[i].Device, path[i].Function);
+ }
+ printf("]\n");
+}
+
+static int
+dmar_dev_depth(device_t child)
+{
+ devclass_t pci_class;
+ device_t bus, pcib;
+ int depth;
+
+ pci_class = devclass_find("pci");
+ for (depth = 1; ; depth++) {
+ bus = device_get_parent(child);
+ pcib = device_get_parent(bus);
+ if (device_get_devclass(device_get_parent(pcib)) !=
+ pci_class)
+ return (depth);
+ child = pcib;
+ }
+}
+
+static void
+dmar_dev_path(device_t child, int *busno, ACPI_DMAR_PCI_PATH *path, int depth)
+{
+ devclass_t pci_class;
+ device_t bus, pcib;
+
+ pci_class = devclass_find("pci");
+ for (depth--; depth != -1; depth--) {
+ path[depth].Device = pci_get_slot(child);
+ path[depth].Function = pci_get_function(child);
+ bus = device_get_parent(child);
+ pcib = device_get_parent(bus);
+ if (device_get_devclass(device_get_parent(pcib)) !=
+ pci_class) {
+ /* reached a host bridge */
+ *busno = pcib_get_bus(bus);
+ return;
+ }
+ child = pcib;
+ }
+ panic("wrong depth");
+}
+
+static int
+dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1,
+ int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2,
+ enum AcpiDmarScopeType scope_type)
+{
+ int i, depth;
+
+ if (busno1 != busno2)
+ return (0);
+ if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2)
+ return (0);
+ depth = depth1;
+ if (depth2 < depth)
+ depth = depth2;
+ for (i = 0; i < depth; i++) {
+ if (path1[i].Device != path2[i].Device ||
+ path1[i].Function != path2[i].Function)
+ return (0);
+ }
+ return (1);
+}
+
+static int
+dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, device_t dev,
+ int dev_busno, const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len)
+{
+ ACPI_DMAR_PCI_PATH *path;
+ int path_len;
+
+ if (devscope->Length < sizeof(*devscope)) {
+ printf("dmar_find: corrupted DMAR table, dl %d\n",
+ devscope->Length);
+ return (-1);
+ }
+ if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
+ devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
+ return (0);
+ path_len = devscope->Length - sizeof(*devscope);
+ if (path_len % 2 != 0) {
+ printf("dmar_find_bsf: corrupted DMAR table, dl %d\n",
+ devscope->Length);
+ return (-1);
+ }
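+	/* Each ACPI_DMAR_PCI_PATH element is two bytes: device, function. */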
+ path_len /= 2;
+ path = (ACPI_DMAR_PCI_PATH *)(devscope + 1);
+ if (path_len == 0) {
+ printf("dmar_find: corrupted DMAR table, dl %d\n",
+ devscope->Length);
+ return (-1);
+ }
+ if (dmar_match_verbose)
+ dmar_print_path(dev, "DMAR", devscope->Bus, path_len, path);
+
+ return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno,
+ dev_path, dev_path_len, devscope->EntryType));
+}
+
+struct dmar_unit *
+dmar_find(device_t dev)
+{
+ device_t dmar_dev;
+ ACPI_DMAR_HARDWARE_UNIT *dmarh;
+ ACPI_DMAR_DEVICE_SCOPE *devscope;
+ char *ptr, *ptrend;
+ int i, match, dev_domain, dev_busno, dev_path_len;
+
+ dmar_dev = NULL;
+ dev_domain = pci_get_domain(dev);
+ dev_path_len = dmar_dev_depth(dev);
+ ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
+ dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
+ if (dmar_match_verbose)
+ dmar_print_path(dev, "PCI", dev_busno, dev_path_len, dev_path);
+
+ for (i = 0; i < dmar_devcnt; i++) {
+ if (dmar_devs[i] == NULL)
+ continue;
+ dmarh = dmar_find_by_index(i);
+ if (dmarh == NULL)
+ continue;
+ if (dmarh->Segment != dev_domain)
+ continue;
+ if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) {
+ dmar_dev = dmar_devs[i];
+ if (dmar_match_verbose) {
+ device_printf(dev,
+ "pci%d:%d:%d:%d matched dmar%d INCLUDE_ALL\n",
+ dev_domain, pci_get_bus(dev),
+ pci_get_slot(dev),
+ pci_get_function(dev),
+ ((struct dmar_unit *)device_get_softc(
+ dmar_dev))->unit);
+ }
+ goto found;
+ }
+ ptr = (char *)dmarh + sizeof(*dmarh);
+ ptrend = (char *)dmarh + dmarh->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
+ ptr += devscope->Length;
+ if (dmar_match_verbose) {
+ device_printf(dev,
+ "pci%d:%d:%d:%d matching dmar%d\n",
+ dev_domain, pci_get_bus(dev),
+ pci_get_slot(dev),
+ pci_get_function(dev),
+ ((struct dmar_unit *)device_get_softc(
+ dmar_devs[i]))->unit);
+ }
+ match = dmar_match_devscope(devscope, dev, dev_busno,
+ dev_path, dev_path_len);
+ if (dmar_match_verbose) {
+ if (match == -1)
+ printf("table error\n");
+ else if (match == 0)
+ printf("not matched\n");
+ else
+ printf("matched\n");
+ }
+ if (match == -1)
+ return (NULL);
+ else if (match == 1) {
+ dmar_dev = dmar_devs[i];
+ goto found;
+ }
+ }
+ }
+ return (NULL);
+found:
+ return (device_get_softc(dmar_dev));
+}
+
+struct rmrr_iter_args {
+ struct dmar_ctx *ctx;
+ device_t dev;
+ int dev_domain;
+ int dev_busno;
+ ACPI_DMAR_PCI_PATH *dev_path;
+ int dev_path_len;
+ struct dmar_map_entries_tailq *rmrr_entries;
+};
+
+static int
+dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+ struct rmrr_iter_args *ria;
+ ACPI_DMAR_RESERVED_MEMORY *resmem;
+ ACPI_DMAR_DEVICE_SCOPE *devscope;
+ struct dmar_map_entry *entry;
+ char *ptr, *ptrend;
+ int match;
+
+ if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
+ return (1);
+
+ ria = arg;
+ resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
+ if (dmar_match_verbose) {
+ printf("RMRR [%jx,%jx] segment %d\n",
+ (uintmax_t)resmem->BaseAddress,
+ (uintmax_t)resmem->EndAddress,
+ resmem->Segment);
+ }
+ if (resmem->Segment != ria->dev_domain)
+ return (1);
+
+ ptr = (char *)resmem + sizeof(*resmem);
+ ptrend = (char *)resmem + resmem->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
+ ptr += devscope->Length;
+ match = dmar_match_devscope(devscope, ria->dev, ria->dev_busno,
+ ria->dev_path, ria->dev_path_len);
+ if (match == 1) {
+ if (dmar_match_verbose)
+ printf("matched\n");
+ entry = dmar_gas_alloc_entry(ria->ctx, DMAR_PGF_WAITOK);
+ entry->start = resmem->BaseAddress;
+ /* The RMRR entry end address is inclusive. */
+ entry->end = resmem->EndAddress;
+ TAILQ_INSERT_TAIL(ria->rmrr_entries, entry,
+ unroll_link);
+ } else if (dmar_match_verbose) {
+ printf("not matched, err %d\n", match);
+ }
+ }
+
+ return (1);
+}
+
+void
+dmar_ctx_parse_rmrr(struct dmar_ctx *ctx, device_t dev,
+ struct dmar_map_entries_tailq *rmrr_entries)
+{
+ struct rmrr_iter_args ria;
+
+ ria.dev_domain = pci_get_domain(dev);
+ ria.dev_path_len = dmar_dev_depth(dev);
+ ACPI_DMAR_PCI_PATH dev_path[ria.dev_path_len];
+ dmar_dev_path(dev, &ria.dev_busno, dev_path, ria.dev_path_len);
+
+ if (dmar_match_verbose) {
+ device_printf(dev, "parsing RMRR entries for ");
+ dmar_print_path(dev, "PCI", ria.dev_busno, ria.dev_path_len,
+ dev_path);
+ }
+
+ ria.ctx = ctx;
+ ria.dev = dev;
+ ria.dev_path = dev_path;
+ ria.rmrr_entries = rmrr_entries;
+ dmar_iterate_tbl(dmar_rmrr_iter, &ria);
+}
+
+struct inst_rmrr_iter_args {
+ struct dmar_unit *dmar;
+};
+
+static device_t
+dmar_path_dev(int segment, int path_len, int busno,
+ const ACPI_DMAR_PCI_PATH *path)
+{
+ devclass_t pci_class;
+ device_t bus, pcib, dev;
+ int i;
+
+ pci_class = devclass_find("pci");
+ dev = NULL;
+ for (i = 0; i < path_len; i++, path++) {
+ dev = pci_find_dbsf(segment, busno, path->Device,
+ path->Function);
+ if (dev == NULL)
+ break;
+ if (i != path_len - 1) {
+ bus = device_get_parent(dev);
+ pcib = device_get_parent(bus);
+ if (device_get_devclass(device_get_parent(pcib)) !=
+ pci_class)
+ return (NULL);
+ }
+ busno = pcib_get_bus(dev);
+ }
+ return (dev);
+}
+
+static int
+dmar_inst_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
+{
+ const ACPI_DMAR_RESERVED_MEMORY *resmem;
+ const ACPI_DMAR_DEVICE_SCOPE *devscope;
+ struct inst_rmrr_iter_args *iria;
+ const char *ptr, *ptrend;
+ struct dmar_unit *dev_dmar;
+ device_t dev;
+
+ if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
+ return (1);
+
+ iria = arg;
+ resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
+ if (resmem->Segment != iria->dmar->segment)
+ return (1);
+ if (dmar_match_verbose) {
+ printf("dmar%d: RMRR [%jx,%jx]\n", iria->dmar->unit,
+ (uintmax_t)resmem->BaseAddress,
+ (uintmax_t)resmem->EndAddress);
+ }
+
+ ptr = (const char *)resmem + sizeof(*resmem);
+ ptrend = (const char *)resmem + resmem->Header.Length;
+ for (;;) {
+ if (ptr >= ptrend)
+ break;
+ devscope = (const ACPI_DMAR_DEVICE_SCOPE *)ptr;
+ ptr += devscope->Length;
+ /* XXXKIB bridge */
+ if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT)
+ continue;
+ if (dmar_match_verbose) {
+ dmar_print_path(iria->dmar->dev, "RMRR scope",
+ devscope->Bus, (devscope->Length -
+ sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2,
+ (const ACPI_DMAR_PCI_PATH *)(devscope + 1));
+ }
+ dev = dmar_path_dev(resmem->Segment, (devscope->Length -
+ sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2, devscope->Bus,
+ (const ACPI_DMAR_PCI_PATH *)(devscope + 1));
+ if (dev == NULL) {
+ if (dmar_match_verbose)
+ printf("null dev\n");
+ continue;
+ }
+ dev_dmar = dmar_find(dev);
+ if (dev_dmar != iria->dmar) {
+ if (dmar_match_verbose) {
+ printf("dmar%d matched, skipping\n",
+ dev_dmar->unit);
+ }
+ continue;
+ }
+ if (dmar_match_verbose)
+ printf("matched, instantiating RMRR context\n");
+ dmar_instantiate_ctx(iria->dmar, dev, true);
+ }
+
+ return (1);
+
+}
+
+/*
+ * Pre-create all contexts for the DMAR which have RMRR entries.
+ */
+int
+dmar_instantiate_rmrr_ctxs(struct dmar_unit *dmar)
+{
+ struct inst_rmrr_iter_args iria;
+ int error;
+
+ if (!dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR))
+ return (0);
+
+ error = 0;
+ iria.dmar = dmar;
+ if (dmar_match_verbose)
+ printf("dmar%d: instantiating RMRR contexts\n", dmar->unit);
+ dmar_iterate_tbl(dmar_inst_rmrr_iter, &iria);
+ DMAR_LOCK(dmar);
+ if (!LIST_EMPTY(&dmar->contexts)) {
+ KASSERT((dmar->hw_gcmd & DMAR_GCMD_TE) == 0,
+ ("dmar%d: RMRR not handled but translation is already enabled",
+ dmar->unit));
+ error = dmar_enable_translation(dmar);
+ }
+ dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR);
+ return (error);
+}
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#include <ddb/db_lex.h>
+
+static void
+dmar_print_ctx_entry(const struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *l, *r;
+
+ db_printf(
+ " start %jx end %jx free_after %jx free_down %jx flags %x ",
+ entry->start, entry->end, entry->free_after, entry->free_down,
+ entry->flags);
+ db_printf("left ");
+ l = RB_LEFT(entry, rb_entry);
+ if (l == NULL)
+ db_printf("NULL ");
+ else
+ db_printf("%jx ", l->start);
+ db_printf("right ");
+ r = RB_RIGHT(entry, rb_entry);
+ if (r == NULL)
+ db_printf("NULL");
+ else
+ db_printf("%jx", r->start);
+ db_printf("\n");
+}
+
+static void
+dmar_print_ctx(struct dmar_ctx *ctx, bool show_mappings)
+{
+ struct dmar_map_entry *entry;
+
+ db_printf(
+ " @%p pci%d:%d:%d dom %d mgaw %d agaw %d pglvl %d end %jx\n"
+ " refs %d flags %x pgobj %p map_ents %u loads %lu unloads %lu\n",
+ ctx, ctx->bus, ctx->slot, ctx->func, ctx->domain, ctx->mgaw,
+ ctx->agaw, ctx->pglvl, (uintmax_t)ctx->end, ctx->refs,
+ ctx->flags, ctx->pgtbl_obj, ctx->entries_cnt, ctx->loads,
+ ctx->unloads);
+ if (!show_mappings)
+ return;
+ db_printf(" mapped:\n");
+ RB_FOREACH(entry, dmar_gas_entries_tree, &ctx->rb_root) {
+ dmar_print_ctx_entry(entry);
+ if (db_pager_quit)
+ break;
+ }
+ if (db_pager_quit)
+ return;
+ db_printf(" unloading:\n");
+ TAILQ_FOREACH(entry, &ctx->unload_entries, dmamap_link) {
+ dmar_print_ctx_entry(entry);
+ if (db_pager_quit)
+ break;
+ }
+}
+
+DB_FUNC(dmar_ctx, db_dmar_print_ctx, db_show_table, CS_OWN, NULL)
+{
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+ bool show_mappings, valid;
+ int domain, bus, device, function, i, t;
+ db_expr_t radix;
+
+ valid = false;
+ radix = db_radix;
+ db_radix = 10;
+ t = db_read_token();
+ if (t == tSLASH) {
+ t = db_read_token();
+ if (t != tIDENT) {
+ db_printf("Bad modifier\n");
+ db_radix = radix;
+ db_skip_to_eol();
+ return;
+ }
+ show_mappings = strchr(db_tok_string, 'm') != NULL;
+ t = db_read_token();
+ } else {
+ show_mappings = false;
+ }
+ if (t == tNUMBER) {
+ domain = db_tok_number;
+ t = db_read_token();
+ if (t == tNUMBER) {
+ bus = db_tok_number;
+ t = db_read_token();
+ if (t == tNUMBER) {
+ device = db_tok_number;
+ t = db_read_token();
+ if (t == tNUMBER) {
+ function = db_tok_number;
+ valid = true;
+ }
+ }
+ }
+ }
+ db_radix = radix;
+ db_skip_to_eol();
+ if (!valid) {
+ db_printf("usage: show dmar_ctx [/m] "
+ "<domain> <bus> <device> <func>\n");
+ return;
+ }
+ for (i = 0; i < dmar_devcnt; i++) {
+ unit = device_get_softc(dmar_devs[i]);
+ LIST_FOREACH(ctx, &unit->contexts, link) {
+ if (domain == unit->segment && bus == ctx->bus &&
+ device == ctx->slot && function == ctx->func) {
+ dmar_print_ctx(ctx, show_mappings);
+ goto out;
+ }
+ }
+ }
+out:;
+}
+
+static void
+dmar_print_one(int idx, bool show_ctxs, bool show_mappings)
+{
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+ int i, frir;
+
+ unit = device_get_softc(dmar_devs[idx]);
+ db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->unit, unit,
+ dmar_read8(unit, DMAR_RTADDR_REG), dmar_read4(unit, DMAR_VER_REG));
+ db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n",
+ (uintmax_t)dmar_read8(unit, DMAR_CAP_REG),
+ (uintmax_t)dmar_read8(unit, DMAR_ECAP_REG),
+ dmar_read4(unit, DMAR_GSTS_REG),
+ dmar_read4(unit, DMAR_FSTS_REG),
+ dmar_read4(unit, DMAR_FECTL_REG));
+ db_printf("fed 0x%x fea 0x%x feua 0x%x\n",
+ dmar_read4(unit, DMAR_FEDATA_REG),
+ dmar_read4(unit, DMAR_FEADDR_REG),
+ dmar_read4(unit, DMAR_FEUADDR_REG));
+ db_printf("primary fault log:\n");
+ for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) {
+ frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16;
+ db_printf(" %d at 0x%x: %jx %jx\n", i, frir,
+ (uintmax_t)dmar_read8(unit, frir),
+ (uintmax_t)dmar_read8(unit, frir + 8));
+ }
+ if (DMAR_HAS_QI(unit)) {
+ db_printf("ied 0x%x iea 0x%x ieua 0x%x\n",
+ dmar_read4(unit, DMAR_IEDATA_REG),
+ dmar_read4(unit, DMAR_IEADDR_REG),
+ dmar_read4(unit, DMAR_IEUADDR_REG));
+ if (unit->qi_enabled) {
+ db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) "
+ "size 0x%jx\n"
+ " head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n"
+ " hw compl 0x%x@%p/phys@%jx next seq 0x%x gen 0x%x\n",
+ (uintmax_t)unit->inv_queue,
+ (uintmax_t)dmar_read8(unit, DMAR_IQA_REG),
+ (uintmax_t)unit->inv_queue_size,
+ dmar_read4(unit, DMAR_IQH_REG),
+ dmar_read4(unit, DMAR_IQT_REG),
+ unit->inv_queue_avail,
+ dmar_read4(unit, DMAR_ICS_REG),
+ dmar_read4(unit, DMAR_IECTL_REG),
+ unit->inv_waitd_seq_hw,
+ &unit->inv_waitd_seq_hw,
+ (uintmax_t)unit->inv_waitd_seq_hw_phys,
+ unit->inv_waitd_seq,
+ unit->inv_waitd_gen);
+ } else {
+ db_printf("qi is disabled\n");
+ }
+ }
+ if (show_ctxs) {
+ db_printf("contexts:\n");
+ LIST_FOREACH(ctx, &unit->contexts, link) {
+ dmar_print_ctx(ctx, show_mappings);
+ if (db_pager_quit)
+ break;
+ }
+ }
+}
+
+DB_SHOW_COMMAND(dmar, db_dmar_print)
+{
+ bool show_ctxs, show_mappings;
+
+ show_ctxs = strchr(modif, 'c') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+ if (!have_addr) {
+ db_printf("usage: show dmar [/c] [/m] index\n");
+ return;
+ }
+ dmar_print_one((int)addr, show_ctxs, show_mappings);
+}
+
+DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars)
+{
+ int i;
+ bool show_ctxs, show_mappings;
+
+ show_ctxs = strchr(modif, 'c') != NULL;
+ show_mappings = strchr(modif, 'm') != NULL;
+
+ for (i = 0; i < dmar_devcnt; i++) {
+ dmar_print_one(i, show_ctxs, show_mappings);
+ if (db_pager_quit)
+ break;
+ }
+}
+#endif
diff --git a/sys/x86/iommu/intel_fault.c b/sys/x86/iommu/intel_fault.c
new file mode 100644
index 0000000..18f8fef
--- /dev/null
+++ b/sys/x86/iommu/intel_fault.c
@@ -0,0 +1,315 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <machine/bus.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+/*
+ * Fault interrupt handling for DMARs.  If advanced fault logging is
+ * not implemented by the hardware, the code emulates it.  The fast
+ * interrupt handler flushes the fault registers into a circular
+ * buffer at unit->fault_log and schedules a task.
+ *
+ * The fast handler is used since faults usually come in bursts, and
+ * the number of fault log registers is limited, e.g. down to one for
+ * the 5400 MCH.  We are trying to reduce the latency for clearing the
+ * fault register file.  The task is usually long-running, since
+ * printf() is slow, but this is not problematic because bursts are rare.
+ *
+ * For the same reason, each translation unit task is executed in its
+ * own thread.
+ *
+ * XXXKIB It seems there is no hardware available which implements
+ * advanced fault logging, so the code to handle AFL is not written.
+ */
+
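+/*
+ * Advance the index into the circular fault log.  Each logged fault
+ * occupies two uint64_t slots, so the index moves in steps of two and
+ * wraps at fault_log_size.
+ */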
+static int
+dmar_fault_next(struct dmar_unit *unit, int faultp)
+{
+
+ faultp += 2;
+ if (faultp == unit->fault_log_size)
+ faultp = 0;
+ return (faultp);
+}
+
+static void
+dmar_fault_intr_clear(struct dmar_unit *unit, uint32_t fsts)
+{
+ uint32_t clear;
+
+ clear = 0;
+ if ((fsts & DMAR_FSTS_ITE) != 0) {
+ printf("DMAR%d: Invalidation timed out\n", unit->unit);
+ clear |= DMAR_FSTS_ITE;
+ }
+ if ((fsts & DMAR_FSTS_ICE) != 0) {
+ printf("DMAR%d: Invalidation completion error\n",
+ unit->unit);
+ clear |= DMAR_FSTS_ICE;
+ }
+ if ((fsts & DMAR_FSTS_IQE) != 0) {
+ printf("DMAR%d: Invalidation queue error\n",
+ unit->unit);
+ clear |= DMAR_FSTS_IQE;
+ }
+ if ((fsts & DMAR_FSTS_APF) != 0) {
+ printf("DMAR%d: Advanced pending fault\n", unit->unit);
+ clear |= DMAR_FSTS_APF;
+ }
+ if ((fsts & DMAR_FSTS_AFO) != 0) {
+ printf("DMAR%d: Advanced fault overflow\n", unit->unit);
+ clear |= DMAR_FSTS_AFO;
+ }
+ if (clear != 0)
+ dmar_write4(unit, DMAR_FSTS_REG, clear);
+}
+
+int
+dmar_fault_intr(void *arg)
+{
+ struct dmar_unit *unit;
+ uint64_t fault_rec[2];
+ uint32_t fsts;
+ int fri, frir, faultp;
+ bool enqueue;
+
+ unit = arg;
+ enqueue = false;
+ fsts = dmar_read4(unit, DMAR_FSTS_REG);
+ dmar_fault_intr_clear(unit, fsts);
+
+ if ((fsts & DMAR_FSTS_PPF) == 0)
+ goto done;
+
+ fri = DMAR_FSTS_FRI(fsts);
+ for (;;) {
+ frir = (DMAR_CAP_FRO(unit->hw_cap) + fri) * 16;
+ fault_rec[1] = dmar_read8(unit, frir + 8);
+ if ((fault_rec[1] & DMAR_FRCD2_F) == 0)
+ break;
+ fault_rec[0] = dmar_read8(unit, frir);
+ dmar_write4(unit, frir + 12, DMAR_FRCD2_F32);
+ DMAR_FAULT_LOCK(unit);
+ faultp = unit->fault_log_head;
+ if (dmar_fault_next(unit, faultp) == unit->fault_log_tail) {
+ /* XXXKIB log overflow */
+ } else {
+ unit->fault_log[faultp] = fault_rec[0];
+ unit->fault_log[faultp + 1] = fault_rec[1];
+ unit->fault_log_head = dmar_fault_next(unit, faultp);
+ enqueue = true;
+ }
+ DMAR_FAULT_UNLOCK(unit);
+ fri += 1;
+ if (fri >= DMAR_CAP_NFR(unit->hw_cap))
+ fri = 0;
+ }
+
+done:
+ /*
+ * On SandyBridge, due to errata BJ124, IvyBridge errata
+ * BV100, and Haswell errata HSD40, "Spurious Intel VT-d
+ * Interrupts May Occur When the PFO Bit is Set". Handle the
+ * cases by clearing overflow bit even if no fault is
+ * reported.
+ *
+	 * On IvyBridge, errata BV30 states that clearing the
+	 * DMAR_FRCD2_F bit in the fault register causes a spurious
+	 * interrupt.  Do nothing.
+	 */
+ if ((fsts & DMAR_FSTS_PFO) != 0) {
+ printf("DMAR%d: Fault Overflow\n", unit->unit);
+ dmar_write4(unit, DMAR_FSTS_REG, DMAR_FSTS_PFO);
+ }
+
+ if (enqueue) {
+ taskqueue_enqueue_fast(unit->fault_taskqueue,
+ &unit->fault_task);
+ }
+ return (FILTER_HANDLED);
+}
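
The handler and dmar_fault_task() below share fault_log as a single-producer, single-consumer ring: head == tail means empty, and the handler drops a record when advancing head would collide with tail, so one record of space is sacrificed.  A minimal userland sketch of that convention, with a hypothetical log_next() and a small LOG_SIZE:

#include <assert.h>

#define	LOG_SIZE	8	/* must be even: four two-word records */

static int
log_next(int idx)
{

	idx += 2;
	if (idx == LOG_SIZE)
		idx = 0;
	return (idx);
}

int
main(void)
{
	int head, tail, stored;

	head = tail = 0;			/* empty: head == tail */
	for (stored = 0; log_next(head) != tail; stored++)
		head = log_next(head);		/* producer appends a record */
	assert(stored == LOG_SIZE / 2 - 1);	/* one record slot is sacrificed */
	tail = log_next(tail);			/* consumer removes one record */
	assert(log_next(head) != tail);		/* there is room again */
	return (0);
}
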
+
+static void
+dmar_fault_task(void *arg, int pending __unused)
+{
+ struct dmar_unit *unit;
+ struct dmar_ctx *ctx;
+ uint64_t fault_rec[2];
+ int sid, bus, slot, func, faultp;
+
+ unit = arg;
+ DMAR_FAULT_LOCK(unit);
+ for (;;) {
+ faultp = unit->fault_log_tail;
+ if (faultp == unit->fault_log_head)
+ break;
+
+ fault_rec[0] = unit->fault_log[faultp];
+ fault_rec[1] = unit->fault_log[faultp + 1];
+ unit->fault_log_tail = dmar_fault_next(unit, faultp);
+ DMAR_FAULT_UNLOCK(unit);
+
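+		/* Source-id layout: bus[15:8], device[7:3], function[2:0]. */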
+ sid = DMAR_FRCD2_SID(fault_rec[1]);
+		bus = (sid >> 8) & 0xff;
+ slot = (sid >> 3) & 0x1f;
+ func = sid & 0x7;
+ printf("DMAR%d: ", unit->unit);
+ DMAR_LOCK(unit);
+ ctx = dmar_find_ctx_locked(unit, bus, slot, func);
+ if (ctx == NULL) {
+ printf("<unknown dev>:");
+ } else {
+ ctx->flags |= DMAR_CTX_FAULTED;
+ ctx->last_fault_rec[0] = fault_rec[0];
+ ctx->last_fault_rec[1] = fault_rec[1];
+ device_print_prettyname(ctx->ctx_tag.owner);
+ }
+ DMAR_UNLOCK(unit);
+ printf(
+ "pci%d:%d:%d fault acc %x adt 0x%x reason 0x%x addr %jx\n",
+ bus, slot, func, DMAR_FRCD2_T(fault_rec[1]),
+ DMAR_FRCD2_AT(fault_rec[1]), DMAR_FRCD2_FR(fault_rec[1]),
+ (uintmax_t)fault_rec[0]);
+ DMAR_FAULT_LOCK(unit);
+ }
+ DMAR_FAULT_UNLOCK(unit);
+}
+
+static void
+dmar_clear_faults(struct dmar_unit *unit)
+{
+ uint32_t frec, frir, fsts;
+ int i;
+
+ for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) {
+ frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16;
+ frec = dmar_read4(unit, frir + 12);
+ if ((frec & DMAR_FRCD2_F32) == 0)
+ continue;
+ dmar_write4(unit, frir + 12, DMAR_FRCD2_F32);
+ }
+ fsts = dmar_read4(unit, DMAR_FSTS_REG);
+ dmar_write4(unit, DMAR_FSTS_REG, fsts);
+}
+
+int
+dmar_init_fault_log(struct dmar_unit *unit)
+{
+
+ mtx_init(&unit->fault_lock, "dmarflt", NULL, MTX_SPIN);
+ unit->fault_log_size = 256; /* 128 fault log entries */
+ TUNABLE_INT_FETCH("hw.dmar.fault_log_size", &unit->fault_log_size);
+ if (unit->fault_log_size % 2 != 0)
+		panic("hw.dmar.fault_log_size must be even");
+ unit->fault_log = malloc(sizeof(uint64_t) * unit->fault_log_size,
+ M_DEVBUF, M_WAITOK | M_ZERO);
+
+ TASK_INIT(&unit->fault_task, 0, dmar_fault_task, unit);
+ unit->fault_taskqueue = taskqueue_create_fast("dmar", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->fault_taskqueue);
+ taskqueue_start_threads(&unit->fault_taskqueue, 1, PI_AV,
+ "dmar%d fault taskq", unit->unit);
+
+ DMAR_LOCK(unit);
+ dmar_disable_fault_intr(unit);
+ dmar_clear_faults(unit);
+ dmar_enable_fault_intr(unit);
+ DMAR_UNLOCK(unit);
+
+ return (0);
+}
+
+void
+dmar_fini_fault_log(struct dmar_unit *unit)
+{
+
+ DMAR_LOCK(unit);
+ dmar_disable_fault_intr(unit);
+ DMAR_UNLOCK(unit);
+
+ if (unit->fault_taskqueue == NULL)
+ return;
+
+ taskqueue_drain(unit->fault_taskqueue, &unit->fault_task);
+ taskqueue_free(unit->fault_taskqueue);
+ unit->fault_taskqueue = NULL;
+ mtx_destroy(&unit->fault_lock);
+
+ free(unit->fault_log, M_DEVBUF);
+ unit->fault_log = NULL;
+ unit->fault_log_head = unit->fault_log_tail = 0;
+}
+
+void
+dmar_enable_fault_intr(struct dmar_unit *unit)
+{
+ uint32_t fectl;
+
+ DMAR_ASSERT_LOCKED(unit);
+ fectl = dmar_read4(unit, DMAR_FECTL_REG);
+ fectl &= ~DMAR_FECTL_IM;
+ dmar_write4(unit, DMAR_FECTL_REG, fectl);
+}
+
+void
+dmar_disable_fault_intr(struct dmar_unit *unit)
+{
+ uint32_t fectl;
+
+ DMAR_ASSERT_LOCKED(unit);
+ fectl = dmar_read4(unit, DMAR_FECTL_REG);
+ dmar_write4(unit, DMAR_FECTL_REG, fectl | DMAR_FECTL_IM);
+}
diff --git a/sys/x86/iommu/intel_gas.c b/sys/x86/iommu/intel_gas.c
new file mode 100644
index 0000000..51ad151
--- /dev/null
+++ b/sys/x86/iommu/intel_gas.c
@@ -0,0 +1,724 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define RB_AUGMENT(entry) dmar_gas_augment_entry(entry)
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <dev/pci/pcivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/uma.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+/*
+ * Guest Address Space management.
+ */
+
+static uma_zone_t dmar_map_entry_zone;
+
+static void
+intel_gas_init(void)
+{
+
+ dmar_map_entry_zone = uma_zcreate("DMAR_MAP_ENTRY",
+ sizeof(struct dmar_map_entry), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL);
+
+struct dmar_map_entry *
+dmar_gas_alloc_entry(struct dmar_ctx *ctx, u_int flags)
+{
+ struct dmar_map_entry *res;
+
+ KASSERT((flags & ~(DMAR_PGF_WAITOK)) == 0,
+ ("unsupported flags %x", flags));
+
+ res = uma_zalloc(dmar_map_entry_zone, ((flags & DMAR_PGF_WAITOK) !=
+ 0 ? M_WAITOK : M_NOWAIT) | M_ZERO);
+ if (res != NULL) {
+ res->ctx = ctx;
+ atomic_add_int(&ctx->entries_cnt, 1);
+ }
+ return (res);
+}
+
+void
+dmar_gas_free_entry(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+
+ KASSERT(ctx == entry->ctx,
+ ("mismatched free ctx %p entry %p entry->ctx %p", ctx,
+ entry, entry->ctx));
+ atomic_subtract_int(&ctx->entries_cnt, 1);
+ uma_zfree(dmar_map_entry_zone, entry);
+}
+
+static int
+dmar_gas_cmp_entries(struct dmar_map_entry *a, struct dmar_map_entry *b)
+{
+
+	/* The last entry has zero size, so <=. */
+ KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)",
+ a, (uintmax_t)a->start, (uintmax_t)a->end));
+ KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)",
+ b, (uintmax_t)b->start, (uintmax_t)b->end));
+ KASSERT(a->end <= b->start || b->end <= a->start ||
+ a->end == a->start || b->end == b->start,
+ ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)",
+ a, (uintmax_t)a->start, (uintmax_t)a->end,
+ b, (uintmax_t)b->start, (uintmax_t)b->end));
+
+ if (a->end < b->end)
+ return (-1);
+ else if (b->end < a->end)
+ return (1);
+ return (0);
+}
+
+static void
+dmar_gas_augment_entry(struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *l, *r;
+
+ for (; entry != NULL; entry = RB_PARENT(entry, rb_entry)) {
+ l = RB_LEFT(entry, rb_entry);
+ r = RB_RIGHT(entry, rb_entry);
+ if (l == NULL && r == NULL) {
+ entry->free_down = entry->free_after;
+ } else if (l == NULL && r != NULL) {
+ entry->free_down = MAX(entry->free_after, r->free_down);
+ } else if (/*l != NULL && */ r == NULL) {
+ entry->free_down = MAX(entry->free_after, l->free_down);
+ } else /* if (l != NULL && r != NULL) */ {
+ entry->free_down = MAX(entry->free_after, l->free_down);
+ entry->free_down = MAX(entry->free_down, r->free_down);
+ }
+ }
+}
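
dmar_gas_augment_entry() keeps the usual augmented-tree invariant: a node's free_down is the largest free_after anywhere in its subtree, which is what lets dmar_gas_lowermatch() below prune subtrees whose free_down is too small for the request.  A minimal userland sketch of the per-node recomputation, with a hypothetical struct node and a hand-built three-node tree standing in for the RB tree:

#include <assert.h>
#include <stdint.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

struct node {
	struct node *left, *right;
	uint64_t free_after;	/* gap between this entry and the next */
	uint64_t free_down;	/* max free_after in this subtree */
};

/* Recompute free_down for one node, mirroring the augment step. */
static void
augment(struct node *n)
{
	uint64_t v;

	v = n->free_after;
	if (n->left != NULL)
		v = MAX(v, n->left->free_down);
	if (n->right != NULL)
		v = MAX(v, n->right->free_down);
	n->free_down = v;
}

int
main(void)
{
	struct node l = { NULL, NULL, 0x3000, 0 };
	struct node r = { NULL, NULL, 0x9000, 0 };
	struct node root = { &l, &r, 0x1000, 0 };

	augment(&l);
	augment(&r);
	augment(&root);		/* bottom-up, as the RB_AUGMENT walk does */
	assert(root.free_down == 0x9000);
	/* A request larger than free_down cannot fit anywhere below root. */
	assert(!(0xa000 <= root.free_down));
	return (0);
}
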
+
+RB_GENERATE(dmar_gas_entries_tree, dmar_map_entry, rb_entry,
+ dmar_gas_cmp_entries);
+
+static void
+dmar_gas_fix_free(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *next;
+
+ next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ entry->free_after = (next != NULL ? next->start : ctx->end) -
+ entry->end;
+ dmar_gas_augment_entry(entry);
+}
+
+#ifdef INVARIANTS
+static void
+dmar_gas_check_free(struct dmar_ctx *ctx)
+{
+ struct dmar_map_entry *entry, *next, *l, *r;
+ dmar_gaddr_t v;
+
+ RB_FOREACH(entry, dmar_gas_entries_tree, &ctx->rb_root) {
+ KASSERT(ctx == entry->ctx,
+ ("mismatched free ctx %p entry %p entry->ctx %p", ctx,
+ entry, entry->ctx));
+ next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ if (next == NULL) {
+ MPASS(entry->free_after == ctx->end - entry->end);
+ } else {
+			MPASS(entry->free_after == next->start - entry->end);
+ MPASS(entry->end <= next->start);
+ }
+ l = RB_LEFT(entry, rb_entry);
+ r = RB_RIGHT(entry, rb_entry);
+ if (l == NULL && r == NULL) {
+ MPASS(entry->free_down == entry->free_after);
+ } else if (l == NULL && r != NULL) {
+			MPASS(entry->free_down == MAX(entry->free_after,
+			    r->free_down));
+ } else if (r == NULL) {
+			MPASS(entry->free_down == MAX(entry->free_after,
+			    l->free_down));
+ } else {
+ v = MAX(entry->free_after, l->free_down);
+			v = MAX(v, r->free_down);
+ MPASS(entry->free_down == v);
+ }
+ }
+}
+#endif
+
+static bool
+dmar_gas_rb_insert(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *prev, *found;
+
+ found = RB_INSERT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_fix_free(ctx, entry);
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ if (prev != NULL)
+ dmar_gas_fix_free(ctx, prev);
+ return (found == NULL);
+}
+
+static void
+dmar_gas_rb_remove(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *prev;
+
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ if (prev != NULL)
+ dmar_gas_fix_free(ctx, prev);
+}
+
+void
+dmar_gas_init_ctx(struct dmar_ctx *ctx)
+{
+ struct dmar_map_entry *begin, *end;
+
+ begin = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK);
+ end = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK);
+
+ DMAR_CTX_LOCK(ctx);
+ KASSERT(ctx->entries_cnt == 2, ("dirty ctx %p", ctx));
+ KASSERT(RB_EMPTY(&ctx->rb_root), ("non-empty entries %p", ctx));
+
+ begin->start = 0;
+ begin->end = DMAR_PAGE_SIZE;
+ begin->free_after = ctx->end - begin->end;
+ begin->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED;
+ dmar_gas_rb_insert(ctx, begin);
+
+ end->start = ctx->end;
+ end->end = ctx->end;
+ end->free_after = 0;
+ end->flags = DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_UNMAPPED;
+ dmar_gas_rb_insert(ctx, end);
+
+ ctx->first_place = begin;
+ ctx->last_place = end;
+ DMAR_CTX_UNLOCK(ctx);
+}
+
+void
+dmar_gas_fini_ctx(struct dmar_ctx *ctx)
+{
+ struct dmar_map_entry *entry, *entry1;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT(ctx->entries_cnt == 2, ("ctx still in use %p", ctx));
+
+ entry = RB_MIN(dmar_gas_entries_tree, &ctx->rb_root);
+ KASSERT(entry->start == 0, ("start entry start %p", ctx));
+ KASSERT(entry->end == DMAR_PAGE_SIZE, ("start entry end %p", ctx));
+ KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE,
+ ("start entry flags %p", ctx));
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_free_entry(ctx, entry);
+
+ entry = RB_MAX(dmar_gas_entries_tree, &ctx->rb_root);
+ KASSERT(entry->start == ctx->end, ("end entry start %p", ctx));
+ KASSERT(entry->end == ctx->end, ("end entry end %p", ctx));
+	KASSERT(entry->free_after == 0, ("end entry free_after %p", ctx));
+ KASSERT(entry->flags == DMAR_MAP_ENTRY_PLACE,
+ ("end entry flags %p", ctx));
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_free_entry(ctx, entry);
+
+ RB_FOREACH_SAFE(entry, dmar_gas_entries_tree, &ctx->rb_root, entry1) {
+ KASSERT((entry->flags & DMAR_MAP_ENTRY_RMRR) != 0,
+ ("non-RMRR entry left %p", ctx));
+ RB_REMOVE(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_free_entry(ctx, entry);
+ }
+}
+
+struct dmar_gas_match_args {
+ struct dmar_ctx *ctx;
+ dmar_gaddr_t size;
+ const struct bus_dma_tag_common *common;
+ u_int gas_flags;
+ struct dmar_map_entry *entry;
+};
+
+static bool
+dmar_gas_match_one(struct dmar_gas_match_args *a, struct dmar_map_entry *prev,
+ dmar_gaddr_t end)
+{
+ dmar_gaddr_t bs, start;
+
+ if (a->entry->start + a->size > end)
+ return (false);
+
+	/* DMAR_PAGE_SIZE to create a gap after the new entry. */
+ if (a->entry->start < prev->end + DMAR_PAGE_SIZE ||
+ a->entry->start + a->size + DMAR_PAGE_SIZE > prev->end +
+ prev->free_after)
+ return (false);
+
+ /* No boundary crossing. */
+ if (dmar_test_boundary(a->entry->start, a->size, a->common->boundary))
+ return (true);
+
+ /*
+	 * The region [start, start + size) crosses the boundary.
+	 * Check whether there is enough space after the next
+	 * boundary above start.
+ */
+ bs = (a->entry->start + a->common->boundary) & ~(a->common->boundary
+ - 1);
+ start = roundup2(bs, a->common->alignment);
+	/* DMAR_PAGE_SIZE to create a gap after the new entry. */
+ if (start + a->size + DMAR_PAGE_SIZE <= prev->end + prev->free_after &&
+ start + a->size <= end) {
+ a->entry->start = start;
+ return (true);
+ }
+
+ /*
+ * Not enough space to align at boundary, but allowed to split.
+ * We already checked that start + size does not overlap end.
+ *
+	 * XXXKIB.  It is possible that bs is exactly at the start of
+	 * the next entry, in which case we do not have a gap.  Ignore
+	 * for now.
+ */
+ if ((a->gas_flags & DMAR_GM_CANSPLIT) != 0) {
+ a->size = bs - a->entry->start;
+ return (true);
+ }
+
+ return (false);
+}
+
+static void
+dmar_gas_match_insert(struct dmar_gas_match_args *a,
+ struct dmar_map_entry *prev)
+{
+ struct dmar_map_entry *next;
+ bool found;
+
+ /*
+	 * The prev->end is always aligned on the page size, which
+	 * causes page alignment for the entry->start too.  The size
+	 * is checked to be a multiple of the page size.
+	 *
+	 * A page-sized gap is created between consecutive
+	 * allocations to ensure that out-of-bounds accesses fault.
+ */
+ a->entry->end = a->entry->start + a->size;
+
+ next = RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, prev);
+ KASSERT(next->start >= a->entry->end &&
+ next->start - a->entry->start >= a->size,
+ ("dmar_gas_match_insert hole failed %p prev (%jx, %jx) "
+ "free_after %jx next (%jx, %jx) entry (%jx, %jx)", a->ctx,
+ (uintmax_t)prev->start, (uintmax_t)prev->end,
+ (uintmax_t)prev->free_after,
+ (uintmax_t)next->start, (uintmax_t)next->end,
+ (uintmax_t)a->entry->start, (uintmax_t)a->entry->end));
+
+ prev->free_after = a->entry->start - prev->end;
+ a->entry->free_after = next->start - a->entry->end;
+
+ found = dmar_gas_rb_insert(a->ctx, a->entry);
+ KASSERT(found, ("found dup %p start %jx size %jx",
+ a->ctx, (uintmax_t)a->entry->start, (uintmax_t)a->size));
+ a->entry->flags = DMAR_MAP_ENTRY_MAP;
+
+ KASSERT(RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root,
+ a->entry) == prev,
+ ("entry %p prev %p inserted prev %p", a->entry, prev,
+ RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root, a->entry)));
+ KASSERT(RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root,
+ a->entry) == next,
+ ("entry %p next %p inserted next %p", a->entry, next,
+ RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, a->entry)));
+}
+
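+/*
+ * Recursively search the subtree rooted at prev for a free range below
+ * common->lowaddr that fits the allocation.  Subtrees whose free_down
+ * value shows that no gap can hold the allocation plus the guard page
+ * are skipped.
+ */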
+static int
+dmar_gas_lowermatch(struct dmar_gas_match_args *a, struct dmar_map_entry *prev)
+{
+ struct dmar_map_entry *l;
+ int ret;
+
+ if (prev->end < a->common->lowaddr) {
+ a->entry->start = roundup2(prev->end + DMAR_PAGE_SIZE,
+ a->common->alignment);
+ if (dmar_gas_match_one(a, prev, a->common->lowaddr)) {
+ dmar_gas_match_insert(a, prev);
+ return (0);
+ }
+ }
+ if (prev->free_down < a->size + DMAR_PAGE_SIZE)
+ return (ENOMEM);
+ l = RB_LEFT(prev, rb_entry);
+ if (l != NULL) {
+ ret = dmar_gas_lowermatch(a, l);
+ if (ret == 0)
+ return (0);
+ }
+ l = RB_RIGHT(prev, rb_entry);
+ if (l != NULL)
+ return (dmar_gas_lowermatch(a, l));
+ return (ENOMEM);
+}
+
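+/*
+ * Search for a free range wholly at or above common->highaddr by
+ * iterating linearly over the entries, starting from the first entry
+ * at or after highaddr.
+ */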
+static int
+dmar_gas_uppermatch(struct dmar_gas_match_args *a)
+{
+ struct dmar_map_entry *next, *prev, find_entry;
+
+ find_entry.start = a->common->highaddr;
+ next = RB_NFIND(dmar_gas_entries_tree, &a->ctx->rb_root, &find_entry);
+ if (next == NULL)
+ return (ENOMEM);
+ prev = RB_PREV(dmar_gas_entries_tree, &a->ctx->rb_root, next);
+ KASSERT(prev != NULL, ("no prev %p %jx", a->ctx,
+ (uintmax_t)find_entry.start));
+ for (;;) {
+ a->entry->start = prev->start + DMAR_PAGE_SIZE;
+ if (a->entry->start < a->common->highaddr)
+ a->entry->start = a->common->highaddr;
+ a->entry->start = roundup2(a->entry->start,
+ a->common->alignment);
+ if (dmar_gas_match_one(a, prev, a->ctx->end)) {
+ dmar_gas_match_insert(a, prev);
+ return (0);
+ }
+
+ /*
+		 * XXXKIB. This falls back to a linear iteration over
+		 * the free space in the high region.  But high regions
+		 * are almost unused, so the code should be enough to
+		 * cover the case, although in a non-optimal way.
+ */
+ prev = next;
+ next = RB_NEXT(dmar_gas_entries_tree, &a->ctx->rb_root, prev);
+ KASSERT(next != NULL, ("no next %p %jx", a->ctx,
+ (uintmax_t)find_entry.start));
+ if (next->end >= a->ctx->end)
+ return (ENOMEM);
+ }
+}
+
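+/*
+ * Find a free range of the requested size in the context address
+ * space, honoring the constraints of the DMA tag.  The region below
+ * lowaddr is tried first; if it cannot satisfy the request and
+ * highaddr falls inside the managed space, the region above highaddr
+ * is tried next.
+ */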
+static int
+dmar_gas_find_space(struct dmar_ctx *ctx,
+ const struct bus_dma_tag_common *common, dmar_gaddr_t size,
+ u_int flags, struct dmar_map_entry *entry)
+{
+ struct dmar_gas_match_args a;
+ int error;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT(entry->flags == 0, ("dirty entry %p %p", ctx, entry));
+ KASSERT((size & DMAR_PAGE_MASK) == 0, ("size %jx", (uintmax_t)size));
+
+ a.ctx = ctx;
+ a.size = size;
+ a.common = common;
+ a.gas_flags = flags;
+ a.entry = entry;
+
+ /* Handle lower region. */
+ if (common->lowaddr > 0) {
+ error = dmar_gas_lowermatch(&a, RB_ROOT(&ctx->rb_root));
+ if (error == 0)
+ return (0);
+ KASSERT(error == ENOMEM,
+ ("error %d from dmar_gas_lowermatch", error));
+ }
+ /* Handle upper region. */
+ if (common->highaddr >= ctx->end)
+ return (ENOMEM);
+ error = dmar_gas_uppermatch(&a);
+ KASSERT(error == ENOMEM,
+ ("error %d from dmar_gas_uppermatch", error));
+ return (error);
+}
+
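+/*
+ * Enter the fixed region [entry->start, entry->end) into the address
+ * space, as needed for RMRR ranges and explicit reservations.
+ * Overlaps with existing RMRR entries are clipped to adapt to broken
+ * BIOSes, while overlaps with other mapped entries fail with EBUSY;
+ * placeholder entries still covering the range are removed.
+ */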
+static int
+dmar_gas_alloc_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+ u_int flags)
+{
+ struct dmar_map_entry *next, *prev;
+ bool found;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+
+ if ((entry->start & DMAR_PAGE_MASK) != 0 ||
+ (entry->end & DMAR_PAGE_MASK) != 0)
+ return (EINVAL);
+ if (entry->start >= entry->end)
+ return (EINVAL);
+ if (entry->end >= ctx->end)
+ return (EINVAL);
+
+ next = RB_NFIND(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ KASSERT(next != NULL, ("next must be non-null %p %jx", ctx,
+ (uintmax_t)entry->start));
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, next);
+ /* prev could be NULL */
+
+ /*
+ * Adapt to broken BIOSes which specify overlapping RMRR
+ * entries.
+ *
+	 * XXXKIB: this does not handle the case where the prev or next
+	 * entries are completely covered by the current one, which
+	 * extends both ways.
+ */
+ if (prev != NULL && prev->end > entry->start &&
+ (prev->flags & DMAR_MAP_ENTRY_PLACE) == 0) {
+ if ((prev->flags & DMAR_MAP_ENTRY_RMRR) == 0)
+ return (EBUSY);
+ entry->start = prev->end;
+ }
+ if (next != NULL && next->start < entry->end &&
+ (next->flags & DMAR_MAP_ENTRY_PLACE) == 0) {
+ if ((next->flags & DMAR_MAP_ENTRY_RMRR) == 0)
+ return (EBUSY);
+ entry->end = next->start;
+ }
+ if (entry->end == entry->start)
+ return (0);
+
+ if (prev != NULL && prev->end > entry->start) {
+ /* This assumes that prev is the placeholder entry. */
+ dmar_gas_rb_remove(ctx, prev);
+ prev = NULL;
+ }
+ if (next != NULL && next->start < entry->end) {
+ dmar_gas_rb_remove(ctx, next);
+ next = NULL;
+ }
+
+ found = dmar_gas_rb_insert(ctx, entry);
+ KASSERT(found, ("found RMRR dup %p start %jx end %jx",
+ ctx, (uintmax_t)entry->start, (uintmax_t)entry->end));
+ entry->flags = DMAR_MAP_ENTRY_RMRR;
+
+#ifdef INVARIANTS
+ struct dmar_map_entry *ip, *in;
+ ip = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ in = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ KASSERT(prev == NULL || ip == prev,
+ ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)",
+ entry, entry->start, entry->end, prev,
+ prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end,
+ ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end));
+ KASSERT(next == NULL || in == next,
+ ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)",
+ entry, entry->start, entry->end, next,
+ next == NULL ? 0 : next->start, next == NULL ? 0 : next->end,
+ in, in == NULL ? 0 : in->start, in == NULL ? 0 : in->end));
+#endif
+
+ return (0);
+}
+
+void
+dmar_gas_free_space(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR |
+ DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_MAP,
+ ("permanent entry %p %p", ctx, entry));
+
+ dmar_gas_rb_remove(ctx, entry);
+ entry->flags &= ~DMAR_MAP_ENTRY_MAP;
+#ifdef INVARIANTS
+ if (dmar_check_free)
+ dmar_gas_check_free(ctx);
+#endif
+}
+
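+/*
+ * Remove an RMRR entry from the address space.  If the freed entry was
+ * the first or the last entry in the tree, reinsert the start or end
+ * placeholder that was displaced when the region was allocated.
+ */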
+void
+dmar_gas_free_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry)
+{
+ struct dmar_map_entry *next, *prev;
+
+ DMAR_CTX_ASSERT_LOCKED(ctx);
+ KASSERT((entry->flags & (DMAR_MAP_ENTRY_PLACE | DMAR_MAP_ENTRY_RMRR |
+ DMAR_MAP_ENTRY_MAP)) == DMAR_MAP_ENTRY_RMRR,
+ ("non-RMRR entry %p %p", ctx, entry));
+
+ prev = RB_PREV(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ next = RB_NEXT(dmar_gas_entries_tree, &ctx->rb_root, entry);
+ dmar_gas_rb_remove(ctx, entry);
+ entry->flags &= ~DMAR_MAP_ENTRY_RMRR;
+
+ if (prev == NULL)
+ dmar_gas_rb_insert(ctx, ctx->first_place);
+ if (next == NULL)
+ dmar_gas_rb_insert(ctx, ctx->last_place);
+}
+
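+/*
+ * Allocate address space for a buffer of the given size and map the
+ * pages from ma there with the permissions requested in eflags.  The
+ * new map entry is returned in *res.
+ */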
+int
+dmar_gas_map(struct dmar_ctx *ctx, const struct bus_dma_tag_common *common,
+ dmar_gaddr_t size, u_int eflags, u_int flags, vm_page_t *ma,
+ struct dmar_map_entry **res)
+{
+ struct dmar_map_entry *entry;
+ int error;
+
+ KASSERT((flags & ~(DMAR_GM_CANWAIT | DMAR_GM_CANSPLIT)) == 0,
+ ("invalid flags 0x%x", flags));
+
+ entry = dmar_gas_alloc_entry(ctx, (flags & DMAR_GM_CANWAIT) != 0 ?
+ DMAR_PGF_WAITOK : 0);
+ if (entry == NULL)
+ return (ENOMEM);
+ DMAR_CTX_LOCK(ctx);
+ error = dmar_gas_find_space(ctx, common, size, flags, entry);
+ if (error == ENOMEM) {
+ DMAR_CTX_UNLOCK(ctx);
+ dmar_gas_free_entry(ctx, entry);
+ return (error);
+ }
+#ifdef INVARIANTS
+ if (dmar_check_free)
+ dmar_gas_check_free(ctx);
+#endif
+ KASSERT(error == 0,
+	    ("unexpected error %d from dmar_gas_find_space", error));
+ KASSERT(entry->end < ctx->end, ("allocated GPA %jx, max GPA %jx",
+ (uintmax_t)entry->end, (uintmax_t)ctx->end));
+ entry->flags |= eflags;
+ DMAR_CTX_UNLOCK(ctx);
+
+ error = ctx_map_buf(ctx, entry->start, size, ma,
+ ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) |
+ ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) |
+ ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) |
+ ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0),
+ (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0);
+ if (error == ENOMEM) {
+ dmar_ctx_unload_entry(entry, true);
+ return (error);
+ }
+ KASSERT(error == 0,
+ ("unexpected error %d from ctx_map_buf", error));
+
+ *res = entry;
+ return (0);
+}
+
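+/*
+ * Map an RMRR region whose guest address range is fixed by entry: the
+ * entry is entered into the address space and the backing pages ma are
+ * mapped with the permissions requested in eflags.
+ */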
+int
+dmar_gas_map_region(struct dmar_ctx *ctx, struct dmar_map_entry *entry,
+ u_int eflags, u_int flags, vm_page_t *ma)
+{
+ dmar_gaddr_t start;
+ int error;
+
+ KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", ctx,
+ entry, entry->flags));
+ KASSERT((flags & ~(DMAR_GM_CANWAIT)) == 0,
+ ("invalid flags 0x%x", flags));
+
+ start = entry->start;
+ DMAR_CTX_LOCK(ctx);
+ error = dmar_gas_alloc_region(ctx, entry, flags);
+ if (error != 0) {
+ DMAR_CTX_UNLOCK(ctx);
+ return (error);
+ }
+ entry->flags |= eflags;
+ DMAR_CTX_UNLOCK(ctx);
+ if (entry->end == entry->start)
+ return (0);
+
+ error = ctx_map_buf(ctx, entry->start, entry->end - entry->start,
+ ma + OFF_TO_IDX(start - entry->start),
+ ((eflags & DMAR_MAP_ENTRY_READ) != 0 ? DMAR_PTE_R : 0) |
+ ((eflags & DMAR_MAP_ENTRY_WRITE) != 0 ? DMAR_PTE_W : 0) |
+ ((eflags & DMAR_MAP_ENTRY_SNOOP) != 0 ? DMAR_PTE_SNP : 0) |
+ ((eflags & DMAR_MAP_ENTRY_TM) != 0 ? DMAR_PTE_TM : 0),
+ (flags & DMAR_GM_CANWAIT) != 0 ? DMAR_PGF_WAITOK : 0);
+ if (error == ENOMEM) {
+ dmar_ctx_unload_entry(entry, false);
+ return (error);
+ }
+ KASSERT(error == 0,
+ ("unexpected error %d from ctx_map_buf", error));
+
+ return (0);
+}
+
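+/*
+ * Reserve the [start, end) range so that the address space allocator
+ * never hands it out.  No page table entries are created for the
+ * reserved range.
+ */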
+int
+dmar_gas_reserve_region(struct dmar_ctx *ctx, dmar_gaddr_t start,
+ dmar_gaddr_t end)
+{
+ struct dmar_map_entry *entry;
+ int error;
+
+ entry = dmar_gas_alloc_entry(ctx, DMAR_PGF_WAITOK);
+ entry->start = start;
+ entry->end = end;
+ DMAR_CTX_LOCK(ctx);
+ error = dmar_gas_alloc_region(ctx, entry, DMAR_GM_CANWAIT);
+ if (error == 0)
+ entry->flags |= DMAR_MAP_ENTRY_UNMAPPED;
+ DMAR_CTX_UNLOCK(ctx);
+ if (error != 0)
+ dmar_gas_free_entry(ctx, entry);
+ return (error);
+}
diff --git a/sys/x86/iommu/intel_idpgtbl.c b/sys/x86/iommu/intel_idpgtbl.c
new file mode 100644
index 0000000..b1a8c8f
--- /dev/null
+++ b/sys/x86/iommu/intel_idpgtbl.c
@@ -0,0 +1,783 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/rman.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <machine/atomic.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+static int ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ dmar_gaddr_t size, int flags);
+
+/*
+ * The cache of identity-mapping page tables for the DMARs.  Using the
+ * cache saves a significant amount of memory by sharing the page
+ * tables, since the DMARs in a system are usually identical and have
+ * the same capabilities.  Still, the cache records the information
+ * needed to match DMAR capabilities and page table format, to
+ * correctly handle different DMARs.
+ */
+
+struct idpgtbl {
+ dmar_gaddr_t maxaddr; /* Page table covers the guest address
+ range [0..maxaddr) */
+ int pglvl; /* Total page table levels ignoring
+ superpages */
+ int leaf; /* The last materialized page table
+ level, it is non-zero if superpages
+ are supported */
+ vm_object_t pgtbl_obj; /* The page table pages */
+ LIST_ENTRY(idpgtbl) link;
+};
+
+static struct sx idpgtbl_lock;
+SX_SYSINIT(idpgtbl, &idpgtbl_lock, "idpgtbl");
+static LIST_HEAD(, idpgtbl) idpgtbls = LIST_HEAD_INITIALIZER(idpgtbls);
+static MALLOC_DEFINE(M_DMAR_IDPGTBL, "dmar_idpgtbl",
+ "Intel DMAR Identity mappings cache elements");
+
+/*
+ * Build the next level of the page tables for the identity mapping.
+ * - lvl is the level to build;
+ * - idx is the index of the page table page in the pgtbl_obj, which is
+ *   being allocated and filled now;
+ * - addr is the starting address in the bus address space which is
+ * mapped by the page table page.
+ */
+static void
+ctx_idmap_nextlvl(struct idpgtbl *tbl, int lvl, vm_pindex_t idx,
+ dmar_gaddr_t addr)
+{
+ vm_page_t m, m1;
+ dmar_pte_t *pte;
+ struct sf_buf *sf;
+ dmar_gaddr_t f, pg_sz;
+ vm_pindex_t base;
+ int i;
+
+ VM_OBJECT_ASSERT_LOCKED(tbl->pgtbl_obj);
+ if (addr >= tbl->maxaddr)
+ return;
+ m = dmar_pgalloc(tbl->pgtbl_obj, idx, DMAR_PGF_OBJL | DMAR_PGF_WAITOK |
+ DMAR_PGF_ZERO);
+ base = idx * DMAR_NPTEPG + 1; /* Index of the first child page of idx */
+ pg_sz = pglvl_page_size(tbl->pglvl, lvl);
+ if (lvl != tbl->leaf) {
+ for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz)
+ ctx_idmap_nextlvl(tbl, lvl + 1, base + i, f);
+ }
+ VM_OBJECT_WUNLOCK(tbl->pgtbl_obj);
+ pte = dmar_map_pgtbl(tbl->pgtbl_obj, idx, DMAR_PGF_WAITOK, &sf);
+ if (lvl == tbl->leaf) {
+ for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) {
+ if (f >= tbl->maxaddr)
+ break;
+ pte[i].pte = (DMAR_PTE_ADDR_MASK & f) |
+ DMAR_PTE_R | DMAR_PTE_W;
+ }
+ } else {
+ for (i = 0, f = addr; i < DMAR_NPTEPG; i++, f += pg_sz) {
+ if (f >= tbl->maxaddr)
+ break;
+ m1 = dmar_pgalloc(tbl->pgtbl_obj, base + i,
+ DMAR_PGF_NOALLOC);
+ KASSERT(m1 != NULL, ("lost page table page"));
+ pte[i].pte = (DMAR_PTE_ADDR_MASK &
+ VM_PAGE_TO_PHYS(m1)) | DMAR_PTE_R | DMAR_PTE_W;
+ }
+ }
+ /* ctx_get_idmap_pgtbl flushes CPU cache if needed. */
+ dmar_unmap_pgtbl(sf, true);
+ VM_OBJECT_WLOCK(tbl->pgtbl_obj);
+}
+
+/*
+ * Find a ready and compatible identity-mapping page table in the
+ * cache. If not found, populate the identity-mapping page table for
+ * the context, up to the maxaddr.  The maxaddr byte itself is allowed
+ * to be left unmapped, which is consistent with the definition of
+ * Maxmem as the highest usable physical address + 1.  If superpages
+ * are used, the maxaddr is typically mapped.
+ */
+vm_object_t
+ctx_get_idmap_pgtbl(struct dmar_ctx *ctx, dmar_gaddr_t maxaddr)
+{
+ struct dmar_unit *unit;
+ struct idpgtbl *tbl;
+ vm_object_t res;
+ vm_page_t m;
+ int leaf, i;
+
+ leaf = 0; /* silence gcc */
+
+ /*
+ * First, determine where to stop the paging structures.
+ */
+ for (i = 0; i < ctx->pglvl; i++) {
+ if (i == ctx->pglvl - 1 || ctx_is_sp_lvl(ctx, i)) {
+ leaf = i;
+ break;
+ }
+ }
+
+ /*
+	 * Search the cache for a compatible page table.  A qualified
+	 * page table must map up to maxaddr, its level must be
+	 * supported by the DMAR, and its leaf must be equal to the
+	 * calculated value.  The latter restriction could be lifted,
+	 * but I believe it is currently impossible to have any
+	 * deviations on existing hardware.
+ */
+ sx_slock(&idpgtbl_lock);
+ LIST_FOREACH(tbl, &idpgtbls, link) {
+ if (tbl->maxaddr >= maxaddr &&
+ dmar_pglvl_supported(ctx->dmar, tbl->pglvl) &&
+ tbl->leaf == leaf) {
+ res = tbl->pgtbl_obj;
+ vm_object_reference(res);
+ sx_sunlock(&idpgtbl_lock);
+ ctx->pglvl = tbl->pglvl; /* XXXKIB ? */
+ goto end;
+ }
+ }
+
+ /*
+	 * Not found in the cache; relock the cache in exclusive mode
+	 * to be able to add an element, and recheck the cache after
+	 * the relock.
+ */
+ sx_sunlock(&idpgtbl_lock);
+ sx_xlock(&idpgtbl_lock);
+ LIST_FOREACH(tbl, &idpgtbls, link) {
+ if (tbl->maxaddr >= maxaddr &&
+ dmar_pglvl_supported(ctx->dmar, tbl->pglvl) &&
+ tbl->leaf == leaf) {
+ res = tbl->pgtbl_obj;
+ vm_object_reference(res);
+ sx_xunlock(&idpgtbl_lock);
+ ctx->pglvl = tbl->pglvl; /* XXXKIB ? */
+ return (res);
+ }
+ }
+
+ /*
+ * Still not found, create new page table.
+ */
+ tbl = malloc(sizeof(*tbl), M_DMAR_IDPGTBL, M_WAITOK);
+ tbl->pglvl = ctx->pglvl;
+ tbl->leaf = leaf;
+ tbl->maxaddr = maxaddr;
+ tbl->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
+ IDX_TO_OFF(pglvl_max_pages(tbl->pglvl)), 0, 0, NULL);
+ VM_OBJECT_WLOCK(tbl->pgtbl_obj);
+ ctx_idmap_nextlvl(tbl, 0, 0, 0);
+ VM_OBJECT_WUNLOCK(tbl->pgtbl_obj);
+ LIST_INSERT_HEAD(&idpgtbls, tbl, link);
+ res = tbl->pgtbl_obj;
+ vm_object_reference(res);
+ sx_xunlock(&idpgtbl_lock);
+
+end:
+ /*
+ * Table was found or created.
+ *
+	 * If the DMAR does not snoop paging structure accesses, flush
+	 * the CPU cache to memory.  Note that the coherent argument of
+	 * dmar_unmap_pgtbl() was possibly wrong at the time the
+	 * identity page table was created, since the DMAR passed at
+	 * creation time could be coherent while the current DMAR is
+	 * not.
+ *
+ * If DMAR cannot look into the chipset write buffer, flush it
+ * as well.
+ */
+ unit = ctx->dmar;
+ if (!DMAR_IS_COHERENT(unit)) {
+ VM_OBJECT_WLOCK(res);
+ for (m = vm_page_lookup(res, 0); m != NULL;
+ m = vm_page_next(m))
+ pmap_invalidate_cache_pages(&m, 1);
+ VM_OBJECT_WUNLOCK(res);
+ }
+ if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) {
+ DMAR_LOCK(unit);
+ dmar_flush_write_bufs(unit);
+ DMAR_UNLOCK(unit);
+ }
+
+ return (res);
+}
+
+/*
+ * Return a reference to the identity mapping page table to the cache.
+ */
+void
+put_idmap_pgtbl(vm_object_t obj)
+{
+ struct idpgtbl *tbl, *tbl1;
+ vm_object_t rmobj;
+
+ sx_slock(&idpgtbl_lock);
+ KASSERT(obj->ref_count >= 2, ("lost cache reference"));
+ vm_object_deallocate(obj);
+
+ /*
+	 * The cache always owns the last reference on the page table
+	 * object.  If there is an additional reference, the object
+	 * must stay.
+ */
+ if (obj->ref_count > 1) {
+ sx_sunlock(&idpgtbl_lock);
+ return;
+ }
+
+ /*
+	 * The cache reference is the last one; remove the cache
+	 * element and free the page table object, returning the page
+	 * table pages to the system.
+ */
+ sx_sunlock(&idpgtbl_lock);
+ sx_xlock(&idpgtbl_lock);
+ LIST_FOREACH_SAFE(tbl, &idpgtbls, link, tbl1) {
+ rmobj = tbl->pgtbl_obj;
+ if (rmobj->ref_count == 1) {
+ LIST_REMOVE(tbl, link);
+ atomic_subtract_int(&dmar_tbl_pagecnt,
+ rmobj->resident_page_count);
+ vm_object_deallocate(rmobj);
+ free(tbl, M_DMAR_IDPGTBL);
+ }
+ }
+ sx_xunlock(&idpgtbl_lock);
+}
+
+/*
+ * The core routines to map and unmap host pages at the given guest
+ * address.  Superpages are supported.
+ */
+
+/*
+ * Index of the pte for the guest address base in the page table at
+ * the level lvl.
+ */
+static int
+ctx_pgtbl_pte_off(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl)
+{
+
+ base >>= DMAR_PAGE_SHIFT + (ctx->pglvl - lvl - 1) * DMAR_NPTEPGSHIFT;
+ return (base & DMAR_PTEMASK);
+}
+
+/*
+ * Returns the page index of the page table page in the page table
+ * object, which maps the given address base at the page table level
+ * lvl.
+ */
+static vm_pindex_t
+ctx_pgtbl_get_pindex(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl)
+{
+ vm_pindex_t idx, pidx;
+ int i;
+
+ KASSERT(lvl >= 0 && lvl < ctx->pglvl, ("wrong lvl %p %d", ctx, lvl));
+
+ for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx)
+ idx = ctx_pgtbl_pte_off(ctx, base, i) + pidx * DMAR_NPTEPG + 1;
+ return (idx);
+}
+
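+/*
+ * Return a pointer to the pte for the address base at the page table
+ * level lvl, mapping the containing page table page through an sf
+ * buf.  Missing intermediate page table pages are allocated on demand
+ * and linked into the parent level recursively.  *idxp and *sf cache
+ * the currently mapped page table page between calls.
+ */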
+static dmar_pte_t *
+ctx_pgtbl_map_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags,
+ vm_pindex_t *idxp, struct sf_buf **sf)
+{
+ vm_page_t m;
+ struct sf_buf *sfp;
+ dmar_pte_t *pte, *ptep;
+ vm_pindex_t idx, idx1;
+
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+ KASSERT((flags & DMAR_PGF_OBJL) != 0, ("lost PGF_OBJL"));
+
+ idx = ctx_pgtbl_get_pindex(ctx, base, lvl);
+ if (*sf != NULL && idx == *idxp) {
+ pte = (dmar_pte_t *)sf_buf_kva(*sf);
+ } else {
+ if (*sf != NULL)
+ dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+ *idxp = idx;
+retry:
+ pte = dmar_map_pgtbl(ctx->pgtbl_obj, idx, flags, sf);
+ if (pte == NULL) {
+ KASSERT(lvl > 0, ("lost root page table page %p", ctx));
+ /*
+			 * The page table page does not exist; allocate
+			 * it and create the pte at the upper level.
+ */
+ m = dmar_pgalloc(ctx->pgtbl_obj, idx, flags |
+ DMAR_PGF_ZERO);
+ if (m == NULL)
+ return (NULL);
+
+ /*
+			 * Prevent a potential free while pgtbl_obj is
+			 * unlocked in the recursive call to
+			 * ctx_pgtbl_map_pte(), if another thread did a
+			 * pte write and clear while the lock is
+			 * dropped.
+ */
+ m->wire_count++;
+
+ sfp = NULL;
+ ptep = ctx_pgtbl_map_pte(ctx, base, lvl - 1, flags,
+ &idx1, &sfp);
+ if (ptep == NULL) {
+ KASSERT(m->pindex != 0,
+				    ("losing root page %p", ctx));
+ m->wire_count--;
+ dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags);
+ return (NULL);
+ }
+ dmar_pte_store(&ptep->pte, DMAR_PTE_R | DMAR_PTE_W |
+ VM_PAGE_TO_PHYS(m));
+ sf_buf_page(sfp)->wire_count += 1;
+ m->wire_count--;
+ dmar_unmap_pgtbl(sfp, DMAR_IS_COHERENT(ctx->dmar));
+ /* Only executed once. */
+ goto retry;
+ }
+ }
+ pte += ctx_pgtbl_pte_off(ctx, base, lvl);
+ return (pte);
+}
+
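+/*
+ * Create the ptes for the [base, base + size) range backed by the
+ * pages in ma.  For each step the deepest usable level is chosen;
+ * superpage levels are used when the hardware supports them and the
+ * remaining size, the guest address, and the host physical addresses
+ * are suitably aligned and contiguous.
+ */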
+static int
+ctx_map_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ vm_page_t *ma, uint64_t pflags, int flags)
+{
+ dmar_pte_t *pte;
+ struct sf_buf *sf;
+ dmar_gaddr_t pg_sz, base1, size1;
+ vm_pindex_t pi, c, idx, run_sz;
+ int lvl;
+ bool superpage;
+
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+
+ base1 = base;
+ size1 = size;
+ flags |= DMAR_PGF_OBJL;
+ TD_PREP_PINNED_ASSERT;
+
+ for (sf = NULL, pi = 0; size > 0; base += pg_sz, size -= pg_sz,
+ pi += run_sz) {
+ for (lvl = 0, c = 0, superpage = false;; lvl++) {
+ pg_sz = ctx_page_size(ctx, lvl);
+ run_sz = pg_sz >> DMAR_PAGE_SHIFT;
+ if (lvl == ctx->pglvl - 1)
+ break;
+ /*
+			 * Check whether the current base is suitable
+			 * for a superpage mapping.  First, verify the
+			 * level.
+ */
+ if (!ctx_is_sp_lvl(ctx, lvl))
+ continue;
+ /*
+ * Next, look at the size of the mapping and
+ * alignment of both guest and host addresses.
+ */
+ if (size < pg_sz || (base & (pg_sz - 1)) != 0 ||
+ (VM_PAGE_TO_PHYS(ma[pi]) & (pg_sz - 1)) != 0)
+ continue;
+			/* All passed, check host pages contiguity. */
+ if (c == 0) {
+ for (c = 1; c < run_sz; c++) {
+ if (VM_PAGE_TO_PHYS(ma[pi + c]) !=
+ VM_PAGE_TO_PHYS(ma[pi + c - 1]) +
+ PAGE_SIZE)
+ break;
+ }
+ }
+ if (c >= run_sz) {
+ superpage = true;
+ break;
+ }
+ }
+ KASSERT(size >= pg_sz,
+ ("mapping loop overflow %p %jx %jx %jx", ctx,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
+ pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf);
+ if (pte == NULL) {
+ KASSERT((flags & DMAR_PGF_WAITOK) == 0,
+ ("failed waitable pte alloc %p", ctx));
+ if (sf != NULL) {
+ dmar_unmap_pgtbl(sf,
+ DMAR_IS_COHERENT(ctx->dmar));
+ }
+ ctx_unmap_buf_locked(ctx, base1, base - base1, flags);
+ TD_PINNED_ASSERT;
+ return (ENOMEM);
+ }
+ dmar_pte_store(&pte->pte, VM_PAGE_TO_PHYS(ma[pi]) | pflags |
+ (superpage ? DMAR_PTE_SP : 0));
+ sf_buf_page(sf)->wire_count += 1;
+ }
+ if (sf != NULL)
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+ TD_PINNED_ASSERT;
+ return (0);
+}
+
+int
+ctx_map_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ vm_page_t *ma, uint64_t pflags, int flags)
+{
+ struct dmar_unit *unit;
+ int error;
+
+ unit = ctx->dmar;
+
+ KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0,
+ ("modifying idmap pagetable ctx %p", ctx));
+ KASSERT((base & DMAR_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & DMAR_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(size > 0, ("zero size %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < (1ULL << ctx->agaw),
+ ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size < (1ULL << ctx->agaw),
+ ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((pflags & (DMAR_PTE_R | DMAR_PTE_W)) != 0,
+ ("neither read nor write %jx", (uintmax_t)pflags));
+ KASSERT((pflags & ~(DMAR_PTE_R | DMAR_PTE_W | DMAR_PTE_SNP |
+ DMAR_PTE_TM)) == 0,
+ ("invalid pte flags %jx", (uintmax_t)pflags));
+ KASSERT((pflags & DMAR_PTE_SNP) == 0 ||
+ (unit->hw_ecap & DMAR_ECAP_SC) != 0,
+ ("PTE_SNP for dmar without snoop control %p %jx",
+ ctx, (uintmax_t)pflags));
+ KASSERT((pflags & DMAR_PTE_TM) == 0 ||
+ (unit->hw_ecap & DMAR_ECAP_DI) != 0,
+ ("PTE_TM for dmar without DIOTLB %p %jx",
+ ctx, (uintmax_t)pflags));
+ KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ DMAR_CTX_PGLOCK(ctx);
+ error = ctx_map_buf_locked(ctx, base, size, ma, pflags, flags);
+ DMAR_CTX_PGUNLOCK(ctx);
+ if (error != 0)
+ return (error);
+
+ if ((unit->hw_cap & DMAR_CAP_CM) != 0)
+ ctx_flush_iotlb_sync(ctx, base, size);
+ else if ((unit->hw_cap & DMAR_CAP_RWBF) != 0) {
+ /* See 11.1 Write Buffer Flushing. */
+ DMAR_LOCK(unit);
+ dmar_flush_write_bufs(unit);
+ DMAR_UNLOCK(unit);
+ }
+ return (0);
+}
+
+static void ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ int lvl, int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_fs);
+
+static void
+ctx_free_pgtbl_pde(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl, int flags)
+{
+ struct sf_buf *sf;
+ dmar_pte_t *pde;
+ vm_pindex_t idx;
+
+ sf = NULL;
+ pde = ctx_pgtbl_map_pte(ctx, base, lvl, flags, &idx, &sf);
+ ctx_unmap_clear_pte(ctx, base, lvl, flags, pde, &sf, true);
+}
+
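+/*
+ * Clear the pte and drop the reference it held on its page table
+ * page.  A page table page whose wire count drops to zero is freed,
+ * and the pde referencing it at the upper level is cleared in turn.
+ */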
+static void
+ctx_unmap_clear_pte(struct dmar_ctx *ctx, dmar_gaddr_t base, int lvl,
+ int flags, dmar_pte_t *pte, struct sf_buf **sf, bool free_sf)
+{
+ vm_page_t m;
+
+ dmar_pte_clear(&pte->pte);
+ m = sf_buf_page(*sf);
+ if (free_sf) {
+ dmar_unmap_pgtbl(*sf, DMAR_IS_COHERENT(ctx->dmar));
+ *sf = NULL;
+ }
+ m->wire_count--;
+ if (m->wire_count != 0)
+ return;
+ KASSERT(lvl != 0,
+ ("lost reference (lvl) on root pg ctx %p base %jx lvl %d",
+ ctx, (uintmax_t)base, lvl));
+ KASSERT(m->pindex != 0,
+ ("lost reference (idx) on root pg ctx %p base %jx lvl %d",
+ ctx, (uintmax_t)base, lvl));
+ dmar_pgfree(ctx->pgtbl_obj, m->pindex, flags);
+ ctx_free_pgtbl_pde(ctx, base, lvl - 1, flags);
+}
+
+/*
+ * Assumes that the unmap is never partial.
+ */
+static int
+ctx_unmap_buf_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ dmar_gaddr_t size, int flags)
+{
+ dmar_pte_t *pte;
+ struct sf_buf *sf;
+ vm_pindex_t idx;
+ dmar_gaddr_t pg_sz, base1, size1;
+ int lvl;
+
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+ if (size == 0)
+ return (0);
+
+ KASSERT((ctx->flags & DMAR_CTX_IDMAP) == 0,
+ ("modifying idmap pagetable ctx %p", ctx));
+ KASSERT((base & DMAR_PAGE_MASK) == 0,
+ ("non-aligned base %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((size & DMAR_PAGE_MASK) == 0,
+ ("non-aligned size %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT(base < (1ULL << ctx->agaw),
+ ("base too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size < (1ULL << ctx->agaw),
+ ("end too high %p %jx %jx agaw %d", ctx, (uintmax_t)base,
+ (uintmax_t)size, ctx->agaw));
+ KASSERT(base + size > base,
+ ("size overflow %p %jx %jx", ctx, (uintmax_t)base,
+ (uintmax_t)size));
+ KASSERT((flags & ~DMAR_PGF_WAITOK) == 0, ("invalid flags %x", flags));
+
+ pg_sz = 0; /* silence gcc */
+ base1 = base;
+ size1 = size;
+ flags |= DMAR_PGF_OBJL;
+ TD_PREP_PINNED_ASSERT;
+
+ for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) {
+ for (lvl = 0; lvl < ctx->pglvl; lvl++) {
+ if (lvl != ctx->pglvl - 1 && !ctx_is_sp_lvl(ctx, lvl))
+ continue;
+ pg_sz = ctx_page_size(ctx, lvl);
+ if (pg_sz > size)
+ continue;
+ pte = ctx_pgtbl_map_pte(ctx, base, lvl, flags,
+ &idx, &sf);
+ KASSERT(pte != NULL,
+ ("sleeping or page missed %p %jx %d 0x%x",
+ ctx, (uintmax_t)base, lvl, flags));
+ if ((pte->pte & DMAR_PTE_SP) != 0 ||
+ lvl == ctx->pglvl - 1) {
+ ctx_unmap_clear_pte(ctx, base, lvl, flags,
+ pte, &sf, false);
+ break;
+ }
+ }
+ KASSERT(size >= pg_sz,
+ ("unmapping loop overflow %p %jx %jx %jx", ctx,
+ (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz));
+ }
+ if (sf != NULL)
+ dmar_unmap_pgtbl(sf, DMAR_IS_COHERENT(ctx->dmar));
+ /*
+	 * See 11.1 Write Buffer Flushing for an explanation why RWBF
+	 * can be ignored here.
+ */
+
+ TD_PINNED_ASSERT;
+ return (0);
+}
+
+int
+ctx_unmap_buf(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size,
+ int flags)
+{
+ int error;
+
+ DMAR_CTX_PGLOCK(ctx);
+ error = ctx_unmap_buf_locked(ctx, base, size, flags);
+ DMAR_CTX_PGUNLOCK(ctx);
+ return (error);
+}
+
+int
+ctx_alloc_pgtbl(struct dmar_ctx *ctx)
+{
+ vm_page_t m;
+
+ KASSERT(ctx->pgtbl_obj == NULL, ("already initialized %p", ctx));
+
+ ctx->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL,
+ IDX_TO_OFF(pglvl_max_pages(ctx->pglvl)), 0, 0, NULL);
+ DMAR_CTX_PGLOCK(ctx);
+ m = dmar_pgalloc(ctx->pgtbl_obj, 0, DMAR_PGF_WAITOK |
+ DMAR_PGF_ZERO | DMAR_PGF_OBJL);
+ /* No implicit free of the top level page table page. */
+ m->wire_count = 1;
+ DMAR_CTX_PGUNLOCK(ctx);
+ return (0);
+}
+
+void
+ctx_free_pgtbl(struct dmar_ctx *ctx)
+{
+ vm_object_t obj;
+ vm_page_t m;
+
+ obj = ctx->pgtbl_obj;
+ if (obj == NULL) {
+ KASSERT((ctx->dmar->hw_ecap & DMAR_ECAP_PT) != 0 &&
+ (ctx->flags & DMAR_CTX_IDMAP) != 0,
+ ("lost pagetable object ctx %p", ctx));
+ return;
+ }
+ DMAR_CTX_ASSERT_PGLOCKED(ctx);
+ ctx->pgtbl_obj = NULL;
+
+ if ((ctx->flags & DMAR_CTX_IDMAP) != 0) {
+ put_idmap_pgtbl(obj);
+ ctx->flags &= ~DMAR_CTX_IDMAP;
+ return;
+ }
+
+ /* Obliterate wire_counts */
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
+ m->wire_count = 0;
+ VM_OBJECT_WUNLOCK(obj);
+ vm_object_deallocate(obj);
+}
+
+static inline uint64_t
+ctx_wait_iotlb_flush(struct dmar_unit *unit, uint64_t wt, int iro)
+{
+ uint64_t iotlbr;
+
+ dmar_write8(unit, iro + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
+ DMAR_IOTLB_DR | DMAR_IOTLB_DW | wt);
+ for (;;) {
+ iotlbr = dmar_read8(unit, iro + DMAR_IOTLB_REG_OFF);
+ if ((iotlbr & DMAR_IOTLB_IVT) == 0)
+ break;
+ cpu_spinwait();
+ }
+ return (iotlbr);
+}
+
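+/*
+ * Flush the IOTLB for the context domain using the register-based
+ * (non-queued) invalidation interface.  Page-selective invalidation is
+ * used when the hardware supports PSI and the range is at most 2MB;
+ * otherwise a domain-selective flush is performed.
+ */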
+void
+ctx_flush_iotlb_sync(struct dmar_ctx *ctx, dmar_gaddr_t base, dmar_gaddr_t size)
+{
+ struct dmar_unit *unit;
+ dmar_gaddr_t isize;
+ uint64_t iotlbr;
+ int am, iro;
+
+ unit = ctx->dmar;
+ KASSERT(!unit->qi_enabled, ("dmar%d: sync iotlb flush call",
+ unit->unit));
+ iro = DMAR_ECAP_IRO(unit->hw_ecap) * 16;
+ DMAR_LOCK(unit);
+ if ((unit->hw_cap & DMAR_CAP_PSI) == 0 || size > 2 * 1024 * 1024) {
+ iotlbr = ctx_wait_iotlb_flush(unit, DMAR_IOTLB_IIRG_DOM |
+ DMAR_IOTLB_DID(ctx->domain), iro);
+ KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) !=
+ DMAR_IOTLB_IAIG_INVLD,
+ ("dmar%d: invalidation failed %jx", unit->unit,
+ (uintmax_t)iotlbr));
+ } else {
+ for (; size > 0; base += isize, size -= isize) {
+ am = calc_am(unit, base, size, &isize);
+ dmar_write8(unit, iro, base | am);
+ iotlbr = ctx_wait_iotlb_flush(unit,
+ DMAR_IOTLB_IIRG_PAGE | DMAR_IOTLB_DID(ctx->domain),
+ iro);
+ KASSERT((iotlbr & DMAR_IOTLB_IAIG_MASK) !=
+ DMAR_IOTLB_IAIG_INVLD,
+ ("dmar%d: PSI invalidation failed "
+ "iotlbr 0x%jx base 0x%jx size 0x%jx am %d",
+ unit->unit, (uintmax_t)iotlbr,
+ (uintmax_t)base, (uintmax_t)size, am));
+ /*
+			 * Any non-page granularity covers the whole
+			 * guest address space for the domain.
+ */
+ if ((iotlbr & DMAR_IOTLB_IAIG_MASK) !=
+ DMAR_IOTLB_IAIG_PAGE)
+ break;
+ }
+ }
+ DMAR_UNLOCK(unit);
+}
diff --git a/sys/x86/iommu/intel_qi.c b/sys/x86/iommu/intel_qi.c
new file mode 100644
index 0000000..a682c93
--- /dev/null
+++ b/sys/x86/iommu/intel_qi.c
@@ -0,0 +1,414 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_acpi.h"
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <machine/bus.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+static bool
+dmar_qi_seq_processed(const struct dmar_unit *unit,
+ const struct dmar_qi_genseq *pseq)
+{
+
+ return (pseq->gen < unit->inv_waitd_gen ||
+ (pseq->gen == unit->inv_waitd_gen &&
+ pseq->seq <= unit->inv_waitd_seq_hw));
+}
+
+static int
+dmar_enable_qi(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ unit->hw_gcmd |= DMAR_GCMD_QIE;
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) == 0)
+ cpu_spinwait();
+ return (0);
+}
+
+static int
+dmar_disable_qi(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ unit->hw_gcmd &= ~DMAR_GCMD_QIE;
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_QIES) != 0)
+ cpu_spinwait();
+ return (0);
+}
+
+static void
+dmar_qi_advance_tail(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ dmar_write4(unit, DMAR_IQT_REG, unit->inv_queue_tail);
+}
+
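+/*
+ * Reserve space for descr_count descriptors in the invalidation queue,
+ * recalculating the available room from the hardware head pointer and
+ * busy-waiting for the hardware to drain the queue when it is full.
+ */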
+static void
+dmar_qi_ensure(struct dmar_unit *unit, int descr_count)
+{
+ uint32_t head;
+ int bytes;
+
+ DMAR_ASSERT_LOCKED(unit);
+ bytes = descr_count << DMAR_IQ_DESCR_SZ_SHIFT;
+ for (;;) {
+ if (bytes <= unit->inv_queue_avail)
+ break;
+ /* refill */
+ head = dmar_read4(unit, DMAR_IQH_REG);
+ head &= DMAR_IQH_MASK;
+ unit->inv_queue_avail = head - unit->inv_queue_tail -
+ DMAR_IQ_DESCR_SZ;
+ if (head <= unit->inv_queue_tail)
+ unit->inv_queue_avail += unit->inv_queue_size;
+ if (bytes <= unit->inv_queue_avail)
+ break;
+
+ /*
+		 * No space in the queue, do a busy wait.  The hardware
+		 * must make progress.  But first advance the tail to
+		 * inform the descriptor streamer about the entries we
+		 * might have already filled, otherwise they could clog
+		 * the whole queue.
+ */
+ dmar_qi_advance_tail(unit);
+ unit->inv_queue_full++;
+ cpu_spinwait();
+ }
+ unit->inv_queue_avail -= bytes;
+}
+
+static void
+dmar_qi_emit(struct dmar_unit *unit, uint64_t data1, uint64_t data2)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data1;
+ unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+ KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
+ (uintmax_t)unit->inv_queue_size));
+ unit->inv_queue_tail &= unit->inv_queue_size - 1;
+ *(volatile uint64_t *)(unit->inv_queue + unit->inv_queue_tail) = data2;
+ unit->inv_queue_tail += DMAR_IQ_DESCR_SZ / 2;
+ KASSERT(unit->inv_queue_tail <= unit->inv_queue_size,
+ ("tail overflow 0x%x 0x%jx", unit->inv_queue_tail,
+ (uintmax_t)unit->inv_queue_size));
+ unit->inv_queue_tail &= unit->inv_queue_size - 1;
+}
+
+static void
+dmar_qi_emit_wait_descr(struct dmar_unit *unit, uint32_t seq, bool intr,
+ bool memw, bool fence)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ dmar_qi_emit(unit, DMAR_IQ_DESCR_WAIT_ID |
+ (intr ? DMAR_IQ_DESCR_WAIT_IF : 0) |
+ (memw ? DMAR_IQ_DESCR_WAIT_SW : 0) |
+ (fence ? DMAR_IQ_DESCR_WAIT_FN : 0) |
+ (memw ? DMAR_IQ_DESCR_WAIT_SD(seq) : 0),
+ memw ? unit->inv_waitd_seq_hw_phys : 0);
+}
+
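+/*
+ * Emit an invalidation wait descriptor stamped with the next sequence
+ * number and record the sequence in *pseq.  When the 32-bit sequence
+ * counter is about to wrap, the queue is drained synchronously and a
+ * new generation is started.
+ */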
+static void
+dmar_qi_emit_wait_seq(struct dmar_unit *unit, struct dmar_qi_genseq *pseq)
+{
+ struct dmar_qi_genseq gsec;
+ uint32_t seq;
+
+ KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
+ DMAR_ASSERT_LOCKED(unit);
+ if (unit->inv_waitd_seq == 0xffffffff) {
+ gsec.gen = unit->inv_waitd_gen;
+ gsec.seq = unit->inv_waitd_seq;
+ dmar_qi_ensure(unit, 1);
+ dmar_qi_emit_wait_descr(unit, gsec.seq, false, true, false);
+ dmar_qi_advance_tail(unit);
+ while (!dmar_qi_seq_processed(unit, &gsec))
+ cpu_spinwait();
+ unit->inv_waitd_gen++;
+ unit->inv_waitd_seq = 1;
+ }
+ seq = unit->inv_waitd_seq++;
+ pseq->gen = unit->inv_waitd_gen;
+ pseq->seq = seq;
+ dmar_qi_emit_wait_descr(unit, seq, true, true, false);
+}
+
+static void
+dmar_qi_wait_for_seq(struct dmar_unit *unit, const struct dmar_qi_genseq *gseq)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ unit->inv_seq_waiters++;
+ while (!dmar_qi_seq_processed(unit, gseq)) {
+ if (cold) {
+ cpu_spinwait();
+ } else {
+ msleep(&unit->inv_seq_waiters, &unit->lock, 0,
+ "dmarse", hz);
+ }
+ }
+ unit->inv_seq_waiters--;
+}
+
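+/*
+ * Queue page-selective IOTLB invalidation descriptors covering
+ * [base, base + size) for the context domain.  If pseq is not NULL, a
+ * wait descriptor is queued as well and its sequence is returned in
+ * *pseq, so the caller can later wait for the invalidation to
+ * complete.
+ */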
+void
+dmar_qi_invalidate_locked(struct dmar_ctx *ctx, dmar_gaddr_t base,
+ dmar_gaddr_t size, struct dmar_qi_genseq *pseq)
+{
+ struct dmar_unit *unit;
+ dmar_gaddr_t isize;
+ int am;
+
+ unit = ctx->dmar;
+ DMAR_ASSERT_LOCKED(unit);
+ for (; size > 0; base += isize, size -= isize) {
+ am = calc_am(unit, base, size, &isize);
+ dmar_qi_ensure(unit, 1);
+ dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV |
+ DMAR_IQ_DESCR_IOTLB_PAGE | DMAR_IQ_DESCR_IOTLB_DW |
+ DMAR_IQ_DESCR_IOTLB_DR |
+ DMAR_IQ_DESCR_IOTLB_DID(ctx->domain),
+ base | am);
+ }
+ if (pseq != NULL) {
+ dmar_qi_ensure(unit, 1);
+ dmar_qi_emit_wait_seq(unit, pseq);
+ }
+ dmar_qi_advance_tail(unit);
+}
+
+void
+dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit)
+{
+ struct dmar_qi_genseq gseq;
+
+ DMAR_ASSERT_LOCKED(unit);
+ dmar_qi_ensure(unit, 2);
+ dmar_qi_emit(unit, DMAR_IQ_DESCR_CTX_INV | DMAR_IQ_DESCR_CTX_GLOB, 0);
+ dmar_qi_emit_wait_seq(unit, &gseq);
+ dmar_qi_advance_tail(unit);
+ dmar_qi_wait_for_seq(unit, &gseq);
+}
+
+void
+dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit)
+{
+ struct dmar_qi_genseq gseq;
+
+ DMAR_ASSERT_LOCKED(unit);
+ dmar_qi_ensure(unit, 2);
+ dmar_qi_emit(unit, DMAR_IQ_DESCR_IOTLB_INV | DMAR_IQ_DESCR_IOTLB_GLOB |
+ DMAR_IQ_DESCR_IOTLB_DW | DMAR_IQ_DESCR_IOTLB_DR, 0);
+ dmar_qi_emit_wait_seq(unit, &gseq);
+ dmar_qi_advance_tail(unit);
+ dmar_qi_wait_for_seq(unit, &gseq);
+}
+
+int
+dmar_qi_intr(void *arg)
+{
+ struct dmar_unit *unit;
+
+ unit = arg;
+ KASSERT(unit->qi_enabled, ("dmar%d: QI is not enabled", unit->unit));
+ taskqueue_enqueue_fast(unit->qi_taskqueue, &unit->qi_task);
+ return (FILTER_HANDLED);
+}
+
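+/*
+ * Taskqueue handler for the invalidation completion interrupt: free
+ * the map entries whose invalidation requests have been processed by
+ * the hardware, acknowledge the wait completion status, and wake up
+ * threads sleeping on a sequence number.
+ */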
+static void
+dmar_qi_task(void *arg, int pending __unused)
+{
+ struct dmar_unit *unit;
+ struct dmar_map_entry *entry;
+ uint32_t ics;
+
+ unit = arg;
+
+ DMAR_LOCK(unit);
+ for (;;) {
+ entry = TAILQ_FIRST(&unit->tlb_flush_entries);
+ if (entry == NULL)
+ break;
+ if ((entry->gseq.gen == 0 && entry->gseq.seq == 0) ||
+ !dmar_qi_seq_processed(unit, &entry->gseq))
+ break;
+ TAILQ_REMOVE(&unit->tlb_flush_entries, entry, dmamap_link);
+ DMAR_UNLOCK(unit);
+ dmar_ctx_free_entry(entry, (entry->flags &
+ DMAR_MAP_ENTRY_QI_NF) == 0);
+ DMAR_LOCK(unit);
+ }
+ ics = dmar_read4(unit, DMAR_ICS_REG);
+ if ((ics & DMAR_ICS_IWC) != 0) {
+ ics = DMAR_ICS_IWC;
+ dmar_write4(unit, DMAR_ICS_REG, ics);
+ }
+ if (unit->inv_seq_waiters > 0)
+ wakeup(&unit->inv_seq_waiters);
+ DMAR_UNLOCK(unit);
+}
+
+int
+dmar_init_qi(struct dmar_unit *unit)
+{
+ uint64_t iqa;
+ uint32_t ics;
+ int qi_sz;
+
+ if (!DMAR_HAS_QI(unit) || (unit->hw_cap & DMAR_CAP_CM) != 0)
+ return (0);
+ unit->qi_enabled = 1;
+ TUNABLE_INT_FETCH("hw.dmar.qi", &unit->qi_enabled);
+ if (!unit->qi_enabled)
+ return (0);
+
+ TAILQ_INIT(&unit->tlb_flush_entries);
+ TASK_INIT(&unit->qi_task, 0, dmar_qi_task, unit);
+ unit->qi_taskqueue = taskqueue_create_fast("dmar", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->qi_taskqueue);
+ taskqueue_start_threads(&unit->qi_taskqueue, 1, PI_AV,
+ "dmar%d qi taskq", unit->unit);
+
+ unit->inv_waitd_gen = 0;
+ unit->inv_waitd_seq = 1;
+
+ qi_sz = DMAR_IQA_QS_DEF;
+ TUNABLE_INT_FETCH("hw.dmar.qi_size", &qi_sz);
+ if (qi_sz > DMAR_IQA_QS_MAX)
+ qi_sz = DMAR_IQA_QS_MAX;
+ unit->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
+ /* Reserve one descriptor to prevent wraparound. */
+ unit->inv_queue_avail = unit->inv_queue_size - DMAR_IQ_DESCR_SZ;
+
+ /* The invalidation queue reads by DMARs are always coherent. */
+ unit->inv_queue = kmem_alloc_contig(kernel_arena, unit->inv_queue_size,
+ M_WAITOK | M_ZERO, 0, dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
+ unit->inv_waitd_seq_hw_phys = pmap_kextract(
+ (vm_offset_t)&unit->inv_waitd_seq_hw);
+
+ DMAR_LOCK(unit);
+ dmar_write8(unit, DMAR_IQT_REG, 0);
+ iqa = pmap_kextract(unit->inv_queue);
+ iqa |= qi_sz;
+ dmar_write8(unit, DMAR_IQA_REG, iqa);
+ dmar_enable_qi(unit);
+ ics = dmar_read4(unit, DMAR_ICS_REG);
+ if ((ics & DMAR_ICS_IWC) != 0) {
+ ics = DMAR_ICS_IWC;
+ dmar_write4(unit, DMAR_ICS_REG, ics);
+ }
+ DMAR_UNLOCK(unit);
+
+ return (0);
+}
+
+void
+dmar_fini_qi(struct dmar_unit *unit)
+{
+ struct dmar_qi_genseq gseq;
+
+	if (!unit->qi_enabled)
+ return;
+ taskqueue_drain(unit->qi_taskqueue, &unit->qi_task);
+ taskqueue_free(unit->qi_taskqueue);
+ unit->qi_taskqueue = NULL;
+
+ DMAR_LOCK(unit);
+	/* quiesce */
+ dmar_qi_ensure(unit, 1);
+ dmar_qi_emit_wait_seq(unit, &gseq);
+ dmar_qi_advance_tail(unit);
+ dmar_qi_wait_for_seq(unit, &gseq);
+	/* only after the quiesce, disable the queue */
+ dmar_disable_qi(unit);
+ KASSERT(unit->inv_seq_waiters == 0,
+ ("dmar%d: waiters on disabled queue", unit->unit));
+ DMAR_UNLOCK(unit);
+
+ kmem_free(kernel_arena, unit->inv_queue, unit->inv_queue_size);
+ unit->inv_queue = 0;
+ unit->inv_queue_size = 0;
+ unit->qi_enabled = 0;
+}
+
+void
+dmar_enable_qi_intr(struct dmar_unit *unit)
+{
+ uint32_t iectl;
+
+ DMAR_ASSERT_LOCKED(unit);
+ KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit));
+ iectl = dmar_read4(unit, DMAR_IECTL_REG);
+ iectl &= ~DMAR_IECTL_IM;
+ dmar_write4(unit, DMAR_IECTL_REG, iectl);
+}
+
+void
+dmar_disable_qi_intr(struct dmar_unit *unit)
+{
+ uint32_t iectl;
+
+ DMAR_ASSERT_LOCKED(unit);
+ KASSERT(DMAR_HAS_QI(unit), ("dmar%d: QI is not supported", unit->unit));
+ iectl = dmar_read4(unit, DMAR_IECTL_REG);
+ dmar_write4(unit, DMAR_IECTL_REG, iectl | DMAR_IECTL_IM);
+}
diff --git a/sys/x86/iommu/intel_quirks.c b/sys/x86/iommu/intel_quirks.c
new file mode 100644
index 0000000..7c35ae6
--- /dev/null
+++ b/sys/x86/iommu/intel_quirks.c
@@ -0,0 +1,195 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <machine/bus.h>
+#include <contrib/dev/acpica/include/acpi.h>
+#include <contrib/dev/acpica/include/accommon.h>
+#include <dev/acpica/acpivar.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_map.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+#include <dev/pci/pcivar.h>
+
+typedef void (*dmar_quirk_fun)(struct dmar_unit *);
+
+struct intel_dmar_quirk_cpu {
+ u_int ext_family;
+ u_int ext_model;
+ u_int family_code;
+ u_int model;
+ u_int stepping;
+ dmar_quirk_fun quirk;
+ const char *descr;
+};
+
+struct intel_dmar_quirk_nb {
+ u_int dev_id;
+ u_int rev_no;
+ dmar_quirk_fun quirk;
+ const char *descr;
+};
+
+static void
+dmar_match_quirks(struct dmar_unit *dmar,
+ const struct intel_dmar_quirk_nb *nb_quirks, int nb_quirks_len,
+ const struct intel_dmar_quirk_cpu *cpu_quirks, int cpu_quirks_len)
+{
+ device_t nb;
+ const struct intel_dmar_quirk_nb *nb_quirk;
+ const struct intel_dmar_quirk_cpu *cpu_quirk;
+ u_int p[4];
+ u_int dev_id, rev_no;
+ u_int ext_family, ext_model, family_code, model, stepping;
+ int i;
+
+ if (nb_quirks != NULL) {
+ nb = pci_find_bsf(0, 0, 0);
+ if (nb != NULL) {
+ dev_id = pci_get_device(nb);
+ rev_no = pci_get_revid(nb);
+ for (i = 0; i < nb_quirks_len; i++) {
+ nb_quirk = &nb_quirks[i];
+ if (nb_quirk->dev_id == dev_id &&
+ nb_quirk->rev_no == rev_no) {
+ if (bootverbose) {
+ device_printf(dmar->dev,
+ "NB IOMMU quirk %s\n",
+ nb_quirk->descr);
+ }
+ nb_quirk->quirk(dmar);
+ }
+ }
+ } else {
+ device_printf(dmar->dev, "cannot find northbridge\n");
+ }
+ }
+ if (cpu_quirks != NULL) {
+ do_cpuid(1, p);
+ ext_family = (p[0] & CPUID_EXT_FAMILY) >> 20;
+ ext_model = (p[0] & CPUID_EXT_MODEL) >> 16;
+ family_code = (p[0] & CPUID_FAMILY) >> 8;
+ model = (p[0] & CPUID_MODEL) >> 4;
+ stepping = p[0] & CPUID_STEPPING;
+ for (i = 0; i < cpu_quirks_len; i++) {
+ cpu_quirk = &cpu_quirks[i];
+ if (cpu_quirk->ext_family == ext_family &&
+ cpu_quirk->ext_model == ext_model &&
+ cpu_quirk->family_code == family_code &&
+ cpu_quirk->model == model &&
+ (cpu_quirk->stepping == -1 ||
+ cpu_quirk->stepping == stepping)) {
+ if (bootverbose) {
+ device_printf(dmar->dev,
+ "CPU IOMMU quirk %s\n",
+ cpu_quirk->descr);
+ }
+ cpu_quirk->quirk(dmar);
+ }
+ }
+ }
+}
+
+static void
+nb_5400_no_low_high_prot_mem(struct dmar_unit *unit)
+{
+
+ unit->hw_cap &= ~(DMAR_CAP_PHMR | DMAR_CAP_PLMR);
+}
+
+static const struct intel_dmar_quirk_nb pre_use_nb[] = {
+ {
+ .dev_id = 0x4001, .rev_no = 0x20,
+ .quirk = nb_5400_no_low_high_prot_mem,
+ .descr = "5400 E23" /* no low/high protected memory */
+ },
+ {
+ .dev_id = 0x4003, .rev_no = 0x20,
+ .quirk = nb_5400_no_low_high_prot_mem,
+ .descr = "5400 E23" /* no low/high protected memory */
+ },
+};
+
+static void
+cpu_e5_am9(struct dmar_unit *unit)
+{
+
+ unit->hw_cap &= ~(0x3fULL << 48);
+ unit->hw_cap |= (9ULL << 48);
+}
+
+static const struct intel_dmar_quirk_cpu post_ident_cpu[] = {
+ {
+ .ext_family = 0, .ext_model = 2, .family_code = 6, .model = 13,
+ .stepping = 6, .quirk = cpu_e5_am9,
+ .descr = "E5 BT176" /* AM should be at most 9 */
+ },
+};
+
+void
+dmar_quirks_pre_use(struct dmar_unit *dmar)
+{
+
+ if (!dmar_barrier_enter(dmar, DMAR_BARRIER_USEQ))
+ return;
+ DMAR_LOCK(dmar);
+ dmar_match_quirks(dmar, pre_use_nb, nitems(pre_use_nb),
+ NULL, 0);
+ dmar_barrier_exit(dmar, DMAR_BARRIER_USEQ);
+}
+
+void
+dmar_quirks_post_ident(struct dmar_unit *dmar)
+{
+
+ dmar_match_quirks(dmar, NULL, 0, post_ident_cpu,
+ nitems(post_ident_cpu));
+}
diff --git a/sys/x86/iommu/intel_reg.h b/sys/x86/iommu/intel_reg.h
new file mode 100644
index 0000000..4c266de
--- /dev/null
+++ b/sys/x86/iommu/intel_reg.h
@@ -0,0 +1,330 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __X86_IOMMU_INTEL_REG_H
+#define __X86_IOMMU_INTEL_REG_H
+
+#define DMAR_PAGE_SIZE PAGE_SIZE
+#define DMAR_PAGE_MASK (DMAR_PAGE_SIZE - 1)
+#define DMAR_PAGE_SHIFT PAGE_SHIFT
+#define DMAR_NPTEPG (DMAR_PAGE_SIZE / sizeof(dmar_pte_t))
+#define DMAR_NPTEPGSHIFT 9
+#define DMAR_PTEMASK (DMAR_NPTEPG - 1)
+
+typedef struct dmar_root_entry {
+ uint64_t r1;
+ uint64_t r2;
+} dmar_root_entry_t;
+#define DMAR_ROOT_R1_P 1 /* Present */
+#define DMAR_ROOT_R1_CTP_MASK 0xfffffffffffff000 /* Mask for Context-Entry
+ Table Pointer */
+
+#define DMAR_CTX_CNT (DMAR_PAGE_SIZE / sizeof(dmar_root_entry_t))
+
+typedef struct dmar_ctx_entry {
+ uint64_t ctx1;
+ uint64_t ctx2;
+} dmar_ctx_entry_t;
+#define DMAR_CTX1_P 1 /* Present */
+#define DMAR_CTX1_FPD 2 /* Fault Processing Disable */
+ /* Translation Type: */
+#define DMAR_CTX1_T_UNTR 0 /* only Untranslated */
+#define DMAR_CTX1_T_TR 4 /* both Untranslated
+ and Translated */
+#define DMAR_CTX1_T_PASS 8 /* Pass-Through */
+#define DMAR_CTX1_ASR_MASK 0xfffffffffffff000 /* Mask for the Address
+ Space Root */
+#define DMAR_CTX2_AW_2LVL 0 /* 2-level page tables */
+#define DMAR_CTX2_AW_3LVL 1 /* 3-level page tables */
+#define DMAR_CTX2_AW_4LVL 2 /* 4-level page tables */
+#define DMAR_CTX2_AW_5LVL 3 /* 5-level page tables */
+#define DMAR_CTX2_AW_6LVL 4 /* 6-level page tables */
+#define DMAR_CTX2_DID(x) ((x) << 8) /* Domain Identifier */
+
+typedef struct dmar_pte {
+ uint64_t pte;
+} dmar_pte_t;
+#define DMAR_PTE_R 1 /* Read */
+#define DMAR_PTE_W (1 << 1) /* Write */
+#define DMAR_PTE_SP (1 << 7) /* Super Page */
+#define DMAR_PTE_SNP (1 << 11) /* Snoop Behaviour */
+#define DMAR_PTE_ADDR_MASK 0xffffffffff000 /* Address Mask */
+#define DMAR_PTE_TM (1ULL << 62) /* Transient Mapping */
+
+/* Version register */
+#define DMAR_VER_REG 0
+#define DMAR_MAJOR_VER(x) (((x) >> 4) & 0xf)
+#define DMAR_MINOR_VER(x) ((x) & 0xf)
+
+/* Capabilities register */
+#define DMAR_CAP_REG 0x8
+#define DMAR_CAP_DRD (1ULL << 55) /* DMA Read Draining */
+#define DMAR_CAP_DWD (1ULL << 54) /* DMA Write Draining */
+#define DMAR_CAP_MAMV(x) ((u_int)(((x) >> 48) & 0x3f))
+ /* Maximum Address Mask */
+#define DMAR_CAP_NFR(x) ((u_int)(((x) >> 40) & 0xff) + 1)
+ /* Num of Fault-recording regs */
+#define DMAR_CAP_PSI (1ULL << 39) /* Page Selective Invalidation */
+#define DMAR_CAP_SPS(x) ((u_int)(((x) >> 34) & 0xf)) /* Super-Page Support */
+#define DMAR_CAP_SPS_2M 0x1
+#define DMAR_CAP_SPS_1G 0x2
+#define DMAR_CAP_SPS_512G 0x4
+#define DMAR_CAP_SPS_1T 0x8
+#define DMAR_CAP_FRO(x) ((u_int)(((x) >> 24) & 0x1ff))
+ /* Fault-recording reg offset */
+#define DMAR_CAP_ISOCH (1 << 23) /* Isochrony */
+#define DMAR_CAP_ZLR (1 << 22) /* Zero-length reads */
+#define DMAR_CAP_MGAW(x) ((u_int)(((x) >> 16) & 0x3f))
+ /* Max Guest Address Width */
+#define DMAR_CAP_SAGAW(x) ((u_int)(((x) >> 8) & 0x1f))
+ /* Adjusted Guest Address Width */
+#define DMAR_CAP_SAGAW_2LVL 0x01
+#define DMAR_CAP_SAGAW_3LVL 0x02
+#define DMAR_CAP_SAGAW_4LVL 0x04
+#define DMAR_CAP_SAGAW_5LVL 0x08
+#define DMAR_CAP_SAGAW_6LVL 0x10
+#define DMAR_CAP_CM (1 << 7) /* Caching mode */
+#define DMAR_CAP_PHMR (1 << 6) /* Protected High-mem Region */
+#define DMAR_CAP_PLMR (1 << 5) /* Protected Low-mem Region */
+#define DMAR_CAP_RWBF (1 << 4) /* Required Write-Buffer Flushing */
+#define DMAR_CAP_AFL (1 << 3) /* Advanced Fault Logging */
+#define DMAR_CAP_ND(x) ((u_int)((x) & 0x3)) /* Number of domains */
+
+/* Extended Capabilities register */
+#define DMAR_ECAP_REG 0x10
+#define DMAR_ECAP_MHMV(x) ((u_int)(((x) >> 20) & 0xf))
+ /* Maximum Handle Mask Value */
+#define DMAR_ECAP_IRO(x) ((u_int)(((x) >> 8) & 0x3ff))
+ /* IOTLB Register Offset */
+#define DMAR_ECAP_SC (1 << 7) /* Snoop Control */
+#define DMAR_ECAP_PT (1 << 6) /* Pass Through */
+#define DMAR_ECAP_EIM (1 << 4) /* Extended Interrupt Mode */
+#define DMAR_ECAP_IR (1 << 3) /* Interrupt Remapping */
+#define DMAR_ECAP_DI (1 << 2) /* Device IOTLB */
+#define DMAR_ECAP_QI (1 << 1) /* Queued Invalidation */
+#define DMAR_ECAP_C (1 << 0) /* Coherency */
+
+/* Global Command register */
+#define DMAR_GCMD_REG 0x18
+#define DMAR_GCMD_TE (1 << 31) /* Translation Enable */
+#define DMAR_GCMD_SRTP (1 << 30) /* Set Root Table Pointer */
+#define DMAR_GCMD_SFL (1 << 29) /* Set Fault Log */
+#define DMAR_GCMD_EAFL (1 << 28) /* Enable Advanced Fault Logging */
+#define DMAR_GCMD_WBF (1 << 27) /* Write Buffer Flush */
+#define DMAR_GCMD_QIE (1 << 26) /* Queued Invalidation Enable */
+#define DMAR_GCMD_IRE (1 << 25) /* Interrupt Remapping Enable */
+#define DMAR_GCMD_SIRTP (1 << 24) /* Set Interrupt Remap Table Pointer */
+#define DMAR_GCMD_CFI (1 << 23) /* Compatibility Format Interrupt */
+
+/* Global Status register */
+#define DMAR_GSTS_REG 0x1c
+#define DMAR_GSTS_TES (1 << 31) /* Translation Enable Status */
+#define DMAR_GSTS_RTPS (1 << 30) /* Root Table Pointer Status */
+#define DMAR_GSTS_FLS (1 << 29) /* Fault Log Status */
+#define DMAR_GSTS_AFLS (1 << 28) /* Advanced Fault Logging Status */
+#define DMAR_GSTS_WBFS (1 << 27) /* Write Buffer Flush Status */
+#define DMAR_GSTS_QIES (1 << 26) /* Queued Invalidation Enable Status */
+#define DMAR_GSTS_IRES (1 << 25) /* Interrupt Remapping Enable Status */
+#define DMAR_GSTS_IRTPS (1 << 24) /* Interrupt Remapping Table
+ Pointer Status */
+#define DMAR_GSTS_CFIS (1 << 23) /* Compatibility Format
+ Interrupt Status */
+
+/* Root-Entry Table Address register */
+#define DMAR_RTADDR_REG 0x20
+
+/* Context Command register */
+#define DMAR_CCMD_REG 0x28
+#define DMAR_CCMD_ICC (1ULL << 63) /* Invalidate Context-Cache */
+#define DMAR_CCMD_ICC32 (1 << 31)
+#define DMAR_CCMD_CIRG_MASK (0x3ULL << 61) /* Context Invalidation
+ Request Granularity */
+#define DMAR_CCMD_CIRG_GLOB (0x1ULL << 61) /* Global */
+#define DMAR_CCMD_CIRG_DOM (0x2ULL << 61) /* Domain */
+#define DMAR_CCMD_CIRG_DEV (0x3ULL << 61) /* Device */
+#define DMAR_CCMD_CAIG(x) (((x) >> 59) & 0x3) /* Context Actual
+ Invalidation Granularity */
+#define DMAR_CCMD_CAIG_GLOB 0x1 /* Global */
+#define DMAR_CCMD_CAIG_DOM 0x2 /* Domain */
+#define DMAR_CCMD_CAIG_DEV 0x3 /* Device */
+#define DMAR_CCMD_FM (0x3ULL << 32) /* Function Mask */
+#define DMAR_CCMD_SID(x) (((x) & 0xffff) << 16) /* Source-ID */
+#define DMAR_CCMD_DID(x) ((x) & 0xffff) /* Domain-ID */
+
+/* Invalidate Address register */
+#define DMAR_IVA_REG_OFF 0
+#define DMAR_IVA_IH (1 << 6) /* Invalidation Hint */
+#define DMAR_IVA_AM(x) ((x) & 0x1f) /* Address Mask */
+#define DMAR_IVA_ADDR(x) ((x) & ~0xfffULL) /* Address */
+
+/* IOTLB Invalidate register */
+#define DMAR_IOTLB_REG_OFF 0x8
+#define DMAR_IOTLB_IVT (1ULL << 63) /* Invalidate IOTLB */
+#define DMAR_IOTLB_IVT32 (1 << 31)
+#define DMAR_IOTLB_IIRG_MASK (0x3ULL << 60) /* Invalidation Request
+ Granularity */
+#define DMAR_IOTLB_IIRG_GLB (0x1ULL << 60) /* Global */
+#define DMAR_IOTLB_IIRG_DOM (0x2ULL << 60) /* Domain-selective */
+#define DMAR_IOTLB_IIRG_PAGE (0x3ULL << 60) /* Page-selective */
+#define DMAR_IOTLB_IAIG_MASK (0x3ULL << 57) /* Actual Invalidation
+ Granularity */
+#define DMAR_IOTLB_IAIG_INVLD 0 /* Hw detected error */
+#define DMAR_IOTLB_IAIG_GLB (0x1ULL << 57) /* Global */
+#define DMAR_IOTLB_IAIG_DOM (0x2ULL << 57) /* Domain-selective */
+#define DMAR_IOTLB_IAIG_PAGE (0x3ULL << 57) /* Page-selective */
+#define DMAR_IOTLB_DR (0x1ULL << 49) /* Drain Reads */
+#define DMAR_IOTLB_DW (0x1ULL << 48) /* Drain Writes */
+#define DMAR_IOTLB_DID(x) (((uint64_t)(x) & 0xffff) << 32) /* Domain Id */
+
+/* Fault Status register */
+#define DMAR_FSTS_REG 0x34
+#define DMAR_FSTS_FRI(x) (((x) >> 8) & 0xff) /* Fault Record Index */
+#define DMAR_FSTS_ITE (1 << 6) /* Invalidation Time-out */
+#define DMAR_FSTS_ICE (1 << 5) /* Invalidation Completion */
+#define DMAR_FSTS_IQE (1 << 4) /* Invalidation Queue */
+#define DMAR_FSTS_APF (1 << 3) /* Advanced Pending Fault */
+#define DMAR_FSTS_AFO (1 << 2) /* Advanced Fault Overflow */
+#define DMAR_FSTS_PPF (1 << 1) /* Primary Pending Fault */
+#define DMAR_FSTS_PFO 1 /* Fault Overflow */
+
+/* Fault Event Control register */
+#define DMAR_FECTL_REG 0x38
+#define DMAR_FECTL_IM (1 << 31) /* Interrupt Mask */
+#define DMAR_FECTL_IP (1 << 30) /* Interrupt Pending */
+
+/* Fault Event Data register */
+#define DMAR_FEDATA_REG 0x3c
+
+/* Fault Event Address register */
+#define DMAR_FEADDR_REG 0x40
+
+/* Fault Event Upper Address register */
+#define DMAR_FEUADDR_REG 0x44
+
+/* Advanced Fault Log register */
+#define DMAR_AFLOG_REG 0x58
+
+/* Fault Recording Register, also usable for Advanced Fault Log records */
+#define DMAR_FRCD2_F (1ULL << 63) /* Fault */
+#define DMAR_FRCD2_F32 (1 << 31)
+#define DMAR_FRCD2_T(x) ((int)((x >> 62) & 1)) /* Type */
+#define DMAR_FRCD2_T_W 0 /* Write request */
+#define DMAR_FRCD2_T_R 1 /* Read or AtomicOp */
+#define DMAR_FRCD2_AT(x) ((int)((x >> 60) & 0x3)) /* Address Type */
+#define DMAR_FRCD2_FR(x) ((int)((x >> 32) & 0xff)) /* Fault Reason */
+#define DMAR_FRCD2_SID(x) ((int)(x & 0xffff)) /* Source Identifier */
+#define DMAR_FRCD1_FI_MASK 0xffffffffff000 /* Fault Info, Address Mask */
+
+/* Protected Memory Enable register */
+#define DMAR_PMEN_REG 0x64
+#define DMAR_PMEN_EPM (1 << 31) /* Enable Protected Memory */
+#define DMAR_PMEN_PRS 1 /* Protected Region Status */
+
+/* Protected Low-Memory Base register */
+#define DMAR_PLMBASE_REG 0x68
+
+/* Protected Low-Memory Limit register */
+#define DMAR_PLMLIMIT_REG 0x6c
+
+/* Protected High-Memory Base register */
+#define DMAR_PHMBASE_REG 0x70
+
+/* Protected High-Memory Limit register */
+#define DMAR_PHMLIMIT_REG 0x78
+
+/* Queued Invalidation Descriptors */
+#define DMAR_IQ_DESCR_SZ_SHIFT 4 /* Shift for descriptor count
+ to ring offset */
+#define DMAR_IQ_DESCR_SZ (1 << DMAR_IQ_DESCR_SZ_SHIFT)
+ /* Descriptor size */
+
+#define DMAR_IQ_DESCR_CTX_INV 0x1 /* Context-cache Invalidate
+ Descriptor */
+#define DMAR_IQ_DESCR_CTX_GLOB (0x1 << 4) /* Granularity: Global */
+#define DMAR_IQ_DESCR_CTX_DOM (0x2 << 4) /* Granularity: Domain */
+#define DMAR_IQ_DESCR_CTX_DEV (0x3 << 4) /* Granularity: Device */
+#define DMAR_IQ_DESCR_CTX_DID(x) (((uint32_t)(x)) << 16) /* Domain Id */
+#define DMAR_IQ_DESCR_CTX_SRC(x) (((uint64_t)(x)) << 32) /* Source Id */
+#define DMAR_IQ_DESCR_CTX_FM(x) (((uint64_t)(x)) << 48) /* Function Mask */
+
+#define DMAR_IQ_DESCR_IOTLB_INV 0x2 /* IOTLB Invalidate Descriptor */
+#define DMAR_IQ_DESCR_IOTLB_GLOB (0x1 << 4) /* Granularity: Global */
+#define DMAR_IQ_DESCR_IOTLB_DOM (0x2 << 4) /* Granularity: Domain */
+#define DMAR_IQ_DESCR_IOTLB_PAGE (0x3 << 4) /* Granularity: Page */
+#define DMAR_IQ_DESCR_IOTLB_DW (1 << 6) /* Drain Writes */
+#define DMAR_IQ_DESCR_IOTLB_DR (1 << 7) /* Drain Reads */
+#define DMAR_IQ_DESCR_IOTLB_DID(x) (((uint32_t)(x)) << 16) /* Domain Id */
+
+#define DMAR_IQ_DESCR_WAIT_ID 0x5 /* Invalidation Wait Descriptor */
+#define DMAR_IQ_DESCR_WAIT_IF (1 << 4) /* Interrupt Flag */
+#define DMAR_IQ_DESCR_WAIT_SW (1 << 5) /* Status Write */
+#define DMAR_IQ_DESCR_WAIT_FN (1 << 6) /* Fence */
+#define DMAR_IQ_DESCR_WAIT_SD(x) (((uint64_t)(x)) << 32) /* Status Data */
+
+/* Invalidation Queue Head register */
+#define DMAR_IQH_REG 0x80
+#define DMAR_IQH_MASK 0x7fff0 /* Next cmd index mask */
+
+/* Invalidation Queue Tail register */
+#define DMAR_IQT_REG 0x88
+#define DMAR_IQT_MASK 0x7fff0
+
+/* Invalidation Queue Address register */
+#define DMAR_IQA_REG 0x90
+#define DMAR_IQA_IQA_MASK 0xfffffffffffff000 /* Invalidation Queue
+ Base Address mask */
+#define DMAR_IQA_QS_MASK 0x7 /* Queue Size in pages */
+#define DMAR_IQA_QS_MAX 0x7 /* Max Queue size */
+#define DMAR_IQA_QS_DEF 3
+
+/* Invalidation Completion Status register */
+#define DMAR_ICS_REG 0x9c
+#define DMAR_ICS_IWC 1 /* Invalidation Wait
+ Descriptor Complete */
+
+/* Invalidation Event Control register */
+#define DMAR_IECTL_REG 0xa0
+#define DMAR_IECTL_IM (1 << 31) /* Interrupt Mask */
+#define DMAR_IECTL_IP (1 << 30) /* Interrupt Pending */
+
+/* Invalidation Event Data register */
+#define DMAR_IEDATA_REG 0xa4
+
+/* Invalidation Event Address register */
+#define DMAR_IEADDR_REG 0xa8
+
+/* Invalidation Event Upper Address register */
+#define DMAR_IEUADDR_REG 0xac
+
+/* Interrupt Remapping Table Address register */
+#define DMAR_IRTA_REG 0xb8
+
+#endif
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
new file mode 100644
index 0000000..d81ec04
--- /dev/null
+++ b/sys/x86/iommu/intel_utils.c
@@ -0,0 +1,563 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+u_int
+dmar_nd2mask(u_int nd)
+{
+ static const u_int masks[] = {
+ 0x000f, /* nd == 0 */
+ 0x002f, /* nd == 1 */
+ 0x00ff, /* nd == 2 */
+ 0x02ff, /* nd == 3 */
+ 0x0fff, /* nd == 4 */
+ 0x2fff, /* nd == 5 */
+ 0xffff, /* nd == 6 */
+ 0x0000, /* nd == 7 reserved */
+ };
+
+ KASSERT(nd <= 6, ("number of domains %d", nd));
+ return (masks[nd]);
+}
+
+static const struct sagaw_bits_tag {
+ int agaw;
+ int cap;
+ int awlvl;
+ int pglvl;
+} sagaw_bits[] = {
+ {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
+ .pglvl = 2},
+ {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
+ .pglvl = 3},
+ {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
+ .pglvl = 4},
+ {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
+ .pglvl = 5},
+ {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL,
+ .pglvl = 6}
+};
+#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0]))
+
+bool
+dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
+{
+ int i;
+
+ for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+ if (sagaw_bits[i].pglvl != pglvl)
+ continue;
+ if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
+ return (true);
+ }
+ return (false);
+}
+
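+/*
+ * Select the smallest address width from the sagaw table that covers
+ * the requested mgaw, and record the corresponding agaw, page-table
+ * depth and context-entry AW encoding in the context.
+ */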
+int
+ctx_set_agaw(struct dmar_ctx *ctx, int mgaw)
+{
+ int sagaw, i;
+
+ ctx->mgaw = mgaw;
+ sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap);
+ for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+ if (sagaw_bits[i].agaw >= mgaw) {
+ ctx->agaw = sagaw_bits[i].agaw;
+ ctx->pglvl = sagaw_bits[i].pglvl;
+ ctx->awlvl = sagaw_bits[i].awlvl;
+ return (0);
+ }
+ }
+ device_printf(ctx->dmar->dev,
+ "context request mgaw %d for pci%d:%d:%d:%d, "
+ "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus,
+ ctx->slot, ctx->func, sagaw);
+ return (EINVAL);
+}
+
+/*
+ * Find the best-fit mgaw for the given maxaddr:
+ * - if allow_less is false, an sagaw must be found which maps all of
+ *   the requested address space (used by the identity mappings);
+ * - if allow_less is true and no supported sagaw can map the whole
+ *   requested address space, accept the biggest supported sagaw,
+ *   whatever it is.
+ */
+int
+dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr, bool allow_less)
+{
+ int i;
+
+ for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+ if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
+ (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
+ break;
+ }
+ if (allow_less && i == SIZEOF_SAGAW_BITS) {
+ do {
+ i--;
+ } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
+ == 0);
+ }
+ if (i < SIZEOF_SAGAW_BITS)
+ return (sagaw_bits[i].agaw);
+ KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
+ (uintmax_t) maxaddr, allow_less));
+ return (-1);
+}
+
+/*
+ * Calculate the total amount of page table pages needed to map the
+ * whole bus address space on the context with the selected agaw.
+ */
+vm_pindex_t
+pglvl_max_pages(int pglvl)
+{
+ vm_pindex_t res;
+ int i;
+
+ for (res = 0, i = pglvl; i > 0; i--) {
+ res *= DMAR_NPTEPG;
+ res++;
+ }
+ return (res);
+}
+
+/*
+ * Return true if the page table level lvl supports the superpage for
+ * the context ctx.
+ */
+int
+ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl)
+{
+ int alvl, cap_sps;
+ static const int sagaw_sp[] = {
+ DMAR_CAP_SPS_2M,
+ DMAR_CAP_SPS_1G,
+ DMAR_CAP_SPS_512G,
+ DMAR_CAP_SPS_1T
+ };
+
+ alvl = ctx->pglvl - lvl - 1;
+ cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap);
+ return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) &&
+ (sagaw_sp[alvl] & cap_sps) != 0);
+}
+
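+/*
+ * Return the amount of address space mapped by a single page table
+ * entry at level lvl of a total_pglvl-level page table.
+ */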
+dmar_gaddr_t
+pglvl_page_size(int total_pglvl, int lvl)
+{
+ int rlvl;
+ static const dmar_gaddr_t pg_sz[] = {
+ (dmar_gaddr_t)DMAR_PAGE_SIZE,
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
+ (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
+ };
+
+ KASSERT(lvl >= 0 && lvl < total_pglvl,
+ ("total %d lvl %d", total_pglvl, lvl));
+ rlvl = total_pglvl - lvl - 1;
+ KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]),
+ ("sizeof pg_sz lvl %d", lvl));
+ return (pg_sz[rlvl]);
+}
+
+dmar_gaddr_t
+ctx_page_size(struct dmar_ctx *ctx, int lvl)
+{
+
+ return (pglvl_page_size(ctx->pglvl, lvl));
+}
+
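+/*
+ * Calculate the address mask for a page-selective invalidation of the
+ * range [base, base + size): the largest mask allowed by the MAMV
+ * capability for which base is aligned to, and size covers, the
+ * resulting invalidation size, returned through isizep.
+ */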
+int
+calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size,
+ dmar_gaddr_t *isizep)
+{
+ dmar_gaddr_t isize;
+ int am;
+
+ for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
+ isize = 1ULL << (am + DMAR_PAGE_SHIFT);
+ if ((base & (isize - 1)) == 0 && size >= isize)
+ break;
+ if (am == 0)
+ break;
+ }
+ *isizep = isize;
+ return (am);
+}
+
+dmar_haddr_t dmar_high;
+int haw;
+int dmar_tbl_pagecnt;
+
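+/*
+ * Look up, and if allowed allocate, the page at index idx in obj.
+ * The DMAR_PGF_* flags control object locking, zeroing of newly
+ * allocated pages, and whether the allocation may sleep.
+ */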
+vm_page_t
+dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+ vm_page_t m;
+ int zeroed;
+
+ zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
+ for (;;) {
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) {
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ break;
+ }
+ m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
+ VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
+ dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ if (m != NULL) {
+ if (zeroed && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ atomic_add_int(&dmar_tbl_pagecnt, 1);
+ break;
+ }
+ if ((flags & DMAR_PGF_WAITOK) == 0)
+ break;
+ if ((flags & DMAR_PGF_OBJL) != 0)
+ VM_OBJECT_WUNLOCK(obj);
+ VM_WAIT;
+ if ((flags & DMAR_PGF_OBJL) != 0)
+ VM_OBJECT_WLOCK(obj);
+ }
+ return (m);
+}
+
+void
+dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+ vm_page_t m;
+
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if (m != NULL) {
+ vm_page_free(m);
+ atomic_subtract_int(&dmar_tbl_pagecnt, 1);
+ }
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+}
+
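+/*
+ * Map the page table page at index idx into KVA through an sf_buf,
+ * optionally allocating the page first.  The object lock may be
+ * dropped for sleepable requests; on return the lock state again
+ * matches the DMAR_PGF_OBJL request.
+ */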
+void *
+dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
+ struct sf_buf **sf)
+{
+ vm_page_t m;
+ bool allocated;
+
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_lookup(obj, idx);
+ if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) {
+ m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL);
+ allocated = true;
+ } else
+ allocated = false;
+ if (m == NULL) {
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return (NULL);
+ }
+ /* Sleepable allocations cannot fail. */
+ if ((flags & DMAR_PGF_WAITOK) != 0)
+ VM_OBJECT_WUNLOCK(obj);
+ sched_pin();
+ *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK)
+ == 0 ? SFB_NOWAIT : 0));
+ if (*sf == NULL) {
+ sched_unpin();
+ if (allocated) {
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL);
+ }
+ if ((flags & DMAR_PGF_OBJL) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return (NULL);
+ }
+ if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) ==
+ (DMAR_PGF_WAITOK | DMAR_PGF_OBJL))
+ VM_OBJECT_WLOCK(obj);
+ else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0)
+ VM_OBJECT_WUNLOCK(obj);
+ return ((void *)sf_buf_kva(*sf));
+}
+
+void
+dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
+{
+ vm_page_t m;
+
+ m = sf_buf_page(sf);
+ sf_buf_free(sf);
+ sched_unpin();
+
+ /*
+ * If DMAR does not snoop paging structures accesses, flush
+ * CPU cache to memory.
+ */
+ if (!coherent)
+ pmap_invalidate_cache_pages(&m, 1);
+}
+
+/*
+ * Load the root entry pointer into the hardware, busily waiting for
+ * the completion.
+ */
+int
+dmar_load_root_entry_ptr(struct dmar_unit *unit)
+{
+ vm_page_t root_entry;
+
+ /*
+ * Access to the GCMD register must be serialized while the
+ * command is submitted.
+ */
+ DMAR_ASSERT_LOCKED(unit);
+
+ /* VM_OBJECT_RLOCK(unit->ctx_obj); */
+ VM_OBJECT_WLOCK(unit->ctx_obj);
+ root_entry = vm_page_lookup(unit->ctx_obj, 0);
+ /* VM_OBJECT_RUNLOCK(unit->ctx_obj); */
+ VM_OBJECT_WUNLOCK(unit->ctx_obj);
+ dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0)
+ cpu_spinwait();
+ return (0);
+}
+
+/*
+ * Globally invalidate the context entries cache, busily waiting for
+ * the completion.
+ */
+int
+dmar_inv_ctx_glob(struct dmar_unit *unit)
+{
+
+ /*
+ * Access to the CCMD register must be serialized while the
+ * command is submitted.
+ */
+ DMAR_ASSERT_LOCKED(unit);
+ KASSERT(!unit->qi_enabled, ("QI enabled"));
+
+ /*
+ * The DMAR_CCMD_ICC bit in the upper dword should be written
+ * after the low dword write is completed.  The amd64
+ * dmar_write8() does not have this issue; the i386 dmar_write8()
+ * writes the upper dword last.
+ */
+ dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0)
+ cpu_spinwait();
+ return (0);
+}
+
+/*
+ * Globally invalidate the IOTLB, busily waiting for the completion.
+ */
+int
+dmar_inv_iotlb_glob(struct dmar_unit *unit)
+{
+ int reg;
+
+ DMAR_ASSERT_LOCKED(unit);
+ KASSERT(!unit->qi_enabled, ("QI enabled"));
+
+ reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
+ /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
+ dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
+ DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
+ DMAR_IOTLB_IVT32) != 0)
+ cpu_spinwait();
+ return (0);
+}
+
+/*
+ * Flush the chipset write buffers. See 11.1 "Write Buffer Flushing"
+ * in the architecture specification.
+ */
+int
+dmar_flush_write_bufs(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+
+ /*
+ * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
+ */
+ KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
+ ("dmar%d: no RWBF", unit->unit));
+
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0)
+ cpu_spinwait();
+ return (0);
+}
+
+int
+dmar_enable_translation(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ unit->hw_gcmd |= DMAR_GCMD_TE;
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)
+ cpu_spinwait();
+ return (0);
+}
+
+int
+dmar_disable_translation(struct dmar_unit *unit)
+{
+
+ DMAR_ASSERT_LOCKED(unit);
+ unit->hw_gcmd &= ~DMAR_GCMD_TE;
+ dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+ /* XXXKIB should have a timeout */
+ while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)
+ cpu_spinwait();
+ return (0);
+}
+
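+/*
+ * Each barrier_id reserves three bits in barrier_flags: one-time work
+ * done, work in progress, and waiter needs a wakeup.
+ * dmar_barrier_enter() returns true when the caller must perform the
+ * work and then call dmar_barrier_exit(); it returns false, possibly
+ * after sleeping, once the work has already been done.
+ */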
+#define BARRIER_F \
+ u_int f_done, f_inproc, f_wakeup; \
+ \
+ f_done = 1 << (barrier_id * 3); \
+ f_inproc = 1 << (barrier_id * 3 + 1); \
+ f_wakeup = 1 << (barrier_id * 3 + 2)
+
+bool
+dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
+{
+ BARRIER_F;
+
+ DMAR_LOCK(dmar);
+ if ((dmar->barrier_flags & f_done) != 0) {
+ DMAR_UNLOCK(dmar);
+ return (false);
+ }
+
+ if ((dmar->barrier_flags & f_inproc) != 0) {
+ while ((dmar->barrier_flags & f_inproc) != 0) {
+ dmar->barrier_flags |= f_wakeup;
+ msleep(&dmar->barrier_flags, &dmar->lock, 0,
+ "dmarb", 0);
+ }
+ KASSERT((dmar->barrier_flags & f_done) != 0,
+ ("dmar%d barrier %d missing done", dmar->unit, barrier_id));
+ DMAR_UNLOCK(dmar);
+ return (false);
+ }
+
+ dmar->barrier_flags |= f_inproc;
+ DMAR_UNLOCK(dmar);
+ return (true);
+}
+
+void
+dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
+{
+ BARRIER_F;
+
+ DMAR_ASSERT_LOCKED(dmar);
+ KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
+ ("dmar%d barrier %d missed entry", dmar->unit, barrier_id));
+ dmar->barrier_flags |= f_done;
+ if ((dmar->barrier_flags & f_wakeup) != 0)
+ wakeup(&dmar->barrier_flags);
+ dmar->barrier_flags &= ~(f_inproc | f_wakeup);
+ DMAR_UNLOCK(dmar);
+}
+
+int dmar_match_verbose;
+
+static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL,
+ "");
+SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN,
+ &dmar_tbl_pagecnt, 0,
+ "Count of pages used for DMAR pagetables");
+SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN,
+ &dmar_match_verbose, 0,
+ "Verbose matching of the PCI devices to DMAR paths");
+#ifdef INVARIANTS
+int dmar_check_free;
+SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN,
+ &dmar_check_free, 0,
+ "Check the GPA RBtree for free_down and free_after validity");
+#endif
+