Diffstat (limited to 'sys/x86/iommu/intel_utils.c')
-rw-r--r--  sys/x86/iommu/intel_utils.c  |  563
1 file changed, 563 insertions, 0 deletions
diff --git a/sys/x86/iommu/intel_utils.c b/sys/x86/iommu/intel_utils.c
new file mode 100644
index 0000000..d81ec04
--- /dev/null
+++ b/sys/x86/iommu/intel_utils.c
@@ -0,0 +1,563 @@
+/*-
+ * Copyright (c) 2013 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memdesc.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/rman.h>
+#include <sys/rwlock.h>
+#include <sys/sched.h>
+#include <sys/sf_buf.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/tree.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
+#include <machine/bus.h>
+#include <machine/cpu.h>
+#include <x86/include/busdma_impl.h>
+#include <x86/iommu/intel_reg.h>
+#include <x86/iommu/busdma_dmar.h>
+#include <x86/iommu/intel_dmar.h>
+
+u_int
+dmar_nd2mask(u_int nd)
+{
+        static const u_int masks[] = {
+                0x000f, /* nd == 0 */
+                0x002f, /* nd == 1 */
+                0x00ff, /* nd == 2 */
+                0x02ff, /* nd == 3 */
+                0x0fff, /* nd == 4 */
+                0x2fff, /* nd == 5 */
+                0xffff, /* nd == 6 */
+                0x0000, /* nd == 7 reserved */
+        };
+
+        KASSERT(nd <= 6, ("number of domains %d", nd));
+        return (masks[nd]);
+}
+
+static const struct sagaw_bits_tag {
+        int agaw;
+        int cap;
+        int awlvl;
+        int pglvl;
+} sagaw_bits[] = {
+        {.agaw = 30, .cap = DMAR_CAP_SAGAW_2LVL, .awlvl = DMAR_CTX2_AW_2LVL,
+            .pglvl = 2},
+        {.agaw = 39, .cap = DMAR_CAP_SAGAW_3LVL, .awlvl = DMAR_CTX2_AW_3LVL,
+            .pglvl = 3},
+        {.agaw = 48, .cap = DMAR_CAP_SAGAW_4LVL, .awlvl = DMAR_CTX2_AW_4LVL,
+            .pglvl = 4},
+        {.agaw = 57, .cap = DMAR_CAP_SAGAW_5LVL, .awlvl = DMAR_CTX2_AW_5LVL,
+            .pglvl = 5},
+        {.agaw = 64, .cap = DMAR_CAP_SAGAW_6LVL, .awlvl = DMAR_CTX2_AW_6LVL,
+            .pglvl = 6}
+};
+#define SIZEOF_SAGAW_BITS (sizeof(sagaw_bits) / sizeof(sagaw_bits[0]))
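
Each sagaw_bits entry ties a guest address width (agaw) to a page-table depth (pglvl): every table level translates 9 address bits on top of the 12-bit page offset, so agaw = 12 + 9 * pglvl, saturating at the 64-bit architectural limit for the 6-level entry. A minimal standalone sketch, not part of the commit, that checks this relationship against the table values copied from above:

        #include <assert.h>

        static const struct { int agaw, pglvl; } awtab[] = {
                {30, 2}, {39, 3}, {48, 4}, {57, 5}, {64, 6},
        };

        int
        main(void)
        {
                int i, agaw;

                for (i = 0; i < 5; i++) {
                        /* 12-bit page offset + 9 translated bits per level. */
                        agaw = 12 + 9 * awtab[i].pglvl;
                        if (agaw > 64)  /* capped at the architectural limit */
                                agaw = 64;
                        assert(agaw == awtab[i].agaw);
                }
                return (0);
        }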
+
+bool
+dmar_pglvl_supported(struct dmar_unit *unit, int pglvl)
+{
+        int i;
+
+        for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+                if (sagaw_bits[i].pglvl != pglvl)
+                        continue;
+                if ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
+                        return (true);
+        }
+        return (false);
+}
+
+int
+ctx_set_agaw(struct dmar_ctx *ctx, int mgaw)
+{
+        int sagaw, i;
+
+        ctx->mgaw = mgaw;
+        sagaw = DMAR_CAP_SAGAW(ctx->dmar->hw_cap);
+        for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+                if (sagaw_bits[i].agaw >= mgaw) {
+                        ctx->agaw = sagaw_bits[i].agaw;
+                        ctx->pglvl = sagaw_bits[i].pglvl;
+                        ctx->awlvl = sagaw_bits[i].awlvl;
+                        return (0);
+                }
+        }
+        device_printf(ctx->dmar->dev,
+            "context request mgaw %d for pci%d:%d:%d:%d, "
+            "no agaw found, sagaw %x\n", mgaw, ctx->dmar->segment, ctx->bus,
+            ctx->slot, ctx->func, sagaw);
+        return (EINVAL);
+}
+
+/*
+ * Find the best-fit mgaw for the given maxaddr:
+ * - if allow_less is false, must find a sagaw which maps all requested
+ *   addresses (used by identity mappings);
+ * - if allow_less is true, and no supported sagaw can map all requested
+ *   address space, accept the biggest sagaw, whatever it is.
+ */
+int
+dmar_maxaddr2mgaw(struct dmar_unit *unit, dmar_gaddr_t maxaddr,
+    bool allow_less)
+{
+        int i;
+
+        for (i = 0; i < SIZEOF_SAGAW_BITS; i++) {
+                if ((1ULL << sagaw_bits[i].agaw) >= maxaddr &&
+                    (DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap) != 0)
+                        break;
+        }
+        if (allow_less && i == SIZEOF_SAGAW_BITS) {
+                do {
+                        i--;
+                } while ((DMAR_CAP_SAGAW(unit->hw_cap) & sagaw_bits[i].cap)
+                    == 0);
+        }
+        if (i < SIZEOF_SAGAW_BITS)
+                return (sagaw_bits[i].agaw);
+        KASSERT(0, ("no mgaw for maxaddr %jx allow_less %d",
+            (uintmax_t) maxaddr, allow_less));
+        return (-1);
+}
+
+/*
+ * Calculate the total number of page table pages needed to map the
+ * whole bus address space on the context with the selected agaw.
+ */
+vm_pindex_t
+pglvl_max_pages(int pglvl)
+{
+        vm_pindex_t res;
+        int i;
+
+        for (res = 0, i = pglvl; i > 0; i--) {
+                res *= DMAR_NPTEPG;
+                res++;
+        }
+        return (res);
+}
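
The loop in pglvl_max_pages is a Horner-style evaluation of the geometric series 1 + N + N^2 + ... + N^(pglvl-1), where N = DMAR_NPTEPG (512: a 4096-byte page holds 512 8-byte PTEs), i.e. the worst case of one root page plus fully populated inner levels. A standalone sketch, not part of the commit and with the constant assumed, illustrating the recurrence:

        #include <assert.h>
        #include <stdint.h>

        #define NPTEPG  512     /* 4096-byte page / 8-byte PTE */

        static uint64_t
        max_pages(int pglvl)
        {
                uint64_t res;
                int i;

                /* res = 1 + NPTEPG + ... + NPTEPG^(pglvl - 1) */
                for (res = 0, i = pglvl; i > 0; i--)
                        res = res * NPTEPG + 1;
                return (res);
        }

        int
        main(void)
        {
                assert(max_pages(1) == 1);
                assert(max_pages(2) == 1 + 512);
                assert(max_pages(3) == 1 + 512 + 512 * 512);
                return (0);
        }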
+
+/*
+ * Return true if the page table level lvl supports the superpage for
+ * the context ctx.
+ */
+int
+ctx_is_sp_lvl(struct dmar_ctx *ctx, int lvl)
+{
+        int alvl, cap_sps;
+        static const int sagaw_sp[] = {
+                DMAR_CAP_SPS_2M,
+                DMAR_CAP_SPS_1G,
+                DMAR_CAP_SPS_512G,
+                DMAR_CAP_SPS_1T
+        };
+
+        alvl = ctx->pglvl - lvl - 1;
+        cap_sps = DMAR_CAP_SPS(ctx->dmar->hw_cap);
+        return (alvl < sizeof(sagaw_sp) / sizeof(sagaw_sp[0]) &&
+            (sagaw_sp[alvl] & cap_sps) != 0);
+}
+
+dmar_gaddr_t
+pglvl_page_size(int total_pglvl, int lvl)
+{
+        int rlvl;
+        static const dmar_gaddr_t pg_sz[] = {
+                (dmar_gaddr_t)DMAR_PAGE_SIZE,
+                (dmar_gaddr_t)DMAR_PAGE_SIZE << DMAR_NPTEPGSHIFT,
+                (dmar_gaddr_t)DMAR_PAGE_SIZE << (2 * DMAR_NPTEPGSHIFT),
+                (dmar_gaddr_t)DMAR_PAGE_SIZE << (3 * DMAR_NPTEPGSHIFT),
+                (dmar_gaddr_t)DMAR_PAGE_SIZE << (4 * DMAR_NPTEPGSHIFT),
+                (dmar_gaddr_t)DMAR_PAGE_SIZE << (5 * DMAR_NPTEPGSHIFT)
+        };
+
+        KASSERT(lvl >= 0 && lvl < total_pglvl,
+            ("total %d lvl %d", total_pglvl, lvl));
+        rlvl = total_pglvl - lvl - 1;
+        KASSERT(rlvl < sizeof(pg_sz) / sizeof(pg_sz[0]),
+            ("sizeof pg_sz lvl %d", lvl));
+        return (pg_sz[rlvl]);
+}
+
+dmar_gaddr_t
+ctx_page_size(struct dmar_ctx *ctx, int lvl)
+{
+
+        return (pglvl_page_size(ctx->pglvl, lvl));
+}
+
+int
+calc_am(struct dmar_unit *unit, dmar_gaddr_t base, dmar_gaddr_t size,
+    dmar_gaddr_t *isizep)
+{
+        dmar_gaddr_t isize;
+        int am;
+
+        for (am = DMAR_CAP_MAMV(unit->hw_cap);; am--) {
+                isize = 1ULL << (am + DMAR_PAGE_SHIFT);
+                if ((base & (isize - 1)) == 0 && size >= isize)
+                        break;
+                if (am == 0)
+                        break;
+        }
+        *isizep = isize;
+        return (am);
+}
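
calc_am picks the largest address mask am, bounded by the MAMV capability field, such that an invalidation region of 2^(am + 12) bytes is base-aligned and fits within size. A worked standalone example, not part of the commit; the MAMV value and the helper are local stand-ins for the driver's capability plumbing:

        #include <assert.h>
        #include <stdint.h>

        #define PAGE_SHIFT      12
        #define MAMV            9       /* hypothetical capability value */

        static int
        calc_am(uint64_t base, uint64_t size, uint64_t *isizep)
        {
                uint64_t isize;
                int am;

                for (am = MAMV;; am--) {
                        isize = (uint64_t)1 << (am + PAGE_SHIFT);
                        if ((base & (isize - 1)) == 0 && size >= isize)
                                break;
                        if (am == 0)
                                break;
                }
                *isizep = isize;
                return (am);
        }

        int
        main(void)
        {
                uint64_t isize;

                /* 2 MB region, 2 MB aligned: one invalidation, am = 9. */
                assert(calc_am(0x200000, 0x200000, &isize) == 9);
                assert(isize == 0x200000);
                /* Unaligned base degrades to a single-page mask, am = 0. */
                assert(calc_am(0x201000, 0x200000, &isize) == 0);
                assert(isize == 0x1000);
                return (0);
        }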
+
+dmar_haddr_t dmar_high;
+int haw;
+int dmar_tbl_pagecnt;
+
+vm_page_t
+dmar_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+        vm_page_t m;
+        int zeroed;
+
+        zeroed = (flags & DMAR_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
+        for (;;) {
+                if ((flags & DMAR_PGF_OBJL) == 0)
+                        VM_OBJECT_WLOCK(obj);
+                m = vm_page_lookup(obj, idx);
+                if ((flags & DMAR_PGF_NOALLOC) != 0 || m != NULL) {
+                        if ((flags & DMAR_PGF_OBJL) == 0)
+                                VM_OBJECT_WUNLOCK(obj);
+                        break;
+                }
+                m = vm_page_alloc_contig(obj, idx, VM_ALLOC_NOBUSY |
+                    VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP | zeroed, 1, 0,
+                    dmar_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
+                if ((flags & DMAR_PGF_OBJL) == 0)
+                        VM_OBJECT_WUNLOCK(obj);
+                if (m != NULL) {
+                        if (zeroed && (m->flags & PG_ZERO) == 0)
+                                pmap_zero_page(m);
+                        atomic_add_int(&dmar_tbl_pagecnt, 1);
+                        break;
+                }
+                if ((flags & DMAR_PGF_WAITOK) == 0)
+                        break;
+                if ((flags & DMAR_PGF_OBJL) != 0)
+                        VM_OBJECT_WUNLOCK(obj);
+                VM_WAIT;
+                if ((flags & DMAR_PGF_OBJL) != 0)
+                        VM_OBJECT_WLOCK(obj);
+        }
+        return (m);
+}
+
+void
+dmar_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
+{
+        vm_page_t m;
+
+        if ((flags & DMAR_PGF_OBJL) == 0)
+                VM_OBJECT_WLOCK(obj);
+        m = vm_page_lookup(obj, idx);
+        if (m != NULL) {
+                vm_page_free(m);
+                atomic_subtract_int(&dmar_tbl_pagecnt, 1);
+        }
+        if ((flags & DMAR_PGF_OBJL) == 0)
+                VM_OBJECT_WUNLOCK(obj);
+}
+
+void *
+dmar_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
+    struct sf_buf **sf)
+{
+        vm_page_t m;
+        bool allocated;
+
+        if ((flags & DMAR_PGF_OBJL) == 0)
+                VM_OBJECT_WLOCK(obj);
+        m = vm_page_lookup(obj, idx);
+        if (m == NULL && (flags & DMAR_PGF_ALLOC) != 0) {
+                m = dmar_pgalloc(obj, idx, flags | DMAR_PGF_OBJL);
+                allocated = true;
+        } else
+                allocated = false;
+        if (m == NULL) {
+                if ((flags & DMAR_PGF_OBJL) == 0)
+                        VM_OBJECT_WUNLOCK(obj);
+                return (NULL);
+        }
+        /* Sleepable allocations cannot fail. */
+        if ((flags & DMAR_PGF_WAITOK) != 0)
+                VM_OBJECT_WUNLOCK(obj);
+        sched_pin();
+        *sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & DMAR_PGF_WAITOK)
+            == 0 ? SFB_NOWAIT : 0));
+        if (*sf == NULL) {
+                sched_unpin();
+                if (allocated) {
+                        VM_OBJECT_ASSERT_WLOCKED(obj);
+                        dmar_pgfree(obj, m->pindex, flags | DMAR_PGF_OBJL);
+                }
+                if ((flags & DMAR_PGF_OBJL) == 0)
+                        VM_OBJECT_WUNLOCK(obj);
+                return (NULL);
+        }
+        if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) ==
+            (DMAR_PGF_WAITOK | DMAR_PGF_OBJL))
+                VM_OBJECT_WLOCK(obj);
+        else if ((flags & (DMAR_PGF_WAITOK | DMAR_PGF_OBJL)) == 0)
+                VM_OBJECT_WUNLOCK(obj);
+        return ((void *)sf_buf_kva(*sf));
+}
+
+void
+dmar_unmap_pgtbl(struct sf_buf *sf, bool coherent)
+{
+        vm_page_t m;
+
+        m = sf_buf_page(sf);
+        sf_buf_free(sf);
+        sched_unpin();
+
+        /*
+         * If DMAR does not snoop paging structure accesses, flush
+         * CPU cache to memory.
+         */
+        if (!coherent)
+                pmap_invalidate_cache_pages(&m, 1);
+}
+
+/*
+ * Load the root entry pointer into the hardware, busily waiting for
+ * the completion.
+ */
+int
+dmar_load_root_entry_ptr(struct dmar_unit *unit)
+{
+        vm_page_t root_entry;
+
+        /*
+         * Access to the GCMD register must be serialized while the
+         * command is submitted.
+         */
+        DMAR_ASSERT_LOCKED(unit);
+
+        /* VM_OBJECT_RLOCK(unit->ctx_obj); */
+        VM_OBJECT_WLOCK(unit->ctx_obj);
+        root_entry = vm_page_lookup(unit->ctx_obj, 0);
+        /* VM_OBJECT_RUNLOCK(unit->ctx_obj); */
+        VM_OBJECT_WUNLOCK(unit->ctx_obj);
+        dmar_write8(unit, DMAR_RTADDR_REG, VM_PAGE_TO_PHYS(root_entry));
+        dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_SRTP);
+        /* XXXKIB should have a timeout */
+        while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_RTPS) == 0)
+                cpu_spinwait();
+        return (0);
+}
+
+/*
+ * Globally invalidate the context entries cache, busily waiting for
+ * the completion.
+ */
+int
+dmar_inv_ctx_glob(struct dmar_unit *unit)
+{
+
+        /*
+         * Access to the CCMD register must be serialized while the
+         * command is submitted.
+         */
+        DMAR_ASSERT_LOCKED(unit);
+        KASSERT(!unit->qi_enabled, ("QI enabled"));
+
+        /*
+         * The DMAR_CCMD_ICC bit in the upper dword should be written
+         * after the low dword write is completed.  The amd64
+         * dmar_write8() does not have this issue; the i386
+         * dmar_write8() writes the upper dword last.
+         */
+        dmar_write8(unit, DMAR_CCMD_REG, DMAR_CCMD_ICC | DMAR_CCMD_CIRG_GLOB);
+        /* XXXKIB should have a timeout */
+        while ((dmar_read4(unit, DMAR_CCMD_REG + 4) & DMAR_CCMD_ICC32) != 0)
+                cpu_spinwait();
+        return (0);
+}
+
+/*
+ * Globally invalidate the IOTLB, busily waiting for the completion.
+ */
+int
+dmar_inv_iotlb_glob(struct dmar_unit *unit)
+{
+        int reg;
+
+        DMAR_ASSERT_LOCKED(unit);
+        KASSERT(!unit->qi_enabled, ("QI enabled"));
+
+        reg = 16 * DMAR_ECAP_IRO(unit->hw_ecap);
+        /* See a comment about DMAR_CCMD_ICC in dmar_inv_ctx_glob. */
+        dmar_write8(unit, reg + DMAR_IOTLB_REG_OFF, DMAR_IOTLB_IVT |
+            DMAR_IOTLB_IIRG_GLB | DMAR_IOTLB_DR | DMAR_IOTLB_DW);
+        /* XXXKIB should have a timeout */
+        while ((dmar_read4(unit, reg + DMAR_IOTLB_REG_OFF + 4) &
+            DMAR_IOTLB_IVT32) != 0)
+                cpu_spinwait();
+        return (0);
+}
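
The register-programming paths here and below all poll a status bit with cpu_spinwait() and carry an "XXXKIB should have a timeout" note. A sketch of what a bounded wait could look like; the helper name, iteration bound, and error handling are invented for illustration and are not the committed code:

        /* Sketch: poll GSTS for a status bit, giving up after a bound. */
        static int
        dmar_wait_status(struct dmar_unit *unit, uint32_t bit)
        {
                int i;

                for (i = 0; i < 1000000; i++) { /* arbitrary bound */
                        if ((dmar_read4(unit, DMAR_GSTS_REG) & bit) != 0)
                                return (0);
                        cpu_spinwait();
                }
                return (ETIMEDOUT);     /* caller decides how to recover */
        }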
+
+/*
+ * Flush the chipset write buffers.  See 11.1 "Write Buffer Flushing"
+ * in the architecture specification.
+ */
+int
+dmar_flush_write_bufs(struct dmar_unit *unit)
+{
+
+        DMAR_ASSERT_LOCKED(unit);
+
+        /*
+         * DMAR_GCMD_WBF is only valid when CAP_RWBF is reported.
+         */
+        KASSERT((unit->hw_cap & DMAR_CAP_RWBF) != 0,
+            ("dmar%d: no RWBF", unit->unit));
+
+        dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd | DMAR_GCMD_WBF);
+        /* XXXKIB should have a timeout */
+        while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_WBFS) == 0)
+                cpu_spinwait();
+        return (0);
+}
+
+int
+dmar_enable_translation(struct dmar_unit *unit)
+{
+
+        DMAR_ASSERT_LOCKED(unit);
+        unit->hw_gcmd |= DMAR_GCMD_TE;
+        dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+        /* XXXKIB should have a timeout */
+        while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) == 0)
+                cpu_spinwait();
+        return (0);
+}
+
+int
+dmar_disable_translation(struct dmar_unit *unit)
+{
+
+        DMAR_ASSERT_LOCKED(unit);
+        unit->hw_gcmd &= ~DMAR_GCMD_TE;
+        dmar_write4(unit, DMAR_GCMD_REG, unit->hw_gcmd);
+        /* XXXKIB should have a timeout */
+        while ((dmar_read4(unit, DMAR_GSTS_REG) & DMAR_GSTS_TES) != 0)
+                cpu_spinwait();
+        return (0);
+}
+
+#define BARRIER_F                               \
+        u_int f_done, f_inproc, f_wakeup;       \
+                                                \
+        f_done = 1 << (barrier_id * 3);         \
+        f_inproc = 1 << (barrier_id * 3 + 1);   \
+        f_wakeup = 1 << (barrier_id * 3 + 2)
+
+bool
+dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id)
+{
+        BARRIER_F;
+
+        DMAR_LOCK(dmar);
+        if ((dmar->barrier_flags & f_done) != 0) {
+                DMAR_UNLOCK(dmar);
+                return (false);
+        }
+
+        if ((dmar->barrier_flags & f_inproc) != 0) {
+                while ((dmar->barrier_flags & f_inproc) != 0) {
+                        dmar->barrier_flags |= f_wakeup;
+                        msleep(&dmar->barrier_flags, &dmar->lock, 0,
+                            "dmarb", 0);
+                }
+                KASSERT((dmar->barrier_flags & f_done) != 0,
+                    ("dmar%d barrier %d missing done", dmar->unit,
+                    barrier_id));
+                DMAR_UNLOCK(dmar);
+                return (false);
+        }
+
+        dmar->barrier_flags |= f_inproc;
+        DMAR_UNLOCK(dmar);
+        return (true);
+}
+
+void
+dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id)
+{
+        BARRIER_F;
+
+        DMAR_ASSERT_LOCKED(dmar);
+        KASSERT((dmar->barrier_flags & (f_done | f_inproc)) == f_inproc,
+            ("dmar%d barrier %d missed entry", dmar->unit, barrier_id));
+        dmar->barrier_flags |= f_done;
+        if ((dmar->barrier_flags & f_wakeup) != 0)
+                wakeup(&dmar->barrier_flags);
+        dmar->barrier_flags &= ~(f_inproc | f_wakeup);
+        DMAR_UNLOCK(dmar);
+}
+
+int dmar_match_verbose;
+
+static SYSCTL_NODE(_hw, OID_AUTO, dmar, CTLFLAG_RD, NULL, "");
+SYSCTL_INT(_hw_dmar, OID_AUTO, tbl_pagecnt, CTLFLAG_RD | CTLFLAG_TUN,
+    &dmar_tbl_pagecnt, 0,
+    "Count of pages used for DMAR pagetables");
+SYSCTL_INT(_hw_dmar, OID_AUTO, match_verbose, CTLFLAG_RW | CTLFLAG_TUN,
+    &dmar_match_verbose, 0,
+    "Verbose matching of the PCI devices to DMAR paths");
+#ifdef INVARIANTS
+int dmar_check_free;
+SYSCTL_INT(_hw_dmar, OID_AUTO, check_free, CTLFLAG_RW | CTLFLAG_TUN,
+    &dmar_check_free, 0,
+    "Check the GPA RBtree for free_down and free_after validity");
+#endif
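
The enter/exit pair implements a one-shot initialization barrier: the first caller gets true back and performs the work with the unit lock dropped, while late arrivals sleep until f_done is set. Note that dmar_barrier_enter() returns to the winner unlocked, but dmar_barrier_exit() asserts the lock, so the winner must re-take it. A usage sketch inferred from the code above; the barrier id name is illustrative, ids being small integers with three flag bits each:

        if (dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR)) {
                /* One-time setup runs here, unit lock not held. */
                DMAR_LOCK(dmar);
                dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR);
        }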