author     mav <mav@FreeBSD.org>  2016-05-25 07:09:54 +0000
committer  mav <mav@FreeBSD.org>  2016-05-25 07:09:54 +0000
commit     d748f0adede2aeb8cb93f4f727575df0987fec63 (patch)
tree       c3ddc45d3097fc4ebffb152e65e728afd2d6e73f
parent     9067c52647a790e0945bbadd59190a5e1be02e48 (diff)
MFC ioat(4) driver in its present state.
-rw-r--r--  share/man/man4/Makefile        |    2
-rw-r--r--  sys/conf/files.amd64           |    2
-rw-r--r--  sys/dev/ioat/ioat.c            | 2091
-rw-r--r--  sys/dev/ioat/ioat.h            |  218
-rw-r--r--  sys/dev/ioat/ioat_hw.h         |  167
-rw-r--r--  sys/dev/ioat/ioat_internal.h   |  600
-rw-r--r--  sys/dev/ioat/ioat_test.c       |  602
-rw-r--r--  sys/dev/ioat/ioat_test.h       |   90
-rw-r--r--  sys/modules/Makefile           |    2
-rw-r--r--  sys/modules/ioat/Makefile      |   15
-rw-r--r--  tools/tools/ioat/Makefile      |    9
-rw-r--r--  tools/tools/ioat/ioatcontrol.8 |  182
-rw-r--r--  tools/tools/ioat/ioatcontrol.c |  258
13 files changed, 4238 insertions, 0 deletions
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index 2c3f632..9d3684d 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -199,6 +199,7 @@ MAN= aac.4 \
intpm.4 \
intro.4 \
${_io.4} \
+ ${_ioat.4} \
ip.4 \
ip6.4 \
ipfirewall.4 \
@@ -797,6 +798,7 @@ MLINKS+=lindev.4 full.4
.if ${MACHINE_CPUARCH} == "amd64"
_if_ntb.4= if_ntb.4
+_ioat.4= ioat.4
_ntb.4= ntb.4
_ntb_hw.4= ntb_hw.4
_qlxge.4= qlxge.4
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 7f961ec..eb8f47e 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -203,6 +203,8 @@ dev/if_ndis/if_ndis_pccard.c optional ndis pccard
dev/if_ndis/if_ndis_pci.c optional ndis cardbus | ndis pci
dev/if_ndis/if_ndis_usb.c optional ndis usb
dev/io/iodev.c optional io
+dev/ioat/ioat.c optional ioat pci
+dev/ioat/ioat_test.c optional ioat pci
dev/ipmi/ipmi.c optional ipmi
dev/ipmi/ipmi_acpi.c optional ipmi acpi
dev/ipmi/ipmi_isa.c optional ipmi isa
diff --git a/sys/dev/ioat/ioat.c b/sys/dev/ioat/ioat.c
new file mode 100644
index 0000000..aff048a
--- /dev/null
+++ b/sys/dev/ioat/ioat.c
@@ -0,0 +1,2091 @@
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/sbuf.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/time.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/stdarg.h>
+
+#include "ioat.h"
+#include "ioat_hw.h"
+#include "ioat_internal.h"
+
+#ifndef BUS_SPACE_MAXADDR_40BIT
+#define BUS_SPACE_MAXADDR_40BIT 0xFFFFFFFFFFULL
+#endif
+#define IOAT_INTR_TIMO (hz / 10)
+#define IOAT_REFLK (&ioat->submit_lock)
+
+static int ioat_probe(device_t device);
+static int ioat_attach(device_t device);
+static int ioat_detach(device_t device);
+static int ioat_setup_intr(struct ioat_softc *ioat);
+static int ioat_teardown_intr(struct ioat_softc *ioat);
+static int ioat3_attach(device_t device);
+static int ioat_start_channel(struct ioat_softc *ioat);
+static int ioat_map_pci_bar(struct ioat_softc *ioat);
+static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg,
+ int error);
+static void ioat_interrupt_handler(void *arg);
+static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat);
+static int chanerr_to_errno(uint32_t);
+static void ioat_process_events(struct ioat_softc *ioat);
+static inline uint32_t ioat_get_active(struct ioat_softc *ioat);
+static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat);
+static void ioat_free_ring(struct ioat_softc *, uint32_t size,
+ struct ioat_descriptor **);
+static void ioat_free_ring_entry(struct ioat_softc *ioat,
+ struct ioat_descriptor *desc);
+static struct ioat_descriptor *ioat_alloc_ring_entry(struct ioat_softc *,
+ int mflags);
+static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags);
+static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *ioat,
+ uint32_t index);
+static struct ioat_descriptor **ioat_prealloc_ring(struct ioat_softc *,
+ uint32_t size, boolean_t need_dscr, int mflags);
+static int ring_grow(struct ioat_softc *, uint32_t oldorder,
+ struct ioat_descriptor **);
+static int ring_shrink(struct ioat_softc *, uint32_t oldorder,
+ struct ioat_descriptor **);
+static void ioat_halted_debug(struct ioat_softc *, uint32_t);
+static void ioat_timer_callback(void *arg);
+static void dump_descriptor(void *hw_desc);
+static void ioat_submit_single(struct ioat_softc *ioat);
+static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg,
+ int error);
+static int ioat_reset_hw(struct ioat_softc *ioat);
+static void ioat_reset_hw_task(void *, int);
+static void ioat_setup_sysctl(device_t device);
+static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS);
+static inline struct ioat_softc *ioat_get(struct ioat_softc *,
+ enum ioat_ref_kind);
+static inline void ioat_put(struct ioat_softc *, enum ioat_ref_kind);
+static inline void _ioat_putn(struct ioat_softc *, uint32_t,
+ enum ioat_ref_kind, boolean_t);
+static inline void ioat_putn(struct ioat_softc *, uint32_t,
+ enum ioat_ref_kind);
+static inline void ioat_putn_locked(struct ioat_softc *, uint32_t,
+ enum ioat_ref_kind);
+static void ioat_drain_locked(struct ioat_softc *);
+
+#define ioat_log_message(v, ...) do { \
+ if ((v) <= g_ioat_debug_level) { \
+ device_printf(ioat->device, __VA_ARGS__); \
+ } \
+} while (0)
+
+MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations");
+SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "ioat node");
+
+static int g_force_legacy_interrupts;
+SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN,
+ &g_force_legacy_interrupts, 0, "Set to non-zero to force MSI-X disabled");
+
+int g_ioat_debug_level = 0;
+SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level,
+ 0, "Set log level (0-3) for ioat(4). Higher is more verbose.");
+
+/*
+ * OS <-> Driver interface structures
+ */
+static device_method_t ioat_pci_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, ioat_probe),
+ DEVMETHOD(device_attach, ioat_attach),
+ DEVMETHOD(device_detach, ioat_detach),
+ DEVMETHOD_END
+};
+
+static driver_t ioat_pci_driver = {
+ "ioat",
+ ioat_pci_methods,
+ sizeof(struct ioat_softc),
+};
+
+static devclass_t ioat_devclass;
+DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0);
+MODULE_VERSION(ioat, 1);
+
+/*
+ * Private data structures
+ */
+static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS];
+static int ioat_channel_index = 0;
+SYSCTL_INT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0,
+ "Number of IOAT channels attached");
+
+static struct _pcsid
+{
+ u_int32_t type;
+ const char *desc;
+} pci_ids[] = {
+ { 0x34308086, "TBG IOAT Ch0" },
+ { 0x34318086, "TBG IOAT Ch1" },
+ { 0x34328086, "TBG IOAT Ch2" },
+ { 0x34338086, "TBG IOAT Ch3" },
+ { 0x34298086, "TBG IOAT Ch4" },
+ { 0x342a8086, "TBG IOAT Ch5" },
+ { 0x342b8086, "TBG IOAT Ch6" },
+ { 0x342c8086, "TBG IOAT Ch7" },
+
+ { 0x37108086, "JSF IOAT Ch0" },
+ { 0x37118086, "JSF IOAT Ch1" },
+ { 0x37128086, "JSF IOAT Ch2" },
+ { 0x37138086, "JSF IOAT Ch3" },
+ { 0x37148086, "JSF IOAT Ch4" },
+ { 0x37158086, "JSF IOAT Ch5" },
+ { 0x37168086, "JSF IOAT Ch6" },
+ { 0x37178086, "JSF IOAT Ch7" },
+ { 0x37188086, "JSF IOAT Ch0 (RAID)" },
+ { 0x37198086, "JSF IOAT Ch1 (RAID)" },
+
+ { 0x3c208086, "SNB IOAT Ch0" },
+ { 0x3c218086, "SNB IOAT Ch1" },
+ { 0x3c228086, "SNB IOAT Ch2" },
+ { 0x3c238086, "SNB IOAT Ch3" },
+ { 0x3c248086, "SNB IOAT Ch4" },
+ { 0x3c258086, "SNB IOAT Ch5" },
+ { 0x3c268086, "SNB IOAT Ch6" },
+ { 0x3c278086, "SNB IOAT Ch7" },
+ { 0x3c2e8086, "SNB IOAT Ch0 (RAID)" },
+ { 0x3c2f8086, "SNB IOAT Ch1 (RAID)" },
+
+ { 0x0e208086, "IVB IOAT Ch0" },
+ { 0x0e218086, "IVB IOAT Ch1" },
+ { 0x0e228086, "IVB IOAT Ch2" },
+ { 0x0e238086, "IVB IOAT Ch3" },
+ { 0x0e248086, "IVB IOAT Ch4" },
+ { 0x0e258086, "IVB IOAT Ch5" },
+ { 0x0e268086, "IVB IOAT Ch6" },
+ { 0x0e278086, "IVB IOAT Ch7" },
+ { 0x0e2e8086, "IVB IOAT Ch0 (RAID)" },
+ { 0x0e2f8086, "IVB IOAT Ch1 (RAID)" },
+
+ { 0x2f208086, "HSW IOAT Ch0" },
+ { 0x2f218086, "HSW IOAT Ch1" },
+ { 0x2f228086, "HSW IOAT Ch2" },
+ { 0x2f238086, "HSW IOAT Ch3" },
+ { 0x2f248086, "HSW IOAT Ch4" },
+ { 0x2f258086, "HSW IOAT Ch5" },
+ { 0x2f268086, "HSW IOAT Ch6" },
+ { 0x2f278086, "HSW IOAT Ch7" },
+ { 0x2f2e8086, "HSW IOAT Ch0 (RAID)" },
+ { 0x2f2f8086, "HSW IOAT Ch1 (RAID)" },
+
+ { 0x0c508086, "BWD IOAT Ch0" },
+ { 0x0c518086, "BWD IOAT Ch1" },
+ { 0x0c528086, "BWD IOAT Ch2" },
+ { 0x0c538086, "BWD IOAT Ch3" },
+
+ { 0x6f508086, "BDXDE IOAT Ch0" },
+ { 0x6f518086, "BDXDE IOAT Ch1" },
+ { 0x6f528086, "BDXDE IOAT Ch2" },
+ { 0x6f538086, "BDXDE IOAT Ch3" },
+
+ { 0x6f208086, "BDX IOAT Ch0" },
+ { 0x6f218086, "BDX IOAT Ch1" },
+ { 0x6f228086, "BDX IOAT Ch2" },
+ { 0x6f238086, "BDX IOAT Ch3" },
+ { 0x6f248086, "BDX IOAT Ch4" },
+ { 0x6f258086, "BDX IOAT Ch5" },
+ { 0x6f268086, "BDX IOAT Ch6" },
+ { 0x6f278086, "BDX IOAT Ch7" },
+ { 0x6f2e8086, "BDX IOAT Ch0 (RAID)" },
+ { 0x6f2f8086, "BDX IOAT Ch1 (RAID)" },
+
+ { 0x00000000, NULL }
+};
+
+/*
+ * OS <-> Driver linkage functions
+ */
+static int
+ioat_probe(device_t device)
+{
+ struct _pcsid *ep;
+ u_int32_t type;
+
+ type = pci_get_devid(device);
+ for (ep = pci_ids; ep->type; ep++) {
+ if (ep->type == type) {
+ device_set_desc(device, ep->desc);
+ return (0);
+ }
+ }
+ return (ENXIO);
+}
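
The match keys in pci_ids[] pack the 16-bit PCI device ID into the high word and Intel's vendor ID (0x8086) into the low word, which is the same composition pci_get_devid() reports. A minimal userland sketch of that packing (make_devid() is a hypothetical helper, not part of the driver):

    #include <stdint.h>
    #include <stdio.h>

    /* Compose a devid the way the pci_ids[] table expects it:
     * high 16 bits = PCI device ID, low 16 bits = vendor ID. */
    static uint32_t
    make_devid(uint16_t device, uint16_t vendor)
    {
        return (((uint32_t)device << 16) | vendor);
    }

    int
    main(void)
    {
        /* 0x3430 is the TBG IOAT Ch0 device ID; 0x8086 is Intel. */
        printf("0x%08x\n", (unsigned)make_devid(0x3430, 0x8086));
        return (0);
    }
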
+
+static int
+ioat_attach(device_t device)
+{
+ struct ioat_softc *ioat;
+ int error;
+
+ ioat = DEVICE2SOFTC(device);
+ ioat->device = device;
+
+ error = ioat_map_pci_bar(ioat);
+ if (error != 0)
+ goto err;
+
+ ioat->version = ioat_read_cbver(ioat);
+ if (ioat->version < IOAT_VER_3_0) {
+ error = ENODEV;
+ goto err;
+ }
+
+ error = ioat3_attach(device);
+ if (error != 0)
+ goto err;
+
+ error = pci_enable_busmaster(device);
+ if (error != 0)
+ goto err;
+
+ error = ioat_setup_intr(ioat);
+ if (error != 0)
+ goto err;
+
+ error = ioat_reset_hw(ioat);
+ if (error != 0)
+ goto err;
+
+ ioat_process_events(ioat);
+ ioat_setup_sysctl(device);
+
+ ioat->chan_idx = ioat_channel_index;
+ ioat_channel[ioat_channel_index++] = ioat;
+ ioat_test_attach();
+
+err:
+ if (error != 0)
+ ioat_detach(device);
+ return (error);
+}
+
+static int
+ioat_detach(device_t device)
+{
+ struct ioat_softc *ioat;
+
+ ioat = DEVICE2SOFTC(device);
+
+ ioat_test_detach();
+ taskqueue_drain(taskqueue_thread, &ioat->reset_task);
+
+ mtx_lock(IOAT_REFLK);
+ ioat->quiescing = TRUE;
+ ioat->destroying = TRUE;
+ wakeup(&ioat->quiescing);
+
+ ioat_channel[ioat->chan_idx] = NULL;
+
+ ioat_drain_locked(ioat);
+ mtx_unlock(IOAT_REFLK);
+
+ ioat_teardown_intr(ioat);
+ callout_drain(&ioat->timer);
+
+ pci_disable_busmaster(device);
+
+ if (ioat->pci_resource != NULL)
+ bus_release_resource(device, SYS_RES_MEMORY,
+ ioat->pci_resource_id, ioat->pci_resource);
+
+ if (ioat->ring != NULL)
+ ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring);
+
+ if (ioat->comp_update != NULL) {
+ bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map);
+ bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update,
+ ioat->comp_update_map);
+ bus_dma_tag_destroy(ioat->comp_update_tag);
+ }
+
+ bus_dma_tag_destroy(ioat->hw_desc_tag);
+
+ return (0);
+}
+
+static int
+ioat_teardown_intr(struct ioat_softc *ioat)
+{
+
+ if (ioat->tag != NULL)
+ bus_teardown_intr(ioat->device, ioat->res, ioat->tag);
+
+ if (ioat->res != NULL)
+ bus_release_resource(ioat->device, SYS_RES_IRQ,
+ rman_get_rid(ioat->res), ioat->res);
+
+ pci_release_msi(ioat->device);
+ return (0);
+}
+
+static int
+ioat_start_channel(struct ioat_softc *ioat)
+{
+ uint64_t status;
+ uint32_t chanerr;
+ int i;
+
+ ioat_acquire(&ioat->dmaengine);
+ ioat_null(&ioat->dmaengine, NULL, NULL, 0);
+ ioat_release(&ioat->dmaengine);
+
+ for (i = 0; i < 100; i++) {
+ DELAY(1);
+ status = ioat_get_chansts(ioat);
+ if (is_ioat_idle(status))
+ return (0);
+ }
+
+ chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+ ioat_log_message(0, "could not start channel: "
+ "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr,
+ IOAT_CHANERR_STR);
+ return (ENXIO);
+}
+
+/*
+ * Initialize Hardware
+ */
+static int
+ioat3_attach(device_t device)
+{
+ struct ioat_softc *ioat;
+ struct ioat_descriptor **ring;
+ struct ioat_descriptor *next;
+ struct ioat_dma_hw_descriptor *dma_hw_desc;
+ int i, num_descriptors;
+ int error;
+ uint8_t xfercap;
+
+ error = 0;
+ ioat = DEVICE2SOFTC(device);
+ ioat->capabilities = ioat_read_dmacapability(ioat);
+
+ ioat_log_message(1, "Capabilities: %b\n", (int)ioat->capabilities,
+ IOAT_DMACAP_STR);
+
+ xfercap = ioat_read_xfercap(ioat);
+ ioat->max_xfer_size = 1 << xfercap;
+
+ ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) &
+ IOAT_INTRDELAY_SUPPORTED) != 0;
+ if (ioat->intrdelay_supported)
+ ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK;
+
+ /* TODO: need to check DCA here if we ever do XOR/PQ */
+
+ mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF);
+ mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF);
+ callout_init(&ioat->timer, 1);
+ TASK_INIT(&ioat->reset_task, 0, ioat_reset_hw_task, ioat);
+
+ /* Establish lock order for Witness */
+ mtx_lock(&ioat->submit_lock);
+ mtx_lock(&ioat->cleanup_lock);
+ mtx_unlock(&ioat->cleanup_lock);
+ mtx_unlock(&ioat->submit_lock);
+
+ ioat->is_resize_pending = FALSE;
+ ioat->is_completion_pending = FALSE;
+ ioat->is_reset_pending = FALSE;
+ ioat->is_channel_running = FALSE;
+
+ bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0,
+ BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
+ sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL,
+ &ioat->comp_update_tag);
+
+ error = bus_dmamem_alloc(ioat->comp_update_tag,
+ (void **)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map);
+ if (ioat->comp_update == NULL)
+ return (ENOMEM);
+
+ error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map,
+ ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat,
+ 0);
+ if (error != 0)
+ return (error);
+
+ ioat->ring_size_order = IOAT_MIN_ORDER;
+
+ num_descriptors = 1 << ioat->ring_size_order;
+
+ bus_dma_tag_create(bus_get_dma_tag(ioat->device), 0x40, 0x0,
+ BUS_SPACE_MAXADDR_40BIT, BUS_SPACE_MAXADDR, NULL, NULL,
+ sizeof(struct ioat_dma_hw_descriptor), 1,
+ sizeof(struct ioat_dma_hw_descriptor), 0, NULL, NULL,
+ &ioat->hw_desc_tag);
+
+ ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT,
+ M_ZERO | M_WAITOK);
+
+ ring = ioat->ring;
+ for (i = 0; i < num_descriptors; i++) {
+ ring[i] = ioat_alloc_ring_entry(ioat, M_WAITOK);
+ if (ring[i] == NULL)
+ return (ENOMEM);
+
+ ring[i]->id = i;
+ }
+
+ for (i = 0; i < num_descriptors - 1; i++) {
+ next = ring[i + 1];
+ dma_hw_desc = ring[i]->u.dma;
+
+ dma_hw_desc->next = next->hw_desc_bus_addr;
+ }
+
+ ring[i]->u.dma->next = ring[0]->hw_desc_bus_addr;
+
+ ioat->head = ioat->hw_head = 0;
+ ioat->tail = 0;
+ ioat->last_seen = 0;
+ return (0);
+}
+
+static int
+ioat_map_pci_bar(struct ioat_softc *ioat)
+{
+
+ ioat->pci_resource_id = PCIR_BAR(0);
+ ioat->pci_resource = bus_alloc_resource_any(ioat->device,
+ SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE);
+
+ if (ioat->pci_resource == NULL) {
+ ioat_log_message(0, "unable to allocate pci resource\n");
+ return (ENODEV);
+ }
+
+ ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource);
+ ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource);
+ return (0);
+}
+
+static void
+ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
+{
+ struct ioat_softc *ioat = arg;
+
+ KASSERT(error == 0, ("%s: error:%d", __func__, error));
+ ioat->comp_update_bus_addr = seg[0].ds_addr;
+}
+
+static void
+ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+ bus_addr_t *baddr;
+
+ KASSERT(error == 0, ("%s: error:%d", __func__, error));
+ baddr = arg;
+ *baddr = segs->ds_addr;
+}
+
+/*
+ * Interrupt setup and handlers
+ */
+static int
+ioat_setup_intr(struct ioat_softc *ioat)
+{
+ uint32_t num_vectors;
+ int error;
+ boolean_t use_msix;
+ boolean_t force_legacy_interrupts;
+
+ use_msix = FALSE;
+ force_legacy_interrupts = FALSE;
+
+ if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) {
+ num_vectors = 1;
+ pci_alloc_msix(ioat->device, &num_vectors);
+ if (num_vectors == 1)
+ use_msix = TRUE;
+ }
+
+ if (use_msix) {
+ ioat->rid = 1;
+ ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
+ &ioat->rid, RF_ACTIVE);
+ } else {
+ ioat->rid = 0;
+ ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
+ &ioat->rid, RF_SHAREABLE | RF_ACTIVE);
+ }
+ if (ioat->res == NULL) {
+ ioat_log_message(0, "bus_alloc_resource failed\n");
+ return (ENOMEM);
+ }
+
+ ioat->tag = NULL;
+ error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE |
+ INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag);
+ if (error != 0) {
+ ioat_log_message(0, "bus_setup_intr failed\n");
+ return (error);
+ }
+
+ ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN);
+ return (0);
+}
+
+static boolean_t
+ioat_model_resets_msix(struct ioat_softc *ioat)
+{
+ u_int32_t pciid;
+
+ pciid = pci_get_devid(ioat->device);
+ switch (pciid) {
+ /* BWD: */
+ case 0x0c508086:
+ case 0x0c518086:
+ case 0x0c528086:
+ case 0x0c538086:
+ /* BDXDE: */
+ case 0x6f508086:
+ case 0x6f518086:
+ case 0x6f528086:
+ case 0x6f538086:
+ return (TRUE);
+ }
+
+ return (FALSE);
+}
+
+static void
+ioat_interrupt_handler(void *arg)
+{
+ struct ioat_softc *ioat = arg;
+
+ ioat->stats.interrupts++;
+ ioat_process_events(ioat);
+}
+
+static int
+chanerr_to_errno(uint32_t chanerr)
+{
+
+ if (chanerr == 0)
+ return (0);
+ if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0)
+ return (EFAULT);
+ if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0)
+ return (EIO);
+ /* This one is probably our fault: */
+ if ((chanerr & IOAT_CHANERR_NDADDERR) != 0)
+ return (EIO);
+ return (EIO);
+}
+
+static void
+ioat_process_events(struct ioat_softc *ioat)
+{
+ struct ioat_descriptor *desc;
+ struct bus_dmadesc *dmadesc;
+ uint64_t comp_update, status;
+ uint32_t completed, chanerr;
+ int error;
+
+ mtx_lock(&ioat->cleanup_lock);
+
+ completed = 0;
+ comp_update = *ioat->comp_update;
+ status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
+
+ CTR0(KTR_IOAT, __func__);
+
+ if (status == ioat->last_seen) {
+ /*
+ * If we landed in process_events and nothing has been
+ * completed, check for a timeout due to channel halt.
+ */
+ comp_update = ioat_get_chansts(ioat);
+ goto out;
+ }
+
+ while (1) {
+ desc = ioat_get_ring_entry(ioat, ioat->tail);
+ dmadesc = &desc->bus_dmadesc;
+ CTR1(KTR_IOAT, "completing desc %d", ioat->tail);
+
+ if (dmadesc->callback_fn != NULL)
+ dmadesc->callback_fn(dmadesc->callback_arg, 0);
+
+ completed++;
+ ioat->tail++;
+ if (desc->hw_desc_bus_addr == status)
+ break;
+ }
+
+ ioat->last_seen = desc->hw_desc_bus_addr;
+
+ if (ioat->head == ioat->tail) {
+ ioat->is_completion_pending = FALSE;
+ callout_reset(&ioat->timer, IOAT_INTR_TIMO,
+ ioat_timer_callback, ioat);
+ }
+
+ ioat->stats.descriptors_processed += completed;
+
+out:
+ ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
+ mtx_unlock(&ioat->cleanup_lock);
+
+ if (completed != 0) {
+ ioat_putn(ioat, completed, IOAT_ACTIVE_DESCR_REF);
+ wakeup(&ioat->tail);
+ }
+
+ if (!is_ioat_halted(comp_update) && !is_ioat_suspended(comp_update))
+ return;
+
+ ioat->stats.channel_halts++;
+
+ /*
+ * Fatal programming error on this DMA channel. Flush any outstanding
+ * work with error status and restart the engine.
+ */
+ ioat_log_message(0, "Channel halted due to fatal programming error\n");
+ mtx_lock(&ioat->submit_lock);
+ mtx_lock(&ioat->cleanup_lock);
+ ioat->quiescing = TRUE;
+
+ chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+ ioat_halted_debug(ioat, chanerr);
+ ioat->stats.last_halt_chanerr = chanerr;
+
+ while (ioat_get_active(ioat) > 0) {
+ desc = ioat_get_ring_entry(ioat, ioat->tail);
+ dmadesc = &desc->bus_dmadesc;
+ CTR1(KTR_IOAT, "completing err desc %d", ioat->tail);
+
+ if (dmadesc->callback_fn != NULL)
+ dmadesc->callback_fn(dmadesc->callback_arg,
+ chanerr_to_errno(chanerr));
+
+ ioat_putn_locked(ioat, 1, IOAT_ACTIVE_DESCR_REF);
+ ioat->tail++;
+ ioat->stats.descriptors_processed++;
+ ioat->stats.descriptors_error++;
+ }
+
+ /* Clear error status */
+ ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
+
+ mtx_unlock(&ioat->cleanup_lock);
+ mtx_unlock(&ioat->submit_lock);
+
+ ioat_log_message(0, "Resetting channel to recover from error\n");
+ error = taskqueue_enqueue(taskqueue_thread, &ioat->reset_task);
+ KASSERT(error == 0,
+ ("%s: taskqueue_enqueue failed: %d", __func__, error));
+}
+
+static void
+ioat_reset_hw_task(void *ctx, int pending __unused)
+{
+ struct ioat_softc *ioat;
+ int error;
+
+ ioat = ctx;
+ ioat_log_message(1, "%s: Resetting channel\n", __func__);
+
+ error = ioat_reset_hw(ioat);
+ KASSERT(error == 0, ("%s: reset failed: %d", __func__, error));
+ (void)error;
+}
+
+/*
+ * User API functions
+ */
+bus_dmaengine_t
+ioat_get_dmaengine(uint32_t index, int flags)
+{
+ struct ioat_softc *ioat;
+
+ KASSERT((flags & ~(M_NOWAIT | M_WAITOK)) == 0,
+ ("invalid flags: 0x%08x", flags));
+ KASSERT((flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK),
+ ("invalid wait | nowait"));
+
+ if (index >= ioat_channel_index)
+ return (NULL);
+
+ ioat = ioat_channel[index];
+ if (ioat == NULL || ioat->destroying)
+ return (NULL);
+
+ if (ioat->quiescing) {
+ if ((flags & M_NOWAIT) != 0)
+ return (NULL);
+
+ mtx_lock(IOAT_REFLK);
+ while (ioat->quiescing && !ioat->destroying)
+ msleep(&ioat->quiescing, IOAT_REFLK, 0, "getdma", 0);
+ mtx_unlock(IOAT_REFLK);
+
+ if (ioat->destroying)
+ return (NULL);
+ }
+
+ /*
+ * There's a race here between the quiescing check and HW reset or
+ * module destroy.
+ */
+ return (&ioat_get(ioat, IOAT_DMAENGINE_REF)->dmaengine);
+}
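
Consumers bracket their use of a channel with these get/put calls. A kernel-side sketch assuming the public ioat.h API added later in this commit (example_probe_channel() is hypothetical consumer code, not part of the driver):

    /* Hypothetical consumer: borrow channel 0, query its limits, return it. */
    static int
    example_probe_channel(void)
    {
        bus_dmaengine_t dmaengine;

        dmaengine = ioat_get_dmaengine(0, M_WAITOK);
        if (dmaengine == NULL)
            return (ENXIO);

        printf("ioat HW version %#x, max I/O %zu bytes\n",
            ioat_get_hwversion(dmaengine), ioat_get_max_io_size(dmaengine));

        ioat_put_dmaengine(dmaengine);
        return (0);
    }
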
+
+void
+ioat_put_dmaengine(bus_dmaengine_t dmaengine)
+{
+ struct ioat_softc *ioat;
+
+ ioat = to_ioat_softc(dmaengine);
+ ioat_put(ioat, IOAT_DMAENGINE_REF);
+}
+
+int
+ioat_get_hwversion(bus_dmaengine_t dmaengine)
+{
+ struct ioat_softc *ioat;
+
+ ioat = to_ioat_softc(dmaengine);
+ return (ioat->version);
+}
+
+size_t
+ioat_get_max_io_size(bus_dmaengine_t dmaengine)
+{
+ struct ioat_softc *ioat;
+
+ ioat = to_ioat_softc(dmaengine);
+ return (ioat->max_xfer_size);
+}
+
+int
+ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay)
+{
+ struct ioat_softc *ioat;
+
+ ioat = to_ioat_softc(dmaengine);
+ if (!ioat->intrdelay_supported)
+ return (ENODEV);
+ if (delay > ioat->intrdelay_max)
+ return (ERANGE);
+
+ ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay);
+ ioat->cached_intrdelay =
+ ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK;
+ return (0);
+}
+
+uint16_t
+ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine)
+{
+ struct ioat_softc *ioat;
+
+ ioat = to_ioat_softc(dmaengine);
+ return (ioat->intrdelay_max);
+}
+
+void
+ioat_acquire(bus_dmaengine_t dmaengine)
+{
+ struct ioat_softc *ioat;
+
+ ioat = to_ioat_softc(dmaengine);
+ mtx_lock(&ioat->submit_lock);
+ CTR0(KTR_IOAT, __func__);
+}
+
+int
+ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags)
+{
+ struct ioat_softc *ioat;
+ int error;
+
+ ioat = to_ioat_softc(dmaengine);
+ ioat_acquire(dmaengine);
+
+ error = ioat_reserve_space(ioat, n, mflags);
+ if (error != 0)
+ ioat_release(dmaengine);
+ return (error);
+}
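
Reserving up front means a batch cannot run out of ring space halfway through submission. A hedged sketch of the intended calling pattern (example_copy_batch(), its arrays, and its callback are hypothetical; the addresses are assumed to be device-visible bus addresses):

    /* Hypothetical batch submit: all-or-nothing ring reservation, one
     * interrupt for the whole batch. */
    static int
    example_copy_batch(bus_dmaengine_t dmaengine, bus_addr_t *srcs,
        bus_addr_t *dsts, bus_size_t len, unsigned n,
        bus_dmaengine_callback_t cb, void *arg)
    {
        unsigned i;
        int error;

        error = ioat_acquire_reserve(dmaengine, n, M_NOWAIT);
        if (error != 0)
            return (error);

        for (i = 0; i < n; i++)
            (void)ioat_copy(dmaengine, dsts[i], srcs[i], len, cb, arg,
                i == n - 1 ? DMA_INT_EN : 0);

        ioat_release(dmaengine);    /* writes DMACOUNT to kick the channel */
        return (0);
    }
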
+
+void
+ioat_release(bus_dmaengine_t dmaengine)
+{
+ struct ioat_softc *ioat;
+
+ ioat = to_ioat_softc(dmaengine);
+ CTR0(KTR_IOAT, __func__);
+ ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->hw_head);
+ mtx_unlock(&ioat->submit_lock);
+}
+
+static struct ioat_descriptor *
+ioat_op_generic(struct ioat_softc *ioat, uint8_t op,
+ uint32_t size, uint64_t src, uint64_t dst,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg,
+ uint32_t flags)
+{
+ struct ioat_generic_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ int mflags;
+
+ mtx_assert(&ioat->submit_lock, MA_OWNED);
+
+ KASSERT((flags & ~_DMA_GENERIC_FLAGS) == 0,
+ ("Unrecognized flag(s): %#x", flags & ~_DMA_GENERIC_FLAGS));
+ if ((flags & DMA_NO_WAIT) != 0)
+ mflags = M_NOWAIT;
+ else
+ mflags = M_WAITOK;
+
+ if (size > ioat->max_xfer_size) {
+ ioat_log_message(0, "%s: max_xfer_size = %d, requested = %u\n",
+ __func__, ioat->max_xfer_size, (unsigned)size);
+ return (NULL);
+ }
+
+ if (ioat_reserve_space(ioat, 1, mflags) != 0)
+ return (NULL);
+
+ desc = ioat_get_ring_entry(ioat, ioat->head);
+ hw_desc = desc->u.generic;
+
+ hw_desc->u.control_raw = 0;
+ hw_desc->u.control_generic.op = op;
+ hw_desc->u.control_generic.completion_update = 1;
+
+ if ((flags & DMA_INT_EN) != 0)
+ hw_desc->u.control_generic.int_enable = 1;
+ if ((flags & DMA_FENCE) != 0)
+ hw_desc->u.control_generic.fence = 1;
+
+ hw_desc->size = size;
+ hw_desc->src_addr = src;
+ hw_desc->dest_addr = dst;
+
+ desc->bus_dmadesc.callback_fn = callback_fn;
+ desc->bus_dmadesc.callback_arg = callback_arg;
+ return (desc);
+}
+
+struct bus_dmadesc *
+ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn,
+ void *callback_arg, uint32_t flags)
+{
+ struct ioat_dma_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ struct ioat_softc *ioat;
+
+ CTR0(KTR_IOAT, __func__);
+ ioat = to_ioat_softc(dmaengine);
+
+ desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn,
+ callback_arg, flags);
+ if (desc == NULL)
+ return (NULL);
+
+ hw_desc = desc->u.dma;
+ hw_desc->u.control.null = 1;
+ ioat_submit_single(ioat);
+ return (&desc->bus_dmadesc);
+}
+
+struct bus_dmadesc *
+ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
+ bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
+ void *callback_arg, uint32_t flags)
+{
+ struct ioat_dma_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ struct ioat_softc *ioat;
+
+ CTR0(KTR_IOAT, __func__);
+ ioat = to_ioat_softc(dmaengine);
+
+ if (((src | dst) & (0xffffull << 48)) != 0) {
+ ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n",
+ __func__);
+ return (NULL);
+ }
+
+ desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn,
+ callback_arg, flags);
+ if (desc == NULL)
+ return (NULL);
+
+ hw_desc = desc->u.dma;
+ if (g_ioat_debug_level >= 3)
+ dump_descriptor(hw_desc);
+
+ ioat_submit_single(ioat);
+ return (&desc->bus_dmadesc);
+}
+
+struct bus_dmadesc *
+ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1,
+ bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
+{
+ struct ioat_dma_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ struct ioat_softc *ioat;
+
+ CTR0(KTR_IOAT, __func__);
+ ioat = to_ioat_softc(dmaengine);
+
+ if (((src1 | src2 | dst1 | dst2) & (0xffffull << 48)) != 0) {
+ ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n",
+ __func__);
+ return (NULL);
+ }
+ if (((src1 | src2 | dst1 | dst2) & PAGE_MASK) != 0) {
+ ioat_log_message(0, "%s: Addresses must be page-aligned\n",
+ __func__);
+ return (NULL);
+ }
+
+ desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, src1, dst1,
+ callback_fn, callback_arg, flags);
+ if (desc == NULL)
+ return (NULL);
+
+ hw_desc = desc->u.dma;
+ if (src2 != src1 + PAGE_SIZE) {
+ hw_desc->u.control.src_page_break = 1;
+ hw_desc->next_src_addr = src2;
+ }
+ if (dst2 != dst1 + PAGE_SIZE) {
+ hw_desc->u.control.dest_page_break = 1;
+ hw_desc->next_dest_addr = dst2;
+ }
+
+ if (g_ioat_debug_level >= 3)
+ dump_descriptor(hw_desc);
+
+ ioat_submit_single(ioat);
+ return (&desc->bus_dmadesc);
+}
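
The page-break bits let a single descriptor move two pages that are not physically adjacent. A short hypothetical caller (names are illustrative, assuming 4 KB pages and page-aligned bus addresses):

    /* Hypothetical caller: move two page-aligned, possibly non-contiguous
     * 4 KB pages with one descriptor. */
    static void
    example_copy_two_pages(bus_dmaengine_t dmaengine, bus_addr_t *pg_dst,
        bus_addr_t *pg_src, bus_dmaengine_callback_t done_cb, void *done_arg)
    {
        ioat_acquire(dmaengine);
        (void)ioat_copy_8k_aligned(dmaengine, pg_dst[0], pg_dst[1],
            pg_src[0], pg_src[1], done_cb, done_arg, DMA_INT_EN);
        ioat_release(dmaengine);
    }
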
+
+struct bus_dmadesc *
+ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src,
+ bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
+{
+ struct ioat_crc32_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ struct ioat_softc *ioat;
+ uint32_t teststore;
+ uint8_t op;
+
+ CTR0(KTR_IOAT, __func__);
+ ioat = to_ioat_softc(dmaengine);
+
+ if ((ioat->capabilities & IOAT_DMACAP_MOVECRC) == 0) {
+ ioat_log_message(0, "%s: Device lacks MOVECRC capability\n",
+ __func__);
+ return (NULL);
+ }
+ if (((src | dst) & (0xffffffull << 40)) != 0) {
+ ioat_log_message(0, "%s: High 24 bits of src/dst invalid\n",
+ __func__);
+ return (NULL);
+ }
+ teststore = (flags & _DMA_CRC_TESTSTORE);
+ if (teststore == _DMA_CRC_TESTSTORE) {
+ ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__);
+ return (NULL);
+ }
+ if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) {
+ ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n",
+ __func__);
+ return (NULL);
+ }
+
+ switch (teststore) {
+ case DMA_CRC_STORE:
+ op = IOAT_OP_MOVECRC_STORE;
+ break;
+ case DMA_CRC_TEST:
+ op = IOAT_OP_MOVECRC_TEST;
+ break;
+ default:
+ KASSERT(teststore == 0, ("bogus"));
+ op = IOAT_OP_MOVECRC;
+ break;
+ }
+
+ if ((flags & DMA_CRC_INLINE) == 0 &&
+ (crcptr & (0xffffffull << 40)) != 0) {
+ ioat_log_message(0,
+ "%s: High 24 bits of crcptr invalid\n", __func__);
+ return (NULL);
+ }
+
+ desc = ioat_op_generic(ioat, op, len, src, dst, callback_fn,
+ callback_arg, flags & ~_DMA_CRC_FLAGS);
+ if (desc == NULL)
+ return (NULL);
+
+ hw_desc = desc->u.crc32;
+
+ if ((flags & DMA_CRC_INLINE) == 0)
+ hw_desc->crc_address = crcptr;
+ else
+ hw_desc->u.control.crc_location = 1;
+
+ if (initialseed != NULL) {
+ hw_desc->u.control.use_seed = 1;
+ hw_desc->seed = *initialseed;
+ }
+
+ if (g_ioat_debug_level >= 3)
+ dump_descriptor(hw_desc);
+
+ ioat_submit_single(ioat);
+ return (&desc->bus_dmadesc);
+}
+
+struct bus_dmadesc *
+ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src, bus_size_t len,
+ uint32_t *initialseed, bus_addr_t crcptr,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags)
+{
+ struct ioat_crc32_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ struct ioat_softc *ioat;
+ uint32_t teststore;
+ uint8_t op;
+
+ CTR0(KTR_IOAT, __func__);
+ ioat = to_ioat_softc(dmaengine);
+
+ if ((ioat->capabilities & IOAT_DMACAP_CRC) == 0) {
+ ioat_log_message(0, "%s: Device lacks CRC capability\n",
+ __func__);
+ return (NULL);
+ }
+ if ((src & (0xffffffull << 40)) != 0) {
+ ioat_log_message(0, "%s: High 24 bits of src invalid\n",
+ __func__);
+ return (NULL);
+ }
+ teststore = (flags & _DMA_CRC_TESTSTORE);
+ if (teststore == _DMA_CRC_TESTSTORE) {
+ ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__);
+ return (NULL);
+ }
+ if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) {
+ ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n",
+ __func__);
+ return (NULL);
+ }
+
+ switch (teststore) {
+ case DMA_CRC_STORE:
+ op = IOAT_OP_CRC_STORE;
+ break;
+ case DMA_CRC_TEST:
+ op = IOAT_OP_CRC_TEST;
+ break;
+ default:
+ KASSERT(teststore == 0, ("bogus"));
+ op = IOAT_OP_CRC;
+ break;
+ }
+
+ if ((flags & DMA_CRC_INLINE) == 0 &&
+ (crcptr & (0xffffffull << 40)) != 0) {
+ ioat_log_message(0,
+ "%s: High 24 bits of crcptr invalid\n", __func__);
+ return (NULL);
+ }
+
+ desc = ioat_op_generic(ioat, op, len, src, 0, callback_fn,
+ callback_arg, flags & ~_DMA_CRC_FLAGS);
+ if (desc == NULL)
+ return (NULL);
+
+ hw_desc = desc->u.crc32;
+
+ if ((flags & DMA_CRC_INLINE) == 0)
+ hw_desc->crc_address = crcptr;
+ else
+ hw_desc->u.control.crc_location = 1;
+
+ if (initialseed != NULL) {
+ hw_desc->u.control.use_seed = 1;
+ hw_desc->seed = *initialseed;
+ }
+
+ if (g_ioat_debug_level >= 3)
+ dump_descriptor(hw_desc);
+
+ ioat_submit_single(ioat);
+ return (&desc->bus_dmadesc);
+}
+
+struct bus_dmadesc *
+ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern,
+ bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg,
+ uint32_t flags)
+{
+ struct ioat_fill_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ struct ioat_softc *ioat;
+
+ CTR0(KTR_IOAT, __func__);
+ ioat = to_ioat_softc(dmaengine);
+
+ if ((ioat->capabilities & IOAT_DMACAP_BFILL) == 0) {
+ ioat_log_message(0, "%s: Device lacks BFILL capability\n",
+ __func__);
+ return (NULL);
+ }
+
+ if ((dst & (0xffffull << 48)) != 0) {
+ ioat_log_message(0, "%s: High 16 bits of dst invalid\n",
+ __func__);
+ return (NULL);
+ }
+
+ desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, fillpattern, dst,
+ callback_fn, callback_arg, flags);
+ if (desc == NULL)
+ return (NULL);
+
+ hw_desc = desc->u.fill;
+ if (g_ioat_debug_level >= 3)
+ dump_descriptor(hw_desc);
+
+ ioat_submit_single(ioat);
+ return (&desc->bus_dmadesc);
+}
+
+/*
+ * Ring Management
+ */
+static inline uint32_t
+ioat_get_active(struct ioat_softc *ioat)
+{
+
+ return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1));
+}
+
+static inline uint32_t
+ioat_get_ring_space(struct ioat_softc *ioat)
+{
+
+ return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1);
+}
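
head and tail are free-running counters; masking by the power-of-two ring size yields the active count, and one slot is always sacrificed so a full ring is distinguishable from an empty one. A quick userland check of the arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t order = 5, size = 1u << order;  /* 32-slot ring */
        uint32_t head = 40, tail = 36;           /* free-running counters */
        uint32_t active = (head - tail) & (size - 1);
        uint32_t space = size - active - 1;

        printf("active=%u space=%u\n", active, space);  /* active=4 space=27 */
        return (0);
    }
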
+
+static struct ioat_descriptor *
+ioat_alloc_ring_entry(struct ioat_softc *ioat, int mflags)
+{
+ struct ioat_generic_hw_descriptor *hw_desc;
+ struct ioat_descriptor *desc;
+ int error, busdmaflag;
+
+ error = ENOMEM;
+ hw_desc = NULL;
+
+ if ((mflags & M_WAITOK) != 0)
+ busdmaflag = BUS_DMA_WAITOK;
+ else
+ busdmaflag = BUS_DMA_NOWAIT;
+
+ desc = malloc(sizeof(*desc), M_IOAT, mflags);
+ if (desc == NULL)
+ goto out;
+
+ bus_dmamem_alloc(ioat->hw_desc_tag, (void **)&hw_desc,
+ BUS_DMA_ZERO | busdmaflag, &ioat->hw_desc_map);
+ if (hw_desc == NULL)
+ goto out;
+
+ memset(&desc->bus_dmadesc, 0, sizeof(desc->bus_dmadesc));
+ desc->u.generic = hw_desc;
+
+ error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc,
+ sizeof(*hw_desc), ioat_dmamap_cb, &desc->hw_desc_bus_addr,
+ busdmaflag);
+ if (error)
+ goto out;
+
+out:
+ if (error) {
+ ioat_free_ring_entry(ioat, desc);
+ return (NULL);
+ }
+ return (desc);
+}
+
+static void
+ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc)
+{
+
+ if (desc == NULL)
+ return;
+
+ if (desc->u.generic)
+ bus_dmamem_free(ioat->hw_desc_tag, desc->u.generic,
+ ioat->hw_desc_map);
+ free(desc, M_IOAT);
+}
+
+/*
+ * Reserves space in this IOAT descriptor ring by ensuring enough slots remain
+ * for 'num_descs'.
+ *
+ * If mflags contains M_WAITOK, blocks until enough space is available.
+ *
+ * Returns zero on success, or an errno on error. If num_descs is beyond the
+ * maximum ring size, returns EINVAL; if allocation would block and mflags
+ * contains M_NOWAIT, returns EAGAIN.
+ *
+ * Must be called with the submit_lock held; returns with the lock held. The
+ * lock may be dropped to allocate the ring.
+ *
+ * (The submit_lock is needed to add any entries to the ring, so callers are
+ * assured enough room is available.)
+ */
+static int
+ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags)
+{
+ struct ioat_descriptor **new_ring;
+ uint32_t order;
+ int error;
+
+ mtx_assert(&ioat->submit_lock, MA_OWNED);
+ error = 0;
+
+ if (num_descs < 1 || num_descs > (1 << IOAT_MAX_ORDER)) {
+ error = EINVAL;
+ goto out;
+ }
+ if (ioat->quiescing) {
+ error = ENXIO;
+ goto out;
+ }
+
+ for (;;) {
+ if (ioat_get_ring_space(ioat) >= num_descs)
+ goto out;
+
+ order = ioat->ring_size_order;
+ if (ioat->is_resize_pending || order == IOAT_MAX_ORDER) {
+ if ((mflags & M_WAITOK) != 0) {
+ msleep(&ioat->tail, &ioat->submit_lock, 0,
+ "ioat_rsz", 0);
+ continue;
+ }
+
+ error = EAGAIN;
+ break;
+ }
+
+ ioat->is_resize_pending = TRUE;
+ for (;;) {
+ mtx_unlock(&ioat->submit_lock);
+
+ new_ring = ioat_prealloc_ring(ioat, 1 << (order + 1),
+ TRUE, mflags);
+
+ mtx_lock(&ioat->submit_lock);
+ KASSERT(ioat->ring_size_order == order,
+ ("is_resize_pending should protect order"));
+
+ if (new_ring == NULL) {
+ KASSERT((mflags & M_WAITOK) == 0,
+ ("allocation failed"));
+ error = EAGAIN;
+ break;
+ }
+
+ error = ring_grow(ioat, order, new_ring);
+ if (error == 0)
+ break;
+ }
+ ioat->is_resize_pending = FALSE;
+ wakeup(&ioat->tail);
+ if (error)
+ break;
+ }
+
+out:
+ mtx_assert(&ioat->submit_lock, MA_OWNED);
+ return (error);
+}
+
+static struct ioat_descriptor **
+ioat_prealloc_ring(struct ioat_softc *ioat, uint32_t size, boolean_t need_dscr,
+ int mflags)
+{
+ struct ioat_descriptor **ring;
+ uint32_t i;
+ int error;
+
+ KASSERT(size > 0 && powerof2(size), ("bogus size"));
+
+ ring = malloc(size * sizeof(*ring), M_IOAT, M_ZERO | mflags);
+ if (ring == NULL)
+ return (NULL);
+
+ if (need_dscr) {
+ error = ENOMEM;
+ for (i = size / 2; i < size; i++) {
+ ring[i] = ioat_alloc_ring_entry(ioat, mflags);
+ if (ring[i] == NULL)
+ goto out;
+ ring[i]->id = i;
+ }
+ }
+ error = 0;
+
+out:
+ if (error != 0 && ring != NULL) {
+ ioat_free_ring(ioat, size, ring);
+ ring = NULL;
+ }
+ return (ring);
+}
+
+static void
+ioat_free_ring(struct ioat_softc *ioat, uint32_t size,
+ struct ioat_descriptor **ring)
+{
+ uint32_t i;
+
+ for (i = 0; i < size; i++) {
+ if (ring[i] != NULL)
+ ioat_free_ring_entry(ioat, ring[i]);
+ }
+ free(ring, M_IOAT);
+}
+
+static struct ioat_descriptor *
+ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index)
+{
+
+ return (ioat->ring[index % (1 << ioat->ring_size_order)]);
+}
+
+static int
+ring_grow(struct ioat_softc *ioat, uint32_t oldorder,
+ struct ioat_descriptor **newring)
+{
+ struct ioat_descriptor *tmp, *next;
+ struct ioat_dma_hw_descriptor *hw;
+ uint32_t oldsize, newsize, head, tail, i, end;
+ int error;
+
+ CTR0(KTR_IOAT, __func__);
+
+ mtx_assert(&ioat->submit_lock, MA_OWNED);
+
+ if (oldorder != ioat->ring_size_order || oldorder >= IOAT_MAX_ORDER) {
+ error = EINVAL;
+ goto out;
+ }
+
+ oldsize = (1 << oldorder);
+ newsize = (1 << (oldorder + 1));
+
+ mtx_lock(&ioat->cleanup_lock);
+
+ head = ioat->head & (oldsize - 1);
+ tail = ioat->tail & (oldsize - 1);
+
+ /* Copy old descriptors to new ring */
+ for (i = 0; i < oldsize; i++)
+ newring[i] = ioat->ring[i];
+
+ /*
+ * If head has wrapped but tail hasn't, we must swap some descriptors
+ * around so that tail can increment directly to head.
+ */
+ if (head < tail) {
+ for (i = 0; i <= head; i++) {
+ tmp = newring[oldsize + i];
+
+ newring[oldsize + i] = newring[i];
+ newring[oldsize + i]->id = oldsize + i;
+
+ newring[i] = tmp;
+ newring[i]->id = i;
+ }
+ head += oldsize;
+ }
+
+ KASSERT(head >= tail, ("invariants"));
+
+ /* Head didn't wrap; we only need to link in oldsize..newsize */
+ if (head < oldsize) {
+ i = oldsize - 1;
+ end = newsize;
+ } else {
+ /* Head did wrap; link newhead..newsize and 0..oldhead */
+ i = head;
+ end = newsize + (head - oldsize) + 1;
+ }
+
+ /*
+ * Fix up hardware ring, being careful not to trample the active
+ * section (tail -> head).
+ */
+ for (; i < end; i++) {
+ KASSERT((i & (newsize - 1)) < tail ||
+ (i & (newsize - 1)) >= head, ("trampling snake"));
+
+ next = newring[(i + 1) & (newsize - 1)];
+ hw = newring[i & (newsize - 1)]->u.dma;
+ hw->next = next->hw_desc_bus_addr;
+ }
+
+ free(ioat->ring, M_IOAT);
+ ioat->ring = newring;
+ ioat->ring_size_order = oldorder + 1;
+ ioat->tail = tail;
+ ioat->head = head;
+ error = 0;
+
+ mtx_unlock(&ioat->cleanup_lock);
+out:
+ if (error)
+ ioat_free_ring(ioat, (1 << (oldorder + 1)), newring);
+ return (error);
+}
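
The swap step above is the subtle part: when the masked head has wrapped below the masked tail, entries 0..head are exchanged with their counterparts in the new upper half so the active run becomes contiguous again. A toy userland model of just that step, using entry ids in place of descriptors (oldsize 4 growing to 8, masked head 1, masked tail 3):

    #include <stdio.h>

    int
    main(void)
    {
        /* Entry ids stand in for descriptors; slots 0-3 hold the old ring,
         * slots 4-7 the freshly allocated upper half. */
        int ring[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
        unsigned oldsize = 4, head = 1, tail = 3, i;

        if (head < tail) {    /* head wrapped; un-wrap the active run */
            for (i = 0; i <= head; i++) {
                int tmp = ring[oldsize + i];

                ring[oldsize + i] = ring[i];
                ring[i] = tmp;
            }
            head += oldsize;
        }
        /* Active entries now occupy slots tail..head-1 = 3..4 contiguously. */
        printf("head=%u tail=%u slot4=%d\n", head, tail, ring[4]);
        return (0);
    }
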
+
+static int
+ring_shrink(struct ioat_softc *ioat, uint32_t oldorder,
+ struct ioat_descriptor **newring)
+{
+ struct ioat_dma_hw_descriptor *hw;
+ struct ioat_descriptor *ent, *next;
+ uint32_t oldsize, newsize, current_idx, new_idx, i;
+ int error;
+
+ CTR0(KTR_IOAT, __func__);
+
+ mtx_assert(&ioat->submit_lock, MA_OWNED);
+
+ if (oldorder != ioat->ring_size_order || oldorder <= IOAT_MIN_ORDER) {
+ error = EINVAL;
+ goto out_unlocked;
+ }
+
+ oldsize = (1 << oldorder);
+ newsize = (1 << (oldorder - 1));
+
+ mtx_lock(&ioat->cleanup_lock);
+
+ /* Can't shrink below current active set! */
+ if (ioat_get_active(ioat) >= newsize) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Copy current descriptors to the new ring, dropping the removed
+ * descriptors.
+ */
+ for (i = 0; i < newsize; i++) {
+ current_idx = (ioat->tail + i) & (oldsize - 1);
+ new_idx = (ioat->tail + i) & (newsize - 1);
+
+ newring[new_idx] = ioat->ring[current_idx];
+ newring[new_idx]->id = new_idx;
+ }
+
+ /* Free deleted descriptors */
+ for (i = newsize; i < oldsize; i++) {
+ ent = ioat_get_ring_entry(ioat, ioat->tail + i);
+ ioat_free_ring_entry(ioat, ent);
+ }
+
+ /* Fix up hardware ring. */
+ hw = newring[(ioat->tail + newsize - 1) & (newsize - 1)]->u.dma;
+ next = newring[(ioat->tail + newsize) & (newsize - 1)];
+ hw->next = next->hw_desc_bus_addr;
+
+ free(ioat->ring, M_IOAT);
+ ioat->ring = newring;
+ ioat->ring_size_order = oldorder - 1;
+ error = 0;
+
+out:
+ mtx_unlock(&ioat->cleanup_lock);
+out_unlocked:
+ if (error)
+ ioat_free_ring(ioat, (1 << (oldorder - 1)), newring);
+ return (error);
+}
+
+static void
+ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr)
+{
+ struct ioat_descriptor *desc;
+
+ ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr,
+ IOAT_CHANERR_STR);
+ if (chanerr == 0)
+ return;
+
+ mtx_assert(&ioat->cleanup_lock, MA_OWNED);
+
+ desc = ioat_get_ring_entry(ioat, ioat->tail + 0);
+ dump_descriptor(desc->u.raw);
+
+ desc = ioat_get_ring_entry(ioat, ioat->tail + 1);
+ dump_descriptor(desc->u.raw);
+}
+
+static void
+ioat_timer_callback(void *arg)
+{
+ struct ioat_descriptor **newring;
+ struct ioat_softc *ioat;
+ uint32_t order;
+
+ ioat = arg;
+ ioat_log_message(1, "%s\n", __func__);
+
+ if (ioat->is_completion_pending) {
+ ioat_process_events(ioat);
+ return;
+ }
+
+ /* Slowly scale the ring down if idle. */
+ mtx_lock(&ioat->submit_lock);
+ order = ioat->ring_size_order;
+ if (ioat->is_resize_pending || order == IOAT_MIN_ORDER) {
+ mtx_unlock(&ioat->submit_lock);
+ goto out;
+ }
+ ioat->is_resize_pending = TRUE;
+ mtx_unlock(&ioat->submit_lock);
+
+ newring = ioat_prealloc_ring(ioat, 1 << (order - 1), FALSE,
+ M_NOWAIT);
+
+ mtx_lock(&ioat->submit_lock);
+ KASSERT(ioat->ring_size_order == order,
+ ("resize_pending protects order"));
+
+ if (newring != NULL)
+ ring_shrink(ioat, order, newring);
+
+ ioat->is_resize_pending = FALSE;
+ mtx_unlock(&ioat->submit_lock);
+
+out:
+ if (ioat->ring_size_order > IOAT_MIN_ORDER)
+ callout_reset(&ioat->timer, 10 * hz,
+ ioat_timer_callback, ioat);
+}
+
+/*
+ * Support Functions
+ */
+static void
+ioat_submit_single(struct ioat_softc *ioat)
+{
+
+ ioat_get(ioat, IOAT_ACTIVE_DESCR_REF);
+ atomic_add_rel_int(&ioat->head, 1);
+ atomic_add_rel_int(&ioat->hw_head, 1);
+
+ if (!ioat->is_completion_pending) {
+ ioat->is_completion_pending = TRUE;
+ callout_reset(&ioat->timer, IOAT_INTR_TIMO,
+ ioat_timer_callback, ioat);
+ }
+
+ ioat->stats.descriptors_submitted++;
+}
+
+static int
+ioat_reset_hw(struct ioat_softc *ioat)
+{
+ uint64_t status;
+ uint32_t chanerr;
+ unsigned timeout;
+ int error;
+
+ mtx_lock(IOAT_REFLK);
+ ioat->quiescing = TRUE;
+ ioat_drain_locked(ioat);
+ mtx_unlock(IOAT_REFLK);
+
+ status = ioat_get_chansts(ioat);
+ if (is_ioat_active(status) || is_ioat_idle(status))
+ ioat_suspend(ioat);
+
+ /* Wait at most 20 ms */
+ for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) &&
+ timeout < 20; timeout++) {
+ DELAY(1000);
+ status = ioat_get_chansts(ioat);
+ }
+ if (timeout == 20) {
+ error = ETIMEDOUT;
+ goto out;
+ }
+
+ KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce"));
+
+ chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+ ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
+
+ /*
+	 * IOAT v3 workaround - program CHANERRMSK_INT with 3E07h to mask out
+	 * errors that can cause stability issues for IOAT v3.
+ */
+ pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07,
+ 4);
+ chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4);
+ pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4);
+
+ /*
+ * BDXDE and BWD models reset MSI-X registers on device reset.
+ * Save/restore their contents manually.
+ */
+ if (ioat_model_resets_msix(ioat)) {
+ ioat_log_message(1, "device resets MSI-X registers; saving\n");
+ pci_save_state(ioat->device);
+ }
+
+ ioat_reset(ioat);
+
+ /* Wait at most 20 ms */
+ for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++)
+ DELAY(1000);
+ if (timeout == 20) {
+ error = ETIMEDOUT;
+ goto out;
+ }
+
+ if (ioat_model_resets_msix(ioat)) {
+ ioat_log_message(1, "device resets registers; restored\n");
+ pci_restore_state(ioat->device);
+ }
+
+ /* Reset attempts to return the hardware to "halted." */
+ status = ioat_get_chansts(ioat);
+ if (is_ioat_active(status) || is_ioat_idle(status)) {
+ /* So this really shouldn't happen... */
+ ioat_log_message(0, "Device is active after a reset?\n");
+ ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
+ error = 0;
+ goto out;
+ }
+
+ chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
+ if (chanerr != 0) {
+ mtx_lock(&ioat->cleanup_lock);
+ ioat_halted_debug(ioat, chanerr);
+ mtx_unlock(&ioat->cleanup_lock);
+ error = EIO;
+ goto out;
+ }
+
+ /*
+ * Bring device back online after reset. Writing CHAINADDR brings the
+ * device back to active.
+ *
+ * The internal ring counter resets to zero, so we have to start over
+ * at zero as well.
+ */
+ ioat->tail = ioat->head = ioat->hw_head = 0;
+ ioat->last_seen = 0;
+
+ ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
+ ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
+ ioat_write_chainaddr(ioat, ioat->ring[0]->hw_desc_bus_addr);
+ error = 0;
+
+out:
+ mtx_lock(IOAT_REFLK);
+ ioat->quiescing = FALSE;
+ wakeup(&ioat->quiescing);
+ mtx_unlock(IOAT_REFLK);
+
+ if (error == 0)
+ error = ioat_start_channel(ioat);
+
+ return (error);
+}
+
+static int
+sysctl_handle_chansts(SYSCTL_HANDLER_ARGS)
+{
+ struct ioat_softc *ioat;
+ struct sbuf sb;
+ uint64_t status;
+ int error;
+
+ ioat = arg1;
+
+ status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS;
+
+ sbuf_new_for_sysctl(&sb, NULL, 256, req);
+ switch (status) {
+ case IOAT_CHANSTS_ACTIVE:
+ sbuf_printf(&sb, "ACTIVE");
+ break;
+ case IOAT_CHANSTS_IDLE:
+ sbuf_printf(&sb, "IDLE");
+ break;
+ case IOAT_CHANSTS_SUSPENDED:
+ sbuf_printf(&sb, "SUSPENDED");
+ break;
+ case IOAT_CHANSTS_HALTED:
+ sbuf_printf(&sb, "HALTED");
+ break;
+ case IOAT_CHANSTS_ARMED:
+ sbuf_printf(&sb, "ARMED");
+ break;
+ default:
+ sbuf_printf(&sb, "UNKNOWN");
+ break;
+ }
+ error = sbuf_finish(&sb);
+ sbuf_delete(&sb);
+
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ return (EINVAL);
+}
+
+static int
+sysctl_handle_dpi(SYSCTL_HANDLER_ARGS)
+{
+ struct ioat_softc *ioat;
+ struct sbuf sb;
+#define PRECISION "1"
+ const uintmax_t factor = 10;
+ uintmax_t rate;
+ int error;
+
+ ioat = arg1;
+ sbuf_new_for_sysctl(&sb, NULL, 16, req);
+
+ if (ioat->stats.interrupts == 0) {
+ sbuf_printf(&sb, "NaN");
+ goto out;
+ }
+ rate = ioat->stats.descriptors_processed * factor /
+ ioat->stats.interrupts;
+ sbuf_printf(&sb, "%ju.%." PRECISION "ju", rate / factor,
+ rate % factor);
+#undef PRECISION
+out:
+ error = sbuf_finish(&sb);
+ sbuf_delete(&sb);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ return (EINVAL);
+}
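
The handler formats a one-decimal ratio without floating point by scaling by ten, then splitting with integer division and modulus. A userland illustration of the same arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uintmax_t processed = 15, interrupts = 4, factor = 10;
        uintmax_t rate = processed * factor / interrupts;   /* 37 */

        /* Prints "3.7": 15 descriptors over 4 interrupts. */
        printf("%ju.%ju\n", rate / factor, rate % factor);
        return (0);
    }
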
+
+static int
+sysctl_handle_error(SYSCTL_HANDLER_ARGS)
+{
+ struct ioat_descriptor *desc;
+ struct ioat_softc *ioat;
+ int error, arg;
+
+ ioat = arg1;
+
+ arg = 0;
+ error = SYSCTL_OUT(req, &arg, sizeof(arg));
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ error = SYSCTL_IN(req, &arg, sizeof(arg));
+ if (error != 0)
+ return (error);
+
+ if (arg != 0) {
+ ioat_acquire(&ioat->dmaengine);
+ desc = ioat_op_generic(ioat, IOAT_OP_COPY, 1,
+ 0xffff000000000000ull, 0xffff000000000000ull, NULL, NULL,
+ 0);
+ if (desc == NULL)
+ error = ENOMEM;
+ else
+ ioat_submit_single(ioat);
+ ioat_release(&ioat->dmaengine);
+ }
+ return (error);
+}
+
+static int
+sysctl_handle_reset(SYSCTL_HANDLER_ARGS)
+{
+ struct ioat_softc *ioat;
+ int error, arg;
+
+ ioat = arg1;
+
+ arg = 0;
+ error = SYSCTL_OUT(req, &arg, sizeof(arg));
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ error = SYSCTL_IN(req, &arg, sizeof(arg));
+ if (error != 0)
+ return (error);
+
+ if (arg != 0)
+ error = ioat_reset_hw(ioat);
+
+ return (error);
+}
+
+static void
+dump_descriptor(void *hw_desc)
+{
+ int i, j;
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 8; j++)
+ printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]);
+ printf("\n");
+ }
+}
+
+static void
+ioat_setup_sysctl(device_t device)
+{
+ struct sysctl_oid_list *par, *statpar, *state, *hammer;
+ struct sysctl_ctx_list *ctx;
+ struct sysctl_oid *tree, *tmp;
+ struct ioat_softc *ioat;
+
+ ioat = DEVICE2SOFTC(device);
+ ctx = device_get_sysctl_ctx(device);
+ tree = device_get_sysctl_tree(device);
+ par = SYSCTL_CHILDREN(tree);
+
+ SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD,
+ &ioat->version, 0, "HW version (0xMM form)");
+ SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD,
+ &ioat->max_xfer_size, 0, "HW maximum transfer size");
+ SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD,
+ &ioat->intrdelay_supported, 0, "Is INTRDELAY supported");
+#ifdef notyet
+ SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD,
+ &ioat->intrdelay_max, 0,
+ "Maximum configurable INTRDELAY on this channel (microseconds)");
+#endif
+
+ tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD, NULL,
+ "IOAT channel internal state");
+ state = SYSCTL_CHILDREN(tmp);
+
+ SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD,
+ &ioat->ring_size_order, 0, "SW descriptor ring size order");
+ SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head,
+ 0, "SW descriptor head pointer index");
+ SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail,
+ 0, "SW descriptor tail pointer index");
+ SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "hw_head", CTLFLAG_RD,
+ &ioat->hw_head, 0, "HW DMACOUNT");
+
+ SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD,
+ ioat->comp_update, "HW addr of last completion");
+
+ SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_resize_pending", CTLFLAG_RD,
+ &ioat->is_resize_pending, 0, "resize pending");
+ SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_completion_pending",
+ CTLFLAG_RD, &ioat->is_completion_pending, 0, "completion pending");
+ SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_reset_pending", CTLFLAG_RD,
+ &ioat->is_reset_pending, 0, "reset pending");
+ SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_channel_running", CTLFLAG_RD,
+ &ioat->is_channel_running, 0, "channel running");
+
+ SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts",
+ CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A",
+ "String of the channel status");
+
+#ifdef notyet
+ SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD,
+ &ioat->cached_intrdelay, 0,
+ "Current INTRDELAY on this channel (cached, microseconds)");
+#endif
+
+ tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD, NULL,
+ "Big hammers (mostly for testing)");
+ hammer = SYSCTL_CHILDREN(tmp);
+
+ SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset",
+ CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_reset, "I",
+ "Set to non-zero to reset the hardware");
+ SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_error",
+ CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_error, "I",
+ "Set to non-zero to inject a recoverable hardware error");
+
+ tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats", CTLFLAG_RD, NULL,
+ "IOAT channel statistics");
+ statpar = SYSCTL_CHILDREN(tmp);
+
+ SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts", CTLFLAG_RW,
+ &ioat->stats.interrupts,
+ "Number of interrupts processed on this channel");
+ SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors", CTLFLAG_RW,
+ &ioat->stats.descriptors_processed,
+ "Number of descriptors processed on this channel");
+ SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted", CTLFLAG_RW,
+ &ioat->stats.descriptors_submitted,
+ "Number of descriptors submitted to this channel");
+ SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored", CTLFLAG_RW,
+ &ioat->stats.descriptors_error,
+ "Number of descriptors failed by channel errors");
+#ifdef notyet
+ SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts", CTLFLAG_RW,
+ &ioat->stats.channel_halts, 0,
+ "Number of times the channel has halted");
+ SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr", CTLFLAG_RW,
+ &ioat->stats.last_halt_chanerr, 0,
+ "The raw CHANERR when the channel was last halted");
+#endif
+
+ SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt",
+ CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_dpi, "A",
+ "Descriptors per interrupt");
+}
+
+static inline struct ioat_softc *
+ioat_get(struct ioat_softc *ioat, enum ioat_ref_kind kind)
+{
+ uint32_t old;
+
+ KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
+
+ old = atomic_fetchadd_32(&ioat->refcnt, 1);
+ KASSERT(old < UINT32_MAX, ("refcnt overflow"));
+
+#ifdef INVARIANTS
+ old = atomic_fetchadd_32(&ioat->refkinds[kind], 1);
+ KASSERT(old < UINT32_MAX, ("refcnt kind overflow"));
+#endif
+
+ return (ioat);
+}
+
+static inline void
+ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind)
+{
+
+ _ioat_putn(ioat, n, kind, FALSE);
+}
+
+static inline void
+ioat_putn_locked(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind)
+{
+
+ _ioat_putn(ioat, n, kind, TRUE);
+}
+
+static inline void
+_ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind,
+ boolean_t locked)
+{
+ uint32_t old;
+
+ KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus"));
+
+ if (n == 0)
+ return;
+
+#ifdef INVARIANTS
+ old = atomic_fetchadd_32(&ioat->refkinds[kind], -n);
+ KASSERT(old >= n, ("refcnt kind underflow"));
+#endif
+
+ /* Skip acquiring the lock if resulting refcnt > 0. */
+ for (;;) {
+ old = ioat->refcnt;
+ if (old <= n)
+ break;
+ if (atomic_cmpset_32(&ioat->refcnt, old, old - n))
+ return;
+ }
+
+ if (locked)
+ mtx_assert(IOAT_REFLK, MA_OWNED);
+ else
+ mtx_lock(IOAT_REFLK);
+
+ old = atomic_fetchadd_32(&ioat->refcnt, -n);
+ KASSERT(old >= n, ("refcnt error"));
+
+ if (old == n)
+ wakeup(IOAT_REFLK);
+ if (!locked)
+ mtx_unlock(IOAT_REFLK);
+}
+
+static inline void
+ioat_put(struct ioat_softc *ioat, enum ioat_ref_kind kind)
+{
+
+ ioat_putn(ioat, 1, kind);
+}
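
_ioat_putn() only takes IOAT_REFLK when the count might reach zero, because a waiter in ioat_drain_locked() must then be woken; as long as the result stays positive, a bare compare-and-swap suffices. A toy userland mirror of that fast path (C11 atomics stand in for the kernel's atomic_cmpset_32):

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned refcnt = 3;

    /* Fast path of _ioat_putn(): drop 'n' references lock-free while the
     * count stays above zero; fall back when it could reach zero. */
    static int
    put_fast(unsigned n)
    {
        unsigned old = atomic_load(&refcnt);

        while (old > n) {
            if (atomic_compare_exchange_weak(&refcnt, &old, old - n))
                return (1);    /* dropped without the lock */
        }
        return (0);            /* slow path: lock, subtract, maybe wakeup */
    }

    int
    main(void)
    {
        printf("fast=%d refcnt=%u\n", put_fast(2), atomic_load(&refcnt));
        return (0);
    }
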
+
+static void
+ioat_drain_locked(struct ioat_softc *ioat)
+{
+
+ mtx_assert(IOAT_REFLK, MA_OWNED);
+ while (ioat->refcnt > 0)
+ msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0);
+}
diff --git a/sys/dev/ioat/ioat.h b/sys/dev/ioat/ioat.h
new file mode 100644
index 0000000..2e10124
--- /dev/null
+++ b/sys/dev/ioat/ioat.h
@@ -0,0 +1,218 @@
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD$");
+
+#ifndef __IOAT_H__
+#define __IOAT_H__
+
+#include <sys/param.h>
+#include <machine/bus.h>
+
+/*
+ * This file defines the public interface to the IOAT driver.
+ */
+
+/*
+ * Enables an interrupt for this operation. Typically, you would only enable
+ * this on the last operation in a group.
+ */
+#define DMA_INT_EN 0x1
+/*
+ * Like M_NOWAIT. Operations will return NULL if they cannot allocate a
+ * descriptor without blocking.
+ */
+#define DMA_NO_WAIT 0x2
+/*
+ * Disallow prefetching the source of the following operation. Ordinarily, DMA
+ * operations can be pipelined on some hardware. E.g., operation 2's source
+ * may be prefetched before operation 1 completes.
+ */
+#define DMA_FENCE 0x4
+#define _DMA_GENERIC_FLAGS (DMA_INT_EN | DMA_NO_WAIT | DMA_FENCE)
+
+/*
+ * Emit a CRC32C as the result of an ioat_copy_crc() or ioat_crc().
+ */
+#define DMA_CRC_STORE 0x8
+
+/*
+ * Compare the CRC32C of an ioat_copy_crc() or ioat_crc() against an expected
+ * value. It is invalid to specify both TEST and STORE.
+ */
+#define DMA_CRC_TEST 0x10
+#define _DMA_CRC_TESTSTORE (DMA_CRC_STORE | DMA_CRC_TEST)
+
+/*
+ * Use an inline comparison CRC32C or emit an inline CRC32C result. Invalid
+ * without one of STORE or TEST.
+ */
+#define DMA_CRC_INLINE 0x20
+#define _DMA_CRC_FLAGS (DMA_CRC_STORE | DMA_CRC_TEST | DMA_CRC_INLINE)
+
+/*
+ * Hardware revision number. Different hardware revisions support different
+ * features. For example, 3.2 cannot read from MMIO space, while 3.3 can.
+ */
+#define IOAT_VER_3_0 0x30
+#define IOAT_VER_3_2 0x32
+#define IOAT_VER_3_3 0x33
+
+typedef void *bus_dmaengine_t;
+struct bus_dmadesc;
+typedef void (*bus_dmaengine_callback_t)(void *arg, int error);
+
+/*
+ * Called first to acquire a reference to the DMA channel
+ *
+ * Flags may be M_WAITOK or M_NOWAIT.
+ */
+bus_dmaengine_t ioat_get_dmaengine(uint32_t channel_index, int flags);
+
+/* Release the DMA channel */
+void ioat_put_dmaengine(bus_dmaengine_t dmaengine);
+
+/* Check the DMA engine's HW version */
+int ioat_get_hwversion(bus_dmaengine_t dmaengine);
+size_t ioat_get_max_io_size(bus_dmaengine_t dmaengine);
+
+/*
+ * Set interrupt coalescing on a DMA channel.
+ *
+ * The argument is in microseconds. A zero value disables coalescing. Any
+ * other value delays interrupt generation for N microseconds to provide
+ * opportunity to coalesce multiple operations into a single interrupt.
+ *
+ * Returns an error status, or zero on success.
+ *
+ * - ERANGE if the given value exceeds the delay supported by the hardware.
+ * (All current hardware supports a maximum of 0x3fff microseconds delay.)
+ * - ENODEV if the hardware does not support interrupt coalescing.
+ */
+int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay);
+
+/*
+ * Return the maximum supported coalescing period, for use in
+ * ioat_set_interrupt_coalesce(). If the hardware does not support coalescing,
+ * returns zero.
+ */
+uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine);
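+
+/*
+ * Illustrative sketch (not part of the API): clamp a requested coalescing
+ * period to what the hardware reports before applying it.  'dmaengine' is
+ * assumed to be a handle obtained from ioat_get_dmaengine().
+ *
+ *	uint16_t max, want = 100;
+ *	int error;
+ *
+ *	max = ioat_get_max_coalesce_period(dmaengine);
+ *	if (max != 0 && want > max)
+ *		want = max;
+ *	error = ioat_set_interrupt_coalesce(dmaengine, want);
+ *	if (error != 0 && error != ENODEV)
+ *		printf("coalesce setup failed: %d\n", error);
+ */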
+
+/*
+ * Acquire must be called before issuing an operation.  Release is called
+ * after.  Multiple operations can be issued within the context of one
+ * acquire and release.
+ */
+void ioat_acquire(bus_dmaengine_t dmaengine);
+void ioat_release(bus_dmaengine_t dmaengine);
+
+/*
+ * Acquire_reserve can be called to ensure there is room for N descriptors. If
+ * it succeeds, the next N valid operations will successfully enqueue.
+ *
+ * It may fail with:
+ * - ENXIO if the channel is in an errored state, or the driver is being
+ * unloaded
+ * - EAGAIN if mflags included M_NOWAIT
+ *
+ * On failure, the caller does not hold the dmaengine.
+ */
+int ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags);
+
+/*
+ * Issue a blockfill operation. The 64-bit pattern 'fillpattern' is written to
+ * 'len' physically contiguous bytes at 'dst'.
+ *
+ * Only supported on devices with the BFILL capability.
+ */
+struct bus_dmadesc *ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst,
+ uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn,
+ void *callback_arg, uint32_t flags);
+
+/* Issues the copy data operation */
+struct bus_dmadesc *ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
+ bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
+ void *callback_arg, uint32_t flags);
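+
+/*
+ * Typical usage (an illustrative sketch, not the only valid pattern):
+ * acquire the engine, queue one or more operations, then release to hand
+ * them to the hardware.  'done_cb', 'arg', 'dst_phys', 'src_phys' and 'len'
+ * are hypothetical caller-supplied values.
+ *
+ *	bus_dmaengine_t eng;
+ *	struct bus_dmadesc *desc;
+ *
+ *	eng = ioat_get_dmaengine(0, M_WAITOK);
+ *	ioat_acquire(eng);
+ *	desc = ioat_copy(eng, dst_phys, src_phys, len, done_cb, arg,
+ *	    DMA_INT_EN);
+ *	ioat_release(eng);
+ *	...
+ *	ioat_put_dmaengine(eng);	(at teardown)
+ */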
+
+/*
+ * Issue a copy data operation, with constraints:
+ * - src1, src2, dst1, dst2 are all page-aligned addresses
+ * - The quantity to copy is exactly 2 pages;
+ * - src1 -> dst1, src2 -> dst2
+ *
+ * Why use this instead of normal _copy()? You can copy two non-contiguous
+ * pages (src, dst, or both) with one descriptor.
+ */
+struct bus_dmadesc *ioat_copy_8k_aligned(bus_dmaengine_t dmaengine,
+ bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
+
+/*
+ * Copy len bytes from src to dst, like ioat_copy().
+ *
+ * Additionally, accumulate a CRC32C of the data.
+ *
+ * If initialseed is not NULL, the value it points to is used to seed the
+ * initial value of the CRC32C.
+ *
+ * If flags include DMA_CRC_STORE and not DMA_CRC_INLINE, crcptr is written
+ * with the 32-bit CRC32C result (in wire format).
+ *
+ * If flags include DMA_CRC_TEST and not DMA_CRC_INLINE, the computed CRC32C is
+ * compared with the 32-bit CRC32C pointed to by crcptr. If they do not match,
+ * a channel error is raised.
+ *
+ * If the DMA_CRC_INLINE flag is set, crcptr is ignored and the DMA engine uses
+ * the 4 bytes trailing the source data (TEST) or the destination data (STORE).
+ */
+struct bus_dmadesc *ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst,
+ bus_addr_t src, bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
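+
+/*
+ * Illustrative sketch: copy a buffer and store the CRC32C of the data at a
+ * separate physical address.  'crc_phys' is a hypothetical caller-provided
+ * location; DMA_CRC_INLINE is deliberately omitted so the result is written
+ * there rather than trailing the destination data.
+ *
+ *	uint32_t seed = 0;
+ *
+ *	desc = ioat_copy_crc(eng, dst_phys, src_phys, len, &seed, crc_phys,
+ *	    done_cb, arg, DMA_INT_EN | DMA_CRC_STORE);
+ */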
+
+/*
+ * ioat_crc() is nearly identical to ioat_copy_crc(), but does not actually
+ * move data around.
+ *
+ * Like ioat_copy_crc, ioat_crc computes a CRC32C over len bytes pointed to by
+ * src. The flags affect its operation in the same way, with one exception:
+ *
+ * If flags includes both DMA_CRC_STORE and DMA_CRC_INLINE, the computed CRC32C
+ * is written to the 4 bytes trailing the *source* data.
+ */
+struct bus_dmadesc *ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src,
+ bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
+
+/*
+ * Issue a null operation.  The descriptor is submitted to the hardware, but
+ * the hardware does not move any data for it.
+ */
+struct bus_dmadesc *ioat_null(bus_dmaengine_t dmaengine,
+ bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags);
+
+#endif /* __IOAT_H__ */
+
diff --git a/sys/dev/ioat/ioat_hw.h b/sys/dev/ioat/ioat_hw.h
new file mode 100644
index 0000000..6dfe9a6
--- /dev/null
+++ b/sys/dev/ioat/ioat_hw.h
@@ -0,0 +1,167 @@
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD$");
+
+#ifndef __IOAT_HW_H__
+#define __IOAT_HW_H__
+
+#define IOAT_MAX_CHANNELS 32
+
+#define IOAT_CHANCNT_OFFSET 0x00
+
+#define IOAT_XFERCAP_OFFSET 0x01
+/* Only bits [4:0] are valid. */
+#define IOAT_XFERCAP_VALID_MASK 0x1f
+
+#define IOAT_GENCTRL_OFFSET 0x02
+
+#define IOAT_INTRCTRL_OFFSET 0x03
+#define IOAT_INTRCTRL_MASTER_INT_EN 0x01
+
+#define IOAT_ATTNSTATUS_OFFSET 0x04
+
+#define IOAT_CBVER_OFFSET 0x08
+
+#define IOAT_INTRDELAY_OFFSET 0x0C
+#define IOAT_INTRDELAY_SUPPORTED (1 << 15)
+/* Reserved. (1 << 14) */
+/* [13:0] is the coalesce period, in microseconds. */
+#define IOAT_INTRDELAY_US_MASK ((1 << 14) - 1)
+
+#define IOAT_CS_STATUS_OFFSET 0x0E
+
+#define IOAT_DMACAPABILITY_OFFSET 0x10
+#define IOAT_DMACAP_PB (1 << 0)
+#define IOAT_DMACAP_CRC (1 << 1)
+#define IOAT_DMACAP_MARKER_SKIP (1 << 2)
+#define IOAT_DMACAP_OLD_XOR (1 << 3)
+#define IOAT_DMACAP_DCA (1 << 4)
+#define IOAT_DMACAP_MOVECRC (1 << 5)
+#define IOAT_DMACAP_BFILL (1 << 6)
+#define IOAT_DMACAP_EXT_APIC (1 << 7)
+#define IOAT_DMACAP_XOR (1 << 8)
+#define IOAT_DMACAP_PQ (1 << 9)
+#define IOAT_DMACAP_DMA_DIF (1 << 10)
+#define IOAT_DMACAP_DWBES (1 << 13)
+#define IOAT_DMACAP_RAID16SS (1 << 17)
+#define IOAT_DMACAP_DMAMC (1 << 18)
+#define IOAT_DMACAP_CTOS (1 << 19)
+
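+/*
+ * Bit-name string for printf(9)'s "%b" conversion: the leading \20 selects
+ * base-16 output, and each subsequent \<octal bit number> gives the name
+ * printed when that (1-based) bit is set.
+ */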
+#define IOAT_DMACAP_STR \
+ "\20\24Completion_Timeout_Support\23DMA_with_Multicasting_Support" \
+ "\22RAID_Super_descriptors\16Descriptor_Write_Back_Error_Support" \
+ "\13DMA_with_DIF\12PQ\11XOR\10Extended_APIC_ID\07Block_Fill\06Move_CRC" \
+ "\05DCA\04Old_XOR\03Marker_Skipping\02CRC\01Page_Break"
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET 0x80
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define IOAT_CHANCTRL_COMPL_DCA_EN 0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
+#define IOAT_CHANCTRL_INT_REARM 0x0001
+#define IOAT_CHANCTRL_RUN (IOAT_CHANCTRL_INT_REARM |\
+ IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+ IOAT_CHANCTRL_ANY_ERR_ABORT_EN |\
+ IOAT_CHANCTRL_ERR_INT_EN)
+
+#define IOAT_CHANCMD_OFFSET 0x84
+#define IOAT_CHANCMD_RESET 0x20
+#define IOAT_CHANCMD_SUSPEND 0x04
+
+#define IOAT_DMACOUNT_OFFSET 0x86
+
+#define IOAT_CHANSTS_OFFSET_LOW 0x88
+#define IOAT_CHANSTS_OFFSET_HIGH 0x8C
+#define IOAT_CHANSTS_OFFSET 0x88
+
+#define IOAT_CHANSTS_STATUS 0x7ULL
+#define IOAT_CHANSTS_ACTIVE 0x0
+#define IOAT_CHANSTS_IDLE 0x1
+#define IOAT_CHANSTS_SUSPENDED 0x2
+#define IOAT_CHANSTS_HALTED 0x3
+#define IOAT_CHANSTS_ARMED 0x4
+
+#define IOAT_CHANSTS_UNAFFILIATED_ERROR 0x8ULL
+#define IOAT_CHANSTS_SOFT_ERROR 0x10ULL
+
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK (~0x3FULL)
+
+#define IOAT_CHAINADDR_OFFSET_LOW 0x90
+#define IOAT_CHAINADDR_OFFSET_HIGH 0x94
+
+#define IOAT_CHANCMP_OFFSET_LOW 0x98
+#define IOAT_CHANCMP_OFFSET_HIGH 0x9C
+
+#define IOAT_CHANERR_OFFSET 0xA8
+
+#define IOAT_CHANERR_XSADDERR (1 << 0)
+#define IOAT_CHANERR_XDADDERR (1 << 1)
+#define IOAT_CHANERR_NDADDERR (1 << 2)
+#define IOAT_CHANERR_DERR (1 << 3)
+#define IOAT_CHANERR_CHADDERR (1 << 4)
+#define IOAT_CHANERR_CCMDERR (1 << 5)
+#define IOAT_CHANERR_CUNCORERR (1 << 6)
+#define IOAT_CHANERR_DUNCORERR (1 << 7)
+#define IOAT_CHANERR_RDERR (1 << 8)
+#define IOAT_CHANERR_WDERR (1 << 9)
+#define IOAT_CHANERR_DCERR (1 << 10)
+#define IOAT_CHANERR_DXSERR (1 << 11)
+#define IOAT_CHANERR_CMPADDERR (1 << 12)
+#define IOAT_CHANERR_INTCFGERR (1 << 13)
+#define IOAT_CHANERR_SEDERR (1 << 14)
+#define IOAT_CHANERR_UNAFFERR (1 << 15)
+#define IOAT_CHANERR_CXPERR (1 << 16)
+/* Reserved. (1 << 17) */
+#define IOAT_CHANERR_DCNTERR (1 << 18)
+#define IOAT_CHANERR_DIFFERR (1 << 19)
+#define IOAT_CHANERR_GTVERR (1 << 20)
+#define IOAT_CHANERR_ATVERR (1 << 21)
+#define IOAT_CHANERR_RTVERR (1 << 22)
+#define IOAT_CHANERR_BBERR (1 << 23)
+#define IOAT_CHANERR_RDIFFERR (1 << 24)
+#define IOAT_CHANERR_RGTVERR (1 << 25)
+#define IOAT_CHANERR_RATVERR (1 << 26)
+#define IOAT_CHANERR_RRTVERR (1 << 27)
+
+#define IOAT_CHANERR_STR \
+ "\20\34RRTVERR\33RATVERR\32RGTVERR\31RDIFFERR\30BBERR\27RTVERR\26ATVERR" \
+ "\25GTVERR\24DIFFERR\23DCNTERR\21CXPERR\20UNAFFERR\17SEDERR\16INTCFGERR" \
+ "\15CMPADDERR\14DXSERR\13DCERR\12WDERR\11RDERR\10DUNCORERR\07CUNCORERR" \
+ "\06CCMDERR\05CHADDERR\04DERR\03NDADDERR\02XDADDERR\01XSADDERR"
+
+#define IOAT_CFG_CHANERR_INT_OFFSET 0x180
+#define IOAT_CFG_CHANERRMASK_INT_OFFSET 0x184
+
+#define IOAT_MIN_ORDER 4
+#define IOAT_MAX_ORDER 16
+
+#endif /* __IOAT_HW_H__ */
diff --git a/sys/dev/ioat/ioat_internal.h b/sys/dev/ioat/ioat_internal.h
new file mode 100644
index 0000000..322671c
--- /dev/null
+++ b/sys/dev/ioat/ioat_internal.h
@@ -0,0 +1,600 @@
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD$");
+
+#ifndef __IOAT_INTERNAL_H__
+#define __IOAT_INTERNAL_H__
+
+#include <sys/_task.h>
+
+#define DEVICE2SOFTC(dev) ((struct ioat_softc *) device_get_softc(dev))
+#define KTR_IOAT KTR_SPARE3
+
+#define ioat_read_chancnt(ioat) \
+ ioat_read_1((ioat), IOAT_CHANCNT_OFFSET)
+
+#define ioat_read_xfercap(ioat) \
+ (ioat_read_1((ioat), IOAT_XFERCAP_OFFSET) & IOAT_XFERCAP_VALID_MASK)
+
+#define ioat_write_intrctrl(ioat, value) \
+ ioat_write_1((ioat), IOAT_INTRCTRL_OFFSET, (value))
+
+#define ioat_read_cbver(ioat) \
+ (ioat_read_1((ioat), IOAT_CBVER_OFFSET) & 0xFF)
+
+#define ioat_read_dmacapability(ioat) \
+ ioat_read_4((ioat), IOAT_DMACAPABILITY_OFFSET)
+
+#define ioat_write_chanctrl(ioat, value) \
+ ioat_write_2((ioat), IOAT_CHANCTRL_OFFSET, (value))
+
+static __inline uint64_t
+ioat_bus_space_read_8_lower_first(bus_space_tag_t tag,
+ bus_space_handle_t handle, bus_size_t offset)
+{
+ return (bus_space_read_4(tag, handle, offset) |
+ ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32);
+}
+
+static __inline void
+ioat_bus_space_write_8_lower_first(bus_space_tag_t tag,
+ bus_space_handle_t handle, bus_size_t offset, uint64_t val)
+{
+ bus_space_write_4(tag, handle, offset, val);
+ bus_space_write_4(tag, handle, offset + 4, val >> 32);
+}
+
+#ifdef __i386__
+#define ioat_bus_space_read_8 ioat_bus_space_read_8_lower_first
+#define ioat_bus_space_write_8 ioat_bus_space_write_8_lower_first
+#else
+#define ioat_bus_space_read_8(tag, handle, offset) \
+ bus_space_read_8((tag), (handle), (offset))
+#define ioat_bus_space_write_8(tag, handle, offset, val) \
+ bus_space_write_8((tag), (handle), (offset), (val))
+#endif
+
+#define ioat_read_1(ioat, offset) \
+ bus_space_read_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset))
+
+#define ioat_read_2(ioat, offset) \
+ bus_space_read_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset))
+
+#define ioat_read_4(ioat, offset) \
+ bus_space_read_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset))
+
+#define ioat_read_8(ioat, offset) \
+ ioat_bus_space_read_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset))
+
+#define ioat_read_double_4(ioat, offset) \
+ ioat_bus_space_read_8_lower_first((ioat)->pci_bus_tag, \
+ (ioat)->pci_bus_handle, (offset))
+
+#define ioat_write_1(ioat, offset, value) \
+ bus_space_write_1((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset), (value))
+
+#define ioat_write_2(ioat, offset, value) \
+ bus_space_write_2((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset), (value))
+
+#define ioat_write_4(ioat, offset, value) \
+ bus_space_write_4((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset), (value))
+
+#define ioat_write_8(ioat, offset, value) \
+ ioat_bus_space_write_8((ioat)->pci_bus_tag, (ioat)->pci_bus_handle, \
+ (offset), (value))
+
+#define ioat_write_double_4(ioat, offset, value) \
+ ioat_bus_space_write_8_lower_first((ioat)->pci_bus_tag, \
+ (ioat)->pci_bus_handle, (offset), (value))
+
+MALLOC_DECLARE(M_IOAT);
+
+SYSCTL_DECL(_hw_ioat);
+
+extern int g_ioat_debug_level;
+
+struct generic_dma_control {
+ uint32_t int_enable:1;
+ uint32_t src_snoop_disable:1;
+ uint32_t dest_snoop_disable:1;
+ uint32_t completion_update:1;
+ uint32_t fence:1;
+ uint32_t reserved1:1;
+ uint32_t src_page_break:1;
+ uint32_t dest_page_break:1;
+ uint32_t bundle:1;
+ uint32_t dest_dca:1;
+ uint32_t hint:1;
+ uint32_t reserved2:13;
+ uint32_t op:8;
+};
+
+struct ioat_generic_hw_descriptor {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct generic_dma_control control_generic;
+ } u;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t reserved[4];
+};
+
+struct ioat_dma_hw_descriptor {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct generic_dma_control control_generic;
+ struct {
+ uint32_t int_enable:1;
+ uint32_t src_snoop_disable:1;
+ uint32_t dest_snoop_disable:1;
+ uint32_t completion_update:1;
+ uint32_t fence:1;
+ uint32_t null:1;
+ uint32_t src_page_break:1;
+ uint32_t dest_page_break:1;
+ uint32_t bundle:1;
+ uint32_t dest_dca:1;
+ uint32_t hint:1;
+ uint32_t reserved:13;
+ #define IOAT_OP_COPY 0x00
+ uint32_t op:8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t next_src_addr;
+ uint64_t next_dest_addr;
+ uint64_t user1;
+ uint64_t user2;
+};
+
+struct ioat_fill_hw_descriptor {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct generic_dma_control control_generic;
+ struct {
+ uint32_t int_enable:1;
+ uint32_t reserved:1;
+ uint32_t dest_snoop_disable:1;
+ uint32_t completion_update:1;
+ uint32_t fence:1;
+ uint32_t reserved2:2;
+ uint32_t dest_page_break:1;
+ uint32_t bundle:1;
+ uint32_t reserved3:15;
+ #define IOAT_OP_FILL 0x01
+ uint32_t op:8;
+ } control;
+ } u;
+ uint64_t src_data;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t reserved;
+ uint64_t next_dest_addr;
+ uint64_t user1;
+ uint64_t user2;
+};
+
+struct ioat_crc32_hw_descriptor {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct generic_dma_control control_generic;
+ struct {
+ uint32_t int_enable:1;
+ uint32_t src_snoop_disable:1;
+ uint32_t dest_snoop_disable:1;
+ uint32_t completion_update:1;
+ uint32_t fence:1;
+ uint32_t reserved1:3;
+ uint32_t bundle:1;
+ uint32_t dest_dca:1;
+ uint32_t hint:1;
+ uint32_t use_seed:1;
+ /*
+ * crc_location:
+ * For IOAT_OP_MOVECRC_TEST and IOAT_OP_CRC_TEST:
+ * 0: comparison value is pointed to by CRC Address
+ * field.
+ * 1: comparison value follows data in wire format
+ * ("inverted reflected bit order") in the 4 bytes
+ * following the source data.
+ *
+ * For IOAT_OP_CRC_STORE:
+ * 0: Result will be stored at location pointed to by
+ * CRC Address field (in wire format).
+ * 1: Result will be stored directly following the
+ * source data.
+ *
+ * For IOAT_OP_MOVECRC_STORE:
+ * 0: Result will be stored at location pointed to by
+ * CRC Address field (in wire format).
+ * 1: Result will be stored directly following the
+ * *destination* data.
+ */
+ uint32_t crc_location:1;
+ uint32_t reserved2:11;
+ /*
+ * MOVECRC - Move data in the same way as standard copy
+ * operation, but also compute CRC32.
+ *
+ * CRC - Only compute CRC on source data.
+ *
+ * There is a CRC accumulator register in the hardware.
+ * If 'initial' is set, it is initialized to the value
+ * in 'seed.'
+ *
+ * In all modes, these operators accumulate size bytes
+ * at src_addr into the running CRC32C.
+ *
+ * Store mode emits the accumulated CRC, in wire
+ * format, as specified by the crc_location bit above.
+ *
+ * Test mode compares the accumulated CRC against the
+ * reference CRC, as described in crc_location above.
+ * On failure, halts the DMA engine with a CRC error
+ * status.
+ */
+ #define IOAT_OP_MOVECRC 0x41
+ #define IOAT_OP_MOVECRC_TEST 0x42
+ #define IOAT_OP_MOVECRC_STORE 0x43
+ #define IOAT_OP_CRC 0x81
+ #define IOAT_OP_CRC_TEST 0x82
+ #define IOAT_OP_CRC_STORE 0x83
+ uint32_t op:8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t next_src_addr;
+ uint64_t next_dest_addr;
+ uint32_t seed;
+ uint32_t reserved;
+ uint64_t crc_address;
+};
+
+struct ioat_xor_hw_descriptor {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct generic_dma_control control_generic;
+ struct {
+ uint32_t int_enable:1;
+ uint32_t src_snoop_disable:1;
+ uint32_t dest_snoop_disable:1;
+ uint32_t completion_update:1;
+ uint32_t fence:1;
+ uint32_t src_count:3;
+ uint32_t bundle:1;
+ uint32_t dest_dca:1;
+ uint32_t hint:1;
+ uint32_t reserved:13;
+ #define IOAT_OP_XOR 0x87
+ #define IOAT_OP_XOR_VAL 0x88
+ uint32_t op:8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t dest_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+};
+
+struct ioat_xor_ext_hw_descriptor {
+ uint64_t src_addr6;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t next;
+ uint64_t reserved[4];
+};
+
+struct ioat_pq_hw_descriptor {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct generic_dma_control control_generic;
+ struct {
+ uint32_t int_enable:1;
+ uint32_t src_snoop_disable:1;
+ uint32_t dest_snoop_disable:1;
+ uint32_t completion_update:1;
+ uint32_t fence:1;
+ uint32_t src_count:3;
+ uint32_t bundle:1;
+ uint32_t dest_dca:1;
+ uint32_t hint:1;
+ uint32_t p_disable:1;
+ uint32_t q_disable:1;
+ uint32_t reserved:11;
+ #define IOAT_OP_PQ 0x89
+ #define IOAT_OP_PQ_VAL 0x8a
+ uint32_t op:8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t src_addr3;
+ uint8_t coef[8];
+ uint64_t q_addr;
+};
+
+struct ioat_pq_ext_hw_descriptor {
+ uint64_t src_addr4;
+ uint64_t src_addr5;
+ uint64_t src_addr6;
+ uint64_t next;
+ uint64_t src_addr7;
+ uint64_t src_addr8;
+ uint64_t reserved[2];
+};
+
+struct ioat_pq_update_hw_descriptor {
+ uint32_t size;
+ union {
+ uint32_t control_raw;
+ struct generic_dma_control control_generic;
+ struct {
+ uint32_t int_enable:1;
+ uint32_t src_snoop_disable:1;
+ uint32_t dest_snoop_disable:1;
+ uint32_t completion_update:1;
+ uint32_t fence:1;
+ uint32_t src_cnt:3;
+ uint32_t bundle:1;
+ uint32_t dest_dca:1;
+ uint32_t hint:1;
+ uint32_t p_disable:1;
+ uint32_t q_disable:1;
+ uint32_t reserved:3;
+ uint32_t coef:8;
+ #define IOAT_OP_PQ_UP 0x8b
+ uint32_t op:8;
+ } control;
+ } u;
+ uint64_t src_addr;
+ uint64_t p_addr;
+ uint64_t next;
+ uint64_t src_addr2;
+ uint64_t p_src;
+ uint64_t q_src;
+ uint64_t q_addr;
+};
+
+struct ioat_raw_hw_descriptor {
+ uint64_t field[8];
+};
+
+struct bus_dmadesc {
+ bus_dmaengine_callback_t callback_fn;
+ void *callback_arg;
+};
+
+struct ioat_descriptor {
+ struct bus_dmadesc bus_dmadesc;
+ union {
+ struct ioat_generic_hw_descriptor *generic;
+ struct ioat_dma_hw_descriptor *dma;
+ struct ioat_fill_hw_descriptor *fill;
+ struct ioat_crc32_hw_descriptor *crc32;
+ struct ioat_xor_hw_descriptor *xor;
+ struct ioat_xor_ext_hw_descriptor *xor_ext;
+ struct ioat_pq_hw_descriptor *pq;
+ struct ioat_pq_ext_hw_descriptor *pq_ext;
+ struct ioat_raw_hw_descriptor *raw;
+ } u;
+ uint32_t id;
+ bus_addr_t hw_desc_bus_addr;
+};
+
+/* Unused by this driver at this time. */
+#define IOAT_OP_MARKER 0x84
+
+/*
+ * Deprecated OPs -- v3 DMA generates an abort if given these, and this
+ * driver doesn't support anything older than v3.
+ */
+#define IOAT_OP_OLD_XOR 0x85
+#define IOAT_OP_OLD_XOR_VAL 0x86
+
+enum ioat_ref_kind {
+ IOAT_DMAENGINE_REF = 0,
+ IOAT_ACTIVE_DESCR_REF,
+ IOAT_NUM_REF_KINDS
+};
+
+/* One of these per allocated PCI device. */
+struct ioat_softc {
+ bus_dmaengine_t dmaengine;
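+/*
+ * container_of-style conversion: recover the softc from the public
+ * bus_dmaengine_t handle, which points at the embedded 'dmaengine' member.
+ */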
+#define to_ioat_softc(_dmaeng) \
+({ \
+ bus_dmaengine_t *_p = (_dmaeng); \
+ (struct ioat_softc *)((char *)_p - \
+ offsetof(struct ioat_softc, dmaengine)); \
+})
+
+ int version;
+ int chan_idx;
+
+ struct mtx submit_lock;
+ device_t device;
+ bus_space_tag_t pci_bus_tag;
+ bus_space_handle_t pci_bus_handle;
+ int pci_resource_id;
+ struct resource *pci_resource;
+ uint32_t max_xfer_size;
+ uint32_t capabilities;
+ uint16_t intrdelay_max;
+ uint16_t cached_intrdelay;
+
+ struct resource *res;
+ int rid;
+ void *tag;
+
+ bus_dma_tag_t hw_desc_tag;
+ bus_dmamap_t hw_desc_map;
+
+ bus_dma_tag_t comp_update_tag;
+ bus_dmamap_t comp_update_map;
+ uint64_t *comp_update;
+ bus_addr_t comp_update_bus_addr;
+
+ struct callout timer;
+ struct task reset_task;
+
+ boolean_t quiescing;
+ boolean_t destroying;
+ boolean_t is_resize_pending;
+ boolean_t is_completion_pending;
+ boolean_t is_reset_pending;
+ boolean_t is_channel_running;
+ boolean_t intrdelay_supported;
+
+ uint32_t head;
+ uint32_t tail;
+ uint32_t hw_head;
+ uint32_t ring_size_order;
+ bus_addr_t last_seen;
+
+ struct ioat_descriptor **ring;
+
+ struct mtx cleanup_lock;
+ volatile uint32_t refcnt;
+#ifdef INVARIANTS
+ volatile uint32_t refkinds[IOAT_NUM_REF_KINDS];
+#endif
+
+ struct {
+ uint64_t interrupts;
+ uint64_t descriptors_processed;
+ uint64_t descriptors_error;
+ uint64_t descriptors_submitted;
+
+ uint32_t channel_halts;
+ uint32_t last_halt_chanerr;
+ } stats;
+};
+
+void ioat_test_attach(void);
+void ioat_test_detach(void);
+
+static inline uint64_t
+ioat_get_chansts(struct ioat_softc *ioat)
+{
+ uint64_t status;
+
+ if (ioat->version >= IOAT_VER_3_3)
+ status = ioat_read_8(ioat, IOAT_CHANSTS_OFFSET);
+ else
+ /* Must read lower 4 bytes before upper 4 bytes. */
+ status = ioat_read_double_4(ioat, IOAT_CHANSTS_OFFSET);
+ return (status);
+}
+
+static inline void
+ioat_write_chancmp(struct ioat_softc *ioat, uint64_t addr)
+{
+
+ if (ioat->version >= IOAT_VER_3_3)
+ ioat_write_8(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
+ else
+ ioat_write_double_4(ioat, IOAT_CHANCMP_OFFSET_LOW, addr);
+}
+
+static inline void
+ioat_write_chainaddr(struct ioat_softc *ioat, uint64_t addr)
+{
+
+ if (ioat->version >= IOAT_VER_3_3)
+ ioat_write_8(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
+ else
+ ioat_write_double_4(ioat, IOAT_CHAINADDR_OFFSET_LOW, addr);
+}
+
+static inline boolean_t
+is_ioat_active(uint64_t status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline boolean_t
+is_ioat_idle(uint64_t status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_IDLE);
+}
+
+static inline boolean_t
+is_ioat_halted(uint64_t status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline boolean_t
+is_ioat_suspended(uint64_t status)
+{
+ return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+static inline void
+ioat_suspend(struct ioat_softc *ioat)
+{
+ ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_SUSPEND);
+}
+
+static inline void
+ioat_reset(struct ioat_softc *ioat)
+{
+ ioat_write_1(ioat, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET);
+}
+
+static inline boolean_t
+ioat_reset_pending(struct ioat_softc *ioat)
+{
+ uint8_t cmd;
+
+ cmd = ioat_read_1(ioat, IOAT_CHANCMD_OFFSET);
+ return ((cmd & IOAT_CHANCMD_RESET) != 0);
+}
+
+#endif /* __IOAT_INTERNAL_H__ */
diff --git a/sys/dev/ioat/ioat_test.c b/sys/dev/ioat/ioat_test.c
new file mode 100644
index 0000000..5d27b1b
--- /dev/null
+++ b/sys/dev/ioat/ioat_test.c
@@ -0,0 +1,602 @@
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/stdarg.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include "ioat.h"
+#include "ioat_hw.h"
+#include "ioat_internal.h"
+#include "ioat_test.h"
+
+#ifndef time_after
+#define time_after(a,b) ((long)(b) - (long)(a) < 0)
+#endif
+
+MALLOC_DEFINE(M_IOAT_TEST, "ioat_test", "ioat test allocations");
+
+#define IOAT_MAX_BUFS 256
+
+struct test_transaction {
+ void *buf[IOAT_MAX_BUFS];
+ uint32_t length;
+ uint32_t depth;
+ struct ioat_test *test;
+ TAILQ_ENTRY(test_transaction) entry;
+};
+
+#define IT_LOCK() mtx_lock(&ioat_test_lk)
+#define IT_UNLOCK() mtx_unlock(&ioat_test_lk)
+#define IT_ASSERT() mtx_assert(&ioat_test_lk, MA_OWNED)
+static struct mtx ioat_test_lk;
+MTX_SYSINIT(ioat_test_lk, &ioat_test_lk, "test coordination mtx", MTX_DEF);
+
+static int g_thread_index = 1;
+static struct cdev *g_ioat_cdev = NULL;
+
+#define ioat_test_log(v, ...) _ioat_test_log((v), "ioat_test: " __VA_ARGS__)
+static inline void _ioat_test_log(int verbosity, const char *fmt, ...);
+
+static void
+ioat_test_transaction_destroy(struct test_transaction *tx)
+{
+ struct ioat_test *test;
+ int i;
+
+ test = tx->test;
+
+ for (i = 0; i < IOAT_MAX_BUFS; i++) {
+ if (tx->buf[i] != NULL) {
+ if (test->testkind == IOAT_TEST_DMA_8K)
+ free(tx->buf[i], M_IOAT_TEST);
+ else
+ contigfree(tx->buf[i], tx->length, M_IOAT_TEST);
+ tx->buf[i] = NULL;
+ }
+ }
+
+ free(tx, M_IOAT_TEST);
+}
+
+static struct test_transaction *
+ioat_test_transaction_create(struct ioat_test *test,
+ unsigned num_buffers)
+{
+ struct test_transaction *tx;
+ unsigned i;
+
+ tx = malloc(sizeof(*tx), M_IOAT_TEST, M_NOWAIT | M_ZERO);
+ if (tx == NULL)
+ return (NULL);
+
+ tx->length = test->buffer_size;
+
+ for (i = 0; i < num_buffers; i++) {
+ if (test->testkind == IOAT_TEST_DMA_8K)
+ tx->buf[i] = malloc(test->buffer_size, M_IOAT_TEST,
+ M_NOWAIT);
+ else
+ tx->buf[i] = contigmalloc(test->buffer_size,
+ M_IOAT_TEST, M_NOWAIT, 0, BUS_SPACE_MAXADDR,
+ PAGE_SIZE, 0);
+
+ if (tx->buf[i] == NULL) {
+ ioat_test_transaction_destroy(tx);
+ return (NULL);
+ }
+ }
+ return (tx);
+}
+
+static void
+dump_hex(void *p, size_t chunks)
+{
+ size_t i, j;
+
+ for (i = 0; i < chunks; i++) {
+ for (j = 0; j < 8; j++)
+ printf("%08x ", ((uint32_t *)p)[i * 8 + j]);
+ printf("\n");
+ }
+}
+
+static bool
+ioat_compare_ok(struct test_transaction *tx)
+{
+ struct ioat_test *test;
+ char *dst, *src;
+ uint32_t i, j;
+
+ test = tx->test;
+
+ for (i = 0; i < tx->depth; i++) {
+ dst = tx->buf[2 * i + 1];
+ src = tx->buf[2 * i];
+
+ if (test->testkind == IOAT_TEST_FILL) {
+ for (j = 0; j < tx->length; j += sizeof(uint64_t)) {
+ if (memcmp(src, &dst[j],
+ MIN(sizeof(uint64_t), tx->length - j))
+ != 0)
+ return (false);
+ }
+ } else if (test->testkind == IOAT_TEST_DMA) {
+ if (memcmp(src, dst, tx->length) != 0)
+ return (false);
+ } else if (test->testkind == IOAT_TEST_RAW_DMA) {
+ if (test->raw_write)
+ dst = test->raw_vtarget;
+ dump_hex(dst, tx->length / 32);
+ }
+ }
+ return (true);
+}
+
+static void
+ioat_dma_test_callback(void *arg, int error)
+{
+ struct test_transaction *tx;
+ struct ioat_test *test;
+
+ if (error != 0)
+ ioat_test_log(0, "%s: Got error: %d\n", __func__, error);
+
+ tx = arg;
+ test = tx->test;
+
+ if (test->verify && !ioat_compare_ok(tx)) {
+ ioat_test_log(0, "miscompare found\n");
+ atomic_add_32(&test->status[IOAT_TEST_MISCOMPARE], tx->depth);
+ } else if (!test->too_late)
+ atomic_add_32(&test->status[IOAT_TEST_OK], tx->depth);
+
+ IT_LOCK();
+ TAILQ_REMOVE(&test->pend_q, tx, entry);
+ TAILQ_INSERT_TAIL(&test->free_q, tx, entry);
+ wakeup(&test->free_q);
+ IT_UNLOCK();
+}
+
+static int
+ioat_test_prealloc_memory(struct ioat_test *test, int index)
+{
+ uint32_t i, j, k;
+ struct test_transaction *tx;
+
+ for (i = 0; i < test->transactions; i++) {
+ tx = ioat_test_transaction_create(test, test->chain_depth * 2);
+ if (tx == NULL) {
+ ioat_test_log(0, "tx == NULL - memory exhausted\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return (ENOMEM);
+ }
+
+ TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
+
+ tx->test = test;
+ tx->depth = test->chain_depth;
+
+ /* fill in source buffers */
+ for (j = 0; j < (tx->length / sizeof(uint32_t)); j++) {
+ uint32_t val = j + (index << 28);
+
+ for (k = 0; k < test->chain_depth; k++) {
+ ((uint32_t *)tx->buf[2*k])[j] = ~val;
+ ((uint32_t *)tx->buf[2*k+1])[j] = val;
+ }
+ }
+ }
+ return (0);
+}
+
+static void
+ioat_test_release_memory(struct ioat_test *test)
+{
+ struct test_transaction *tx, *s;
+
+ TAILQ_FOREACH_SAFE(tx, &test->free_q, entry, s)
+ ioat_test_transaction_destroy(tx);
+ TAILQ_INIT(&test->free_q);
+
+ TAILQ_FOREACH_SAFE(tx, &test->pend_q, entry, s)
+ ioat_test_transaction_destroy(tx);
+ TAILQ_INIT(&test->pend_q);
+}
+
+static void
+ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
+{
+ struct test_transaction *tx;
+ struct bus_dmadesc *desc;
+ bus_dmaengine_callback_t cb;
+ bus_addr_t src, dest;
+ uint64_t fillpattern;
+ uint32_t i, flags;
+
+ desc = NULL;
+
+ IT_LOCK();
+ while (TAILQ_EMPTY(&test->free_q))
+ msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0);
+
+ tx = TAILQ_FIRST(&test->free_q);
+ TAILQ_REMOVE(&test->free_q, tx, entry);
+ TAILQ_INSERT_HEAD(&test->pend_q, tx, entry);
+ IT_UNLOCK();
+
+ if (test->testkind != IOAT_TEST_MEMCPY)
+ ioat_acquire(dma);
+ for (i = 0; i < tx->depth; i++) {
+ if (test->testkind == IOAT_TEST_MEMCPY) {
+ memcpy(tx->buf[2 * i + 1], tx->buf[2 * i], tx->length);
+ if (i == tx->depth - 1)
+ ioat_dma_test_callback(tx, 0);
+ continue;
+ }
+
+ src = vtophys((vm_offset_t)tx->buf[2*i]);
+ dest = vtophys((vm_offset_t)tx->buf[2*i+1]);
+
+ if (test->testkind == IOAT_TEST_RAW_DMA) {
+ if (test->raw_write)
+ dest = test->raw_target;
+ else
+ src = test->raw_target;
+ }
+
+ if (i == tx->depth - 1) {
+ cb = ioat_dma_test_callback;
+ flags = DMA_INT_EN;
+ } else {
+ cb = NULL;
+ flags = 0;
+ }
+
+ if (test->testkind == IOAT_TEST_DMA ||
+ test->testkind == IOAT_TEST_RAW_DMA)
+ desc = ioat_copy(dma, dest, src, tx->length, cb, tx,
+ flags);
+ else if (test->testkind == IOAT_TEST_FILL) {
+ fillpattern = *(uint64_t *)tx->buf[2*i];
+ desc = ioat_blockfill(dma, dest, fillpattern,
+ tx->length, cb, tx, flags);
+ } else if (test->testkind == IOAT_TEST_DMA_8K) {
+ bus_addr_t src2, dst2;
+
+ src2 = vtophys((vm_offset_t)tx->buf[2*i] + PAGE_SIZE);
+ dst2 = vtophys((vm_offset_t)tx->buf[2*i+1] + PAGE_SIZE);
+
+ desc = ioat_copy_8k_aligned(dma, dest, dst2, src, src2,
+ cb, tx, flags);
+ }
+ if (desc == NULL)
+ break;
+ }
+ if (test->testkind == IOAT_TEST_MEMCPY)
+ return;
+ ioat_release(dma);
+
+ /*
+ * We couldn't issue an I/O -- either the device is being detached or
+ * the HW has reset.  Essentially spin until the device comes back up or
+ * our timer expires.
+ */
+ if (desc == NULL && tx->depth > 0) {
+ atomic_add_32(&test->status[IOAT_TEST_NO_DMA_ENGINE], tx->depth);
+ IT_LOCK();
+ TAILQ_REMOVE(&test->pend_q, tx, entry);
+ TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
+ IT_UNLOCK();
+ }
+}
+
+static void
+ioat_dma_test(void *arg)
+{
+ struct ioat_softc *ioat;
+ struct ioat_test *test;
+ bus_dmaengine_t dmaengine;
+ uint32_t loops;
+ int index, rc, start, end, error;
+
+ test = arg;
+ memset(__DEVOLATILE(void *, test->status), 0, sizeof(test->status));
+
+ if (test->testkind == IOAT_TEST_DMA_8K &&
+ test->buffer_size != 2 * PAGE_SIZE) {
+ ioat_test_log(0, "Asked for 8k test and buffer size isn't 8k\n");
+ test->status[IOAT_TEST_INVALID_INPUT]++;
+ return;
+ }
+
+ if (test->buffer_size > 1024 * 1024) {
+ ioat_test_log(0, "Buffer size too large >1MB\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
+
+ if (test->chain_depth * 2 > IOAT_MAX_BUFS) {
+ ioat_test_log(0, "Depth too large (> %u)\n",
+ (unsigned)IOAT_MAX_BUFS / 2);
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
+
+ if (btoc((uint64_t)test->buffer_size * test->chain_depth *
+ test->transactions) > (physmem / 4)) {
+ ioat_test_log(0, "Sanity check failed -- test would "
+ "use more than 1/4 of phys mem.\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
+
+ if ((uint64_t)test->transactions * test->chain_depth > (1<<16)) {
+ ioat_test_log(0, "Sanity check failed -- test would "
+ "use more than available IOAT ring space.\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
+
+ if (test->testkind >= IOAT_NUM_TESTKINDS) {
+ ioat_test_log(0, "Invalid kind %u\n",
+ (unsigned)test->testkind);
+ test->status[IOAT_TEST_INVALID_INPUT]++;
+ return;
+ }
+
+ dmaengine = ioat_get_dmaengine(test->channel_index, M_NOWAIT);
+ if (dmaengine == NULL) {
+ ioat_test_log(0, "Couldn't acquire dmaengine\n");
+ test->status[IOAT_TEST_NO_DMA_ENGINE]++;
+ return;
+ }
+ ioat = to_ioat_softc(dmaengine);
+
+ if (test->testkind == IOAT_TEST_FILL &&
+ (ioat->capabilities & IOAT_DMACAP_BFILL) == 0) {
+ ioat_test_log(0,
+ "Hardware doesn't support block fill, aborting test\n");
+ test->status[IOAT_TEST_INVALID_INPUT]++;
+ goto out;
+ }
+
+ if (test->coalesce_period > ioat->intrdelay_max) {
+ ioat_test_log(0,
+ "Hardware doesn't support intrdelay of %u us.\n",
+ (unsigned)test->coalesce_period);
+ test->status[IOAT_TEST_INVALID_INPUT]++;
+ goto out;
+ }
+ error = ioat_set_interrupt_coalesce(dmaengine, test->coalesce_period);
+ if (error == ENODEV && test->coalesce_period == 0)
+ error = 0;
+ if (error != 0) {
+ ioat_test_log(0, "ioat_set_interrupt_coalesce: %d\n", error);
+ test->status[IOAT_TEST_INVALID_INPUT]++;
+ goto out;
+ }
+
+ if (test->zero_stats)
+ memset(&ioat->stats, 0, sizeof(ioat->stats));
+
+ if (test->testkind == IOAT_TEST_RAW_DMA) {
+ if (test->raw_is_virtual) {
+ test->raw_vtarget = (void *)test->raw_target;
+ test->raw_target = vtophys(test->raw_vtarget);
+ } else {
+ test->raw_vtarget = pmap_mapdev(test->raw_target,
+ test->buffer_size);
+ }
+ }
+
+ index = g_thread_index++;
+ TAILQ_INIT(&test->free_q);
+ TAILQ_INIT(&test->pend_q);
+
+ if (test->duration == 0)
+ ioat_test_log(1, "Thread %d: num_loops remaining: 0x%08x\n",
+ index, test->transactions);
+ else
+ ioat_test_log(1, "Thread %d: starting\n", index);
+
+ rc = ioat_test_prealloc_memory(test, index);
+ if (rc != 0) {
+ ioat_test_log(0, "prealloc_memory: %d\n", rc);
+ goto out;
+ }
+ wmb();
+
+ test->too_late = false;
+ start = ticks;
+ end = start + (((sbintime_t)test->duration * hz) / 1000);
+
+ for (loops = 0;; loops++) {
+ if (test->duration == 0 && loops >= test->transactions)
+ break;
+ else if (test->duration != 0 && time_after(ticks, end)) {
+ test->too_late = true;
+ break;
+ }
+
+ ioat_test_submit_1_tx(test, dmaengine);
+ }
+
+ ioat_test_log(1, "Test Elapsed: %d ticks (overrun %d), %d sec.\n",
+ ticks - start, ticks - end, (ticks - start) / hz);
+
+ IT_LOCK();
+ while (!TAILQ_EMPTY(&test->pend_q))
+ msleep(&test->free_q, &ioat_test_lk, 0, "ioattestcompl", hz);
+ IT_UNLOCK();
+
+ ioat_test_log(1, "Test Elapsed2: %d ticks (overrun %d), %d sec.\n",
+ ticks - start, ticks - end, (ticks - start) / hz);
+
+ ioat_test_release_memory(test);
+out:
+ if (test->testkind == IOAT_TEST_RAW_DMA && !test->raw_is_virtual)
+ pmap_unmapdev((vm_offset_t)test->raw_vtarget,
+ test->buffer_size);
+ ioat_put_dmaengine(dmaengine);
+}
+
+static int
+ioat_test_open(struct cdev *dev, int flags, int fmt, struct thread *td)
+{
+
+ return (0);
+}
+
+static int
+ioat_test_close(struct cdev *dev, int flags, int fmt, struct thread *td)
+{
+
+ return (0);
+}
+
+static int
+ioat_test_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int flag,
+ struct thread *td)
+{
+
+ switch (cmd) {
+ case IOAT_DMATEST:
+ ioat_dma_test(arg);
+ break;
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+static struct cdevsw ioat_cdevsw = {
+ .d_version = D_VERSION,
+ .d_flags = 0,
+ .d_open = ioat_test_open,
+ .d_close = ioat_test_close,
+ .d_ioctl = ioat_test_ioctl,
+ .d_name = "ioat_test",
+};
+
+static int
+enable_ioat_test(bool enable)
+{
+
+ mtx_assert(&Giant, MA_OWNED);
+
+ if (enable && g_ioat_cdev == NULL) {
+ g_ioat_cdev = make_dev(&ioat_cdevsw, 0, UID_ROOT, GID_WHEEL,
+ 0600, "ioat_test");
+ } else if (!enable && g_ioat_cdev != NULL) {
+ destroy_dev(g_ioat_cdev);
+ g_ioat_cdev = NULL;
+ }
+ return (0);
+}
+
+static int
+sysctl_enable_ioat_test(SYSCTL_HANDLER_ARGS)
+{
+ int error, enabled;
+
+ enabled = (g_ioat_cdev != NULL);
+ error = sysctl_handle_int(oidp, &enabled, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+
+ enable_ioat_test(enabled);
+ return (0);
+}
+SYSCTL_PROC(_hw_ioat, OID_AUTO, enable_ioat_test, CTLTYPE_INT | CTLFLAG_RW,
+ 0, 0, sysctl_enable_ioat_test, "I",
+ "Non-zero: Enable the /dev/ioat_test device");
+
+void
+ioat_test_attach(void)
+{
+#ifdef notyet
+ char *val;
+
+ val = kern_getenv("hw.ioat.enable_ioat_test");
+ if (val != NULL && strcmp(val, "0") != 0) {
+#else
+ int val = 0;
+
+ TUNABLE_INT_FETCH("hw.ioat.enable_ioat_test", &val);
+ if (val != 0) {
+#endif
+ mtx_lock(&Giant);
+ enable_ioat_test(true);
+ mtx_unlock(&Giant);
+ }
+#ifdef notyet
+ freeenv(val);
+#endif
+}
+
+void
+ioat_test_detach(void)
+{
+
+ mtx_lock(&Giant);
+ enable_ioat_test(false);
+ mtx_unlock(&Giant);
+}
+
+static inline void
+_ioat_test_log(int verbosity, const char *fmt, ...)
+{
+ va_list argp;
+
+ if (verbosity > g_ioat_debug_level)
+ return;
+
+ va_start(argp, fmt);
+ vprintf(fmt, argp);
+ va_end(argp);
+}
diff --git a/sys/dev/ioat/ioat_test.h b/sys/dev/ioat/ioat_test.h
new file mode 100644
index 0000000..8ef521c
--- /dev/null
+++ b/sys/dev/ioat/ioat_test.h
@@ -0,0 +1,90 @@
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__FBSDID("$FreeBSD$");
+
+#ifndef __IOAT_TEST_H__
+#define __IOAT_TEST_H__
+
+enum ioat_res {
+ IOAT_TEST_OK = 0,
+ IOAT_TEST_NO_DMA_ENGINE,
+ IOAT_TEST_NO_MEMORY,
+ IOAT_TEST_MISCOMPARE,
+ IOAT_TEST_INVALID_INPUT,
+ IOAT_NUM_RES
+};
+
+enum ioat_test_kind {
+ IOAT_TEST_FILL = 0,
+ IOAT_TEST_DMA,
+ IOAT_TEST_RAW_DMA,
+ IOAT_TEST_DMA_8K,
+ IOAT_TEST_MEMCPY,
+ IOAT_NUM_TESTKINDS
+};
+
+struct test_transaction;
+
+struct ioat_test {
+ volatile uint32_t status[IOAT_NUM_RES];
+ uint32_t channel_index;
+
+ enum ioat_test_kind testkind;
+
+ /* HW max of 1MB */
+ uint32_t buffer_size;
+ uint32_t chain_depth;
+ uint32_t transactions;
+
+ /*
+ * If non-zero, duration is the test length in ms;
+ * if zero, the test is bounded by 'transactions' above.
+ */
+ uint32_t duration;
+
+ /* If true, check for miscompares after a copy. */
+ bool verify;
+
+ /* DMA directly to/from some memory address */
+ uint64_t raw_target;
+ void *raw_vtarget;
+ bool raw_write;
+ bool raw_is_virtual;
+
+ bool zero_stats;
+ /* Configure coalesce period */
+ uint16_t coalesce_period;
+
+ /* Internal usage -- not test inputs */
+ TAILQ_HEAD(, test_transaction) free_q;
+ TAILQ_HEAD(, test_transaction) pend_q;
+ volatile bool too_late;
+};
+
+#define IOAT_DMATEST _IOWR('i', 0, struct ioat_test)
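+
+/*
+ * Illustrative sketch of driving a test from userspace (see
+ * tools/tools/ioat/ioatcontrol.c for the real consumer): zero the struct,
+ * fill in the input fields, and issue the ioctl; results come back in the
+ * 'status' array.
+ *
+ *	struct ioat_test t = { 0 };
+ *	int fd = open("/dev/ioat_test", O_RDWR);
+ *
+ *	t.testkind = IOAT_TEST_DMA;
+ *	t.channel_index = 0;
+ *	t.transactions = 1000;
+ *	t.buffer_size = 64 * 1024;
+ *	t.chain_depth = 1;
+ *	ioctl(fd, IOAT_DMATEST, &t);
+ */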
+
+#endif /* __IOAT_TEST_H__ */
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 5135093..9d075d1 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -155,6 +155,7 @@ SUBDIR= \
${_iir} \
${_imgact_binmisc} \
${_io} \
+ ${_ioat} \
${_ipoib} \
${_ipdivert} \
${_ipfilter} \
@@ -723,6 +724,7 @@ _if_ndis= if_ndis
_igb= igb
_iir= iir
_io= io
+_ioat= ioat
_ipmi= ipmi
.if ${MK_OFED} != "no" || defined(ALL_MODULES)
_ipoib= ipoib
diff --git a/sys/modules/ioat/Makefile b/sys/modules/ioat/Makefile
new file mode 100644
index 0000000..5a2c417
--- /dev/null
+++ b/sys/modules/ioat/Makefile
@@ -0,0 +1,15 @@
+# ioat Loadable Kernel Module
+#
+# $FreeBSD$
+
+IOAT_SRC_PATH = ${.CURDIR}/../..
+
+.PATH: ${IOAT_SRC_PATH}/dev/ioat
+
+KMOD= ioat
+SRCS= ioat.c ioat_test.c
+SRCS+= device_if.h bus_if.h pci_if.h
+
+CFLAGS+= -I${IOAT_SRC_PATH}
+
+.include <bsd.kmod.mk>
diff --git a/tools/tools/ioat/Makefile b/tools/tools/ioat/Makefile
new file mode 100644
index 0000000..7ea3e6e
--- /dev/null
+++ b/tools/tools/ioat/Makefile
@@ -0,0 +1,9 @@
+# $FreeBSD$
+
+PROG= ioatcontrol
+MAN= ioatcontrol.8
+CFLAGS+= -I${.CURDIR:H:H:H}/sys/dev/ioat
+WARNS?= 6
+LIBADD= util
+
+.include <bsd.prog.mk>
diff --git a/tools/tools/ioat/ioatcontrol.8 b/tools/tools/ioat/ioatcontrol.8
new file mode 100644
index 0000000..9e156fd
--- /dev/null
+++ b/tools/tools/ioat/ioatcontrol.8
@@ -0,0 +1,182 @@
+.\" Copyright (c) 2015 EMC / Isilon Storage Division
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd December 14, 2015
+.Dt IOATCONTROL 8
+.Os
+.Sh NAME
+.Nm ioatcontrol
+.Nd Userspace tool to test
+.Xr ioat 4
+.Sh SYNOPSIS
+.Nm
+.Op Fl c Ar period
+.Op Fl E
+.Op Fl f
+.Op Fl m
+.Op Fl V
+.Op Fl z
+.Ar channel_number
+.Ar num_txns
+.Ar [ bufsize
+.Ar [ chain-len
+.Ar [ duration ] ] ]
+.Nm
+.Fl r
+.Op Fl c Ar period
+.Op Fl v
+.Op Fl V
+.Op Fl w
+.Op Fl z
+.Ar channel_number
+.Ar address
+.Ar [ bufsize ]
+.Sh DESCRIPTION
+.Nm
+allows one to issue some number of test operations to the
+.Xr ioat 4
+driver on a specific hardware channel.
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl c Ar period
+Configure the channel's interrupt coalescing period, in microseconds.
+.It Fl E
+Test non-contiguous 8k copy.
+.It Fl f
+Test block fill (by default,
+.Nm
+tests copy).
+.It Fl m
+Test memcpy instead of DMA.
+.It Fl V
+Verify copies/fills for accuracy.
+.It Fl z
+Zero device statistics before running test.
+.El
+.Pp
+Alternatively one can use
+.Nm
+.Fl r
+to issue DMA to or from a specific
+.Ar address .
+The arguments in "raw" mode are:
+.Bl -tag -width Ds
+.It Fl c Ar period
+As above.
+.It Fl v
+.Ar address
+is a kernel virtual address (by default,
+.Ar address
+is assumed to be a physical address).
+.It Fl V
+Dump the resulting hex to syslog.
+.It Fl w
+Write to the specified
+.Ar address
+(by default,
+.Nm
+.Fl r
+reads).
+.It Fl z
+As above.
+.El
+.Pp
+.Nm
+operates in one of two modes; if the
+.Ar duration
+argument is passed,
+.Nm
+tries to estimate the copy rate in bytes per second by running
+.Ar num_txns
+repeatedly in a loop.
+If
+.Ar duration
+is not passed,
+.Nm
+only runs through
+.Ar num_txns
+once and prints the total bytes copied, as well as error information.
+.Pp
+The
+.Ar bufsize
+argument determines the size of buffers to use for each
+.Fn ioat_copy
+invocation.
+The default is 256 KB.
+In raw mode, the default is 4 KB.
+.Pp
+The
+.Ar chain-len
+argument determines the number of copies to chain together in a single DMA
+transaction.
+The default is 2, and the maximum is currently 4.
+.Pp
+The
+.Ar duration
+argument specifies an approximate time limit for the test, in milliseconds.
+.Pp
+The test will allocate two chunks of memory for each component of each
+transaction's chain.
+It will initialize them with specific data patterns.
+During the test, it submits DMA requests to copy between pairs of buffers.
+If the
+.Fl V
+flag was specified, it will compare the contents in the callback for a copy
+error.
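+.Sh EXAMPLES
+The following invocations are illustrative.
+Run 1000 copy transactions of 8192 bytes each on channel 0, verifying each
+copy:
+.Pp
+.Dl ioatcontrol -V 0 1000 8192
+.Pp
+Estimate block fill throughput on channel 1, using 256 kB buffers, for
+roughly 5000 ms:
+.Pp
+.Dl ioatcontrol -f 1 128 262144 1 5000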
+.Sh FILES
+.Pa /dev/ioat_test
+.Pp
+The interface between
+.Nm
+and
+.Xr ioat 4 .
+.Xr ioat 4
+exposes this device when the sysctl or loader tunable
+.Cd hw.ioat.enable_ioat_test
+is set to 1.
+.Sh DIAGNOSTICS
+The wait channel
+.Va test_submit
+indicates that the test code has enqueued all requested transactions and is
+waiting on the IOAT hardware to complete one before issuing another operation.
+.Sh SEE ALSO
+.Xr ioat 4
+.Sh HISTORY
+The
+.Xr ioat 4
+driver first appeared in
+.Fx 11.0 .
+.Sh AUTHORS
+The
+.Xr ioat 4
+driver and
+.Nm
+tool were developed by
+.An \&Jim Harris Aq Mt jimharris@FreeBSD.org ,
+.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com ,
+and
+.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
+This manual page was written by
+.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
diff --git a/tools/tools/ioat/ioatcontrol.c b/tools/tools/ioat/ioatcontrol.c
new file mode 100644
index 0000000..32decc7
--- /dev/null
+++ b/tools/tools/ioat/ioatcontrol.c
@@ -0,0 +1,258 @@
+/*-
+ * Copyright (C) 2012 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/ioctl.h>
+#include <sys/queue.h>
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include <libutil.h>
+
+#include "ioat_test.h"
+
+static int prettyprint(struct ioat_test *);
+
+static void
+usage(void)
+{
+
+ printf("Usage: %s [-E|-f|-m] OPTIONS <channel #> <txns> [<bufsize> "
+ "[<chain-len> [duration]]]\n", getprogname());
+ printf(" %s -r [-v] OPTIONS <channel #> <addr> [<bufsize>]\n\n",
+ getprogname());
+ printf(" OPTIONS:\n");
+ printf(" -c <period> - Enable interrupt coalescing (us)\n");
+ printf(" -V - Enable verification\n");
+ printf(" -z - Zero device stats before test\n");
+ exit(EX_USAGE);
+}
+
+static void
+main_raw(struct ioat_test *t, int argc, char **argv)
+{
+ int fd;
+
+ /* Raw DMA defaults */
+ t->testkind = IOAT_TEST_RAW_DMA;
+ t->transactions = 1;
+ t->chain_depth = 1;
+ t->buffer_size = 4 * 1024;
+
+ t->raw_target = strtoull(argv[1], NULL, 0);
+ if (t->raw_target == 0) {
+ printf("Target shoudln't be NULL\n");
+ exit(EX_USAGE);
+ }
+
+ if (argc >= 3) {
+ t->buffer_size = atoi(argv[2]);
+ if (t->buffer_size == 0) {
+ printf("Buffer size must be greater than zero\n");
+ exit(EX_USAGE);
+ }
+ }
+
+ fd = open("/dev/ioat_test", O_RDWR);
+ if (fd < 0) {
+ printf("Cannot open /dev/ioat_test\n");
+ exit(EX_UNAVAILABLE);
+ }
+
+ (void)ioctl(fd, IOAT_DMATEST, t);
+ close(fd);
+
+ exit(prettyprint(t));
+}
+
+int
+main(int argc, char **argv)
+{
+ struct ioat_test t;
+ int fd, ch;
+ bool fflag, rflag, Eflag, mflag;
+ unsigned modeflags;
+
+ /* Zero all test inputs; the flags below only set those requested. */
+ memset(&t, 0, sizeof(t));
+ fflag = rflag = Eflag = mflag = false;
+ modeflags = 0;
+
+ while ((ch = getopt(argc, argv, "c:EfmrvVwz")) != -1) {
+ switch (ch) {
+ case 'c':
+ t.coalesce_period = atoi(optarg);
+ break;
+ case 'E':
+ Eflag = true;
+ modeflags++;
+ break;
+ case 'f':
+ fflag = true;
+ modeflags++;
+ break;
+ case 'm':
+ mflag = true;
+ modeflags++;
+ break;
+ case 'r':
+ rflag = true;
+ modeflags++;
+ break;
+ case 'v':
+ t.raw_is_virtual = true;
+ break;
+ case 'V':
+ t.verify = true;
+ break;
+ case 'w':
+ t.raw_write = true;
+ break;
+ case 'z':
+ t.zero_stats = true;
+ break;
+ default:
+ usage();
+ }
+ }
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 2)
+ usage();
+
+ if (modeflags > 1) {
+ printf("Invalid: Cannot use >1 mode flag (-E, -f, -m, or -r)\n");
+ usage();
+ }
+
+ /* Defaults for optional args */
+ t.buffer_size = 256 * 1024;
+ t.chain_depth = 2;
+ t.duration = 0;
+ t.testkind = IOAT_TEST_DMA;
+
+ if (fflag)
+ t.testkind = IOAT_TEST_FILL;
+ else if (Eflag) {
+ t.testkind = IOAT_TEST_DMA_8K;
+ t.buffer_size = 8 * 1024;
+ } else if (mflag)
+ t.testkind = IOAT_TEST_MEMCPY;
+
+ t.channel_index = atoi(argv[0]);
+ if (t.channel_index >= 8) {
+ printf("Channel number must be between 0 and 7.\n");
+ return (EX_USAGE);
+ }
+
+ if (rflag) {
+ main_raw(&t, argc, argv);
+ return (EX_OK);
+ }
+
+ t.transactions = atoi(argv[1]);
+
+ if (argc >= 3) {
+ t.buffer_size = atoi(argv[2]);
+ if (t.buffer_size == 0) {
+ printf("Buffer size must be greater than zero\n");
+ return (EX_USAGE);
+ }
+ }
+
+ if (argc >= 4) {
+ t.chain_depth = atoi(argv[3]);
+ if (t.chain_depth < 1) {
+ printf("Chain length must be greater than zero\n");
+ return (EX_USAGE);
+ }
+ }
+
+ if (argc >= 5) {
+ t.duration = atoi(argv[4]);
+ if (t.duration < 1) {
+ printf("Duration must be greater than zero\n");
+ return (EX_USAGE);
+ }
+ }
+
+ fd = open("/dev/ioat_test", O_RDWR);
+ if (fd < 0) {
+ printf("Cannot open /dev/ioat_test\n");
+ return (EX_UNAVAILABLE);
+ }
+
+ (void)ioctl(fd, IOAT_DMATEST, &t);
+ close(fd);
+
+ return (prettyprint(&t));
+}
+
+static int
+prettyprint(struct ioat_test *t)
+{
+ char bps[10], bytesh[10];
+ uintmax_t bytes;
+
+ if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0 ||
+ t->status[IOAT_TEST_NO_MEMORY] != 0 ||
+ t->status[IOAT_TEST_MISCOMPARE] != 0) {
+ printf("Errors:\n");
+ if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0)
+ printf("\tNo DMA engine present: %u\n",
+ (unsigned)t->status[IOAT_TEST_NO_DMA_ENGINE]);
+ if (t->status[IOAT_TEST_NO_MEMORY] != 0)
+ printf("\tOut of memory: %u\n",
+ (unsigned)t->status[IOAT_TEST_NO_MEMORY]);
+ if (t->status[IOAT_TEST_MISCOMPARE] != 0)
+ printf("\tMiscompares: %u\n",
+ (unsigned)t->status[IOAT_TEST_MISCOMPARE]);
+ }
+
+ printf("Processed %u txns\n", (unsigned)t->status[IOAT_TEST_OK] /
+ t->chain_depth);
+ bytes = (uintmax_t)t->buffer_size * t->status[IOAT_TEST_OK];
+
+ humanize_number(bytesh, sizeof(bytesh), (int64_t)bytes, "B",
+ HN_AUTOSCALE, HN_DECIMAL);
+ if (t->duration) {
+ humanize_number(bps, sizeof(bps),
+ (int64_t)1000 * bytes / t->duration, "B/s", HN_AUTOSCALE,
+ HN_DECIMAL);
+ printf("%ju (%s) copied in %u ms (%s)\n", bytes, bytesh,
+ (unsigned)t->duration, bps);
+ } else
+ printf("%ju (%s) copied\n", bytes, bytesh);
+
+ return (EX_OK);
+}