summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorcem <cem@FreeBSD.org>2015-10-22 04:38:05 +0000
committercem <cem@FreeBSD.org>2015-10-22 04:38:05 +0000
commitc8639a493325ed9b19a31fa6e31d3116a112af78 (patch)
tree511f4aaffd042632494654fc4192e5c257afb7aa
parentbf2670a3a7e8340257ce7dd15678fa1c01769b00 (diff)
downloadFreeBSD-src-c8639a493325ed9b19a31fa6e31d3116a112af78.zip
FreeBSD-src-c8639a493325ed9b19a31fa6e31d3116a112af78.tar.gz
Improve flexibility of ioat_test / ioatcontrol(8)
The test logic now preallocates memory before running the test. The buffer size is now configurable. Post-copy verification is configurable. The number of copies to chain into one transaction (one interrupt) is configurable. A 'duration' mode is added, which repeats the test until the duration has elapsed, reporting the B/s and transactions completed. ioatcontrol.8 has been updated to document the new arguments. Initial limits (on this particular Broadwell-DE) (and when the interrupts are working) seem to be: 256 interrupts/sec or ~6 GB/s, whichever limit is more restrictive. Unfortunately, it seems the interrupt-reset handling on Broadwell isn't working as intended. That will be fixed in a later commit. Sponsored by: EMC / Isilon Storage Division
-rw-r--r--sys/dev/ioat/ioat_test.c250
-rw-r--r--sys/dev/ioat/ioat_test.h38
-rw-r--r--tools/tools/ioat/Makefile1
-rw-r--r--tools/tools/ioat/ioatcontrol.862
-rw-r--r--tools/tools/ioat/ioatcontrol.c107
5 files changed, 379 insertions, 79 deletions
diff --git a/sys/dev/ioat/ioat_test.c b/sys/dev/ioat/ioat_test.c
index 8352e74..8ac6a9b 100644
--- a/sys/dev/ioat/ioat_test.c
+++ b/sys/dev/ioat/ioat_test.c
@@ -51,18 +51,28 @@ __FBSDID("$FreeBSD$");
#include "ioat_internal.h"
#include "ioat_test.h"
+#ifndef time_after
+#define time_after(a,b) ((long)(b) - (long)(a) < 0)
+#endif
+
MALLOC_DEFINE(M_IOAT_TEST, "ioat_test", "ioat test allocations");
-#define IOAT_TEST_SIZE 0x40000
-#define IOAT_MAX_BUFS 8
+#define IOAT_MAX_BUFS 256
+/*
+ * One preallocated unit of test work.  buf[] holds source/destination
+ * pairs: buf[2*i] is copied to buf[2*i+1] for each i below depth.
+ */
struct test_transaction {
- uint8_t num_buffers;
void *buf[IOAT_MAX_BUFS];
+ /* Size in bytes of every buffer in buf[]. */
uint32_t length;
+ /* Number of copies chained into this transaction. */
+ uint32_t depth;
struct ioat_test *test;
+ /* Linkage on the owning test's free_q or pend_q. */
+ TAILQ_ENTRY(test_transaction) entry;
};
+#define IT_LOCK() mtx_lock(&ioat_test_lk)
+#define IT_UNLOCK() mtx_unlock(&ioat_test_lk)
+#define IT_ASSERT() mtx_assert(&ioat_test_lk, MA_OWNED)
+static struct mtx ioat_test_lk;
+MTX_SYSINIT(ioat_test_lk, &ioat_test_lk, "test coordination mtx", MTX_DEF);
+
static int g_thread_index = 1;
static struct cdev *g_ioat_cdev = NULL;
@@ -73,7 +83,7 @@ ioat_test_transaction_destroy(struct test_transaction *tx)
for (i = 0; i < IOAT_MAX_BUFS; i++) {
if (tx->buf[i] != NULL) {
- contigfree(tx->buf[i], IOAT_TEST_SIZE, M_IOAT_TEST);
+ contigfree(tx->buf[i], tx->length, M_IOAT_TEST);
tx->buf[i] = NULL;
}
}
@@ -82,17 +92,16 @@ ioat_test_transaction_destroy(struct test_transaction *tx)
}
static struct
-test_transaction *ioat_test_transaction_create(uint8_t num_buffers,
+test_transaction *ioat_test_transaction_create(unsigned num_buffers,
uint32_t buffer_size)
{
struct test_transaction *tx;
- int i;
+ unsigned i;
- tx = malloc(sizeof(struct test_transaction), M_IOAT_TEST, M_NOWAIT | M_ZERO);
+ tx = malloc(sizeof(*tx), M_IOAT_TEST, M_NOWAIT | M_ZERO);
if (tx == NULL)
return (NULL);
- tx->num_buffers = num_buffers;
tx->length = buffer_size;
for (i = 0; i < num_buffers; i++) {
@@ -107,6 +116,18 @@ test_transaction *ioat_test_transaction_create(uint8_t num_buffers,
return (tx);
}
+/*
+ * Check every source/destination pair of a transaction.
+ * Returns true only if, for each of the tx->depth pairs, buf[2*i] and
+ * buf[2*i+1] are identical over the full tx->length bytes.
+ */
+static bool
+ioat_compare_ok(struct test_transaction *tx)
+{
+ uint32_t i;
+
+ for (i = 0; i < tx->depth; i++) {
+ /* Even slots are sources, odd slots are the matching destinations. */
+ if (memcmp(tx->buf[2*i], tx->buf[2*i+1], tx->length) != 0)
+ return (false);
+ }
+ return (true);
+}
+
+
static void
ioat_dma_test_callback(void *arg)
{
@@ -116,82 +137,195 @@ ioat_dma_test_callback(void *arg)
tx = arg;
test = tx->test;
- if (memcmp(tx->buf[0], tx->buf[1], tx->length) != 0) {
+ if (test->verify && !ioat_compare_ok(tx)) {
ioat_log_message(0, "miscompare found\n");
- test->status = IOAT_TEST_MISCOMPARE;
+ atomic_add_32(&test->status[IOAT_TEST_MISCOMPARE], tx->depth);
+ } else if (!test->too_late)
+ atomic_add_32(&test->status[IOAT_TEST_OK], tx->depth);
+
+ IT_LOCK();
+ TAILQ_REMOVE(&test->pend_q, tx, entry);
+ TAILQ_INSERT_TAIL(&test->free_q, tx, entry);
+ wakeup(&test->free_q);
+ IT_UNLOCK();
+}
+
+/*
+ * Allocate test->transactions transactions, each with two buffers per chain
+ * element, put them on test->free_q and fill them with the test pattern
+ * (source = ~val, destination = val, so they differ until the copy is done).
+ *
+ * Returns 0 on success.  On allocation failure bumps
+ * status[IOAT_TEST_NO_MEMORY] and returns ENOMEM; transactions created
+ * before the failure are left on free_q and must be released by the caller.
+ */
+static int
+ioat_test_prealloc_memory(struct ioat_test *test, int index)
+{
+ uint32_t i, j, k;
+ struct test_transaction *tx;
+
+ for (i = 0; i < test->transactions; i++) {
+ tx = ioat_test_transaction_create(test->chain_depth * 2,
+ test->buffer_size);
+ if (tx == NULL) {
+ ioat_log_message(0, "tx == NULL - memory exhausted\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return (ENOMEM);
+ }
+
+ TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
+
+ tx->test = test;
+ tx->depth = test->chain_depth;
+
+ /* fill in source buffers */
+ for (j = 0; j < (tx->length / sizeof(uint32_t)); j++) {
+ /*
+ * Shift as unsigned: 'index' is a signed int and
+ * index << 28 overflows (undefined) once index >= 8.
+ */
+ uint32_t val = j + ((uint32_t)index << 28);
+
+ for (k = 0; k < test->chain_depth; k++) {
+ ((uint32_t *)tx->buf[2*k])[j] = ~val;
+ ((uint32_t *)tx->buf[2*k+1])[j] = val;
+ }
+ }
}
- atomic_add_32(&test->num_completions, 1);
- ioat_test_transaction_destroy(tx);
- if (test->num_completions == test->num_loops)
- wakeup(test);
+ return (0);
}
+/*
+ * Hand every transaction on free_q and pend_q to
+ * ioat_test_transaction_destroy() and leave both queues empty.
+ */
static void
-ioat_dma_test(void *arg)
+ioat_test_release_memory(struct ioat_test *test)
+{
+ struct test_transaction *tx, *s;
+
+ /*
+ * Entries are not unlinked one by one: the _SAFE iterator saves the
+ * next pointer before each destroy, and TAILQ_INIT resets the head.
+ */
+ TAILQ_FOREACH_SAFE(tx, &test->free_q, entry, s)
+ ioat_test_transaction_destroy(tx);
+ TAILQ_INIT(&test->free_q);
+
+ TAILQ_FOREACH_SAFE(tx, &test->pend_q, entry, s)
+ ioat_test_transaction_destroy(tx);
+ TAILQ_INIT(&test->pend_q);
+}
+
+
+/*
+ * Take one transaction off free_q (sleeping while the queue is empty),
+ * move it to pend_q, and submit its tx->depth copies to the DMA engine.
+ * Only the last copy of the chain carries the completion callback and
+ * DMA_INT_EN.  Panics if ioat_copy() returns NULL.
+ */
+static void
+ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
+{
+ struct test_transaction *tx;
+ struct bus_dmadesc *desc;
+ bus_dmaengine_callback_t cb;
+ bus_addr_t src, dest;
+ uint32_t i, flags;
+
+ IT_LOCK();
+ /* ioat_dma_test_callback() does wakeup(&test->free_q) on requeue. */
+ while (TAILQ_EMPTY(&test->free_q))
+ msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0);
+
+ tx = TAILQ_FIRST(&test->free_q);
+ TAILQ_REMOVE(&test->free_q, tx, entry);
+ TAILQ_INSERT_HEAD(&test->pend_q, tx, entry);
+ IT_UNLOCK();
+
+ ioat_acquire(dma);
+ for (i = 0; i < tx->depth; i++) {
+ /* Pair i: even slot is the source, odd slot the destination. */
+ src = vtophys((vm_offset_t)tx->buf[2*i]);
+ dest = vtophys((vm_offset_t)tx->buf[2*i+1]);
+
+ /* One callback (and one interrupt request) per chain. */
+ if (i == tx->depth - 1) {
+ cb = ioat_dma_test_callback;
+ flags = DMA_INT_EN;
+ } else {
+ cb = NULL;
+ flags = 0;
+ }
+
+ desc = ioat_copy(dma, src, dest, tx->length, cb, tx, flags);
+ if (desc == NULL)
+ panic("Failed to allocate a ring slot "
+ "-- this shouldn't happen!");
+ }
+ ioat_release(dma);
+}
+
+
+/*
+ * Run one DMA copy test described by 'arg' (a struct ioat_test *).
+ *
+ * Validates the requested sizes, preallocates every transaction, then
+ * submits transactions until either 'transactions' of them have been issued
+ * (duration == 0) or 'duration' milliseconds worth of ticks have elapsed.
+ * Waits for all pending transactions to complete before freeing the
+ * buffers.  Results are reported through the test->status[] counters.
+ */
+static void
+ioat_dma_test(void *arg)
+{
struct ioat_test *test;
bus_dmaengine_t dmaengine;
uint32_t loops;
- int index, i;
+ int index, rc, start, end;
test = arg;
- loops = test->num_loops;
-
- test->status = IOAT_TEST_OK;
- test->num_completions = 0;
+ memset(__DEVOLATILE(void *, test->status), 0, sizeof(test->status));
- index = g_thread_index++;
- dmaengine = ioat_get_dmaengine(test->channel_index);
+ if (test->buffer_size > 1024 * 1024) {
+ ioat_log_message(0, "Buffer size too large >1MB\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
- if (dmaengine == NULL) {
- ioat_log_message(0, "Couldn't acquire dmaengine\n");
- test->status = IOAT_TEST_NO_DMA_ENGINE;
+ /* Compare without multiplying: chain_depth * 2 can wrap in 32 bits. */
+ if (test->chain_depth > IOAT_MAX_BUFS / 2) {
+ ioat_log_message(0, "Depth too large (> %u)\n",
+ (unsigned)IOAT_MAX_BUFS / 2);
+ test->status[IOAT_TEST_NO_MEMORY]++;
return;
}
- ioat_log_message(0, "Thread %d: num_loops remaining: 0x%07x\n", index,
- test->num_loops);
+ /*
+ * A zero-length chain never schedules a completion callback, and a
+ * timed run with no transactions has nothing on free_q to submit;
+ * either way this thread would sleep forever.
+ */
+ if (test->chain_depth == 0 ||
+ (test->duration != 0 && test->transactions == 0)) {
+ ioat_log_message(0, "Sanity check failed -- chain depth "
+ "and transaction count must be non-zero.\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
+ if (btoc((uint64_t)test->buffer_size * test->chain_depth *
+ test->transactions) > (physmem / 4)) {
+ ioat_log_message(0, "Sanity check failed -- test would "
+ "use more than 1/4 of phys mem.\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
- for (loops = 0; loops < test->num_loops; loops++) {
- bus_addr_t src, dest;
+ if ((uint64_t)test->transactions * test->chain_depth > (1<<16)) {
+ ioat_log_message(0, "Sanity check failed -- test would "
+ "use more than available IOAT ring space.\n");
+ test->status[IOAT_TEST_NO_MEMORY]++;
+ return;
+ }
- if (loops % 0x10000 == 0) {
- ioat_log_message(0, "Thread %d: "
- "num_loops remaining: 0x%07x\n", index,
- test->num_loops - loops);
- }
+ dmaengine = ioat_get_dmaengine(test->channel_index);
+ if (dmaengine == NULL) {
+ ioat_log_message(0, "Couldn't acquire dmaengine\n");
+ test->status[IOAT_TEST_NO_DMA_ENGINE]++;
+ return;
+ }
- tx = ioat_test_transaction_create(2, IOAT_TEST_SIZE);
- if (tx == NULL) {
- ioat_log_message(0, "tx == NULL - memory exhausted\n");
- atomic_add_32(&test->num_completions, 1);
- test->status = IOAT_TEST_NO_MEMORY;
- continue;
+ index = g_thread_index++;
+ TAILQ_INIT(&test->free_q);
+ TAILQ_INIT(&test->pend_q);
+
+ if (test->duration == 0)
+ ioat_log_message(1, "Thread %d: num_loops remaining: 0x%08x\n",
+ index, test->transactions);
+ else
+ ioat_log_message(1, "Thread %d: starting\n", index);
+
+ rc = ioat_test_prealloc_memory(test, index);
+ if (rc != 0) {
+ ioat_log_message(0, "prealloc_memory: %d\n", rc);
+ /* Free the transactions allocated before the failure. */
+ ioat_test_release_memory(test);
+ return;
+ }
+ wmb();
+
+ test->too_late = false;
+ start = ticks;
+ /* duration is in milliseconds; convert to a tick deadline. */
+ end = start + (((sbintime_t)test->duration * hz) / 1000);
+
+ for (loops = 0;; loops++) {
+ if (test->duration == 0 && loops >= test->transactions)
+ break;
+ else if (test->duration != 0 && time_after(ticks, end)) {
+ /* Completions after this point are not counted as OK. */
+ test->too_late = true;
+ break;
}
- tx->test = test;
- wmb();
-
- /* fill in source buffer */
- for (i = 0; i < (IOAT_TEST_SIZE / sizeof(uint32_t)); i++) {
- uint32_t val = i + (loops << 16) + (index << 28);
- ((uint32_t *)tx->buf[0])[i] = ~val;
- ((uint32_t *)tx->buf[1])[i] = val;
- }
+ ioat_test_submit_1_tx(test, dmaengine);
+ }
- src = pmap_kextract((vm_offset_t)tx->buf[0]);
- dest = pmap_kextract((vm_offset_t)tx->buf[1]);
+ ioat_log_message(1, "Test Elapsed: %d ticks (overrun %d), %d sec.\n",
+ ticks - start, ticks - end, (ticks - start) / hz);
- ioat_acquire(dmaengine);
- ioat_copy(dmaengine, src, dest, IOAT_TEST_SIZE,
- ioat_dma_test_callback, tx, DMA_INT_EN);
- ioat_release(dmaengine);
- }
+ /* Drain: the callback moves each tx from pend_q back to free_q. */
+ IT_LOCK();
+ while (!TAILQ_EMPTY(&test->pend_q))
+ msleep(&test->free_q, &ioat_test_lk, 0, "ioattestcompl", hz);
+ IT_UNLOCK();
- while (test->num_completions < test->num_loops)
- tsleep(test, 0, "compl", 5 * hz);
+ ioat_log_message(1, "Test Elapsed2: %d ticks (overrun %d), %d sec.\n",
+ ticks - start, ticks - end, (ticks - start) / hz);
+ ioat_test_release_memory(test);
}
static int
diff --git a/sys/dev/ioat/ioat_test.h b/sys/dev/ioat/ioat_test.h
index 636a971..290d09b 100644
--- a/sys/dev/ioat/ioat_test.h
+++ b/sys/dev/ioat/ioat_test.h
@@ -29,17 +29,39 @@ __FBSDID("$FreeBSD$");
#ifndef __IOAT_TEST_H__
#define __IOAT_TEST_H__
+/* Indices into ioat_test.status[]; each slot is a counter. */
+enum ioat_res {
+ /* Copies that completed without a reported miscompare. */
+ IOAT_TEST_OK = 0,
+ /* The requested channel yielded no DMA engine. */
+ IOAT_TEST_NO_DMA_ENGINE,
+ /* Allocation failed or a size sanity check rejected the request. */
+ IOAT_TEST_NO_MEMORY,
+ /* Copies belonging to a transaction whose buffers did not match. */
+ IOAT_TEST_MISCOMPARE,
+ /* Number of result kinds; sizes the status[] array. */
+ IOAT_NUM_RES
+};
+
+struct test_transaction;
+
+/* Argument block for the IOAT_DMATEST ioctl: test inputs plus results. */
struct ioat_test {
+ /* Result counters, indexed by enum ioat_res. */
+ volatile uint32_t status[IOAT_NUM_RES];
+ /* Which DMA channel to run the test on. */
uint32_t channel_index;
- uint32_t num_loops;
- volatile uint32_t num_completions;
- uint32_t status;
-};
-#define IOAT_TEST_OK 0
-#define IOAT_TEST_NO_DMA_ENGINE 1
-#define IOAT_TEST_NO_MEMORY 2
-#define IOAT_TEST_MISCOMPARE 3
+ /* HW max of 1MB */
+ uint32_t buffer_size;
+ /* Number of copies chained into one transaction. */
+ uint32_t chain_depth;
+ /* Number of transactions to preallocate (and, untimed, to submit). */
+ uint32_t transactions;
+
+ /*
+ * If non-zero, duration is time in ms;
+ * If zero, bounded by 'transactions' above.
+ */
+ uint32_t duration;
+
+ /* If true, check for miscompares after a copy. */
+ bool verify;
+
+ /* Internal usage -- not test inputs */
+ TAILQ_HEAD(, test_transaction) free_q;
+ TAILQ_HEAD(, test_transaction) pend_q;
+ volatile bool too_late;
+};
#define IOAT_DMATEST _IOWR('i', 0, struct ioat_test)
diff --git a/tools/tools/ioat/Makefile b/tools/tools/ioat/Makefile
index a3f4968..7ea3e6e 100644
--- a/tools/tools/ioat/Makefile
+++ b/tools/tools/ioat/Makefile
@@ -4,5 +4,6 @@ PROG= ioatcontrol
MAN= ioatcontrol.8
CFLAGS+= -I${.CURDIR:H:H:H}/sys/dev/ioat
WARNS?= 6
+LIBADD= util
.include <bsd.prog.mk>
diff --git a/tools/tools/ioat/ioatcontrol.8 b/tools/tools/ioat/ioatcontrol.8
index 762ce14..b04db85 100644
--- a/tools/tools/ioat/ioatcontrol.8
+++ b/tools/tools/ioat/ioatcontrol.8
@@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd August 24, 2015
+.Dd October 21, 2015
.Dt IOATCONTROL 8
.Os
.Sh NAME
@@ -33,18 +33,64 @@
.Xr ioat 4
.Sh SYNOPSIS
.Nm
+.Op Fl V
.Ar channel_number
-.Ar num_loops
+.Ar num_txns
+.Ar [ bufsize
+.Ar [ chain-len
+.Ar [ duration ] ] ]
.Sh DESCRIPTION
.Nm
allows one to issue some number of test operations to the
.Xr ioat 4
driver on a specific hardware channel.
+The arguments are as follows:
+.Bl -tag -width Ds
+.It Fl V
+Verify copies for accuracy.
+.El
.Pp
-Each loop will allocate two chunks of memory, write data patterns to them,
-submit a DMA request to copy one buffer to the other, and compare the contents
-in the callback.
-If the contents are not as expected, an error is reported.
+.Nm
+operates in one of two modes; if the
+.Ar duration
+argument is passed,
+.Nm
+tries to estimate the copy rate in bytes per second by running
+.Ar num_txns
+repeatedly in a loop.
+If
+.Ar duration
+is not passed,
+.Nm
+only runs through
+.Ar num_txns
+once and prints the total bytes copied, as well as error information.
+.Pp
+The
+.Ar bufsize
+argument determines the size of buffers to use for each
+.Fn ioat_copy
+invocation.
+The default is 256 KB.
+.Pp
+The
+.Ar chain-len
+argument determines the number of copies to chain together in a single DMA
+transaction.
+The default is 2, and the maximum is currently 128.
+.Pp
+The
+.Ar duration
+argument specifies an approximate time limit for the test, in milliseconds.
+.Pp
+The test will allocate two chunks of memory for each component of each
+transaction's chain.
+It will initialize them with specific data patterns.
+During the test, it submits DMA requests to copy between pairs of buffers.
+If the
+.Fl V
+flag was specified, it will compare the contents in the callback for a copy
+error.
.Sh FILES
.Pa /dev/ioat_test
.Pp
@@ -55,6 +101,10 @@ and
.Nm
exposes it with
.Cd hw.ioat.enable_ioat_test=1 .
+.Sh DIAGNOSTICS
+The wait channel
+.Va test_submit
+indicates that the test code is keeping the DMA engine full of work.
.Sh SEE ALSO
.Xr ioat 4
.Sh HISTORY
diff --git a/tools/tools/ioat/ioatcontrol.c b/tools/tools/ioat/ioatcontrol.c
index 7cfb816..88fbea6 100644
--- a/tools/tools/ioat/ioatcontrol.c
+++ b/tools/tools/ioat/ioatcontrol.c
@@ -28,34 +28,88 @@
__FBSDID("$FreeBSD$");
#include <sys/ioctl.h>
+#include <sys/queue.h>
#include <fcntl.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <sysexits.h>
#include <unistd.h>
+#include <libutil.h>
+
#include "ioat_test.h"
+static int prettyprint(struct ioat_test *);
+
+/* Print the command-line synopsis and exit with EX_USAGE; never returns. */
+static void
+usage(void)
+{
+
+ printf("Usage: %s [-V] <channel #> <txns> [<bufsize> "
+ "[<chain-len> [duration]]]\n", getprogname());
+ exit(EX_USAGE);
+}
+
int
main(int argc, char **argv)
{
struct ioat_test t;
- int fd;
+ int fd, ch;
- if (argc < 3) {
- printf("Usage: %s <channel #> <num_loops>\n", argv[0]);
- return (EX_USAGE);
+ /*
+ * -V is optional: default to "off" so the flag is never read
+ * uninitialized when the option is absent.
+ */
+ t.verify = false;
+
+ while ((ch = getopt(argc, argv, "V")) != -1) {
+ switch (ch) {
+ case 'V':
+ t.verify = true;
+ break;
+ default:
+ usage();
+ }
}
+ argc -= optind;
+ argv += optind;
+
+ if (argc < 2)
+ usage();
+
+ /* Defaults for optional args */
+ t.buffer_size = 256 * 1024;
+ t.chain_depth = 2;
+ t.duration = 0;
- t.channel_index = atoi(argv[1]);
+ t.channel_index = atoi(argv[0]);
- if (t.channel_index > 8) {
+ /* Valid channels are 0..7, so 8 itself must be rejected too. */
+ if (t.channel_index >= 8) {
printf("Channel number must be between 0 and 7.\n");
return (EX_USAGE);
}
- t.num_loops = atoi(argv[2]);
+ t.transactions = atoi(argv[1]);
+
+ if (argc >= 3) {
+ t.buffer_size = atoi(argv[2]);
+ if (t.buffer_size == 0) {
+ printf("Buffer size must be greater than zero\n");
+ return (EX_USAGE);
+ }
+ }
+
+ if (argc >= 4) {
+ t.chain_depth = atoi(argv[3]);
+ if (t.chain_depth < 1) {
+ printf("Chain length must be greater than zero\n");
+ return (EX_USAGE);
+ }
+ }
+
+ if (argc >= 5) {
+ t.duration = atoi(argv[4]);
+ if (t.duration < 1) {
+ printf("Duration must be greater than zero\n");
+ return (EX_USAGE);
+ }
+ }
fd = open("/dev/ioat_test", O_RDWR);
if (fd < 0) {
@@ -66,5 +120,44 @@ main(int argc, char **argv)
(void)ioctl(fd, IOAT_DMATEST, &t);
close(fd);
- return (t.status);
+ return (prettyprint(&t));
+}
+
+/*
+ * Print the result counters of a finished test: any non-zero error
+ * counters, the number of transactions processed, the bytes copied and,
+ * for timed runs, the copy rate.
+ *
+ * Always returns EX_OK, even when error counters were printed.
+ */
+static int
+prettyprint(struct ioat_test *t)
+{
+ char bps[10], bytesh[10];
+ uintmax_t bytes;
+
+ if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0 ||
+ t->status[IOAT_TEST_NO_MEMORY] != 0 ||
+ t->status[IOAT_TEST_MISCOMPARE] != 0) {
+ printf("Errors:\n");
+ if (t->status[IOAT_TEST_NO_DMA_ENGINE] != 0)
+ printf("\tNo DMA engine present: %u\n",
+ (unsigned)t->status[IOAT_TEST_NO_DMA_ENGINE]);
+ if (t->status[IOAT_TEST_NO_MEMORY] != 0)
+ printf("\tOut of memory: %u\n",
+ (unsigned)t->status[IOAT_TEST_NO_MEMORY]);
+ if (t->status[IOAT_TEST_MISCOMPARE] != 0)
+ printf("\tMiscompares: %u\n",
+ (unsigned)t->status[IOAT_TEST_MISCOMPARE]);
+ }
+
+ /* status[IOAT_TEST_OK] counts copies, chain_depth of them per txn. */
+ printf("Processed %u txns\n", (unsigned)t->status[IOAT_TEST_OK] /
+ t->chain_depth);
+ bytes = (uintmax_t)t->buffer_size * t->status[IOAT_TEST_OK];
+
+ humanize_number(bytesh, sizeof(bytesh), (int64_t)bytes, "B",
+ HN_AUTOSCALE, HN_DECIMAL);
+ if (t->duration) {
+ /* duration is in ms, so scale by 1000 to get bytes per second. */
+ humanize_number(bps, sizeof(bps),
+ (int64_t)1000 * bytes / t->duration, "B/s", HN_AUTOSCALE,
+ HN_DECIMAL);
+ printf("%ju (%s) copied in %u ms (%s)\n", bytes, bytesh,
+ (unsigned)t->duration, bps);
+ } else
+ printf("%ju (%s) copied\n", bytes, bytesh);
+
+ return (EX_OK);
}
OpenPOWER on IntegriCloud