summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author    jimharris <jimharris@FreeBSD.org>  2012-10-18 00:44:39 +0000
committer jimharris <jimharris@FreeBSD.org>  2012-10-18 00:44:39 +0000
commit    a5279c222fba5a40054c2b884024700c8f22a6f7 (patch)
tree      0025fbe683191b1597b5b4914d3977f0c0a525df
parent    bca33a6688fc34f33afa47494cf922c6d21b99d3 (diff)
download  FreeBSD-src-a5279c222fba5a40054c2b884024700c8f22a6f7.zip
          FreeBSD-src-a5279c222fba5a40054c2b884024700c8f22a6f7.tar.gz
Preallocate a limited number of nvme_tracker objects per qpair, rather
than dynamically creating them at runtime.

Sponsored by: Intel
-rw-r--r--  sys/dev/nvme/nvme_ctrlr.c    25
-rw-r--r--  sys/dev/nvme/nvme_private.h  23
-rw-r--r--  sys/dev/nvme/nvme_qpair.c    89
-rw-r--r--  sys/dev/nvme/nvme_sysctl.c    6
4 files changed, 80 insertions(+), 63 deletions(-)
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index adc34e9..94987e2 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -217,7 +217,13 @@ nvme_ctrlr_construct_admin_qpair(struct nvme_controller *ctrlr)
* The admin queue's max xfer size is treated differently than the
* max I/O xfer size. 16KB is sufficient here - maybe even less?
*/
- nvme_qpair_construct(qpair, 0, 0, num_entries, 16*1024, ctrlr);
+ nvme_qpair_construct(qpair,
+ 0, /* qpair ID */
+ 0, /* vector */
+ num_entries,
+ NVME_ADMIN_TRACKERS,
+ 16*1024, /* max xfer size */
+ ctrlr);
}
static int
@@ -225,13 +231,11 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
{
struct nvme_qpair *qpair;
union cap_lo_register cap_lo;
- int i, num_entries;
+ int i, num_entries, num_trackers;
num_entries = NVME_IO_ENTRIES;
TUNABLE_INT_FETCH("hw.nvme.io_entries", &num_entries);
- num_entries = max(num_entries, NVME_MIN_IO_ENTRIES);
-
/*
* NVMe spec sets a hard limit of 64K max entries, but
* devices may specify a smaller limit, so we need to check
@@ -240,6 +244,18 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
cap_lo.raw = nvme_mmio_read_4(ctrlr, cap_lo);
num_entries = min(num_entries, cap_lo.bits.mqes+1);
+ num_trackers = NVME_IO_TRACKERS;
+ TUNABLE_INT_FETCH("hw.nvme.io_trackers", &num_trackers);
+
+ num_trackers = max(num_trackers, NVME_MIN_IO_TRACKERS);
+ num_trackers = min(num_trackers, NVME_MAX_IO_TRACKERS);
+ /*
+ * No need to have more trackers than entries in the submit queue.
+ * Note also that for a queue size of N, we can only have (N-1)
+ * commands outstanding, hence the "-1" here.
+ */
+ num_trackers = min(num_trackers, (num_entries-1));
+
ctrlr->max_xfer_size = NVME_MAX_XFER_SIZE;
TUNABLE_INT_FETCH("hw.nvme.max_xfer_size", &ctrlr->max_xfer_size);
/*
@@ -270,6 +286,7 @@ nvme_ctrlr_construct_io_qpairs(struct nvme_controller *ctrlr)
i+1, /* qpair ID */
ctrlr->msix_enabled ? i+1 : 0, /* vector */
num_entries,
+ num_trackers,
ctrlr->max_xfer_size,
ctrlr);
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 9554725..0797b53 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -68,14 +68,25 @@ MALLOC_DECLARE(M_NVME);
*/
#define NVME_MAX_XFER_SIZE NVME_MAX_PRP_LIST_ENTRIES * PAGE_SIZE
+#define NVME_ADMIN_TRACKERS (16)
#define NVME_ADMIN_ENTRIES (128)
/* min and max are defined in admin queue attributes section of spec */
#define NVME_MIN_ADMIN_ENTRIES (2)
#define NVME_MAX_ADMIN_ENTRIES (4096)
-#define NVME_IO_ENTRIES (1024)
-/* min is a reasonable value picked for the nvme(4) driver */
-#define NVME_MIN_IO_ENTRIES (128)
+/*
+ * NVME_IO_ENTRIES defines the size of an I/O qpair's submission and completion
+ * queues, while NVME_IO_TRACKERS defines the maximum number of I/O that we
+ * will allow outstanding on an I/O qpair at any time. The only advantage in
+ * having IO_ENTRIES > IO_TRACKERS is for debugging purposes - when dumping
+ * the contents of the submission and completion queues, it will show a longer
+ * history of data.
+ */
+#define NVME_IO_ENTRIES (256)
+#define NVME_IO_TRACKERS (128)
+#define NVME_MIN_IO_TRACKERS (16)
+#define NVME_MAX_IO_TRACKERS (1024)
+
/*
* NVME_MAX_IO_ENTRIES is not defined, since it is specified in CC.MQES
* for each controller.
@@ -134,6 +145,7 @@ struct nvme_qpair {
uint32_t max_xfer_size;
uint32_t num_entries;
+ uint32_t num_trackers;
uint32_t sq_tdbl_off;
uint32_t cq_hdbl_off;
@@ -155,8 +167,6 @@ struct nvme_qpair {
bus_dmamap_t cpl_dma_map;
uint64_t cpl_bus_addr;
- uint32_t num_tr;
-
SLIST_HEAD(, nvme_tracker) free_tr;
struct nvme_tracker **act_tr;
@@ -348,12 +358,11 @@ void nvme_ctrlr_submit_io_request(struct nvme_controller *ctrlr,
void nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
uint16_t vector, uint32_t num_entries,
- uint32_t max_xfer_size,
+ uint32_t num_trackers, uint32_t max_xfer_size,
struct nvme_controller *ctrlr);
void nvme_qpair_submit_cmd(struct nvme_qpair *qpair,
struct nvme_tracker *tr);
void nvme_qpair_process_completions(struct nvme_qpair *qpair);
-struct nvme_tracker * nvme_qpair_allocate_tracker(struct nvme_qpair *qpair);
void nvme_qpair_submit_request(struct nvme_qpair *qpair,
struct nvme_request *req);
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index 97a5c28..0abac7c 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -74,42 +74,20 @@ nvme_completion_check_retry(const struct nvme_completion *cpl)
}
}
-struct nvme_tracker *
-nvme_qpair_allocate_tracker(struct nvme_qpair *qpair)
+static void
+nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
+ uint16_t cid)
{
- struct nvme_tracker *tr;
-
- tr = SLIST_FIRST(&qpair->free_tr);
- if (tr == NULL) {
- /*
- * We can't support more trackers than we have entries in
- * our queue, because it would generate invalid indices
- * into the qpair's active tracker array.
- */
- if (qpair->num_tr == qpair->num_entries) {
- return (NULL);
- }
-
- tr = malloc(sizeof(struct nvme_tracker), M_NVME,
- M_ZERO | M_NOWAIT);
-
- if (tr == NULL) {
- return (NULL);
- }
- bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
- bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);
+ bus_dmamap_create(qpair->dma_tag, 0, &tr->payload_dma_map);
+ bus_dmamap_create(qpair->dma_tag, 0, &tr->prp_dma_map);
- bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
- sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
-
- callout_init_mtx(&tr->timer, &qpair->lock, 0);
- tr->cid = qpair->num_tr++;
- tr->qpair = qpair;
- } else
- SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
+ bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp,
+ sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0);
- return (tr);
+ callout_init_mtx(&tr->timer, &qpair->lock, 0);
+ tr->cid = cid;
+ tr->qpair = qpair;
}
void
@@ -163,6 +141,10 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
tr->payload_dma_map);
nvme_free_request(req);
+
+ if (SLIST_EMPTY(&qpair->free_tr))
+ wakeup(qpair);
+
SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
}
@@ -188,9 +170,11 @@ nvme_qpair_msix_handler(void *arg)
void
nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
- uint16_t vector, uint32_t num_entries, uint32_t max_xfer_size,
- struct nvme_controller *ctrlr)
+ uint16_t vector, uint32_t num_entries, uint32_t num_trackers,
+ uint32_t max_xfer_size, struct nvme_controller *ctrlr)
{
+ struct nvme_tracker *tr;
+ uint32_t i;
qpair->id = id;
qpair->vector = vector;
@@ -233,7 +217,7 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
qpair->num_cmds = 0;
qpair->num_intr_handler_calls = 0;
- qpair->num_tr = 0;
+ qpair->num_trackers = num_trackers;
qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
@@ -259,6 +243,18 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
SLIST_INIT(&qpair->free_tr);
+ for (i = 0; i < num_trackers; i++) {
+ tr = malloc(sizeof(*tr), M_NVME, M_ZERO | M_NOWAIT);
+
+ if (tr == NULL) {
+ printf("warning: nvme tracker malloc failed\n");
+ break;
+ }
+
+ nvme_qpair_construct_tracker(qpair, tr, i);
+ SLIST_INSERT_HEAD(&qpair->free_tr, tr, slist);
+ }
+
qpair->act_tr = malloc(sizeof(struct nvme_tracker *) * qpair->num_entries,
M_NVME, M_ZERO | M_NOWAIT);
}
@@ -379,19 +375,6 @@ nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
req->cmd.cid = tr->cid;
qpair->act_tr[tr->cid] = tr;
- /*
- * TODO: rather than spin until entries free up, put this tracker
- * on a queue, and submit from the interrupt handler when
- * entries free up.
- */
- if ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head) {
- do {
- mtx_unlock(&qpair->lock);
- DELAY(5);
- mtx_lock(&qpair->lock);
- } while ((qpair->sq_tail+1) % qpair->num_entries == qpair->sq_head);
- }
-
callout_reset(&tr->timer, NVME_TIMEOUT_IN_SEC * hz, nvme_timeout, tr);
/* Copy the command from the tracker to the submission queue. */
@@ -415,7 +398,15 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
mtx_lock(&qpair->lock);
- tr = nvme_qpair_allocate_tracker(qpair);
+ tr = SLIST_FIRST(&qpair->free_tr);
+
+ while (tr == NULL) {
+ msleep(qpair, &qpair->lock, PRIBIO, "qpair_tr", 0);
+ printf("msleep\n");
+ tr = SLIST_FIRST(&qpair->free_tr);
+ }
+
+ SLIST_REMOVE_HEAD(&qpair->free_tr, slist);
tr->req = req;
if (req->uio == NULL) {
diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c
index 9cfdcdd..85187af 100644
--- a/sys/dev/nvme/nvme_sysctl.c
+++ b/sys/dev/nvme/nvme_sysctl.c
@@ -192,9 +192,9 @@ nvme_sysctl_initialize_queue(struct nvme_qpair *qpair,
SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_entries",
CTLFLAG_RD, &qpair->num_entries, 0,
"Number of entries in hardware queue");
- SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_tr",
- CTLFLAG_RD, &qpair->num_tr, 0,
- "Number of trackers allocated");
+ SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "num_trackers",
+ CTLFLAG_RD, &qpair->num_trackers, 0,
+ "Number of trackers pre-allocated for this queue pair");
SYSCTL_ADD_UINT(ctrlr_ctx, que_list, OID_AUTO, "sq_head",
CTLFLAG_RD, &qpair->sq_head, 0,
"Current head of submission queue (as observed by driver)");
OpenPOWER on IntegriCloud