author      jimharris <jimharris@FreeBSD.org>    2016-01-07 20:32:04 +0000
committer   jimharris <jimharris@FreeBSD.org>    2016-01-07 20:32:04 +0000
commit      94f3dfd067c77fbbb987f93fa707eedd0b2ec324 (patch)
tree        1bee2433928863fd257357674f06adc088f31cb2
parent      b96b9a614879f9b489f3d73361c78e28b696e184 (diff)
nvme: add hw.nvme.min_cpus_per_ioq tunable
Due to FreeBSD system-wide limits on the number of MSI-X vectors
(https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=199321), it may be
desirable to allocate fewer than the maximum number of vectors for an
NVMe device, in order to save vectors for other devices (usually
Ethernet) that can take better advantage of them and may be probed
after NVMe.

This tunable is expressed in terms of the minimum number of CPUs per
I/O queue instead of the maximum number of queues per controller, to
allow for a more even distribution of CPUs per queue.  This avoids
cases where some CPUs have a dedicated queue while other CPUs must
share queues.  Ideally the PR referenced above will eventually be
fixed and the mechanism implemented here will become obsolete.

While here, fix a bug in the CPUs-per-I/O-queue calculation to
properly account for the admin queue's MSI-X vector.

Reviewed by:    gallatin
MFC after:      3 days
Sponsored by:   Intel
-rw-r--r--   share/man/man4/nvme.4         18
-rw-r--r--   sys/dev/nvme/nvme_ctrlr.c     26
-rw-r--r--   sys/dev/nvme/nvme_sysctl.c     6
3 files changed, 41 insertions, 9 deletions
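
For reference, here is a minimal standalone sketch of the queue-count math the
patch below implements. It is a userland model, not driver code; the
NVME_CEILING macro is assumed to be the driver's ceiling-division helper, and
the CPU/vector counts in main() are purely illustrative.

/* Standalone model of the nvme(4) queue-count math; not driver code. */
#include <stdio.h>

/* Assumed to match the driver's ceiling-division macro. */
#define NVME_CEILING(num, div)	(((num) + (div) - 1) / (div))

static int
model_num_io_queues(int ncpus, int vectors_available, int min_cpus_per_ioq)
{
	int cpus_per_ioq;

	/* One MSI-X vector must be reserved for the admin queue. */
	cpus_per_ioq = NVME_CEILING(ncpus, vectors_available - 1);
	if (cpus_per_ioq < min_cpus_per_ioq)
		cpus_per_ioq = min_cpus_per_ioq;

	return (NVME_CEILING(ncpus, cpus_per_ioq));
}

int
main(void)
{
	/* Illustrative: 32 CPUs, 16 MSI-X vectors, hw.nvme.min_cpus_per_ioq=4. */
	printf("%d I/O queue pairs\n", model_num_io_queues(32, 16, 4));
	return (0);
}

With those inputs the model prints 8 I/O queue pairs (9 vectors including the
admin queue); without the tunable (minimum of 1) it would choose 11 queue
pairs and 12 vectors, so the clamp frees vectors for other devices.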
diff --git a/share/man/man4/nvme.4 b/share/man/man4/nvme.4
index 068b006..83ca8ae 100644
--- a/share/man/man4/nvme.4
+++ b/share/man/man4/nvme.4
@@ -1,5 +1,5 @@
.\"
-.\" Copyright (c) 2012-2014 Intel Corporation
+.\" Copyright (c) 2012-2016 Intel Corporation
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 18, 2014
+.Dd January 7, 2016
.Dt NVME 4
.Os
.Sh NAME
@@ -89,7 +89,10 @@ not 0, and this driver follows that convention.
By default,
.Nm
will create an I/O queue pair for each CPU, provided enough MSI-X vectors
-can be allocated.
+and NVMe queue pairs can be allocated. If not enough vectors or queue
+pairs are available, nvme(4) will use a smaller number of queue pairs and
+assign multiple CPUs per queue pair.
+.Pp
To force a single I/O queue pair shared by all CPUs, set the following
tunable value in
.Xr loader.conf 5 :
@@ -97,6 +100,13 @@ tunable value in
hw.nvme.per_cpu_io_queues=0
.Ed
.Pp
+To assign more than one CPU per I/O queue pair, thereby reducing the number
+of MSI-X vectors consumed by the device, set the following tunable value in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+hw.nvme.min_cpus_per_ioq=X
+.Ed
+.Pp
To force legacy interrupts for all
.Nm
driver instances, set the following tunable value in
@@ -109,6 +119,8 @@ Note that use of INTx implies disabling of per-CPU I/O queue pairs.
.Sh SYSCTL VARIABLES
The following controller-level sysctls are currently implemented:
.Bl -tag -width indent
+.It Va dev.nvme.0.num_cpus_per_ioq
+(R) Number of CPUs associated with each I/O queue pair.
.It Va dev.nvme.0.int_coal_time
(R/W) Interrupt coalescing timer period in microseconds.
Set to 0 to disable.
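
As a concrete illustration of the new tunable documented above (the value 4 is
purely illustrative), an administrator would add the following line to
loader.conf(5):

hw.nvme.min_cpus_per_ioq=4

On a 32-CPU machine this caps the driver at 8 I/O queue pairs per controller,
leaving the remaining MSI-X vectors available for other devices.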
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 151f025..6125894 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (C) 2012-2015 Intel Corporation
+ * Copyright (C) 2012-2016 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -978,13 +978,27 @@ nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
{
device_t dev;
int per_cpu_io_queues;
+ int min_cpus_per_ioq;
int num_vectors_requested, num_vectors_allocated;
int num_vectors_available;
dev = ctrlr->dev;
+ min_cpus_per_ioq = 1;
+ TUNABLE_INT_FETCH("hw.nvme.min_cpus_per_ioq", &min_cpus_per_ioq);
+
+ if (min_cpus_per_ioq < 1) {
+ min_cpus_per_ioq = 1;
+ } else if (min_cpus_per_ioq > mp_ncpus) {
+ min_cpus_per_ioq = mp_ncpus;
+ }
+
per_cpu_io_queues = 1;
TUNABLE_INT_FETCH("hw.nvme.per_cpu_io_queues", &per_cpu_io_queues);
+ if (per_cpu_io_queues == 0) {
+ min_cpus_per_ioq = mp_ncpus;
+ }
+
ctrlr->force_intx = 0;
TUNABLE_INT_FETCH("hw.nvme.force_intx", &ctrlr->force_intx);
@@ -1010,10 +1024,12 @@ nvme_ctrlr_setup_interrupts(struct nvme_controller *ctrlr)
return;
}
- if (per_cpu_io_queues)
- ctrlr->num_cpus_per_ioq = NVME_CEILING(mp_ncpus, num_vectors_available + 1);
- else
- ctrlr->num_cpus_per_ioq = mp_ncpus;
+ /*
+ * Do not use all vectors for I/O queues - one must be saved for the
+ * admin queue.
+ */
+ ctrlr->num_cpus_per_ioq = max(min_cpus_per_ioq,
+ NVME_CEILING(mp_ncpus, num_vectors_available - 1));
ctrlr->num_io_queues = NVME_CEILING(mp_ncpus, ctrlr->num_cpus_per_ioq);
num_vectors_requested = ctrlr->num_io_queues + 1;
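
A quick numeric check of the corrected expression (numbers purely
illustrative): with mp_ncpus = 8 and num_vectors_available = 8, the old
NVME_CEILING(mp_ncpus, num_vectors_available + 1) evaluates to ceil(8/9) = 1
CPU per queue, i.e. 8 I/O queues plus the admin queue, requesting 9 vectors
when only 8 are available. The corrected
NVME_CEILING(mp_ncpus, num_vectors_available - 1) gives ceil(8/7) = 2 CPUs per
queue, 4 I/O queues, and 5 vectors in total, correctly leaving one vector
reserved for the admin queue.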
diff --git a/sys/dev/nvme/nvme_sysctl.c b/sys/dev/nvme/nvme_sysctl.c
index 8b99111..08cd15e 100644
--- a/sys/dev/nvme/nvme_sysctl.c
+++ b/sys/dev/nvme/nvme_sysctl.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (C) 2012-2013 Intel Corporation
+ * Copyright (C) 2012-2016 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -267,6 +267,10 @@ nvme_sysctl_initialize_ctrlr(struct nvme_controller *ctrlr)
ctrlr_tree = device_get_sysctl_tree(ctrlr->dev);
ctrlr_list = SYSCTL_CHILDREN(ctrlr_tree);
+ SYSCTL_ADD_UINT(ctrlr_ctx, ctrlr_list, OID_AUTO, "num_cpus_per_ioq",
+ CTLFLAG_RD, &ctrlr->num_cpus_per_ioq, 0,
+ "Number of CPUs assigned per I/O queue pair");
+
SYSCTL_ADD_PROC(ctrlr_ctx, ctrlr_list, OID_AUTO,
"int_coal_time", CTLTYPE_UINT | CTLFLAG_RW, ctrlr, 0,
nvme_sysctl_int_coal_time, "IU",
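
Once the controller attaches, the resulting distribution can be inspected from
userland via the new read-only sysctl; for example (the reported value here is
illustrative):

# sysctl dev.nvme.0.num_cpus_per_ioq
dev.nvme.0.num_cpus_per_ioq: 4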