summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrstone <rstone@FreeBSD.org>2015-03-01 00:40:57 +0000
committerrstone <rstone@FreeBSD.org>2015-03-01 00:40:57 +0000
commitc476927dd3a4d7cebb958ccf26db0c7ea9b99513 (patch)
treeaeb36fc4115d443d1ebaae0f7d59cddafa0de9d3
parentf0a0142dd0f7d8bd0ae394686c21c67b3e9f0ae7 (diff)
downloadFreeBSD-src-c476927dd3a4d7cebb958ccf26db0c7ea9b99513.zip
FreeBSD-src-c476927dd3a4d7cebb958ccf26db0c7ea9b99513.tar.gz
Pass SR-IOV configuration to kernel using an nvlist
Pass all SR-IOV configuration to the kernel using an nvlist. The main benefit that this offers is flexibility. It allows a driver to accept any number of parameters of any type supported by the SR-IOV configuration infrastructure without having to make any changes outside of the driver. It also offers the user very fine-grained control over the configuration of the VFs -- if they want, they can have different configuration applied to every VF. Differential Revision: https://reviews.freebsd.org/D82 Reviewed by: jhb MFC after: 1 month Sponsored by: Sandvine Inc.
-rw-r--r--sys/dev/pci/pci_if.m2
-rw-r--r--sys/dev/pci/pci_iov.c126
-rw-r--r--sys/sys/iov.h95
3 files changed, 193 insertions, 30 deletions
diff --git a/sys/dev/pci/pci_if.m b/sys/dev/pci/pci_if.m
index 5fdb7cb..9f97d9a 100644
--- a/sys/dev/pci/pci_if.m
+++ b/sys/dev/pci/pci_if.m
@@ -217,6 +217,7 @@ METHOD int iov_detach {
METHOD int init_iov {
device_t dev;
uint16_t num_vfs;
+ const struct nvlist *config;
};
METHOD void uninit_iov {
@@ -226,6 +227,7 @@ METHOD void uninit_iov {
METHOD int add_vf {
device_t dev;
uint16_t vfnum;
+ const struct nvlist *config;
};
METHOD device_t create_iov_child {
diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c
index fe01e44..a26afea 100644
--- a/sys/dev/pci/pci_iov.c
+++ b/sys/dev/pci/pci_iov.c
@@ -70,6 +70,18 @@ static struct cdevsw iov_cdevsw = {
.d_ioctl = pci_iov_ioctl
};
+SYSCTL_DECL(_hw_pci);
+
+/*
+ * The maximum amount of memory we will allocate for user configuration of an
+ * SR-IOV device. 1MB ought to be enough for anyone, but leave this
+ * configurable just in case.
+ */
+static u_long pci_iov_max_config = 1024 * 1024;
+SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
+ &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
+
+
#define IOV_READ(d, r, w) \
pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)
@@ -348,6 +360,51 @@ pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
}
}
+static int
+pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
+ nvlist_t **ret)
+{
+ void *packed_config;
+ nvlist_t *config;
+ int error;
+
+ config = NULL;
+ packed_config = NULL;
+
+ if (arg->len > pci_iov_max_config) {
+ error = EMSGSIZE;
+ goto out;
+ }
+
+ packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);
+
+ error = copyin(arg->config, packed_config, arg->len);
+ if (error != 0)
+ goto out;
+
+ config = nvlist_unpack(packed_config, arg->len);
+ if (config == NULL) {
+ error = EINVAL;
+ goto out;
+ }
+
+ error = pci_iov_schema_validate_config(iov->iov_schema, config);
+ if (error != 0)
+ goto out;
+
+ error = nvlist_error(config);
+ if (error != 0)
+ goto out;
+
+ *ret = config;
+ config = NULL;
+
+out:
+ nvlist_destroy(config);
+ free(packed_config, M_SRIOV);
+ return (error);
+}
+
/*
* Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
* capability. This bit is only writeable on the lowest-numbered PF but
@@ -422,6 +479,16 @@ pci_iov_config_page_size(struct pci_devinfo *dinfo)
}
static int
+pci_init_iov(device_t dev, uint16_t num_vfs, const nvlist_t *config)
+{
+ const nvlist_t *device, *driver_config;
+
+ device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
+ driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
+ return (PCI_INIT_IOV(dev, num_vfs, driver_config));
+}
+
+static int
pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
{
int error;
@@ -479,9 +546,11 @@ pci_iov_setup_bars(struct pci_devinfo *dinfo)
}
static void
-pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
+pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
uint16_t first_rid, uint16_t rid_stride)
{
+ char device_name[VF_MAX_NAME];
+ const nvlist_t *device, *driver_config, *iov_config;
device_t bus, dev, vf;
struct pcicfg_iov *iov;
struct pci_devinfo *vfinfo;
@@ -498,12 +567,23 @@ pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
-
+ snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
+ device = nvlist_get_nvlist(config, device_name);
+ iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
+ driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
if (vf == NULL)
break;
+ /*
+ * If we are creating passthrough devices then force the ppt
+ * driver to attach to prevent a VF driver from claiming the
+ * VFs.
+ */
+ if (nvlist_get_bool(iov_config, "passthrough"))
+ device_set_devclass(vf, "ppt");
+
vfinfo = device_get_ivars(vf);
vfinfo->cfg.iov = iov;
@@ -511,7 +591,7 @@ pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
pci_iov_add_bars(iov, vfinfo);
- error = PCI_ADD_VF(dev, i);
+ error = PCI_ADD_VF(dev, i, driver_config);
if (error != 0) {
device_printf(dev, "Failed to add VF %d\n", i);
pci_delete_child(bus, vf);
@@ -525,14 +605,14 @@ static int
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
{
device_t bus, dev;
- const char *driver;
struct pci_devinfo *dinfo;
struct pcicfg_iov *iov;
+ nvlist_t *config;
int i, error;
uint16_t rid_off, rid_stride;
uint16_t first_rid, last_rid;
uint16_t iov_ctl;
- uint16_t total_vfs;
+ uint16_t num_vfs, total_vfs;
int iov_inited;
mtx_lock(&Giant);
@@ -541,6 +621,7 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
dev = dinfo->cfg.dev;
bus = device_get_parent(dev);
iov_inited = 0;
+ config = NULL;
if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
mtx_unlock(&Giant);
@@ -548,22 +629,17 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
}
iov->iov_flags |= IOV_BUSY;
- total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
+ error = pci_iov_parse_config(iov, arg, &config);
+ if (error != 0)
+ goto out;
- if (arg->num_vfs > total_vfs) {
+ num_vfs = pci_iov_config_get_num_vfs(config);
+ total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
+ if (num_vfs > total_vfs) {
error = EINVAL;
goto out;
}
- /*
- * If we are creating passthrough devices then force the ppt driver to
- * attach to prevent a VF driver from claming the VFs.
- */
- if (arg->passthrough)
- driver = "ppt";
- else
- driver = NULL;
-
error = pci_iov_config_page_size(dinfo);
if (error != 0)
goto out;
@@ -572,19 +648,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
if (error != 0)
goto out;
- error = PCI_INIT_IOV(dev, arg->num_vfs);
-
+ error = pci_init_iov(dev, num_vfs, config);
if (error != 0)
goto out;
-
iov_inited = 1;
- IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2);
+
+ IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);
rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
first_rid = pci_get_rid(dev) + rid_off;
- last_rid = first_rid + (arg->num_vfs - 1) * rid_stride;
+ last_rid = first_rid + (num_vfs - 1) * rid_stride;
/* We don't yet support allocating extra bus numbers for VFs. */
if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
@@ -600,7 +675,7 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
if (error != 0)
goto out;
- iov->iov_num_vfs = arg->num_vfs;
+ iov->iov_num_vfs = num_vfs;
error = pci_iov_setup_bars(dinfo);
if (error != 0)
@@ -612,7 +687,10 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
/* Per specification, we must wait 100ms before accessing VFs. */
pause("iov", roundup(hz, 10));
- pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride);
+ pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);
+
+ nvlist_destroy(config);
+ iov->iov_flags &= ~IOV_BUSY;
mtx_unlock(&Giant);
return (0);
@@ -635,6 +713,8 @@ out:
rman_fini(&iov->rman);
iov->iov_flags &= ~IOV_RMAN_INITED;
}
+
+ nvlist_destroy(config);
iov->iov_num_vfs = 0;
iov->iov_flags &= ~IOV_BUSY;
mtx_unlock(&Giant);
diff --git a/sys/sys/iov.h b/sys/sys/iov.h
index db5648d..139bf4e 100644
--- a/sys/sys/iov.h
+++ b/sys/sys/iov.h
@@ -46,12 +46,6 @@
#define DEFAULT_SCHEMA_NAME "DEFAULT"
#define REQUIRED_SCHEMA_NAME "REQUIRED"
-struct pci_iov_arg
-{
- int num_vfs;
- int passthrough;
-};
-
/*
* Because each PF device is expected to expose a unique set of possible
* configurations, the SR-IOV infrastructure dynamically queries the PF
@@ -168,7 +162,94 @@ struct pci_iov_schema
int error;
};
-#define IOV_CONFIG _IOWR('p', 10, struct pci_iov_arg)
+/*
+ * SR-IOV configuration is passed to the kernel as a packed nvlist. See nv(3)
+ * for the details of the nvlist API. The expected format of the nvlist is:
+ *
+ * BASIC RULES
+ * 1) All keys are case-insensitive.
+ * 2) No keys that are not specified below may exist at any level of the
+ * config nvlist.
+ * 3) Unless otherwise specified, all keys are optional. It should go without
+ * saying that a key being mandatory is transitive: that is, if a key is
+ * specified to contain a sub-node that contains a mandatory key, then
+ * the outer key is implicitly mandatory. If a key is mandatory then the
+ * associated value is also mandatory.
+ * 4) Order of keys is irrelevant.
+ *
+ * TOP LEVEL OF CONFIG NVLIST
+ * 1) All keys specified in this section are mandatory.
+ * 2) There must be a top-level key with the name PF_CONFIG_NAME. The value
+ * associated is an nvlist that follows the "device node" format. The
+ * parameters in this node specify parameters that apply to the PF.
+ * 3) For every VF being configured (this is set via the "num_vfs" parameter
+ * in the PF section), there must be a top-level key whose name is VF_PREFIX
+ * immediately followed by the index of the VF as a decimal integer. For
+ * example, this would be VF-0 for the first VF. VFs are numbered starting
+ * from 0. The value associated with this key follows the "device node"
+ * format. The parameters in this node specify configuration that applies
+ * to the VF specified in the key. Leading zeros are not permitted in VF
+ * index. Configuration for the second VF must be specified in a node with
+ * the key VF-1. VF-01 is not a valid key.
+ *
+ * DEVICE NODES
+ * 1) All keys specified in this section are mandatory.
+ * 2) The device node must contain a key with the name DRIVER_CONFIG_NAME. The
+ * value associated with this key is an nvlist following the subsystem node
+ * format. The parameters in this key specify configuration that is specific
+ * to a particular device driver.
+ * 3) The device node must contain a key with the name IOV_CONFIG_NAME. The
+ * value associated with this key is an nvlist following the subsystem node
+ * format. The parameters in this key specify configuration that is consumed
+ * by the SR-IOV infrastructure.
+ *
+ * SUBSYSTEM NODES
+ * 1) A subsystem node specifies configuration parameters that apply to a
+ * particular subsystem (driver or infrastructure) of a particular device
+ * (PF or individual VF).
+ * Note: We will refer to the section of the configuration schema that
+ * specifies the parameters for this subsystem and device
+ * configuration as the device/subsystem schema.
+ * 2) The subsystem node must contain only keys that correspond to parameters
+ * that are specified in the device/subsystem schema.
+ * 3) Every parameter specified as required in the device/subsystem schema is
+ * a mandatory key in the subsystem node.
+ * Note: All parameters that are not required in device/subsystem schema are
+ * optional keys. In particular, any parameter specified to have a
+ * default value in the device/subsystem schema is optional. The
+ * kernel is responsible for applying default values.
+ * 4) The value of every parameter in the subsystem node must conform to the
+ * restrictions of the type specified for that parameter in the device/
+ * subsystem schema.
+ *
+ * The following is an example of a valid configuration, when validated against
+ * the schema example given above.
+ *
+ * PF (NVLIST):
+ * driver (NVLIST):
+ * iov (NVLIST):
+ * num_vfs (NUMBER): 3 (3) (0x3)
+ * device (STRING): [ix0]
+ * VF-0 (NVLIST):
+ * driver (NVLIST):
+ * vlan (NUMBER): 1000 (1000) (0x3e8)
+ * iov (NVLIST):
+ * passthrough (BOOL): TRUE
+ * VF-1 (NVLIST):
+ * driver (NVLIST):
+ * iov (NVLIST):
+ * VF-2 (NVLIST):
+ * driver (NVLIST):
+ * mac-addr (BINARY): 6 020102030405
+ * iov (NVLIST):
+ */
+struct pci_iov_arg
+{
+ void *config;
+ size_t len;
+};
+
+#define IOV_CONFIG _IOW('p', 10, struct pci_iov_arg)
#define IOV_DELETE _IO('p', 11)
#define IOV_GET_SCHEMA _IOWR('p', 12, struct pci_iov_schema)
OpenPOWER on IntegriCloud