diff options
author | jimharris <jimharris@FreeBSD.org> | 2013-10-08 15:44:04 +0000 |
---|---|---|
committer | jimharris <jimharris@FreeBSD.org> | 2013-10-08 15:44:04 +0000 |
commit | 509a7951936691b0bf8b3257febf8fa9a25dbdc0 (patch) | |
tree | aa31a19ca886956f2cbdfd2767059ed99e6f1fae | |
parent | 4e3872abc7d06e32c81c7d991a15245489dfa749 (diff) | |
download | FreeBSD-src-509a7951936691b0bf8b3257febf8fa9a25dbdc0.zip FreeBSD-src-509a7951936691b0bf8b3257febf8fa9a25dbdc0.tar.gz |
Add driver-assisted striping for upcoming Intel NVMe controllers that can
benefit from it.
Sponsored by: Intel
Reviewed by: kib (earlier version), carl
Approved by: re (hrs)
MFC after: 1 week
-rw-r--r-- | sys/dev/nvd/nvd.c | 11 | ||||
-rw-r--r-- | sys/dev/nvme/nvme.h | 2 | ||||
-rw-r--r-- | sys/dev/nvme/nvme_ns.c | 223 | ||||
-rw-r--r-- | sys/dev/nvme/nvme_private.h | 1 |
4 files changed, 225 insertions, 12 deletions
diff --git a/sys/dev/nvd/nvd.c b/sys/dev/nvd/nvd.c index b2e880b..b48fb35 100644 --- a/sys/dev/nvd/nvd.c +++ b/sys/dev/nvd/nvd.c @@ -187,17 +187,6 @@ nvd_done(void *arg, const struct nvme_completion *cpl) atomic_add_int(&ndisk->cur_depth, -1); - /* - * TODO: add more extensive translation of NVMe status codes - * to different bio error codes (i.e. EIO, EINVAL, etc.) - */ - if (nvme_completion_is_error(cpl)) { - bp->bio_error = EIO; - bp->bio_flags |= BIO_ERROR; - bp->bio_resid = bp->bio_bcount; - } else - bp->bio_resid = 0; - biodone(bp); } diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h index f904933..cb4701a 100644 --- a/sys/dev/nvme/nvme.h +++ b/sys/dev/nvme/nvme.h @@ -535,7 +535,7 @@ struct nvme_controller_data { uint8_t reserved6[1024]; /* bytes 3072-4095: vendor specific */ - uint8_t reserved7[1024]; + uint8_t vs[1024]; } __packed __aligned(4); struct nvme_namespace_data { diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index 5fa3ba0..e22f5bd 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -34,13 +34,31 @@ __FBSDID("$FreeBSD$"); #include <sys/disk.h> #include <sys/fcntl.h> #include <sys/ioccom.h> +#include <sys/malloc.h> #include <sys/module.h> #include <sys/proc.h> #include <dev/pci/pcivar.h> +#include <geom/geom.h> + #include "nvme_private.h" +static void nvme_bio_child_inbed(struct bio *parent, int bio_error); +static void nvme_bio_child_done(void *arg, + const struct nvme_completion *cpl); +static uint32_t nvme_get_num_segments(uint64_t addr, uint64_t size, + uint32_t alignment); +static void nvme_free_child_bios(int num_bios, + struct bio **child_bios); +static struct bio ** nvme_allocate_child_bios(int num_bios); +static struct bio ** nvme_construct_child_bios(struct bio *bp, + uint32_t alignment, + int *num_bios); +static int nvme_ns_split_bio(struct nvme_namespace *ns, + struct bio *bp, + uint32_t alignment); + static int nvme_ns_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, struct thread *td) @@ -202,18 +220,218 @@ nvme_ns_bio_done(void *arg, const struct nvme_completion *status) if (bp->bio_driver2) free(bp->bio_driver2, M_NVME); + if (nvme_completion_is_error(status)) { + bp->bio_flags |= BIO_ERROR; + if (bp->bio_error == 0) + bp->bio_error = EIO; + } + + if ((bp->bio_flags & BIO_ERROR) == 0) + bp->bio_resid = 0; + else + bp->bio_resid = bp->bio_bcount; + bp_cb_fn(bp, status); } +static void +nvme_bio_child_inbed(struct bio *parent, int bio_error) +{ + struct nvme_completion parent_cpl; + int inbed; + + if (bio_error != 0) { + parent->bio_flags |= BIO_ERROR; + parent->bio_error = bio_error; + } + + /* + * atomic_fetchadd will return value before adding 1, so we still + * must add 1 to get the updated inbed number. + */ + inbed = atomic_fetchadd_int(&parent->bio_inbed, 1) + 1; + if (inbed == parent->bio_children) { + bzero(&parent_cpl, sizeof(parent_cpl)); + if (parent->bio_flags & BIO_ERROR) + parent_cpl.status.sc = NVME_SC_DATA_TRANSFER_ERROR; + nvme_ns_bio_done(parent, &parent_cpl); + } +} + +static void +nvme_bio_child_done(void *arg, const struct nvme_completion *cpl) +{ + struct bio *child = arg; + struct bio *parent; + int bio_error; + + parent = child->bio_parent; + g_destroy_bio(child); + bio_error = nvme_completion_is_error(cpl) ? EIO : 0; + nvme_bio_child_inbed(parent, bio_error); +} + +static uint32_t +nvme_get_num_segments(uint64_t addr, uint64_t size, uint32_t align) +{ + uint32_t num_segs, offset, remainder; + + if (align == 0) + return (1); + + KASSERT((align & (align - 1)) == 0, ("alignment not power of 2\n")); + + num_segs = size / align; + remainder = size & (align - 1); + offset = addr & (align - 1); + if (remainder > 0 || offset > 0) + num_segs += 1 + (remainder + offset - 1) / align; + return (num_segs); +} + +static void +nvme_free_child_bios(int num_bios, struct bio **child_bios) +{ + int i; + + for (i = 0; i < num_bios; i++) { + if (child_bios[i] != NULL) + g_destroy_bio(child_bios[i]); + } + + free(child_bios, M_NVME); +} + +static struct bio ** +nvme_allocate_child_bios(int num_bios) +{ + struct bio **child_bios; + int err = 0, i; + + child_bios = malloc(num_bios * sizeof(struct bio *), M_NVME, M_NOWAIT); + if (child_bios == NULL) + return (NULL); + + for (i = 0; i < num_bios; i++) { + child_bios[i] = g_new_bio(); + if (child_bios[i] == NULL) + err = ENOMEM; + } + + if (err == ENOMEM) { + nvme_free_child_bios(num_bios, child_bios); + return (NULL); + } + + return (child_bios); +} + +static struct bio ** +nvme_construct_child_bios(struct bio *bp, uint32_t alignment, int *num_bios) +{ + struct bio **child_bios; + struct bio *child; + uint64_t cur_offset; + caddr_t data; + uint32_t rem_bcount; + int i; +#ifdef NVME_UNMAPPED_BIO_SUPPORT + struct vm_page **ma; + uint32_t ma_offset; +#endif + + *num_bios = nvme_get_num_segments(bp->bio_offset, bp->bio_bcount, + alignment); + child_bios = nvme_allocate_child_bios(*num_bios); + if (child_bios == NULL) + return (NULL); + + bp->bio_children = *num_bios; + bp->bio_inbed = 0; + cur_offset = bp->bio_offset; + rem_bcount = bp->bio_bcount; + data = bp->bio_data; +#ifdef NVME_UNMAPPED_BIO_SUPPORT + ma_offset = bp->bio_ma_offset; + ma = bp->bio_ma; +#endif + + for (i = 0; i < *num_bios; i++) { + child = child_bios[i]; + child->bio_parent = bp; + child->bio_cmd = bp->bio_cmd; + child->bio_offset = cur_offset; + child->bio_bcount = min(rem_bcount, + alignment - (cur_offset & (alignment - 1))); + child->bio_flags = bp->bio_flags; +#ifdef NVME_UNMAPPED_BIO_SUPPORT + if (bp->bio_flags & BIO_UNMAPPED) { + child->bio_ma_offset = ma_offset; + child->bio_ma = ma; + child->bio_ma_n = + nvme_get_num_segments(child->bio_ma_offset, + child->bio_bcount, PAGE_SIZE); + ma_offset = (ma_offset + child->bio_bcount) & + PAGE_MASK; + ma += child->bio_ma_n; + if (ma_offset != 0) + ma -= 1; + } else +#endif + { + child->bio_data = data; + data += child->bio_bcount; + } + cur_offset += child->bio_bcount; + rem_bcount -= child->bio_bcount; + } + + return (child_bios); +} + +static int +nvme_ns_split_bio(struct nvme_namespace *ns, struct bio *bp, + uint32_t alignment) +{ + struct bio *child; + struct bio **child_bios; + int err, i, num_bios; + + child_bios = nvme_construct_child_bios(bp, alignment, &num_bios); + if (child_bios == NULL) + return (ENOMEM); + + for (i = 0; i < num_bios; i++) { + child = child_bios[i]; + err = nvme_ns_bio_process(ns, child, nvme_bio_child_done); + if (err != 0) { + nvme_bio_child_inbed(bp, err); + g_destroy_bio(child); + } + } + + free(child_bios, M_NVME); + return (0); +} + int nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp, nvme_cb_fn_t cb_fn) { struct nvme_dsm_range *dsm_range; + uint32_t num_bios; int err; bp->bio_driver1 = cb_fn; + if (ns->stripesize > 0 && + (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { + num_bios = nvme_get_num_segments(bp->bio_offset, + bp->bio_bcount, ns->stripesize); + if (num_bios > 1) + return (nvme_ns_split_bio(ns, bp, ns->stripesize)); + } + switch (bp->bio_cmd) { case BIO_READ: err = nvme_ns_cmd_read_bio(ns, bp, nvme_ns_bio_done, bp); @@ -276,6 +494,11 @@ nvme_ns_construct(struct nvme_namespace *ns, uint16_t id, ns->ctrlr = ctrlr; ns->id = id; + ns->stripesize = 0; + + if (pci_get_devid(ctrlr->dev) == 0x09538086 && ctrlr->cdata.vs[3] != 0) + ns->stripesize = + (1 << ctrlr->cdata.vs[3]) * ctrlr->min_page_size; /* * Namespaces are reconstructed after a controller reset, so check diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index f0f4453..1c2333e 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -238,6 +238,7 @@ struct nvme_namespace { uint16_t flags; struct cdev *cdev; void *cons_cookie[NVME_MAX_CONSUMERS]; + uint32_t stripesize; struct mtx lock; }; |