author:    cperciva <cperciva@FreeBSD.org>  2015-07-30 03:50:01 +0000
committer: cperciva <cperciva@FreeBSD.org>  2015-07-30 03:50:01 +0000
commit:    d40e2b99300c55d54e6d9ce8e2de858398fdc450
tree:      eddf054ba8a190c96efebed965abe9012dff13c2 /sys/dev/xen
parent:    79c179226363746b26232b9c4a631fa093f94994
Add support for Xen blkif indirect segment I/Os. This makes it possible for
the blkfront driver to perform I/Os of up to 2 MB, subject to support from
the blkback to which it is connected and the initiation of such large I/Os
by the rest of the kernel. In practice, the maximum I/O size is increased from
40 kB to 128 kB (MAXPHYS).
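
For reference, the arithmetic behind those numbers (a sketch, assuming the
usual 4 kB page size and the XBD_SEGS_TO_SIZE()/XBD_SIZE_TO_SEGS() helpers in
block.h, which set aside one page of slack for buffers that are not
page-aligned):

    /*
     * Plain request:    at most BLKIF_MAX_SEGMENTS_PER_REQUEST (11) segments,
     *                   so XBD_SEGS_TO_SIZE(11) = (11 - 1) * 4096 = 40 kB.
     * Indirect request: the protocol allows up to 2 MB, but xbd_connect()
     *                   clamps the segment count to XBD_SIZE_TO_SEGS(MAXPHYS),
     *                   and MAXPHYS defaults to 128 kB, hence 40 kB -> 128 kB.
     */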
The changes to xen/interface/io/blkif.h consist merely of merging updates
from the upstream Xen repository.
In dev/xen/blkfront/block.h we add some convenience macros and structure
fields used for indirect-page I/Os: The device records its negotiated limit
on the number of indirect pages used, while each I/O command structure gains
permanently allocated page(s) for indirect page references and the Xen grant
references for those pages.
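
As a worked example of the new macros (assuming 4 kB pages and the 8-byte
struct blkif_request_segment of the blkif protocol):

    XBD_MAX_SEGMENTS_PER_PAGE      = 4096 / 8 = 512
    XBD_INDIRECT_SEGS_TO_PAGES(33) = (33 + 512 - 1) / 512 = 1

so even a MAXPHYS-sized (128 kB) I/O, which needs 33 segments once the page of
slack for unaligned buffers is counted, fits its whole segment list in a
single indirect page.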
In dev/xen/blkfront/blkfront.c we now check in xbd_queue_cb whether a request
is small enough to handle without an indirection page, and either follow the
previous behaviour or use new code for issuing an indirect segment I/O. In
xbd_connect we read the maximum indirect segment I/O size supported by the
backend and select the maximum size we will use, then allocate the pages and
Xen grant references for each I/O command structure. In xbd_free those grants
and pages are released.
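
Whether a given backend advertises the feature can be checked with
xenstore-read; the exact path depends on the toolstack, and the
<frontend-domid> and <devid> components below are placeholders (this also
assumes the backend lives in domain 0, the common case):

    xenstore-read /local/domain/0/backend/vbd/<frontend-domid>/<devid>/feature-max-indirect-segments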
A new loader tunable, hw.xbd.xbd_enable_indirect, can be set to 0 in order to
disable this functionality; it works by pretending that the backend does not
support this feature. Some backends exhibit a loss of performance with large
I/Os, so users may wish to test with and without this functionality enabled.
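
For example, to disable indirect segment I/Os, set the tunable in
/boot/loader.conf; because the sysctl is created with CTLFLAG_RDTUN, the
active value can also be read back at runtime:

    # /boot/loader.conf
    hw.xbd.xbd_enable_indirect="0"

    # after boot, confirm the setting
    sysctl hw.xbd.xbd_enable_indirect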
Reviewed by: royger
MFC after: 3 days
Relnotes: yes
Diffstat (limited to 'sys/dev/xen')
-rw-r--r--  sys/dev/xen/blkfront/blkfront.c | 109
-rw-r--r--  sys/dev/xen/blkfront/block.h    |  25
2 files changed, 111 insertions(+), 23 deletions(-)
diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c
index 0610732..312a077 100644
--- a/sys/dev/xen/blkfront/blkfront.c
+++ b/sys/dev/xen/blkfront/blkfront.c
@@ -84,6 +84,11 @@ static void xbd_startio(struct xbd_softc *sc);
 /*---------------------------- Global Static Data ----------------------------*/
 static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
 
+static int xbd_enable_indirect = 1;
+SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters");
+SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN,
+    &xbd_enable_indirect, 0, "Enable xbd indirect segments");
+
 /*---------------------------- Command Processing ----------------------------*/
 static void
 xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag)
@@ -205,7 +210,6 @@ xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
 {
         struct xbd_softc *sc;
         struct xbd_command *cm;
-        blkif_request_t *ring_req;
         int op;
 
         cm = arg;
@@ -218,22 +222,47 @@ xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
                 return;
         }
 
-        KASSERT(nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST,
+        KASSERT(nsegs <= sc->xbd_max_request_segments,
             ("Too many segments in a blkfront I/O"));
 
-        /* Fill out a communications ring structure. */
-        ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
-        sc->xbd_ring.req_prod_pvt++;
-        ring_req->id = cm->cm_id;
-        ring_req->operation = cm->cm_operation;
-        ring_req->sector_number = cm->cm_sector_number;
-        ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
-        ring_req->nr_segments = nsegs;
-        cm->cm_nseg = nsegs;
-        xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
-            xenbus_get_otherend_id(sc->xbd_dev),
-            cm->cm_operation == BLKIF_OP_WRITE,
-            cm->cm_sg_refs, ring_req->seg);
+        if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                blkif_request_t *ring_req;
+
+                /* Fill out a blkif_request_t structure. */
+                ring_req = (blkif_request_t *)
+                    RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+                sc->xbd_ring.req_prod_pvt++;
+                ring_req->id = cm->cm_id;
+                ring_req->operation = cm->cm_operation;
+                ring_req->sector_number = cm->cm_sector_number;
+                ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+                ring_req->nr_segments = nsegs;
+                cm->cm_nseg = nsegs;
+                xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+                    xenbus_get_otherend_id(sc->xbd_dev),
+                    cm->cm_operation == BLKIF_OP_WRITE,
+                    cm->cm_sg_refs, ring_req->seg);
+        } else {
+                blkif_request_indirect_t *ring_req;
+
+                /* Fill out a blkif_request_indirect_t structure. */
+                ring_req = (blkif_request_indirect_t *)
+                    RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+                sc->xbd_ring.req_prod_pvt++;
+                ring_req->id = cm->cm_id;
+                ring_req->operation = BLKIF_OP_INDIRECT;
+                ring_req->indirect_op = cm->cm_operation;
+                ring_req->sector_number = cm->cm_sector_number;
+                ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+                ring_req->nr_segments = nsegs;
+                cm->cm_nseg = nsegs;
+                xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+                    xenbus_get_otherend_id(sc->xbd_dev),
+                    cm->cm_operation == BLKIF_OP_WRITE,
+                    cm->cm_sg_refs, cm->cm_indirectionpages);
+                memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs,
+                    sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages);
+        }
 
         if (cm->cm_operation == BLKIF_OP_READ)
                 op = BUS_DMASYNC_PREREAD;
@@ -1015,6 +1044,16 @@ xbd_free(struct xbd_softc *sc)
                         cm->cm_sg_refs = NULL;
                 }
 
+                if (cm->cm_indirectionpages != NULL) {
+                        gnttab_end_foreign_access_references(
+                            sc->xbd_max_request_indirectpages,
+                            &cm->cm_indirectionrefs[0]);
+                        contigfree(cm->cm_indirectionpages, PAGE_SIZE *
+                            sc->xbd_max_request_indirectpages,
+                            M_XENBLOCKFRONT);
+                        cm->cm_indirectionpages = NULL;
+                }
+
                 bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map);
         }
         free(sc->xbd_shadow, M_XENBLOCKFRONT);
@@ -1051,9 +1090,6 @@ xbd_initialize(struct xbd_softc *sc)
          */
         max_ring_page_order = 0;
         sc->xbd_ring_pages = 1;
-        sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
-        sc->xbd_max_request_size =
-            XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
 
         /*
          * Protocol negotiation.
@@ -1167,7 +1203,7 @@ xbd_connect(struct xbd_softc *sc)
         unsigned long sectors, sector_size;
         unsigned int binfo;
         int err, feature_barrier, feature_flush;
-        int i;
+        int i, j;
 
         if (sc->xbd_state == XBD_STATE_CONNECTED ||
             sc->xbd_state == XBD_STATE_SUSPENDED)
@@ -1198,6 +1234,22 @@ xbd_connect(struct xbd_softc *sc)
         if (err == 0 && feature_flush != 0)
                 sc->xbd_flags |= XBDF_FLUSH;
 
+        err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+            "feature-max-indirect-segments", "%" PRIu32,
+            &sc->xbd_max_request_segments, NULL);
+        if ((err != 0) || (xbd_enable_indirect == 0))
+                sc->xbd_max_request_segments = 0;
+        if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS)
+                sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS;
+        if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS))
+                sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS);
+        sc->xbd_max_request_indirectpages =
+            XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments);
+        if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
+                sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+        sc->xbd_max_request_size =
+            XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
+
         /* Allocate datastructures based on negotiated values. */
         err = bus_dma_tag_create(
             bus_get_dma_tag(sc->xbd_dev),       /* parent */
@@ -1230,6 +1282,7 @@ xbd_connect(struct xbd_softc *sc)
 
         for (i = 0; i < sc->xbd_max_requests; i++) {
                 struct xbd_command *cm;
+                void * indirectpages;
 
                 cm = &sc->xbd_shadow[i];
                 cm->cm_sg_refs = malloc(
@@ -1242,6 +1295,24 @@ xbd_connect(struct xbd_softc *sc)
                 cm->cm_sc = sc;
                 if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0)
                         break;
+                if (sc->xbd_max_request_indirectpages > 0) {
+                        indirectpages = contigmalloc(
+                            PAGE_SIZE * sc->xbd_max_request_indirectpages,
+                            M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0);
+                } else {
+                        indirectpages = NULL;
+                }
+                for (j = 0; j < sc->xbd_max_request_indirectpages; j++) {
+                        if (gnttab_grant_foreign_access(
+                            xenbus_get_otherend_id(sc->xbd_dev),
+                            (vtomach(indirectpages) >> PAGE_SHIFT) + j,
+                            1 /* grant read-only access */,
+                            &cm->cm_indirectionrefs[j]))
+                                break;
+                }
+                if (j < sc->xbd_max_request_indirectpages)
+                        break;
+                cm->cm_indirectionpages = indirectpages;
                 xbd_free_command(cm);
         }
 
diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h
index 8dcd322..28c6ff2 100644
--- a/sys/dev/xen/blkfront/block.h
+++ b/sys/dev/xen/blkfront/block.h
@@ -75,11 +75,25 @@ __CONST_RING_SIZE(blkif, PAGE_SIZE * XBD_MAX_RING_PAGES)
 
 /**
- * The maximum mapped region size per request we will allow in a negotiated
- * block-front/back communication channel.
+ * The maximum number of blkif segments which can be provided per indirect
+ * page in an indirect request.
+ */
+#define XBD_MAX_SEGMENTS_PER_PAGE                                       \
+        (PAGE_SIZE / sizeof(struct blkif_request_segment))
+
+/**
+ * The maximum number of blkif segments which can be provided in an indirect
+ * request.
+ */
+#define XBD_MAX_INDIRECT_SEGMENTS                                       \
+        (BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST * XBD_MAX_SEGMENTS_PER_PAGE)
+
+/**
+ * Compute the number of indirect segment pages required for an I/O with the
+ * specified number of indirect segments.
  */
-#define XBD_MAX_REQUEST_SIZE                                            \
-        MIN(MAXPHYS, XBD_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST))
+#define XBD_INDIRECT_SEGS_TO_PAGES(segs)                                \
+        ((segs + XBD_MAX_SEGMENTS_PER_PAGE - 1) / XBD_MAX_SEGMENTS_PER_PAGE)
 
 typedef enum {
         XBDCF_Q_MASK            = 0xFF,
@@ -111,6 +125,8 @@ struct xbd_command {
         blkif_sector_t   cm_sector_number;
         int              cm_status;
         xbd_cbcf_t      *cm_complete;
+        void            *cm_indirectionpages;
+        grant_ref_t      cm_indirectionrefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
 };
 
 typedef enum {
@@ -165,6 +181,7 @@ struct xbd_softc {
         uint32_t                 xbd_max_requests;
         uint32_t                 xbd_max_request_segments;
         uint32_t                 xbd_max_request_size;
+        uint32_t                 xbd_max_request_indirectpages;
         grant_ref_t              xbd_ring_ref[XBD_MAX_RING_PAGES];
         blkif_front_ring_t       xbd_ring;
         xen_intr_handle_t        xen_intr_handle;