summaryrefslogtreecommitdiffstats
path: root/sys/dev
diff options
context:
space:
mode:
authorgibbs <gibbs@FreeBSD.org>2013-06-26 20:39:07 +0000
committergibbs <gibbs@FreeBSD.org>2013-06-26 20:39:07 +0000
commite7f9e472fe57a341b8332bd98447ab01e94e65a3 (patch)
tree016ccaed3fa2945d44f8e207e7d9663a103a5b1a /sys/dev
parentef282086af7ae18f6589ca446e36e606ca79a751 (diff)
downloadFreeBSD-src-e7f9e472fe57a341b8332bd98447ab01e94e65a3.zip
FreeBSD-src-e7f9e472fe57a341b8332bd98447ab01e94e65a3.tar.gz
In the Xen block front driver, take advantage of backends that
support cache flush and write barrier commands. sys/dev/xen/blkfront/block.h: Add per-command flag that specifies that the I/O queue must be frozen after this command is dispatched. This is used to implement "single-stepping". Remove the unused per-command flag that indicates a polled command. Add block device instance flags to record backend features. Add a block device instance flag to indicate the I/O queue is frozen until all outstanding I/O completes. Enhance the queue API to allow the number of elements in a queue to be interrogated. Prefer "inline" to "__inline". sys/dev/xen/blkfront/blkfront.c: Formalize queue freeze semantics by adding methods for both global and command-associated queue freezing. Provide mechanism to freeze the I/O queue until all outstanding I/O completes. Use this to implement barrier semantics (BIO_ORDERED) when the backend does not support BLKIF_OP_WRITE_BARRIER commands. Implement BIO_FLUSH as either a BLKIF_OP_FLUSH_DISKCACHE command or a 0 byte write barrier. Currently, all publicly available backends perform a diskcache flush when processing barrier commands, and this frontend behavior matches what is done in Linux. Simplify code by using new queue length API. Report backend features during device attach and via sysctl. Submitted by: Roger Pau Monné Submitted by: gibbs (Merge with new driver queue API, sysctl support)
Diffstat (limited to 'sys/dev')
-rw-r--r--sys/dev/xen/blkfront/blkfront.c172
-rw-r--r--sys/dev/xen/blkfront/block.h30
2 files changed, 175 insertions, 27 deletions
diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c
index 0b2f2d9..9f1b082 100644
--- a/sys/dev/xen/blkfront/blkfront.c
+++ b/sys/dev/xen/blkfront/blkfront.c
@@ -111,6 +111,26 @@ xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
sc->xbd_qfrozen_cnt--;
}
+static void
+xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
+{
+ if ((cm->cm_flags & XBDCF_FROZEN) != 0)
+ return;
+
+ cm->cm_flags |= XBDCF_FROZEN|cm_flag;
+ xbd_freeze(sc, XBDF_NONE);
+}
+
+static void
+xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
+{
+ if ((cm->cm_flags & XBDCF_FROZEN) == 0)
+ return;
+
+ cm->cm_flags &= ~XBDCF_FROZEN;
+ xbd_thaw(sc, XBDF_NONE);
+}
+
static inline void
xbd_flush_requests(struct xbd_softc *sc)
{
@@ -263,8 +283,7 @@ xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
* we just attempted to map, so we can't rely on bus dma
* blocking for it too.
*/
- xbd_freeze(sc, XBDF_NONE);
- cm->cm_flags |= XBDCF_FROZEN|XBDCF_ASYNC_MAPPING;
+ xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
return (0);
}
@@ -318,10 +337,46 @@ xbd_bio_command(struct xbd_softc *sc)
cm->cm_bp = bp;
cm->cm_data = bp->bio_data;
cm->cm_datalen = bp->bio_bcount;
- cm->cm_operation = (bp->bio_cmd == BIO_READ) ?
- BLKIF_OP_READ : BLKIF_OP_WRITE;
cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
+ switch (bp->bio_cmd) {
+ case BIO_READ:
+ cm->cm_operation = BLKIF_OP_READ;
+ break;
+ case BIO_WRITE:
+ cm->cm_operation = BLKIF_OP_WRITE;
+ if ((bp->bio_flags & BIO_ORDERED) != 0) {
+ if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+ cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+ } else {
+ /*
+ * Single step this command.
+ */
+ cm->cm_flags |= XBDCF_Q_FREEZE;
+ if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+ /*
+ * Wait for in-flight requests to
+ * finish.
+ */
+ xbd_freeze(sc, XBDF_WAIT_IDLE);
+ xbd_requeue_cm(cm, XBD_Q_READY);
+ return (NULL);
+ }
+ }
+ }
+ break;
+ case BIO_FLUSH:
+ if ((sc->xbd_flags & XBDF_FLUSH) != 0)
+ cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
+ else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
+ cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+ else
+ panic("flush request, but no flush support available");
+ break;
+ default:
+ panic("unknown bio command %d", bp->bio_cmd);
+ }
+
return (cm);
}
@@ -356,6 +411,14 @@ xbd_startio(struct xbd_softc *sc)
if (cm == NULL)
break;
+ if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
+ /*
+ * Single step command. Future work is
+ * held off until this command completes.
+ */
+ xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
+ }
+
if ((error = xbd_queue_request(sc, cm)) != 0) {
printf("xbd_queue_request returned %d\n", error);
break;
@@ -425,7 +488,8 @@ xbd_int(void *xsc)
if (cm->cm_operation == BLKIF_OP_READ)
op = BUS_DMASYNC_POSTREAD;
- else if (cm->cm_operation == BLKIF_OP_WRITE)
+ else if (cm->cm_operation == BLKIF_OP_WRITE ||
+ cm->cm_operation == BLKIF_OP_WRITE_BARRIER)
op = BUS_DMASYNC_POSTWRITE;
else
op = 0;
@@ -436,10 +500,7 @@ xbd_int(void *xsc)
* Release any hold this command has on future command
* dispatch.
*/
- if ((cm->cm_flags & XBDCF_FROZEN) != 0) {
- xbd_thaw(sc, XBDF_NONE);
- cm->cm_flags &= ~XBDCF_FROZEN;
- }
+ xbd_cm_thaw(sc, cm);
/*
* Directly call the i/o complete routine to save an
@@ -465,6 +526,9 @@ xbd_int(void *xsc)
sc->xbd_ring.sring->rsp_event = i + 1;
}
+ if (xbd_queue_length(sc, XBD_Q_BUSY) == 0)
+ xbd_thaw(sc, XBDF_WAIT_IDLE);
+
xbd_startio(sc);
if (unlikely(sc->xbd_state == XBD_STATE_SUSPENDED))
@@ -483,13 +547,13 @@ xbd_quiesce(struct xbd_softc *sc)
int mtd;
// While there are outstanding requests
- while (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
+ while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
if (mtd) {
/* Received request completions, update queue. */
xbd_int(sc);
}
- if (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
+ if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
/*
* Still pending requests, wait for the disk i/o
* to complete.
@@ -750,11 +814,55 @@ xbd_free_ring(struct xbd_softc *sc)
}
/*-------------------------- Initialization/Teardown -------------------------*/
+static int
+xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
+{
+ struct sbuf sb;
+ int feature_cnt;
+
+ sbuf_new(&sb, features, len, SBUF_FIXEDLEN);
+
+ feature_cnt = 0;
+ if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
+ sbuf_printf(&sb, "flush");
+ feature_cnt++;
+ }
+
+ if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+ if (feature_cnt != 0)
+ sbuf_printf(&sb, ", ");
+ sbuf_printf(&sb, "write_barrier");
+ feature_cnt++;
+ }
+
+ (void) sbuf_finish(&sb);
+ return (sbuf_len(&sb));
+}
+
+static int
+xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
+{
+ char features[80];
+ struct xbd_softc *sc = arg1;
+ int error;
+ int len;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+
+ len = xbd_feature_string(sc, features, sizeof(features));
+
+ /* len is -1 on error, which will make the SYSCTL_OUT a no-op. */
+ return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
+}
+
static void
xbd_setup_sysctl(struct xbd_softc *xbd)
{
struct sysctl_ctx_list *sysctl_ctx = NULL;
struct sysctl_oid *sysctl_tree = NULL;
+ struct sysctl_oid_list *children;
sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
if (sysctl_ctx == NULL)
@@ -764,22 +872,31 @@ xbd_setup_sysctl(struct xbd_softc *xbd)
if (sysctl_tree == NULL)
return;
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+ children = SYSCTL_CHILDREN(sysctl_tree);
+ SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
"maximum outstanding requests (negotiated)");
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"max_request_segments", CTLFLAG_RD,
&xbd->xbd_max_request_segments, 0,
"maximum number of pages per requests (negotiated)");
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+ SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
"maximum size in bytes of a request (negotiated)");
- SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+ SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
"communication channel pages (negotiated)");
+
+ SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
+ "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0,
+ xbd_sysctl_features, "A", "protocol features (negotiated)");
}
/*
@@ -854,6 +971,7 @@ int
xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
int vdevice, uint16_t vdisk_info, unsigned long sector_size)
{
+ char features[80];
int unit, error = 0;
const char *name;
@@ -861,9 +979,14 @@ xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
sc->xbd_unit = unit;
- if (strcmp(name, "xbd"))
+ if (strcmp(name, "xbd") != 0)
device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
+ if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
+ device_printf(sc->xbd_dev, "features: %s\n",
+ features);
+ }
+
sc->xbd_disk = disk_alloc();
sc->xbd_disk->d_unit = sc->xbd_unit;
sc->xbd_disk->d_open = xbd_open;
@@ -878,6 +1001,11 @@ xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
sc->xbd_disk->d_mediasize = sectors * sector_size;
sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
sc->xbd_disk->d_flags = 0;
+ if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
+ sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
+ device_printf(sc->xbd_dev,
+ "synchronize cache commands enabled.\n");
+ }
disk_create(sc->xbd_disk, DISK_VERSION);
return error;
@@ -1183,7 +1311,7 @@ xbd_connect(struct xbd_softc *sc)
device_t dev = sc->xbd_dev;
unsigned long sectors, sector_size;
unsigned int binfo;
- int err, feature_barrier;
+ int err, feature_barrier, feature_flush;
if (sc->xbd_state == XBD_STATE_CONNECTED ||
sc->xbd_state == XBD_STATE_SUSPENDED)
@@ -1205,9 +1333,15 @@ xbd_connect(struct xbd_softc *sc)
err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
"feature-barrier", "%lu", &feature_barrier,
NULL);
- if (!err || feature_barrier)
+ if (err == 0 && feature_barrier != 0)
sc->xbd_flags |= XBDF_BARRIER;
+ err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+ "feature-flush-cache", "%lu", &feature_flush,
+ NULL);
+ if (err == 0 && feature_flush != 0)
+ sc->xbd_flags |= XBDF_FLUSH;
+
if (sc->xbd_disk == NULL) {
device_printf(dev, "%juMB <%s> at %s",
(uintmax_t) sectors / (1048576 / sector_size),
@@ -1339,7 +1473,7 @@ xbd_suspend(device_t dev)
/* Wait for outstanding I/O to drain. */
retval = 0;
- while (TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq) == 0) {
+ while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock,
PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
retval = EBUSY;
diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h
index 7cfe241..0f7d6cb 100644
--- a/sys/dev/xen/blkfront/block.h
+++ b/sys/dev/xen/blkfront/block.h
@@ -94,8 +94,11 @@
typedef enum {
XBDCF_Q_MASK = 0xFF,
+ /* This command has contributed to xbd_qfrozen_cnt. */
XBDCF_FROZEN = 1<<8,
- XBDCF_POLLED = 1<<9,
+ /* Freeze the command queue on dispatch (i.e. single step command). */
+ XBDCF_Q_FREEZE = 1<<9,
+ /* Bus DMA returned EINPROGRESS for this command. */
XBDCF_ASYNC_MAPPING = 1<<10,
XBDCF_INITIALIZER = XBDCF_Q_MASK
} xbdc_flag_t;
@@ -147,9 +150,14 @@ typedef enum {
XBDF_NONE = 0,
XBDF_OPEN = 1 << 0, /* drive is open (can't shut down) */
XBDF_BARRIER = 1 << 1, /* backend supports barriers */
- XBDF_READY = 1 << 2, /* Is ready */
- XBDF_CM_SHORTAGE = 1 << 3, /* Free cm resource shortage active. */
- XBDF_GNT_SHORTAGE = 1 << 4 /* Grant ref resource shortage active */
+ XBDF_FLUSH = 1 << 2, /* backend supports flush */
+ XBDF_READY = 1 << 3, /* Is ready */
+ XBDF_CM_SHORTAGE = 1 << 4, /* Free cm resource shortage active. */
+ XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */
+	XBDF_WAIT_IDLE	= 1 << 6  /*
+				   * No new work until outstanding work
+				   * completes.
+				   */
} xbd_flag_t;
/*
@@ -206,6 +214,12 @@ xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index)
sc->xbd_cm_q[index].q_length--;
}
+static inline uint32_t
+xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index)
+{
+ return (sc->xbd_cm_q[index].q_length);
+}
+
static inline void
xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index)
{
@@ -289,27 +303,27 @@ xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index)
xbd_removed_qentry(cm->cm_sc, index);
}
-static __inline void
+static inline void
xbd_initq_bio(struct xbd_softc *sc)
{
bioq_init(&sc->xbd_bioq);
}
-static __inline void
+static inline void
xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp)
{
bioq_insert_tail(&sc->xbd_bioq, bp);
xbd_added_qentry(sc, XBD_Q_BIO);
}
-static __inline void
+static inline void
xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp)
{
bioq_insert_head(&sc->xbd_bioq, bp);
xbd_added_qentry(sc, XBD_Q_BIO);
}
-static __inline struct bio *
+static inline struct bio *
xbd_dequeue_bio(struct xbd_softc *sc)
{
struct bio *bp;
OpenPOWER on IntegriCloud