Diffstat (limited to 'sys')
-rw-r--r--  sys/dev/xen/blkfront/blkfront.c  955
-rw-r--r--  sys/dev/xen/blkfront/block.h     238
2 files changed, 654 insertions, 539 deletions
diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c
index 12d27b3..ddba6a7 100644
--- a/sys/dev/xen/blkfront/blkfront.c
+++ b/sys/dev/xen/blkfront/blkfront.c
@@ -1,6 +1,7 @@
/*
* XenBSD block device driver
*
+ * Copyright (c) 2009 Scott Long, Yahoo!
* Copyright (c) 2009 Frank Suchomel, Citrix
* Copyright (c) 2009 Doug F. Rabson, Citrix
* Copyright (c) 2005 Kip Macy
@@ -46,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <machine/resource.h>
#include <machine/intr_machdep.h>
#include <machine/vmparam.h>
+#include <sys/bus_dma.h>
#include <machine/xen/xen-os.h>
#include <machine/xen/xenfunc.h>
@@ -63,27 +65,21 @@ __FBSDID("$FreeBSD$");
#include "xenbus_if.h"
-#define ASSERT(S) KASSERT(S, (#S))
/* prototypes */
-struct xb_softc;
+static void xb_free_command(struct xb_command *cm);
static void xb_startio(struct xb_softc *sc);
-static void connect(device_t, struct blkfront_info *);
+static void connect(struct xb_softc *);
static void blkfront_closing(device_t);
static int blkfront_detach(device_t);
-static int talk_to_backend(device_t, struct blkfront_info *);
-static int setup_blkring(device_t, struct blkfront_info *);
+static int talk_to_backend(struct xb_softc *);
+static int setup_blkring(struct xb_softc *);
static void blkif_int(void *);
-#if 0
-static void blkif_restart_queue(void *arg);
-#endif
-static void blkif_recover(struct blkfront_info *);
-static void blkif_completion(struct blk_shadow *);
-static void blkif_free(struct blkfront_info *, int);
+static void blkif_recover(struct xb_softc *);
+static void blkif_completion(struct xb_command *);
+static void blkif_free(struct xb_softc *, int);
+static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);
#define GRANT_INVALID_REF 0
-#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
-
-LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;
/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 0
@@ -92,7 +88,6 @@ LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;
static void vbd_update(void);
#endif
-
#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED 1
#define BLKIF_STATE_SUSPENDED 2
@@ -111,44 +106,34 @@ static char * blkif_status_name[] = {
[BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
};
#endif
-#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
+
#if 0
#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
#else
#define DPRINTK(fmt, args...)
#endif
-static grant_ref_t gref_head;
#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
-static void kick_pending_request_queues(struct blkfront_info *);
+#define BLKIF_MAXIO (32 * 1024)
+
static int blkif_open(struct disk *dp);
static int blkif_close(struct disk *dp);
static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
-static int blkif_queue_request(struct bio *bp);
+static int blkif_queue_request(struct xb_softc *sc, struct xb_command *cm);
static void xb_strategy(struct bio *bp);
// In order to quiesce the device during kernel dumps, outstanding requests to
// DOM0 for disk reads/writes need to be accounted for.
-static int blkif_queued_requests;
static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);
-
/* XXX move to xb_vbd.c when VBD update support is added */
#define MAX_VBDS 64
#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */
#define XBD_SECTOR_SHFT 9
-static struct mtx blkif_io_lock;
-
-static vm_paddr_t
-pfn_to_mfn(vm_paddr_t pfn)
-{
- return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
-}
-
/*
* Translate Linux major/minor to an appropriate name and unit
* number. For HVM guests, this allows us to use the same drive names
@@ -217,23 +202,18 @@ blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
}
int
-xlvbd_add(device_t dev, blkif_sector_t capacity,
- int vdevice, uint16_t vdisk_info, uint16_t sector_size,
- struct blkfront_info *info)
+xlvbd_add(struct xb_softc *sc, blkif_sector_t capacity,
+ int vdevice, uint16_t vdisk_info, uint16_t sector_size)
{
- struct xb_softc *sc;
int unit, error = 0;
const char *name;
blkfront_vdevice_to_unit(vdevice, &unit, &name);
- sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
sc->xb_unit = unit;
- sc->xb_info = info;
- info->sc = sc;
if (strcmp(name, "xbd"))
- device_printf(dev, "attaching as %s%d\n", name, unit);
+ device_printf(sc->xb_dev, "attaching as %s%d\n", name, unit);
memset(&sc->xb_disk, 0, sizeof(sc->xb_disk));
sc->xb_disk = disk_alloc();
@@ -247,31 +227,18 @@ xlvbd_add(device_t dev, blkif_sector_t capacity,
sc->xb_disk->d_drv1 = sc;
sc->xb_disk->d_sectorsize = sector_size;
- /* XXX */
sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
-#if 0
- sc->xb_disk->d_maxsize = DFLTPHYS;
-#else /* XXX: xen can't handle large single i/o requests */
- sc->xb_disk->d_maxsize = 4096;
-#endif
-#ifdef notyet
- XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
- xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
- sc->xb_disk->d_mediasize);
-#endif
+ sc->xb_disk->d_maxsize = BLKIF_MAXIO;
sc->xb_disk->d_flags = 0;
disk_create(sc->xb_disk, DISK_VERSION_00);
- bioq_init(&sc->xb_bioq);
return error;
}
void
-xlvbd_del(struct blkfront_info *info)
+xlvbd_del(struct xb_softc *sc)
{
- struct xb_softc *sc;
- sc = info->sc;
disk_destroy(sc->xb_disk);
}
/************************ end VBD support *****************/
@@ -289,102 +256,147 @@ xb_strategy(struct bio *bp)
if (sc == NULL) {
bp->bio_error = EINVAL;
bp->bio_flags |= BIO_ERROR;
- goto bad;
+ bp->bio_resid = bp->bio_bcount;
+ biodone(bp);
+ return;
}
- DPRINTK("");
-
/*
* Place it in the queue of disk activities for this disk
*/
- mtx_lock(&blkif_io_lock);
+ mtx_lock(&sc->xb_io_lock);
- bioq_disksort(&sc->xb_bioq, bp);
+ xb_enqueue_bio(sc, bp);
xb_startio(sc);
- mtx_unlock(&blkif_io_lock);
+ mtx_unlock(&sc->xb_io_lock);
return;
+}
- bad:
- /*
- * Correctly set the bio to indicate a failed tranfer.
- */
- bp->bio_resid = bp->bio_bcount;
+static void
+xb_bio_complete(struct xb_softc *sc, struct xb_command *cm)
+{
+ struct bio *bp;
+
+ bp = cm->bp;
+
+ if ( unlikely(cm->status != BLKIF_RSP_OKAY) ) {
+ disk_err(bp, "disk error" , -1, 0);
+ printf(" status: %x\n", cm->status);
+ bp->bio_flags |= BIO_ERROR;
+ }
+
+ if (bp->bio_flags & BIO_ERROR)
+ bp->bio_error = EIO;
+ else
+ bp->bio_resid = 0;
+
+ xb_free_command(cm);
biodone(bp);
- return;
}
-static void xb_quiesce(struct blkfront_info *info);
// Quiesce the disk writes for a dump file before allowing the next buffer.
static void
-xb_quiesce(struct blkfront_info *info)
+xb_quiesce(struct xb_softc *sc)
{
int mtd;
// While there are outstanding requests
- while (blkif_queued_requests) {
- RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, mtd);
+ while (!TAILQ_EMPTY(&sc->cm_busy)) {
+ RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, mtd);
if (mtd) {
- // Recieved request completions, update queue.
- blkif_int(info);
+ /* Received request completions, update queue. */
+ blkif_int(sc);
}
- if (blkif_queued_requests) {
- // Still pending requests, wait for the disk i/o to complete
+ if (!TAILQ_EMPTY(&sc->cm_busy)) {
+ /*
+ * Still pending requests, wait for the disk i/o
+ * to complete.
+ */
HYPERVISOR_yield();
}
}
}
-// Some bio structures for dumping core
-#define DUMP_BIO_NO 16 // 16 * 4KB = 64KB dump block
-static struct bio xb_dump_bp[DUMP_BIO_NO];
+/* Kernel dump function for a paravirtualized disk device */
+static void
+xb_dump_complete(struct xb_command *cm)
+{
+
+ xb_enqueue_complete(cm);
+}
-// Kernel dump function for a paravirtualized disk device
static int
xb_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
size_t length)
{
- int sbp;
- int mbp;
- size_t chunk;
- struct disk *dp = arg;
- struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
- int rc = 0;
-
- xb_quiesce(sc->xb_info); // All quiet on the western front.
- if (length > 0) {
- // If this lock is held, then this module is failing, and a successful
- // kernel dump is highly unlikely anyway.
- mtx_lock(&blkif_io_lock);
- // Split the 64KB block into 16 4KB blocks
- for (sbp=0; length>0 && sbp<DUMP_BIO_NO; sbp++) {
- chunk = length > PAGE_SIZE ? PAGE_SIZE : length;
- xb_dump_bp[sbp].bio_disk = dp;
- xb_dump_bp[sbp].bio_pblkno = offset / dp->d_sectorsize;
- xb_dump_bp[sbp].bio_bcount = chunk;
- xb_dump_bp[sbp].bio_resid = chunk;
- xb_dump_bp[sbp].bio_data = virtual;
- xb_dump_bp[sbp].bio_cmd = BIO_WRITE;
- xb_dump_bp[sbp].bio_done = NULL;
-
- bioq_disksort(&sc->xb_bioq, &xb_dump_bp[sbp]);
-
- length -= chunk;
- offset += chunk;
- virtual = (char *) virtual + chunk;
+ struct disk *dp = arg;
+ struct xb_softc *sc = (struct xb_softc *) dp->d_drv1;
+ struct xb_command *cm;
+ size_t chunk;
+ int sbp;
+ int rc = 0;
+
+ if (length <= 0)
+ return (rc);
+
+ xb_quiesce(sc); /* All quiet on the western front. */
+
+ /*
+ * If this lock is held, then this module is failing, and a
+ * successful kernel dump is highly unlikely anyway.
+ */
+ mtx_lock(&sc->xb_io_lock);
+
+ /* Split the 64KB block as needed */
+ for (sbp=0; length > 0; sbp++) {
+ cm = xb_dequeue_free(sc);
+ if (cm == NULL) {
+ mtx_unlock(&sc->xb_io_lock);
+ device_printf(sc->xb_dev, "dump: no more commands?\n");
+ return (EBUSY);
}
- // Tell DOM0 to do the I/O
- xb_startio(sc);
- mtx_unlock(&blkif_io_lock);
-
- // Must wait for the completion: the dump routine reuses the same
- // 16 x 4KB buffer space.
- xb_quiesce(sc->xb_info); // All quite on the eastern front
- // If there were any errors, bail out...
- for (mbp=0; mbp<sbp; mbp++) {
- if ((rc = xb_dump_bp[mbp].bio_error)) break;
+
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &cm->gref_head) < 0) {
+ xb_free_command(cm);
+ mtx_unlock(&sc->xb_io_lock);
+ device_printf(sc->xb_dev, "no more grant allocs?\n");
+ return (EBUSY);
}
+
+ chunk = length > BLKIF_MAXIO ? BLKIF_MAXIO : length;
+ cm->data = virtual;
+ cm->datalen = chunk;
+ cm->operation = BLKIF_OP_WRITE;
+ cm->sector_number = offset / dp->d_sectorsize;
+ cm->cm_complete = xb_dump_complete;
+
+ xb_enqueue_ready(cm);
+
+ length -= chunk;
+ offset += chunk;
+ virtual = (char *) virtual + chunk;
}
+
+ /* Tell DOM0 to do the I/O */
+ xb_startio(sc);
+ mtx_unlock(&sc->xb_io_lock);
+
+ /* Poll for the completion. */
+ xb_quiesce(sc); /* All quiet on the eastern front */
+
+ /* If there were any errors, bail out... */
+ while ((cm = xb_dequeue_complete(sc)) != NULL) {
+ if (cm->status != BLKIF_RSP_OKAY) {
+ device_printf(sc->xb_dev,
+ "Dump I/O failed at sector %jd\n",
+ cm->sector_number);
+ rc = EIO;
+ }
+ xb_free_command(cm);
+ }
+
return (rc);
}
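
The rewritten xb_dump() above splits the dump span into BLKIF_MAXIO-sized chunks and builds one write command per chunk. Below is a minimal userland sketch of just that chunking loop, assuming a stand-in emit() routine and a fixed 512-byte sector size; the struct and function names are illustrative, and only the loop arithmetic mirrors the driver.

/*
 * Sketch of the xb_dump() chunking loop: walk a (virtual, offset, length)
 * span in BLKIF_MAXIO-sized pieces and emit one write command per piece.
 * Names here are stand-ins, not driver API.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define BLKIF_MAXIO	(32 * 1024)
#define SECTOR_SIZE	512		/* assumed, matches XBD_SECTOR_SIZE */

struct dump_cmd {
	void	*data;
	size_t	 datalen;
	uint64_t sector_number;
};

static void
emit(const struct dump_cmd *cm)
{
	/* Stand-in for building a ring request and calling xb_startio(). */
	printf("write %zu bytes at sector %llu\n",
	    cm->datalen, (unsigned long long)cm->sector_number);
}

static int
dump_span(void *virtual, uint64_t offset, size_t length)
{
	struct dump_cmd cm;
	size_t chunk;

	while (length > 0) {
		chunk = length > BLKIF_MAXIO ? BLKIF_MAXIO : length;
		cm.data = virtual;
		cm.datalen = chunk;
		cm.sector_number = offset / SECTOR_SIZE;
		emit(&cm);

		length -= chunk;
		offset += chunk;
		virtual = (char *)virtual + chunk;
	}
	return (0);
}

int
main(void)
{
	static char buf[70 * 1024];	/* 70KB span -> 32K + 32K + 6K */

	return (dump_span(buf, 4096, sizeof(buf)));
}
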
@@ -410,9 +422,10 @@ blkfront_probe(device_t dev)
static int
blkfront_attach(device_t dev)
{
- int error, vdevice, i, unit;
- struct blkfront_info *info;
+ struct xb_softc *sc;
+ struct xb_command *cm;
const char *name;
+ int error, vdevice, i, unit;
/* FIXME: Use dynamic device id if this is not set. */
error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
@@ -427,29 +440,56 @@ blkfront_attach(device_t dev)
if (!strcmp(name, "xbd"))
device_set_unit(dev, unit);
- info = device_get_softc(dev);
-
- /*
- * XXX debug only
- */
- for (i = 0; i < sizeof(*info); i++)
- if (((uint8_t *)info)[i] != 0)
- panic("non-null memory");
+ sc = device_get_softc(dev);
+ mtx_init(&sc->xb_io_lock, "blkfront i/o lock", NULL, MTX_DEF);
+ xb_initq_free(sc);
+ xb_initq_busy(sc);
+ xb_initq_ready(sc);
+ xb_initq_complete(sc);
+ xb_initq_bio(sc);
+
+ /* Allocate parent DMA tag */
+ if (bus_dma_tag_create( NULL, /* parent */
+ 4096, 0, /* algnmnt, boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ BLKIF_MAXIO, /* maxsize */
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, /* nsegments */
+ PAGE_SIZE, /* maxsegsize */
+ BUS_DMA_ALLOCNOW, /* flags */
+ busdma_lock_mutex, /* lockfunc */
+ &sc->xb_io_lock, /* lockarg */
+ &sc->xb_io_dmat)) {
+ device_printf(dev, "Cannot allocate parent DMA tag\n");
+ return (ENOMEM);
+ }
+#ifdef notyet
+ if (bus_dma_tag_set(sc->xb_io_dmat, BUS_DMA_SET_MINSEGSZ,
+ XBD_SECTOR_SIZE)) {
+ device_printf(dev, "Cannot set sector size\n");
+ return (EINVAL);
+ }
+#endif
- info->shadow_free = 0;
- info->xbdev = dev;
- info->vdevice = vdevice;
- info->connected = BLKIF_STATE_DISCONNECTED;
+ sc->xb_dev = dev;
+ sc->vdevice = vdevice;
+ sc->connected = BLKIF_STATE_DISCONNECTED;
/* work queue needed ? */
- for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.id = i+1;
- info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+ for (i = 0; i < BLK_RING_SIZE; i++) {
+ cm = &sc->shadow[i];
+ cm->req.id = i;
+ cm->cm_sc = sc;
+ if (bus_dmamap_create(sc->xb_io_dmat, 0, &cm->map) != 0)
+ break;
+ xb_free_command(cm);
+ }
/* Front end dir is a number, which is used as the id. */
- info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
+ sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
- error = talk_to_backend(dev, info);
+ error = talk_to_backend(sc);
if (error)
return (error);
@@ -459,12 +499,12 @@ blkfront_attach(device_t dev)
static int
blkfront_suspend(device_t dev)
{
- struct blkfront_info *info = device_get_softc(dev);
+ struct xb_softc *sc = device_get_softc(dev);
/* Prevent new requests being issued until we fix things up. */
- mtx_lock(&blkif_io_lock);
- info->connected = BLKIF_STATE_SUSPENDED;
- mtx_unlock(&blkif_io_lock);
+ mtx_lock(&sc->xb_io_lock);
+ sc->connected = BLKIF_STATE_SUSPENDED;
+ mtx_unlock(&sc->xb_io_lock);
return (0);
}
@@ -472,29 +512,31 @@ blkfront_suspend(device_t dev)
static int
blkfront_resume(device_t dev)
{
- struct blkfront_info *info = device_get_softc(dev);
+ struct xb_softc *sc = device_get_softc(dev);
int err;
DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
- blkif_free(info, 1);
- err = talk_to_backend(dev, info);
- if (info->connected == BLKIF_STATE_SUSPENDED && !err)
- blkif_recover(info);
+ blkif_free(sc, 1);
+ err = talk_to_backend(sc);
+ if (sc->connected == BLKIF_STATE_SUSPENDED && !err)
+ blkif_recover(sc);
return (err);
}
/* Common code used when first setting up, and when resuming. */
static int
-talk_to_backend(device_t dev, struct blkfront_info *info)
+talk_to_backend(struct xb_softc *sc)
{
- const char *message = NULL;
+ device_t dev;
struct xenbus_transaction xbt;
+ const char *message = NULL;
int err;
/* Create shared ring, alloc event channel. */
- err = setup_blkring(dev, info);
+ dev = sc->xb_dev;
+ err = setup_blkring(sc);
if (err)
goto out;
@@ -506,13 +548,13 @@ talk_to_backend(device_t dev, struct blkfront_info *info)
}
err = xenbus_printf(xbt, xenbus_get_node(dev),
- "ring-ref","%u", info->ring_ref);
+ "ring-ref","%u", sc->ring_ref);
if (err) {
message = "writing ring-ref";
goto abort_transaction;
}
err = xenbus_printf(xbt, xenbus_get_node(dev),
- "event-channel", "%u", irq_to_evtchn_port(info->irq));
+ "event-channel", "%u", irq_to_evtchn_port(sc->irq));
if (err) {
message = "writing event-channel";
goto abort_transaction;
@@ -540,47 +582,47 @@ talk_to_backend(device_t dev, struct blkfront_info *info)
if (message)
xenbus_dev_fatal(dev, err, "%s", message);
destroy_blkring:
- blkif_free(info, 0);
+ blkif_free(sc, 0);
out:
return err;
}
static int
-setup_blkring(device_t dev, struct blkfront_info *info)
+setup_blkring(struct xb_softc *sc)
{
blkif_sring_t *sring;
int error;
- info->ring_ref = GRANT_INVALID_REF;
+ sc->ring_ref = GRANT_INVALID_REF;
sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
if (sring == NULL) {
- xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
+ xenbus_dev_fatal(sc->xb_dev, ENOMEM, "allocating shared ring");
return ENOMEM;
}
SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+ FRONT_RING_INIT(&sc->ring, sring, PAGE_SIZE);
- error = xenbus_grant_ring(dev,
- (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
+ error = xenbus_grant_ring(sc->xb_dev,
+ (vtomach(sc->ring.sring) >> PAGE_SHIFT), &sc->ring_ref);
if (error) {
free(sring, M_DEVBUF);
- info->ring.sring = NULL;
+ sc->ring.sring = NULL;
goto fail;
}
- error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
- "xbd", (driver_intr_t *)blkif_int, info,
- INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
+ error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(sc->xb_dev),
+ "xbd", (driver_intr_t *)blkif_int, sc,
+ INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq);
if (error) {
- xenbus_dev_fatal(dev, error,
+ xenbus_dev_fatal(sc->xb_dev, error,
"bind_evtchn_to_irqhandler failed");
goto fail;
}
return (0);
fail:
- blkif_free(info, 0);
+ blkif_free(sc, 0);
return (error);
}
@@ -591,7 +633,7 @@ setup_blkring(device_t dev, struct blkfront_info *info)
static int
blkfront_backend_changed(device_t dev, XenbusState backend_state)
{
- struct blkfront_info *info = device_get_softc(dev);
+ struct xb_softc *sc = device_get_softc(dev);
DPRINTK("backend_state=%d\n", backend_state);
@@ -606,22 +648,22 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
break;
case XenbusStateConnected:
- connect(dev, info);
+ connect(sc);
break;
case XenbusStateClosing:
- if (info->users > 0)
+ if (sc->users > 0)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
blkfront_closing(dev);
#ifdef notyet
- bd = bdget(info->dev);
+ bd = bdget(sc->dev);
if (bd == NULL)
xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
down(&bd->bd_sem);
- if (info->users > 0)
+ if (sc->users > 0)
xenbus_dev_error(dev, -EBUSY,
"Device in use; refusing to close");
else
@@ -639,14 +681,15 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
** the details about the physical device - #sectors, size, etc).
*/
static void
-connect(device_t dev, struct blkfront_info *info)
+connect(struct xb_softc *sc)
{
+ device_t dev = sc->xb_dev;
unsigned long sectors, sector_size;
unsigned int binfo;
- int err;
+ int err, feature_barrier;
- if( (info->connected == BLKIF_STATE_CONNECTED) ||
- (info->connected == BLKIF_STATE_SUSPENDED) )
+ if( (sc->connected == BLKIF_STATE_CONNECTED) ||
+ (sc->connected == BLKIF_STATE_SUSPENDED) )
return;
DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
@@ -663,10 +706,10 @@ connect(device_t dev, struct blkfront_info *info)
return;
}
err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
- "feature-barrier", "%lu", &info->feature_barrier,
+ "feature-barrier", "%lu", &feature_barrier,
NULL);
- if (err)
- info->feature_barrier = 0;
+ if (!err || feature_barrier)
+ sc->xb_flags |= XB_BARRIER;
device_printf(dev, "%juMB <%s> at %s",
(uintmax_t) sectors / (1048576 / sector_size),
@@ -674,20 +717,17 @@ connect(device_t dev, struct blkfront_info *info)
xenbus_get_node(dev));
bus_print_child_footer(device_get_parent(dev), dev);
- xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);
+ xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
(void)xenbus_set_state(dev, XenbusStateConnected);
/* Kick pending requests. */
- mtx_lock(&blkif_io_lock);
- info->connected = BLKIF_STATE_CONNECTED;
- kick_pending_request_queues(info);
- mtx_unlock(&blkif_io_lock);
- info->is_ready = 1;
+ mtx_lock(&sc->xb_io_lock);
+ sc->connected = BLKIF_STATE_CONNECTED;
+ xb_startio(sc);
+ sc->xb_flags |= XB_READY;
+ mtx_unlock(&sc->xb_io_lock);
-#if 0
- add_disk(info->gd);
-#endif
}
/**
@@ -699,14 +739,14 @@ connect(device_t dev, struct blkfront_info *info)
static void
blkfront_closing(device_t dev)
{
- struct blkfront_info *info = device_get_softc(dev);
+ struct xb_softc *sc = device_get_softc(dev);
DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
- if (info->mi) {
+ if (sc->mi) {
DPRINTK("Calling xlvbd_del\n");
- xlvbd_del(info);
- info->mi = NULL;
+ xlvbd_del(sc);
+ sc->mi = NULL;
}
xenbus_set_state(dev, XenbusStateClosed);
@@ -716,92 +756,33 @@ blkfront_closing(device_t dev)
static int
blkfront_detach(device_t dev)
{
- struct blkfront_info *info = device_get_softc(dev);
+ struct xb_softc *sc = device_get_softc(dev);
DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
- blkif_free(info, 0);
+ blkif_free(sc, 0);
+ mtx_destroy(&sc->xb_io_lock);
return 0;
}
-static inline int
-GET_ID_FROM_FREELIST(struct blkfront_info *info)
-{
- unsigned long nfree = info->shadow_free;
-
- KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree));
- info->shadow_free = info->shadow[nfree].req.id;
- info->shadow[nfree].req.id = 0x0fffffee; /* debug */
- atomic_add_int(&blkif_queued_requests, 1);
- return nfree;
-}
-
static inline void
-ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
-{
- info->shadow[id].req.id = info->shadow_free;
- info->shadow[id].request = 0;
- info->shadow_free = id;
- atomic_subtract_int(&blkif_queued_requests, 1);
-}
-
-static inline void
-flush_requests(struct blkfront_info *info)
+flush_requests(struct xb_softc *sc)
{
int notify;
- RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify);
if (notify)
- notify_remote_via_irq(info->irq);
-}
-
-static void
-kick_pending_request_queues(struct blkfront_info *info)
-{
- /* XXX check if we can't simplify */
-#if 0
- if (!RING_FULL(&info->ring)) {
- /* Re-enable calldowns. */
- blk_start_queue(info->rq);
- /* Kick things off immediately. */
- do_blkif_request(info->rq);
- }
-#endif
- if (!RING_FULL(&info->ring)) {
-#if 0
- sc = LIST_FIRST(&xbsl_head);
- LIST_REMOVE(sc, entry);
- /* Re-enable calldowns. */
- blk_start_queue(di->rq);
-#endif
- /* Kick things off immediately. */
- xb_startio(info->sc);
- }
-}
-
-#if 0
-/* XXX */
-static void blkif_restart_queue(void *arg)
-{
- struct blkfront_info *info = (struct blkfront_info *)arg;
-
- mtx_lock(&blkif_io_lock);
- kick_pending_request_queues(info);
- mtx_unlock(&blkif_io_lock);
+ notify_remote_via_irq(sc->irq);
}
-#endif
static void blkif_restart_queue_callback(void *arg)
{
-#if 0
- struct blkfront_info *info = (struct blkfront_info *)arg;
- /* XXX BSD equiv ? */
+ struct xb_softc *sc = arg;
- schedule_work(&info->work);
-#endif
+ xb_startio(sc);
}
static int
@@ -815,7 +796,7 @@ blkif_open(struct disk *dp)
}
sc->xb_flags |= XB_OPEN;
- sc->xb_info->users++;
+ sc->users++;
return (0);
}
@@ -827,11 +808,11 @@ blkif_close(struct disk *dp)
if (sc == NULL)
return (ENXIO);
sc->xb_flags &= ~XB_OPEN;
- if (--(sc->xb_info->users) == 0) {
+ if (--(sc->users) == 0) {
/* Check whether we have been instructed to close. We will
have ignored this request initially, as the device was
still mounted. */
- device_t dev = sc->xb_info->xbdev;
+ device_t dev = sc->xb_dev;
XenbusState state =
xenbus_read_driver_state(xenbus_get_otherend_path(dev));
@@ -852,6 +833,18 @@ blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td
return (ENOTTY);
}
+static void
+xb_free_command(struct xb_command *cm)
+{
+
+ KASSERT((cm->cm_flags & XB_ON_XBQ_MASK) == 0,
+ ("Freeing command that is still on a queue\n"));
+
+ cm->cm_flags = 0;
+ cm->bp = NULL;
+ cm->cm_complete = NULL;
+ xb_enqueue_free(cm);
+}
/*
* blkif_queue_request
@@ -863,106 +856,152 @@ blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td
* buffer: buffer to read/write into. this should be a
* virtual address in the guest os.
*/
-static int blkif_queue_request(struct bio *bp)
+static struct xb_command *
+xb_bio_command(struct xb_softc *sc)
{
- caddr_t alignbuf;
- vm_paddr_t buffer_ma;
- blkif_request_t *ring_req;
- unsigned long id;
- uint64_t fsect, lsect;
- struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
- struct blkfront_info *info = sc->xb_info;
- int ref;
-
- if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
- return 1;
-
- if (gnttab_alloc_grant_references(
- BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
- gnttab_request_free_callback(
- &info->callback,
- blkif_restart_queue_callback,
- info,
+ struct xb_command *cm;
+ struct bio *bp;
+
+ if (unlikely(sc->connected != BLKIF_STATE_CONNECTED))
+ return (NULL);
+
+ bp = xb_dequeue_bio(sc);
+ if (bp == NULL)
+ return (NULL);
+
+ if ((cm = xb_dequeue_free(sc)) == NULL) {
+ xb_requeue_bio(sc, bp);
+ return (NULL);
+ }
+
+ if (gnttab_alloc_grant_references(BLKIF_MAX_SEGMENTS_PER_REQUEST,
+ &cm->gref_head) < 0) {
+ gnttab_request_free_callback(&sc->callback,
+ blkif_restart_queue_callback, sc,
BLKIF_MAX_SEGMENTS_PER_REQUEST);
- return 1;
+ xb_requeue_bio(sc, bp);
+ xb_enqueue_free(cm);
+ sc->xb_flags |= XB_FROZEN;
+ return (NULL);
+ }
+
+ /* XXX Can we grab refs before doing the load so that the ref can
+ * be filled out here?
+ */
+ cm->bp = bp;
+ cm->data = bp->bio_data;
+ cm->datalen = bp->bio_bcount;
+ cm->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
+ BLKIF_OP_WRITE;
+ cm->sector_number = (blkif_sector_t)bp->bio_pblkno;
+
+ return (cm);
+}
+
+static int
+blkif_queue_request(struct xb_softc *sc, struct xb_command *cm)
+{
+ int error;
+
+ error = bus_dmamap_load(sc->xb_io_dmat, cm->map, cm->data, cm->datalen,
+ blkif_queue_cb, cm, 0);
+ if (error == EINPROGRESS) {
+ printf("EINPROGRESS\n");
+ sc->xb_flags |= XB_FROZEN;
+ cm->cm_flags |= XB_CMD_FROZEN;
+ return (0);
}
- /* Check if the buffer is properly aligned */
- if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
- int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE :
- PAGE_SIZE;
- caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF,
- M_NOWAIT);
+ return (error);
+}
+
+static void
+blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
+{
+ struct xb_softc *sc;
+ struct xb_command *cm;
+ blkif_request_t *ring_req;
+ vm_paddr_t buffer_ma;
+ uint64_t fsect, lsect;
+ int ref, i, op;
- alignbuf = (char *)roundup2((u_long)newbuf, align);
+ cm = arg;
+ sc = cm->cm_sc;
- /* save a copy of the current buffer */
- bp->bio_driver1 = newbuf;
- bp->bio_driver2 = alignbuf;
+ if (error) {
+ printf("error %d in blkif_queue_cb\n", error);
+ cm->bp->bio_error = EIO;
+ biodone(cm->bp);
+ xb_free_command(cm);
+ return;
+ }
- /* Copy the data for a write */
- if (bp->bio_cmd == BIO_WRITE)
- bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
- } else
- alignbuf = bp->bio_data;
-
/* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&info->ring,
- info->ring.req_prod_pvt);
- id = GET_ID_FROM_FREELIST(info);
- info->shadow[id].request = (unsigned long)bp;
-
- ring_req->id = id;
- ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
- BLKIF_OP_WRITE;
-
- ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno;
- ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
-
- ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer
- * chaining is not supported.
- */
-
- buffer_ma = vtomach(alignbuf);
- fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
- lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
- /* install a grant reference. */
- ref = gnttab_claim_grant_reference(&gref_head);
- KASSERT( ref != -ENOSPC, ("grant_reference failed") );
-
- gnttab_grant_foreign_access_ref(
- ref,
- xenbus_get_otherend_id(info->xbdev),
- buffer_ma >> PAGE_SHIFT,
- ring_req->operation & 1 ); /* ??? */
- info->shadow[id].frame[ring_req->nr_segments] =
- buffer_ma >> PAGE_SHIFT;
-
- ring_req->seg[ring_req->nr_segments] =
- (struct blkif_request_segment) {
- .gref = ref,
- .first_sect = fsect,
- .last_sect = lsect };
-
- ring_req->nr_segments++;
- KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
- ("XEN buffer must be sector aligned"));
- KASSERT(lsect <= 7,
- ("XEN disk driver data cannot cross a page boundary"));
-
- buffer_ma &= ~PAGE_MASK;
+ ring_req = RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
+ if (ring_req == NULL) {
+ /* XXX Is this possible? */
+ printf("ring_req NULL, requeuing\n");
+ xb_enqueue_ready(cm);
+ return;
+ }
+ ring_req->id = cm->req.id;
+ ring_req->operation = cm->operation;
+ ring_req->sector_number = cm->sector_number;
+ ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
+ ring_req->nr_segments = nsegs;
+
+ for (i = 0; i < nsegs; i++) {
+ buffer_ma = segs[i].ds_addr;
+ fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
+ lsect = fsect + (segs[i].ds_len >> XBD_SECTOR_SHFT) - 1;
+
+ KASSERT(lsect <= 7,
+ ("XEN disk driver data cannot cross a page boundary"));
+
+ /* install a grant reference. */
+ ref = gnttab_claim_grant_reference(&cm->gref_head);
+ KASSERT( ref != ENOSPC, ("grant_reference failed") );
+
+ gnttab_grant_foreign_access_ref(
+ ref,
+ xenbus_get_otherend_id(sc->xb_dev),
+ buffer_ma >> PAGE_SHIFT,
+ ring_req->operation & 1 ); /* ??? */
+
+ ring_req->seg[i] =
+ (struct blkif_request_segment) {
+ .gref = ref,
+ .first_sect = fsect,
+ .last_sect = lsect };
+ }
+
+
+ if (cm->operation == BLKIF_OP_READ)
+ op = BUS_DMASYNC_PREREAD;
+ else if (cm->operation == BLKIF_OP_WRITE)
+ op = BUS_DMASYNC_PREWRITE;
+ else
+ op = 0;
+ bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
- info->ring.req_prod_pvt++;
+ sc->ring.req_prod_pvt++;
/* Keep a private copy so we can reissue requests when recovering. */
- info->shadow[id].req = *ring_req;
+ cm->req = *ring_req;
- gnttab_free_grant_references(gref_head);
+ xb_enqueue_busy(cm);
- return 0;
-}
+ gnttab_free_grant_references(cm->gref_head);
+ /*
+ * This flag means that we're probably executing in the busdma swi
+ * instead of in the startio context, so an explicit flush is needed.
+ */
+ if (cm->cm_flags & XB_CMD_FROZEN)
+ flush_requests(sc);
+ return;
+}
/*
* Dequeue buffers and place them in the shared communication ring.
@@ -974,140 +1013,131 @@ static int blkif_queue_request(struct bio *bp)
static void
xb_startio(struct xb_softc *sc)
{
- struct bio *bp;
- int queued = 0;
- struct blkfront_info *info = sc->xb_info;
- DPRINTK("");
+ struct xb_command *cm;
+ int error, queued = 0;
+
+ mtx_assert(&sc->xb_io_lock, MA_OWNED);
+
+ while (!RING_FULL(&sc->ring)) {
+ if (sc->xb_flags & XB_FROZEN)
+ break;
+
+ cm = xb_dequeue_ready(sc);
- mtx_assert(&blkif_io_lock, MA_OWNED);
+ if (cm == NULL)
+ cm = xb_bio_command(sc);
- while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) {
+ if (cm == NULL)
+ break;
- if (RING_FULL(&info->ring))
- goto wait;
-
- if (blkif_queue_request(bp)) {
- wait:
- bioq_insert_head(&sc->xb_bioq, bp);
+ if ((error = blkif_queue_request(sc, cm)) != 0) {
+ printf("blkif_queue_request returned %d\n", error);
break;
}
queued++;
}
if (queued != 0)
- flush_requests(sc->xb_info);
+ flush_requests(sc);
}
static void
blkif_int(void *xsc)
{
- struct xb_softc *sc = NULL;
- struct bio *bp;
+ struct xb_softc *sc = xsc;
+ struct xb_command *cm;
blkif_response_t *bret;
RING_IDX i, rp;
- struct blkfront_info *info = xsc;
- DPRINTK("");
-
- TRACE_ENTER;
+ int op;
- mtx_lock(&blkif_io_lock);
+ mtx_lock(&sc->xb_io_lock);
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
- mtx_unlock(&blkif_io_lock);
+ if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) {
+ mtx_unlock(&sc->xb_io_lock);
return;
}
again:
- rp = info->ring.sring->rsp_prod;
+ rp = sc->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for (i = info->ring.rsp_cons; i != rp; i++) {
- unsigned long id;
-
- bret = RING_GET_RESPONSE(&info->ring, i);
- id = bret->id;
- bp = (struct bio *)info->shadow[id].request;
-
- blkif_completion(&info->shadow[id]);
-
- ADD_ID_TO_FREELIST(info, id);
-
- switch (bret->operation) {
- case BLKIF_OP_READ:
- /* had an unaligned buffer that needs to be copied */
- if (bp->bio_driver1)
- bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
- /* FALLTHROUGH */
- case BLKIF_OP_WRITE:
-
- /* free the copy buffer */
- if (bp->bio_driver1) {
- free(bp->bio_driver1, M_DEVBUF);
- bp->bio_driver1 = NULL;
- }
-
- if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) {
- printf("Bad return from blkdev data request: %x\n",
- bret->status);
- bp->bio_flags |= BIO_ERROR;
- }
+ for (i = sc->ring.rsp_cons; i != rp; i++) {
+ bret = RING_GET_RESPONSE(&sc->ring, i);
+ cm = &sc->shadow[bret->id];
- sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+ xb_remove_busy(cm);
+ blkif_completion(cm);
- if (bp->bio_flags & BIO_ERROR)
- bp->bio_error = EIO;
- else
- bp->bio_resid = 0;
-
- biodone(bp);
- break;
- default:
- panic("received invalid operation");
- break;
- }
+ if (cm->operation == BLKIF_OP_READ)
+ op = BUS_DMASYNC_POSTREAD;
+ else if (cm->operation == BLKIF_OP_WRITE)
+ op = BUS_DMASYNC_POSTWRITE;
+ else
+ op = 0;
+ bus_dmamap_sync(sc->xb_io_dmat, cm->map, op);
+ bus_dmamap_unload(sc->xb_io_dmat, cm->map);
+
+ /*
+ * If commands are completing then resources are probably
+ * being freed as well. It's a cheap assumption even when
+ * wrong.
+ */
+ sc->xb_flags &= ~XB_FROZEN;
+
+ /*
+ * Directly call the i/o complete routine to save an
+ * indirection in the common case.
+ */
+ cm->status = bret->status;
+ if (cm->bp)
+ xb_bio_complete(sc, cm);
+ else if (cm->cm_complete)
+ (cm->cm_complete)(cm);
+ else
+ xb_free_command(cm);
}
- info->ring.rsp_cons = i;
+ sc->ring.rsp_cons = i;
- if (i != info->ring.req_prod_pvt) {
+ if (i != sc->ring.req_prod_pvt) {
int more_to_do;
- RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ RING_FINAL_CHECK_FOR_RESPONSES(&sc->ring, more_to_do);
if (more_to_do)
goto again;
} else {
- info->ring.sring->rsp_event = i + 1;
+ sc->ring.sring->rsp_event = i + 1;
}
- kick_pending_request_queues(info);
+ xb_startio(sc);
- mtx_unlock(&blkif_io_lock);
+ mtx_unlock(&sc->xb_io_lock);
}
static void
-blkif_free(struct blkfront_info *info, int suspend)
+blkif_free(struct xb_softc *sc, int suspend)
{
/* Prevent new requests being issued until we fix things up. */
- mtx_lock(&blkif_io_lock);
- info->connected = suspend ?
+ mtx_lock(&sc->xb_io_lock);
+ sc->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
- mtx_unlock(&blkif_io_lock);
+ mtx_unlock(&sc->xb_io_lock);
/* Free resources associated with old device channel. */
- if (info->ring_ref != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(info->ring_ref,
- info->ring.sring);
- info->ring_ref = GRANT_INVALID_REF;
- info->ring.sring = NULL;
+ if (sc->ring_ref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(sc->ring_ref,
+ sc->ring.sring);
+ sc->ring_ref = GRANT_INVALID_REF;
+ sc->ring.sring = NULL;
}
- if (info->irq)
- unbind_from_irqhandler(info->irq);
- info->irq = 0;
+ if (sc->irq)
+ unbind_from_irqhandler(sc->irq);
+ sc->irq = 0;
}
static void
-blkif_completion(struct blk_shadow *s)
+blkif_completion(struct xb_command *s)
{
int i;
@@ -1116,70 +1146,16 @@ blkif_completion(struct blk_shadow *s)
}
static void
-blkif_recover(struct blkfront_info *info)
+blkif_recover(struct xb_softc *sc)
{
- int i, j;
- blkif_request_t *req;
- struct blk_shadow *copy;
-
- if (!info->sc)
- return;
-
- /* Stage 1: Make a safe copy of the shadow state. */
- copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
- memcpy(copy, info->shadow, sizeof(info->shadow));
-
- /* Stage 2: Set up free list. */
- memset(&info->shadow, 0, sizeof(info->shadow));
- for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.id = i+1;
- info->shadow_free = info->ring.req_prod_pvt;
- info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
-
- /* Stage 3: Find pending requests and requeue them. */
- for (i = 0; i < BLK_RING_SIZE; i++) {
- /* Not in use? */
- if (copy[i].request == 0)
- continue;
-
- /* Grab a request slot and copy shadow state into it. */
- req = RING_GET_REQUEST(
- &info->ring, info->ring.req_prod_pvt);
- *req = copy[i].req;
-
- /* We get a new request id, and must reset the shadow state. */
- req->id = GET_ID_FROM_FREELIST(info);
- memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
-
- /* Rewrite any grant references invalidated by suspend/resume. */
- for (j = 0; j < req->nr_segments; j++)
- gnttab_grant_foreign_access_ref(
- req->seg[j].gref,
- xenbus_get_otherend_id(info->xbdev),
- pfn_to_mfn(info->shadow[req->id].frame[j]),
- 0 /* assume not readonly */);
-
- info->shadow[req->id].req = *req;
-
- info->ring.req_prod_pvt++;
- }
-
- free(copy, M_DEVBUF);
-
- xenbus_set_state(info->xbdev, XenbusStateConnected);
-
- /* Now safe for us to use the shared ring */
- mtx_lock(&blkif_io_lock);
- info->connected = BLKIF_STATE_CONNECTED;
- mtx_unlock(&blkif_io_lock);
-
- /* Send off requeued requests */
- mtx_lock(&blkif_io_lock);
- flush_requests(info);
-
- /* Kick any other new requests queued since we resumed */
- kick_pending_request_queues(info);
- mtx_unlock(&blkif_io_lock);
+ /*
+ * XXX The whole concept of not quiescing and completing all i/o
+ * during suspend, and then hoping to recover and replay the
+ * resulting abandoned I/O during resume, is laughable. At best,
+ * it invalidates the i/o ordering rules required by just about
+ * every filesystem, and at worst it'll corrupt data. The code
+ * has been removed until further notice.
+ */
}
/* ** Driver registration ** */
@@ -1201,11 +1177,8 @@ static device_method_t blkfront_methods[] = {
static driver_t blkfront_driver = {
"xbd",
blkfront_methods,
- sizeof(struct blkfront_info),
+ sizeof(struct xb_softc),
};
devclass_t blkfront_devclass;
DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0);
-
-MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
-
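
The new submission path in blkfront.c replaces the old bounce-buffer code with busdma: blkif_queue_request() hands the buffer to bus_dmamap_load(), and when the load is deferred with EINPROGRESS it marks the command and softc frozen so that blkif_queue_cb() knows to flush the ring itself once it runs from the busdma swi. Below is a minimal userland sketch of that deferred-callback contract; every name is an illustrative stand-in (only the EINPROGRESS/frozen/flush logic mirrors the driver).

/*
 * Sketch of the deferred-callback pattern used by blkif_queue_request()
 * and blkif_queue_cb(): the load either completes synchronously or
 * returns EINPROGRESS, in which case the submitter freezes and the
 * callback flushes when it finally runs.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct command {
	void	(*callback)(struct command *);
	bool	frozen;			/* set when the load was deferred */
};

static struct command *deferred;	/* pretend resources ran out once */
static bool resources_short = true;

/* Stand-in for bus_dmamap_load(): 0 = callback already ran, EINPROGRESS = later. */
static int
map_load(struct command *cm, void (*cb)(struct command *))
{
	cm->callback = cb;
	if (resources_short) {
		resources_short = false;
		deferred = cm;		/* callback will fire "later" */
		return (EINPROGRESS);
	}
	cb(cm);				/* synchronous completion */
	return (0);
}

static void
queue_cb(struct command *cm)
{
	printf("segments ready; enqueue on ring\n");
	if (cm->frozen)			/* deferred path: flush explicitly */
		printf("flush_requests() from callback\n");
}

static int
queue_request(struct command *cm)
{
	int error = map_load(cm, queue_cb);

	if (error == EINPROGRESS) {
		cm->frozen = true;	/* mirrors XB_CMD_FROZEN / XB_FROZEN */
		return (0);		/* not an error; wait for callback */
	}
	return (error);
}

int
main(void)
{
	struct command a = { 0 }, b = { 0 };

	queue_request(&a);			/* deferred */
	if (deferred != NULL)
		deferred->callback(deferred);	/* "later": the busdma swi */
	queue_request(&b);			/* synchronous */
	return (0);
}
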
diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h
index 11ed8e8..32bfc96 100644
--- a/sys/dev/xen/blkfront/block.h
+++ b/sys/dev/xen/blkfront/block.h
@@ -1,7 +1,7 @@
/*
- *
* XenBSD block device driver
*
+ * Copyright (c) 2009 Scott Long, Yahoo!
* Copyright (c) 2009 Frank Suchomel, Citrix
* Copyright (c) 2009 Doug F. Rabson, Citrix
* Copyright (c) 2005 Kip Macy
@@ -23,8 +23,8 @@
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
*
* $FreeBSD$
*/
@@ -50,66 +50,208 @@ struct xlbd_major_info
struct xlbd_type_info *type;
};
-struct blk_shadow {
- blkif_request_t req;
- unsigned long request;
- unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+struct xb_command {
+ TAILQ_ENTRY(xb_command) cm_link;
+ struct xb_softc *cm_sc;
+ u_int cm_flags;
+#define XB_CMD_FROZEN (1<<0)
+#define XB_CMD_POLLED (1<<1)
+#define XB_ON_XBQ_FREE (1<<2)
+#define XB_ON_XBQ_READY (1<<3)
+#define XB_ON_XBQ_BUSY (1<<4)
+#define XB_ON_XBQ_COMPLETE (1<<5)
+#define XB_ON_XBQ_MASK ((1<<2)|(1<<3)|(1<<4)|(1<<5))
+ bus_dmamap_t map;
+ blkif_request_t req;
+ struct bio *bp;
+ grant_ref_t gref_head;
+ void *data;
+ size_t datalen;
+ int operation;
+ blkif_sector_t sector_number;
+ int status;
+ void (* cm_complete)(struct xb_command *);
};
#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+#define XBQ_FREE 0
+#define XBQ_BIO 1
+#define XBQ_READY 2
+#define XBQ_BUSY 3
+#define XBQ_COMPLETE 4
+#define XBQ_COUNT 5
-struct xb_softc {
- device_t xb_dev;
- struct disk *xb_disk; /* disk params */
- struct bio_queue_head xb_bioq; /* sort queue */
- int xb_unit;
- int xb_flags;
- struct blkfront_info *xb_info;
- LIST_ENTRY(xb_softc) entry;
-#define XB_OPEN (1<<0) /* drive is open (can't shut down) */
+struct xb_qstat {
+ uint32_t q_length;
+ uint32_t q_max;
};
+union xb_statrequest {
+ uint32_t ms_item;
+ struct xb_qstat ms_qstat;
+};
/*
- * We have one of these per vbd, whether ide, scsi or 'other'. They
- * hang in private_data off the gendisk structure. We may end up
- * putting all kinds of interesting stuff here :-)
+ * We have one of these per vbd, whether ide, scsi or 'other'.
*/
-struct blkfront_info
-{
- device_t xbdev;
- dev_t dev;
- struct gendisk *gd;
- int vdevice;
- blkif_vdev_t handle;
- int connected;
- int ring_ref;
- blkif_front_ring_t ring;
- unsigned int irq;
- struct xlbd_major_info *mi;
-#if 0
- request_queue_t *rq;
- struct work_struct work;
-#endif
- struct gnttab_free_callback callback;
- struct blk_shadow shadow[BLK_RING_SIZE];
- unsigned long shadow_free;
- struct xb_softc *sc;
- int feature_barrier;
- int is_ready;
+struct xb_softc {
+ device_t xb_dev;
+ struct disk *xb_disk; /* disk params */
+ struct bio_queue_head xb_bioq; /* sort queue */
+ int xb_unit;
+ int xb_flags;
+#define XB_OPEN (1<<0) /* drive is open (can't shut down) */
+#define XB_BARRIER (1 << 1) /* backend supports barriers */
+#define XB_READY (1 << 2) /* Is ready */
+#define XB_FROZEN (1 << 3) /* Waiting for resources */
+ int vdevice;
+ blkif_vdev_t handle;
+ int connected;
+ int ring_ref;
+ blkif_front_ring_t ring;
+ unsigned int irq;
+ struct xlbd_major_info *mi;
+ struct gnttab_free_callback callback;
+ TAILQ_HEAD(,xb_command) cm_free;
+ TAILQ_HEAD(,xb_command) cm_ready;
+ TAILQ_HEAD(,xb_command) cm_busy;
+ TAILQ_HEAD(,xb_command) cm_complete;
+ struct xb_qstat xb_qstat[XBQ_COUNT];
+ bus_dma_tag_t xb_io_dmat;
+
/**
* The number of people holding this device open. We won't allow a
* hot-unplug unless this is 0.
*/
- int users;
+ int users;
+ struct mtx xb_io_lock;
+ struct xb_command shadow[BLK_RING_SIZE];
};
-/* Note that xlvbd_add doesn't call add_disk for you: you're expected
- to call add_disk on info->gd once the disk is properly connected
- up. */
-int xlvbd_add(device_t, blkif_sector_t capacity, int device,
- uint16_t vdisk_info, uint16_t sector_size, struct blkfront_info *info);
-void xlvbd_del(struct blkfront_info *info);
+
+int xlvbd_add(struct xb_softc *, blkif_sector_t capacity, int device,
+ uint16_t vdisk_info, uint16_t sector_size);
+void xlvbd_del(struct xb_softc *);
+
+#define XBQ_ADD(sc, qname) \
+ do { \
+ struct xb_qstat *qs; \
+ \
+ qs = &(sc)->xb_qstat[qname]; \
+ qs->q_length++; \
+ if (qs->q_length > qs->q_max) \
+ qs->q_max = qs->q_length; \
+ } while (0)
+
+#define XBQ_REMOVE(sc, qname) (sc)->xb_qstat[qname].q_length--
+
+#define XBQ_INIT(sc, qname) \
+ do { \
+ sc->xb_qstat[qname].q_length = 0; \
+ sc->xb_qstat[qname].q_max = 0; \
+ } while (0)
+
+#define XBQ_COMMAND_QUEUE(name, index) \
+ static __inline void \
+ xb_initq_ ## name (struct xb_softc *sc) \
+ { \
+ TAILQ_INIT(&sc->cm_ ## name); \
+ XBQ_INIT(sc, index); \
+ } \
+ static __inline void \
+ xb_enqueue_ ## name (struct xb_command *cm) \
+ { \
+ if ((cm->cm_flags & XB_ON_XBQ_MASK) != 0) { \
+ printf("command %p is on another queue, " \
+ "flags = %#x\n", cm, cm->cm_flags); \
+ panic("command is on another queue"); \
+ } \
+ TAILQ_INSERT_TAIL(&cm->cm_sc->cm_ ## name, cm, cm_link); \
+ cm->cm_flags |= XB_ON_ ## index; \
+ XBQ_ADD(cm->cm_sc, index); \
+ } \
+ static __inline void \
+ xb_requeue_ ## name (struct xb_command *cm) \
+ { \
+ if ((cm->cm_flags & XB_ON_XBQ_MASK) != 0) { \
+ printf("command %p is on another queue, " \
+ "flags = %#x\n", cm, cm->cm_flags); \
+ panic("command is on another queue"); \
+ } \
+ TAILQ_INSERT_HEAD(&cm->cm_sc->cm_ ## name, cm, cm_link); \
+ cm->cm_flags |= XB_ON_ ## index; \
+ XBQ_ADD(cm->cm_sc, index); \
+ } \
+ static __inline struct xb_command * \
+ xb_dequeue_ ## name (struct xb_softc *sc) \
+ { \
+ struct xb_command *cm; \
+ \
+ if ((cm = TAILQ_FIRST(&sc->cm_ ## name)) != NULL) { \
+ if ((cm->cm_flags & XB_ON_ ## index) == 0) { \
+ printf("command %p not in queue, " \
+ "flags = %#x, bit = %#x\n", cm, \
+ cm->cm_flags, XB_ON_ ## index); \
+ panic("command not in queue"); \
+ } \
+ TAILQ_REMOVE(&sc->cm_ ## name, cm, cm_link); \
+ cm->cm_flags &= ~XB_ON_ ## index; \
+ XBQ_REMOVE(sc, index); \
+ } \
+ return (cm); \
+ } \
+ static __inline void \
+ xb_remove_ ## name (struct xb_command *cm) \
+ { \
+ if ((cm->cm_flags & XB_ON_ ## index) == 0) { \
+ printf("command %p not in queue, flags = %#x, " \
+ "bit = %#x\n", cm, cm->cm_flags, \
+ XB_ON_ ## index); \
+ panic("command not in queue"); \
+ } \
+ TAILQ_REMOVE(&cm->cm_sc->cm_ ## name, cm, cm_link); \
+ cm->cm_flags &= ~XB_ON_ ## index; \
+ XBQ_REMOVE(cm->cm_sc, index); \
+ } \
+struct hack
+
+XBQ_COMMAND_QUEUE(free, XBQ_FREE);
+XBQ_COMMAND_QUEUE(ready, XBQ_READY);
+XBQ_COMMAND_QUEUE(busy, XBQ_BUSY);
+XBQ_COMMAND_QUEUE(complete, XBQ_COMPLETE);
+
+static __inline void
+xb_initq_bio(struct xb_softc *sc)
+{
+ bioq_init(&sc->xb_bioq);
+ XBQ_INIT(sc, XBQ_BIO);
+}
+
+static __inline void
+xb_enqueue_bio(struct xb_softc *sc, struct bio *bp)
+{
+ bioq_insert_tail(&sc->xb_bioq, bp);
+ XBQ_ADD(sc, XBQ_BIO);
+}
+
+static __inline void
+xb_requeue_bio(struct xb_softc *sc, struct bio *bp)
+{
+ bioq_insert_head(&sc->xb_bioq, bp);
+ XBQ_ADD(sc, XBQ_BIO);
+}
+
+static __inline struct bio *
+xb_dequeue_bio(struct xb_softc *sc)
+{
+ struct bio *bp;
+
+ if ((bp = bioq_first(&sc->xb_bioq)) != NULL) {
+ bioq_remove(&sc->xb_bioq, bp);
+ XBQ_REMOVE(sc, XBQ_BIO);
+ }
+ return (bp);
+}
#endif /* __XEN_DRIVERS_BLOCK_H__ */
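
The XBQ_COMMAND_QUEUE() macro above stamps out init/enqueue/requeue/dequeue/remove helpers for each command queue and uses a per-command flag bit to catch commands that land on the wrong queue. A compilable, simplified sketch of the same generator idea follows (no statistics, panics replaced by assert(); the names are illustrative).

/*
 * Sketch of the queue-generator technique from XBQ_COMMAND_QUEUE():
 * one macro emits typed init/enqueue/dequeue helpers per queue, and a
 * per-command flag bit records which queue the command sits on.
 */
#include <sys/queue.h>
#include <assert.h>
#include <stdio.h>

struct cmd {
	TAILQ_ENTRY(cmd) link;
	unsigned	 flags;
};

struct softc {
	TAILQ_HEAD(, cmd) q_free;
	TAILQ_HEAD(, cmd) q_busy;
};

#define ON_FREE	(1u << 0)
#define ON_BUSY	(1u << 1)
#define ON_ANY	(ON_FREE | ON_BUSY)

#define GEN_QUEUE(name, bit)						\
static inline void							\
initq_##name(struct softc *sc)						\
{									\
	TAILQ_INIT(&sc->q_##name);					\
}									\
static inline void							\
enqueue_##name(struct softc *sc, struct cmd *cm)			\
{									\
	assert((cm->flags & ON_ANY) == 0); /* not on another queue */	\
	TAILQ_INSERT_TAIL(&sc->q_##name, cm, link);			\
	cm->flags |= (bit);						\
}									\
static inline struct cmd *						\
dequeue_##name(struct softc *sc)					\
{									\
	struct cmd *cm = TAILQ_FIRST(&sc->q_##name);			\
	if (cm != NULL) {						\
		TAILQ_REMOVE(&sc->q_##name, cm, link);			\
		cm->flags &= ~(bit);					\
	}								\
	return (cm);							\
}									\
struct hack_##name	/* swallow the trailing semicolon */

GEN_QUEUE(free, ON_FREE);
GEN_QUEUE(busy, ON_BUSY);

int
main(void)
{
	struct softc sc;
	struct cmd c = { .flags = 0 };

	initq_free(&sc);
	initq_busy(&sc);
	enqueue_free(&sc, &c);			/* command starts on the free list */
	enqueue_busy(&sc, dequeue_free(&sc));	/* move it to the busy list */
	printf("on busy queue: %d\n", (c.flags & ON_BUSY) != 0);
	return (0);
}

The trailing struct hack declaration in each expansion exists only to absorb the semicolon written after the macro invocation, mirroring the struct hack line at the end of the header's XBQ_COMMAND_QUEUE() definition.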