-rw-r--r--  share/man/man4/pass.4     |  132
-rw-r--r--  sys/cam/ata/ata_da.c      |   25
-rw-r--r--  sys/cam/cam_ccb.h         |    3
-rw-r--r--  sys/cam/cam_xpt.c         |   11
-rw-r--r--  sys/cam/cam_xpt.h         |    4
-rw-r--r--  sys/cam/scsi/scsi_da.c    |   29
-rw-r--r--  sys/cam/scsi/scsi_pass.c  | 1604
-rw-r--r--  sys/cam/scsi/scsi_pass.h  |    8
-rw-r--r--  sys/dev/md/md.c           |  307
-rw-r--r--  sys/geom/geom_disk.c      |  188
-rw-r--r--  sys/geom/geom_io.c        |    9
-rw-r--r--  sys/ia64/include/bus.h    |    3
-rw-r--r--  sys/kern/subr_bus_dma.c   |   69
-rw-r--r--  sys/kern/subr_uio.c       |   54
-rw-r--r--  sys/pc98/include/bus.h    |    6
-rw-r--r--  sys/sys/bio.h             |    1
-rw-r--r--  sys/sys/uio.h             |    5
-rw-r--r--  usr.sbin/Makefile         |    1
-rw-r--r--  usr.sbin/camdd/Makefile   |   11
-rw-r--r--  usr.sbin/camdd/camdd.8    |  283
-rw-r--r--  usr.sbin/camdd/camdd.c    | 3428
21 files changed, 5983 insertions, 198 deletions
diff --git a/share/man/man4/pass.4 b/share/man/man4/pass.4
index 7819ea3..00b9ccd 100644
--- a/share/man/man4/pass.4
+++ b/share/man/man4/pass.4
@@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd October 10, 1998
+.Dd March 17, 2015
.Dt PASS 4
.Os
.Sh NAME
@@ -53,9 +53,13 @@ The
.Nm
driver attaches to every
.Tn SCSI
+and
+.Tn ATA
device found in the system.
Since it attaches to every device, it provides a generic means of accessing
.Tn SCSI
+and
+.Tn ATA
devices, and allows the user to access devices which have no
"standard" peripheral driver associated with them.
.Sh KERNEL CONFIGURATION
@@ -65,10 +69,12 @@ device in the kernel;
.Nm
devices are automatically allocated as
.Tn SCSI
+and
+.Tn ATA
devices are found.
.Sh IOCTLS
-.Bl -tag -width 012345678901234
-.It CAMIOCOMMAND
+.Bl -tag -width 5n
+.It CAMIOCOMMAND union ccb *
This ioctl takes most kinds of CAM CCBs and passes them through to the CAM
transport layer for action.
Note that some CCB types are not allowed
@@ -79,7 +85,7 @@ Some examples of xpt-only CCBs are XPT_SCAN_BUS,
XPT_DEV_MATCH, XPT_RESET_BUS, XPT_SCAN_LUN, XPT_ENG_INQ, and XPT_ENG_EXEC.
These CCB types have various attributes that make it illogical or
impossible to service them through the passthrough interface.
-.It CAMGETPASSTHRU
+.It CAMGETPASSTHRU union ccb *
This ioctl takes an XPT_GDEVLIST CCB, and returns the passthrough device
corresponding to the device in question.
Although this ioctl is available through the
@@ -90,6 +96,109 @@ ioctl.
It is probably more useful to issue this ioctl through the
.Xr xpt 4
device.
+.It CAMIOQUEUE union ccb *
+Queue a CCB to the
+.Xr pass 4
+driver to be executed asynchronously.
+The caller may use
+.Xr select 2 ,
+.Xr poll 2
+or
+.Xr kevent 2
+to receive notification when the CCB has completed.
+.Pp
+This ioctl takes most CAM CCBs, but some CCB types are not allowed through
+the pass device, and must be sent through the
+.Xr xpt 4
+device instead.
+Some examples of xpt-only CCBs are XPT_SCAN_BUS,
+XPT_DEV_MATCH, XPT_RESET_BUS, XPT_SCAN_LUN, XPT_ENG_INQ, and XPT_ENG_EXEC.
+These CCB types have various attributes that make it illogical or
+impossible to service them through the passthrough interface.
+.Pp
+Although the
+.Dv CAMIOQUEUE
+ioctl is not defined to take an argument, it does require a
+pointer to a union ccb.
+It is not defined to take an argument to avoid an extra malloc and copy
+inside the generic
+.Xr ioctl 2
+handler.
+.Pp
+The completed CCB will be returned via the
+.Dv CAMIOGET
+ioctl.
+An error will only be returned from the
+.Dv CAMIOQUEUE
+ioctl if there is an error allocating memory for the request or copying
+memory from userland.
+All other errors will be reported as standard CAM CCB status errors.
+Since the CCB is not copied back to the user process from the pass driver
+in the
+.Dv CAMIOQUEUE
+ioctl, the user's passed-in CCB will not be modified.
+This is the case even with immediate CCBs.
+Instead, the completed CCB must be retrieved via the
+.Dv CAMIOGET
+ioctl and the status examined.
+.Pp
+Multiple CCBs may be queued via the
+.Dv CAMIOQUEUE
+ioctl at any given time, and they may complete in an order different from
+the order in which they were submitted.
+The caller must take steps to identify CCBs that are queued and completed.
+The
+.Dv periph_priv
+structure inside struct ccb_hdr is available for userland use with the
+.Dv CAMIOQUEUE
+and
+.Dv CAMIOGET
+ioctls, and will be preserved across calls.
+Also, the periph_links linked list pointers inside struct ccb_hdr are
+available for userland use with the
+.Dv CAMIOQUEUE
+and
+.Dv CAMIOGET
+ioctls and will be preserved across calls.
+.It CAMIOGET union ccb *
+Retrieve completed CAM CCBs queued via the
+.Dv CAMIOQUEUE
+ioctl.
+An error will only be returned from the
+.Dv CAMIOGET
+ioctl if the
+.Xr pass 4
+driver fails to copy data to the user process or if there are no completed
+CCBs available to retrieve.
+If no CCBs are available to retrieve,
+errno will be set to
+.Dv ENOENT .
+.Pp
+All other errors will be reported as standard CAM CCB status errors.
+.Pp
+Although the
+.Dv CAMIOGET
+ioctl is not defined to take an argument, it does require a
+pointer to a union ccb.
+It is not defined to take an argument to avoid an extra malloc and copy
+inside the generic
+.Xr ioctl 2
+handler.
+.Pp
+The pass driver will report via
+.Xr select 2 ,
+.Xr poll 2
+or
+.Xr kevent 2
+when a CCB has completed.
+One CCB may be retrieved per
+.Dv CAMIOGET
+call.
+CCBs may be returned in an order different from the order in which they
+were submitted, so the caller should use the
+.Dv periph_priv
+area inside the CCB header to store pointers to identifying information.
.El
.Sh FILES
.Bl -tag -width /dev/passn -compact
@@ -103,18 +212,21 @@ CAM subsystem.
.Sh DIAGNOSTICS
None.
.Sh SEE ALSO
+.Xr kqueue 2 ,
+.Xr poll 2 ,
+.Xr select 2 ,
.Xr cam 3 ,
.Xr cam 4 ,
.Xr cam_cdbparse 3 ,
+.Xr cd 4 ,
+.Xr ctl 4 ,
+.Xr da 4 ,
+.Xr sa 4 ,
.Xr xpt 4 ,
-.Xr camcontrol 8
+.Xr camcontrol 8 ,
+.Xr camdd 8
.Sh HISTORY
The CAM passthrough driver first appeared in
.Fx 3.0 .
.Sh AUTHORS
.An Kenneth Merry Aq ken@FreeBSD.org
-.Sh BUGS
-It might be nice to have a way to asynchronously send CCBs through the
-passthrough driver.
-This would probably require some sort of read/write
-interface or an asynchronous ioctl interface.
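
To make the asynchronous interface documented above concrete, here is a
minimal userland sketch (not part of this change) that queues one READ
CAPACITY(10) CCB with CAMIOQUEUE, waits for completion via select(2), and
fetches the result with CAMIOGET.  The device path, the CDB, and the
abbreviated error handling are illustrative assumptions; a real consumer
such as camdd(8) queues many CCBs and tracks them through the periph_priv
area.

#include <sys/ioctl.h>
#include <sys/select.h>
#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <err.h>
#include <camlib.h>			/* union ccb, cam_fill_csio() */
#include <cam/scsi/scsi_message.h>	/* MSG_SIMPLE_Q_TAG */
#include <cam/scsi/scsi_pass.h>		/* CAMIOQUEUE, CAMIOGET */

int
main(void)
{
	union ccb ccb, done_ccb;
	uint8_t buf[8];
	fd_set rfds;
	int fd;

	if ((fd = open("/dev/pass0", O_RDWR)) == -1)
		err(1, "open");

	/* Build a READ CAPACITY(10) CCB: opcode 0x25, 8 byte reply. */
	memset(&ccb, 0, sizeof(ccb));
	cam_fill_csio(&ccb.csio, /*retries*/ 1, /*cbfcnp*/ NULL,
	    /*flags*/ CAM_DIR_IN, MSG_SIMPLE_Q_TAG, buf, sizeof(buf),
	    SSD_FULL_SIZE, /*cdb_len*/ 10, /*timeout*/ 5000);
	ccb.csio.cdb_io.cdb_bytes[0] = 0x25;

	/* Queue it; the ioctl argument is the CCB pointer itself. */
	if (ioctl(fd, CAMIOQUEUE, &ccb) == -1)
		err(1, "CAMIOQUEUE");

	/* Wait for the pass(4) driver to signal a completion. */
	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);
	if (select(fd + 1, &rfds, NULL, NULL, NULL) == -1)
		err(1, "select");

	/* Fetch the completed CCB and examine its CAM status. */
	if (ioctl(fd, CAMIOGET, &done_ccb) == -1)
		err(1, "CAMIOGET");
	return ((done_ccb.ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP ?
	    0 : 1);
}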
diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c
index f88899e..005c684 100644
--- a/sys/cam/ata/ata_da.c
+++ b/sys/cam/ata/ata_da.c
@@ -1573,12 +1573,26 @@ adastart(struct cam_periph *periph, union ccb *start_ccb)
}
switch (bp->bio_cmd) {
case BIO_WRITE:
- softc->flags |= ADA_FLAG_DIRTY;
- /* FALLTHROUGH */
case BIO_READ:
{
uint64_t lba = bp->bio_pblkno;
uint16_t count = bp->bio_bcount / softc->params.secsize;
+ void *data_ptr;
+ int rw_op;
+
+ if (bp->bio_cmd == BIO_WRITE) {
+ softc->flags |= ADA_FLAG_DIRTY;
+ rw_op = CAM_DIR_OUT;
+ } else {
+ rw_op = CAM_DIR_IN;
+ }
+
+ data_ptr = bp->bio_data;
+ if ((bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0) {
+ rw_op |= CAM_DATA_BIO;
+ data_ptr = bp;
+ }
+
#ifdef ADA_TEST_FAILURE
int fail = 0;
@@ -1623,12 +1637,9 @@ adastart(struct cam_periph *periph, union ccb *start_ccb)
cam_fill_ataio(ataio,
ada_retry_count,
adadone,
- (bp->bio_cmd == BIO_READ ? CAM_DIR_IN :
- CAM_DIR_OUT) | ((bp->bio_flags & BIO_UNMAPPED)
- != 0 ? CAM_DATA_BIO : 0),
+ rw_op,
tag_code,
- ((bp->bio_flags & BIO_UNMAPPED) != 0) ? (void *)bp :
- bp->bio_data,
+ data_ptr,
bp->bio_bcount,
ada_default_timeout*1000);
diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h
index 98bb9ea..12d3803 100644
--- a/sys/cam/cam_ccb.h
+++ b/sys/cam/cam_ccb.h
@@ -111,6 +111,9 @@ typedef enum {
typedef enum {
CAM_EXTLUN_VALID = 0x00000001,/* 64bit lun field is valid */
+ CAM_USER_DATA_ADDR = 0x00000002,/* Userspace data pointers */
+ CAM_SG_FORMAT_IOVEC = 0x00000004,/* iovec instead of busdma S/G*/
+ CAM_UNMAPPED_BUF = 0x00000008 /* use unmapped I/O */
} ccb_xflags;
/* XPT Opcodes for xpt_action */
diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c
index ba0863a..6773829 100644
--- a/sys/cam/cam_xpt.c
+++ b/sys/cam/cam_xpt.c
@@ -3337,7 +3337,8 @@ xpt_merge_ccb(union ccb *master_ccb, union ccb *slave_ccb)
}
void
-xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
+xpt_setup_ccb_flags(struct ccb_hdr *ccb_h, struct cam_path *path,
+ u_int32_t priority, u_int32_t flags)
{
CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_setup_ccb\n"));
@@ -3355,10 +3356,16 @@ xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
ccb_h->target_lun = CAM_TARGET_WILDCARD;
}
ccb_h->pinfo.index = CAM_UNQUEUED_INDEX;
- ccb_h->flags = 0;
+ ccb_h->flags = flags;
ccb_h->xflags = 0;
}
+void
+xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
+{
+ xpt_setup_ccb_flags(ccb_h, path, priority, /*flags*/ 0);
+}
+
/* Path manipulation functions */
cam_status
xpt_create_path(struct cam_path **new_path_ptr, struct cam_periph *perph,
diff --git a/sys/cam/cam_xpt.h b/sys/cam/cam_xpt.h
index 1d983c9..ca7dccc 100644
--- a/sys/cam/cam_xpt.h
+++ b/sys/cam/cam_xpt.h
@@ -70,6 +70,10 @@ void xpt_action_default(union ccb *new_ccb);
union ccb *xpt_alloc_ccb(void);
union ccb *xpt_alloc_ccb_nowait(void);
void xpt_free_ccb(union ccb *free_ccb);
+void xpt_setup_ccb_flags(struct ccb_hdr *ccb_h,
+ struct cam_path *path,
+ u_int32_t priority,
+ u_int32_t flags);
void xpt_setup_ccb(struct ccb_hdr *ccb_h,
struct cam_path *path,
u_int32_t priority);
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index 4e3fe76..1cd687a 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -2332,29 +2332,40 @@ skipstate:
switch (bp->bio_cmd) {
case BIO_WRITE:
- softc->flags |= DA_FLAG_DIRTY;
- /* FALLTHROUGH */
case BIO_READ:
+ {
+ void *data_ptr;
+ int rw_op;
+
+ if (bp->bio_cmd == BIO_WRITE) {
+ softc->flags |= DA_FLAG_DIRTY;
+ rw_op = SCSI_RW_WRITE;
+ } else {
+ rw_op = SCSI_RW_READ;
+ }
+
+ data_ptr = bp->bio_data;
+ if ((bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0) {
+ rw_op |= SCSI_RW_BIO;
+ data_ptr = bp;
+ }
+
scsi_read_write(&start_ccb->csio,
/*retries*/da_retry_count,
/*cbfcnp*/dadone,
/*tag_action*/tag_code,
- /*read_op*/(bp->bio_cmd == BIO_READ ?
- SCSI_RW_READ : SCSI_RW_WRITE) |
- ((bp->bio_flags & BIO_UNMAPPED) != 0 ?
- SCSI_RW_BIO : 0),
+ rw_op,
/*byte2*/0,
softc->minimum_cmd_size,
/*lba*/bp->bio_pblkno,
/*block_count*/bp->bio_bcount /
softc->params.secsize,
- /*data_ptr*/ (bp->bio_flags &
- BIO_UNMAPPED) != 0 ? (void *)bp :
- bp->bio_data,
+ data_ptr,
/*dxfer_len*/ bp->bio_bcount,
/*sense_len*/SSD_FULL_SIZE,
da_default_timeout * 1000);
break;
+ }
case BIO_FLUSH:
/*
* BIO_FLUSH doesn't currently communicate
diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c
index 174151e..09cda5b 100644
--- a/sys/cam/scsi/scsi_pass.c
+++ b/sys/cam/scsi/scsi_pass.c
@@ -28,27 +28,39 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_kdtrace.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/conf.h>
#include <sys/types.h>
#include <sys/bio.h>
-#include <sys/malloc.h>
-#include <sys/fcntl.h>
-#include <sys/conf.h>
-#include <sys/errno.h>
+#include <sys/bus.h>
#include <sys/devicestat.h>
+#include <sys/errno.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
#include <sys/proc.h>
+#include <sys/poll.h>
+#include <sys/selinfo.h>
+#include <sys/sdt.h>
#include <sys/taskqueue.h>
+#include <vm/uma.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include <machine/bus.h>
#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_periph.h>
#include <cam/cam_queue.h>
+#include <cam/cam_xpt.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_debug.h>
-#include <cam/cam_sim.h>
#include <cam/cam_compat.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_pass.h>
@@ -57,7 +69,11 @@ typedef enum {
PASS_FLAG_OPEN = 0x01,
PASS_FLAG_LOCKED = 0x02,
PASS_FLAG_INVALID = 0x04,
- PASS_FLAG_INITIAL_PHYSPATH = 0x08
+ PASS_FLAG_INITIAL_PHYSPATH = 0x08,
+ PASS_FLAG_ZONE_INPROG = 0x10,
+ PASS_FLAG_ZONE_VALID = 0x20,
+ PASS_FLAG_UNMAPPED_CAPABLE = 0x40,
+ PASS_FLAG_ABANDONED_REF_SET = 0x80
} pass_flags;
typedef enum {
@@ -65,38 +81,104 @@ typedef enum {
} pass_state;
typedef enum {
- PASS_CCB_BUFFER_IO
+ PASS_CCB_BUFFER_IO,
+ PASS_CCB_QUEUED_IO
} pass_ccb_types;
#define ccb_type ppriv_field0
-#define ccb_bp ppriv_ptr1
+#define ccb_ioreq ppriv_ptr1
-struct pass_softc {
- pass_state state;
- pass_flags flags;
- u_int8_t pd_type;
- union ccb saved_ccb;
- int open_count;
- u_int maxio;
- struct devstat *device_stats;
- struct cdev *dev;
- struct cdev *alias_dev;
- struct task add_physpath_task;
+/*
+ * The maximum number of memory segments we preallocate.
+ */
+#define PASS_MAX_SEGS 16
+
+typedef enum {
+ PASS_IO_NONE = 0x00,
+ PASS_IO_USER_SEG_MALLOC = 0x01,
+ PASS_IO_KERN_SEG_MALLOC = 0x02,
+ PASS_IO_ABANDONED = 0x04
+} pass_io_flags;
+
+struct pass_io_req {
+ union ccb ccb;
+ union ccb *alloced_ccb;
+ union ccb *user_ccb_ptr;
+ camq_entry user_periph_links;
+ ccb_ppriv_area user_periph_priv;
+ struct cam_periph_map_info mapinfo;
+ pass_io_flags flags;
+ ccb_flags data_flags;
+ int num_user_segs;
+ bus_dma_segment_t user_segs[PASS_MAX_SEGS];
+ int num_kern_segs;
+ bus_dma_segment_t kern_segs[PASS_MAX_SEGS];
+ bus_dma_segment_t *user_segptr;
+ bus_dma_segment_t *kern_segptr;
+ int num_bufs;
+ uint32_t dirs[CAM_PERIPH_MAXMAPS];
+ uint32_t lengths[CAM_PERIPH_MAXMAPS];
+ uint8_t *user_bufs[CAM_PERIPH_MAXMAPS];
+ uint8_t *kern_bufs[CAM_PERIPH_MAXMAPS];
+ struct bintime start_time;
+ TAILQ_ENTRY(pass_io_req) links;
};
+struct pass_softc {
+ pass_state state;
+ pass_flags flags;
+ u_int8_t pd_type;
+ union ccb saved_ccb;
+ int open_count;
+ u_int maxio;
+ struct devstat *device_stats;
+ struct cdev *dev;
+ struct cdev *alias_dev;
+ struct task add_physpath_task;
+ struct task shutdown_kqueue_task;
+ struct selinfo read_select;
+ TAILQ_HEAD(, pass_io_req) incoming_queue;
+ TAILQ_HEAD(, pass_io_req) active_queue;
+ TAILQ_HEAD(, pass_io_req) abandoned_queue;
+ TAILQ_HEAD(, pass_io_req) done_queue;
+ struct cam_periph *periph;
+ char zone_name[12];
+ char io_zone_name[12];
+ uma_zone_t pass_zone;
+ uma_zone_t pass_io_zone;
+ size_t io_zone_size;
+};
static d_open_t passopen;
static d_close_t passclose;
static d_ioctl_t passioctl;
static d_ioctl_t passdoioctl;
+static d_poll_t passpoll;
+static d_kqfilter_t passkqfilter;
+static void passreadfiltdetach(struct knote *kn);
+static int passreadfilt(struct knote *kn, long hint);
static periph_init_t passinit;
static periph_ctor_t passregister;
static periph_oninv_t passoninvalidate;
static periph_dtor_t passcleanup;
-static void pass_add_physpath(void *context, int pending);
+static periph_start_t passstart;
+static void pass_shutdown_kqueue(void *context, int pending);
+static void pass_add_physpath(void *context, int pending);
static void passasync(void *callback_arg, u_int32_t code,
struct cam_path *path, void *arg);
+static void passdone(struct cam_periph *periph,
+ union ccb *done_ccb);
+static int passcreatezone(struct cam_periph *periph);
+static void passiocleanup(struct pass_softc *softc,
+ struct pass_io_req *io_req);
+static int passcopysglist(struct cam_periph *periph,
+ struct pass_io_req *io_req,
+ ccb_flags direction);
+static int passmemsetup(struct cam_periph *periph,
+ struct pass_io_req *io_req);
+static int passmemdone(struct cam_periph *periph,
+ struct pass_io_req *io_req);
static int passerror(union ccb *ccb, u_int32_t cam_flags,
u_int32_t sense_flags);
static int passsendccb(struct cam_periph *periph, union ccb *ccb,
@@ -116,9 +198,19 @@ static struct cdevsw pass_cdevsw = {
.d_open = passopen,
.d_close = passclose,
.d_ioctl = passioctl,
+ .d_poll = passpoll,
+ .d_kqfilter = passkqfilter,
.d_name = "pass",
};
+static struct filterops passread_filtops = {
+ .f_isfd = 1,
+ .f_detach = passreadfiltdetach,
+ .f_event = passreadfilt
+};
+
+static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers");
+
static void
passinit(void)
{
@@ -138,6 +230,60 @@ passinit(void)
}
static void
+passrejectios(struct cam_periph *periph)
+{
+ struct pass_io_req *io_req, *io_req2;
+ struct pass_softc *softc;
+
+ softc = (struct pass_softc *)periph->softc;
+
+ /*
+ * The user can no longer get status for I/O on the done queue, so
+ * clean up all outstanding I/O on the done queue.
+ */
+ TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
+ TAILQ_REMOVE(&softc->done_queue, io_req, links);
+ passiocleanup(softc, io_req);
+ uma_zfree(softc->pass_zone, io_req);
+ }
+
+ /*
+ * The underlying device is gone, so we can't issue these I/Os.
+ * The devfs node has been shut down, so we can't return status to
+ * the user. Free any I/O left on the incoming queue.
+ */
+ TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links, io_req2) {
+ TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
+ passiocleanup(softc, io_req);
+ uma_zfree(softc->pass_zone, io_req);
+ }
+
+ /*
+ * Normally we would put I/Os on the abandoned queue and acquire a
+ * reference when we saw the final close. But, the device went
+ * away and devfs may have moved everything off to deadfs by the
+ * time the I/O done callback is called; as a result, we won't see
+ * any more closes. So, if we have any active I/Os, we need to put
+ * them on the abandoned queue. When the abandoned queue is empty,
+ * we'll release the remaining reference (see below) to the peripheral.
+ */
+ TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links, io_req2) {
+ TAILQ_REMOVE(&softc->active_queue, io_req, links);
+ io_req->flags |= PASS_IO_ABANDONED;
+ TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req, links);
+ }
+
+ /*
+ * If we put any I/O on the abandoned queue, acquire a reference.
+ */
+ if ((!TAILQ_EMPTY(&softc->abandoned_queue))
+ && ((softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0)) {
+ cam_periph_doacquire(periph);
+ softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
+ }
+}
+
+static void
passdevgonecb(void *arg)
{
struct cam_periph *periph;
@@ -165,17 +311,26 @@ passdevgonecb(void *arg)
/*
* Release the reference held for the device node, it is gone now.
+ * Accordingly, inform all queued I/Os of their fate.
*/
cam_periph_release_locked(periph);
+ passrejectios(periph);
/*
- * We reference the lock directly here, instead of using
+ * We reference the SIM lock directly here, instead of using
* cam_periph_unlock(). The reason is that the final call to
* cam_periph_release_locked() above could result in the periph
* getting freed. If that is the case, dereferencing the periph
* with a cam_periph_unlock() call would cause a page fault.
*/
mtx_unlock(mtx);
+
+ /*
+ * We have to remove our kqueue context from a thread because it
+ * may sleep. It would be nice if we could get a callback from
+ * kqueue when it is done cleaning up resources.
+ */
+ taskqueue_enqueue(taskqueue_thread, &softc->shutdown_kqueue_task);
}
static void
@@ -197,12 +352,6 @@ passoninvalidate(struct cam_periph *periph)
* when it has cleaned up its state.
*/
destroy_dev_sched_cb(softc->dev, passdevgonecb, periph);
-
- /*
- * XXX Return all queued I/O with ENXIO.
- * XXX Handle any transactions queued to the card
- * with XPT_ABORT_CCB.
- */
}
static void
@@ -212,9 +361,40 @@ passcleanup(struct cam_periph *periph)
softc = (struct pass_softc *)periph->softc;
+ cam_periph_assert(periph, MA_OWNED);
+ KASSERT(TAILQ_EMPTY(&softc->active_queue),
+ ("%s called when there are commands on the active queue!\n",
+ __func__));
+ KASSERT(TAILQ_EMPTY(&softc->abandoned_queue),
+ ("%s called when there are commands on the abandoned queue!\n",
+ __func__));
+ KASSERT(TAILQ_EMPTY(&softc->incoming_queue),
+ ("%s called when there are commands on the incoming queue!\n",
+ __func__));
+ KASSERT(TAILQ_EMPTY(&softc->done_queue),
+ ("%s called when there are commands on the done queue!\n",
+ __func__));
+
devstat_remove_entry(softc->device_stats);
cam_periph_unlock(periph);
+
+ /*
+ * We call taskqueue_drain() for the physpath task to make sure it
+ * is complete. We drop the lock because this can potentially
+ * sleep. XXX KDM that is bad. Need a way to get a callback when
+ * a taskqueue is drained.
+ *
+ * Note that we don't drain the kqueue shutdown task queue. This
+ * is because we hold a reference on the periph for kqueue, and
+ * release that reference from the kqueue shutdown task queue. So
+ * we cannot come into this routine unless we've released that
+ * reference. Also, because that could be the last reference, we
+ * could be called from the cam_periph_release() call in
+ * pass_shutdown_kqueue(). In that case, the taskqueue_drain()
+ * would deadlock. It would be preferable if we had a way to
+ * get a callback when a taskqueue is done.
+ */
taskqueue_drain(taskqueue_thread, &softc->add_physpath_task);
cam_periph_lock(periph);
@@ -223,10 +403,29 @@ passcleanup(struct cam_periph *periph)
}
static void
+pass_shutdown_kqueue(void *context, int pending)
+{
+ struct cam_periph *periph;
+ struct pass_softc *softc;
+
+ periph = context;
+ softc = periph->softc;
+
+ knlist_clear(&softc->read_select.si_note, /*is_locked*/ 0);
+ knlist_destroy(&softc->read_select.si_note);
+
+ /*
+ * Release the reference we held for kqueue.
+ */
+ cam_periph_release(periph);
+}
+
+static void
pass_add_physpath(void *context, int pending)
{
struct cam_periph *periph;
struct pass_softc *softc;
+ struct mtx *mtx;
char *physpath;
/*
@@ -236,34 +435,38 @@ pass_add_physpath(void *context, int pending)
periph = context;
softc = periph->softc;
physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
- cam_periph_lock(periph);
- if (periph->flags & CAM_PERIPH_INVALID) {
- cam_periph_unlock(periph);
+ mtx = cam_periph_mtx(periph);
+ mtx_lock(mtx);
+
+ if (periph->flags & CAM_PERIPH_INVALID)
goto out;
- }
+
if (xpt_getattr(physpath, MAXPATHLEN,
"GEOM::physpath", periph->path) == 0
&& strlen(physpath) != 0) {
- cam_periph_unlock(periph);
+ mtx_unlock(mtx);
make_dev_physpath_alias(MAKEDEV_WAITOK, &softc->alias_dev,
softc->dev, softc->alias_dev, physpath);
- cam_periph_lock(periph);
+ mtx_lock(mtx);
}
+out:
/*
* Now that we've made our alias, we no longer have to have a
* reference to the device.
*/
- if ((softc->flags & PASS_FLAG_INITIAL_PHYSPATH) == 0) {
+ if ((softc->flags & PASS_FLAG_INITIAL_PHYSPATH) == 0)
softc->flags |= PASS_FLAG_INITIAL_PHYSPATH;
- cam_periph_unlock(periph);
- dev_rel(softc->dev);
- }
- else
- cam_periph_unlock(periph);
-out:
+ /*
+ * We always acquire a reference to the periph before queueing this
+ * task queue function, so it won't go away before we run.
+ */
+ while (pending-- > 0)
+ cam_periph_release_locked(periph);
+ mtx_unlock(mtx);
+
free(physpath, M_DEVBUF);
}
@@ -291,7 +494,7 @@ passasync(void *callback_arg, u_int32_t code,
* process.
*/
status = cam_periph_alloc(passregister, passoninvalidate,
- passcleanup, NULL, "pass",
+ passcleanup, passstart, "pass",
CAM_PERIPH_BIO, path,
passasync, AC_FOUND_DEVICE, cgd);
@@ -315,8 +518,19 @@ passasync(void *callback_arg, u_int32_t code,
buftype = (uintptr_t)arg;
if (buftype == CDAI_TYPE_PHYS_PATH) {
struct pass_softc *softc;
+ cam_status status;
softc = (struct pass_softc *)periph->softc;
+ /*
+ * Acquire a reference to the periph before we
+ * start the taskqueue, so that we don't run into
+ * a situation where the periph goes away before
+ * the task queue has a chance to run.
+ */
+ status = cam_periph_acquire(periph);
+ if (status != CAM_REQ_CMP)
+ break;
+
taskqueue_enqueue(taskqueue_thread,
&softc->add_physpath_task);
}
@@ -361,6 +575,17 @@ passregister(struct cam_periph *periph, void *arg)
softc->pd_type = T_DIRECT;
periph->softc = softc;
+ softc->periph = periph;
+ TAILQ_INIT(&softc->incoming_queue);
+ TAILQ_INIT(&softc->active_queue);
+ TAILQ_INIT(&softc->abandoned_queue);
+ TAILQ_INIT(&softc->done_queue);
+ snprintf(softc->zone_name, sizeof(softc->zone_name), "%s%d",
+ periph->periph_name, periph->unit_number);
+ snprintf(softc->io_zone_name, sizeof(softc->io_zone_name), "%s%dIO",
+ periph->periph_name, periph->unit_number);
+ softc->io_zone_size = MAXPHYS;
+ knlist_init_mtx(&softc->read_select.si_note, cam_periph_mtx(periph));
bzero(&cpi, sizeof(cpi));
xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
@@ -374,6 +599,9 @@ passregister(struct cam_periph *periph, void *arg)
else
softc->maxio = cpi.maxio; /* real value */
+ if (cpi.hba_misc & PIM_UNMAPPED)
+ softc->flags |= PASS_FLAG_UNMAPPED_CAPABLE;
+
/*
* We pass in 0 for a blocksize, since we don't
* know what the blocksize of this device is, if
@@ -391,6 +619,23 @@ passregister(struct cam_periph *periph, void *arg)
DEVSTAT_PRIORITY_PASS);
/*
+ * Initialize the taskqueue handler for shutting down kqueue.
+ */
+ TASK_INIT(&softc->shutdown_kqueue_task, /*priority*/ 0,
+ pass_shutdown_kqueue, periph);
+
+ /*
+ * Acquire a reference to the periph that we can release once we've
+ * cleaned up the kqueue.
+ */
+ if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
+ xpt_print(periph->path, "%s: lost periph during "
+ "registration!\n", __func__);
+ cam_periph_lock(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
+
+ /*
* Acquire a reference to the periph before we create the devfs
* instance for it. We'll release this reference once the devfs
* instance has been freed.
@@ -408,12 +653,15 @@ passregister(struct cam_periph *periph, void *arg)
periph->periph_name, periph->unit_number);
/*
- * Now that we have made the devfs instance, hold a reference to it
- * until the task queue has run to setup the physical path alias.
- * That way devfs won't get rid of the device before we add our
- * alias.
+ * Hold a reference to the periph before we create the physical
+ * path alias so it can't go away.
*/
- dev_ref(softc->dev);
+ if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
+ xpt_print(periph->path, "%s: lost periph during "
+ "registration!\n", __func__);
+ cam_periph_lock(periph);
+ return (CAM_REQ_CMP_ERR);
+ }
cam_periph_lock(periph);
softc->dev->si_drv1 = periph;
@@ -514,6 +762,55 @@ passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
softc = periph->softc;
softc->open_count--;
+ if (softc->open_count == 0) {
+ struct pass_io_req *io_req, *io_req2;
+ int need_unlock;
+
+ need_unlock = 0;
+
+ TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
+ TAILQ_REMOVE(&softc->done_queue, io_req, links);
+ passiocleanup(softc, io_req);
+ uma_zfree(softc->pass_zone, io_req);
+ }
+
+ TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links,
+ io_req2) {
+ TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
+ passiocleanup(softc, io_req);
+ uma_zfree(softc->pass_zone, io_req);
+ }
+
+ /*
+ * If there are any active I/Os, we need to forcibly acquire a
+ * reference to the peripheral so that we don't go away
+ * before they complete. We'll release the reference when
+ * the abandoned queue is empty.
+ */
+ io_req = TAILQ_FIRST(&softc->active_queue);
+ if ((io_req != NULL)
+ && (softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0) {
+ cam_periph_doacquire(periph);
+ softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
+ }
+
+ /*
+ * Since the I/O in the active queue is not under our
+ * control, just set a flag so that we can clean it up when
+ * it completes and put it on the abandoned queue. This
+ * will prevent our sending spurious completions in the
+ * event that the device is opened again before these I/Os
+ * complete.
+ */
+ TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links,
+ io_req2) {
+ TAILQ_REMOVE(&softc->active_queue, io_req, links);
+ io_req->flags |= PASS_IO_ABANDONED;
+ TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req,
+ links);
+ }
+ }
+
cam_periph_release_locked(periph);
/*
@@ -533,6 +830,915 @@ passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
return (0);
}
+
+static void
+passstart(struct cam_periph *periph, union ccb *start_ccb)
+{
+ struct pass_softc *softc;
+
+ softc = (struct pass_softc *)periph->softc;
+
+ switch (softc->state) {
+ case PASS_STATE_NORMAL: {
+ struct pass_io_req *io_req;
+
+ /*
+ * Check for any queued I/O requests that require an
+ * allocated slot.
+ */
+ io_req = TAILQ_FIRST(&softc->incoming_queue);
+ if (io_req == NULL) {
+ xpt_release_ccb(start_ccb);
+ break;
+ }
+ TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
+ TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links);
+ /*
+ * Merge the user's CCB into the allocated CCB.
+ */
+ xpt_merge_ccb(start_ccb, &io_req->ccb);
+ start_ccb->ccb_h.ccb_type = PASS_CCB_QUEUED_IO;
+ start_ccb->ccb_h.ccb_ioreq = io_req;
+ start_ccb->ccb_h.cbfcnp = passdone;
+ io_req->alloced_ccb = start_ccb;
+ binuptime(&io_req->start_time);
+ devstat_start_transaction(softc->device_stats,
+ &io_req->start_time);
+
+ xpt_action(start_ccb);
+
+ /*
+ * If we have any more I/O waiting, schedule ourselves again.
+ */
+ if (!TAILQ_EMPTY(&softc->incoming_queue))
+ xpt_schedule(periph, CAM_PRIORITY_NORMAL);
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static void
+passdone(struct cam_periph *periph, union ccb *done_ccb)
+{
+ struct pass_softc *softc;
+ struct ccb_scsiio *csio;
+
+ softc = (struct pass_softc *)periph->softc;
+
+ cam_periph_assert(periph, MA_OWNED);
+
+ csio = &done_ccb->csio;
+ switch (csio->ccb_h.ccb_type) {
+ case PASS_CCB_QUEUED_IO: {
+ struct pass_io_req *io_req;
+
+ io_req = done_ccb->ccb_h.ccb_ioreq;
+#if 0
+ xpt_print(periph->path, "%s: called for user CCB %p\n",
+ __func__, io_req->user_ccb_ptr);
+#endif
+ if (((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
+ && (done_ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER)
+ && ((io_req->flags & PASS_IO_ABANDONED) == 0)) {
+ int error;
+
+ error = passerror(done_ccb, CAM_RETRY_SELTO,
+ SF_RETRY_UA | SF_NO_PRINT);
+
+ if (error == ERESTART) {
+ /*
+ * A retry was scheduled, so
+ * just return.
+ */
+ return;
+ }
+ }
+
+ /*
+ * Copy the allocated CCB contents back to the malloced CCB
+ * so we can give status back to the user when he requests it.
+ */
+ bcopy(done_ccb, &io_req->ccb, sizeof(*done_ccb));
+
+ /*
+ * Log data/transaction completion with devstat(9).
+ */
+ switch (done_ccb->ccb_h.func_code) {
+ case XPT_SCSI_IO:
+ devstat_end_transaction(softc->device_stats,
+ done_ccb->csio.dxfer_len - done_ccb->csio.resid,
+ done_ccb->csio.tag_action & 0x3,
+ ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
+ CAM_DIR_NONE) ? DEVSTAT_NO_DATA :
+ (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
+ DEVSTAT_WRITE : DEVSTAT_READ, NULL,
+ &io_req->start_time);
+ break;
+ case XPT_ATA_IO:
+ devstat_end_transaction(softc->device_stats,
+ done_ccb->ataio.dxfer_len - done_ccb->ataio.resid,
+ done_ccb->ataio.tag_action & 0x3,
+ ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
+ CAM_DIR_NONE) ? DEVSTAT_NO_DATA :
+ (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
+ DEVSTAT_WRITE : DEVSTAT_READ, NULL,
+ &io_req->start_time);
+ break;
+ case XPT_SMP_IO:
+ /*
+ * XXX KDM this isn't quite right, but there isn't
+ * currently an easy way to represent a bidirectional
+ * transfer in devstat. The only way to do it
+ * and have the byte counts come out right would
+ * mean that we would have to record two
+ * transactions, one for the request and one for the
+ * response. For now, so that we report something,
+ * just treat the entire thing as a read.
+ */
+ devstat_end_transaction(softc->device_stats,
+ done_ccb->smpio.smp_request_len +
+ done_ccb->smpio.smp_response_len,
+ DEVSTAT_TAG_SIMPLE, DEVSTAT_READ, NULL,
+ &io_req->start_time);
+ break;
+ default:
+ devstat_end_transaction(softc->device_stats, 0,
+ DEVSTAT_TAG_NONE, DEVSTAT_NO_DATA, NULL,
+ &io_req->start_time);
+ break;
+ }
+
+ /*
+ * In the normal case, take the completed I/O off of the
+ * active queue and put it on the done queue. Notitfy the
+ * user that we have a completed I/O.
+ */
+ if ((io_req->flags & PASS_IO_ABANDONED) == 0) {
+ TAILQ_REMOVE(&softc->active_queue, io_req, links);
+ TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
+ selwakeuppri(&softc->read_select, PRIBIO);
+ KNOTE_LOCKED(&softc->read_select.si_note, 0);
+ } else {
+ /*
+ * In the case of an abandoned I/O (final close
+ * without fetching the I/O), take it off of the
+ * abandoned queue and free it.
+ */
+ TAILQ_REMOVE(&softc->abandoned_queue, io_req, links);
+ passiocleanup(softc, io_req);
+ uma_zfree(softc->pass_zone, io_req);
+
+ /*
+ * Release the done_ccb here, since we may wind up
+ * freeing the peripheral when we decrement the
+ * reference count below.
+ */
+ xpt_release_ccb(done_ccb);
+
+ /*
+ * If the abandoned queue is empty, we can release
+ * our reference to the periph since we won't have
+ * any more completions coming.
+ */
+ if ((TAILQ_EMPTY(&softc->abandoned_queue))
+ && (softc->flags & PASS_FLAG_ABANDONED_REF_SET)) {
+ softc->flags &= ~PASS_FLAG_ABANDONED_REF_SET;
+ cam_periph_release_locked(periph);
+ }
+
+ /*
+ * We have already released the CCB, so we can
+ * return.
+ */
+ return;
+ }
+ break;
+ }
+ }
+ xpt_release_ccb(done_ccb);
+}
+
+static int
+passcreatezone(struct cam_periph *periph)
+{
+ struct pass_softc *softc;
+ int error;
+
+ error = 0;
+ softc = (struct pass_softc *)periph->softc;
+
+ cam_periph_assert(periph, MA_OWNED);
+ KASSERT(((softc->flags & PASS_FLAG_ZONE_VALID) == 0),
+ ("%s called when the pass(4) zone is valid!\n", __func__));
+ KASSERT((softc->pass_zone == NULL),
+ ("%s called when the pass(4) zone is allocated!\n", __func__));
+
+ if ((softc->flags & PASS_FLAG_ZONE_INPROG) == 0) {
+
+ /*
+ * We're the first context through, so we need to create
+ * the pass(4) UMA zone for I/O requests.
+ */
+ softc->flags |= PASS_FLAG_ZONE_INPROG;
+
+ /*
+ * uma_zcreate() does a blocking (M_WAITOK) allocation,
+ * so we cannot hold a mutex while we call it.
+ */
+ cam_periph_unlock(periph);
+
+ softc->pass_zone = uma_zcreate(softc->zone_name,
+ sizeof(struct pass_io_req), NULL, NULL, NULL, NULL,
+ /*align*/ 0, /*flags*/ 0);
+
+ softc->pass_io_zone = uma_zcreate(softc->io_zone_name,
+ softc->io_zone_size, NULL, NULL, NULL, NULL,
+ /*align*/ 0, /*flags*/ 0);
+
+ cam_periph_lock(periph);
+
+ if ((softc->pass_zone == NULL)
+ || (softc->pass_io_zone == NULL)) {
+ if (softc->pass_zone == NULL)
+ xpt_print(periph->path, "unable to allocate "
+ "IO Req UMA zone\n");
+ else
+ xpt_print(periph->path, "unable to allocate "
+ "IO UMA zone\n");
+ softc->flags &= ~PASS_FLAG_ZONE_INPROG;
+ goto bailout;
+ }
+
+ /*
+ * Set the flags appropriately and notify any other waiters.
+ */
+ softc->flags &= ~PASS_FLAG_ZONE_INPROG;
+ softc->flags |= PASS_FLAG_ZONE_VALID;
+ wakeup(&softc->pass_zone);
+ } else {
+ /*
+ * In this case, the UMA zone has not yet been created, but
+ * another context is in the process of creating it. We
+ * need to sleep until the creation is either done or has
+ * failed.
+ */
+ while ((softc->flags & PASS_FLAG_ZONE_INPROG)
+ && ((softc->flags & PASS_FLAG_ZONE_VALID) == 0)) {
+ error = msleep(&softc->pass_zone,
+ cam_periph_mtx(periph), PRIBIO,
+ "paszon", 0);
+ if (error != 0)
+ goto bailout;
+ }
+ /*
+ * If the zone creation failed, no luck for the user.
+ */
+ if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0) {
+ error = ENOMEM;
+ goto bailout;
+ }
+ }
+bailout:
+ return (error);
+}
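+
passcreatezone() above is an instance of a general once-only initialization
idiom: the first thread through sets an in-progress flag, drops the mutex
across the blocking uma_zcreate() calls, then clears that flag, sets the
valid flag, and wakes any waiters; latecomers msleep() until one flag or
the other resolves.  A condensed sketch of the idiom, with hypothetical
names (struct my_softc, MY_FLAG_*, and alloc_resource() are placeholders,
not CAM APIs):

static int
create_resource_once(struct my_softc *sc)
{
	int error = 0;

	mtx_assert(&sc->mtx, MA_OWNED);
	if ((sc->flags & MY_FLAG_INPROG) == 0) {
		sc->flags |= MY_FLAG_INPROG;
		mtx_unlock(&sc->mtx);		/* blocking allocation */
		sc->res = alloc_resource(M_WAITOK);
		mtx_lock(&sc->mtx);
		sc->flags &= ~MY_FLAG_INPROG;	/* clear, don't mask */
		if (sc->res != NULL)
			sc->flags |= MY_FLAG_VALID;
		wakeup(&sc->res);		/* notify any waiters */
	} else {
		/* Another thread is creating it; wait for the outcome. */
		while ((sc->flags & MY_FLAG_INPROG) != 0 &&
		    (sc->flags & MY_FLAG_VALID) == 0) {
			error = msleep(&sc->res, &sc->mtx, PRIBIO,
			    "resonce", 0);
			if (error != 0)
				return (error);
		}
	}
	if ((sc->flags & MY_FLAG_VALID) == 0)
		error = ENOMEM;
	return (error);
}

Note that clearing the in-progress flag must use "flags &= ~FLAG"; a bare
"flags &= FLAG" would instead discard every other flag bit.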
+
+static void
+passiocleanup(struct pass_softc *softc, struct pass_io_req *io_req)
+{
+ union ccb *ccb;
+ u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
+ int i, numbufs;
+
+ ccb = &io_req->ccb;
+
+ switch (ccb->ccb_h.func_code) {
+ case XPT_DEV_MATCH:
+ numbufs = min(io_req->num_bufs, 2);
+
+ if (numbufs == 1) {
+ data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
+ } else {
+ data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns;
+ data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches;
+ }
+ break;
+ case XPT_SCSI_IO:
+ case XPT_CONT_TARGET_IO:
+ data_ptrs[0] = &ccb->csio.data_ptr;
+ numbufs = min(io_req->num_bufs, 1);
+ break;
+ case XPT_ATA_IO:
+ data_ptrs[0] = &ccb->ataio.data_ptr;
+ numbufs = min(io_req->num_bufs, 1);
+ break;
+ case XPT_SMP_IO:
+ numbufs = min(io_req->num_bufs, 2);
+ data_ptrs[0] = &ccb->smpio.smp_request;
+ data_ptrs[1] = &ccb->smpio.smp_response;
+ break;
+ case XPT_DEV_ADVINFO:
+ numbufs = min(io_req->num_bufs, 1);
+ data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
+ break;
+ default:
+ /* allow ourselves to be swapped once again */
+ return;
+ break; /* NOTREACHED */
+ }
+
+ if (io_req->flags & PASS_IO_USER_SEG_MALLOC) {
+ free(io_req->user_segptr, M_SCSIPASS);
+ io_req->user_segptr = NULL;
+ }
+
+ /*
+ * We only want to free memory we malloced.
+ */
+ if (io_req->data_flags == CAM_DATA_VADDR) {
+ for (i = 0; i < io_req->num_bufs; i++) {
+ if (io_req->kern_bufs[i] == NULL)
+ continue;
+
+ free(io_req->kern_bufs[i], M_SCSIPASS);
+ io_req->kern_bufs[i] = NULL;
+ }
+ } else if (io_req->data_flags == CAM_DATA_SG) {
+ for (i = 0; i < io_req->num_kern_segs; i++) {
+ if (io_req->kern_segptr[i].ds_addr == 0)
+ continue;
+
+ uma_zfree(softc->pass_io_zone, (uint8_t *)(uintptr_t)
+ io_req->kern_segptr[i].ds_addr);
+ io_req->kern_segptr[i].ds_addr = 0;
+ }
+ }
+
+ if (io_req->flags & PASS_IO_KERN_SEG_MALLOC) {
+ free(io_req->kern_segptr, M_SCSIPASS);
+ io_req->kern_segptr = NULL;
+ }
+
+ if (io_req->data_flags != CAM_DATA_PADDR) {
+ for (i = 0; i < numbufs; i++) {
+ /*
+ * Restore the user's buffer pointers to their
+ * previous values.
+ */
+ if (io_req->user_bufs[i] != NULL)
+ *data_ptrs[i] = io_req->user_bufs[i];
+ }
+ }
+
+}
+
+static int
+passcopysglist(struct cam_periph *periph, struct pass_io_req *io_req,
+ ccb_flags direction)
+{
+ bus_size_t kern_watermark, user_watermark, len_copied, len_to_copy;
+ bus_dma_segment_t *user_sglist, *kern_sglist;
+ int i, j, error;
+
+ error = 0;
+ kern_watermark = 0;
+ user_watermark = 0;
+ len_to_copy = 0;
+ len_copied = 0;
+ user_sglist = io_req->user_segptr;
+ kern_sglist = io_req->kern_segptr;
+
+ for (i = 0, j = 0; i < io_req->num_user_segs &&
+ j < io_req->num_kern_segs;) {
+ uint8_t *user_ptr, *kern_ptr;
+
+ len_to_copy = min(user_sglist[i].ds_len - user_watermark,
+ kern_sglist[j].ds_len - kern_watermark);
+
+ user_ptr = (uint8_t *)(uintptr_t)user_sglist[i].ds_addr;
+ user_ptr = user_ptr + user_watermark;
+ kern_ptr = (uint8_t *)(uintptr_t)kern_sglist[j].ds_addr;
+ kern_ptr = kern_ptr + kern_watermark;
+
+ user_watermark += len_to_copy;
+ kern_watermark += len_to_copy;
+
+ if (!useracc(user_ptr, len_to_copy,
+ (direction == CAM_DIR_IN) ? VM_PROT_WRITE : VM_PROT_READ)) {
+ xpt_print(periph->path, "%s: unable to access user "
+ "S/G list element %p len %zu\n", __func__,
+ user_ptr, len_to_copy);
+ error = EFAULT;
+ goto bailout;
+ }
+
+ if (direction == CAM_DIR_IN) {
+ error = copyout(kern_ptr, user_ptr, len_to_copy);
+ if (error != 0) {
+ xpt_print(periph->path, "%s: copyout of %u "
+ "bytes from %p to %p failed with "
+ "error %d\n", __func__, len_to_copy,
+ kern_ptr, user_ptr, error);
+ goto bailout;
+ }
+ } else {
+ error = copyin(user_ptr, kern_ptr, len_to_copy);
+ if (error != 0) {
+ xpt_print(periph->path, "%s: copyin of %u "
+ "bytes from %p to %p failed with "
+ "error %d\n", __func__, len_to_copy,
+ user_ptr, kern_ptr, error);
+ goto bailout;
+ }
+ }
+
+ len_copied += len_to_copy;
+
+ if (user_sglist[i].ds_len == user_watermark) {
+ i++;
+ user_watermark = 0;
+ }
+
+ if (kern_sglist[j].ds_len == kern_watermark) {
+ j++;
+ kern_watermark = 0;
+ }
+ }
+
+bailout:
+
+ return (error);
+}
+
+static int
+passmemsetup(struct cam_periph *periph, struct pass_io_req *io_req)
+{
+ union ccb *ccb;
+ struct pass_softc *softc;
+ int numbufs, i;
+ uint8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
+ uint32_t lengths[CAM_PERIPH_MAXMAPS];
+ uint32_t dirs[CAM_PERIPH_MAXMAPS];
+ uint32_t num_segs;
+ uint16_t *seg_cnt_ptr;
+ size_t maxmap;
+ int error;
+
+ cam_periph_assert(periph, MA_NOTOWNED);
+
+ softc = periph->softc;
+
+ error = 0;
+ ccb = &io_req->ccb;
+ maxmap = 0;
+ num_segs = 0;
+ seg_cnt_ptr = NULL;
+
+ switch(ccb->ccb_h.func_code) {
+ case XPT_DEV_MATCH:
+ if (ccb->cdm.match_buf_len == 0) {
+ printf("%s: invalid match buffer length 0\n", __func__);
+ return(EINVAL);
+ }
+ if (ccb->cdm.pattern_buf_len > 0) {
+ data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns;
+ lengths[0] = ccb->cdm.pattern_buf_len;
+ dirs[0] = CAM_DIR_OUT;
+ data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches;
+ lengths[1] = ccb->cdm.match_buf_len;
+ dirs[1] = CAM_DIR_IN;
+ numbufs = 2;
+ } else {
+ data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
+ lengths[0] = ccb->cdm.match_buf_len;
+ dirs[0] = CAM_DIR_IN;
+ numbufs = 1;
+ }
+ io_req->data_flags = CAM_DATA_VADDR;
+ break;
+ case XPT_SCSI_IO:
+ case XPT_CONT_TARGET_IO:
+ if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
+ return(0);
+
+ /*
+ * The user shouldn't be able to supply a bio.
+ */
+ if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
+ return (EINVAL);
+
+ io_req->data_flags = ccb->ccb_h.flags & CAM_DATA_MASK;
+
+ data_ptrs[0] = &ccb->csio.data_ptr;
+ lengths[0] = ccb->csio.dxfer_len;
+ dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
+ num_segs = ccb->csio.sglist_cnt;
+ seg_cnt_ptr = &ccb->csio.sglist_cnt;
+ numbufs = 1;
+ maxmap = softc->maxio;
+ break;
+ case XPT_ATA_IO:
+ if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
+ return(0);
+
+ /*
+ * We only support a single virtual address for ATA I/O.
+ */
+ if ((ccb->ccb_h.flags & CAM_DATA_MASK) != CAM_DATA_VADDR)
+ return (EINVAL);
+
+ io_req->data_flags = CAM_DATA_VADDR;
+
+ data_ptrs[0] = &ccb->ataio.data_ptr;
+ lengths[0] = ccb->ataio.dxfer_len;
+ dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
+ numbufs = 1;
+ maxmap = softc->maxio;
+ break;
+ case XPT_SMP_IO:
+ io_req->data_flags = CAM_DATA_VADDR;
+
+ data_ptrs[0] = &ccb->smpio.smp_request;
+ lengths[0] = ccb->smpio.smp_request_len;
+ dirs[0] = CAM_DIR_OUT;
+ data_ptrs[1] = &ccb->smpio.smp_response;
+ lengths[1] = ccb->smpio.smp_response_len;
+ dirs[1] = CAM_DIR_IN;
+ numbufs = 2;
+ maxmap = softc->maxio;
+ break;
+ case XPT_DEV_ADVINFO:
+ if (ccb->cdai.bufsiz == 0)
+ return (0);
+
+ io_req->data_flags = CAM_DATA_VADDR;
+
+ data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
+ lengths[0] = ccb->cdai.bufsiz;
+ dirs[0] = CAM_DIR_IN;
+ numbufs = 1;
+ break;
+ default:
+ return(EINVAL);
+ break; /* NOTREACHED */
+ }
+
+ io_req->num_bufs = numbufs;
+
+ /*
+ * If there is a maximum, check to make sure that the user's
+ * request fits within the limit. In general, we should only have
+ * a maximum length for requests that go to hardware. Otherwise it
+ * is whatever we're able to malloc.
+ */
+ for (i = 0; i < numbufs; i++) {
+ io_req->user_bufs[i] = *data_ptrs[i];
+ io_req->dirs[i] = dirs[i];
+ io_req->lengths[i] = lengths[i];
+
+ if (maxmap == 0)
+ continue;
+
+ if (lengths[i] <= maxmap)
+ continue;
+
+ xpt_print(periph->path, "%s: data length %u > max allowed %u "
+ "bytes\n", __func__, lengths[i], maxmap);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ switch (io_req->data_flags) {
+ case CAM_DATA_VADDR:
+ /* Map or copy the buffer into kernel address space */
+ for (i = 0; i < numbufs; i++) {
+ uint8_t *tmp_buf;
+
+ /*
+ * If for some reason no length is specified, we
+ * don't need to allocate anything.
+ */
+ if (io_req->lengths[i] == 0)
+ continue;
+
+ /*
+ * Make sure that the user's buffer is accessible
+ * to that process.
+ */
+ if (!useracc(io_req->user_bufs[i], io_req->lengths[i],
+ (io_req->dirs[i] == CAM_DIR_IN) ? VM_PROT_WRITE :
+ VM_PROT_READ)) {
+ xpt_print(periph->path, "%s: user address %p "
+ "length %u is not accessible\n", __func__,
+ io_req->user_bufs[i], io_req->lengths[i]);
+ error = EFAULT;
+ goto bailout;
+ }
+
+ tmp_buf = malloc(lengths[i], M_SCSIPASS,
+ M_WAITOK | M_ZERO);
+ io_req->kern_bufs[i] = tmp_buf;
+ *data_ptrs[i] = tmp_buf;
+
+#if 0
+ xpt_print(periph->path, "%s: malloced %p len %u, user "
+ "buffer %p, operation: %s\n", __func__,
+ tmp_buf, lengths[i], io_req->user_bufs[i],
+ (dirs[i] == CAM_DIR_IN) ? "read" : "write");
+#endif
+ /*
+ * We only need to copy in if the user is writing.
+ */
+ if (dirs[i] != CAM_DIR_OUT)
+ continue;
+
+ error = copyin(io_req->user_bufs[i],
+ io_req->kern_bufs[i], lengths[i]);
+ if (error != 0) {
+ xpt_print(periph->path, "%s: copy of user "
+ "buffer from %p to %p failed with "
+ "error %d\n", __func__,
+ io_req->user_bufs[i],
+ io_req->kern_bufs[i], error);
+ goto bailout;
+ }
+ }
+ break;
+ case CAM_DATA_PADDR:
+ /* Pass down the pointer as-is */
+ break;
+ case CAM_DATA_SG: {
+ size_t sg_length, size_to_go, alloc_size;
+ uint32_t num_segs_needed;
+
+ /*
+ * Copy the user S/G list in, and then copy in the
+ * individual segments.
+ */
+ /*
+ * We shouldn't see this, but check just in case.
+ */
+ if (numbufs != 1) {
+ xpt_print(periph->path, "%s: cannot currently handle "
+ "more than one S/G list per CCB\n", __func__);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ /*
+ * We have to have at least one segment.
+ */
+ if (num_segs == 0) {
+ xpt_print(periph->path, "%s: CAM_DATA_SG flag set, "
+ "but sglist_cnt=0!\n", __func__);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ /*
+ * Make sure the user specified the total length and didn't
+ * just leave it to us to decode the S/G list.
+ */
+ if (lengths[0] == 0) {
+ xpt_print(periph->path, "%s: no dxfer_len specified, "
+ "but CAM_DATA_SG flag is set!\n", __func__);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ /*
+ * We allocate buffers in io_zone_size increments for an
+ * S/G list. This will generally be MAXPHYS.
+ */
+ if (lengths[0] <= softc->io_zone_size)
+ num_segs_needed = 1;
+ else {
+ num_segs_needed = lengths[0] / softc->io_zone_size;
+ if ((lengths[0] % softc->io_zone_size) != 0)
+ num_segs_needed++;
+ }
+
+ /* Figure out the size of the S/G list */
+ sg_length = num_segs * sizeof(bus_dma_segment_t);
+ io_req->num_user_segs = num_segs;
+ io_req->num_kern_segs = num_segs_needed;
+
+ /* Save the user's S/G list pointer for later restoration */
+ io_req->user_bufs[0] = *data_ptrs[0];
+
+ /*
+ * If the number of segments in the user's S/G list is larger
+ * than what we have preallocated, allocate a big enough array;
+ * otherwise, use the preallocated segment array.
+ */
+ if (num_segs > PASS_MAX_SEGS) {
+ io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
+ num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
+ io_req->flags |= PASS_IO_USER_SEG_MALLOC;
+ } else
+ io_req->user_segptr = io_req->user_segs;
+
+ if (!useracc(*data_ptrs[0], sg_length, VM_PROT_READ)) {
+ xpt_print(periph->path, "%s: unable to access user "
+ "S/G list at %p\n", __func__, *data_ptrs[0]);
+ error = EFAULT;
+ goto bailout;
+ }
+
+ error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
+ if (error != 0) {
+ xpt_print(periph->path, "%s: copy of user S/G list "
+ "from %p to %p failed with error %d\n",
+ __func__, *data_ptrs[0], io_req->user_segptr,
+ error);
+ goto bailout;
+ }
+
+ if (num_segs_needed > PASS_MAX_SEGS) {
+ io_req->kern_segptr = malloc(sizeof(bus_dma_segment_t) *
+ num_segs_needed, M_SCSIPASS, M_WAITOK | M_ZERO);
+ io_req->flags |= PASS_IO_KERN_SEG_MALLOC;
+ } else {
+ io_req->kern_segptr = io_req->kern_segs;
+ }
+
+ /*
+ * Allocate the kernel S/G list.
+ */
+ for (size_to_go = lengths[0], i = 0;
+ size_to_go > 0 && i < num_segs_needed;
+ i++, size_to_go -= alloc_size) {
+ uint8_t *kern_ptr;
+
+ alloc_size = min(size_to_go, softc->io_zone_size);
+ kern_ptr = uma_zalloc(softc->pass_io_zone, M_WAITOK);
+ io_req->kern_segptr[i].ds_addr =
+ (bus_addr_t)(uintptr_t)kern_ptr;
+ io_req->kern_segptr[i].ds_len = alloc_size;
+ }
+ if (size_to_go > 0) {
+ printf("%s: size_to_go = %zu, software error!\n",
+ __func__, size_to_go);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ *data_ptrs[0] = (uint8_t *)io_req->kern_segptr;
+ *seg_cnt_ptr = io_req->num_kern_segs;
+
+ /*
+ * We only need to copy data here if the user is writing.
+ */
+ if (dirs[0] == CAM_DIR_OUT)
+ error = passcopysglist(periph, io_req, dirs[0]);
+ break;
+ }
+ case CAM_DATA_SG_PADDR: {
+ size_t sg_length;
+
+ /*
+ * We shouldn't see this, but check just in case.
+ */
+ if (numbufs != 1) {
+ printf("%s: cannot currently handle more than one "
+ "S/G list per CCB\n", __func__);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ /*
+ * We have to have at least one segment.
+ */
+ if (num_segs == 0) {
+ xpt_print(periph->path, "%s: CAM_DATA_SG_PADDR flag "
+ "set, but sglist_cnt=0!\n", __func__);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ /*
+ * Make sure the user specified the total length and didn't
+ * just leave it to us to decode the S/G list.
+ */
+ if (lengths[0] == 0) {
+ xpt_print(periph->path, "%s: no dxfer_len specified, "
+ "but CAM_DATA_SG flag is set!\n", __func__);
+ error = EINVAL;
+ goto bailout;
+ }
+
+ /* Figure out the size of the S/G list */
+ sg_length = num_segs * sizeof(bus_dma_segment_t);
+ io_req->num_user_segs = num_segs;
+ io_req->num_kern_segs = io_req->num_user_segs;
+
+ /* Save the user's S/G list pointer for later restoration */
+ io_req->user_bufs[0] = *data_ptrs[0];
+
+ if (num_segs > PASS_MAX_SEGS) {
+ io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
+ num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
+ io_req->flags |= PASS_IO_USER_SEG_MALLOC;
+ } else
+ io_req->user_segptr = io_req->user_segs;
+
+ io_req->kern_segptr = io_req->user_segptr;
+
+ error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
+ if (error != 0) {
+ xpt_print(periph->path, "%s: copy of user S/G list "
+ "from %p to %p failed with error %d\n",
+ __func__, *data_ptrs[0], io_req->user_segptr,
+ error);
+ goto bailout;
+ }
+ break;
+ }
+ default:
+ case CAM_DATA_BIO:
+ /*
+ * A user shouldn't be attaching a bio to the CCB. It
+ * isn't a user-accessible structure.
+ */
+ error = EINVAL;
+ break;
+ }
+
+bailout:
+ if (error != 0)
+ passiocleanup(softc, io_req);
+
+ return (error);
+}
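+
To make the CAM_DATA_SG sizing arithmetic in passmemsetup() concrete: with
io_zone_size equal to MAXPHYS (128 KiB on most platforms), a 300 KiB
transfer needs num_segs_needed = 3, and the allocation loop hands out
128 KiB, 128 KiB, and 44 KiB chunks.  The explicit divide/remainder
computation is equivalent to the howmany() macro from <sys/param.h>, as
this one-line sketch shows:

	/* Round up: how many io_zone_size chunks cover lengths[0]? */
	num_segs_needed = howmany(lengths[0], softc->io_zone_size);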
+
+static int
+passmemdone(struct cam_periph *periph, struct pass_io_req *io_req)
+{
+ struct pass_softc *softc;
+ union ccb *ccb;
+ int error;
+ int i;
+
+ error = 0;
+ softc = (struct pass_softc *)periph->softc;
+ ccb = &io_req->ccb;
+
+ switch (io_req->data_flags) {
+ case CAM_DATA_VADDR:
+ /*
+ * Copy back to the user buffer if this was a read.
+ */
+ for (i = 0; i < io_req->num_bufs; i++) {
+ if (io_req->dirs[i] != CAM_DIR_IN)
+ continue;
+
+ error = copyout(io_req->kern_bufs[i],
+ io_req->user_bufs[i], io_req->lengths[i]);
+ if (error != 0) {
+ xpt_print(periph->path, "Unable to copy %u "
+ "bytes from %p to user address %p\n",
+ io_req->lengths[i],
+ io_req->kern_bufs[i],
+ io_req->user_bufs[i]);
+ goto bailout;
+ }
+
+ }
+ break;
+ case CAM_DATA_PADDR:
+ /* Do nothing. The pointer is a physical address already */
+ break;
+ case CAM_DATA_SG:
+ /*
+ * Copy back to the user buffer if this was a read.
+ * Restore the user's S/G list buffer pointer.
+ */
+ if (io_req->dirs[0] == CAM_DIR_IN)
+ error = passcopysglist(periph, io_req, io_req->dirs[0]);
+ break;
+ case CAM_DATA_SG_PADDR:
+ /*
+ * Restore the user's S/G list buffer pointer. No need to
+ * copy.
+ */
+ break;
+ default:
+ case CAM_DATA_BIO:
+ error = EINVAL;
+ break;
+ }
+
+bailout:
+ /*
+ * Reset the user's pointers to their original values and free
+ * allocated memory.
+ */
+ passiocleanup(softc, io_req);
+
+ return (error);
+}
+
static int
passioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
{
@@ -622,15 +1828,317 @@ passdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread
break;
}
+ case CAMIOQUEUE:
+ {
+ struct pass_io_req *io_req;
+ union ccb **user_ccb, *ccb;
+ xpt_opcode fc;
+
+ if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0) {
+ error = passcreatezone(periph);
+ if (error != 0)
+ goto bailout;
+ }
+
+ /*
+ * We're going to do a blocking allocation for this I/O
+ * request, so we have to drop the lock.
+ */
+ cam_periph_unlock(periph);
+
+ io_req = uma_zalloc(softc->pass_zone, M_WAITOK | M_ZERO);
+ ccb = &io_req->ccb;
+ user_ccb = (union ccb **)addr;
+
+ /*
+ * Unlike the CAMIOCOMMAND ioctl above, we only have a
+ * pointer to the user's CCB, so we have to copy the whole
+ * thing in to a buffer we have allocated (above) instead
+ * of allowing the ioctl code to malloc a buffer and copy
+ * it in.
+ *
+ * This is an advantage for this asynchronous interface,
+ * since we don't want the memory to get freed while the
+ * CCB is outstanding.
+ */
+#if 0
+ xpt_print(periph->path, "Copying user CCB %p to "
+ "kernel address %p\n", *user_ccb, ccb);
+#endif
+ error = copyin(*user_ccb, ccb, sizeof(*ccb));
+ if (error != 0) {
+ xpt_print(periph->path, "Copy of user CCB %p to "
+ "kernel address %p failed with error %d\n",
+ *user_ccb, ccb, error);
+ uma_zfree(softc->pass_zone, io_req);
+ cam_periph_lock(periph);
+ break;
+ }
+
+ /*
+ * Some CCB types, like scan bus and scan lun can only go
+ * through the transport layer device.
+ */
+ if (ccb->ccb_h.func_code & XPT_FC_XPT_ONLY) {
+ xpt_print(periph->path, "CCB function code %#x is "
+ "restricted to the XPT device\n",
+ ccb->ccb_h.func_code);
+ uma_zfree(softc->pass_zone, io_req);
+ cam_periph_lock(periph);
+ error = ENODEV;
+ break;
+ }
+
+ /*
+ * Save the user's CCB pointer as well as his linked list
+ * pointers and peripheral private area so that we can
+ * restore these later.
+ */
+ io_req->user_ccb_ptr = *user_ccb;
+ io_req->user_periph_links = ccb->ccb_h.periph_links;
+ io_req->user_periph_priv = ccb->ccb_h.periph_priv;
+
+ /*
+ * Now that we've saved the user's values, we can set our
+ * own peripheral private entry.
+ */
+ ccb->ccb_h.ccb_ioreq = io_req;
+
+ /* Compatibility for RL/priority-unaware code. */
+ priority = ccb->ccb_h.pinfo.priority;
+ if (priority <= CAM_PRIORITY_OOB)
+ priority += CAM_PRIORITY_OOB + 1;
+
+ /*
+ * Setup fields in the CCB like the path and the priority.
+ * The path in particular cannot be done in userland, since
+ * it is a pointer to a kernel data structure.
+ */
+ xpt_setup_ccb_flags(&ccb->ccb_h, periph->path, priority,
+ ccb->ccb_h.flags);
+
+ /*
+ * Setup our done routine. There is no way for the user to
+ * have a valid pointer here.
+ */
+ ccb->ccb_h.cbfcnp = passdone;
+
+ fc = ccb->ccb_h.func_code;
+ /*
+ * If this function code has memory that can be mapped in
+ * or out, we need to call passmemsetup().
+ */
+ if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO)
+ || (fc == XPT_SMP_IO) || (fc == XPT_DEV_MATCH)
+ || (fc == XPT_DEV_ADVINFO)) {
+ error = passmemsetup(periph, io_req);
+ if (error != 0) {
+ uma_zfree(softc->pass_zone, io_req);
+ cam_periph_lock(periph);
+ break;
+ }
+ } else
+ io_req->mapinfo.num_bufs_used = 0;
+
+ cam_periph_lock(periph);
+
+ /*
+ * Everything goes on the incoming queue initially.
+ */
+ TAILQ_INSERT_TAIL(&softc->incoming_queue, io_req, links);
+
+ /*
+ * If the CCB is queued, and is not a user CCB, then
+ * we need to allocate a slot for it. Call xpt_schedule()
+ * so that our start routine will get called when a CCB is
+ * available.
+ */
+ if ((fc & XPT_FC_QUEUED)
+ && ((fc & XPT_FC_USER_CCB) == 0)) {
+ xpt_schedule(periph, priority);
+ break;
+ }
+
+ /*
+ * At this point, the CCB in question is either an
+ * immediate CCB (like XPT_DEV_ADVINFO) or it is a user CCB
+ * and therefore should be malloced, not allocated via a slot.
+ * Remove the CCB from the incoming queue and add it to the
+ * active queue.
+ */
+ TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
+ TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links);
+
+ xpt_action(ccb);
+
+ /*
+ * If this is not a queued CCB (i.e. it is an immediate CCB),
+ * then it is already done. We need to put it on the done
+ * queue for the user to fetch.
+ */
+ if ((fc & XPT_FC_QUEUED) == 0) {
+ TAILQ_REMOVE(&softc->active_queue, io_req, links);
+ TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
+ }
+ break;
+ }
+ case CAMIOGET:
+ {
+ union ccb **user_ccb;
+ struct pass_io_req *io_req;
+ int old_error;
+
+ user_ccb = (union ccb **)addr;
+ old_error = 0;
+
+ io_req = TAILQ_FIRST(&softc->done_queue);
+ if (io_req == NULL) {
+ error = ENOENT;
+ break;
+ }
+
+ /*
+ * Remove the I/O from the done queue.
+ */
+ TAILQ_REMOVE(&softc->done_queue, io_req, links);
+
+ /*
+ * We have to drop the lock during the copyout because the
+ * copyout can result in VM faults that require sleeping.
+ */
+ cam_periph_unlock(periph);
+
+ /*
+ * Do any needed copies (e.g. for reads) and revert the
+ * pointers in the CCB back to the user's pointers.
+ */
+ error = passmemdone(periph, io_req);
+
+ old_error = error;
+
+ io_req->ccb.ccb_h.periph_links = io_req->user_periph_links;
+ io_req->ccb.ccb_h.periph_priv = io_req->user_periph_priv;
+
+#if 0
+ xpt_print(periph->path, "Copying to user CCB %p from "
+ "kernel address %p\n", *user_ccb, &io_req->ccb);
+#endif
+
+ error = copyout(&io_req->ccb, *user_ccb, sizeof(union ccb));
+ if (error != 0) {
+ xpt_print(periph->path, "Copy to user CCB %p from "
+ "kernel address %p failed with error %d\n",
+ *user_ccb, &io_req->ccb, error);
+ }
+
+ /*
+ * Prefer the first error we got back, and make sure we
+ * don't overwrite bad status with good.
+ */
+ if (old_error != 0)
+ error = old_error;
+
+ cam_periph_lock(periph);
+
+ /*
+ * At this point, if there was an error, we could potentially
+ * re-queue the I/O and try again. But why? The error
+ * would almost certainly happen again. We might as well
+ * not leak memory.
+ */
+ uma_zfree(softc->pass_zone, io_req);
+ break;
+ }
default:
error = cam_periph_ioctl(periph, cmd, addr, passerror);
break;
}
+bailout:
cam_periph_unlock(periph);
+
return(error);
}
+static int
+passpoll(struct cdev *dev, int poll_events, struct thread *td)
+{
+ struct cam_periph *periph;
+ struct pass_softc *softc;
+ int revents;
+
+ periph = (struct cam_periph *)dev->si_drv1;
+ if (periph == NULL)
+ return (ENXIO);
+
+ softc = (struct pass_softc *)periph->softc;
+
+ revents = poll_events & (POLLOUT | POLLWRNORM);
+ if ((poll_events & (POLLIN | POLLRDNORM)) != 0) {
+ cam_periph_lock(periph);
+
+ if (!TAILQ_EMPTY(&softc->done_queue)) {
+ revents |= poll_events & (POLLIN | POLLRDNORM);
+ }
+ cam_periph_unlock(periph);
+ if (revents == 0)
+ selrecord(td, &softc->read_select);
+ }
+
+ return (revents);
+}
+
+static int
+passkqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct cam_periph *periph;
+ struct pass_softc *softc;
+
+ periph = (struct cam_periph *)dev->si_drv1;
+ if (periph == NULL)
+ return (ENXIO);
+
+ softc = (struct pass_softc *)periph->softc;
+
+ kn->kn_hook = (caddr_t)periph;
+ kn->kn_fop = &passread_filtops;
+ knlist_add(&softc->read_select.si_note, kn, 0);
+
+ return (0);
+}
+
+static void
+passreadfiltdetach(struct knote *kn)
+{
+ struct cam_periph *periph;
+ struct pass_softc *softc;
+
+ periph = (struct cam_periph *)kn->kn_hook;
+ softc = (struct pass_softc *)periph->softc;
+
+ knlist_remove(&softc->read_select.si_note, kn, 0);
+}
+
+static int
+passreadfilt(struct knote *kn, long hint)
+{
+ struct cam_periph *periph;
+ struct pass_softc *softc;
+ int retval;
+
+ periph = (struct cam_periph *)kn->kn_hook;
+ softc = (struct pass_softc *)periph->softc;
+
+ cam_periph_assert(periph, MA_OWNED);
+
+ if (TAILQ_EMPTY(&softc->done_queue))
+ retval = 0;
+ else
+ retval = 1;
+
+ return (retval);
+}
+
/*
* Generally, "ccb" should be the CCB supplied by the kernel. "inccb"
* should be the CCB that is copied in from the user.
@@ -652,6 +2160,10 @@ passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb)
xpt_merge_ccb(ccb, inccb);
/*
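+	 * Point the CCB's completion routine at the driver's own
+	 * passdone() so that we are notified when the CCB completes.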
+ */
+ ccb->ccb_h.cbfcnp = passdone;
+
+ /*
* Let cam_periph_mapmem do a sanity check on the data pointer format.
* Even if no data transfer is needed, it's a cheap check and it
* simplifies the code.
diff --git a/sys/cam/scsi/scsi_pass.h b/sys/cam/scsi/scsi_pass.h
index ae0e058..797ef08 100644
--- a/sys/cam/scsi/scsi_pass.h
+++ b/sys/cam/scsi/scsi_pass.h
@@ -39,4 +39,12 @@
#define CAMIOCOMMAND _IOWR(CAM_VERSION, 2, union ccb)
#define CAMGETPASSTHRU _IOWR(CAM_VERSION, 3, union ccb)
+/*
+ * These two ioctls take a union ccb *, but that is not explicitly declared
+ * to avoid having the ioctl handling code malloc and free their own copy
+ * of the CCB or the CCB pointer.
+ */
+#define CAMIOQUEUE _IO(CAM_VERSION, 4)
+#define CAMIOGET _IO(CAM_VERSION, 5)
+
#endif
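
The two ioctls above are meant to be used as a pair: queue a CCB with
CAMIOQUEUE, wait for the driver's done queue to become non-empty (via the
read filter installed by passkqfilter() above), then collect the completed
CCB with CAMIOGET.  The following minimal userland sketch (not part of this
change) shows that assumed flow; the helper name run_one_ccb() is
illustrative, CCB setup (e.g. filling in a SCSI READ) is omitted, and the
program must be linked with -lcam:

#include <sys/types.h>
#include <sys/event.h>
#include <sys/ioctl.h>
#include <err.h>
#include <unistd.h>
#include <camlib.h>
#include <cam/scsi/scsi_pass.h>

static void
run_one_ccb(struct cam_device *dev, union ccb *ccb)
{
	struct kevent ke;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	/* passreadfilt() fires when the done queue is non-empty. */
	EV_SET(&ke, dev->fd, EVFILT_READ, EV_ADD | EV_ENABLE, 0, 0, 0);
	if (kevent(kq, &ke, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");

	/* Hand the CCB to the kernel; it executes asynchronously. */
	if (ioctl(dev->fd, CAMIOQUEUE, ccb) == -1)
		err(1, "CAMIOQUEUE");

	/* Sleep until a CCB completes, then copy it back out. */
	if (kevent(kq, NULL, 0, &ke, 1, NULL) == -1)
		err(1, "kevent wait");
	if (ioctl(dev->fd, CAMIOGET, ccb) == -1)
		err(1, "CAMIOGET");
	/* The caller should now inspect ccb->ccb_h.status. */
	close(kq);
}

In practice a caller would keep several CCBs queued at once and drain
CAMIOGET until it returns ENOENT, matching the done-queue check in the
CAMIOGET handler above.
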
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index dccd5b3..27ef8b3 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -99,6 +99,8 @@
#include <vm/swap_pager.h>
#include <vm/uma.h>
+#include <machine/bus.h>
+
#define MD_MODVER 1
#define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */
@@ -435,7 +437,7 @@ g_md_start(struct bio *bp)
#define MD_MALLOC_MOVE_CMP 5
static int
-md_malloc_move(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
+md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
void *ptr, u_char fill, int op)
{
struct sf_buf *sf;
@@ -497,7 +499,7 @@ md_malloc_move(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
}
break;
default:
- KASSERT(0, ("md_malloc_move unknown op %d\n", op));
+ KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op));
break;
}
if (error != 0)
@@ -520,10 +522,68 @@ md_malloc_move(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
}
static int
+md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs,
+ unsigned len, void *ptr, u_char fill, int op)
+{
+ bus_dma_segment_t *vlist;
+ uint8_t *p, *end, first;
+ off_t *uc;
+ int ma_offs, seg_len;
+
+ vlist = *pvlist;
+ ma_offs = *pma_offs;
+ uc = ptr;
+
+ for (; len != 0; len -= seg_len) {
+ seg_len = imin(vlist->ds_len - ma_offs, len);
+ p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs;
+ switch (op) {
+ case MD_MALLOC_MOVE_ZERO:
+ bzero(p, seg_len);
+ break;
+ case MD_MALLOC_MOVE_FILL:
+ memset(p, fill, seg_len);
+ break;
+ case MD_MALLOC_MOVE_READ:
+ bcopy(ptr, p, seg_len);
+ cpu_flush_dcache(p, seg_len);
+ break;
+ case MD_MALLOC_MOVE_WRITE:
+ bcopy(p, ptr, seg_len);
+ break;
+ case MD_MALLOC_MOVE_CMP:
+ end = p + seg_len;
+ first = *uc = *p;
+ /* Confirm all following bytes match the first */
+ while (++p < end) {
+ if (*p != first)
+ return (EDOOFUS);
+ }
+ break;
+ default:
+ KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op));
+ break;
+ }
+
+ ma_offs += seg_len;
+ if (ma_offs == vlist->ds_len) {
+ ma_offs = 0;
+ vlist++;
+ }
+ ptr = (uint8_t *)ptr + seg_len;
+ }
+ *pvlist = vlist;
+ *pma_offs = ma_offs;
+
+ return (0);
+}
+
+static int
mdstart_malloc(struct md_s *sc, struct bio *bp)
{
u_char *dst;
vm_page_t *m;
+ bus_dma_segment_t *vlist;
int i, error, error1, ma_offs, notmapped;
off_t secno, nsec, uc;
uintptr_t sp, osp;
@@ -538,10 +598,16 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
}
notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0;
+ vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
+ (bus_dma_segment_t *)bp->bio_data : NULL;
if (notmapped) {
m = bp->bio_ma;
ma_offs = bp->bio_ma_offset;
dst = NULL;
+ KASSERT(vlist == NULL, ("vlists cannot be unmapped"));
+ } else if (vlist != NULL) {
+ ma_offs = bp->bio_ma_offset;
+ dst = NULL;
} else {
dst = bp->bio_data;
}
@@ -557,23 +623,36 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
} else if (bp->bio_cmd == BIO_READ) {
if (osp == 0) {
if (notmapped) {
- error = md_malloc_move(&m, &ma_offs,
+ error = md_malloc_move_ma(&m, &ma_offs,
sc->sectorsize, NULL, 0,
MD_MALLOC_MOVE_ZERO);
+ } else if (vlist != NULL) {
+ error = md_malloc_move_vlist(&vlist,
+ &ma_offs, sc->sectorsize, NULL, 0,
+ MD_MALLOC_MOVE_ZERO);
} else
bzero(dst, sc->sectorsize);
} else if (osp <= 255) {
if (notmapped) {
- error = md_malloc_move(&m, &ma_offs,
+ error = md_malloc_move_ma(&m, &ma_offs,
sc->sectorsize, NULL, osp,
MD_MALLOC_MOVE_FILL);
+ } else if (vlist != NULL) {
+ error = md_malloc_move_vlist(&vlist,
+ &ma_offs, sc->sectorsize, NULL, osp,
+ MD_MALLOC_MOVE_FILL);
} else
memset(dst, osp, sc->sectorsize);
} else {
if (notmapped) {
- error = md_malloc_move(&m, &ma_offs,
+ error = md_malloc_move_ma(&m, &ma_offs,
sc->sectorsize, (void *)osp, 0,
MD_MALLOC_MOVE_READ);
+ } else if (vlist != NULL) {
+ error = md_malloc_move_vlist(&vlist,
+ &ma_offs, sc->sectorsize,
+ (void *)osp, 0,
+ MD_MALLOC_MOVE_READ);
} else {
bcopy((void *)osp, dst, sc->sectorsize);
cpu_flush_dcache(dst, sc->sectorsize);
@@ -583,10 +662,15 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
} else if (bp->bio_cmd == BIO_WRITE) {
if (sc->flags & MD_COMPRESS) {
if (notmapped) {
- error1 = md_malloc_move(&m, &ma_offs,
+ error1 = md_malloc_move_ma(&m, &ma_offs,
sc->sectorsize, &uc, 0,
MD_MALLOC_MOVE_CMP);
i = error1 == 0 ? sc->sectorsize : 0;
+ } else if (vlist != NULL) {
+ error1 = md_malloc_move_vlist(&vlist,
+ &ma_offs, sc->sectorsize, &uc, 0,
+ MD_MALLOC_MOVE_CMP);
+ i = error1 == 0 ? sc->sectorsize : 0;
} else {
uc = dst[0];
for (i = 1; i < sc->sectorsize; i++) {
@@ -611,10 +695,15 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
break;
}
if (notmapped) {
- error = md_malloc_move(&m,
+ error = md_malloc_move_ma(&m,
&ma_offs, sc->sectorsize,
(void *)sp, 0,
MD_MALLOC_MOVE_WRITE);
+ } else if (vlist != NULL) {
+ error = md_malloc_move_vlist(
+ &vlist, &ma_offs,
+ sc->sectorsize, (void *)sp,
+ 0, MD_MALLOC_MOVE_WRITE);
} else {
bcopy(dst, (void *)sp,
sc->sectorsize);
@@ -622,10 +711,15 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
error = s_write(sc->indir, secno, sp);
} else {
if (notmapped) {
- error = md_malloc_move(&m,
+ error = md_malloc_move_ma(&m,
&ma_offs, sc->sectorsize,
(void *)osp, 0,
MD_MALLOC_MOVE_WRITE);
+ } else if (vlist != NULL) {
+ error = md_malloc_move_vlist(
+ &vlist, &ma_offs,
+ sc->sectorsize, (void *)osp,
+ 0, MD_MALLOC_MOVE_WRITE);
} else {
bcopy(dst, (void *)osp,
sc->sectorsize);
@@ -641,26 +735,78 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
if (error != 0)
break;
secno++;
- if (!notmapped)
+ if (!notmapped && vlist == NULL)
dst += sc->sectorsize;
}
bp->bio_resid = 0;
return (error);
}

+static void
+mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len)
+{
+ off_t seg_len;
+
+ while (offset >= vlist->ds_len) {
+ offset -= vlist->ds_len;
+ vlist++;
+ }
+
+ while (len != 0) {
+ seg_len = omin(len, vlist->ds_len - offset);
+ bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset),
+ seg_len);
+ offset = 0;
+ src = (uint8_t *)src + seg_len;
+ len -= seg_len;
+ vlist++;
+ }
+}
+
+static void
+mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len)
+{
+ off_t seg_len;
+
+ while (offset >= vlist->ds_len) {
+ offset -= vlist->ds_len;
+ vlist++;
+ }
+
+ while (len != 0) {
+ seg_len = omin(len, vlist->ds_len - offset);
+ bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst,
+ seg_len);
+ offset = 0;
+ dst = (uint8_t *)dst + seg_len;
+ len -= seg_len;
+ vlist++;
+ }
+}
+
static int
mdstart_preload(struct md_s *sc, struct bio *bp)
{
+ uint8_t *p;
+
+	p = sc->pl_ptr + bp->bio_offset;
switch (bp->bio_cmd) {
case BIO_READ:
- bcopy(sc->pl_ptr + bp->bio_offset, bp->bio_data,
- bp->bio_length);
+ if ((bp->bio_flags & BIO_VLIST) != 0) {
+ mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data,
+ bp->bio_ma_offset, bp->bio_length);
+ } else {
+ bcopy(p, bp->bio_data, bp->bio_length);
+ }
cpu_flush_dcache(bp->bio_data, bp->bio_length);
break;
case BIO_WRITE:
- bcopy(bp->bio_data, sc->pl_ptr + bp->bio_offset,
- bp->bio_length);
+ if ((bp->bio_flags & BIO_VLIST) != 0) {
+ mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data,
+ bp->bio_ma_offset, p, bp->bio_length);
+ } else {
+ bcopy(bp->bio_data, p, bp->bio_length);
+ }
break;
}
bp->bio_resid = 0;
@@ -673,16 +819,23 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
int error;
struct uio auio;
struct iovec aiov;
+ struct iovec *piov;
struct mount *mp;
struct vnode *vp;
struct buf *pb;
+ bus_dma_segment_t *vlist;
struct thread *td;
- off_t end, zerosize;
+ off_t len, zerosize;
+ int ma_offs;
switch (bp->bio_cmd) {
case BIO_READ:
+ auio.uio_rw = UIO_READ;
+ break;
case BIO_WRITE:
case BIO_DELETE:
+ auio.uio_rw = UIO_WRITE;
+ break;
case BIO_FLUSH:
break;
default:
@@ -691,6 +844,9 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
td = curthread;
vp = sc->vnode;
+ pb = NULL;
+ piov = NULL;
+ ma_offs = bp->bio_ma_offset;
/*
* VNODE I/O
@@ -709,73 +865,66 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
return (error);
}
- bzero(&auio, sizeof(auio));
+ auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
+ auio.uio_resid = bp->bio_length;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_td = td;
- /*
- * Special case for BIO_DELETE. On the surface, this is very
- * similar to BIO_WRITE, except that we write from our own
- * fixed-length buffer, so we have to loop. The net result is
- * that the two cases end up having very little in common.
- */
if (bp->bio_cmd == BIO_DELETE) {
+ /*
+ * Emulate BIO_DELETE by writing zeros.
+ */
zerosize = ZERO_REGION_SIZE -
(ZERO_REGION_SIZE % sc->sectorsize);
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = UIO_WRITE;
- auio.uio_td = td;
- end = bp->bio_offset + bp->bio_length;
- (void) vn_start_write(vp, &mp, V_WAIT);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- error = 0;
- while (auio.uio_offset < end) {
- aiov.iov_base = __DECONST(void *, zero_region);
- aiov.iov_len = end - auio.uio_offset;
- if (aiov.iov_len > zerosize)
- aiov.iov_len = zerosize;
- auio.uio_resid = aiov.iov_len;
- error = VOP_WRITE(vp, &auio,
- sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred);
- if (error != 0)
- break;
+ auio.uio_iovcnt = howmany(bp->bio_length, zerosize);
+ piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK);
+ auio.uio_iov = piov;
+ len = bp->bio_length;
+ while (len > 0) {
+ piov->iov_base = __DECONST(void *, zero_region);
+ piov->iov_len = len;
+ if (len > zerosize)
+ piov->iov_len = zerosize;
+ len -= piov->iov_len;
+ piov++;
}
- VOP_UNLOCK(vp, 0);
- vn_finished_write(mp);
- bp->bio_resid = end - auio.uio_offset;
- return (error);
- }
-
- KASSERT(bp->bio_length <= MAXPHYS, ("bio_length %jd",
- (uintmax_t)bp->bio_length));
- if ((bp->bio_flags & BIO_UNMAPPED) == 0) {
- pb = NULL;
- aiov.iov_base = bp->bio_data;
- } else {
+ piov = auio.uio_iov;
+ } else if ((bp->bio_flags & BIO_VLIST) != 0) {
+ piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK);
+ auio.uio_iov = piov;
+ vlist = (bus_dma_segment_t *)bp->bio_data;
+ len = bp->bio_length;
+ while (len > 0) {
+ piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr +
+ ma_offs);
+ piov->iov_len = vlist->ds_len - ma_offs;
+ if (piov->iov_len > len)
+ piov->iov_len = len;
+ len -= piov->iov_len;
+ ma_offs = 0;
+ vlist++;
+ piov++;
+ }
+ auio.uio_iovcnt = piov - auio.uio_iov;
+ piov = auio.uio_iov;
+ } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
pb = getpbuf(&md_vnode_pbuf_freecnt);
pmap_qenter((vm_offset_t)pb->b_data, bp->bio_ma, bp->bio_ma_n);
- aiov.iov_base = (void *)((vm_offset_t)pb->b_data +
- bp->bio_ma_offset);
+ aiov.iov_base = (void *)((vm_offset_t)pb->b_data + ma_offs);
+ aiov.iov_len = bp->bio_length;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ } else {
+ aiov.iov_base = bp->bio_data;
+ aiov.iov_len = bp->bio_length;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
}
- aiov.iov_len = bp->bio_length;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
- auio.uio_segflg = UIO_SYSSPACE;
- if (bp->bio_cmd == BIO_READ)
- auio.uio_rw = UIO_READ;
- else if (bp->bio_cmd == BIO_WRITE)
- auio.uio_rw = UIO_WRITE;
- else
- panic("wrong BIO_OP in mdstart_vnode");
- auio.uio_resid = bp->bio_length;
- auio.uio_td = td;
/*
* When reading set IO_DIRECT to try to avoid double-caching
* the data. When writing IO_DIRECT is not optimal.
*/
- if (bp->bio_cmd == BIO_READ) {
+ if (auio.uio_rw == UIO_READ) {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred);
VOP_UNLOCK(vp, 0);
@@ -787,10 +936,15 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
}
- if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+
+	if (pb != NULL) {
pmap_qremove((vm_offset_t)pb->b_data, bp->bio_ma_n);
relpbuf(pb, &md_vnode_pbuf_freecnt);
}
+
+ if (piov != NULL)
+ free(piov, M_MD);
+
bp->bio_resid = auio.uio_resid;
return (error);
}
@@ -801,6 +955,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
vm_page_t m;
u_char *p;
vm_pindex_t i, lastp;
+ bus_dma_segment_t *vlist;
int rv, ma_offs, offs, len, lastend;
switch (bp->bio_cmd) {
@@ -813,7 +968,10 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
}
p = bp->bio_data;
- ma_offs = (bp->bio_flags & BIO_UNMAPPED) == 0 ? 0 : bp->bio_ma_offset;
+ ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ?
+ bp->bio_ma_offset : 0;
+ vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
+ (bus_dma_segment_t *)bp->bio_data : NULL;
/*
* offs is the offset at which to start operating on the
@@ -853,6 +1011,10 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
pmap_copy_pages(&m, offs, bp->bio_ma,
ma_offs, len);
+ } else if ((bp->bio_flags & BIO_VLIST) != 0) {
+ physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs,
+ vlist, ma_offs, len);
+ cpu_flush_dcache(p, len);
} else {
physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len);
cpu_flush_dcache(p, len);
@@ -869,6 +1031,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
pmap_copy_pages(bp->bio_ma, ma_offs, &m,
offs, len);
+ } else if ((bp->bio_flags & BIO_VLIST) != 0) {
+ physcopyin_vlist(vlist, ma_offs,
+ VM_PAGE_TO_PHYS(m) + offs, len);
} else {
physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len);
}
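
mdcopyto_vlist() and mdcopyfrom_vlist() above (and the
physcopyin_vlist()/physcopyout_vlist() helpers added to subr_uio.c later in
this change) all share the same walk over a BIO_VLIST segment array: skip
whole segments to consume the starting offset, then copy segment by segment,
with only the first segment partial.  A self-contained userspace model of
that walk follows; "struct seg" is a stand-in for the kernel-only
bus_dma_segment_t:

#include <stdio.h>
#include <string.h>

struct seg {
	char	*addr;		/* models ds_addr */
	size_t	 len;		/* models ds_len */
};

static void
copyto_vlist(const char *src, struct seg *v, size_t offset, size_t len)
{
	size_t seg_len;

	/* Skip whole segments covered by the starting offset. */
	while (offset >= v->len) {
		offset -= v->len;
		v++;
	}
	while (len != 0) {
		seg_len = len < v->len - offset ? len : v->len - offset;
		memcpy(v->addr + offset, src, seg_len);
		offset = 0;	/* only the first segment is partial */
		src += seg_len;
		len -= seg_len;
		v++;
	}
}

int
main(void)
{
	char a[4], b[8];
	struct seg v[] = { { a, sizeof(a) }, { b, sizeof(b) } };

	copyto_vlist("0123456789", v, 2, 10);
	printf("%.2s %.8s\n", a + 2, b);
	return (0);
}

Running the model prints "01 23456789": the offset consumes part of the
first segment only, and the remaining data spills into the second segment.
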
diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c
index 9319b97..1a879f7 100644
--- a/sys/geom/geom_disk.c
+++ b/sys/geom/geom_disk.c
@@ -58,6 +58,8 @@ __FBSDID("$FreeBSD$");
#include <dev/led/led.h>
+#include <machine/bus.h>
+
struct g_disk_softc {
struct mtx done_mtx;
struct disk *dp;
@@ -273,6 +275,145 @@ g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct t
return (error);
}

+static off_t
+g_disk_maxsize(struct disk *dp, struct bio *bp)
+{
+ if (bp->bio_cmd == BIO_DELETE)
+ return (dp->d_delmaxsize);
+ return (dp->d_maxsize);
+}
+
+static int
+g_disk_maxsegs(struct disk *dp, struct bio *bp)
+{
+ return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1);
+}
+
+static void
+g_disk_advance(struct disk *dp, struct bio *bp, off_t off)
+{
+
+ bp->bio_offset += off;
+ bp->bio_length -= off;
+
+ if ((bp->bio_flags & BIO_VLIST) != 0) {
+ bus_dma_segment_t *seg, *end;
+
+ seg = (bus_dma_segment_t *)bp->bio_data;
+ end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
+ off += bp->bio_ma_offset;
+ while (off >= seg->ds_len) {
+ KASSERT((seg != end),
+ ("vlist request runs off the end"));
+ off -= seg->ds_len;
+ seg++;
+ }
+ bp->bio_ma_offset = off;
+ bp->bio_ma_n = end - seg;
+ bp->bio_data = (void *)seg;
+ } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+ bp->bio_ma += off / PAGE_SIZE;
+ bp->bio_ma_offset += off;
+ bp->bio_ma_offset %= PAGE_SIZE;
+ bp->bio_ma_n -= off / PAGE_SIZE;
+ } else {
+ bp->bio_data += off;
+ }
+}
+
+static void
+g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset,
+ off_t *plength, int *ppages)
+{
+ uintptr_t seg_page_base;
+ uintptr_t seg_page_end;
+ off_t offset;
+ off_t length;
+ int seg_pages;
+
+ offset = *poffset;
+ length = *plength;
+
+ if (length > seg->ds_len - offset)
+ length = seg->ds_len - offset;
+
+ seg_page_base = trunc_page(seg->ds_addr + offset);
+ seg_page_end = round_page(seg->ds_addr + offset + length);
+ seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT;
+
+ if (seg_pages > *ppages) {
+ seg_pages = *ppages;
+ length = (seg_page_base + (seg_pages << PAGE_SHIFT)) -
+ (seg->ds_addr + offset);
+ }
+
+ *poffset = 0;
+ *plength -= length;
+ *ppages -= seg_pages;
+}
+
+static off_t
+g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg)
+{
+ bus_dma_segment_t *seg, *end;
+ off_t residual;
+ off_t offset;
+ int pages;
+
+ seg = (bus_dma_segment_t *)bp->bio_data;
+ end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
+ residual = bp->bio_length;
+ offset = bp->bio_ma_offset;
+ pages = g_disk_maxsegs(dp, bp);
+ while (residual != 0 && pages != 0) {
+ KASSERT((seg != end),
+ ("vlist limit runs off the end"));
+ g_disk_seg_limit(seg, &offset, &residual, &pages);
+ seg++;
+ }
+ if (pendseg != NULL)
+ *pendseg = seg;
+ return (residual);
+}
+
+static bool
+g_disk_limit(struct disk *dp, struct bio *bp)
+{
+ bool limited = false;
+ off_t maxsz;
+
+ maxsz = g_disk_maxsize(dp, bp);
+
+ /*
+ * XXX: If we have a stripesize we should really use it here.
+ * Care should be taken in the delete case if this is done
+ * as deletes can be very sensitive to size given how they
+ * are processed.
+ */
+ if (bp->bio_length > maxsz) {
+ bp->bio_length = maxsz;
+ limited = true;
+ }
+
+ if ((bp->bio_flags & BIO_VLIST) != 0) {
+ bus_dma_segment_t *firstseg, *endseg;
+ off_t residual;
+
+		firstseg = (bus_dma_segment_t *)bp->bio_data;
+ residual = g_disk_vlist_limit(dp, bp, &endseg);
+ if (residual != 0) {
+ bp->bio_ma_n = endseg - firstseg;
+ bp->bio_length -= residual;
+ limited = true;
+ }
+ } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+ bp->bio_ma_n =
+ howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE);
+ }
+
+ return (limited);
+}
+
static void
g_disk_start(struct bio *bp)
{
@@ -297,6 +438,9 @@ g_disk_start(struct bio *bp)
/* fall-through */
case BIO_READ:
case BIO_WRITE:
+ KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 ||
+ (bp->bio_flags & BIO_UNMAPPED) == 0,
+ ("unmapped bio not supported by disk %s", dp->d_name));
off = 0;
bp3 = NULL;
bp2 = g_clone_bio(bp);
@@ -304,39 +448,10 @@ g_disk_start(struct bio *bp)
error = ENOMEM;
break;
}
- do {
- off_t d_maxsize;
-
- d_maxsize = (bp->bio_cmd == BIO_DELETE) ?
- dp->d_delmaxsize : dp->d_maxsize;
- bp2->bio_offset += off;
- bp2->bio_length -= off;
- if ((bp->bio_flags & BIO_UNMAPPED) == 0) {
- bp2->bio_data += off;
- } else {
- KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO)
- != 0,
- ("unmapped bio not supported by disk %s",
- dp->d_name));
- bp2->bio_ma += off / PAGE_SIZE;
- bp2->bio_ma_offset += off;
- bp2->bio_ma_offset %= PAGE_SIZE;
- bp2->bio_ma_n -= off / PAGE_SIZE;
- }
- if (bp2->bio_length > d_maxsize) {
- /*
- * XXX: If we have a stripesize we should really
- * use it here. Care should be taken in the delete
- * case if this is done as deletes can be very
- * sensitive to size given how they are processed.
- */
- bp2->bio_length = d_maxsize;
- if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
- bp2->bio_ma_n = howmany(
- bp2->bio_ma_offset +
- bp2->bio_length, PAGE_SIZE);
- }
- off += d_maxsize;
+ for (;;) {
+ if (g_disk_limit(dp, bp2)) {
+ off += bp2->bio_length;
+
/*
* To avoid a race, we need to grab the next bio
* before we schedule this one. See "notes".
@@ -355,9 +470,14 @@ g_disk_start(struct bio *bp)
g_disk_lock_giant(dp);
dp->d_strategy(bp2);
g_disk_unlock_giant(dp);
+
+ if (bp3 == NULL)
+ break;
+
bp2 = bp3;
bp3 = NULL;
- } while (bp2 != NULL);
+ g_disk_advance(dp, bp2, off);
+ }
break;
case BIO_GETATTR:
/* Give the driver a chance to override */
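
The rewritten g_disk_start() loop above amounts to: clone the parent bio,
trim the clone with g_disk_limit(), issue it, then advance the next clone's
window by the number of bytes already issued (g_disk_advance()) and repeat.
A simplified offset/length-only model of that split, ignoring the BIO_VLIST
and BIO_UNMAPPED bookkeeping (D_MAXSIZE stands in for dp->d_maxsize):

#include <stdio.h>

#define D_MAXSIZE	131072		/* stand-in for dp->d_maxsize */

int
main(void)
{
	long long offset = 0, length = 300000;	/* parent bio */
	long long off = 0;			/* bytes already issued */

	for (;;) {
		long long c_off = offset + off;	/* g_disk_advance() */
		long long c_len = length - off;
		if (c_len > D_MAXSIZE)		/* g_disk_limit() */
			c_len = D_MAXSIZE;
		printf("child: offset %lld, length %lld\n", c_off, c_len);
		off += c_len;
		if (off >= length)
			break;
	}
	return (0);
}

With these example values the model emits two 131072-byte children followed
by a 37856-byte remainder, mirroring how a 300000-byte bio is carved into
d_maxsize-sized chunks.
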
diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c
index f1edc70..9dff151 100644
--- a/sys/geom/geom_io.c
+++ b/sys/geom/geom_io.c
@@ -205,11 +205,12 @@ g_clone_bio(struct bio *bp)
/*
* BIO_ORDERED flag may be used by disk drivers to enforce
* ordering restrictions, so this flag needs to be cloned.
- * BIO_UNMAPPED should be inherited, to properly indicate
- * which way the buffer is passed.
+ * BIO_UNMAPPED and BIO_VLIST should be inherited, to properly
+ * indicate which way the buffer is passed.
* Other bio flags are not suitable for cloning.
*/
- bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED);
+ bp2->bio_flags = bp->bio_flags &
+ (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST);
bp2->bio_length = bp->bio_length;
bp2->bio_offset = bp->bio_offset;
bp2->bio_data = bp->bio_data;
@@ -240,7 +241,7 @@ g_duplicate_bio(struct bio *bp)
struct bio *bp2;
bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
- bp2->bio_flags = bp->bio_flags & BIO_UNMAPPED;
+ bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST);
bp2->bio_parent = bp;
bp2->bio_cmd = bp->bio_cmd;
bp2->bio_length = bp->bio_length;
diff --git a/sys/ia64/include/bus.h b/sys/ia64/include/bus.h
index 966a75d3..a9b09c6 100644
--- a/sys/ia64/include/bus.h
+++ b/sys/ia64/include/bus.h
@@ -123,6 +123,7 @@
#define BUS_SPACE_UNRESTRICTED (~0)
+#ifdef _KERNEL
/*
* Map and unmap a region of device bus space into CPU virtual address space.
@@ -815,6 +816,8 @@ bus_space_copy_region_8(bus_space_tag_t bst, bus_space_handle_t sbsh,
#define bus_space_copy_region_stream_4 bus_space_copy_region_4
#define bus_space_copy_region_stream_8 bus_space_copy_region_8
+#endif /* _KERNEL */
+
#include <machine/bus_dma.h>
#endif /* _MACHINE_BUS_H_ */
diff --git a/sys/kern/subr_bus_dma.c b/sys/kern/subr_bus_dma.c
index a16d8c8..ae30276 100644
--- a/sys/kern/subr_bus_dma.c
+++ b/sys/kern/subr_bus_dma.c
@@ -54,19 +54,32 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
/*
- * Load a list of virtual addresses.
+ * Load data starting at offset within a region specified by a list
+ * of virtual address ranges, until either the length or the region
+ * is exhausted.
*/
static int
_bus_dmamap_load_vlist(bus_dma_tag_t dmat, bus_dmamap_t map,
bus_dma_segment_t *list, int sglist_cnt, struct pmap *pmap, int *nsegs,
- int flags)
+ int flags, size_t offset, size_t length)
{
int error;
error = 0;
- for (; sglist_cnt > 0; sglist_cnt--, list++) {
- error = _bus_dmamap_load_buffer(dmat, map,
- (void *)(uintptr_t)list->ds_addr, list->ds_len, pmap,
+ for (; sglist_cnt > 0 && length != 0; sglist_cnt--, list++) {
+ char *addr;
+ size_t ds_len;
+
+ KASSERT((offset < list->ds_len),
+ ("Invalid mid-segment offset"));
+ addr = (char *)(uintptr_t)list->ds_addr + offset;
+ ds_len = list->ds_len - offset;
+ offset = 0;
+ if (ds_len > length)
+ ds_len = length;
+ length -= ds_len;
+ KASSERT((ds_len != 0), ("Segment length is zero"));
+ error = _bus_dmamap_load_buffer(dmat, map, addr, ds_len, pmap,
flags, NULL, nsegs);
if (error)
break;
@@ -118,22 +131,48 @@ _bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
}
/*
+ * Load tlen bytes of data starting at offset within a region specified by
+ * a list of physical pages.
+ */
+static int
+_bus_dmamap_load_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
+ vm_page_t *pages, bus_size_t tlen, int offset, int *nsegs, int flags)
+{
+ vm_paddr_t paddr;
+ bus_size_t len;
+ int error, i;
+
+ for (i = 0, error = 0; error == 0 && tlen > 0; i++, tlen -= len) {
+ len = min(PAGE_SIZE - offset, tlen);
+ paddr = VM_PAGE_TO_PHYS(pages[i]) + offset;
+ error = _bus_dmamap_load_phys(dmat, map, paddr, len,
+ flags, NULL, nsegs);
+ offset = 0;
+ }
+ return (error);
+}
+
+/*
* Load from block io.
*/
static int
_bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
int *nsegs, int flags)
{
- int error;
- if ((bio->bio_flags & BIO_UNMAPPED) == 0) {
- error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data,
- bio->bio_bcount, kernel_pmap, flags, NULL, nsegs);
- } else {
- error = _bus_dmamap_load_ma(dmat, map, bio->bio_ma,
- bio->bio_bcount, bio->bio_ma_offset, flags, NULL, nsegs);
+ if ((bio->bio_flags & BIO_VLIST) != 0) {
+ bus_dma_segment_t *segs = (bus_dma_segment_t *)bio->bio_data;
+ return (_bus_dmamap_load_vlist(dmat, map, segs, bio->bio_ma_n,
+ kernel_pmap, nsegs, flags, bio->bio_ma_offset,
+ bio->bio_bcount));
}
- return (error);
+
+ if ((bio->bio_flags & BIO_UNMAPPED) != 0)
+ return (_bus_dmamap_load_pages(dmat, map, bio->bio_ma,
+ bio->bio_bcount, bio->bio_ma_offset, nsegs, flags));
+
+ return (_bus_dmamap_load_buffer(dmat, map, bio->bio_data,
+ bio->bio_bcount, kernel_pmap, flags, NULL, nsegs));
}
int
@@ -219,7 +258,7 @@ _bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb,
case CAM_DATA_SG:
error = _bus_dmamap_load_vlist(dmat, map,
(bus_dma_segment_t *)data_ptr, sglist_cnt, kernel_pmap,
- nsegs, flags);
+ nsegs, flags, 0, dxfer_len);
break;
case CAM_DATA_SG_PADDR:
error = _bus_dmamap_load_plist(dmat, map,
@@ -494,7 +533,7 @@ bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map,
break;
case MEMDESC_VLIST:
error = _bus_dmamap_load_vlist(dmat, map, mem->u.md_list,
- mem->md_opaque, kernel_pmap, &nsegs, flags);
+ mem->md_opaque, kernel_pmap, &nsegs, flags, 0, SIZE_T_MAX);
break;
case MEMDESC_PLIST:
error = _bus_dmamap_load_plist(dmat, map, mem->u.md_list,
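
_bus_dmamap_load_pages() above issues one _bus_dmamap_load_phys() call per
physical page, with only the first page partial (offset is zeroed after the
first iteration).  A userspace model of just that arithmetic, with
hypothetical example values:

#include <stdio.h>

#define PAGE_SIZE	4096

int
main(void)
{
	long tlen = 10000;	/* total transfer length */
	long offset = 1000;	/* offset into the first page */
	long len;
	int i;

	for (i = 0; tlen > 0; i++, tlen -= len) {
		len = PAGE_SIZE - offset < tlen ? PAGE_SIZE - offset : tlen;
		printf("page %d: offset %ld, len %ld\n", i, offset, len);
		offset = 0;	/* only the first page is partial */
	}
	return (0);
}

For tlen 10000 at offset 1000 this yields loads of 3096, 4096, and 2808
bytes; only the first page is partial, matching the offset reset in the
kernel loop.
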
diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c
index 87892fd..3712f92 100644
--- a/sys/kern/subr_uio.c
+++ b/sys/kern/subr_uio.c
@@ -62,6 +62,8 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
+#include <machine/bus.h>
+
SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, UIO_MAXIOV,
"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
@@ -136,6 +138,58 @@ physcopyout(vm_paddr_t src, void *dst, size_t len)
#undef PHYS_PAGE_COUNT
int
+physcopyin_vlist(bus_dma_segment_t *src, off_t offset, vm_paddr_t dst,
+ size_t len)
+{
+ size_t seg_len;
+ int error;
+
+ error = 0;
+ while (offset >= src->ds_len) {
+ offset -= src->ds_len;
+ src++;
+ }
+
+ while (len > 0 && error == 0) {
+ seg_len = MIN(src->ds_len - offset, len);
+ error = physcopyin((void *)(uintptr_t)(src->ds_addr + offset),
+ dst, seg_len);
+ offset = 0;
+ src++;
+ len -= seg_len;
+ dst += seg_len;
+ }
+
+ return (error);
+}
+
+int
+physcopyout_vlist(vm_paddr_t src, bus_dma_segment_t *dst, off_t offset,
+ size_t len)
+{
+ size_t seg_len;
+ int error;
+
+ error = 0;
+ while (offset >= dst->ds_len) {
+ offset -= dst->ds_len;
+ dst++;
+ }
+
+ while (len > 0 && error == 0) {
+ seg_len = MIN(dst->ds_len - offset, len);
+ error = physcopyout(src, (void *)(uintptr_t)(dst->ds_addr +
+ offset), seg_len);
+ offset = 0;
+ dst++;
+ len -= seg_len;
+ src += seg_len;
+ }
+
+ return (error);
+}
+
+int
uiomove(void *cp, int n, struct uio *uio)
{
diff --git a/sys/pc98/include/bus.h b/sys/pc98/include/bus.h
index 3292474..2060414 100644
--- a/sys/pc98/include/bus.h
+++ b/sys/pc98/include/bus.h
@@ -78,7 +78,9 @@
#ifndef _PC98_BUS_H_
#define _PC98_BUS_H_
+#ifdef _KERNEL
#include <sys/systm.h>
+#endif /* _KERNEL */
#include <machine/_bus.h>
#include <machine/cpufunc.h>
@@ -92,6 +94,8 @@
#define BUS_SPACE_UNRESTRICTED (~0)
+#ifdef _KERNEL
+
/*
* address relocation table
*/
@@ -639,4 +643,6 @@ bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh,
#define bus_space_copy_region_stream_4(t, h1, o1, h2, o2, c) \
bus_space_copy_region_4((t), (h1), (o1), (h2), (o2), (c))
+#endif /* _KERNEL */
+
#endif /* _PC98_BUS_H_ */
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 535ce61..8b3a5fc 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -61,6 +61,7 @@
#define BIO_ORDERED 0x08
#define BIO_UNMAPPED 0x10
#define BIO_TRANSIENT_MAPPING 0x20
+#define BIO_VLIST 0x40
#ifdef _KERNEL
struct disk;
diff --git a/sys/sys/uio.h b/sys/sys/uio.h
index 271a2f7..ff21b09 100644
--- a/sys/sys/uio.h
+++ b/sys/sys/uio.h
@@ -85,6 +85,7 @@ struct uio {
struct vm_object;
struct vm_page;
+struct bus_dma_segment;
struct uio *cloneuio(struct uio *uiop);
int copyinfrom(const void * __restrict src, void * __restrict dst,
@@ -98,6 +99,10 @@ int copyout_map(struct thread *td, vm_offset_t *addr, size_t sz);
int copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz);
int physcopyin(void *src, vm_paddr_t dst, size_t len);
int physcopyout(vm_paddr_t src, void *dst, size_t len);
+int physcopyin_vlist(struct bus_dma_segment *src, off_t offset,
+ vm_paddr_t dst, size_t len);
+int physcopyout_vlist(vm_paddr_t src, struct bus_dma_segment *dst,
+ off_t offset, size_t len);
int uiomove(void *cp, int n, struct uio *uio);
int uiomove_frombuf(void *buf, int buflen, struct uio *uio);
int uiomove_fromphys(struct vm_page *ma[], vm_offset_t offset, int n,
diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile
index 57effb8..8e97961 100644
--- a/usr.sbin/Makefile
+++ b/usr.sbin/Makefile
@@ -7,6 +7,7 @@ SUBDIR= adduser \
arp \
binmiscctl \
bsdconfig \
+ camdd \
cdcontrol \
chkgrp \
chown \
diff --git a/usr.sbin/camdd/Makefile b/usr.sbin/camdd/Makefile
new file mode 100644
index 0000000..0028668
--- /dev/null
+++ b/usr.sbin/camdd/Makefile
@@ -0,0 +1,11 @@
+# $FreeBSD$
+
+PROG= camdd
+SRCS= camdd.c
+SDIR= ${.CURDIR}/../../sys
+DPADD= ${LIBCAM} ${LIBMT} ${LIBSBUF} ${LIBBSDXML} ${LIBUTIL} ${LIBTHR}
+LDADD= -lcam -lmt -lsbuf -lbsdxml -lutil -lthr
+NO_WTHREAD_SAFETY= 1
+MAN= camdd.8
+
+.include <bsd.prog.mk>
diff --git a/usr.sbin/camdd/camdd.8 b/usr.sbin/camdd/camdd.8
new file mode 100644
index 0000000..af556bb
--- /dev/null
+++ b/usr.sbin/camdd/camdd.8
@@ -0,0 +1,283 @@
+.\"
+.\" Copyright (c) 2015 Spectra Logic Corporation
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions, and the following disclaimer,
+.\" without modification.
+.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer
+.\" substantially similar to the "NO WARRANTY" disclaimer below
+.\" ("Disclaimer") and any redistribution must be conditioned upon
+.\" including a substantially similar Disclaimer requirement for further
+.\" binary redistribution.
+.\"
+.\" NO WARRANTY
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGES.
+.\"
+.\" Authors: Ken Merry (Spectra Logic Corporation)
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 11, 2015
+.Dt CAMDD 8
+.Os
+.Sh NAME
+.Nm camdd
+.Nd CAM data transfer utility
+.Sh SYNOPSIS
+.Nm
+.Aq Fl i|o Ar pass=pass_dev|file=filename,bs=blocksize,[...]
+.Op Fl C Ar retry_count
+.Op Fl E
+.Op Fl m Ar max_io
+.Op Fl t Ar timeout
+.Op Fl v
+.Op Fl h
+.Sh DESCRIPTION
+The
+.Nm
+utility is a sequential data transfer utility that offers standard
+.Xr read 2
+and
+.Xr write 2
+operation in addition to a mode that uses the asynchronous
+.Xr pass 4
+API.
+The asynchronous
+.Xr pass 4
+API allows multiple requests to be queued to a device simultaneously.
+.Pp
+.Nm
+collects performance information and will display it when the transfer
+completes, when
+.Nm
+is terminated, or when it receives a SIGINFO signal.
+.Pp
+The following options are available:
+.Bl -tag -width 12n
+.It Fl i | o Ar args
+Specify the input and output device or file.
+Both
+.Fl i
+and
+.Fl o
+must be specified.
+There are a number of parameters that can be specified.
+One of the first two (file or pass) MUST be specified to indicate which I/O
+method to use on the device in question.
+.Bl -tag -width 9n
+.It pass=dev
+Specify a
+.Xr pass 4
+device to operate on.
+This requests that the device in question be accessed via the asynchronous
+.Xr pass 4
+interface.
+.Pp
+The device name can be a
+.Xr pass 4
+name and unit number, for instance
+.Dq pass0 ,
+or a regular peripheral driver name and unit number, for instance
+.Dq da5 .
+It can also be the path of a
+.Xr pass 4
+or other disk device, like
+.Dq /dev/da5 .
+It may also be a bus:target:lun, for example:
+.Dq 0:5:0 .
+.Pp
+Only
+.Xr pass 4
+devices for
+.Tn SCSI
+disk-like devices are supported.
+.Tn ATA
+devices are not currently supported, but support could be added later.
+Specifically,
+.Tn SCSI
+Direct Access (type 0), WORM (type 4), CDROM (type 5), and RBC (Reduced
+Block Command, type 14) devices are supported.
+Tape drives, medium changers, enclosures, etc., are not supported.
+.It file=path
+Specify a file or device to operate on.
+This requests that the file or device in question be accessed using the
+standard
+.Xr read 2
+and
+.Xr write 2
+system calls.
+The file interface does not support queueing multiple commands at a time.
+It does support probing disk sector size and capacity information, and tape
+blocksize and maximum transfer size information.
+The file interface supports standard files, disks, tape drives, special
+devices, pipes and standard input and output.
+If the file is specified as a
+.Dq - ,
+standard input or standard output is used, as appropriate.
+For tape devices, the specified blocksize will be the size that
+.Nm
+attempts to use to write to or read from the tape.
+When writing to a tape device, the blocksize is treated like a disk sector
+size.
+That is,
+.Nm
+will not write anything smaller than the blocksize.
+At the end of a transfer, if there is not enough data from the reader to
+fill a full block,
+.Nm
+will pad the data from the reader with zeros to make up a full block.
+.It bs=N
+Specify the blocksize to use for transfers.
+.Nm
+will attempt to read or write using the requested blocksize.
+.Pp
+Note that the blocksize given only applies to either the input or the
+output path.
+To use the same blocksize for the input and output transfers, you must
+specify that blocksize with both the
+.Fl i
+and
+.Fl o
+arguments.
+.Pp
+The blocksize may be specified in bytes, or using any suffix (e.g. k, M, G)
+supported by
+.Xr expand_number 3 .
+.It offset=N
+Specify the starting offset for the input or output device or file.
+The offset may be specified in bytes, or by using any suffix (e.g. k, M, G)
+supported by
+.Xr expand_number 3 .
+.It depth=N
+Specify a desired queue depth for the input or output path.
+.Nm
+will attempt to keep the requested number of requests of the specified
+blocksize queued to the input or output device.
+Queue depths greater than 1 are only supported for the asynchronous
+.Xr pass 4
+output method.
+The queue depth is maintained on a best-effort basis, and may not be
+possible to maintain for especially fast devices.
+For writes, maintaining the queue depth also depends on a sufficiently
+fast reading device.
+.It mcs=N
+Specify the minimum command size to use for
+.Xr pass 4
+devices.
+Some devices do not support 6 byte
+.Tn SCSI
+commands.
+The
+.Xr da 4
+device handles this restriction automatically, but the
+.Xr pass 4
+device allows the user to specify the
+.Tn SCSI
+command used.
+If a device does not accept 6 byte
+.Tn SCSI
+READ/WRITE commands (which is the default at lower LBAs), it will generally
+accept 10 byte
+.Tn SCSI
+commands instead.
+.It debug=N
+Specify the debug level for this device.
+There is currently only one debug level setting, so setting this to any
+non-zero value will turn on debugging.
+The debug facility may be expanded in the future.
+.El
+.It Fl C Ar count
+Specify the retry count for commands sent via the asynchronous
+.Xr pass 4
+interface.
+This does not apply to commands sent via the file interface.
+.It Fl E
+Enable kernel error recovery for the
+.Xr pass 4
+driver.
+If error recovery is not enabled, unit attention conditions and other
+transient failures may cause the transfer to fail.
+.It Fl m Ar size
+Specify the maximum amount of data to be transferred.
+This may be specified in bytes, or by using any suffix (e.g. K, M, G)
+supported by
+.Xr expand_number 3 .
+.It Fl t Ar timeout
+Specify the command timeout in seconds to use for commands sent via the
+.Xr pass 4
+driver.
+.It Fl v
+Enable verbose reporting of errors.
+This is recommended to aid in debugging any
+.Tn SCSI
+issues that come up.
+.It Fl h
+Display the
+.Nm
+usage message.
+.El
+.Pp
+If
+.Nm
+receives a SIGINFO signal, it will print the current input and output byte
+counts, elapsed runtime, and average throughput.
+If
+.Nm
+receives a SIGINT signal, it will print the current input and output byte
+counts, elapsed runtime, and average throughput, and then exit.
+.Sh EXAMPLES
+.Dl camdd -i pass=da8,bs=512k,depth=4 -o pass=da3,bs=512k,depth=4
+.Pp
+Copy all data from da8 to da3 using a blocksize of 512k for both drives,
+and attempt to maintain a queue depth of 4 on both the input and output
+devices.
+The transfer will stop when the end of either device is reached.
+.Pp
+.Dl camdd -i file=/dev/zero,bs=1M -o pass=da5,bs=1M,depth=4 -m 100M
+.Pp
+Read 1MB blocks of zeros from /dev/zero, and write them to da5 with a
+desired queue depth of 4.
+Stop the transfer after 100MB has been written.
+.Pp
+.Dl camdd -i pass=da8,bs=1M,depth=3 -o file=disk.img
+.Pp
+Copy disk da8 using a 1MB blocksize and desired queue depth of 3 to the
+file disk.img.
+.Pp
+.Dl camdd -i file=/etc/rc -o file=-
+.Pp
+Read the file /etc/rc and write it to standard output.
+.Pp
+.Dl camdd -i pass=da10,bs=64k,depth=16 -o file=/dev/nsa0,bs=128k
+.Pp
+Copy 64K blocks from the disk da10 with a queue depth of 16, and write
+to the tape drive sa0 with a 128k blocksize.
+The copy will stop when either the end of the disk or tape is reached.
+.Sh SEE ALSO
+.Xr cam 3 ,
+.Xr cam 4 ,
+.Xr pass 4 ,
+.Xr camcontrol 8
+.Sh HISTORY
+.Nm
+first appeared in
+.Fx 10.2 .
+.Sh AUTHORS
+.An Kenneth Merry Aq Mt ken@FreeBSD.org
diff --git a/usr.sbin/camdd/camdd.c b/usr.sbin/camdd/camdd.c
new file mode 100644
index 0000000..573214e
--- /dev/null
+++ b/usr.sbin/camdd/camdd.c
@@ -0,0 +1,3428 @@
+/*-
+ * Copyright (c) 1997-2007 Kenneth D. Merry
+ * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Ken Merry (Spectra Logic Corporation)
+ */
+
+/*
+ * This is eventually intended to be:
+ * - A basic data transfer/copy utility
+ * - A simple benchmark utility
+ * - An example of how to use the asynchronous pass(4) driver interface.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/ioctl.h>
+#include <sys/stdint.h>
+#include <sys/types.h>
+#include <sys/endian.h>
+#include <sys/param.h>
+#include <sys/sbuf.h>
+#include <sys/stat.h>
+#include <sys/event.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <machine/bus.h>
+#include <sys/bus.h>
+#include <sys/bus_dma.h>
+#include <sys/mtio.h>
+#include <sys/conf.h>
+#include <sys/disk.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <semaphore.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <err.h>
+#include <libutil.h>
+#include <pthread.h>
+#include <assert.h>
+#include <bsdxml.h>
+
+#include <cam/cam.h>
+#include <cam/cam_debug.h>
+#include <cam/cam_ccb.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_da.h>
+#include <cam/scsi/scsi_pass.h>
+#include <cam/scsi/scsi_message.h>
+#include <cam/scsi/smp_all.h>
+#include <camlib.h>
+#include <mtlib.h>
+#include <zlib.h>
+
+typedef enum {
+ CAMDD_CMD_NONE = 0x00000000,
+ CAMDD_CMD_HELP = 0x00000001,
+ CAMDD_CMD_WRITE = 0x00000002,
+ CAMDD_CMD_READ = 0x00000003
+} camdd_cmdmask;
+
+typedef enum {
+ CAMDD_ARG_NONE = 0x00000000,
+ CAMDD_ARG_VERBOSE = 0x00000001,
+ CAMDD_ARG_DEVICE = 0x00000002,
+ CAMDD_ARG_BUS = 0x00000004,
+ CAMDD_ARG_TARGET = 0x00000008,
+ CAMDD_ARG_LUN = 0x00000010,
+ CAMDD_ARG_UNIT = 0x00000020,
+ CAMDD_ARG_TIMEOUT = 0x00000040,
+ CAMDD_ARG_ERR_RECOVER = 0x00000080,
+ CAMDD_ARG_RETRIES = 0x00000100
+} camdd_argmask;
+
+typedef enum {
+ CAMDD_DEV_NONE = 0x00,
+ CAMDD_DEV_PASS = 0x01,
+ CAMDD_DEV_FILE = 0x02
+} camdd_dev_type;
+
+struct camdd_io_opts {
+ camdd_dev_type dev_type;
+ char *dev_name;
+ uint64_t blocksize;
+ uint64_t queue_depth;
+ uint64_t offset;
+ int min_cmd_size;
+ int write_dev;
+ uint64_t debug;
+};
+
+typedef enum {
+ CAMDD_BUF_NONE,
+ CAMDD_BUF_DATA,
+ CAMDD_BUF_INDIRECT
+} camdd_buf_type;
+
+struct camdd_buf_indirect {
+ /*
+ * Pointer to the source buffer.
+ */
+ struct camdd_buf *src_buf;
+
+ /*
+ * Offset into the source buffer, in bytes.
+ */
+ uint64_t offset;
+ /*
+ * Pointer to the starting point in the source buffer.
+ */
+ uint8_t *start_ptr;
+
+ /*
+ * Length of this chunk in bytes.
+ */
+ size_t len;
+};
+
+struct camdd_buf_data {
+ /*
+ * Buffer allocated when we allocate this camdd_buf. This should
+ * be the size of the blocksize for this device.
+ */
+ uint8_t *buf;
+
+ /*
+ * The amount of backing store allocated in buf. Generally this
+ * will be the blocksize of the device.
+ */
+ uint32_t alloc_len;
+
+ /*
+ * The amount of data that was put into the buffer (on reads) or
+ * the amount of data we have put onto the src_list so far (on
+ * writes).
+ */
+ uint32_t fill_len;
+
+ /*
+ * The amount of data that was not transferred.
+ */
+ uint32_t resid;
+
+ /*
+ * Starting byte offset on the reader.
+ */
+ uint64_t src_start_offset;
+
+ /*
+ * CCB used for pass(4) device targets.
+ */
+ union ccb ccb;
+
+ /*
+ * Number of scatter/gather segments.
+ */
+ int sg_count;
+
+ /*
+ * Set if we had to tack on an extra buffer to round the transfer
+ * up to a sector size.
+ */
+ int extra_buf;
+
+ /*
+ * Scatter/gather list used generally when we're the writer for a
+ * pass(4) device.
+ */
+ bus_dma_segment_t *segs;
+
+ /*
+ * Scatter/gather list used generally when we're the writer for a
+ * file or block device;
+ */
+ struct iovec *iovec;
+};
+
+union camdd_buf_types {
+ struct camdd_buf_indirect indirect;
+ struct camdd_buf_data data;
+};
+
+typedef enum {
+ CAMDD_STATUS_NONE,
+ CAMDD_STATUS_OK,
+ CAMDD_STATUS_SHORT_IO,
+ CAMDD_STATUS_EOF,
+ CAMDD_STATUS_ERROR
+} camdd_buf_status;
+
+struct camdd_buf {
+ camdd_buf_type buf_type;
+ union camdd_buf_types buf_type_spec;
+
+ camdd_buf_status status;
+
+ uint64_t lba;
+ size_t len;
+
+ /*
+ * A reference count of how many indirect buffers point to this
+ * buffer.
+ */
+ int refcount;
+
+ /*
+ * A link back to our parent device.
+ */
+ struct camdd_dev *dev;
+ STAILQ_ENTRY(camdd_buf) links;
+ STAILQ_ENTRY(camdd_buf) work_links;
+
+ /*
+ * A count of the buffers on the src_list.
+ */
+ int src_count;
+
+ /*
+ * List of buffers from our partner thread that are the components
+ * of this buffer for the I/O. Uses src_links.
+ */
+ STAILQ_HEAD(,camdd_buf) src_list;
+ STAILQ_ENTRY(camdd_buf) src_links;
+};
+
+#define NUM_DEV_TYPES 2
+
+struct camdd_dev_pass {
+ int scsi_dev_type;
+ struct cam_device *dev;
+ uint64_t max_sector;
+ uint32_t block_len;
+ uint32_t cpi_maxio;
+};
+
+typedef enum {
+ CAMDD_FILE_NONE,
+ CAMDD_FILE_REG,
+ CAMDD_FILE_STD,
+ CAMDD_FILE_PIPE,
+ CAMDD_FILE_DISK,
+ CAMDD_FILE_TAPE,
+ CAMDD_FILE_TTY,
+ CAMDD_FILE_MEM
+} camdd_file_type;
+
+typedef enum {
+ CAMDD_FF_NONE = 0x00,
+ CAMDD_FF_CAN_SEEK = 0x01
+} camdd_file_flags;
+
+struct camdd_dev_file {
+ int fd;
+ struct stat sb;
+ char filename[MAXPATHLEN + 1];
+ camdd_file_type file_type;
+ camdd_file_flags file_flags;
+ uint8_t *tmp_buf;
+};
+
+struct camdd_dev_block {
+ int fd;
+ uint64_t size_bytes;
+ uint32_t block_len;
+};
+
+union camdd_dev_spec {
+ struct camdd_dev_pass pass;
+ struct camdd_dev_file file;
+ struct camdd_dev_block block;
+};
+
+typedef enum {
+ CAMDD_DEV_FLAG_NONE = 0x00,
+ CAMDD_DEV_FLAG_EOF = 0x01,
+ CAMDD_DEV_FLAG_PEER_EOF = 0x02,
+ CAMDD_DEV_FLAG_ACTIVE = 0x04,
+ CAMDD_DEV_FLAG_EOF_SENT = 0x08,
+ CAMDD_DEV_FLAG_EOF_QUEUED = 0x10
+} camdd_dev_flags;
+
+struct camdd_dev {
+ camdd_dev_type dev_type;
+ union camdd_dev_spec dev_spec;
+ camdd_dev_flags flags;
+ char device_name[MAXPATHLEN+1];
+ uint32_t blocksize;
+ uint32_t sector_size;
+ uint64_t max_sector;
+ uint64_t sector_io_limit;
+ int min_cmd_size;
+ int write_dev;
+ int retry_count;
+ int io_timeout;
+ int debug;
+ uint64_t start_offset_bytes;
+ uint64_t next_io_pos_bytes;
+ uint64_t next_peer_pos_bytes;
+ uint64_t next_completion_pos_bytes;
+ uint64_t peer_bytes_queued;
+ uint64_t bytes_transferred;
+ uint32_t target_queue_depth;
+ uint32_t cur_active_io;
+ uint8_t *extra_buf;
+ uint32_t extra_buf_len;
+ struct camdd_dev *peer_dev;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int kq;
+
+ int (*run)(struct camdd_dev *dev);
+ int (*fetch)(struct camdd_dev *dev);
+
+ /*
+ * Buffers that are available for I/O. Uses links.
+ */
+ STAILQ_HEAD(,camdd_buf) free_queue;
+
+ /*
+ * Free indirect buffers. These are used for breaking a large
+ * buffer into multiple pieces.
+ */
+ STAILQ_HEAD(,camdd_buf) free_indirect_queue;
+
+ /*
+ * Buffers that have been queued to the kernel. Uses links.
+ */
+ STAILQ_HEAD(,camdd_buf) active_queue;
+
+ /*
+ * Will generally contain one of our buffers that is waiting for enough
+ * I/O from our partner thread to be able to execute. This will
+ * generally happen when our per-I/O-size is larger than the
+ * partner thread's per-I/O-size. Uses links.
+ */
+ STAILQ_HEAD(,camdd_buf) pending_queue;
+
+ /*
+ * Number of buffers on the pending queue
+ */
+ int num_pending_queue;
+
+ /*
+ * Buffers that are filled and ready to execute. This is used when
+ * our partner (reader) thread sends us blocks that are larger than
+ * our blocksize, and so we have to split them into multiple pieces.
+ */
+ STAILQ_HEAD(,camdd_buf) run_queue;
+
+ /*
+ * Number of buffers on the run queue.
+ */
+ int num_run_queue;
+
+ STAILQ_HEAD(,camdd_buf) reorder_queue;
+
+ int num_reorder_queue;
+
+ /*
+ * Buffers that have been queued to us by our partner thread
+ * (generally the reader thread) to be written out. Uses
+ * work_links.
+ */
+ STAILQ_HEAD(,camdd_buf) work_queue;
+
+ /*
+ * Buffers that have been completed by our partner thread. Uses
+ * work_links.
+ */
+ STAILQ_HEAD(,camdd_buf) peer_done_queue;
+
+ /*
+ * Number of buffers on the peer done queue.
+ */
+ uint32_t num_peer_done_queue;
+
+ /*
+ * A list of buffers that we have queued to our peer thread. Uses
+ * links.
+ */
+ STAILQ_HEAD(,camdd_buf) peer_work_queue;
+
+ /*
+ * Number of buffers on the peer work queue.
+ */
+ uint32_t num_peer_work_queue;
+};
+
+static sem_t camdd_sem;
+static int need_exit = 0;
+static int error_exit = 0;
+static int need_status = 0;
+
+#ifndef min
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+/*
+ * XXX KDM private copy of timespecsub(). This is normally defined in
+ * sys/time.h, but is only enabled in the kernel. If that definition is
+ * enabled in userland, it breaks the build of libnetbsd.
+ */
+#ifndef timespecsub
+#define timespecsub(vvp, uvp) \
+ do { \
+ (vvp)->tv_sec -= (uvp)->tv_sec; \
+ (vvp)->tv_nsec -= (uvp)->tv_nsec; \
+ if ((vvp)->tv_nsec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_nsec += 1000000000; \
+ } \
+ } while (0)
+#endif
+
+
+/* Generically useful offsets into the peripheral private area */
+#define ppriv_ptr0 periph_priv.entries[0].ptr
+#define ppriv_ptr1 periph_priv.entries[1].ptr
+#define ppriv_field0 periph_priv.entries[0].field
+#define ppriv_field1 periph_priv.entries[1].field
+
+#define ccb_buf ppriv_ptr0
+
+#define CAMDD_FILE_DEFAULT_BLOCK 524288
+#define CAMDD_FILE_DEFAULT_DEPTH 1
+#define CAMDD_PASS_MAX_BLOCK 1048576
+#define CAMDD_PASS_DEFAULT_DEPTH 6
+#define CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
+
+static int parse_btl(char *tstr, int *bus, int *target, int *lun,
+ camdd_argmask *arglst);
+void camdd_free_dev(struct camdd_dev *dev);
+struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
+ struct kevent *new_ke, int num_ke,
+ int retry_count, int timeout);
+static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
+ camdd_buf_type buf_type);
+void camdd_release_buf(struct camdd_buf *buf);
+struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
+int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
+ uint32_t sector_size, uint32_t *num_sectors_used,
+ int *double_buf_needed);
+uint32_t camdd_buf_get_len(struct camdd_buf *buf);
+void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
+int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
+ uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
+struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
+ int retry_count, int timeout);
+struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
+ struct camdd_io_opts *io_opts,
+ camdd_argmask arglist, int probe_retry_count,
+ int probe_timeout, int io_retry_count,
+ int io_timeout);
+void *camdd_file_worker(void *arg);
+camdd_buf_status camdd_ccb_status(union ccb *ccb);
+int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
+int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
+void camdd_peer_done(struct camdd_buf *buf);
+void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
+ int *error_count);
+int camdd_pass_fetch(struct camdd_dev *dev);
+int camdd_file_run(struct camdd_dev *dev);
+int camdd_pass_run(struct camdd_dev *dev);
+int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
+int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
+void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
+ uint32_t *peer_depth, uint32_t *our_bytes,
+ uint32_t *peer_bytes);
+void *camdd_worker(void *arg);
+void camdd_sig_handler(int sig);
+void camdd_print_status(struct camdd_dev *camdd_dev,
+ struct camdd_dev *other_dev,
+ struct timespec *start_time);
+int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
+ uint64_t max_io, int retry_count, int timeout);
+int camdd_parse_io_opts(char *args, int is_write,
+ struct camdd_io_opts *io_opts);
+void usage(void);
+
+/*
+ * Parse out a bus, or a bus, target and lun in the following
+ * format:
+ * bus
+ * bus:target
+ * bus:target:lun
+ *
+ * Returns the number of parsed components, or 0.
+ */
+static int
+parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
+{
+ char *tmpstr;
+ int convs = 0;
+
+ while (isspace(*tstr) && (*tstr != '\0'))
+ tstr++;
+
+ tmpstr = (char *)strtok(tstr, ":");
+ if ((tmpstr != NULL) && (*tmpstr != '\0')) {
+ *bus = strtol(tmpstr, NULL, 0);
+ *arglst |= CAMDD_ARG_BUS;
+ convs++;
+ tmpstr = (char *)strtok(NULL, ":");
+ if ((tmpstr != NULL) && (*tmpstr != '\0')) {
+ *target = strtol(tmpstr, NULL, 0);
+ *arglst |= CAMDD_ARG_TARGET;
+ convs++;
+ tmpstr = (char *)strtok(NULL, ":");
+ if ((tmpstr != NULL) && (*tmpstr != '\0')) {
+ *lun = strtol(tmpstr, NULL, 0);
+ *arglst |= CAMDD_ARG_LUN;
+ convs++;
+ }
+ }
+ }
+
+	return (convs);
+}
+
+/*
+ * XXX KDM clean up and free all of the buffers on the queue!
+ */
+void
+camdd_free_dev(struct camdd_dev *dev)
+{
+ if (dev == NULL)
+ return;
+
+ switch (dev->dev_type) {
+ case CAMDD_DEV_FILE: {
+ struct camdd_dev_file *file_dev = &dev->dev_spec.file;
+
+ if (file_dev->fd != -1)
+ close(file_dev->fd);
+ free(file_dev->tmp_buf);
+ break;
+ }
+ case CAMDD_DEV_PASS: {
+ struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
+
+ if (pass_dev->dev != NULL)
+ cam_close_device(pass_dev->dev);
+ break;
+ }
+ default:
+ break;
+ }
+
+ free(dev);
+}
+
+struct camdd_dev *
+camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
+ int retry_count, int timeout)
+{
+ struct camdd_dev *dev = NULL;
+ struct kevent *ke;
+ size_t ke_size;
+ int retval = 0;
+
+ dev = malloc(sizeof(*dev));
+ if (dev == NULL) {
+ warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
+ goto bailout;
+ }
+
+ bzero(dev, sizeof(*dev));
+
+ dev->dev_type = dev_type;
+ dev->io_timeout = timeout;
+ dev->retry_count = retry_count;
+ STAILQ_INIT(&dev->free_queue);
+ STAILQ_INIT(&dev->free_indirect_queue);
+ STAILQ_INIT(&dev->active_queue);
+ STAILQ_INIT(&dev->pending_queue);
+ STAILQ_INIT(&dev->run_queue);
+ STAILQ_INIT(&dev->reorder_queue);
+ STAILQ_INIT(&dev->work_queue);
+ STAILQ_INIT(&dev->peer_done_queue);
+ STAILQ_INIT(&dev->peer_work_queue);
+ retval = pthread_mutex_init(&dev->mutex, NULL);
+ if (retval != 0) {
+ warnc(retval, "%s: failed to initialize mutex", __func__);
+ goto bailout;
+ }
+
+ retval = pthread_cond_init(&dev->cond, NULL);
+ if (retval != 0) {
+ warnc(retval, "%s: failed to initialize condition variable",
+ __func__);
+ goto bailout;
+ }
+
+ dev->kq = kqueue();
+ if (dev->kq == -1) {
+ warn("%s: Unable to create kqueue", __func__);
+ goto bailout;
+ }
+
+ ke_size = sizeof(struct kevent) * (num_ke + 4);
+ ke = malloc(ke_size);
+ if (ke == NULL) {
+ warn("%s: unable to malloc %zu bytes", __func__, ke_size);
+ goto bailout;
+ }
+ bzero(ke, ke_size);
+ if (num_ke > 0)
+ bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
+
+	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
+	    EV_ADD | EV_ENABLE | EV_CLEAR, 0, 0, 0);
+	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
+	    EV_ADD | EV_ENABLE | EV_CLEAR, 0, 0, 0);
+	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD | EV_ENABLE,
+	    0, 0, 0);
+	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD | EV_ENABLE,
+	    0, 0, 0);
+
+ retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
+ if (retval == -1) {
+ warn("%s: Unable to register kevents", __func__);
+ goto bailout;
+ }
+
+ return (dev);
+
+bailout:
+ free(dev);
+
+ return (NULL);
+}
+
+static struct camdd_buf *
+camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
+{
+ struct camdd_buf *buf = NULL;
+ uint8_t *data_ptr = NULL;
+
+ /*
+ * We only need to allocate data space for data buffers.
+ */
+ switch (buf_type) {
+ case CAMDD_BUF_DATA:
+ data_ptr = malloc(dev->blocksize);
+ if (data_ptr == NULL) {
+ warn("unable to allocate %u bytes", dev->blocksize);
+ goto bailout_error;
+ }
+ break;
+ default:
+ break;
+ }
+
+ buf = malloc(sizeof(*buf));
+ if (buf == NULL) {
+ warn("unable to allocate %zu bytes", sizeof(*buf));
+ goto bailout_error;
+ }
+
+ bzero(buf, sizeof(*buf));
+ buf->buf_type = buf_type;
+ buf->dev = dev;
+ switch (buf_type) {
+ case CAMDD_BUF_DATA: {
+ struct camdd_buf_data *data;
+
+ data = &buf->buf_type_spec.data;
+
+ data->alloc_len = dev->blocksize;
+ data->buf = data_ptr;
+ break;
+ }
+ case CAMDD_BUF_INDIRECT:
+ break;
+ default:
+ break;
+ }
+ STAILQ_INIT(&buf->src_list);
+
+ return (buf);
+
+bailout_error:
+ if (data_ptr != NULL)
+ free(data_ptr);
+
+ if (buf != NULL)
+ free(buf);
+
+ return (NULL);
+}
+
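+/*
+ * Return a buffer to the appropriate free queue, releasing any S/G
+ * list, iovec array, and rounding buffer that was allocated for it.
+ * Assumes the caller holds the lock for this device.
+ */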
+void
+camdd_release_buf(struct camdd_buf *buf)
+{
+ struct camdd_dev *dev;
+
+ dev = buf->dev;
+
+ switch (buf->buf_type) {
+ case CAMDD_BUF_DATA: {
+ struct camdd_buf_data *data;
+
+ data = &buf->buf_type_spec.data;
+
+ if (data->segs != NULL) {
+ if (data->extra_buf != 0) {
+ void *extra_buf;
+
+ extra_buf = (void *)
+ data->segs[data->sg_count - 1].ds_addr;
+ free(extra_buf);
+ data->extra_buf = 0;
+ }
+ free(data->segs);
+ data->segs = NULL;
+ data->sg_count = 0;
+ } else if (data->iovec != NULL) {
+ if (data->extra_buf != 0) {
+ free(data->iovec[data->sg_count - 1].iov_base);
+ data->extra_buf = 0;
+ }
+ free(data->iovec);
+ data->iovec = NULL;
+ data->sg_count = 0;
+ }
+ STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
+ break;
+ }
+ case CAMDD_BUF_INDIRECT:
+ STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
+ break;
+ default:
+		errx(1, "%s: Invalid buffer type %d for released buffer",
+		    __func__, buf->buf_type);
+ break;
+ }
+}
+
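+/*
+ * Fetch a buffer of the given type from the device's free queue, or
+ * allocate a new one if the free queue is empty.
+ */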
+struct camdd_buf *
+camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
+{
+ struct camdd_buf *buf = NULL;
+
+ switch (buf_type) {
+ case CAMDD_BUF_DATA:
+ buf = STAILQ_FIRST(&dev->free_queue);
+ if (buf != NULL) {
+ struct camdd_buf_data *data;
+ uint8_t *data_ptr;
+ uint32_t alloc_len;
+
+ STAILQ_REMOVE_HEAD(&dev->free_queue, links);
+ data = &buf->buf_type_spec.data;
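+			/*
+			 * Save the data pointer and allocation length
+			 * across the bzero below so we can reuse the
+			 * data area without reallocating it.
+			 */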
+ data_ptr = data->buf;
+ alloc_len = data->alloc_len;
+ bzero(buf, sizeof(*buf));
+ data->buf = data_ptr;
+ data->alloc_len = alloc_len;
+ }
+ break;
+ case CAMDD_BUF_INDIRECT:
+ buf = STAILQ_FIRST(&dev->free_indirect_queue);
+ if (buf != NULL) {
+ STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
+
+ bzero(buf, sizeof(*buf));
+ }
+ break;
+ default:
+ warnx("Unknown buffer type %d requested", buf_type);
+ break;
+ }
+
+ if (buf == NULL)
+ return (camdd_alloc_buf(dev, buf_type));
+ else {
+ STAILQ_INIT(&buf->src_list);
+ buf->dev = dev;
+ buf->buf_type = buf_type;
+
+ return (buf);
+ }
+}
+
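+/*
+ * Build a scatter/gather list covering all of the source buffers
+ * attached to this buffer: a bus_dma_segment_t array for pass(4) I/O,
+ * or an iovec array (iovec != 0) for file I/O.
+ */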
+int
+camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
+ uint32_t *num_sectors_used, int *double_buf_needed)
+{
+ struct camdd_buf *tmp_buf;
+ struct camdd_buf_data *data;
+ uint8_t *extra_buf = NULL;
+ size_t extra_buf_len = 0;
+ int i, retval = 0;
+
+ data = &buf->buf_type_spec.data;
+
+ data->sg_count = buf->src_count;
+ /*
+ * Compose a scatter/gather list from all of the buffers in the list.
+ * If the length of the buffer isn't a multiple of the sector size,
+ * we'll have to add an extra buffer. This should only happen
+ * at the end of a transfer.
+ */
+ if ((data->fill_len % sector_size) != 0) {
+ extra_buf_len = sector_size - (data->fill_len % sector_size);
+ extra_buf = calloc(extra_buf_len, 1);
+ if (extra_buf == NULL) {
+ warn("%s: unable to allocate %zu bytes for extra "
+ "buffer space", __func__, extra_buf_len);
+ retval = 1;
+ goto bailout;
+ }
+ data->extra_buf = 1;
+ data->sg_count++;
+ }
+ if (iovec == 0) {
+ data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
+ if (data->segs == NULL) {
+ warn("%s: unable to allocate %zu bytes for S/G list",
+ __func__, sizeof(bus_dma_segment_t) *
+ data->sg_count);
+ retval = 1;
+ goto bailout;
+ }
+
+ } else {
+ data->iovec = calloc(data->sg_count, sizeof(struct iovec));
+ if (data->iovec == NULL) {
+ warn("%s: unable to allocate %zu bytes for S/G list",
+ __func__, sizeof(struct iovec) * data->sg_count);
+ retval = 1;
+ goto bailout;
+ }
+ }
+
+ for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
+ i < buf->src_count && tmp_buf != NULL; i++,
+ tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
+
+ if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
+ struct camdd_buf_data *tmp_data;
+
+ tmp_data = &tmp_buf->buf_type_spec.data;
+ if (iovec == 0) {
+ data->segs[i].ds_addr =
+ (bus_addr_t) tmp_data->buf;
+ data->segs[i].ds_len = tmp_data->fill_len -
+ tmp_data->resid;
+ } else {
+ data->iovec[i].iov_base = tmp_data->buf;
+ data->iovec[i].iov_len = tmp_data->fill_len -
+ tmp_data->resid;
+ }
+ if (((tmp_data->fill_len - tmp_data->resid) %
+ sector_size) != 0)
+ *double_buf_needed = 1;
+ } else {
+ struct camdd_buf_indirect *tmp_ind;
+
+ tmp_ind = &tmp_buf->buf_type_spec.indirect;
+ if (iovec == 0) {
+ data->segs[i].ds_addr =
+ (bus_addr_t)tmp_ind->start_ptr;
+ data->segs[i].ds_len = tmp_ind->len;
+ } else {
+ data->iovec[i].iov_base = tmp_ind->start_ptr;
+ data->iovec[i].iov_len = tmp_ind->len;
+ }
+ if ((tmp_ind->len % sector_size) != 0)
+ *double_buf_needed = 1;
+ }
+ }
+
+ if (extra_buf != NULL) {
+ if (iovec == 0) {
+ data->segs[i].ds_addr = (bus_addr_t)extra_buf;
+ data->segs[i].ds_len = extra_buf_len;
+ } else {
+ data->iovec[i].iov_base = extra_buf;
+ data->iovec[i].iov_len = extra_buf_len;
+ }
+ i++;
+ }
+ if ((tmp_buf != NULL) || (i != data->sg_count)) {
+ warnx("buffer source count does not match "
+ "number of buffers in list!");
+ retval = 1;
+ goto bailout;
+ }
+
+bailout:
+ if (retval == 0) {
+ *num_sectors_used = (data->fill_len + extra_buf_len) /
+ sector_size;
+ }
+ return (retval);
+}
+
+uint32_t
+camdd_buf_get_len(struct camdd_buf *buf)
+{
+ uint32_t len = 0;
+
+ if (buf->buf_type != CAMDD_BUF_DATA) {
+ struct camdd_buf_indirect *indirect;
+
+ indirect = &buf->buf_type_spec.indirect;
+ len = indirect->len;
+ } else {
+ struct camdd_buf_data *data;
+
+ data = &buf->buf_type_spec.data;
+ len = data->fill_len;
+ }
+
+ return (len);
+}
+
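+/*
+ * Attach child_buf to buf's source list and add its length to the
+ * amount of data accumulated for this I/O.
+ */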
+void
+camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
+{
+ struct camdd_buf_data *data;
+
+ assert(buf->buf_type == CAMDD_BUF_DATA);
+
+ data = &buf->buf_type_spec.data;
+
+ STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
+ buf->src_count++;
+
+ data->fill_len += camdd_buf_get_len(child_buf);
+}
+
+typedef enum {
+ CAMDD_TS_MAX_BLK,
+ CAMDD_TS_MIN_BLK,
+ CAMDD_TS_BLK_GRAN,
+ CAMDD_TS_EFF_IOSIZE
+} camdd_status_item_index;
+
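+/*
+ * The order of these entries must match the camdd_status_item_index
+ * enum above, since camdd_probe_tape() indexes the array with those
+ * enum values.
+ */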
+static struct camdd_status_items {
+ const char *name;
+ struct mt_status_entry *entry;
+} req_status_items[] = {
+ { "max_blk", NULL },
+ { "min_blk", NULL },
+ { "blk_gran", NULL },
+ { "max_effective_iosize", NULL }
+};
+
+int
+camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
+ uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
+{
+ struct mt_status_data status_data;
+ char *xml_str = NULL;
+ unsigned int i;
+ int retval = 0;
+
+ retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
+ if (retval != 0)
+ err(1, "Couldn't get XML string from %s", filename);
+
+ retval = mt_get_status(xml_str, &status_data);
+ if (retval != XML_STATUS_OK) {
+ warn("couldn't get status for %s", filename);
+ retval = 1;
+ goto bailout;
+ } else
+ retval = 0;
+
+ if (status_data.error != 0) {
+ warnx("%s", status_data.error_str);
+ retval = 1;
+ goto bailout;
+ }
+
+ for (i = 0; i < sizeof(req_status_items) /
+ sizeof(req_status_items[0]); i++) {
+ char *name;
+
+ name = __DECONST(char *, req_status_items[i].name);
+ req_status_items[i].entry = mt_status_entry_find(&status_data,
+ name);
+ if (req_status_items[i].entry == NULL) {
+ errx(1, "Cannot find status entry %s",
+ req_status_items[i].name);
+ }
+ }
+
+ *max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
+	*max_blk = req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
+	*min_blk = req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
+ *blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
+bailout:
+
+ free(xml_str);
+ mt_status_free(&status_data);
+
+ return (retval);
+}
+
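+/*
+ * Set up a camdd_dev for a file or file-like device (regular file,
+ * tape, disk, memory device, tty, or pipe), adjusting the blocksize
+ * and sector size to the device's requirements.
+ */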
+struct camdd_dev *
+camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
+ int timeout)
+{
+ struct camdd_dev *dev = NULL;
+ struct camdd_dev_file *file_dev;
+ uint64_t blocksize = io_opts->blocksize;
+
+ dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
+ if (dev == NULL)
+ goto bailout;
+
+ file_dev = &dev->dev_spec.file;
+ file_dev->fd = fd;
+ strlcpy(file_dev->filename, io_opts->dev_name,
+ sizeof(file_dev->filename));
+ strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
+ if (blocksize == 0)
+ dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
+ else
+ dev->blocksize = blocksize;
+
+ if ((io_opts->queue_depth != 0)
+ && (io_opts->queue_depth != 1)) {
+ warnx("Queue depth %ju for %s ignored, only 1 outstanding "
+ "command supported", (uintmax_t)io_opts->queue_depth,
+ io_opts->dev_name);
+ }
+ dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
+ dev->run = camdd_file_run;
+ dev->fetch = NULL;
+
+ /*
+ * We can effectively access files on byte boundaries. We'll reset
+ * this for devices like disks that can be accessed on sector
+ * boundaries.
+ */
+ dev->sector_size = 1;
+
+ if ((fd != STDIN_FILENO)
+ && (fd != STDOUT_FILENO)) {
+ int retval;
+
+ retval = fstat(fd, &file_dev->sb);
+		if (retval != 0) {
+			warn("Cannot stat %s", dev->device_name);
+			goto bailout_error;
+		}
+ if (S_ISREG(file_dev->sb.st_mode)) {
+ file_dev->file_type = CAMDD_FILE_REG;
+ } else if (S_ISCHR(file_dev->sb.st_mode)) {
+ int type;
+
+ if (ioctl(fd, FIODTYPE, &type) == -1)
+ err(1, "FIODTYPE ioctl failed on %s",
+ dev->device_name);
+ else {
+ if (type & D_TAPE)
+ file_dev->file_type = CAMDD_FILE_TAPE;
+ else if (type & D_DISK)
+ file_dev->file_type = CAMDD_FILE_DISK;
+ else if (type & D_MEM)
+ file_dev->file_type = CAMDD_FILE_MEM;
+ else if (type & D_TTY)
+ file_dev->file_type = CAMDD_FILE_TTY;
+ }
+ } else if (S_ISDIR(file_dev->sb.st_mode)) {
+ errx(1, "cannot operate on directory %s",
+ dev->device_name);
+ } else if (S_ISFIFO(file_dev->sb.st_mode)) {
+ file_dev->file_type = CAMDD_FILE_PIPE;
+ } else
+ errx(1, "Cannot determine file type for %s",
+ dev->device_name);
+
+ switch (file_dev->file_type) {
+ case CAMDD_FILE_REG:
+ if (file_dev->sb.st_size != 0)
+ dev->max_sector = file_dev->sb.st_size - 1;
+ else
+ dev->max_sector = 0;
+ file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
+ break;
+ case CAMDD_FILE_TAPE: {
+ uint64_t max_iosize, max_blk, min_blk, blk_gran;
+ /*
+ * Check block limits and maximum effective iosize.
+ * Make sure the blocksize is within the block
+ * limits (and a multiple of the minimum blocksize)
+ * and that the blocksize is <= maximum effective
+ * iosize.
+ */
+ retval = camdd_probe_tape(fd, dev->device_name,
+ &max_iosize, &max_blk, &min_blk, &blk_gran);
+ if (retval != 0)
+ errx(1, "Unable to probe tape %s",
+ dev->device_name);
+
+ /*
+ * The blocksize needs to be <= the maximum
+ * effective I/O size of the tape device. Note
+ * that this also takes into account the maximum
+ * blocksize reported by READ BLOCK LIMITS.
+ */
+ if (dev->blocksize > max_iosize) {
+ warnx("Blocksize %u too big for %s, limiting "
+ "to %ju", dev->blocksize, dev->device_name,
+				    (uintmax_t)max_iosize);
+ dev->blocksize = max_iosize;
+ }
+
+ /*
+			 * The blocksize needs to be at least min_blk.
+			 */
+			if (dev->blocksize < min_blk) {
+				warnx("Blocksize %u too small for %s, "
+				    "increasing to %ju", dev->blocksize,
+				    dev->device_name, (uintmax_t)min_blk);
+ dev->blocksize = min_blk;
+ }
+
+ /*
+ * And the blocksize needs to be a multiple of
+			 * the block granularity (blk_gran is a
+			 * power-of-2 exponent).
+ */
+ if ((blk_gran != 0)
+ && (dev->blocksize % (1 << blk_gran))) {
+ warnx("Blocksize %u for %s not a multiple of "
+ "%d, adjusting to %d", dev->blocksize,
+ dev->device_name, (1 << blk_gran),
+ dev->blocksize & ~((1 << blk_gran) - 1));
+ dev->blocksize &= ~((1 << blk_gran) - 1);
+ }
+
+ if (dev->blocksize == 0) {
+ errx(1, "Unable to derive valid blocksize for "
+ "%s", dev->device_name);
+ }
+
+ /*
+ * For tape drives, set the sector size to the
+ * blocksize so that we make sure not to write
+ * less than the blocksize out to the drive.
+ */
+ dev->sector_size = dev->blocksize;
+ break;
+ }
+ case CAMDD_FILE_DISK: {
+ off_t media_size;
+ unsigned int sector_size;
+
+ file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
+
+ if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
+ err(1, "DIOCGSECTORSIZE ioctl failed on %s",
+ dev->device_name);
+ }
+
+ if (sector_size == 0) {
+ errx(1, "DIOCGSECTORSIZE ioctl returned "
+ "invalid sector size %u for %s",
+ sector_size, dev->device_name);
+ }
+
+ if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
+ err(1, "DIOCGMEDIASIZE ioctl failed on %s",
+ dev->device_name);
+ }
+
+ if (media_size == 0) {
+ errx(1, "DIOCGMEDIASIZE ioctl returned "
+ "invalid media size %ju for %s",
+ (uintmax_t)media_size, dev->device_name);
+ }
+
+ if (dev->blocksize % sector_size) {
+ errx(1, "%s blocksize %u not a multiple of "
+ "sector size %u", dev->device_name,
+ dev->blocksize, sector_size);
+ }
+
+ dev->sector_size = sector_size;
+ dev->max_sector = (media_size / sector_size) - 1;
+ break;
+ }
+ case CAMDD_FILE_MEM:
+ file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if ((io_opts->offset != 0)
+ && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
+		warnx("Offset %ju specified for %s, but we cannot seek on %s",
+		    (uintmax_t)io_opts->offset, io_opts->dev_name,
+		    io_opts->dev_name);
+ goto bailout_error;
+ }
+#if 0
+ else if ((io_opts->offset != 0)
+ && ((io_opts->offset % dev->sector_size) != 0)) {
+ warnx("Offset %ju for %s is not a multiple of the "
+ "sector size %u", io_opts->offset,
+ io_opts->dev_name, dev->sector_size);
+ goto bailout_error;
+ } else {
+ dev->start_offset_bytes = io_opts->offset;
+ }
+#endif
+
+bailout:
+ return (dev);
+
+bailout_error:
+ camdd_free_dev(dev);
+ return (NULL);
+}
+
+/*
+ * Do a basic probe of a pass(4) device:
+ * - Check the inquiry data, make sure we're talking to a device that we
+ *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
+ * - Get the capacity and block size.
+ * XXX KDM we should also send a TEST UNIT READY to make sure the device
+ * is actually available before we start issuing I/O.
+ */
+struct camdd_dev *
+camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
+ camdd_argmask arglist, int probe_retry_count,
+ int probe_timeout, int io_retry_count, int io_timeout)
+{
+ union ccb *ccb;
+ uint64_t maxsector;
+ uint32_t cpi_maxio, max_iosize, pass_numblocks;
+ uint32_t block_len;
+ struct scsi_read_capacity_data rcap;
+ struct scsi_read_capacity_data_long rcaplong;
+ struct camdd_dev *dev;
+ struct camdd_dev_pass *pass_dev;
+ struct kevent ke;
+	int scsi_dev_type;
+
+ dev = NULL;
+
+ scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
+ maxsector = 0;
+ block_len = 0;
+
+ /*
+ * For devices that support READ CAPACITY, we'll attempt to get the
+ * capacity. Otherwise, we really don't support tape or other
+ * devices via SCSI passthrough, so just return an error in that case.
+ */
+ switch (scsi_dev_type) {
+ case T_DIRECT:
+ case T_WORM:
+ case T_CDROM:
+ case T_OPTICAL:
+ case T_RBC:
+ break;
+ default:
+ errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
+ break; /*NOTREACHED*/
+ }
+
+ ccb = cam_getccb(cam_dev);
+
+ if (ccb == NULL) {
+ warnx("%s: error allocating ccb", __func__);
+ goto bailout;
+ }
+
+ bzero(&(&ccb->ccb_h)[1],
+ sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
+
+ scsi_read_capacity(&ccb->csio,
+ /*retries*/ probe_retry_count,
+ /*cbfcnp*/ NULL,
+ /*tag_action*/ MSG_SIMPLE_Q_TAG,
+ &rcap,
+ SSD_FULL_SIZE,
+ /*timeout*/ probe_timeout ? probe_timeout : 5000);
+
+ /* Disable freezing the device queue */
+ ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
+
+ if (arglist & CAMDD_ARG_ERR_RECOVER)
+ ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
+
+ if (cam_send_ccb(cam_dev, ccb) < 0) {
+ warn("error sending READ CAPACITY command");
+
+ cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
+ CAM_EPF_ALL, stderr);
+
+ goto bailout;
+ }
+
+ if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+ cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
+ goto bailout;
+ }
+
+ maxsector = scsi_4btoul(rcap.addr);
+ block_len = scsi_4btoul(rcap.length);
+
+ /*
+ * A last block of 2^32-1 means that the true capacity is over 2TB,
+ * and we need to issue the long READ CAPACITY to get the real
+ * capacity. Otherwise, we're all set.
+ */
+ if (maxsector != 0xffffffff)
+ goto rcap_done;
+
+ scsi_read_capacity_16(&ccb->csio,
+ /*retries*/ probe_retry_count,
+ /*cbfcnp*/ NULL,
+ /*tag_action*/ MSG_SIMPLE_Q_TAG,
+ /*lba*/ 0,
+ /*reladdr*/ 0,
+ /*pmi*/ 0,
+ (uint8_t *)&rcaplong,
+ sizeof(rcaplong),
+ /*sense_len*/ SSD_FULL_SIZE,
+ /*timeout*/ probe_timeout ? probe_timeout : 5000);
+
+ /* Disable freezing the device queue */
+ ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
+
+ if (arglist & CAMDD_ARG_ERR_RECOVER)
+ ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
+
+ if (cam_send_ccb(cam_dev, ccb) < 0) {
+ warn("error sending READ CAPACITY (16) command");
+
+ cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
+ CAM_EPF_ALL, stderr);
+
+ goto bailout;
+ }
+
+ if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+ cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
+ goto bailout;
+ }
+
+ maxsector = scsi_8btou64(rcaplong.addr);
+ block_len = scsi_4btoul(rcaplong.length);
+
+rcap_done:
+
+ bzero(&(&ccb->ccb_h)[1],
+ sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
+
+ ccb->ccb_h.func_code = XPT_PATH_INQ;
+ ccb->ccb_h.flags = CAM_DIR_NONE;
+ ccb->ccb_h.retry_count = 1;
+
+ if (cam_send_ccb(cam_dev, ccb) < 0) {
+ warn("error sending XPT_PATH_INQ CCB");
+
+ cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
+ CAM_EPF_ALL, stderr);
+ goto bailout;
+ }
+
+ EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
+
+ dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
+ io_timeout);
+ if (dev == NULL)
+ goto bailout;
+
+ pass_dev = &dev->dev_spec.pass;
+ pass_dev->scsi_dev_type = scsi_dev_type;
+ pass_dev->dev = cam_dev;
+ pass_dev->max_sector = maxsector;
+ pass_dev->block_len = block_len;
+ pass_dev->cpi_maxio = ccb->cpi.maxio;
+ snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
+ pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
+ dev->sector_size = block_len;
+ dev->max_sector = maxsector;
+
+ /*
+ * Determine the optimal blocksize to use for this device.
+ */
+
+ /*
+ * If the controller has not specified a maximum I/O size,
+ * just go with 128K as a somewhat conservative value.
+ */
+ if (pass_dev->cpi_maxio == 0)
+ cpi_maxio = 131072;
+ else
+ cpi_maxio = pass_dev->cpi_maxio;
+
+ /*
+ * If the controller has a large maximum I/O size, limit it
+ * to something smaller so that the kernel doesn't have trouble
+ * allocating buffers to copy data in and out for us.
+ * XXX KDM this is until we have unmapped I/O support in the kernel.
+ */
+ max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
+
+ /*
+ * If we weren't able to get a block size for some reason,
+ * default to 512 bytes.
+ */
+ block_len = pass_dev->block_len;
+ if (block_len == 0)
+ block_len = 512;
+
+ /*
+ * Figure out how many blocksize chunks will fit in the
+ * maximum I/O size.
+ */
+ pass_numblocks = max_iosize / block_len;
+
+ /*
+	 * And finally, multiply the number of blocks by the block length
+	 * to get our maximum blocksize.
+ */
+ dev->blocksize = pass_numblocks * block_len;
+
+ if (io_opts->blocksize != 0) {
+ if ((io_opts->blocksize % dev->sector_size) != 0) {
+ warnx("Blocksize %ju for %s is not a multiple of "
+ "sector size %u", (uintmax_t)io_opts->blocksize,
+ dev->device_name, dev->sector_size);
+ goto bailout_error;
+ }
+ dev->blocksize = io_opts->blocksize;
+ }
+ dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
+ if (io_opts->queue_depth != 0)
+ dev->target_queue_depth = io_opts->queue_depth;
+
+ if (io_opts->offset != 0) {
+ if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
+			warnx("Offset %ju is past the end of device %s",
+			    (uintmax_t)io_opts->offset, dev->device_name);
+ goto bailout_error;
+ }
+#if 0
+ else if ((io_opts->offset % dev->sector_size) != 0) {
+ warnx("Offset %ju for %s is not a multiple of the "
+ "sector size %u", io_opts->offset,
+ dev->device_name, dev->sector_size);
+ goto bailout_error;
+ }
+ dev->start_offset_bytes = io_opts->offset;
+#endif
+ }
+
+ dev->min_cmd_size = io_opts->min_cmd_size;
+
+ dev->run = camdd_pass_run;
+ dev->fetch = camdd_pass_fetch;
+
+bailout:
+ cam_freeccb(ccb);
+
+ return (dev);
+
+bailout_error:
+ cam_freeccb(ccb);
+
+ camdd_free_dev(dev);
+
+ return (NULL);
+}
+
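+/*
+ * Per-device worker thread.  The reader side queues up new I/O until
+ * the target depths are reached; both sides dispatch whatever is on
+ * their run queue and then sleep in kevent(2), waiting for I/O
+ * completions, work from the peer thread, or signals.
+ */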
+void *
+camdd_worker(void *arg)
+{
+ struct camdd_dev *dev = arg;
+ struct camdd_buf *buf;
+ struct timespec ts, *kq_ts;
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 0;
+
+ pthread_mutex_lock(&dev->mutex);
+
+ dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
+
+ for (;;) {
+ struct kevent ke;
+ int retval = 0;
+
+ /*
+ * XXX KDM check the reorder queue depth?
+ */
+ if (dev->write_dev == 0) {
+ uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
+ uint32_t target_depth = dev->target_queue_depth;
+ uint32_t peer_target_depth =
+ dev->peer_dev->target_queue_depth;
+ uint32_t peer_blocksize = dev->peer_dev->blocksize;
+
+ camdd_get_depth(dev, &our_depth, &peer_depth,
+ &our_bytes, &peer_bytes);
+
+#if 0
+ while (((our_depth < target_depth)
+ && (peer_depth < peer_target_depth))
+ || ((peer_bytes + our_bytes) <
+ (peer_blocksize * 2))) {
+#endif
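+			/*
+			 * Keep queueing new reads while the combined
+			 * depth of the two threads is below the combined
+			 * target depth, or while the writer does not yet
+			 * have enough bytes queued for three full write
+			 * blocks.
+			 */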
+ while (((our_depth + peer_depth) <
+ (target_depth + peer_target_depth))
+ || ((peer_bytes + our_bytes) <
+ (peer_blocksize * 3))) {
+
+ retval = camdd_queue(dev, NULL);
+ if (retval == 1)
+ break;
+ else if (retval != 0) {
+ error_exit = 1;
+ goto bailout;
+ }
+
+ camdd_get_depth(dev, &our_depth, &peer_depth,
+ &our_bytes, &peer_bytes);
+ }
+ }
+ /*
+ * See if we have any I/O that is ready to execute.
+ */
+ buf = STAILQ_FIRST(&dev->run_queue);
+ if (buf != NULL) {
+ while (dev->target_queue_depth > dev->cur_active_io) {
+ retval = dev->run(dev);
+ if (retval == -1) {
+ dev->flags |= CAMDD_DEV_FLAG_EOF;
+ error_exit = 1;
+ break;
+ } else if (retval != 0) {
+ break;
+ }
+ }
+ }
+
+ /*
+ * We've reached EOF, or our partner has reached EOF.
+ */
+ if ((dev->flags & CAMDD_DEV_FLAG_EOF)
+ || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
+ if (dev->write_dev != 0) {
+ if ((STAILQ_EMPTY(&dev->work_queue))
+ && (dev->num_run_queue == 0)
+ && (dev->cur_active_io == 0)) {
+ goto bailout;
+ }
+ } else {
+ /*
+				 * If we're the reader and the writer got
+				 * EOF, the writer is already done.  If we
+				 * got the EOF ourselves, we need to wait
+				 * until everything we have queued has been
+				 * flushed out to the writer.
+ */
+ if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
+ goto bailout;
+ } else if ((dev->num_peer_work_queue == 0)
+ && (dev->num_peer_done_queue == 0)
+ && (dev->cur_active_io == 0)
+ && (dev->num_run_queue == 0)) {
+ goto bailout;
+ }
+ }
+ /*
+ * XXX KDM need to do something about the pending
+ * queue and cleanup resources.
+ */
+ }
+
+ if ((dev->write_dev == 0)
+ && (dev->cur_active_io == 0)
+ && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
+ kq_ts = &ts;
+ else
+ kq_ts = NULL;
+
+ /*
+ * Run kevent to see if there are events to process.
+ */
+ pthread_mutex_unlock(&dev->mutex);
+ retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
+ pthread_mutex_lock(&dev->mutex);
+ if (retval == -1) {
+			warn("%s: error returned from kevent", __func__);
+ goto bailout;
+ } else if (retval != 0) {
+ switch (ke.filter) {
+ case EVFILT_READ:
+ if (dev->fetch != NULL) {
+ retval = dev->fetch(dev);
+ if (retval == -1) {
+ error_exit = 1;
+ goto bailout;
+ }
+ }
+ break;
+ case EVFILT_SIGNAL:
+ /*
+ * We register for this so we don't get
+ * an error as a result of a SIGINFO or a
+ * SIGINT. It will actually get handled
+ * by the signal handler. If we get a
+ * SIGINT, bail out without printing an
+ * error message. Any other signals
+ * will result in the error message above.
+ */
+ if (ke.ident == SIGINT)
+ goto bailout;
+ break;
+ case EVFILT_USER:
+ retval = 0;
+ /*
+ * Check to see if the other thread has
+ * queued any I/O for us to do. (In this
+ * case we're the writer.)
+ */
+ for (buf = STAILQ_FIRST(&dev->work_queue);
+ buf != NULL;
+ buf = STAILQ_FIRST(&dev->work_queue)) {
+ STAILQ_REMOVE_HEAD(&dev->work_queue,
+ work_links);
+ retval = camdd_queue(dev, buf);
+ /*
+ * We keep going unless we get an
+ * actual error. If we get EOF, we
+ * still want to remove the buffers
+					 * from the queue and send them back
+ * to the reader thread.
+ */
+ if (retval == -1) {
+ error_exit = 1;
+ goto bailout;
+ } else
+ retval = 0;
+ }
+
+ /*
+ * Next check to see if the other thread has
+ * queued any completed buffers back to us.
+ * (In this case we're the reader.)
+ */
+ for (buf = STAILQ_FIRST(&dev->peer_done_queue);
+ buf != NULL;
+ buf = STAILQ_FIRST(&dev->peer_done_queue)){
+ STAILQ_REMOVE_HEAD(
+ &dev->peer_done_queue, work_links);
+ dev->num_peer_done_queue--;
+ camdd_peer_done(buf);
+ }
+ break;
+ default:
+ warnx("%s: unknown kevent filter %d",
+ __func__, ke.filter);
+ break;
+ }
+ }
+ }
+
+bailout:
+
+ dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
+
+ /* XXX KDM cleanup resources here? */
+
+ pthread_mutex_unlock(&dev->mutex);
+
+ need_exit = 1;
+ sem_post(&camdd_sem);
+
+ return (NULL);
+}
+
+/*
+ * Simplistic translation of CCB status to our local status.
+ */
+camdd_buf_status
+camdd_ccb_status(union ccb *ccb)
+{
+ camdd_buf_status status = CAMDD_STATUS_NONE;
+ cam_status ccb_status;
+
+ ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
+
+ switch (ccb_status) {
+ case CAM_REQ_CMP: {
+ if (ccb->csio.resid == 0) {
+ status = CAMDD_STATUS_OK;
+ } else if (ccb->csio.dxfer_len > ccb->csio.resid) {
+ status = CAMDD_STATUS_SHORT_IO;
+ } else {
+ status = CAMDD_STATUS_EOF;
+ }
+ break;
+ }
+ case CAM_SCSI_STATUS_ERROR: {
+ switch (ccb->csio.scsi_status) {
+ case SCSI_STATUS_OK:
+ case SCSI_STATUS_COND_MET:
+ case SCSI_STATUS_INTERMED:
+ case SCSI_STATUS_INTERMED_COND_MET:
+ status = CAMDD_STATUS_OK;
+ break;
+ case SCSI_STATUS_CMD_TERMINATED:
+ case SCSI_STATUS_CHECK_COND:
+ case SCSI_STATUS_QUEUE_FULL:
+ case SCSI_STATUS_BUSY:
+ case SCSI_STATUS_RESERV_CONFLICT:
+ default:
+ status = CAMDD_STATUS_ERROR;
+ break;
+ }
+ break;
+ }
+ default:
+ status = CAMDD_STATUS_ERROR;
+ break;
+ }
+
+ return (status);
+}
+
+/*
+ * Queue a buffer to our peer's work thread for writing.
+ *
+ * Returns 0 for success, -1 for failure, 1 if the other thread exited.
+ */
+int
+camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
+{
+ struct kevent ke;
+ STAILQ_HEAD(, camdd_buf) local_queue;
+ struct camdd_buf *buf1, *buf2;
+ struct camdd_buf_data *data = NULL;
+ uint64_t peer_bytes_queued = 0;
+ int active = 1;
+ int retval = 0;
+
+ STAILQ_INIT(&local_queue);
+
+ /*
+ * Since we're the reader, we need to queue our I/O to the writer
+ * in sequential order in order to make sure it gets written out
+ * in sequential order.
+ *
+ * Check the next expected I/O starting offset. If this doesn't
+ * match, put it on the reorder queue.
+ */
+ if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
+
+ /*
+ * If there is nothing on the queue, there is no sorting
+ * needed.
+ */
+ if (STAILQ_EMPTY(&dev->reorder_queue)) {
+ STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
+ dev->num_reorder_queue++;
+ goto bailout;
+ }
+
+ /*
+ * Sort in ascending order by starting LBA. There should
+ * be no identical LBAs.
+ */
+ for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
+ buf1 = buf2) {
+ buf2 = STAILQ_NEXT(buf1, links);
+ if (buf->lba < buf1->lba) {
+ /*
+ * If we're less than the first one, then
+ * we insert at the head of the list
+ * because this has to be the first element
+ * on the list.
+ */
+ STAILQ_INSERT_HEAD(&dev->reorder_queue,
+ buf, links);
+ dev->num_reorder_queue++;
+ break;
+ } else if (buf->lba > buf1->lba) {
+ if (buf2 == NULL) {
+ STAILQ_INSERT_TAIL(&dev->reorder_queue,
+ buf, links);
+ dev->num_reorder_queue++;
+ break;
+ } else if (buf->lba < buf2->lba) {
+ STAILQ_INSERT_AFTER(&dev->reorder_queue,
+ buf1, buf, links);
+ dev->num_reorder_queue++;
+ break;
+ }
+ } else {
+				errx(1, "Found buffers with duplicate LBA "
+				    "%ju!", (uintmax_t)buf->lba);
+ }
+ }
+ goto bailout;
+ } else {
+
+ /*
+ * We're the next expected I/O completion, so put ourselves
+ * on the local queue to be sent to the writer. We use
+ * work_links here so that we can queue this to the
+ * peer_work_queue before taking the buffer off of the
+ * local_queue.
+ */
+ dev->next_completion_pos_bytes += buf->len;
+ STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
+
+ /*
+ * Go through the reorder queue looking for more sequential
+ * I/O and add it to the local queue.
+ */
+ for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
+ buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
+ /*
+ * As soon as we see an I/O that is out of sequence,
+ * we're done.
+ */
+ if ((buf1->lba * dev->sector_size) !=
+ dev->next_completion_pos_bytes)
+ break;
+
+ STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
+ dev->num_reorder_queue--;
+ STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
+ dev->next_completion_pos_bytes += buf1->len;
+ }
+ }
+
+ /*
+ * Setup the event to let the other thread know that it has work
+ * pending.
+ */
+ EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
+ NOTE_TRIGGER, 0, NULL);
+
+ /*
+ * Put this on our shadow queue so that we know what we've queued
+ * to the other thread.
+ */
+ STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
+ if (buf1->buf_type != CAMDD_BUF_DATA) {
+ errx(1, "%s: should have a data buffer, not an "
+ "indirect buffer", __func__);
+ }
+ data = &buf1->buf_type_spec.data;
+
+ /*
+ * We only need to send one EOF to the writer, and don't
+ * need to continue sending EOFs after that.
+ */
+ if (buf1->status == CAMDD_STATUS_EOF) {
+ if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
+ STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
+ work_links);
+ camdd_release_buf(buf1);
+ retval = 1;
+ continue;
+ }
+ dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
+ }
+
+ STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
+ peer_bytes_queued += (data->fill_len - data->resid);
+ dev->peer_bytes_queued += (data->fill_len - data->resid);
+ dev->num_peer_work_queue++;
+ }
+
+ if (STAILQ_FIRST(&local_queue) == NULL)
+ goto bailout;
+
+ /*
+ * Drop our mutex and pick up the other thread's mutex. We need to
+ * do this to avoid deadlocks.
+ */
+ pthread_mutex_unlock(&dev->mutex);
+ pthread_mutex_lock(&dev->peer_dev->mutex);
+
+ if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
+ /*
+ * Put the buffers on the other thread's incoming work queue.
+ */
+ for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
+ buf1 = STAILQ_FIRST(&local_queue)) {
+ STAILQ_REMOVE_HEAD(&local_queue, work_links);
+ STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
+ work_links);
+ }
+ /*
+ * Send an event to the other thread's kqueue to let it know
+ * that there is something on the work queue.
+ */
+ retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
+ if (retval == -1)
+ warn("%s: unable to add peer work_queue kevent",
+ __func__);
+ else
+ retval = 0;
+ } else
+ active = 0;
+
+ pthread_mutex_unlock(&dev->peer_dev->mutex);
+ pthread_mutex_lock(&dev->mutex);
+
+ /*
+ * If the other side isn't active, run through the queue and
+ * release all of the buffers.
+ */
+ if (active == 0) {
+ for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
+ buf1 = STAILQ_FIRST(&local_queue)) {
+ STAILQ_REMOVE_HEAD(&local_queue, work_links);
+ STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
+ links);
+ dev->num_peer_work_queue--;
+ camdd_release_buf(buf1);
+ }
+ dev->peer_bytes_queued -= peer_bytes_queued;
+ retval = 1;
+ }
+
+bailout:
+ return (retval);
+}
+
+/*
+ * Return a buffer to the reader thread when we have completed writing it.
+ */
+int
+camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
+{
+ struct kevent ke;
+ int retval = 0;
+
+ /*
+ * Setup the event to let the other thread know that we have
+ * completed a buffer.
+ */
+ EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
+ NOTE_TRIGGER, 0, NULL);
+
+ /*
+ * Drop our lock and acquire the other thread's lock before
+	 * manipulating its queues, to avoid deadlock.
+ */
+ pthread_mutex_unlock(&dev->mutex);
+ pthread_mutex_lock(&dev->peer_dev->mutex);
+
+ /*
+ * Put the buffer on the reader thread's peer done queue now that
+ * we have completed it.
+ */
+ STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
+ work_links);
+ dev->peer_dev->num_peer_done_queue++;
+
+ /*
+ * Send an event to the peer thread to let it know that we've added
+ * something to its peer done queue.
+ */
+ retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
+ if (retval == -1)
+ warn("%s: unable to add peer_done_queue kevent", __func__);
+ else
+ retval = 0;
+
+ /*
+ * Drop the other thread's lock and reacquire ours.
+ */
+ pthread_mutex_unlock(&dev->peer_dev->mutex);
+ pthread_mutex_lock(&dev->mutex);
+
+ return (retval);
+}
+
+/*
+ * Free a buffer that was written out by the writer thread and returned to
+ * the reader thread.
+ */
+void
+camdd_peer_done(struct camdd_buf *buf)
+{
+ struct camdd_dev *dev;
+ struct camdd_buf_data *data;
+
+ dev = buf->dev;
+ if (buf->buf_type != CAMDD_BUF_DATA) {
+ errx(1, "%s: should have a data buffer, not an "
+ "indirect buffer", __func__);
+ }
+
+ data = &buf->buf_type_spec.data;
+
+ STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
+ dev->num_peer_work_queue--;
+ dev->peer_bytes_queued -= (data->fill_len - data->resid);
+
+ if (buf->status == CAMDD_STATUS_EOF)
+ dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
+
+ STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
+}
+
+/*
+ * Assumes caller holds the lock for this device.
+ */
+void
+camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
+ int *error_count)
+{
+ int retval = 0;
+
+ /*
+ * If we're the reader, we need to send the completed I/O
+ * to the writer. If we're the writer, we need to just
+ * free up resources, or let the reader know if we've
+ * encountered an error.
+ */
+ if (dev->write_dev == 0) {
+ retval = camdd_queue_peer_buf(dev, buf);
+ if (retval != 0)
+ (*error_count)++;
+ } else {
+ struct camdd_buf *tmp_buf, *next_buf;
+
+ STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
+ next_buf) {
+ struct camdd_buf *src_buf;
+ struct camdd_buf_indirect *indirect;
+
+ STAILQ_REMOVE(&buf->src_list, tmp_buf,
+ camdd_buf, src_links);
+
+ tmp_buf->status = buf->status;
+
+ if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
+ camdd_complete_peer_buf(dev, tmp_buf);
+ continue;
+ }
+
+ indirect = &tmp_buf->buf_type_spec.indirect;
+ src_buf = indirect->src_buf;
+ src_buf->refcount--;
+ /*
+ * XXX KDM we probably need to account for
+ * exactly how many bytes we were able to
+ * write. Allocate the residual to the
+ * first N buffers? Or just track the
+ * number of bytes written? Right now the reader
+ * doesn't do anything with a residual.
+ */
+ src_buf->status = buf->status;
+ if (src_buf->refcount <= 0)
+ camdd_complete_peer_buf(dev, src_buf);
+ STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
+ tmp_buf, links);
+ }
+
+ STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
+ }
+}
+
+/*
+ * Fetch all completed commands from the pass(4) device.
+ *
+ * Returns the number of commands received, or -1 if any of the commands
+ * completed with an error. Returns 0 if no commands are available.
+ */
+int
+camdd_pass_fetch(struct camdd_dev *dev)
+{
+ struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
+ union ccb ccb;
+ int retval = 0, num_fetched = 0, error_count = 0;
+
+ pthread_mutex_unlock(&dev->mutex);
+ /*
+ * XXX KDM we don't distinguish between EFAULT and ENOENT.
+ */
+ while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
+ struct camdd_buf *buf;
+ struct camdd_buf_data *data;
+ cam_status ccb_status;
+ union ccb *buf_ccb;
+
+ buf = ccb.ccb_h.ccb_buf;
+ data = &buf->buf_type_spec.data;
+ buf_ccb = &data->ccb;
+
+ num_fetched++;
+
+ /*
+ * Copy the CCB back out so we get status, sense data, etc.
+ */
+ bcopy(&ccb, buf_ccb, sizeof(ccb));
+
+ pthread_mutex_lock(&dev->mutex);
+
+ /*
+ * We're now done, so take this off the active queue.
+ */
+ STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
+ dev->cur_active_io--;
+
+ ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
+ if (ccb_status != CAM_REQ_CMP) {
+ cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
+ CAM_EPF_ALL, stderr);
+ }
+
+ data->resid = ccb.csio.resid;
+ dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
+
+ if (buf->status == CAMDD_STATUS_NONE)
+ buf->status = camdd_ccb_status(&ccb);
+ if (buf->status == CAMDD_STATUS_ERROR)
+ error_count++;
+ else if (buf->status == CAMDD_STATUS_EOF) {
+ /*
+ * Once we queue this buffer to our partner thread,
+ * he will know that we've hit EOF.
+ */
+ dev->flags |= CAMDD_DEV_FLAG_EOF;
+ }
+
+ camdd_complete_buf(dev, buf, &error_count);
+
+ /*
+ * Unlock in preparation for the ioctl call.
+ */
+ pthread_mutex_unlock(&dev->mutex);
+ }
+
+ pthread_mutex_lock(&dev->mutex);
+
+ if (error_count > 0)
+ return (-1);
+ else
+ return (num_fetched);
+}
+
+/*
+ * Returns -1 for error, 0 for success/continue, and 1 for resource
+ * shortage/stop processing.
+ */
+int
+camdd_file_run(struct camdd_dev *dev)
+{
+ struct camdd_dev_file *file_dev = &dev->dev_spec.file;
+ struct camdd_buf_data *data;
+ struct camdd_buf *buf;
+ off_t io_offset;
+ int retval = 0, write_dev = dev->write_dev;
+ int error_count = 0, no_resources = 0, double_buf_needed = 0;
+ uint32_t num_sectors = 0, db_len = 0;
+
+ buf = STAILQ_FIRST(&dev->run_queue);
+ if (buf == NULL) {
+ no_resources = 1;
+ goto bailout;
+ } else if ((dev->write_dev == 0)
+ && (dev->flags & (CAMDD_DEV_FLAG_EOF |
+ CAMDD_DEV_FLAG_EOF_SENT))) {
+ STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
+ dev->num_run_queue--;
+ buf->status = CAMDD_STATUS_EOF;
+ error_count++;
+ goto bailout;
+ }
+
+ /*
+ * If we're writing, we need to go through the source buffer list
+ * and create an S/G list.
+ */
+ if (write_dev != 0) {
+ retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
+ dev->sector_size, &num_sectors, &double_buf_needed);
+ if (retval != 0) {
+ no_resources = 1;
+ goto bailout;
+ }
+ }
+
+ STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
+ dev->num_run_queue--;
+
+ data = &buf->buf_type_spec.data;
+
+ /*
+ * pread(2) and pwrite(2) offsets are byte offsets.
+ */
+ io_offset = buf->lba * dev->sector_size;
+
+ /*
+ * Unlock the mutex while we read or write.
+ */
+ pthread_mutex_unlock(&dev->mutex);
+
+ /*
+ * Note that we don't need to double buffer if we're the reader
+ * because in that case, we have allocated a single buffer of
+ * sufficient size to do the read. This copy is necessary on
+ * writes because if one of the components of the S/G list is not
+ * a sector size multiple, the kernel will reject the write. This
+ * is unfortunate but not surprising. So this will make sure that
+ * we're using a single buffer that is a multiple of the sector size.
+ */
+ if ((double_buf_needed != 0)
+ && (data->sg_count > 1)
+ && (write_dev != 0)) {
+ uint32_t cur_offset;
+ int i;
+
+ if (file_dev->tmp_buf == NULL)
+ file_dev->tmp_buf = calloc(dev->blocksize, 1);
+		if (file_dev->tmp_buf == NULL) {
+			buf->status = CAMDD_STATUS_ERROR;
+			error_count++;
+			/*
+			 * We dropped the mutex above for the I/O; take it
+			 * back before bailing out, since the completion
+			 * path expects it to be held.
+			 */
+			pthread_mutex_lock(&dev->mutex);
+			goto bailout;
+		}
+ for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
+ bcopy(data->iovec[i].iov_base,
+ &file_dev->tmp_buf[cur_offset],
+ data->iovec[i].iov_len);
+ cur_offset += data->iovec[i].iov_len;
+ }
+ db_len = cur_offset;
+ }
+
+ if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
+ if (write_dev == 0) {
+ /*
+ * XXX KDM is there any way we would need a S/G
+ * list here?
+ */
+ retval = pread(file_dev->fd, data->buf,
+ buf->len, io_offset);
+ } else {
+ if (double_buf_needed != 0) {
+ retval = pwrite(file_dev->fd, file_dev->tmp_buf,
+ db_len, io_offset);
+ } else if (data->sg_count == 0) {
+ retval = pwrite(file_dev->fd, data->buf,
+ data->fill_len, io_offset);
+ } else {
+ retval = pwritev(file_dev->fd, data->iovec,
+ data->sg_count, io_offset);
+ }
+ }
+ } else {
+ if (write_dev == 0) {
+ /*
+ * XXX KDM is there any way we would need a S/G
+ * list here?
+ */
+ retval = read(file_dev->fd, data->buf, buf->len);
+ } else {
+ if (double_buf_needed != 0) {
+ retval = write(file_dev->fd, file_dev->tmp_buf,
+ db_len);
+ } else if (data->sg_count == 0) {
+ retval = write(file_dev->fd, data->buf,
+ data->fill_len);
+ } else {
+ retval = writev(file_dev->fd, data->iovec,
+ data->sg_count);
+ }
+ }
+ }
+
+ /* We're done, re-acquire the lock */
+ pthread_mutex_lock(&dev->mutex);
+
+ if (retval >= (ssize_t)data->fill_len) {
+ /*
+		 * If the number of bytes transferred is more than the request
+		 * size, that indicates an overrun, which should only happen at
+ * the end of a transfer if we have to round up to a sector
+ * boundary.
+ */
+ if (buf->status == CAMDD_STATUS_NONE)
+ buf->status = CAMDD_STATUS_OK;
+ data->resid = 0;
+ dev->bytes_transferred += retval;
+ } else if (retval == -1) {
+ warn("Error %s %s", (write_dev) ? "writing to" :
+ "reading from", file_dev->filename);
+
+ buf->status = CAMDD_STATUS_ERROR;
+ data->resid = data->fill_len;
+ error_count++;
+
+ if (dev->debug == 0)
+ goto bailout;
+
+ if ((double_buf_needed != 0)
+ && (write_dev != 0)) {
+ fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
+ "offset %ju\n", __func__, file_dev->fd,
+ file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
+ (uintmax_t)io_offset);
+ } else if (data->sg_count == 0) {
+ fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
+ "offset %ju\n", __func__, file_dev->fd, data->buf,
+ data->fill_len, (uintmax_t)buf->lba,
+ (uintmax_t)io_offset);
+ } else {
+ int i;
+
+ fprintf(stderr, "%s: fd %d, len %u, lba %ju "
+ "offset %ju\n", __func__, file_dev->fd,
+ data->fill_len, (uintmax_t)buf->lba,
+ (uintmax_t)io_offset);
+
+ for (i = 0; i < data->sg_count; i++) {
+ fprintf(stderr, "index %d ptr %p len %zu\n",
+ i, data->iovec[i].iov_base,
+ data->iovec[i].iov_len);
+ }
+ }
+ } else if (retval == 0) {
+ buf->status = CAMDD_STATUS_EOF;
+ if (dev->debug != 0)
+ printf("%s: got EOF from %s!\n", __func__,
+ file_dev->filename);
+ data->resid = data->fill_len;
+ error_count++;
+ } else if (retval < (ssize_t)data->fill_len) {
+ if (buf->status == CAMDD_STATUS_NONE)
+ buf->status = CAMDD_STATUS_SHORT_IO;
+ data->resid = data->fill_len - retval;
+ dev->bytes_transferred += retval;
+ }
+
+bailout:
+ if (buf != NULL) {
+ if (buf->status == CAMDD_STATUS_EOF) {
+ struct camdd_buf *buf2;
+ dev->flags |= CAMDD_DEV_FLAG_EOF;
+ STAILQ_FOREACH(buf2, &dev->run_queue, links)
+ buf2->status = CAMDD_STATUS_EOF;
+ }
+
+ camdd_complete_buf(dev, buf, &error_count);
+ }
+
+ if (error_count != 0)
+ return (-1);
+ else if (no_resources != 0)
+ return (1);
+ else
+ return (0);
+}
+
+/*
+ * Execute one command from the run queue. Returns 0 for success, 1 for
+ * stop processing, and -1 for error.
+ */
+int
+camdd_pass_run(struct camdd_dev *dev)
+{
+ struct camdd_buf *buf = NULL;
+ struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
+ struct camdd_buf_data *data;
+ uint32_t num_blocks, sectors_used = 0;
+ union ccb *ccb;
+ int retval = 0, is_write = dev->write_dev;
+ int double_buf_needed = 0;
+
+ buf = STAILQ_FIRST(&dev->run_queue);
+ if (buf == NULL) {
+ retval = 1;
+ goto bailout;
+ }
+
+ /*
+ * If we're writing, we need to go through the source buffer list
+ * and create an S/G list.
+ */
+ if (is_write != 0) {
+ retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
+ &sectors_used, &double_buf_needed);
+ if (retval != 0) {
+ retval = -1;
+ goto bailout;
+ }
+ }
+
+ STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
+ dev->num_run_queue--;
+
+ data = &buf->buf_type_spec.data;
+
+ ccb = &data->ccb;
+ bzero(&(&ccb->ccb_h)[1],
+ sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
+
+ /*
+ * In almost every case the number of blocks should be the device
+ * block size. The exception may be at the end of an I/O stream
+ * for a partial block or at the end of a device.
+ */
+ if (is_write != 0)
+ num_blocks = sectors_used;
+ else
+ num_blocks = data->fill_len / pass_dev->block_len;
+
+ scsi_read_write(&ccb->csio,
+ /*retries*/ dev->retry_count,
+ /*cbfcnp*/ NULL,
+ /*tag_action*/ MSG_SIMPLE_Q_TAG,
+ /*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
+ SCSI_RW_WRITE,
+ /*byte2*/ 0,
+ /*minimum_cmd_size*/ dev->min_cmd_size,
+ /*lba*/ buf->lba,
+ /*block_count*/ num_blocks,
+ /*data_ptr*/ (data->sg_count != 0) ?
+ (uint8_t *)data->segs : data->buf,
+ /*dxfer_len*/ (num_blocks * pass_dev->block_len),
+ /*sense_len*/ SSD_FULL_SIZE,
+ /*timeout*/ dev->io_timeout);
+
+ /* Disable freezing the device queue */
+ ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
+
+ if (dev->retry_count != 0)
+ ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
+
+ if (data->sg_count != 0) {
+ ccb->csio.sglist_cnt = data->sg_count;
+ ccb->ccb_h.flags |= CAM_DATA_SG;
+ }
+
+ /*
+ * Store a pointer to the buffer in the CCB. The kernel will
+ * restore this when we get it back, and we'll use it to identify
+ * the buffer this CCB came from.
+ */
+ ccb->ccb_h.ccb_buf = buf;
+
+ /*
+ * Unlock our mutex in preparation for issuing the ioctl.
+ */
+ pthread_mutex_unlock(&dev->mutex);
+ /*
+ * Queue the CCB to the pass(4) driver.
+ */
+ if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
+ pthread_mutex_lock(&dev->mutex);
+
+ warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
+ pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
+ warn("%s: CCB address is %p", __func__, ccb);
+ retval = -1;
+
+ STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
+ } else {
+ pthread_mutex_lock(&dev->mutex);
+
+ dev->cur_active_io++;
+ STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
+ }
+
+bailout:
+ return (retval);
+}
+
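+/*
+ * Compute the starting LBA and length for the next I/O on this device,
+ * advancing next_io_pos_bytes.  Returns 0 normally, or 1 if the
+ * request had to be truncated (possibly to zero length) because it
+ * would run past the end of the device or the I/O limit.
+ */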
+int
+camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
+{
+ uint32_t num_blocks;
+ int retval = 0;
+
+ *lba = dev->next_io_pos_bytes / dev->sector_size;
+ *len = dev->blocksize;
+ num_blocks = *len / dev->sector_size;
+
+ /*
+ * If max_sector is 0, then we have no set limit. This can happen
+ * if we're writing to a file in a filesystem, or reading from
+ * something like /dev/zero.
+ */
+ if ((dev->max_sector != 0)
+ || (dev->sector_io_limit != 0)) {
+ uint64_t max_sector;
+
+ if ((dev->max_sector != 0)
+ && (dev->sector_io_limit != 0))
+ max_sector = min(dev->sector_io_limit, dev->max_sector);
+ else if (dev->max_sector != 0)
+ max_sector = dev->max_sector;
+ else
+ max_sector = dev->sector_io_limit;
+
+ /*
+ * Check to see whether we're starting off past the end of
+ * the device. If so, we need to just send an EOF
+ * notification to the writer.
+ */
+ if (*lba > max_sector) {
+ *len = 0;
+ retval = 1;
+ } else if (((*lba + num_blocks) > max_sector + 1)
+ || ((*lba + num_blocks) < *lba)) {
+ /*
+ * If we get here (but pass the first check), we
+ * can trim the request length down to go to the
+ * end of the device.
+ */
+ num_blocks = (max_sector + 1) - *lba;
+ *len = num_blocks * dev->sector_size;
+ retval = 1;
+ }
+ }
+
+ dev->next_io_pos_bytes += *len;
+
+ return (retval);
+}
+
+/*
+ * Returns 0 for success, 1 for EOF detected, and -1 for failure.
+ */
+int
+camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
+{
+ struct camdd_buf *buf = NULL;
+ struct camdd_buf_data *data;
+ size_t new_len;
+ struct camdd_buf_data *rb_data;
+ int is_write = dev->write_dev;
+ int eof_flush_needed = 0;
+ int retval = 0;
+
+ /*
+ * If we've gotten EOF or our partner has, we should not continue
+ * queueing I/O. If we're a writer, though, we should continue
+ * to write any buffers that don't have EOF status.
+ */
+ if ((dev->flags & CAMDD_DEV_FLAG_EOF)
+ || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
+ && (is_write == 0))) {
+ /*
+ * Tell the worker thread that we have seen EOF.
+ */
+ retval = 1;
+
+ /*
+ * If we're the writer, send the buffer back with EOF status.
+ */
+ if (is_write) {
+ read_buf->status = CAMDD_STATUS_EOF;
+
+			camdd_complete_peer_buf(dev, read_buf);
+ }
+ goto bailout;
+ }
+
+ if (is_write == 0) {
+ buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
+ if (buf == NULL) {
+ retval = -1;
+ goto bailout;
+ }
+ data = &buf->buf_type_spec.data;
+
+ retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
+ if (retval != 0) {
+ buf->status = CAMDD_STATUS_EOF;
+
+ if ((buf->len == 0)
+ && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
+ CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
+ camdd_release_buf(buf);
+ goto bailout;
+ }
+ dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
+ }
+
+ data->fill_len = buf->len;
+ data->src_start_offset = buf->lba * dev->sector_size;
+
+ /*
+ * Put this on the run queue.
+ */
+ STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+ dev->num_run_queue++;
+
+ /* We're done. */
+ goto bailout;
+ }
+
+ /*
+ * Check for new EOF status from the reader.
+ */
+ if ((read_buf->status == CAMDD_STATUS_EOF)
+ || (read_buf->status == CAMDD_STATUS_ERROR)) {
+ dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
+ if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
+ && (read_buf->len == 0)) {
+ camdd_complete_peer_buf(dev, read_buf);
+ retval = 1;
+ goto bailout;
+ } else
+ eof_flush_needed = 1;
+ }
+
+ /*
+ * See if we have a buffer we're composing with pieces from our
+ * partner thread.
+ */
+ buf = STAILQ_FIRST(&dev->pending_queue);
+ if (buf == NULL) {
+ uint64_t lba;
+ ssize_t len;
+
+ retval = camdd_get_next_lba_len(dev, &lba, &len);
+ if (retval != 0) {
+ read_buf->status = CAMDD_STATUS_EOF;
+
+ if (len == 0) {
+ dev->flags |= CAMDD_DEV_FLAG_EOF;
+				camdd_complete_peer_buf(dev, read_buf);
+ goto bailout;
+ }
+ }
+
+ /*
+ * If we don't have a pending buffer, we need to grab a new
+ * one from the free list or allocate another one.
+ */
+ buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
+ if (buf == NULL) {
+ retval = 1;
+ goto bailout;
+ }
+
+ buf->lba = lba;
+ buf->len = len;
+
+ STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
+ dev->num_pending_queue++;
+ }
+
+ data = &buf->buf_type_spec.data;
+
+ rb_data = &read_buf->buf_type_spec.data;
+
+ if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
+ && (dev->debug != 0)) {
+ printf("%s: WARNING: reader offset %#jx != expected offset "
+ "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
+ (uintmax_t)dev->next_peer_pos_bytes);
+ }
+ dev->next_peer_pos_bytes = rb_data->src_start_offset +
+ (rb_data->fill_len - rb_data->resid);
+
+ new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
+ if (new_len < buf->len) {
+ /*
+ * There are three cases here:
+ * 1. We need more data to fill up a block, so we put
+ * this I/O on the queue and wait for more I/O.
+ * 2. We have a pending buffer in the queue that is
+ * smaller than our blocksize, but we got an EOF. So we
+ * need to go ahead and flush the write out.
+ * 3. We got an error.
+ */
+
+ /*
+ * Increment our fill length.
+ */
+ data->fill_len += (rb_data->fill_len - rb_data->resid);
+
+ /*
+ * Add the new read buffer to the list for writing.
+ */
+ STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
+
+ /* Increment the count */
+ buf->src_count++;
+
+ if (eof_flush_needed == 0) {
+ /*
+ * We need to exit, because we don't have enough
+ * data yet.
+ */
+ goto bailout;
+ } else {
+ /*
+ * Take the buffer off of the pending queue.
+ */
+ STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
+ links);
+ dev->num_pending_queue--;
+
+ /*
+ * If we need an EOF flush, but there is no data
+ * to flush, go ahead and return this buffer.
+ */
+ if (data->fill_len == 0) {
+ camdd_complete_buf(dev, buf, /*error_count*/0);
+ retval = 1;
+ goto bailout;
+ }
+
+ /*
+ * Put this on the next queue for execution.
+ */
+ STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+ dev->num_run_queue++;
+ }
+ } else if (new_len == buf->len) {
+ /*
+		 * We have enough data to completely fill one block,
+ * so we're ready to issue the I/O.
+ */
+
+ /*
+ * Take the buffer off of the pending queue.
+ */
+ STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
+ dev->num_pending_queue--;
+
+ /*
+ * Add the new read buffer to the list for writing.
+ */
+ STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
+
+ /* Increment the count */
+ buf->src_count++;
+
+ /*
+ * Increment our fill length.
+ */
+ data->fill_len += (rb_data->fill_len - rb_data->resid);
+
+ /*
+ * Put this on the next queue for execution.
+ */
+ STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+ dev->num_run_queue++;
+ } else {
+ struct camdd_buf *idb;
+ struct camdd_buf_indirect *indirect;
+ uint32_t len_to_go, cur_offset;
+
+ idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
+ if (idb == NULL) {
+ retval = 1;
+ goto bailout;
+ }
+ indirect = &idb->buf_type_spec.indirect;
+ indirect->src_buf = read_buf;
+ read_buf->refcount++;
+ indirect->offset = 0;
+ indirect->start_ptr = rb_data->buf;
+ /*
+ * We've already established that there is more
+ * data in read_buf than we have room for in our
+ * current write request. So this particular chunk
+ * of the request should just be the remainder
+ * needed to fill up a block.
+ */
+ indirect->len = buf->len - (data->fill_len - data->resid);
+
+ camdd_buf_add_child(buf, idb);
+
+ /*
+ * This buffer is ready to execute, so we can take
+ * it off the pending queue and put it on the run
+ * queue.
+ */
+ STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
+ links);
+ dev->num_pending_queue--;
+ STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+ dev->num_run_queue++;
+
+ cur_offset = indirect->offset + indirect->len;
+
+ /*
+ * The resulting I/O would be too large to fit in
+ * one block. We need to split this I/O into
+ * multiple pieces. Allocate as many buffers as needed.
+ */
+ for (len_to_go = rb_data->fill_len - rb_data->resid -
+ indirect->len; len_to_go > 0;) {
+ struct camdd_buf *new_buf;
+ struct camdd_buf_data *new_data;
+ uint64_t lba;
+ ssize_t len;
+
+ retval = camdd_get_next_lba_len(dev, &lba, &len);
+ if ((retval != 0)
+ && (len == 0)) {
+ /*
+ * The device has already been marked
+ * as EOF, and there is no space left.
+ */
+ goto bailout;
+ }
+
+ new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
+ if (new_buf == NULL) {
+ retval = 1;
+ goto bailout;
+ }
+
+ new_buf->lba = lba;
+ new_buf->len = len;
+
+ idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
+ if (idb == NULL) {
+ retval = 1;
+ goto bailout;
+ }
+
+ indirect = &idb->buf_type_spec.indirect;
+
+ indirect->src_buf = read_buf;
+ read_buf->refcount++;
+ indirect->offset = cur_offset;
+ indirect->start_ptr = rb_data->buf + cur_offset;
+ indirect->len = min(len_to_go, new_buf->len);
+#if 0
+ if (((indirect->len % dev->sector_size) != 0)
+ || ((indirect->offset % dev->sector_size) != 0)) {
+ warnx("offset %ju len %ju not aligned with "
+				    "sector size %u",
+				    (uintmax_t)indirect->offset,
+				    (uintmax_t)indirect->len, dev->sector_size);
+ }
+#endif
+ cur_offset += indirect->len;
+ len_to_go -= indirect->len;
+
+ camdd_buf_add_child(new_buf, idb);
+
+ new_data = &new_buf->buf_type_spec.data;
+
+ if ((new_data->fill_len == new_buf->len)
+ || (eof_flush_needed != 0)) {
+ STAILQ_INSERT_TAIL(&dev->run_queue,
+ new_buf, links);
+ dev->num_run_queue++;
+ } else if (new_data->fill_len < buf->len) {
+ STAILQ_INSERT_TAIL(&dev->pending_queue,
+ new_buf, links);
+ dev->num_pending_queue++;
+ } else {
+ warnx("%s: too much data in new "
+ "buffer!", __func__);
+ retval = 1;
+ goto bailout;
+ }
+ }
+ }
+
+bailout:
+ return (retval);
+}
+
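+/*
+ * Report the current queue depth and queued byte count for this
+ * device and for its peer.  Assumes the caller holds the lock for
+ * this device.
+ */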
+void
+camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
+ uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
+{
+ *our_depth = dev->cur_active_io + dev->num_run_queue;
+ if (dev->num_peer_work_queue >
+ dev->num_peer_done_queue)
+ *peer_depth = dev->num_peer_work_queue -
+ dev->num_peer_done_queue;
+ else
+ *peer_depth = 0;
+ *our_bytes = *our_depth * dev->blocksize;
+ *peer_bytes = dev->peer_bytes_queued;
+}
+
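+/*
+ * SIGINFO requests a status display; any other signal we catch
+ * (i.e. SIGINT) requests an orderly exit.  Either way, wake up the
+ * main thread via the semaphore.
+ */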
+void
+camdd_sig_handler(int sig)
+{
+ if (sig == SIGINFO)
+ need_status = 1;
+ else {
+ need_exit = 1;
+ error_exit = 1;
+ }
+
+ sem_post(&camdd_sem);
+}
+
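+/*
+ * Print a transfer summary: bytes moved in each direction, elapsed
+ * time, and throughput.
+ */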
+void
+camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
+ struct timespec *start_time)
+{
+ struct timespec done_time;
+ uint64_t total_ns;
+ long double mb_sec, total_sec;
+ int error = 0;
+
+ error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
+ if (error != 0) {
+ warn("Unable to get done time");
+ return;
+ }
+
+ timespecsub(&done_time, start_time);
+
+ total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
+ total_sec = total_ns;
+ total_sec /= 1000000000;
+
+ fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
+ "%.4Lf seconds elapsed\n",
+ (uintmax_t)camdd_dev->bytes_transferred,
+ (camdd_dev->write_dev == 0) ? "read from" : "written to",
+ camdd_dev->device_name,
+ (uintmax_t)other_dev->bytes_transferred,
+ (other_dev->write_dev == 0) ? "read from" : "written to",
+ other_dev->device_name, total_sec);
+
+	mb_sec = min(other_dev->bytes_transferred,
+	    camdd_dev->bytes_transferred);
+ mb_sec /= 1024 * 1024;
+ mb_sec *= 1000000000;
+ mb_sec /= total_ns;
+ fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
+}
+
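+/*
+ * Set up the two endpoints (pass(4) devices or files), link them together
+ * as peers, start a worker thread for each, and then wait for status or
+ * exit events.
+ */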
+int
+camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
+ int retry_count, int timeout)
+{
+ char *device = NULL;
+ struct cam_device *new_cam_dev = NULL;
+ struct camdd_dev *devs[2];
+ struct timespec start_time;
+ pthread_t threads[2];
+ int unit = 0;
+ int error = 0;
+ int i;
+
+ if (num_io_opts != 2) {
+ warnx("Must have one input and one output path");
+ error = 1;
+ goto bailout;
+ }
+
+ bzero(devs, sizeof(devs));
+
+ for (i = 0; i < num_io_opts; i++) {
+ switch (io_opts[i].dev_type) {
+ case CAMDD_DEV_PASS: {
+ camdd_argmask new_arglist = CAMDD_ARG_NONE;
+ int bus = 0, target = 0, lun = 0;
+ char name[30];
+ int rv;
+
+			if (isdigit((unsigned char)io_opts[i].dev_name[0])) {
+ /* device specified as bus:target[:lun] */
+ rv = parse_btl(io_opts[i].dev_name, &bus,
+ &target, &lun, &new_arglist);
+ if (rv < 2) {
+ warnx("numeric device specification "
+ "must be either bus:target, or "
+ "bus:target:lun");
+ error = 1;
+ goto bailout;
+ }
+ /* default to 0 if lun was not specified */
+ if ((new_arglist & CAMDD_ARG_LUN) == 0) {
+ lun = 0;
+ new_arglist |= CAMDD_ARG_LUN;
+ }
+ } else {
+ if (cam_get_device(io_opts[i].dev_name, name,
+ sizeof name, &unit) == -1) {
+ warnx("%s", cam_errbuf);
+ error = 1;
+ goto bailout;
+ }
+ device = strdup(name);
+				new_arglist |= CAMDD_ARG_DEVICE |
+				    CAMDD_ARG_UNIT;
+ }
+
+ if (new_arglist & (CAMDD_ARG_BUS | CAMDD_ARG_TARGET))
+ new_cam_dev = cam_open_btl(bus, target, lun,
+ O_RDWR, NULL);
+ else
+ new_cam_dev = cam_open_spec_device(device, unit,
+ O_RDWR, NULL);
+ if (new_cam_dev == NULL) {
+ warnx("%s", cam_errbuf);
+ error = 1;
+ goto bailout;
+ }
+
+ devs[i] = camdd_probe_pass(new_cam_dev,
+ /*io_opts*/ &io_opts[i],
+ CAMDD_ARG_ERR_RECOVER,
+ /*probe_retry_count*/ 3,
+ /*probe_timeout*/ 5000,
+ /*io_retry_count*/ retry_count,
+ /*io_timeout*/ timeout);
+ if (devs[i] == NULL) {
+ warn("Unable to probe device %s%u",
+ new_cam_dev->device_name,
+ new_cam_dev->dev_unit_num);
+ error = 1;
+ goto bailout;
+ }
+ break;
+ }
+ case CAMDD_DEV_FILE: {
+ int fd = -1;
+
+ if (io_opts[i].dev_name[0] == '-') {
+ if (io_opts[i].write_dev != 0)
+ fd = STDOUT_FILENO;
+ else
+ fd = STDIN_FILENO;
+ } else {
+ if (io_opts[i].write_dev != 0) {
+ fd = open(io_opts[i].dev_name,
+					    O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
+ } else {
+ fd = open(io_opts[i].dev_name,
+ O_RDONLY);
+ }
+ }
+ if (fd == -1) {
+ warn("error opening file %s",
+ io_opts[i].dev_name);
+ error = 1;
+ goto bailout;
+ }
+
+ devs[i] = camdd_probe_file(fd, &io_opts[i],
+ retry_count, timeout);
+ if (devs[i] == NULL) {
+ error = 1;
+ goto bailout;
+ }
+
+ break;
+ }
+ default:
+ warnx("Unknown device type %d (%s)",
+ io_opts[i].dev_type, io_opts[i].dev_name);
+ error = 1;
+ goto bailout;
+			break; /*NOTREACHED*/
+ }
+
+ devs[i]->write_dev = io_opts[i].write_dev;
+
+ devs[i]->start_offset_bytes = io_opts[i].offset;
+
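+		/*
+		 * If the user capped the total transfer, compute the last
+		 * sector we may touch: the starting sector plus the number
+		 * of sectors in max_io, minus one.
+		 */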
+		if (max_io != 0) {
+			devs[i]->sector_io_limit =
+			    (devs[i]->start_offset_bytes /
+			    devs[i]->sector_size) +
+			    (max_io / devs[i]->sector_size) - 1;
+		}
+
+ devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
+		devs[i]->next_completion_pos_bytes =
+		    devs[i]->start_offset_bytes;
+ }
+
+ devs[0]->peer_dev = devs[1];
+ devs[1]->peer_dev = devs[0];
+ devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
+ devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
+
+ sem_init(&camdd_sem, /*pshared*/ 0, 0);
+
+ signal(SIGINFO, camdd_sig_handler);
+ signal(SIGINT, camdd_sig_handler);
+
+ error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
+ if (error != 0) {
+ warn("Unable to get start time");
+ goto bailout;
+ }
+
+ for (i = 0; i < num_io_opts; i++) {
+ error = pthread_create(&threads[i], NULL, camdd_worker,
+ (void *)devs[i]);
+ if (error != 0) {
+ warnc(error, "pthread_create() failed");
+ goto bailout;
+ }
+ }
+
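+	/*
+	 * Wait for a worker thread or a signal handler to post the
+	 * semaphore, then either print status or begin shutdown.
+	 */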
+ for (;;) {
+ if ((sem_wait(&camdd_sem) == -1)
+ || (need_exit != 0)) {
+ struct kevent ke;
+
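+			/*
+			 * Mark each device EOF and post a user event on its
+			 * kqueue so that a worker blocked in kevent() wakes
+			 * up and sees the exit request.
+			 */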
+ for (i = 0; i < num_io_opts; i++) {
+ EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
+ EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
+
+ devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
+
+ error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
+ NULL);
+ if (error == -1)
+ warn("%s: unable to wake up thread",
+ __func__);
+ error = 0;
+ }
+ break;
+ } else if (need_status != 0) {
+ camdd_print_status(devs[0], devs[1], &start_time);
+ need_status = 0;
+ }
+ }
+ for (i = 0; i < num_io_opts; i++) {
+ pthread_join(threads[i], NULL);
+ }
+
+ camdd_print_status(devs[0], devs[1], &start_time);
+
+bailout:
+
+ for (i = 0; i < num_io_opts; i++)
+ camdd_free_dev(devs[i]);
+
+ return (error + error_exit);
+}
+
+void
+usage(void)
+{
+ fprintf(stderr,
+"usage: camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
+" <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
+" <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
+" <-i|-o file=/dev/nsa0,bs=512K>\n"
+" [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
+"Option description\n"
+"-i <arg=val> Specify input device/file and parameters\n"
+"-o <arg=val> Specify output device/file and parameters\n"
+"Input and Output parameters\n"
+"pass=name Specify a pass(4) device like pass0 or /dev/pass0\n"
+"file=name Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
+" or - for stdin/stdout\n"
+"bs=blocksize Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
+"offset=len Specify starting offset in bytes or using K, M, G suffix\n"
+" NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
+"depth=N Specify a numeric queue depth. This only applies to pass(4)\n"
+"mcs=N Specify a minimum cmd size for pass(4) read/write commands\n"
+"Optional arguments\n"
+"-C retry_cnt Specify a retry count for pass(4) devices\n"
+"-E Enable CAM error recovery for pass(4) devices\n"
+"-m max_io Specify the maximum amount to be transferred in bytes or\n"
+" using K, G, M, etc. suffixes\n"
+"-t timeout Specify the I/O timeout to use with pass(4) devices\n"
+"-v Enable verbose error recovery\n"
+"-h Print this message\n");
+}
+
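+/*
+ * Parse a comma-separated list of name=value I/O parameters (file=, pass=,
+ * bs=, offset=, depth=, mcs= and debug=) into the given camdd_io_opts.
+ * Returns 0 on success and non-zero on failure.
+ */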
+int
+camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
+{
+ char *tmpstr, *tmpstr2;
+ char *orig_tmpstr = NULL;
+ int retval = 0;
+
+ io_opts->write_dev = is_write;
+
+ tmpstr = strdup(args);
+ if (tmpstr == NULL) {
+ warn("strdup failed");
+ retval = 1;
+ goto bailout;
+ }
+ orig_tmpstr = tmpstr;
+ while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
+ char *name, *value;
+
+ /*
+ * If the user creates an empty parameter by putting in two
+ * commas, skip over it and look for the next field.
+ */
+ if (*tmpstr2 == '\0')
+ continue;
+
+ name = strsep(&tmpstr2, "=");
+ if (*name == '\0') {
+ warnx("Got empty I/O parameter name");
+ retval = 1;
+ goto bailout;
+ }
+ value = strsep(&tmpstr2, "=");
+ if ((value == NULL)
+ || (*value == '\0')) {
+ warnx("Empty I/O parameter value for %s", name);
+ retval = 1;
+ goto bailout;
+ }
+ if (strncasecmp(name, "file", 4) == 0) {
+ io_opts->dev_type = CAMDD_DEV_FILE;
+ io_opts->dev_name = strdup(value);
+ if (io_opts->dev_name == NULL) {
+ warn("Error allocating memory");
+ retval = 1;
+ goto bailout;
+ }
+ } else if (strncasecmp(name, "pass", 4) == 0) {
+ io_opts->dev_type = CAMDD_DEV_PASS;
+ io_opts->dev_name = strdup(value);
+ if (io_opts->dev_name == NULL) {
+ warn("Error allocating memory");
+ retval = 1;
+ goto bailout;
+ }
+ } else if ((strncasecmp(name, "bs", 2) == 0)
+ || (strncasecmp(name, "blocksize", 9) == 0)) {
+ retval = expand_number(value, &io_opts->blocksize);
+ if (retval == -1) {
+ warn("expand_number(3) failed on %s=%s", name,
+ value);
+ retval = 1;
+ goto bailout;
+ }
+ } else if (strncasecmp(name, "depth", 5) == 0) {
+ char *endptr;
+
+ io_opts->queue_depth = strtoull(value, &endptr, 0);
+ if (*endptr != '\0') {
+ warnx("invalid queue depth %s", value);
+ retval = 1;
+ goto bailout;
+ }
+ } else if (strncasecmp(name, "mcs", 3) == 0) {
+ char *endptr;
+
+ io_opts->min_cmd_size = strtol(value, &endptr, 0);
+ if ((*endptr != '\0')
+ || ((io_opts->min_cmd_size > 16)
+ || (io_opts->min_cmd_size < 0))) {
+ warnx("invalid minimum cmd size %s", value);
+ retval = 1;
+ goto bailout;
+ }
+ } else if (strncasecmp(name, "offset", 6) == 0) {
+ retval = expand_number(value, &io_opts->offset);
+ if (retval == -1) {
+ warn("expand_number(3) failed on %s=%s", name,
+ value);
+ retval = 1;
+ goto bailout;
+ }
+ } else if (strncasecmp(name, "debug", 5) == 0) {
+ char *endptr;
+
+ io_opts->debug = strtoull(value, &endptr, 0);
+ if (*endptr != '\0') {
+ warnx("invalid debug level %s", value);
+ retval = 1;
+ goto bailout;
+ }
+ } else {
+ warnx("Unrecognized parameter %s=%s", name, value);
+ }
+ }
+bailout:
+ free(orig_tmpstr);
+
+ return (retval);
+}
+
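+/*
+ * Parse the command line into one input and one output specification and
+ * start the copy via camdd_rw().
+ */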
+int
+main(int argc, char **argv)
+{
+ int c;
+ camdd_argmask arglist = CAMDD_ARG_NONE;
+ int timeout = 0, retry_count = 1;
+ int error = 0;
+ uint64_t max_io = 0;
+ struct camdd_io_opts *opt_list = NULL;
+
+ if (argc == 1) {
+ usage();
+ exit(1);
+ }
+
+ opt_list = calloc(2, sizeof(struct camdd_io_opts));
+ if (opt_list == NULL) {
+ warn("Unable to allocate option list");
+ error = 1;
+ goto bailout;
+ }
+
+	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1) {
+ switch (c) {
+ case 'C':
+ retry_count = strtol(optarg, NULL, 0);
+ if (retry_count < 0)
+ errx(1, "retry count %d is < 0",
+ retry_count);
+ arglist |= CAMDD_ARG_RETRIES;
+ break;
+ case 'E':
+ arglist |= CAMDD_ARG_ERR_RECOVER;
+ break;
+ case 'i':
+ case 'o':
+ if (((c == 'i')
+ && (opt_list[0].dev_type != CAMDD_DEV_NONE))
+ || ((c == 'o')
+ && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
+ errx(1, "Only one input and output path "
+ "allowed");
+ }
+ error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
+ (c == 'o') ? &opt_list[1] : &opt_list[0]);
+ if (error != 0)
+ goto bailout;
+ break;
+ case 'm':
+ error = expand_number(optarg, &max_io);
+ if (error == -1) {
+ warn("invalid maximum I/O amount %s", optarg);
+ error = 1;
+ goto bailout;
+ }
+ break;
+ case 't':
+ timeout = strtol(optarg, NULL, 0);
+ if (timeout < 0)
+ errx(1, "invalid timeout %d", timeout);
+ /* Convert the timeout from seconds to ms */
+ timeout *= 1000;
+ arglist |= CAMDD_ARG_TIMEOUT;
+ break;
+ case 'v':
+ arglist |= CAMDD_ARG_VERBOSE;
+ break;
+ case 'h':
+ default:
+ usage();
+ exit(1);
+ break; /*NOTREACHED*/
+ }
+ }
+
+ if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
+ || (opt_list[1].dev_type == CAMDD_DEV_NONE))
+ errx(1, "Must specify both -i and -o");
+
+ /*
+ * Set the timeout if the user hasn't specified one.
+ */
+ if (timeout == 0)
+ timeout = CAMDD_PASS_RW_TIMEOUT;
+
+ error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
+
+bailout:
+ free(opt_list);
+
+ exit(error);
+}