21 files changed, 5983 insertions, 198 deletions
diff --git a/share/man/man4/pass.4 b/share/man/man4/pass.4
index 7819ea3..00b9ccd 100644
--- a/share/man/man4/pass.4
+++ b/share/man/man4/pass.4
@@ -27,7 +27,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 10, 1998
+.Dd March 17, 2015
 .Dt PASS 4
 .Os
 .Sh NAME
@@ -53,9 +53,13 @@ The
 .Nm
 driver attaches to every
 .Tn SCSI
+and
+.Tn ATA
 device found in the system.
 Since it attaches to every device, it provides a generic means of accessing
 .Tn SCSI
+and
+.Tn ATA
 devices, and allows the user to access devices which have no
 "standard" peripheral driver associated with them.
 .Sh KERNEL CONFIGURATION
@@ -65,10 +69,12 @@ device in the kernel;
 .Nm
 devices are automatically allocated as
 .Tn SCSI
+and
+.Tn ATA
 devices are found.
 .Sh IOCTLS
-.Bl -tag -width 012345678901234
-.It CAMIOCOMMAND
+.Bl -tag -width 5n
+.It CAMIOCOMMAND union ccb *
 This ioctl takes most kinds of CAM CCBs and passes them through to the CAM
 transport layer for action.
 Note that some CCB types are not allowed
@@ -79,7 +85,7 @@ Some examples of xpt-only CCBs are XPT_SCAN_BUS,
 XPT_DEV_MATCH, XPT_RESET_BUS, XPT_SCAN_LUN, XPT_ENG_INQ, and XPT_ENG_EXEC.
 These CCB types have various attributes that make it illogical or
 impossible to service them through the passthrough interface.
-.It CAMGETPASSTHRU
+.It CAMGETPASSTHRU union ccb *
 This ioctl takes an XPT_GDEVLIST CCB, and returns the passthrough device
 corresponding to the device in question.
 Although this ioctl is available through the
@@ -90,6 +96,109 @@ ioctl.
 It is probably more useful to issue this ioctl through the
 .Xr xpt 4
 device.
+.It CAMIOQUEUE union ccb *
+Queue a CCB to the
+.Xr pass 4
+driver to be executed asynchronously.
+The caller may use
+.Xr select 2 ,
+.Xr poll 2
+or
+.Xr kevent 2
+to receive notification when the CCB has completed.
+.Pp
+This ioctl takes most CAM CCBs, but some CCB types are not allowed through
+the pass device, and must be sent through the
+.Xr xpt 4
+device instead.
+Some examples of xpt-only CCBs are XPT_SCAN_BUS,
+XPT_DEV_MATCH, XPT_RESET_BUS, XPT_SCAN_LUN, XPT_ENG_INQ, and XPT_ENG_EXEC.
+These CCB types have various attributes that make it illogical or
+impossible to service them through the passthrough interface.
+.Pp
+Although the
+.Dv CAMIOQUEUE
+ioctl is not defined to take an argument, it does require a
+pointer to a union ccb.
+It is not defined to take an argument to avoid an extra malloc and copy
+inside the generic
+.Xr ioctl 2
+handler.
+.Pp
+The completed CCB will be returned via the
+.Dv CAMIOGET
+ioctl.
+An error will only be returned from the
+.Dv CAMIOQUEUE
+ioctl if there is an error allocating memory for the request or copying
+memory from userland.
+All other errors will be reported as standard CAM CCB status errors.
+Since the CCB is not copied back to the user process from the pass driver
+in the
+.Dv CAMIOQUEUE
+ioctl, the user's passed-in CCB will not be modified.
+This is the case even with immediate CCBs.
+Instead, the completed CCB must be retrieved via the
+.Dv CAMIOGET
+ioctl and the status examined.
+.Pp
+Multiple CCBs may be queued via the
+.Dv CAMIOQUEUE
+ioctl at any given time, and they may complete in a different order than
+the order that they were submitted.
+The caller must take steps to identify CCBs that are queued and completed.
+The
+.Dv periph_priv
+structure inside struct ccb_hdr is available for userland use with the
+.Dv CAMIOQUEUE
+and
+.Dv CAMIOGET
+ioctls, and will be preserved across calls.
+Also, the periph_links linked list pointers inside struct ccb_hdr are
+available for userland use with the
+.Dv CAMIOQUEUE
+and
+.Dv CAMIOGET
+ioctls and will be preserved across calls.
+.It CAMIOGET union ccb *
+Retrieve completed CAM CCBs queued via the
+.Dv CAMIOQUEUE
+ioctl.
+An error will only be returned from the
+.Dv CAMIOGET
+ioctl if the
+.Xr pass 4
+driver fails to copy data to the user process or if there are no completed
+CCBs available to retrieve.
+If no CCBs are available to retrieve,
+errno will be set to
+.Dv ENOENT .
+.Pp
+All other errors will be reported as standard CAM CCB status errors.
+.Pp
+Although the
+.Dv CAMIOGET
+ioctl is not defined to take an argument, it does require a
+pointer to a union ccb.
+It is not defined to take an argument to avoid an extra malloc and copy
+inside the generic
+.Xr ioctl 2
+handler.
+.Pp
+The pass driver will report via
+.Xr select 2 ,
+.Xr poll 2
+or
+.Xr kevent 2
+when a CCB has completed.
+One CCB may be retrieved per
+.Dv CAMIOGET
+call.
+CCBs may be returned in an order different than the order they were
+submitted.
+So the caller should use the
+.Dv periph_priv
+area inside the CCB header to store pointers to identifying information.
 .El
 .Sh FILES
 .Bl -tag -width /dev/passn -compact
@@ -103,18 +212,21 @@ CAM subsystem.
 .Sh DIAGNOSTICS
 None.
 .Sh SEE ALSO
+.Xr kqueue 2 ,
+.Xr poll 2 ,
+.Xr select 2 ,
 .Xr cam 3 ,
 .Xr cam 4 ,
 .Xr cam_cdbparse 3 ,
+.Xr cd 4 ,
+.Xr ctl 4 ,
+.Xr da 4 ,
+.Xr sa 4 ,
 .Xr xpt 4 ,
-.Xr camcontrol 8
+.Xr camcontrol 8 ,
+.Xr camdd 8
 .Sh HISTORY
 The CAM passthrough driver first appeared in
 .Fx 3.0 .
 .Sh AUTHORS
 .An Kenneth Merry Aq ken@FreeBSD.org
-.Sh BUGS
-It might be nice to have a way to asynchronously send CCBs through the
-passthrough driver.
-This would probably require some sort of read/write
-interface or an asynchronous ioctl interface.
diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c
index f88899e..005c684 100644
--- a/sys/cam/ata/ata_da.c
+++ b/sys/cam/ata/ata_da.c
@@ -1573,12 +1573,26 @@ adastart(struct cam_periph *periph, union ccb *start_ccb)
 		}
 		switch (bp->bio_cmd) {
 		case BIO_WRITE:
-			softc->flags |= ADA_FLAG_DIRTY;
-			/* FALLTHROUGH */
 		case BIO_READ:
 		{
 			uint64_t lba = bp->bio_pblkno;
 			uint16_t count = bp->bio_bcount / softc->params.secsize;
+			void *data_ptr;
+			int rw_op;
+
+			if (bp->bio_cmd == BIO_WRITE) {
+				softc->flags |= ADA_FLAG_DIRTY;
+				rw_op = CAM_DIR_OUT;
+			} else {
+				rw_op = CAM_DIR_IN;
+			}
+
+			data_ptr = bp->bio_data;
+			if ((bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0) {
+				rw_op |= CAM_DATA_BIO;
+				data_ptr = bp;
+			}
+
 #ifdef ADA_TEST_FAILURE
 			int fail = 0;
 
@@ -1623,12 +1637,9 @@ adastart(struct cam_periph *periph, union ccb *start_ccb)
 			cam_fill_ataio(ataio,
 			    ada_retry_count,
 			    adadone,
-			    (bp->bio_cmd == BIO_READ ? CAM_DIR_IN :
-				CAM_DIR_OUT) | ((bp->bio_flags & BIO_UNMAPPED)
-				!= 0 ? CAM_DATA_BIO : 0),
+			    rw_op,
 			    tag_code,
-			    ((bp->bio_flags & BIO_UNMAPPED) != 0) ? (void *)bp :
-				bp->bio_data,
+			    data_ptr,
 			    bp->bio_bcount,
 			    ada_default_timeout*1000);
 
diff --git a/sys/cam/cam_ccb.h b/sys/cam/cam_ccb.h
index 98bb9ea..12d3803 100644
--- a/sys/cam/cam_ccb.h
+++ b/sys/cam/cam_ccb.h
@@ -111,6 +111,9 @@ typedef enum {
 
 typedef enum {
 	CAM_EXTLUN_VALID	= 0x00000001,/* 64bit lun field is valid      */
+	CAM_USER_DATA_ADDR	= 0x00000002,/* Userspace data pointers */
+	CAM_SG_FORMAT_IOVEC	= 0x00000004,/* iovec instead of busdma S/G*/
+	CAM_UNMAPPED_BUF	= 0x00000008 /* use unmapped I/O */
 } ccb_xflags;
 
 /* XPT Opcodes for xpt_action */
diff --git a/sys/cam/cam_xpt.c b/sys/cam/cam_xpt.c
index ba0863a..6773829 100644
--- a/sys/cam/cam_xpt.c
+++ b/sys/cam/cam_xpt.c
@@ -3337,7 +3337,8 @@ xpt_merge_ccb(union ccb *master_ccb, union ccb *slave_ccb)
 }
 
 void
-xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
+xpt_setup_ccb_flags(struct ccb_hdr *ccb_h, struct cam_path *path,
+		    u_int32_t priority, u_int32_t flags)
 {
 
 	CAM_DEBUG(path, CAM_DEBUG_TRACE, ("xpt_setup_ccb\n"));
@@ -3355,10 +3356,16 @@ xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
 		ccb_h->target_lun = CAM_TARGET_WILDCARD;
 	}
 	ccb_h->pinfo.index = CAM_UNQUEUED_INDEX;
-	ccb_h->flags = 0;
+	ccb_h->flags = flags;
 	ccb_h->xflags = 0;
 }
 
+void
+xpt_setup_ccb(struct ccb_hdr *ccb_h, struct cam_path *path, u_int32_t priority)
+{
+	xpt_setup_ccb_flags(ccb_h, path, priority, /*flags*/ 0);
+}
+
 /* Path manipulation functions */
 cam_status
 xpt_create_path(struct cam_path **new_path_ptr, struct cam_periph *perph,
diff --git a/sys/cam/cam_xpt.h b/sys/cam/cam_xpt.h
index 1d983c9..ca7dccc 100644
--- a/sys/cam/cam_xpt.h
+++ b/sys/cam/cam_xpt.h
@@ -70,6 +70,10 @@ void			xpt_action_default(union ccb *new_ccb);
 union ccb		*xpt_alloc_ccb(void);
 union ccb		*xpt_alloc_ccb_nowait(void);
 void			xpt_free_ccb(union ccb *free_ccb);
+void			xpt_setup_ccb_flags(struct ccb_hdr *ccb_h,
+					    struct cam_path *path,
+					    u_int32_t priority,
+					    u_int32_t flags);
 void			xpt_setup_ccb(struct ccb_hdr *ccb_h,
 				      struct cam_path *path,
 				      u_int32_t priority);
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index 4e3fe76..1cd687a 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -2332,29 +2332,40 @@ skipstate:
 
 		switch (bp->bio_cmd) {
 		case BIO_WRITE:
-			softc->flags |= DA_FLAG_DIRTY;
-			/* FALLTHROUGH */
 		case BIO_READ:
+		{
+			void *data_ptr;
+			int rw_op;
+
+			if (bp->bio_cmd == BIO_WRITE) {
+				softc->flags |= DA_FLAG_DIRTY;
+				rw_op = SCSI_RW_WRITE;
+			} else {
+				rw_op = SCSI_RW_READ;
+			}
+
+			data_ptr = bp->bio_data;
+			if ((bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0) {
+				rw_op |= SCSI_RW_BIO;
+				data_ptr = bp;
+			}
+
 			scsi_read_write(&start_ccb->csio,
 					/*retries*/da_retry_count,
 					/*cbfcnp*/dadone,
 					/*tag_action*/tag_code,
-					/*read_op*/(bp->bio_cmd == BIO_READ ?
-					SCSI_RW_READ : SCSI_RW_WRITE) |
-					((bp->bio_flags & BIO_UNMAPPED) != 0 ?
-					SCSI_RW_BIO : 0),
+					rw_op,
 					/*byte2*/0,
 					softc->minimum_cmd_size,
 					/*lba*/bp->bio_pblkno,
 					/*block_count*/bp->bio_bcount /
 					softc->params.secsize,
-					/*data_ptr*/ (bp->bio_flags &
-					BIO_UNMAPPED) != 0 ? (void *)bp :
-					bp->bio_data,
+					data_ptr,
 					/*dxfer_len*/ bp->bio_bcount,
 					/*sense_len*/SSD_FULL_SIZE,
 					da_default_timeout * 1000);
 			break;
+		}
 		case BIO_FLUSH:
 			/*
 			 * BIO_FLUSH doesn't currently communicate
diff --git a/sys/cam/scsi/scsi_pass.c b/sys/cam/scsi/scsi_pass.c
index 174151e..09cda5b 100644
--- a/sys/cam/scsi/scsi_pass.c
+++ b/sys/cam/scsi/scsi_pass.c
@@ -28,27 +28,39 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_kdtrace.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
+#include <sys/conf.h>
 #include <sys/types.h>
 #include <sys/bio.h>
-#include <sys/malloc.h>
-#include <sys/fcntl.h>
-#include <sys/conf.h>
-#include <sys/errno.h>
+#include <sys/bus.h>
 #include <sys/devicestat.h>
+#include <sys/errno.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
 #include <sys/proc.h>
+#include <sys/poll.h>
+#include <sys/selinfo.h>
+#include <sys/sdt.h>
 #include <sys/taskqueue.h>
+#include <vm/uma.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include <machine/bus.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
 #include <cam/cam_periph.h>
 #include <cam/cam_queue.h>
+#include <cam/cam_xpt.h>
 #include <cam/cam_xpt_periph.h>
 #include <cam/cam_debug.h>
-#include <cam/cam_sim.h>
 #include <cam/cam_compat.h>
+#include <cam/cam_xpt_periph.h>
 
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_pass.h>
@@ -57,7 +69,11 @@ typedef enum {
 	PASS_FLAG_OPEN			= 0x01,
 	PASS_FLAG_LOCKED		= 0x02,
 	PASS_FLAG_INVALID		= 0x04,
-	PASS_FLAG_INITIAL_PHYSPATH	= 0x08
+	PASS_FLAG_INITIAL_PHYSPATH	= 0x08,
+	PASS_FLAG_ZONE_INPROG		= 0x10,
+	PASS_FLAG_ZONE_VALID		= 0x20,
+	PASS_FLAG_UNMAPPED_CAPABLE	= 0x40,
+	PASS_FLAG_ABANDONED_REF_SET	= 0x80
 } pass_flags;
 
 typedef enum {
@@ -65,38 +81,104 @@ typedef enum {
 } pass_state;
 
 typedef enum {
-	PASS_CCB_BUFFER_IO
+	PASS_CCB_BUFFER_IO,
+	PASS_CCB_QUEUED_IO
 } pass_ccb_types;
 
 #define ccb_type	ppriv_field0
-#define ccb_bp		ppriv_ptr1
+#define ccb_ioreq	ppriv_ptr1
 
-struct pass_softc {
-	pass_state	 state;
-	pass_flags	 flags;
-	u_int8_t	 pd_type;
-	union ccb	 saved_ccb;
-	int		 open_count;
-	u_int		 maxio;
-	struct devstat	*device_stats;
-	struct cdev	*dev;
-	struct cdev	*alias_dev;
-	struct task	 add_physpath_task;
+/*
+ * The maximum number of memory segments we preallocate.
+ */
+#define	PASS_MAX_SEGS	16
+
+typedef enum {
+	PASS_IO_NONE		= 0x00,
+	PASS_IO_USER_SEG_MALLOC	= 0x01,
+	PASS_IO_KERN_SEG_MALLOC	= 0x02,
+	PASS_IO_ABANDONED	= 0x04
+} pass_io_flags; 
+
+struct pass_io_req {
+	union ccb			 ccb;
+	union ccb			*alloced_ccb;
+	union ccb			*user_ccb_ptr;
+	camq_entry			 user_periph_links;
+	ccb_ppriv_area			 user_periph_priv;
+	struct cam_periph_map_info	 mapinfo;
+	pass_io_flags			 flags;
+	ccb_flags			 data_flags;
+	int				 num_user_segs;
+	bus_dma_segment_t		 user_segs[PASS_MAX_SEGS];
+	int				 num_kern_segs;
+	bus_dma_segment_t		 kern_segs[PASS_MAX_SEGS];
+	bus_dma_segment_t		*user_segptr;
+	bus_dma_segment_t		*kern_segptr;
+	int				 num_bufs;
+	uint32_t			 dirs[CAM_PERIPH_MAXMAPS];
+	uint32_t			 lengths[CAM_PERIPH_MAXMAPS];
+	uint8_t				*user_bufs[CAM_PERIPH_MAXMAPS];
+	uint8_t				*kern_bufs[CAM_PERIPH_MAXMAPS];
+	struct bintime			 start_time;
+	TAILQ_ENTRY(pass_io_req)	 links;
 };
 
+struct pass_softc {
+	pass_state		  state;
+	pass_flags		  flags;
+	u_int8_t		  pd_type;
+	union ccb		  saved_ccb;
+	int			  open_count;
+	u_int		 	  maxio;
+	struct devstat		 *device_stats;
+	struct cdev		 *dev;
+	struct cdev		 *alias_dev;
+	struct task		  add_physpath_task;
+	struct task		  shutdown_kqueue_task;
+	struct selinfo		  read_select;
+	TAILQ_HEAD(, pass_io_req) incoming_queue;
+	TAILQ_HEAD(, pass_io_req) active_queue;
+	TAILQ_HEAD(, pass_io_req) abandoned_queue;
+	TAILQ_HEAD(, pass_io_req) done_queue;
+	struct cam_periph	 *periph;
+	char			  zone_name[12];
+	char			  io_zone_name[12];
+	uma_zone_t		  pass_zone;
+	uma_zone_t		  pass_io_zone;
+	size_t			  io_zone_size;
+};
 
 static	d_open_t	passopen;
 static	d_close_t	passclose;
 static	d_ioctl_t	passioctl;
 static	d_ioctl_t	passdoioctl;
+static	d_poll_t	passpoll;
+static	d_kqfilter_t	passkqfilter;
+static	void		passreadfiltdetach(struct knote *kn);
+static	int		passreadfilt(struct knote *kn, long hint);
 
 static	periph_init_t	passinit;
 static	periph_ctor_t	passregister;
 static	periph_oninv_t	passoninvalidate;
 static	periph_dtor_t	passcleanup;
-static void		pass_add_physpath(void *context, int pending);
+static	periph_start_t	passstart;
+static	void		pass_shutdown_kqueue(void *context, int pending);
+static	void		pass_add_physpath(void *context, int pending);
 static	void		passasync(void *callback_arg, u_int32_t code,
 				  struct cam_path *path, void *arg);
+static	void		passdone(struct cam_periph *periph, 
+				 union ccb *done_ccb);
+static	int		passcreatezone(struct cam_periph *periph);
+static	void		passiocleanup(struct pass_softc *softc, 
+				      struct pass_io_req *io_req);
+static	int		passcopysglist(struct cam_periph *periph,
+				       struct pass_io_req *io_req,
+				       ccb_flags direction);
+static	int		passmemsetup(struct cam_periph *periph,
+				     struct pass_io_req *io_req);
+static	int		passmemdone(struct cam_periph *periph,
+				    struct pass_io_req *io_req);
 static	int		passerror(union ccb *ccb, u_int32_t cam_flags, 
 				  u_int32_t sense_flags);
 static 	int		passsendccb(struct cam_periph *periph, union ccb *ccb,
@@ -116,9 +198,19 @@ static struct cdevsw pass_cdevsw = {
 	.d_open =	passopen,
 	.d_close =	passclose,
 	.d_ioctl =	passioctl,
+	.d_poll = 	passpoll,
+	.d_kqfilter = 	passkqfilter,
 	.d_name =	"pass",
 };
 
+static struct filterops passread_filtops = {
+	.f_isfd	=	1,
+	.f_detach =	passreadfiltdetach,
+	.f_event =	passreadfilt
+};
+
+static MALLOC_DEFINE(M_SCSIPASS, "scsi_pass", "scsi passthrough buffers");
+
 static void
 passinit(void)
 {
@@ -138,6 +230,60 @@ passinit(void)
 }
 
 static void
+passrejectios(struct cam_periph *periph)
+{
+	struct pass_io_req *io_req, *io_req2;
+	struct pass_softc *softc;
+
+	softc = (struct pass_softc *)periph->softc;
+
+	/*
+	 * The user can no longer get status for I/O on the done queue, so
+	 * clean up all outstanding I/O on the done queue.
+	 */
+	TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
+		TAILQ_REMOVE(&softc->done_queue, io_req, links);
+		passiocleanup(softc, io_req);
+		uma_zfree(softc->pass_zone, io_req);
+	}
+
+	/*
+	 * The underlying device is gone, so we can't issue these I/Os.
+	 * The devfs node has been shut down, so we can't return status to
+	 * the user.  Free any I/O left on the incoming queue.
+	 */
+	TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links, io_req2) {
+		TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
+		passiocleanup(softc, io_req);
+		uma_zfree(softc->pass_zone, io_req);
+	}
+
+	/*
+	 * Normally we would put I/Os on the abandoned queue and acquire a
+	 * reference when we saw the final close.  But, the device went
+	 * away and devfs may have moved everything off to deadfs by the
+	 * time the I/O done callback is called; as a result, we won't see
+	 * any more closes.  So, if we have any active I/Os, we need to put
+	 * them on the abandoned queue.  When the abandoned queue is empty,
+	 * we'll release the remaining reference (see below) to the peripheral.
+	 */
+	TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links, io_req2) {
+		TAILQ_REMOVE(&softc->active_queue, io_req, links);
+		io_req->flags |= PASS_IO_ABANDONED;
+		TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req, links);
+	}
+
+	/*
+	 * If we put any I/O on the abandoned queue, acquire a reference.
+	 */
+	if ((!TAILQ_EMPTY(&softc->abandoned_queue))
+	 && ((softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0)) {
+		cam_periph_doacquire(periph);
+		softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
+	}
+}
+
+static void
 passdevgonecb(void *arg)
 {
 	struct cam_periph *periph;
@@ -165,17 +311,26 @@ passdevgonecb(void *arg)
 
 	/*
 	 * Release the reference held for the device node, it is gone now.
+	 * Accordingly, inform all queued I/Os of their fate.
 	 */
 	cam_periph_release_locked(periph);
+	passrejectios(periph);
 
 	/*
-	 * We reference the lock directly here, instead of using
+	 * We reference the SIM lock directly here, instead of using
 	 * cam_periph_unlock().  The reason is that the final call to
 	 * cam_periph_release_locked() above could result in the periph
 	 * getting freed.  If that is the case, dereferencing the periph
 	 * with a cam_periph_unlock() call would cause a page fault.
 	 */
 	mtx_unlock(mtx);
+
+	/*
+	 * We have to remove our kqueue context from a thread because it
+	 * may sleep.  It would be nice if we could get a callback from
+	 * kqueue when it is done cleaning up resources.
+	 */
+	taskqueue_enqueue(taskqueue_thread, &softc->shutdown_kqueue_task);
 }
 
 static void
@@ -197,12 +352,6 @@ passoninvalidate(struct cam_periph *periph)
 	 * when it has cleaned up its state.
 	 */
 	destroy_dev_sched_cb(softc->dev, passdevgonecb, periph);
-
-	/*
-	 * XXX Return all queued I/O with ENXIO.
-	 * XXX Handle any transactions queued to the card
-	 *     with XPT_ABORT_CCB.
-	 */
 }
 
 static void
@@ -212,9 +361,40 @@ passcleanup(struct cam_periph *periph)
 
 	softc = (struct pass_softc *)periph->softc;
 
+	cam_periph_assert(periph, MA_OWNED);
+	KASSERT(TAILQ_EMPTY(&softc->active_queue),
+		("%s called when there are commands on the active queue!\n",
+		__func__));
+	KASSERT(TAILQ_EMPTY(&softc->abandoned_queue),
+		("%s called when there are commands on the abandoned queue!\n",
+		__func__));
+	KASSERT(TAILQ_EMPTY(&softc->incoming_queue),
+		("%s called when there are commands on the incoming queue!\n",
+		__func__));
+	KASSERT(TAILQ_EMPTY(&softc->done_queue),
+		("%s called when there are commands on the done queue!\n",
+		__func__));
+
 	devstat_remove_entry(softc->device_stats);
 
 	cam_periph_unlock(periph);
+
+	/*
+	 * We call taskqueue_drain() for the physpath task to make sure it
+	 * is complete.  We drop the lock because this can potentially
+	 * sleep.  XXX KDM that is bad.  Need a way to get a callback when
+	 * a taskqueue is drained.
+	 *
+ 	 * Note that we don't drain the kqueue shutdown task queue.  This
+	 * is because we hold a reference on the periph for kqueue, and
+	 * release that reference from the kqueue shutdown task queue.  So
+	 * we cannot come into this routine unless we've released that
+	 * reference.  Also, because that could be the last reference, we
+	 * could be called from the cam_periph_release() call in
+	 * pass_shutdown_kqueue().  In that case, the taskqueue_drain()
+	 * would deadlock.  It would be preferable if we had a way to
+	 * get a callback when a taskqueue is done.
+	 */
 	taskqueue_drain(taskqueue_thread, &softc->add_physpath_task);
 
 	cam_periph_lock(periph);
@@ -223,10 +403,29 @@ passcleanup(struct cam_periph *periph)
 }
 
 static void
+pass_shutdown_kqueue(void *context, int pending)
+{
+	struct cam_periph *periph;
+	struct pass_softc *softc;
+
+	periph = context;
+	softc = periph->softc;
+
+	knlist_clear(&softc->read_select.si_note, /*is_locked*/ 0);
+	knlist_destroy(&softc->read_select.si_note);
+
+	/*
+	 * Release the reference we held for kqueue.
+	 */
+	cam_periph_release(periph);
+}
+
+static void
 pass_add_physpath(void *context, int pending)
 {
 	struct cam_periph *periph;
 	struct pass_softc *softc;
+	struct mtx *mtx;
 	char *physpath;
 
 	/*
@@ -236,34 +435,38 @@ pass_add_physpath(void *context, int pending)
 	periph = context;
 	softc = periph->softc;
 	physpath = malloc(MAXPATHLEN, M_DEVBUF, M_WAITOK);
-	cam_periph_lock(periph);
-	if (periph->flags & CAM_PERIPH_INVALID) {
-		cam_periph_unlock(periph);
+	mtx = cam_periph_mtx(periph);
+	mtx_lock(mtx);
+
+	if (periph->flags & CAM_PERIPH_INVALID)
 		goto out;
-	}
+
 	if (xpt_getattr(physpath, MAXPATHLEN,
 			"GEOM::physpath", periph->path) == 0
 	 && strlen(physpath) != 0) {
 
-		cam_periph_unlock(periph);
+		mtx_unlock(mtx);
 		make_dev_physpath_alias(MAKEDEV_WAITOK, &softc->alias_dev,
 					softc->dev, softc->alias_dev, physpath);
-		cam_periph_lock(periph);
+		mtx_lock(mtx);
 	}
 
+out:
 	/*
 	 * Now that we've made our alias, we no longer have to have a
 	 * reference to the device.
 	 */
-	if ((softc->flags & PASS_FLAG_INITIAL_PHYSPATH) == 0) {
+	if ((softc->flags & PASS_FLAG_INITIAL_PHYSPATH) == 0)
 		softc->flags |= PASS_FLAG_INITIAL_PHYSPATH;
-		cam_periph_unlock(periph);
-		dev_rel(softc->dev);
-	}
-	else
-		cam_periph_unlock(periph);
 
-out:
+	/*
+	 * We always acquire a reference to the periph before queueing this
+	 * task queue function, so it won't go away before we run.
+	 */
+	while (pending-- > 0)
+		cam_periph_release_locked(periph);
+	mtx_unlock(mtx);
+
 	free(physpath, M_DEVBUF);
 }
 
@@ -291,7 +494,7 @@ passasync(void *callback_arg, u_int32_t code,
 		 * process.
 		 */
 		status = cam_periph_alloc(passregister, passoninvalidate,
-					  passcleanup, NULL, "pass",
+					  passcleanup, passstart, "pass",
 					  CAM_PERIPH_BIO, path,
 					  passasync, AC_FOUND_DEVICE, cgd);
 
@@ -315,8 +518,19 @@ passasync(void *callback_arg, u_int32_t code,
 		buftype = (uintptr_t)arg;
 		if (buftype == CDAI_TYPE_PHYS_PATH) {
 			struct pass_softc *softc;
+			cam_status status;
 
 			softc = (struct pass_softc *)periph->softc;
+			/*
+			 * Acquire a reference to the periph before we
+			 * start the taskqueue, so that we don't run into
+			 * a situation where the periph goes away before
+			 * the task queue has a chance to run.
+			 */
+			status = cam_periph_acquire(periph);
+			if (status != CAM_REQ_CMP)
+				break;
+
 			taskqueue_enqueue(taskqueue_thread,
 					  &softc->add_physpath_task);
 		}
@@ -361,6 +575,17 @@ passregister(struct cam_periph *periph, void *arg)
 		softc->pd_type = T_DIRECT;
 
 	periph->softc = softc;
+	softc->periph = periph;
+	TAILQ_INIT(&softc->incoming_queue);
+	TAILQ_INIT(&softc->active_queue);
+	TAILQ_INIT(&softc->abandoned_queue);
+	TAILQ_INIT(&softc->done_queue);
+	snprintf(softc->zone_name, sizeof(softc->zone_name), "%s%d",
+		 periph->periph_name, periph->unit_number);
+	snprintf(softc->io_zone_name, sizeof(softc->io_zone_name), "%s%dIO",
+		 periph->periph_name, periph->unit_number);
+	softc->io_zone_size = MAXPHYS;
+	knlist_init_mtx(&softc->read_select.si_note, cam_periph_mtx(periph));
 
 	bzero(&cpi, sizeof(cpi));
 	xpt_setup_ccb(&cpi.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
@@ -374,6 +599,9 @@ passregister(struct cam_periph *periph, void *arg)
 	else
 		softc->maxio = cpi.maxio;	/* real value */
 
+	if (cpi.hba_misc & PIM_UNMAPPED)
+		softc->flags |= PASS_FLAG_UNMAPPED_CAPABLE;
+
 	/*
 	 * We pass in 0 for a blocksize, since we don't 
 	 * know what the blocksize of this device is, if 
@@ -391,6 +619,23 @@ passregister(struct cam_periph *periph, void *arg)
 			  DEVSTAT_PRIORITY_PASS);
 
 	/*
+	 * Initialize the taskqueue handler for shutting down kqueue.
+	 */
+	TASK_INIT(&softc->shutdown_kqueue_task, /*priority*/ 0,
+		  pass_shutdown_kqueue, periph);
+
+	/*
+	 * Acquire a reference to the periph that we can release once we've
+	 * cleaned up the kqueue.
+	 */
+	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
+		xpt_print(periph->path, "%s: lost periph during "
+			  "registration!\n", __func__);
+		cam_periph_lock(periph);
+		return (CAM_REQ_CMP_ERR);
+	}
+
+	/*
 	 * Acquire a reference to the periph before we create the devfs
 	 * instance for it.  We'll release this reference once the devfs
 	 * instance has been freed.
@@ -408,12 +653,15 @@ passregister(struct cam_periph *periph, void *arg)
 			      periph->periph_name, periph->unit_number);
 
 	/*
-	 * Now that we have made the devfs instance, hold a reference to it
-	 * until the task queue has run to setup the physical path alias.
-	 * That way devfs won't get rid of the device before we add our
-	 * alias.
+	 * Hold a reference to the periph before we create the physical
+	 * path alias so it can't go away.
 	 */
-	dev_ref(softc->dev);
+	if (cam_periph_acquire(periph) != CAM_REQ_CMP) {
+		xpt_print(periph->path, "%s: lost periph during "
+			  "registration!\n", __func__);
+		cam_periph_lock(periph);
+		return (CAM_REQ_CMP_ERR);
+	}
 
 	cam_periph_lock(periph);
 	softc->dev->si_drv1 = periph;
@@ -514,6 +762,55 @@ passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 	softc = periph->softc;
 	softc->open_count--;
 
+	if (softc->open_count == 0) {
+		struct pass_io_req *io_req, *io_req2;
+		int need_unlock;
+
+		need_unlock = 0;
+
+		TAILQ_FOREACH_SAFE(io_req, &softc->done_queue, links, io_req2) {
+			TAILQ_REMOVE(&softc->done_queue, io_req, links);
+			passiocleanup(softc, io_req);
+			uma_zfree(softc->pass_zone, io_req);
+		}
+
+		TAILQ_FOREACH_SAFE(io_req, &softc->incoming_queue, links,
+				   io_req2) {
+			TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
+			passiocleanup(softc, io_req);
+			uma_zfree(softc->pass_zone, io_req);
+		}
+
+		/*
+		 * If there are any active I/Os, we need to forcibly acquire a
+		 * reference to the peripheral so that we don't go away
+		 * before they complete.  We'll release the reference when
+		 * the abandoned queue is empty.
+		 */
+		io_req = TAILQ_FIRST(&softc->active_queue);
+		if ((io_req != NULL)
+		 && (softc->flags & PASS_FLAG_ABANDONED_REF_SET) == 0) {
+			cam_periph_doacquire(periph);
+			softc->flags |= PASS_FLAG_ABANDONED_REF_SET;
+		}
+
+		/*
+		 * Since the I/O in the active queue is not under our
+		 * control, just set a flag so that we can clean it up when
+		 * it completes and put it on the abandoned queue.  This
+		 * will prevent our sending spurious completions in the
+		 * event that the device is opened again before these I/Os
+		 * complete.
+		 */
+		TAILQ_FOREACH_SAFE(io_req, &softc->active_queue, links,
+				   io_req2) {
+			TAILQ_REMOVE(&softc->active_queue, io_req, links);
+			io_req->flags |= PASS_IO_ABANDONED;
+			TAILQ_INSERT_TAIL(&softc->abandoned_queue, io_req,
+					  links);
+		}
+	}
+
 	cam_periph_release_locked(periph);
 
 	/*
@@ -533,6 +830,915 @@ passclose(struct cdev *dev, int flag, int fmt, struct thread *td)
 	return (0);
 }
 
+
+static void
+passstart(struct cam_periph *periph, union ccb *start_ccb)
+{
+	struct pass_softc *sc = (struct pass_softc *)periph->softc;
+	struct pass_io_req *req;
+
+	/*
+	 * CAM start routine, called with an allocated CCB after an
+	 * xpt_schedule() request.  Only the normal state has work to do;
+	 * any other state leaves the CCB untouched.
+	 */
+	if (sc->state != PASS_STATE_NORMAL)
+		return;
+
+	/*
+	 * Pull the oldest queued request; with nothing waiting, hand the
+	 * CCB straight back.
+	 */
+	req = TAILQ_FIRST(&sc->incoming_queue);
+	if (req == NULL) {
+		xpt_release_ccb(start_ccb);
+		return;
+	}
+
+	/* The request is now in flight: incoming -> active. */
+	TAILQ_REMOVE(&sc->incoming_queue, req, links);
+	TAILQ_INSERT_TAIL(&sc->active_queue, req, links);
+
+	/*
+	 * Overlay the user's CCB onto the allocated one, then tag it so
+	 * that passdone() can find its way back to the request.
+	 */
+	xpt_merge_ccb(start_ccb, &req->ccb);
+	start_ccb->ccb_h.ccb_type = PASS_CCB_QUEUED_IO;
+	start_ccb->ccb_h.ccb_ioreq = req;
+	start_ccb->ccb_h.cbfcnp = passdone;
+	req->alloced_ccb = start_ccb;
+
+	/* Timestamp the request and open its devstat(9) transaction. */
+	binuptime(&req->start_time);
+	devstat_start_transaction(sc->device_stats, &req->start_time);
+	xpt_action(start_ccb);
+
+	/* More requests waiting?  Ask for another CCB. */
+	if (!TAILQ_EMPTY(&sc->incoming_queue))
+		xpt_schedule(periph, CAM_PRIORITY_NORMAL);
+}
+
+static void
+passdone(struct cam_periph *periph, union ccb *done_ccb)
+{ 
+	struct pass_softc *softc;
+	struct ccb_scsiio *csio;
+
+	softc = (struct pass_softc *)periph->softc;
+	/* Installed as cbfcnp on this driver's CCBs; needs the periph lock. */
+	cam_periph_assert(periph, MA_OWNED);
+
+	csio = &done_ccb->csio;
+	switch (csio->ccb_h.ccb_type) {
+	case PASS_CCB_QUEUED_IO: {
+		struct pass_io_req *io_req;
+
+		io_req = done_ccb->ccb_h.ccb_ioreq;
+#if 0
+		xpt_print(periph->path, "%s: called for user CCB %p\n",
+			  __func__, io_req->user_ccb_ptr);
+#endif
+		if (((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
+		 && (done_ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER)
+		 && ((io_req->flags & PASS_IO_ABANDONED) == 0)) {
+			int error;
+
+			error = passerror(done_ccb, CAM_RETRY_SELTO,
+					  SF_RETRY_UA | SF_NO_PRINT);
+
+			if (error == ERESTART) {
+				/*
+				 * A retry was scheduled, so
+				 * just return.
+				 */
+				return;
+			}
+		}
+
+		/*
+		 * Copy the allocated CCB contents back into the request's
+		 * own CCB so we can give status to the user on request.
+		 */
+		bcopy(done_ccb, &io_req->ccb, sizeof(*done_ccb));
+
+		/*
+		 * Log data/transaction completion with devstat(9).
+		 */
+		switch (done_ccb->ccb_h.func_code) {
+		case XPT_SCSI_IO:
+			devstat_end_transaction(softc->device_stats,
+			    done_ccb->csio.dxfer_len - done_ccb->csio.resid,
+			    done_ccb->csio.tag_action & 0x3,
+			    ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
+			    CAM_DIR_NONE) ? DEVSTAT_NO_DATA :
+			    (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
+			    DEVSTAT_WRITE : DEVSTAT_READ, NULL,
+			    &io_req->start_time);
+			break;
+		case XPT_ATA_IO:
+			devstat_end_transaction(softc->device_stats,
+			    done_ccb->ataio.dxfer_len - done_ccb->ataio.resid,
+			    done_ccb->ataio.tag_action & 0x3,
+			    ((done_ccb->ccb_h.flags & CAM_DIR_MASK) ==
+			    CAM_DIR_NONE) ? DEVSTAT_NO_DATA : 
+			    (done_ccb->ccb_h.flags & CAM_DIR_OUT) ?
+			    DEVSTAT_WRITE : DEVSTAT_READ, NULL,
+			    &io_req->start_time);
+			break;
+		case XPT_SMP_IO:
+			/*
+			 * XXX KDM this isn't quite right, but there isn't
+			 * currently an easy way to represent a bidirectional
+			 * transfer in devstat.  The only way to do it
+			 * and have the byte counts come out right would
+			 * mean that we would have to record two
+			 * transactions, one for the request and one for the
+			 * response.  For now, so that we report something,
+			 * just treat the entire thing as a read.
+			 */
+			devstat_end_transaction(softc->device_stats,
+			    done_ccb->smpio.smp_request_len +
+			    done_ccb->smpio.smp_response_len,
+			    DEVSTAT_TAG_SIMPLE, DEVSTAT_READ, NULL,
+			    &io_req->start_time);
+			break;
+		default:
+			devstat_end_transaction(softc->device_stats, 0,
+			    DEVSTAT_TAG_NONE, DEVSTAT_NO_DATA, NULL,
+			    &io_req->start_time);
+			break;
+		}
+
+		/*
+		 * In the normal case, take the completed I/O off of the
+		 * active queue and put it on the done queue.  Notify the
+		 * user that we have a completed I/O.
+		 */
+		if ((io_req->flags & PASS_IO_ABANDONED) == 0) {
+			TAILQ_REMOVE(&softc->active_queue, io_req, links);
+			TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
+			selwakeuppri(&softc->read_select, PRIBIO);
+			KNOTE_LOCKED(&softc->read_select.si_note, 0);
+		} else {
+			/*
+			 * In the case of an abandoned I/O (final close
+			 * without fetching the I/O), take it off of the
+			 * abandoned queue and free it.
+			 */
+			TAILQ_REMOVE(&softc->abandoned_queue, io_req, links);
+			passiocleanup(softc, io_req);
+			uma_zfree(softc->pass_zone, io_req);
+
+			/*
+			 * Release the done_ccb here, since we may wind up
+			 * freeing the peripheral when we decrement the
+			 * reference count below.
+			 */
+			xpt_release_ccb(done_ccb);
+
+			/*
+			 * If the abandoned queue is empty, we can release
+			 * our reference to the periph since we won't have
+			 * any more completions coming.
+			 */
+			if ((TAILQ_EMPTY(&softc->abandoned_queue))
+			 && (softc->flags & PASS_FLAG_ABANDONED_REF_SET)) {
+				softc->flags &= ~PASS_FLAG_ABANDONED_REF_SET;
+				cam_periph_release_locked(periph);
+			}
+
+			/*
+			 * We have already released the CCB, so we can
+			 * return.
+			 */
+			return;
+		}
+		break;
+	}
+	}
+	xpt_release_ccb(done_ccb);
+}
+
+/*
+ * Create the UMA zones that back queued pass(4) I/O.  Called with the
+ * periph lock held; the lock is dropped around the blocking
+ * uma_zcreate() calls and retaken before returning.  The first caller
+ * creates the zones while later callers sleep until it is done.
+ * Returns 0 on success, ENOMEM on creation failure, or msleep()'s error.
+ */
+static int
+passcreatezone(struct cam_periph *periph)
+{
+	struct pass_softc *softc;
+	int error;
+
+	error = 0;
+	softc = (struct pass_softc *)periph->softc;
+
+	cam_periph_assert(periph, MA_OWNED);
+	KASSERT(((softc->flags & PASS_FLAG_ZONE_VALID) == 0),
+		("%s called when the pass(4) zone is valid!\n", __func__));
+	KASSERT((softc->pass_zone == NULL),
+		("%s called when the pass(4) zone is allocated!\n", __func__));
+
+	if ((softc->flags & PASS_FLAG_ZONE_INPROG) == 0) {
+		/*
+		 * We're the first context through, so we need to create
+		 * the pass(4) UMA zones for I/O requests and I/O buffers.
+		 */
+		softc->flags |= PASS_FLAG_ZONE_INPROG;
+
+		/*
+		 * uma_zcreate() does a blocking (M_WAITOK) allocation,
+		 * so we cannot hold a mutex while we call it.
+		 */
+		cam_periph_unlock(periph);
+
+		softc->pass_zone = uma_zcreate(softc->zone_name,
+		    sizeof(struct pass_io_req), NULL, NULL, NULL, NULL,
+		    /*align*/ 0, /*flags*/ 0);
+		softc->pass_io_zone = uma_zcreate(softc->io_zone_name,
+		    softc->io_zone_size, NULL, NULL, NULL, NULL,
+		    /*align*/ 0, /*flags*/ 0);
+
+		cam_periph_lock(periph);
+
+		/*
+		 * The attempt is over either way.  Clear only the INPROG
+		 * bit, leaving every other softc flag intact, record the
+		 * outcome, and wake any waiters in both cases so that
+		 * they can see whether PASS_FLAG_ZONE_VALID got set.
+		 */
+		softc->flags &= ~PASS_FLAG_ZONE_INPROG;
+		if ((softc->pass_zone == NULL)
+		 || (softc->pass_io_zone == NULL)) {
+			if (softc->pass_zone == NULL)
+				xpt_print(periph->path, "unable to allocate "
+				    "IO Req UMA zone\n");
+			else
+				xpt_print(periph->path, "unable to allocate "
+				    "IO UMA zone\n");
+			error = ENOMEM;
+		} else
+			softc->flags |= PASS_FLAG_ZONE_VALID;
+		wakeup(&softc->pass_zone);
+	} else {
+		/*
+		 * Another context is already creating the zones.  Sleep
+		 * until that attempt has either finished or failed.
+		 */
+		while ((softc->flags & PASS_FLAG_ZONE_INPROG)
+		    && ((softc->flags & PASS_FLAG_ZONE_VALID) == 0)) {
+			error = msleep(&softc->pass_zone,
+				       cam_periph_mtx(periph), PRIBIO,
+				       "paszon", 0);
+			if (error != 0)
+				goto bailout;
+		}
+		/* If the zone creation failed, no luck for the user. */
+		if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0)
+			error = ENOMEM;
+	}
+bailout:
+	return (error);
+}
+
+/*
+ * Free the kernel-side memory attached to an I/O request and put the
+ * user's original buffer pointers back into its CCB.  passmemsetup()
+ * also calls this to unwind a partially completed setup.
+ */
+static void
+passiocleanup(struct pass_softc *softc, struct pass_io_req *io_req)
+{
+	union ccb *ccb;
+	u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
+	int i, numbufs;
+
+	ccb = &io_req->ccb;
+
+	switch (ccb->ccb_h.func_code) {
+	case XPT_DEV_MATCH:
+		numbufs = min(io_req->num_bufs, 2);
+		if (numbufs == 1) {
+			data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
+		} else {
+			data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns;
+			data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches;
+		}
+		break;
+	case XPT_SCSI_IO:
+	case XPT_CONT_TARGET_IO:
+		data_ptrs[0] = &ccb->csio.data_ptr;
+		numbufs = min(io_req->num_bufs, 1);
+		break;
+	case XPT_ATA_IO:
+		data_ptrs[0] = &ccb->ataio.data_ptr;
+		numbufs = min(io_req->num_bufs, 1);
+		break;
+	case XPT_SMP_IO:
+		numbufs = min(io_req->num_bufs, 2);
+		data_ptrs[0] = &ccb->smpio.smp_request;
+		data_ptrs[1] = &ccb->smpio.smp_response;
+		break;
+	case XPT_DEV_ADVINFO:
+		numbufs = min(io_req->num_bufs, 1);
+		data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
+		break;
+	default:
+		/* No other CCB type carries buffers handled here. */
+		return;
+	}
+
+	if (io_req->flags & PASS_IO_USER_SEG_MALLOC) {
+		free(io_req->user_segptr, M_SCSIPASS);
+		io_req->user_segptr = NULL;
+	}
+
+	/* We only want to free memory we malloced. */
+	if (io_req->data_flags == CAM_DATA_VADDR) {
+		for (i = 0; i < io_req->num_bufs; i++) {
+			if (io_req->kern_bufs[i] == NULL)
+				continue;
+			free(io_req->kern_bufs[i], M_SCSIPASS);
+			io_req->kern_bufs[i] = NULL;
+		}
+	} else if ((io_req->data_flags == CAM_DATA_SG)
+		&& (io_req->kern_segptr != NULL)) {
+		/*
+		 * kern_segptr stays NULL when setup fails after the segment
+		 * counts are recorded but before a kernel S/G list exists.
+		 */
+		for (i = 0; i < io_req->num_kern_segs; i++) {
+			if ((uint8_t *)(uintptr_t)
+			    io_req->kern_segptr[i].ds_addr == NULL)
+				continue;
+			uma_zfree(softc->pass_io_zone, (uint8_t *)(uintptr_t)
+			    io_req->kern_segptr[i].ds_addr);
+			io_req->kern_segptr[i].ds_addr = 0;
+		}
+	}
+
+	if (io_req->flags & PASS_IO_KERN_SEG_MALLOC) {
+		free(io_req->kern_segptr, M_SCSIPASS);
+		io_req->kern_segptr = NULL;
+	}
+
+	if (io_req->data_flags != CAM_DATA_PADDR) {
+		/* Restore the user's buffer pointers to their old values. */
+		for (i = 0; i < numbufs; i++) {
+			if (io_req->user_bufs[i] != NULL)
+				*data_ptrs[i] = io_req->user_bufs[i];
+		}
+	}
+}
+
+/*
+ * Copy data between the user and kernel S/G lists of an I/O request:
+ * kernel -> user for CAM_DIR_IN, user -> kernel otherwise.  Returns 0,
+ * EFAULT for an inaccessible user segment, or the copy routine's error.
+ */
+static int
+passcopysglist(struct cam_periph *periph, struct pass_io_req *io_req,
+	       ccb_flags direction)
+{
+	bus_size_t kern_watermark, user_watermark, len_to_copy;
+	bus_dma_segment_t *user_sglist, *kern_sglist;
+	int i, j, error;
+
+	error = 0;
+	kern_watermark = 0;
+	user_watermark = 0;
+	user_sglist = io_req->user_segptr;
+	kern_sglist = io_req->kern_segptr;
+
+	for (i = 0, j = 0; i < io_req->num_user_segs &&
+	     j < io_req->num_kern_segs;) {
+		uint8_t *user_ptr, *kern_ptr;
+
+		/* Full-width minimum: a narrowed length could stall the loop. */
+		len_to_copy = user_sglist[i].ds_len - user_watermark;
+		if (len_to_copy > kern_sglist[j].ds_len - kern_watermark)
+			len_to_copy = kern_sglist[j].ds_len - kern_watermark;
+
+		user_ptr = (uint8_t *)(uintptr_t)user_sglist[i].ds_addr;
+		user_ptr = user_ptr + user_watermark;
+		kern_ptr = (uint8_t *)(uintptr_t)kern_sglist[j].ds_addr;
+		kern_ptr = kern_ptr + kern_watermark;
+
+		user_watermark += len_to_copy;
+		kern_watermark += len_to_copy;
+
+		if (!useracc(user_ptr, len_to_copy,
+		    (direction == CAM_DIR_IN) ? VM_PROT_WRITE : VM_PROT_READ)) {
+			xpt_print(periph->path, "%s: unable to access user "
+				  "S/G list element %p len %ju\n", __func__,
+				  user_ptr, (uintmax_t)len_to_copy);
+			error = EFAULT;
+			goto bailout;
+		}
+
+		if (direction == CAM_DIR_IN) {
+			error = copyout(kern_ptr, user_ptr, len_to_copy);
+			if (error != 0) {
+				xpt_print(periph->path, "%s: copyout of %ju "
+				    "bytes from %p to %p failed with error "
+				    "%d\n", __func__, (uintmax_t)len_to_copy,
+				    kern_ptr, user_ptr, error);
+				goto bailout;
+			}
+		} else {
+			error = copyin(user_ptr, kern_ptr, len_to_copy);
+			if (error != 0) {
+				xpt_print(periph->path, "%s: copyin of %ju "
+				    "bytes from %p to %p failed with error "
+				    "%d\n", __func__, (uintmax_t)len_to_copy,
+				    user_ptr, kern_ptr, error);
+				goto bailout;
+			}
+		}
+
+		if (user_sglist[i].ds_len == user_watermark) {
+			i++;
+			user_watermark = 0;
+		}
+		if (kern_sglist[j].ds_len == kern_watermark) {
+			j++;
+			kern_watermark = 0;
+		}
+	}
+bailout:
+	return (error);
+}
+
+/*
+ * Prepare the data buffers of a queued CCB: record the user's buffer
+ * pointers, lengths and directions in io_req, then substitute kernel
+ * buffers, copying in any data the user is writing.  Called without the
+ * periph lock.  Returns 0 or an errno; failures unwind via passiocleanup().
+ */
+static int
+passmemsetup(struct cam_periph *periph, struct pass_io_req *io_req)
+{
+	union ccb *ccb;
+	struct pass_softc *softc;
+	int numbufs, i;
+	uint8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
+	uint32_t lengths[CAM_PERIPH_MAXMAPS];
+	uint32_t dirs[CAM_PERIPH_MAXMAPS];
+	uint32_t num_segs;
+	uint16_t *seg_cnt_ptr;
+	size_t maxmap;
+	int error;
+
+	cam_periph_assert(periph, MA_NOTOWNED);
+
+	softc = periph->softc;
+
+	error = 0;
+	ccb = &io_req->ccb;
+	maxmap = 0;
+	num_segs = 0;
+	seg_cnt_ptr = NULL;
+
+	switch(ccb->ccb_h.func_code) {
+	case XPT_DEV_MATCH:
+		if (ccb->cdm.match_buf_len == 0) {
+			printf("%s: invalid match buffer length 0\n", __func__);
+			return(EINVAL);
+		}
+		if (ccb->cdm.pattern_buf_len > 0) {
+			data_ptrs[0] = (u_int8_t **)&ccb->cdm.patterns;
+			lengths[0] = ccb->cdm.pattern_buf_len;
+			dirs[0] = CAM_DIR_OUT;
+			data_ptrs[1] = (u_int8_t **)&ccb->cdm.matches;
+			lengths[1] = ccb->cdm.match_buf_len;
+			dirs[1] = CAM_DIR_IN;
+			numbufs = 2;
+		} else {
+			data_ptrs[0] = (u_int8_t **)&ccb->cdm.matches;
+			lengths[0] = ccb->cdm.match_buf_len;
+			dirs[0] = CAM_DIR_IN;
+			numbufs = 1;
+		}
+		io_req->data_flags = CAM_DATA_VADDR;
+		break;
+	case XPT_SCSI_IO:
+	case XPT_CONT_TARGET_IO:
+		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
+			return(0);
+
+		/*
+		 * The user shouldn't be able to supply a bio.
+		 */
+		if ((ccb->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
+			return (EINVAL);
+
+		io_req->data_flags = ccb->ccb_h.flags & CAM_DATA_MASK;
+
+		data_ptrs[0] = &ccb->csio.data_ptr;
+		lengths[0] = ccb->csio.dxfer_len;
+		dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
+		num_segs = ccb->csio.sglist_cnt;
+		seg_cnt_ptr = &ccb->csio.sglist_cnt;
+		numbufs = 1;
+		maxmap = softc->maxio;
+		break;
+	case XPT_ATA_IO:
+		if ((ccb->ccb_h.flags & CAM_DIR_MASK) == CAM_DIR_NONE)
+			return(0);
+
+		/*
+		 * We only support a single virtual address for ATA I/O.
+		 */
+		if ((ccb->ccb_h.flags & CAM_DATA_MASK) != CAM_DATA_VADDR)
+			return (EINVAL);
+
+		io_req->data_flags = CAM_DATA_VADDR;
+
+		data_ptrs[0] = &ccb->ataio.data_ptr;
+		lengths[0] = ccb->ataio.dxfer_len;
+		dirs[0] = ccb->ccb_h.flags & CAM_DIR_MASK;
+		numbufs = 1;
+		maxmap = softc->maxio;
+		break;
+	case XPT_SMP_IO:
+		io_req->data_flags = CAM_DATA_VADDR;
+
+		data_ptrs[0] = &ccb->smpio.smp_request;
+		lengths[0] = ccb->smpio.smp_request_len;
+		dirs[0] = CAM_DIR_OUT;
+		data_ptrs[1] = &ccb->smpio.smp_response;
+		lengths[1] = ccb->smpio.smp_response_len;
+		dirs[1] = CAM_DIR_IN;
+		numbufs = 2;
+		maxmap = softc->maxio;
+		break;
+	case XPT_DEV_ADVINFO:
+		if (ccb->cdai.bufsiz == 0)
+			return (0);
+
+		io_req->data_flags = CAM_DATA_VADDR;
+
+		data_ptrs[0] = (uint8_t **)&ccb->cdai.buf;
+		lengths[0] = ccb->cdai.bufsiz;
+		dirs[0] = CAM_DIR_IN;
+		numbufs = 1;
+		break;
+	default:
+		return(EINVAL);
+		break; /* NOTREACHED */
+	}
+
+	io_req->num_bufs = numbufs;
+
+	/*
+	 * If there is a maximum, check to make sure that the user's
+	 * request fits within the limit.  In general, we should only have
+	 * a maximum length for requests that go to hardware.  Otherwise it
+	 * is whatever we're able to malloc.
+	 */
+	for (i = 0; i < numbufs; i++) {
+		io_req->user_bufs[i] = *data_ptrs[i];
+		io_req->dirs[i] = dirs[i];
+		io_req->lengths[i] = lengths[i];
+
+		if (maxmap == 0)
+			continue;
+
+		if (lengths[i] <= maxmap)
+			continue;
+
+		xpt_print(periph->path, "%s: data length %u > max allowed %zu "
+			  "bytes\n", __func__, lengths[i], maxmap);
+		error = EINVAL;
+		goto bailout;
+	}
+
+	switch (io_req->data_flags) {
+	case CAM_DATA_VADDR:
+		/* Map or copy the buffer into kernel address space */
+		for (i = 0; i < numbufs; i++) {
+			uint8_t *tmp_buf;
+
+			/*
+			 * If for some reason no length is specified, we
+			 * don't need to allocate anything.
+			 */
+			if (io_req->lengths[i] == 0)
+				continue;
+
+			/*
+			 * Make sure that the user's buffer is accessible
+			 * to that process.
+			 */
+			if (!useracc(io_req->user_bufs[i], io_req->lengths[i],
+			    (io_req->dirs[i] == CAM_DIR_IN) ? VM_PROT_WRITE :
+			     VM_PROT_READ)) {
+				xpt_print(periph->path, "%s: user address %p "
+				    "length %u is not accessible\n", __func__,
+				    io_req->user_bufs[i], io_req->lengths[i]);
+				error = EFAULT;
+				goto bailout;
+			}
+
+			tmp_buf = malloc(lengths[i], M_SCSIPASS,
+					 M_WAITOK | M_ZERO);
+			io_req->kern_bufs[i] = tmp_buf;
+			*data_ptrs[i] = tmp_buf;
+
+			/*
+			 * We only need to copy in if the user is writing.
+			 */
+			if (dirs[i] != CAM_DIR_OUT)
+				continue;
+
+			error = copyin(io_req->user_bufs[i],
+				       io_req->kern_bufs[i], lengths[i]);
+			if (error != 0) {
+				xpt_print(periph->path, "%s: copy of user "
+					  "buffer from %p to %p failed with "
+					  "error %d\n", __func__,
+					  io_req->user_bufs[i],
+					  io_req->kern_bufs[i], error);
+				goto bailout;
+			}
+		}
+		break;
+	case CAM_DATA_PADDR:
+		/* Pass down the pointer as-is */
+		break;
+	case CAM_DATA_SG: {
+		size_t sg_length, size_to_go, alloc_size;
+		uint32_t num_segs_needed;
+
+		/*
+		 * Copy the user S/G list in, and then copy in the
+		 * individual segments.
+		 */
+		/* We shouldn't see this, but check just in case. */
+		if (numbufs != 1) {
+			xpt_print(periph->path, "%s: cannot currently handle "
+				  "more than one S/G list per CCB\n", __func__);
+			error = EINVAL;
+			goto bailout;
+		}
+
+		/*
+		 * We have to have at least one segment.
+		 */
+		if (num_segs == 0) {
+			xpt_print(periph->path, "%s: CAM_DATA_SG flag set, "
+				  "but sglist_cnt=0!\n", __func__);
+			error = EINVAL;
+			goto bailout;
+		}
+
+		/*
+		 * Make sure the user specified the total length and didn't
+		 * just leave it to us to decode the S/G list.
+		 */
+		if (lengths[0] == 0) {
+			xpt_print(periph->path, "%s: no dxfer_len specified, "
+				  "but CAM_DATA_SG flag is set!\n", __func__);
+			error = EINVAL;
+			goto bailout;
+		}
+
+		/*
+		 * We allocate buffers in io_zone_size increments for an
+		 * S/G list.  This will generally be MAXPHYS.
+		 */
+		if (lengths[0] <= softc->io_zone_size)
+			num_segs_needed = 1;
+		else {
+			num_segs_needed = lengths[0] / softc->io_zone_size;
+			if ((lengths[0] % softc->io_zone_size) != 0)
+				num_segs_needed++;
+		}
+
+		/* Figure out the size of the S/G list */
+		sg_length = num_segs * sizeof(bus_dma_segment_t);
+		io_req->num_user_segs = num_segs;
+		io_req->num_kern_segs = num_segs_needed;
+
+		/* Save the user's S/G list pointer for later restoration */
+		io_req->user_bufs[0] = *data_ptrs[0];
+
+		/*
+		 * Use the segment array embedded in the request unless the
+		 * user's S/G list has more than PASS_MAX_SEGS entries.
+		 */
+		if (num_segs > PASS_MAX_SEGS) {
+			io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
+			    num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
+			io_req->flags |= PASS_IO_USER_SEG_MALLOC;
+		} else
+			io_req->user_segptr = io_req->user_segs;
+
+		if (!useracc(*data_ptrs[0], sg_length, VM_PROT_READ)) {
+			xpt_print(periph->path, "%s: unable to access user "
+				  "S/G list at %p\n", __func__, *data_ptrs[0]);
+			error = EFAULT;
+			goto bailout;
+		}
+
+		error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
+		if (error != 0) {
+			xpt_print(periph->path, "%s: copy of user S/G list "
+				  "from %p to %p failed with error %d\n",
+				  __func__, *data_ptrs[0], io_req->user_segptr,
+				  error);
+			goto bailout;
+		}
+
+		if (num_segs_needed > PASS_MAX_SEGS) {
+			io_req->kern_segptr = malloc(sizeof(bus_dma_segment_t) *
+			    num_segs_needed, M_SCSIPASS, M_WAITOK | M_ZERO);
+			io_req->flags |= PASS_IO_KERN_SEG_MALLOC;
+		} else {
+			io_req->kern_segptr = io_req->kern_segs;
+		}
+
+		/*
+		 * Allocate the kernel S/G list.  Segments are zeroed so a
+		 * short read never exposes stale kernel memory to the user.
+		 */
+		for (size_to_go = lengths[0], i = 0;
+		     size_to_go > 0 && i < num_segs_needed;
+		     i++, size_to_go -= alloc_size) {
+			uint8_t *kern_ptr;
+
+			alloc_size = min(size_to_go, softc->io_zone_size);
+			kern_ptr = uma_zalloc(softc->pass_io_zone,
+			    M_WAITOK | M_ZERO);
+			io_req->kern_segptr[i].ds_addr =
+			    (bus_addr_t)(uintptr_t)kern_ptr;
+			io_req->kern_segptr[i].ds_len = alloc_size;
+		}
+		if (size_to_go > 0) {
+			printf("%s: size_to_go = %zu, software error!\n",
+			       __func__, size_to_go);
+			error = EINVAL;
+			goto bailout;
+		}
+
+		*data_ptrs[0] = (uint8_t *)io_req->kern_segptr;
+		*seg_cnt_ptr = io_req->num_kern_segs;
+
+		/*
+		 * We only need to copy data here if the user is writing.
+		 */
+		if (dirs[0] == CAM_DIR_OUT)
+			error = passcopysglist(periph, io_req, dirs[0]);
+		break;
+	}
+	case CAM_DATA_SG_PADDR: {
+		size_t sg_length;
+
+		/*
+		 * We shouldn't see this, but check just in case.
+		 */
+		if (numbufs != 1) {
+			printf("%s: cannot currently handle more than one "
+			       "S/G list per CCB\n", __func__);
+			error = EINVAL;
+			goto bailout;
+		}
+
+		/*
+		 * We have to have at least one segment.
+		 */
+		if (num_segs == 0) {
+			xpt_print(periph->path, "%s: CAM_DATA_SG_PADDR flag "
+				  "set, but sglist_cnt=0!\n", __func__);
+			error = EINVAL;
+			goto bailout;
+		}
+
+		/*
+		 * Make sure the user specified the total length and didn't
+		 * just leave it to us to decode the S/G list.
+		 */
+		if (lengths[0] == 0) {
+			xpt_print(periph->path, "%s: no dxfer_len specified, "
+				  "but CAM_DATA_SG flag is set!\n", __func__);
+			error = EINVAL;
+			goto bailout;
+		}
+
+		/* Figure out the size of the S/G list */
+		sg_length = num_segs * sizeof(bus_dma_segment_t);
+		io_req->num_user_segs = num_segs;
+		io_req->num_kern_segs = io_req->num_user_segs;
+
+		/* Save the user's S/G list pointer for later restoration */
+		io_req->user_bufs[0] = *data_ptrs[0];
+
+		if (num_segs > PASS_MAX_SEGS) {
+			io_req->user_segptr = malloc(sizeof(bus_dma_segment_t) *
+			    num_segs, M_SCSIPASS, M_WAITOK | M_ZERO);
+			io_req->flags |= PASS_IO_USER_SEG_MALLOC;
+		} else
+			io_req->user_segptr = io_req->user_segs;
+
+		io_req->kern_segptr = io_req->user_segptr;
+
+		error = copyin(*data_ptrs[0], io_req->user_segptr, sg_length);
+		if (error != 0) {
+			xpt_print(periph->path, "%s: copy of user S/G list "
+				  "from %p to %p failed with error %d\n",
+				  __func__, *data_ptrs[0], io_req->user_segptr,
+				  error);
+			goto bailout;
+		}
+		break;
+	}
+	default:
+	case CAM_DATA_BIO:
+		/*
+		 * A user shouldn't be attaching a bio to the CCB.  It
+		 * isn't a user-accessible structure.
+		 */
+		error = EINVAL;
+		break;
+	}
+
+bailout:
+	if (error != 0)
+		passiocleanup(softc, io_req);
+
+	return (error);
+}
+
+/*
+ * Finish the data phase of a completed request before it goes back to
+ * the user: copy read data out of the kernel buffers, then have
+ * passiocleanup() free them and restore the user's pointers in the CCB.
+ * The cleanup runs whether or not the copy succeeded.
+ *
+ * Returns 0, the copyout()/passcopysglist() error, or EINVAL for
+ * CAM_DATA_BIO or an unrecognized data type.
+ */
+static int
+passmemdone(struct cam_periph *periph, struct pass_io_req *io_req)
+{
+	struct pass_softc *sc;
+	int idx, rc;
+
+	rc = 0;
+	sc = (struct pass_softc *)periph->softc;
+
+	switch (io_req->data_flags) {
+	case CAM_DATA_VADDR:
+		/*
+		 * Only buffers the request read into (CAM_DIR_IN) are
+		 * copied back; stop at the first failure.
+		 */
+		for (idx = 0; idx < io_req->num_bufs && rc == 0; idx++) {
+			if (io_req->dirs[idx] != CAM_DIR_IN)
+				continue;
+
+			rc = copyout(io_req->kern_bufs[idx],
+			    io_req->user_bufs[idx], io_req->lengths[idx]);
+			if (rc == 0)
+				continue;
+
+			xpt_print(periph->path, "Unable to copy %u "
+				  "bytes from %p to user address %p\n",
+				  io_req->lengths[idx],
+				  io_req->kern_bufs[idx],
+				  io_req->user_bufs[idx]);
+		}
+		break;
+	case CAM_DATA_SG:
+		/*
+		 * A read has to be scattered back into the user's own
+		 * segments; a write needs nothing further.
+		 */
+		if (io_req->dirs[0] == CAM_DIR_IN)
+			rc = passcopysglist(periph, io_req, io_req->dirs[0]);
+		break;
+	case CAM_DATA_PADDR:
+	case CAM_DATA_SG_PADDR:
+		/* Physical addresses: no kernel copy of the data exists. */
+		break;
+	case CAM_DATA_BIO:
+	default:
+		rc = EINVAL;
+		break;
+	}
+
+	/*
+	 * Success or failure, release the kernel memory and reset the
+	 * user's pointers to their original values.
+	 */
+	passiocleanup(sc, io_req);
+
+	return (rc);
+}
+
 static int
 passioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td)
 {
@@ -622,15 +1828,317 @@ passdoioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread
 
 		break;
 	}
+	case CAMIOQUEUE:
+	{
+		struct pass_io_req *io_req;
+		union ccb **user_ccb, *ccb;
+		xpt_opcode fc;
+
+		if ((softc->flags & PASS_FLAG_ZONE_VALID) == 0) {
+			error = passcreatezone(periph);
+			if (error != 0)
+				goto bailout;
+		}
+
+		/*
+		 * We're going to do a blocking allocation for this I/O
+		 * request, so we have to drop the lock.
+		 */
+		cam_periph_unlock(periph);
+
+		io_req = uma_zalloc(softc->pass_zone, M_WAITOK | M_ZERO);
+		ccb = &io_req->ccb;
+		user_ccb = (union ccb **)addr;
+
+		/*
+		 * Unlike the CAMIOCOMMAND ioctl above, we only have a
+		 * pointer to the user's CCB, so we have to copy the whole
+		 * thing in to a buffer we have allocated (above) instead
+		 * of allowing the ioctl code to malloc a buffer and copy
+		 * it in.
+		 *
+		 * This is an advantage for this asynchronous interface,
+		 * since we don't want the memory to get freed while the
+		 * CCB is outstanding.
+		 */
+#if 0
+		xpt_print(periph->path, "Copying user CCB %p to "
+			  "kernel address %p\n", *user_ccb, ccb);
+#endif
+		error = copyin(*user_ccb, ccb, sizeof(*ccb));
+		if (error != 0) {
+			xpt_print(periph->path, "Copy of user CCB %p to "
+				  "kernel address %p failed with error %d\n",
+				  *user_ccb, ccb, error);
+			uma_zfree(softc->pass_zone, io_req);
+			cam_periph_lock(periph);
+			break;
+		}
+
+		/*
+		 * Some CCB types, like scan bus and scan lun can only go
+		 * through the transport layer device.
+		 */
+		if (ccb->ccb_h.func_code & XPT_FC_XPT_ONLY) {
+			xpt_print(periph->path, "CCB function code %#x is "
+			    "restricted to the XPT device\n",
+			    ccb->ccb_h.func_code);
+			uma_zfree(softc->pass_zone, io_req);
+			cam_periph_lock(periph);
+			error = ENODEV;
+			break;
+		}
+
+		/*
+		 * Save the user's CCB pointer as well as his linked list
+		 * pointers and peripheral private area so that we can
+		 * restore these later.
+		 */
+		io_req->user_ccb_ptr = *user_ccb;
+		io_req->user_periph_links = ccb->ccb_h.periph_links;
+		io_req->user_periph_priv = ccb->ccb_h.periph_priv;
+
+		/*
+		 * Now that we've saved the user's values, we can set our
+		 * own peripheral private entry.
+		 */
+		ccb->ccb_h.ccb_ioreq = io_req;
+
+		/* Compatibility for RL/priority-unaware code. */
+		priority = ccb->ccb_h.pinfo.priority;
+		if (priority <= CAM_PRIORITY_OOB)
+		    priority += CAM_PRIORITY_OOB + 1;
+
+		/*
+		 * Setup fields in the CCB like the path and the priority.
+		 * The path in particular cannot be done in userland, since
+		 * it is a pointer to a kernel data structure.
+		 */
+		xpt_setup_ccb_flags(&ccb->ccb_h, periph->path, priority,
+				    ccb->ccb_h.flags);
+
+		/*
+		 * Setup our done routine.  There is no way for the user to
+		 * have a valid pointer here.
+		 */
+		ccb->ccb_h.cbfcnp = passdone;
+
+		fc = ccb->ccb_h.func_code;
+		/*
+		 * If this function code has memory that can be mapped in
+		 * or out, we need to call passmemsetup().
+		 */
+		if ((fc == XPT_SCSI_IO) || (fc == XPT_ATA_IO)
+		 || (fc == XPT_SMP_IO) || (fc == XPT_DEV_MATCH)
+		 || (fc == XPT_DEV_ADVINFO)) {
+			error = passmemsetup(periph, io_req);
+			if (error != 0) {
+				uma_zfree(softc->pass_zone, io_req);
+				cam_periph_lock(periph);
+				break;
+			}
+		} else
+			io_req->mapinfo.num_bufs_used = 0;
+
+		cam_periph_lock(periph);
+
+		/*
+		 * Everything goes on the incoming queue initially.
+		 */
+		TAILQ_INSERT_TAIL(&softc->incoming_queue, io_req, links);
+
+		/*
+		 * If the CCB is queued, and is not a user CCB, then
+		 * we need to allocate a slot for it.  Call xpt_schedule()
+		 * so that our start routine will get called when a CCB is
+		 * available.
+		 */
+		if ((fc & XPT_FC_QUEUED)
+		 && ((fc & XPT_FC_USER_CCB) == 0)) {
+			xpt_schedule(periph, priority);
+			break;
+		} 
+
+		/*
+		 * At this point, the CCB in question is either an
+		 * immediate CCB (like XPT_DEV_ADVINFO) or it is a user CCB
+		 * and therefore should be malloced, not allocated via a slot.
+		 * Remove the CCB from the incoming queue and add it to the
+		 * active queue.
+		 */
+		TAILQ_REMOVE(&softc->incoming_queue, io_req, links);
+		TAILQ_INSERT_TAIL(&softc->active_queue, io_req, links);
+
+		xpt_action(ccb);
+
+		/*
+		 * If this is not a queued CCB (i.e. it is an immediate CCB),
+		 * then it is already done.  We need to put it on the done
+		 * queue for the user to fetch.
+		 */
+		if ((fc & XPT_FC_QUEUED) == 0) {
+			TAILQ_REMOVE(&softc->active_queue, io_req, links);
+			TAILQ_INSERT_TAIL(&softc->done_queue, io_req, links);
+		}
+		break;
+	}
+	case CAMIOGET:
+	{
+		union ccb **user_ccb;
+		struct pass_io_req *io_req;
+		int old_error;
+
+		user_ccb = (union ccb **)addr;
+		old_error = 0;
+
+		io_req = TAILQ_FIRST(&softc->done_queue);
+		if (io_req == NULL) {
+			error = ENOENT;
+			break;
+		}
+
+		/*
+		 * Remove the I/O from the done queue.
+		 */
+		TAILQ_REMOVE(&softc->done_queue, io_req, links);
+
+		/*
+		 * We have to drop the lock during the copyout because the
+		 * copyout can result in VM faults that require sleeping.
+		 */
+		cam_periph_unlock(periph);
+
+		/*
+		 * Do any needed copies (e.g. for reads) and revert the
+		 * pointers in the CCB back to the user's pointers.
+		 */
+		error = passmemdone(periph, io_req);
+
+		old_error = error;
+
+		io_req->ccb.ccb_h.periph_links = io_req->user_periph_links;
+		io_req->ccb.ccb_h.periph_priv = io_req->user_periph_priv;
+
+#if 0
+		xpt_print(periph->path, "Copying to user CCB %p from "
+			  "kernel address %p\n", *user_ccb, &io_req->ccb);
+#endif
+
+		error = copyout(&io_req->ccb, *user_ccb, sizeof(union ccb));
+		if (error != 0) {
+			xpt_print(periph->path, "Copy to user CCB %p from "
+				  "kernel address %p failed with error %d\n",
+				  *user_ccb, &io_req->ccb, error);
+		}
+
+		/*
+		 * Prefer the first error we got back, and make sure we
+		 * don't overwrite bad status with good.
+		 */
+		if (old_error != 0)
+			error = old_error;
+
+		cam_periph_lock(periph);
+
+		/*
+		 * At this point, if there was an error, we could potentially
+		 * re-queue the I/O and try again.  But why?  The error
+		 * would almost certainly happen again.  We might as well
+		 * not leak memory.
+		 */
+		uma_zfree(softc->pass_zone, io_req);
+		break;
+	}
 	default:
 		error = cam_periph_ioctl(periph, cmd, addr, passerror);
 		break;
 	}
 
+bailout:
 	cam_periph_unlock(periph);
+
 	return(error);
 }
 
+/* poll(2): always writable; readable once the done queue is non-empty. */
+static int
+passpoll(struct cdev *dev, int poll_events, struct thread *td)
+{
+	struct cam_periph *periph;
+	struct pass_softc *softc;
+	int revents;
+
+	periph = (struct cam_periph *)dev->si_drv1;
+	if (periph == NULL)
+		return (ENXIO);
+	softc = (struct pass_softc *)periph->softc;
+	revents = poll_events & (POLLOUT | POLLWRNORM);
+	if ((poll_events & (POLLIN | POLLRDNORM)) != 0) {
+		/*
+		 * selrecord() runs under the lock so a completion cannot
+		 * slip in between the queue check and the registration.
+		 */
+		cam_periph_lock(periph);
+		if (!TAILQ_EMPTY(&softc->done_queue))
+			revents |= poll_events & (POLLIN | POLLRDNORM);
+		if (revents == 0)
+			selrecord(td, &softc->read_select);
+		cam_periph_unlock(periph);
+	}
+	return (revents);
+}
+
+/* kqueue attach: hook the knote onto this device's read knlist. */
+static int
+passkqfilter(struct cdev *dev, struct knote *kn)
+{
+	struct cam_periph *pp = (struct cam_periph *)dev->si_drv1;
+	struct pass_softc *sc;
+
+	if (pp == NULL)
+		return (ENXIO);
+	sc = (struct pass_softc *)pp->softc;
+
+	/* The filter ops get back to the softc through kn_hook. */
+	kn->kn_fop = &passread_filtops;
+	kn->kn_hook = (caddr_t)pp;
+	knlist_add(&sc->read_select.si_note, kn, 0);
+
+	return (0);
+}
+
+/*
+ * kqueue detach: unhook the knote from the device's read knlist.
+ */
+static void
+passreadfiltdetach(struct knote *kn)
+{
+	struct cam_periph *pp = (struct cam_periph *)kn->kn_hook;
+	struct pass_softc *sc = (struct pass_softc *)pp->softc;
+
+	knlist_remove(&sc->read_select.si_note, kn, 0);
+}
+
+/*
+ * kqueue read filter: returns 1 while at least one completed request is
+ * waiting on the done queue and 0 otherwise.  Asserts that the periph
+ * lock is held.
+ */
+static int
+passreadfilt(struct knote *kn, long hint)
+{
+	struct cam_periph *pp;
+	struct pass_softc *sc;
+
+	pp = (struct cam_periph *)kn->kn_hook;
+	sc = (struct pass_softc *)pp->softc;
+
+	cam_periph_assert(pp, MA_OWNED);
+
+	/* A non-empty done queue is the only readiness condition. */
+	return (!TAILQ_EMPTY(&sc->done_queue));
+}
+
 /*
  * Generally, "ccb" should be the CCB supplied by the kernel.  "inccb"
  * should be the CCB that is copied in from the user.
@@ -652,6 +2160,10 @@ passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb)
 	xpt_merge_ccb(ccb, inccb);
 
 	/*
+	 */
+	ccb->ccb_h.cbfcnp = passdone;
+
+	/*
 	 * Let cam_periph_mapmem do a sanity check on the data pointer format.
 	 * Even if no data transfer is needed, it's a cheap check and it
 	 * simplifies the code.
diff --git a/sys/cam/scsi/scsi_pass.h b/sys/cam/scsi/scsi_pass.h
index ae0e058..797ef08 100644
--- a/sys/cam/scsi/scsi_pass.h
+++ b/sys/cam/scsi/scsi_pass.h
@@ -39,4 +39,12 @@
 #define CAMIOCOMMAND	_IOWR(CAM_VERSION, 2, union ccb)
 #define CAMGETPASSTHRU	_IOWR(CAM_VERSION, 3, union ccb)
 
+/*
+ * These two ioctls take a union ccb *, but the argument type is left out
+ * of the ioctl definition so that the ioctl handling code does not malloc
+ * and free its own copy of the CCB or the CCB pointer.
+ */
+#define CAMIOQUEUE	_IO(CAM_VERSION, 4)
+#define CAMIOGET	_IO(CAM_VERSION, 5)
+
 #endif
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index dccd5b3..27ef8b3 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -99,6 +99,8 @@
 #include <vm/swap_pager.h>
 #include <vm/uma.h>
 
+#include <machine/bus.h>
+
 #define MD_MODVER 1
 
 #define MD_SHUTDOWN	0x10000		/* Tell worker thread to terminate. */
@@ -435,7 +437,7 @@ g_md_start(struct bio *bp)
 #define	MD_MALLOC_MOVE_CMP	5
 
 static int
-md_malloc_move(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
+md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
     void *ptr, u_char fill, int op)
 {
 	struct sf_buf *sf;
@@ -497,7 +499,7 @@ md_malloc_move(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
 			}
 			break;
 		default:
-			KASSERT(0, ("md_malloc_move unknown op %d\n", op));
+			KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op));
 			break;
 		}
 		if (error != 0)
@@ -520,10 +522,88 @@ md_malloc_move(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
 }
 
+/*
+ * Apply "op" to the next "len" bytes of a buffer described by a list of
+ * virtual address segments, starting "*pma_offs" bytes into the segment
+ * "*pvlist".  On success 0 is returned and both cursors are advanced
+ * past the bytes processed.
+ *
+ * MD_MALLOC_MOVE_ZERO and MD_MALLOC_MOVE_FILL store zeroes or "fill" in
+ * the segments and never dereference "ptr".  MD_MALLOC_MOVE_READ copies
+ * from "ptr" into the segments and MD_MALLOC_MOVE_WRITE copies from the
+ * segments to "ptr".  MD_MALLOC_MOVE_CMP treats "ptr" as an off_t *,
+ * stores the first byte there, and returns EDOOFUS, leaving the cursors
+ * untouched, unless all "len" bytes are equal to it.
+ */
 static int
+md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs,
+    unsigned len, void *ptr, u_char fill, int op)
+{
+	bus_dma_segment_t *vlist;
+	uint8_t *p, *end, first;
+	off_t *uc;
+	int ma_offs, seg_len;
+
+	vlist = *pvlist;
+	ma_offs = *pma_offs;
+	uc = ptr;
+
+	for (; len != 0; len -= seg_len) {
+		seg_len = imin(vlist->ds_len - ma_offs, len);
+		p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs;
+		switch (op) {
+		case MD_MALLOC_MOVE_ZERO:
+			bzero(p, seg_len);
+			break;
+		case MD_MALLOC_MOVE_FILL:
+			memset(p, fill, seg_len);
+			break;
+		case MD_MALLOC_MOVE_READ:
+			bcopy(ptr, p, seg_len);
+			cpu_flush_dcache(p, seg_len);
+			break;
+		case MD_MALLOC_MOVE_WRITE:
+			bcopy(p, ptr, seg_len);
+			break;
+		case MD_MALLOC_MOVE_CMP:
+			end = p + seg_len;
+			/*
+			 * "ptr" still equals "uc" only while no bytes have
+			 * been consumed.  Latch the reference byte then, and
+			 * hold every later byte, including the first byte of
+			 * each following segment, to it.
+			 */
+			if (ptr == (void *)uc)
+				*uc = *p;
+			first = *uc;
+			for (; p < end; p++) {
+				if (*p != first)
+					return (EDOOFUS);
+			}
+			break;
+		default:
+			KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op));
+			break;
+		}
+
+		ma_offs += seg_len;
+		if (ma_offs == vlist->ds_len) {
+			ma_offs = 0;
+			vlist++;
+		}
+		ptr = (uint8_t *)ptr + seg_len;
+	}
+	*pvlist = vlist;
+	*pma_offs = ma_offs;
+
+	return (0);
+}
+
+static int
 mdstart_malloc(struct md_s *sc, struct bio *bp)
 {
 	u_char *dst;
 	vm_page_t *m;
+	bus_dma_segment_t *vlist;
 	int i, error, error1, ma_offs, notmapped;
 	off_t secno, nsec, uc;
 	uintptr_t sp, osp;
@@ -538,10 +598,16 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
 	}
 
 	notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0;
+	vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
+	    (bus_dma_segment_t *)bp->bio_data : NULL;
 	if (notmapped) {
 		m = bp->bio_ma;
 		ma_offs = bp->bio_ma_offset;
 		dst = NULL;
+		KASSERT(vlist == NULL, ("vlists cannot be unmapped"));
+	} else if (vlist != NULL) {
+		ma_offs = bp->bio_ma_offset;
+		dst = NULL;
 	} else {
 		dst = bp->bio_data;
 	}
@@ -557,23 +623,36 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
 		} else if (bp->bio_cmd == BIO_READ) {
 			if (osp == 0) {
 				if (notmapped) {
-					error = md_malloc_move(&m, &ma_offs,
+					error = md_malloc_move_ma(&m, &ma_offs,
 					    sc->sectorsize, NULL, 0,
 					    MD_MALLOC_MOVE_ZERO);
+				} else if (vlist != NULL) {
+					error = md_malloc_move_vlist(&vlist,
+					    &ma_offs, sc->sectorsize, NULL, 0,
+					    MD_MALLOC_MOVE_ZERO);
 				} else
 					bzero(dst, sc->sectorsize);
 			} else if (osp <= 255) {
 				if (notmapped) {
-					error = md_malloc_move(&m, &ma_offs,
+					error = md_malloc_move_ma(&m, &ma_offs,
 					    sc->sectorsize, NULL, osp,
 					    MD_MALLOC_MOVE_FILL);
+				} else if (vlist != NULL) {
+					error = md_malloc_move_vlist(&vlist,
+					    &ma_offs, sc->sectorsize, NULL, osp,
+					    MD_MALLOC_MOVE_FILL);
 				} else
 					memset(dst, osp, sc->sectorsize);
 			} else {
 				if (notmapped) {
-					error = md_malloc_move(&m, &ma_offs,
+					error = md_malloc_move_ma(&m, &ma_offs,
 					    sc->sectorsize, (void *)osp, 0,
 					    MD_MALLOC_MOVE_READ);
+				} else if (vlist != NULL) {
+					error = md_malloc_move_vlist(&vlist,
+					    &ma_offs, sc->sectorsize,
+					    (void *)osp, 0,
+					    MD_MALLOC_MOVE_READ);
 				} else {
 					bcopy((void *)osp, dst, sc->sectorsize);
 					cpu_flush_dcache(dst, sc->sectorsize);
@@ -583,10 +662,15 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
 		} else if (bp->bio_cmd == BIO_WRITE) {
 			if (sc->flags & MD_COMPRESS) {
 				if (notmapped) {
-					error1 = md_malloc_move(&m, &ma_offs,
+					error1 = md_malloc_move_ma(&m, &ma_offs,
 					    sc->sectorsize, &uc, 0,
 					    MD_MALLOC_MOVE_CMP);
 					i = error1 == 0 ? sc->sectorsize : 0;
+				} else if (vlist != NULL) {
+					error1 = md_malloc_move_vlist(&vlist,
+					    &ma_offs, sc->sectorsize, &uc, 0,
+					    MD_MALLOC_MOVE_CMP);
+					i = error1 == 0 ? sc->sectorsize : 0;
 				} else {
 					uc = dst[0];
 					for (i = 1; i < sc->sectorsize; i++) {
@@ -611,10 +695,15 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
 						break;
 					}
 					if (notmapped) {
-						error = md_malloc_move(&m,
+						error = md_malloc_move_ma(&m,
 						    &ma_offs, sc->sectorsize,
 						    (void *)sp, 0,
 						    MD_MALLOC_MOVE_WRITE);
+					} else if (vlist != NULL) {
+						error = md_malloc_move_vlist(
+						    &vlist, &ma_offs,
+						    sc->sectorsize, (void *)sp,
+						    0, MD_MALLOC_MOVE_WRITE);
 					} else {
 						bcopy(dst, (void *)sp,
 						    sc->sectorsize);
@@ -622,10 +711,15 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
 					error = s_write(sc->indir, secno, sp);
 				} else {
 					if (notmapped) {
-						error = md_malloc_move(&m,
+						error = md_malloc_move_ma(&m,
 						    &ma_offs, sc->sectorsize,
 						    (void *)osp, 0,
 						    MD_MALLOC_MOVE_WRITE);
+					} else if (vlist != NULL) {
+						error = md_malloc_move_vlist(
+						    &vlist, &ma_offs,
+						    sc->sectorsize, (void *)osp,
+						    0, MD_MALLOC_MOVE_WRITE);
 					} else {
 						bcopy(dst, (void *)osp,
 						    sc->sectorsize);
@@ -641,26 +735,96 @@ mdstart_malloc(struct md_s *sc, struct bio *bp)
 		if (error != 0)
 			break;
 		secno++;
-		if (!notmapped)
+		if (!notmapped && vlist == NULL)
 			dst += sc->sectorsize;
 	}
 	bp->bio_resid = 0;
 	return (error);
 }
 
+/*
+ * Copy "len" bytes from the contiguous buffer "src" into the segment
+ * list "vlist", beginning "offset" bytes into the list, and flush each
+ * range written from the data cache.  The list is not bounds-checked;
+ * it must describe at least offset + len bytes.
+ */
+static void
+mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len)
+{
+	off_t seg_len;
+	void *dst;
+
+	while (offset >= vlist->ds_len) {
+		offset -= vlist->ds_len;
+		vlist++;
+	}
+
+	while (len != 0) {
+		seg_len = omin(len, vlist->ds_len - offset);
+		dst = (void *)(uintptr_t)(vlist->ds_addr + offset);
+		bcopy(src, dst, seg_len);
+		/* Flush the data itself; the caller only has the list. */
+		cpu_flush_dcache(dst, seg_len);
+		offset = 0;
+		src = (uint8_t *)src + seg_len;
+		len -= seg_len;
+		vlist++;
+	}
+}
+
+/*
+ * Copy "len" bytes out of the segment list "vlist", beginning "offset"
+ * bytes into the list, to the contiguous buffer "dst".  The list is not
+ * bounds-checked; it must describe at least offset + len bytes.
+ */
+static void
+mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len)
+{
+	off_t seg_len;
+
+	while (offset >= vlist->ds_len) {
+		offset -= vlist->ds_len;
+		vlist++;
+	}
+
+	while (len != 0) {
+		seg_len = omin(len, vlist->ds_len - offset);
+		bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst,
+		    seg_len);
+		offset = 0;
+		dst = (uint8_t *)dst + seg_len;
+		len -= seg_len;
+		vlist++;
+	}
+}
+
 static int
 mdstart_preload(struct md_s *sc, struct bio *bp)
 {
+	uint8_t *p;
 
+	p = sc->pl_ptr + bp->bio_offset;
 	switch (bp->bio_cmd) {
 	case BIO_READ:
-		bcopy(sc->pl_ptr + bp->bio_offset, bp->bio_data,
-		    bp->bio_length);
-		cpu_flush_dcache(bp->bio_data, bp->bio_length);
+		if ((bp->bio_flags & BIO_VLIST) != 0) {
+			/*
+			 * bio_data is the segment list here, not the data;
+			 * mdcopyto_vlist() flushes each segment it fills.
+			 */
+			mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data,
+			    bp->bio_ma_offset, bp->bio_length);
+		} else {
+			bcopy(p, bp->bio_data, bp->bio_length);
+			cpu_flush_dcache(bp->bio_data, bp->bio_length);
+		}
 		break;
 	case BIO_WRITE:
-		bcopy(bp->bio_data, sc->pl_ptr + bp->bio_offset,
-		    bp->bio_length);
+		if ((bp->bio_flags & BIO_VLIST) != 0) {
+			mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data,
+			    bp->bio_ma_offset, p, bp->bio_length);
+		} else {
+			bcopy(bp->bio_data, p, bp->bio_length);
+		}
 		break;
 	}
 	bp->bio_resid = 0;
@@ -673,16 +819,23 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 	int error;
 	struct uio auio;
 	struct iovec aiov;
+	struct iovec *piov;
 	struct mount *mp;
 	struct vnode *vp;
 	struct buf *pb;
+	bus_dma_segment_t *vlist;
 	struct thread *td;
-	off_t end, zerosize;
+	off_t len, zerosize;
+	int ma_offs;
 
 	switch (bp->bio_cmd) {
 	case BIO_READ:
+		auio.uio_rw = UIO_READ;
+		break;
 	case BIO_WRITE:
 	case BIO_DELETE:
+		auio.uio_rw = UIO_WRITE;
+		break;
 	case BIO_FLUSH:
 		break;
 	default:
@@ -691,6 +844,9 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 
 	td = curthread;
 	vp = sc->vnode;
+	pb = NULL;
+	piov = NULL;
+	ma_offs = bp->bio_ma_offset;
 
 	/*
 	 * VNODE I/O
@@ -709,73 +865,66 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 		return (error);
 	}
 
-	bzero(&auio, sizeof(auio));
+	auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
+	auio.uio_resid = bp->bio_length;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_td = td;
 
-	/*
-	 * Special case for BIO_DELETE.  On the surface, this is very
-	 * similar to BIO_WRITE, except that we write from our own
-	 * fixed-length buffer, so we have to loop.  The net result is
-	 * that the two cases end up having very little in common.
-	 */
 	if (bp->bio_cmd == BIO_DELETE) {
+		/*
+		 * Emulate BIO_DELETE by writing zeros.
+		 */
 		zerosize = ZERO_REGION_SIZE -
 		    (ZERO_REGION_SIZE % sc->sectorsize);
-		auio.uio_iov = &aiov;
-		auio.uio_iovcnt = 1;
-		auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
-		auio.uio_segflg = UIO_SYSSPACE;
-		auio.uio_rw = UIO_WRITE;
-		auio.uio_td = td;
-		end = bp->bio_offset + bp->bio_length;
-		(void) vn_start_write(vp, &mp, V_WAIT);
-		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-		error = 0;
-		while (auio.uio_offset < end) {
-			aiov.iov_base = __DECONST(void *, zero_region);
-			aiov.iov_len = end - auio.uio_offset;
-			if (aiov.iov_len > zerosize)
-				aiov.iov_len = zerosize;
-			auio.uio_resid = aiov.iov_len;
-			error = VOP_WRITE(vp, &auio,
-			    sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred);
-			if (error != 0)
-				break;
+		auio.uio_iovcnt = howmany(bp->bio_length, zerosize);
+		piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK);
+		auio.uio_iov = piov;
+		len = bp->bio_length;
+		while (len > 0) {
+			piov->iov_base = __DECONST(void *, zero_region);
+			piov->iov_len = len;
+			if (len > zerosize)
+				piov->iov_len = zerosize;
+			len -= piov->iov_len;
+			piov++;
 		}
-		VOP_UNLOCK(vp, 0);
-		vn_finished_write(mp);
-		bp->bio_resid = end - auio.uio_offset;
-		return (error);
-	}
-
-	KASSERT(bp->bio_length <= MAXPHYS, ("bio_length %jd",
-	    (uintmax_t)bp->bio_length));
-	if ((bp->bio_flags & BIO_UNMAPPED) == 0) {
-		pb = NULL;
-		aiov.iov_base = bp->bio_data;
-	} else {
+		piov = auio.uio_iov;
+	} else if ((bp->bio_flags & BIO_VLIST) != 0) {
+		piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK);
+		auio.uio_iov = piov;
+		vlist = (bus_dma_segment_t *)bp->bio_data;
+		len = bp->bio_length;
+		while (len > 0) {
+			piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr +
+			    ma_offs);
+			piov->iov_len = vlist->ds_len - ma_offs;
+			if (piov->iov_len > len)
+				piov->iov_len = len;
+			len -= piov->iov_len;
+			ma_offs = 0;
+			vlist++;
+			piov++;
+		}
+		auio.uio_iovcnt = piov - auio.uio_iov;
+		piov = auio.uio_iov;
+	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
 		pb = getpbuf(&md_vnode_pbuf_freecnt);
 		pmap_qenter((vm_offset_t)pb->b_data, bp->bio_ma, bp->bio_ma_n);
-		aiov.iov_base = (void *)((vm_offset_t)pb->b_data +
-		    bp->bio_ma_offset);
+		aiov.iov_base = (void *)((vm_offset_t)pb->b_data + ma_offs);
+		aiov.iov_len = bp->bio_length;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
+	} else {
+		aiov.iov_base = bp->bio_data;
+		aiov.iov_len = bp->bio_length;
+		auio.uio_iov = &aiov;
+		auio.uio_iovcnt = 1;
 	}
-	aiov.iov_len = bp->bio_length;
-	auio.uio_iov = &aiov;
-	auio.uio_iovcnt = 1;
-	auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
-	auio.uio_segflg = UIO_SYSSPACE;
-	if (bp->bio_cmd == BIO_READ)
-		auio.uio_rw = UIO_READ;
-	else if (bp->bio_cmd == BIO_WRITE)
-		auio.uio_rw = UIO_WRITE;
-	else
-		panic("wrong BIO_OP in mdstart_vnode");
-	auio.uio_resid = bp->bio_length;
-	auio.uio_td = td;
 	/*
 	 * When reading set IO_DIRECT to try to avoid double-caching
 	 * the data.  When writing IO_DIRECT is not optimal.
 	 */
-	if (bp->bio_cmd == BIO_READ) {
+	if (auio.uio_rw == UIO_READ) {
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred);
 		VOP_UNLOCK(vp, 0);
@@ -787,10 +936,15 @@ mdstart_vnode(struct md_s *sc, struct bio *bp)
 		VOP_UNLOCK(vp, 0);
 		vn_finished_write(mp);
 	}
-	if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+
+	if (pb) {
 		pmap_qremove((vm_offset_t)pb->b_data, bp->bio_ma_n);
 		relpbuf(pb, &md_vnode_pbuf_freecnt);
 	}
+
+	if (piov != NULL)
+		free(piov, M_MD);
+
 	bp->bio_resid = auio.uio_resid;
 	return (error);
 }
@@ -801,6 +955,7 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 	vm_page_t m;
 	u_char *p;
 	vm_pindex_t i, lastp;
+	bus_dma_segment_t *vlist;
 	int rv, ma_offs, offs, len, lastend;
 
 	switch (bp->bio_cmd) {
@@ -813,7 +968,10 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 	}
 
 	p = bp->bio_data;
-	ma_offs = (bp->bio_flags & BIO_UNMAPPED) == 0 ? 0 : bp->bio_ma_offset;
+	ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ?
+	    bp->bio_ma_offset : 0;
+	vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
+	    (bus_dma_segment_t *)bp->bio_data : NULL;
 
 	/*
 	 * offs is the offset at which to start operating on the
@@ -853,6 +1011,10 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 			if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
 				pmap_copy_pages(&m, offs, bp->bio_ma,
 				    ma_offs, len);
+			} else if ((bp->bio_flags & BIO_VLIST) != 0) {
+				physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs,
+				    vlist, ma_offs, len);
+				cpu_flush_dcache(p, len);
 			} else {
 				physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len);
 				cpu_flush_dcache(p, len);
@@ -869,6 +1031,9 @@ mdstart_swap(struct md_s *sc, struct bio *bp)
 			if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
 				pmap_copy_pages(bp->bio_ma, ma_offs, &m,
 				    offs, len);
+			} else if ((bp->bio_flags & BIO_VLIST) != 0) {
+				physcopyin_vlist(vlist, ma_offs,
+				    VM_PAGE_TO_PHYS(m) + offs, len);
 			} else {
 				physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len);
 			}
diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c
index 9319b97..1a879f7 100644
--- a/sys/geom/geom_disk.c
+++ b/sys/geom/geom_disk.c
@@ -58,6 +58,8 @@ __FBSDID("$FreeBSD$");
 
 #include <dev/led/led.h>
 
+#include <machine/bus.h>
+
 struct g_disk_softc {
 	struct mtx		 done_mtx;
 	struct disk		*dp;
@@ -273,6 +275,145 @@ g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct t
 	return (error);
 }
 
+static off_t
+g_disk_maxsize(struct disk *dp, struct bio *bp)
+{
+	if (bp->bio_cmd == BIO_DELETE)
+		return (dp->d_delmaxsize);
+	return (dp->d_maxsize);
+}
+
+static int
+g_disk_maxsegs(struct disk *dp, struct bio *bp)
+{
+	return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1);
+}
+
+static void
+g_disk_advance(struct disk *dp, struct bio *bp, off_t off)
+{
+
+	bp->bio_offset += off;
+	bp->bio_length -= off;
+
+	if ((bp->bio_flags & BIO_VLIST) != 0) {
+		bus_dma_segment_t *seg, *end;
+
+		seg = (bus_dma_segment_t *)bp->bio_data;
+		end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
+		off += bp->bio_ma_offset;
+		while (off >= seg->ds_len) {
+			KASSERT((seg != end),
+			    ("vlist request runs off the end"));
+			off -= seg->ds_len;
+			seg++;
+		}
+		bp->bio_ma_offset = off;
+		bp->bio_ma_n = end - seg;
+		bp->bio_data = (void *)seg;
+	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+		bp->bio_ma += off / PAGE_SIZE;
+		bp->bio_ma_offset += off;
+		bp->bio_ma_offset %= PAGE_SIZE;
+		bp->bio_ma_n -= off / PAGE_SIZE;
+	} else {
+		bp->bio_data += off;
+	}
+}
+
+static void
+g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset,
+    off_t *plength, int *ppages)
+{
+	uintptr_t seg_page_base;
+	uintptr_t seg_page_end;
+	off_t offset;
+	off_t length;
+	int seg_pages;
+
+	offset = *poffset;
+	length = *plength;
+
+	if (length > seg->ds_len - offset)
+		length = seg->ds_len - offset;
+
+	seg_page_base = trunc_page(seg->ds_addr + offset);
+	seg_page_end  = round_page(seg->ds_addr + offset + length);
+	seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT;
+
+	if (seg_pages > *ppages) {
+		seg_pages = *ppages;
+		length = (seg_page_base + (seg_pages << PAGE_SHIFT)) -
+		    (seg->ds_addr + offset);
+	}
+
+	*poffset = 0;
+	*plength -= length;
+	*ppages -= seg_pages;
+}
+
+static off_t
+g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg)
+{
+	bus_dma_segment_t *seg, *end;
+	off_t residual;
+	off_t offset;
+	int pages;
+
+	seg = (bus_dma_segment_t *)bp->bio_data;
+	end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
+	residual = bp->bio_length;
+	offset = bp->bio_ma_offset;
+	pages = g_disk_maxsegs(dp, bp);
+	while (residual != 0 && pages != 0) {
+		KASSERT((seg != end),
+		    ("vlist limit runs off the end"));
+		g_disk_seg_limit(seg, &offset, &residual, &pages);
+		seg++;
+	}
+	if (pendseg != NULL)
+		*pendseg = seg;
+	return (residual);
+}
+
+static bool
+g_disk_limit(struct disk *dp, struct bio *bp)
+{
+	bool limited = false;
+	off_t maxsz;
+
+	maxsz = g_disk_maxsize(dp, bp);
+
+	/*
+	 * XXX: If we have a stripesize we should really use it here.
+	 *      Care should be taken in the delete case if this is done
+	 *      as deletes can be very sensitive to size given how they
+	 *      are processed.
+	 */
+	if (bp->bio_length > maxsz) {
+		bp->bio_length = maxsz;
+		limited = true;
+	}
+
+	if ((bp->bio_flags & BIO_VLIST) != 0) {
+		bus_dma_segment_t *firstseg, *endseg;
+		off_t residual;
+
+		firstseg = (bus_dma_segment_t*)bp->bio_data;
+		residual = g_disk_vlist_limit(dp, bp, &endseg);
+		if (residual != 0) {
+			bp->bio_ma_n = endseg - firstseg;
+			bp->bio_length -= residual;
+			limited = true;
+		}
+	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+		bp->bio_ma_n =
+		    howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE);
+	}
+
+	return (limited);
+}
+
 static void
 g_disk_start(struct bio *bp)
 {
@@ -297,6 +438,9 @@ g_disk_start(struct bio *bp)
 		/* fall-through */
 	case BIO_READ:
 	case BIO_WRITE:
+		KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 ||
+		    (bp->bio_flags & BIO_UNMAPPED) == 0,
+		    ("unmapped bio not supported by disk %s", dp->d_name));
 		off = 0;
 		bp3 = NULL;
 		bp2 = g_clone_bio(bp);
@@ -304,39 +448,10 @@ g_disk_start(struct bio *bp)
 			error = ENOMEM;
 			break;
 		}
-		do {
-			off_t d_maxsize;
-
-			d_maxsize = (bp->bio_cmd == BIO_DELETE) ?
-			    dp->d_delmaxsize : dp->d_maxsize;
-			bp2->bio_offset += off;
-			bp2->bio_length -= off;
-			if ((bp->bio_flags & BIO_UNMAPPED) == 0) {
-				bp2->bio_data += off;
-			} else {
-				KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO)
-				    != 0,
-				    ("unmapped bio not supported by disk %s",
-				    dp->d_name));
-				bp2->bio_ma += off / PAGE_SIZE;
-				bp2->bio_ma_offset += off;
-				bp2->bio_ma_offset %= PAGE_SIZE;
-				bp2->bio_ma_n -= off / PAGE_SIZE;
-			}
-			if (bp2->bio_length > d_maxsize) {
-				/*
-				 * XXX: If we have a stripesize we should really
-				 * use it here. Care should be taken in the delete
-				 * case if this is done as deletes can be very 
-				 * sensitive to size given how they are processed.
-				 */
-				bp2->bio_length = d_maxsize;
-				if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
-					bp2->bio_ma_n = howmany(
-					    bp2->bio_ma_offset +
-					    bp2->bio_length, PAGE_SIZE);
-				}
-				off += d_maxsize;
+		for (;;) {
+			if (g_disk_limit(dp, bp2)) {
+				off += bp2->bio_length;
+
 				/*
 				 * To avoid a race, we need to grab the next bio
 				 * before we schedule this one.  See "notes".
@@ -355,9 +470,14 @@ g_disk_start(struct bio *bp)
 			g_disk_lock_giant(dp);
 			dp->d_strategy(bp2);
 			g_disk_unlock_giant(dp);
+
+			if (bp3 == NULL)
+				break;
+
 			bp2 = bp3;
 			bp3 = NULL;
-		} while (bp2 != NULL);
+			g_disk_advance(dp, bp2, off);
+		}
 		break;
 	case BIO_GETATTR:
 		/* Give the driver a chance to override */
diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c
index f1edc70..9dff151 100644
--- a/sys/geom/geom_io.c
+++ b/sys/geom/geom_io.c
@@ -205,11 +205,12 @@ g_clone_bio(struct bio *bp)
 		/*
 		 *  BIO_ORDERED flag may be used by disk drivers to enforce
 		 *  ordering restrictions, so this flag needs to be cloned.
-		 *  BIO_UNMAPPED should be inherited, to properly indicate
-		 *  which way the buffer is passed.
+		 *  BIO_UNMAPPED and BIO_VLIST should be inherited, to properly
+		 *  indicate which way the buffer is passed.
 		 *  Other bio flags are not suitable for cloning.
 		 */
-		bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED);
+		bp2->bio_flags = bp->bio_flags &
+		    (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST);
 		bp2->bio_length = bp->bio_length;
 		bp2->bio_offset = bp->bio_offset;
 		bp2->bio_data = bp->bio_data;
@@ -240,7 +241,7 @@ g_duplicate_bio(struct bio *bp)
 	struct bio *bp2;
 
 	bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO);
-	bp2->bio_flags = bp->bio_flags & BIO_UNMAPPED;
+	bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST);
 	bp2->bio_parent = bp;
 	bp2->bio_cmd = bp->bio_cmd;
 	bp2->bio_length = bp->bio_length;
diff --git a/sys/ia64/include/bus.h b/sys/ia64/include/bus.h
index 966a75d3..a9b09c6 100644
--- a/sys/ia64/include/bus.h
+++ b/sys/ia64/include/bus.h
@@ -123,6 +123,7 @@
 
 #define BUS_SPACE_UNRESTRICTED	(~0)
 
+#ifdef _KERNEL
 
 /*
  * Map and unmap a region of device bus space into CPU virtual address space.
@@ -815,6 +816,8 @@ bus_space_copy_region_8(bus_space_tag_t bst, bus_space_handle_t sbsh,
 #define	bus_space_copy_region_stream_4	bus_space_copy_region_4
 #define	bus_space_copy_region_stream_8	bus_space_copy_region_8
 
+#endif /* _KERNEL */
+
 #include <machine/bus_dma.h>
 
 #endif /* _MACHINE_BUS_H_ */
diff --git a/sys/kern/subr_bus_dma.c b/sys/kern/subr_bus_dma.c
index a16d8c8..ae30276 100644
--- a/sys/kern/subr_bus_dma.c
+++ b/sys/kern/subr_bus_dma.c
@@ -54,19 +54,32 @@ __FBSDID("$FreeBSD$");
 #include <machine/bus.h>
 
 /*
- * Load a list of virtual addresses.
+ * Load up data starting at offset within a region specified by a
+ * list of virtual address ranges until either length or the region
+ * are exhausted.
  */
 static int
 _bus_dmamap_load_vlist(bus_dma_tag_t dmat, bus_dmamap_t map,
     bus_dma_segment_t *list, int sglist_cnt, struct pmap *pmap, int *nsegs,
-    int flags)
+    int flags, size_t offset, size_t length)
 {
 	int error;
 
 	error = 0;
-	for (; sglist_cnt > 0; sglist_cnt--, list++) {
-		error = _bus_dmamap_load_buffer(dmat, map,
-		    (void *)(uintptr_t)list->ds_addr, list->ds_len, pmap,
+	for (; sglist_cnt > 0 && length != 0; sglist_cnt--, list++) {
+		char *addr;
+		size_t ds_len;
+
+		KASSERT((offset < list->ds_len),
+		    ("Invalid mid-segment offset"));
+		addr = (char *)(uintptr_t)list->ds_addr + offset;
+		ds_len = list->ds_len - offset;
+		offset = 0;
+		if (ds_len > length)
+			ds_len = length;
+		length -= ds_len;
+		KASSERT((ds_len != 0), ("Segment length is zero"));
+		error = _bus_dmamap_load_buffer(dmat, map, addr, ds_len, pmap,
 		    flags, NULL, nsegs);
 		if (error)
 			break;
@@ -118,22 +131,48 @@ _bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
 }
 
 /*
+ * Load tlen data starting at offset within a region specified by a list of
+ * physical pages.
+ */
+static int
+_bus_dmamap_load_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
+    vm_page_t *pages, bus_size_t tlen, int offset, int *nsegs, int flags)
+{
+	vm_paddr_t paddr;
+	bus_size_t len;
+	int error, i;
+ 
+	for (i = 0, error = 0; error == 0 && tlen > 0; i++, tlen -= len) {
+		len = min(PAGE_SIZE - offset, tlen);
+		paddr = VM_PAGE_TO_PHYS(pages[i]) + offset;
+		error = _bus_dmamap_load_phys(dmat, map, paddr, len,
+		    flags, NULL, nsegs);
+		offset = 0;
+	}
+	return (error);
+}
+ 
+/*
  * Load from block io.
  */
 static int
 _bus_dmamap_load_bio(bus_dma_tag_t dmat, bus_dmamap_t map, struct bio *bio,
     int *nsegs, int flags)
 {
-	int error;
 
-	if ((bio->bio_flags & BIO_UNMAPPED) == 0) {
-		error = _bus_dmamap_load_buffer(dmat, map, bio->bio_data,
-		    bio->bio_bcount, kernel_pmap, flags, NULL, nsegs);
-	} else {
-		error = _bus_dmamap_load_ma(dmat, map, bio->bio_ma,
-		    bio->bio_bcount, bio->bio_ma_offset, flags, NULL, nsegs);
+	if ((bio->bio_flags & BIO_VLIST) != 0) {
+		bus_dma_segment_t *segs = (bus_dma_segment_t *)bio->bio_data;
+		return (_bus_dmamap_load_vlist(dmat, map, segs, bio->bio_ma_n,
+		    kernel_pmap, nsegs, flags, bio->bio_ma_offset,
+		    bio->bio_bcount));
 	}
-	return (error);
+
+	if ((bio->bio_flags & BIO_UNMAPPED) != 0)
+		return (_bus_dmamap_load_pages(dmat, map, bio->bio_ma,
+		    bio->bio_bcount, bio->bio_ma_offset, nsegs, flags));
+
+	return (_bus_dmamap_load_buffer(dmat, map, bio->bio_data,
+	    bio->bio_bcount, kernel_pmap, flags, NULL, nsegs));
 }
 
 int
@@ -219,7 +258,7 @@ _bus_dmamap_load_ccb(bus_dma_tag_t dmat, bus_dmamap_t map, union ccb *ccb,
 	case CAM_DATA_SG:
 		error = _bus_dmamap_load_vlist(dmat, map,
 		    (bus_dma_segment_t *)data_ptr, sglist_cnt, kernel_pmap,
-		    nsegs, flags);
+		    nsegs, flags, 0, dxfer_len);
 		break;
 	case CAM_DATA_SG_PADDR:
 		error = _bus_dmamap_load_plist(dmat, map,
@@ -494,7 +533,7 @@ bus_dmamap_load_mem(bus_dma_tag_t dmat, bus_dmamap_t map,
 		break;
 	case MEMDESC_VLIST:
 		error = _bus_dmamap_load_vlist(dmat, map, mem->u.md_list,
-		    mem->md_opaque, kernel_pmap, &nsegs, flags);
+		    mem->md_opaque, kernel_pmap, &nsegs, flags, 0, SIZE_T_MAX);
 		break;
 	case MEMDESC_PLIST:
 		error = _bus_dmamap_load_plist(dmat, map, mem->u.md_list,
diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c
index 87892fd..3712f92 100644
--- a/sys/kern/subr_uio.c
+++ b/sys/kern/subr_uio.c
@@ -62,6 +62,8 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_pageout.h>
 #include <vm/vm_map.h>
 
+#include <machine/bus.h>
+
 SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, UIO_MAXIOV,
 	"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
 
@@ -136,6 +138,58 @@ physcopyout(vm_paddr_t src, void *dst, size_t len)
 #undef PHYS_PAGE_COUNT
 
 int
+physcopyin_vlist(bus_dma_segment_t *src, off_t offset, vm_paddr_t dst,
+    size_t len)
+{
+	size_t seg_len;
+	int error;
+
+	error = 0;
+	while (offset >= src->ds_len) {
+		offset -= src->ds_len;
+		src++;
+	}
+
+	while (len > 0 && error == 0) {
+		seg_len = MIN(src->ds_len - offset, len);
+		error = physcopyin((void *)(uintptr_t)(src->ds_addr + offset),
+		    dst, seg_len);
+		offset = 0;
+		src++;
+		len -= seg_len;
+		dst += seg_len;
+	}
+
+	return (error);
+}
+
+int
+physcopyout_vlist(vm_paddr_t src, bus_dma_segment_t *dst, off_t offset,
+    size_t len)
+{
+	size_t seg_len;
+	int error;
+
+	error = 0;
+	while (offset >= dst->ds_len) {
+		offset -= dst->ds_len;
+		dst++;
+	}
+
+	while (len > 0 && error == 0) {
+		seg_len = MIN(dst->ds_len - offset, len);
+		error = physcopyout(src, (void *)(uintptr_t)(dst->ds_addr +
+		    offset), seg_len);
+		offset = 0;
+		dst++;
+		len -= seg_len;
+		src += seg_len;
+	}
+
+	return (error);
+}
+
+int
 uiomove(void *cp, int n, struct uio *uio)
 {
 
diff --git a/sys/pc98/include/bus.h b/sys/pc98/include/bus.h
index 3292474..2060414 100644
--- a/sys/pc98/include/bus.h
+++ b/sys/pc98/include/bus.h
@@ -78,7 +78,9 @@
 #ifndef _PC98_BUS_H_
 #define _PC98_BUS_H_
 
+#ifdef _KERNEL
 #include <sys/systm.h>
+#endif /* _KERNEL */
 
 #include <machine/_bus.h>
 #include <machine/cpufunc.h>
@@ -92,6 +94,8 @@
 
 #define BUS_SPACE_UNRESTRICTED	(~0)
 
+#ifdef _KERNEL
+
 /*
  * address relocation table
  */
@@ -639,4 +643,6 @@ bus_space_barrier(bus_space_tag_t tag, bus_space_handle_t bsh,
 #define	bus_space_copy_region_stream_4(t, h1, o1, h2, o2, c) \
 	bus_space_copy_region_4((t), (h1), (o1), (h2), (o2), (c))
 
+#endif /* _KERNEL */
+
 #endif /* _PC98_BUS_H_ */
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 535ce61..8b3a5fc 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -61,6 +61,7 @@
 #define BIO_ORDERED	0x08
 #define	BIO_UNMAPPED	0x10
 #define	BIO_TRANSIENT_MAPPING	0x20
+#define	BIO_VLIST	0x40
 
 #ifdef _KERNEL
 struct disk;
diff --git a/sys/sys/uio.h b/sys/sys/uio.h
index 271a2f7..ff21b09 100644
--- a/sys/sys/uio.h
+++ b/sys/sys/uio.h
@@ -85,6 +85,7 @@ struct uio {
 
 struct vm_object;
 struct vm_page;
+struct bus_dma_segment;
 
 struct uio *cloneuio(struct uio *uiop);
 int	copyinfrom(const void * __restrict src, void * __restrict dst,
@@ -98,6 +99,10 @@ int	copyout_map(struct thread *td, vm_offset_t *addr, size_t sz);
 int	copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz);
 int	physcopyin(void *src, vm_paddr_t dst, size_t len);
 int	physcopyout(vm_paddr_t src, void *dst, size_t len);
+int	physcopyin_vlist(struct bus_dma_segment *src, off_t offset,
+	    vm_paddr_t dst, size_t len);
+int	physcopyout_vlist(vm_paddr_t src, struct bus_dma_segment *dst,
+	    off_t offset, size_t len);
 int	uiomove(void *cp, int n, struct uio *uio);
 int	uiomove_frombuf(void *buf, int buflen, struct uio *uio);
 int	uiomove_fromphys(struct vm_page *ma[], vm_offset_t offset, int n,
diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile
index 57effb8..8e97961 100644
--- a/usr.sbin/Makefile
+++ b/usr.sbin/Makefile
@@ -7,6 +7,7 @@ SUBDIR=	adduser \
 	arp \
 	binmiscctl \
 	bsdconfig \
+	camdd \
 	cdcontrol \
 	chkgrp \
 	chown \
diff --git a/usr.sbin/camdd/Makefile b/usr.sbin/camdd/Makefile
new file mode 100644
index 0000000..0028668
--- /dev/null
+++ b/usr.sbin/camdd/Makefile
@@ -0,0 +1,11 @@
+# $FreeBSD$
+
+PROG=	camdd
+SRCS=	camdd.c
+SDIR=	${.CURDIR}/../../sys
+DPADD=	${LIBCAM} ${LIBMT} ${LIBSBUF} ${LIBBSDXML} ${LIBUTIL} ${LIBTHR}
+LDADD=	-lcam -lmt -lsbuf -lbsdxml -lutil -lthr
+NO_WTHREAD_SAFETY=	1
+MAN=	camdd.8
+
+.include <bsd.prog.mk>
diff --git a/usr.sbin/camdd/camdd.8 b/usr.sbin/camdd/camdd.8
new file mode 100644
index 0000000..af556bb
--- /dev/null
+++ b/usr.sbin/camdd/camdd.8
@@ -0,0 +1,283 @@
+.\" 
+.\" Copyright (c) 2015 Spectra Logic Corporation
+.\" All rights reserved.
+.\" 
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions, and the following disclaimer,
+.\"    without modification.
+.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer
+.\"    substantially similar to the "NO WARRANTY" disclaimer below
+.\"    ("Disclaimer") and any redistribution must be conditioned upon
+.\"    including a substantially similar Disclaimer requirement for further
+.\"    binary redistribution.
+.\" 
+.\" NO WARRANTY
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGES.
+.\" 
+.\" Authors: Ken Merry           (Spectra Logic Corporation)
+.\" 
+.\" $FreeBSD$
+.\"
+.Dd November 11, 2015
+.Dt CAMDD 8
+.Os
+.Sh NAME
+.Nm camdd
+.Nd CAM data transfer utility
+.Sh SYNOPSIS
+.Nm
+.Aq Fl i|o Ar pass=pass_dev|file=filename,bs=blocksize,[...]
+.Op Fl C Ar retry_count
+.Op Fl E
+.Op Fl m Ar max_io
+.Op Fl t Ar timeout
+.Op Fl v
+.Op Fl h
+.Sh DESCRIPTION
+The
+.Nm
+utility is a sequential data transfer utility that offers standard
+.Xr read 2
+and
+.Xr write 2
+operation in addition to a mode that uses the asynchronous
+.Xr pass 4
+API.
+The asynchronous
+.Xr pass 4
+API allows multiple requests to be queued to a device simultaneously.
+.Pp
+.Nm
+collects performance information and will display it when the transfer
+completes, when
+.Nm
+is terminated or when it receives a SIGINFO signal.
+.Pp
+The following options are available:
+.Bl -tag -width 12n
+.It Fl i | o Ar args
+Specify the input and output device or file.
+Both 
+.Fl i
+and
+.Fl o
+must be specified.
+There are a number of parameters that can be specified.
+One of the first two (file or pass) MUST be specified to indicate which I/O
+method to use on the device in question.
+.Bl -tag -width 9n
+.It pass=dev
+Specify a
+.Xr pass 4
+device to operate on.
+This requests that
+.Nm
+access the device in question via the asynchronous
+.Xr pass 4
+interface.
+.Pp
+The device name can be a
+.Xr pass 4
+name and unit number, for instance
+.Dq pass0 ,
+or a regular peripheral driver name and unit number, for instance
+.Dq da5 .
+It can also be the path of a
+.Xr pass 4 
+or other disk device, like
+.Dq /dev/da5 .
+It may also be a bus:target:lun, for example:
+.Dq 0:5:0 .
+.Pp
+Only
+.Xr pass 4
+devices for
+.Tn SCSI
+disk-like devices are supported.
+.Tn ATA
+devices are not currently supported, but support could be added later.
+Specifically,
+.Tn SCSI
+Direct Access (type 0), WORM (type 4), CDROM (type 5), and RBC (Reduced
+Block Command, type 14) devices are supported.
+Tape drives, medium changers, enclosures etc. are not supported.
+.It file=path
+Specify a file or device to operate on.
+This requests that the file or device in question be accessed using the
+standard
+.Xr read 2
+and
+.Xr write 2
+system calls.
+The file interface does not support queueing multiple commands at a time.
+It does support probing disk sector size and capacity information, and tape
+blocksize and maximum transfer size information.
+The file interface supports standard files, disks, tape drives, special
+devices, pipes and standard input and output.
+If the file is specified as
+.Dq - ,
+standard input or standard output is used.
+For tape devices, the specified blocksize will be the size that
+.Nm
+attempts to use to write to or read from the tape.
+When writing to a tape device, the blocksize is treated like a disk sector
+size.
+So, that means
+.Nm
+will not write anything smaller than the sector size.
+At the end of a transfer, if there isn't sufficient data from the reader 
+to yield a full block,
+.Nm
+will add zeros on the end of the data from the reader to make up a full
+block.
+.It bs=N
+Specify the blocksize to use for transfers.
+.Nm
+will attempt to read or write using the requested blocksize.
+.Pp
+Note that the blocksize given only applies to either the input or the
+output path.
+To use the same blocksize for the input and output transfers, you must
+specify that blocksize with both the
+.Fl i
+and
+.Fl o
+arguments.
+.Pp
+The blocksize may be specified in bytes, or using any suffix (e.g. k, M, G)
+supported by
+.Xr expand_number 3 .
+.It offset=N
+Specify the starting offset for the input or output device or file.
+The offset may be specified in bytes, or by using any suffix (e.g. k, M, G)
+supported by
+.Xr expand_number 3 .
+.It depth=N
+Specify a desired queue depth for the input or output path.
+.Nm
+will attempt to keep the requested number of requests of the specified
+blocksize queued to the input or output device.
+Queue depths greater than 1 are only supported for the asynchronous
+.Xr pass 4 
+output method.
+The queue depth is maintained on a best effort basis, and may not be
+possible to maintain for especially fast devices.
+For writes, maintaining the queue depth also depends on a sufficiently
+fast reading device.
+.It mcs=N
+Specify the minimum command size to use for
+.Xr pass 4
+devices.
+Some devices do not support 6 byte
+.Tn SCSI
+commands.
+The
+.Xr da 4
+device handles this restriction automatically, but the
+.Xr pass 4
+device allows the user to specify the
+.Tn SCSI
+command used.
+If a device does not accept 6 byte
+.Tn SCSI
+READ/WRITE commands (which is the default at lower LBAs), it will generally
+accept 10 byte
+.Tn SCSI
+commands instead.
+.It debug=N
+Specify the debug level for this device.
+There is currently only one debug level setting, so setting this to any
+non-zero value will turn on debugging.
+The debug facility may be expanded in the future.
+.El
+.It Fl C Ar count
+Specify the retry count for commands sent via the asynchronous
+.Xr pass 4
+interface.
+This does not apply to commands sent via the file interface.
+.It Fl E
+Enable kernel error recovery for the
+.Xr pass 4
+driver.
+If error recovery is not enabled, unit attention conditions and other
+transient failures may cause the transfer to fail.
+.It Fl m Ar size
+Specify the maximum amount of data to be transferred.
+This may be specified in bytes, or by using any suffix (e.g. K, M, G)
+supported by
+.Xr expand_number 3 .
+.It Fl t Ar timeout
+Specify the command timeout in seconds to use for commands sent via the
+.Xr pass 4
+driver.
+.It Fl v
+Enable verbose reporting of errors.
+This is recommended to aid in debugging any
+.Tn SCSI
+issues that come up.
+.It Fl h
+Display the
+.Nm
+usage message.
+.El
+.Pp
+If
+.Nm
+receives a SIGINFO signal, it will print the current input and output byte
+counts, elapsed runtime and average throughput.
+If
+.Nm
+receives a SIGINT signal, it will print the current input and output byte
+counts, elapsed runtime and average throughput and then exit.
+.Sh EXAMPLES
+.Dl camdd -i pass=da8,bs=512k,depth=4 -o pass=da3,bs=512k,depth=4
+.Pp
+Copy all data from da8 to da3 using a blocksize of 512k for both drives,
+and attempt to maintain a queue depth of 4 on both the input and output
+devices.
+The transfer will stop when the end of either device is reached.
+.Pp
+.Dl camdd -i file=/dev/zero,bs=1M -o pass=da5,bs=1M,depth=4 -m 100M
+.Pp
+Read 1MB blocks of zeros from /dev/zero, and write them to da5 with a
+desired queue depth of 4.
+Stop the transfer after 100MB has been written.
+.Pp
+.Dl camdd -i pass=da8,bs=1M,depth=3 -o file=disk.img
+.Pp
+Copy disk da8 using a 1MB blocksize and desired queue depth of 3 to the
+file disk.img.
+.Pp
+.Dl camdd -i file=/etc/rc -o file=- 
+.Pp
+Read the file /etc/rc and write it to standard output.
+.Pp
+.Dl camdd -i pass=da10,bs=64k,depth=16 -o file=/dev/nsa0,bs=128k
+.Pp
+Copy 64K blocks from the disk da10 with a queue depth of 16, and write
+to the tape drive sa0 with a 128k blocksize.
+The copy will stop when either the end of the disk or tape is reached.
+.Sh SEE ALSO
+.Xr cam 3 ,
+.Xr cam 4 ,
+.Xr pass 4 ,
+.Xr camcontrol 8
+.Sh HISTORY
+.Nm
+first appeared in
+.Fx 10.2 .
+.Sh AUTHORS
+.An Kenneth Merry Aq Mt ken@FreeBSD.org
diff --git a/usr.sbin/camdd/camdd.c b/usr.sbin/camdd/camdd.c
new file mode 100644
index 0000000..573214e
--- /dev/null
+++ b/usr.sbin/camdd/camdd.c
@@ -0,0 +1,3428 @@
+/*-
+ * Copyright (c) 1997-2007 Kenneth D. Merry
+ * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Ken Merry           (Spectra Logic Corporation)
+ */
+
+/*
+ * This is eventually intended to be:
+ * - A basic data transfer/copy utility
+ * - A simple benchmark utility
+ * - An example of how to use the asynchronous pass(4) driver interface.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/ioctl.h>
+#include <sys/stdint.h>
+#include <sys/types.h>
+#include <sys/endian.h>
+#include <sys/param.h>
+#include <sys/sbuf.h>
+#include <sys/stat.h>
+#include <sys/event.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <vm/vm.h>
+#include <machine/bus.h>
+#include <sys/bus.h>
+#include <sys/bus_dma.h>
+#include <sys/mtio.h>
+#include <sys/conf.h>
+#include <sys/disk.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <semaphore.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <err.h>
+#include <libutil.h>
+#include <pthread.h>
+#include <assert.h>
+#include <bsdxml.h>
+
+#include <cam/cam.h>
+#include <cam/cam_debug.h>
+#include <cam/cam_ccb.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/scsi/scsi_da.h>
+#include <cam/scsi/scsi_pass.h>
+#include <cam/scsi/scsi_message.h>
+#include <cam/scsi/smp_all.h>
+#include <camlib.h>
+#include <mtlib.h>
+#include <zlib.h>
+
+typedef enum {
+	CAMDD_CMD_NONE		= 0x00000000,
+	CAMDD_CMD_HELP		= 0x00000001,
+	CAMDD_CMD_WRITE		= 0x00000002,
+	CAMDD_CMD_READ		= 0x00000003
+} camdd_cmdmask;
+
+typedef enum {
+	CAMDD_ARG_NONE		= 0x00000000,
+	CAMDD_ARG_VERBOSE	= 0x00000001,
+	CAMDD_ARG_DEVICE	= 0x00000002,
+	CAMDD_ARG_BUS		= 0x00000004,
+	CAMDD_ARG_TARGET	= 0x00000008,
+	CAMDD_ARG_LUN		= 0x00000010,
+	CAMDD_ARG_UNIT		= 0x00000020,
+	CAMDD_ARG_TIMEOUT	= 0x00000040,
+	CAMDD_ARG_ERR_RECOVER	= 0x00000080,
+	CAMDD_ARG_RETRIES	= 0x00000100
+} camdd_argmask;
+
+typedef enum {
+	CAMDD_DEV_NONE		= 0x00,
+	CAMDD_DEV_PASS		= 0x01,
+	CAMDD_DEV_FILE		= 0x02
+} camdd_dev_type;
+
+struct camdd_io_opts {
+	camdd_dev_type	dev_type;
+	char		*dev_name;
+	uint64_t	blocksize;
+	uint64_t	queue_depth;
+	uint64_t	offset;
+	int		min_cmd_size;
+	int		write_dev;
+	uint64_t	debug;
+};
+
+typedef enum {
+	CAMDD_BUF_NONE,
+	CAMDD_BUF_DATA,
+	CAMDD_BUF_INDIRECT
+} camdd_buf_type;
+
+struct camdd_buf_indirect {
+	/*
+	 * Pointer to the source buffer.
+	 */
+	struct camdd_buf *src_buf;
+
+	/*
+	 * Offset into the source buffer, in bytes.
+	 */
+	uint64_t	  offset;
+	/*
+	 * Pointer to the starting point in the source buffer.
+	 */
+	uint8_t		 *start_ptr;
+
+	/*
+	 * Length of this chunk in bytes.
+	 */
+	size_t		  len;
+};
+
+struct camdd_buf_data {
+	/*
+	 * Buffer allocated when we allocate this camdd_buf.  This should
+	 * be the size of the blocksize for this device.
+	 */
+	uint8_t			*buf;
+
+	/*
+	 * The amount of backing store allocated in buf.  Generally this
+	 * will be the blocksize of the device.
+	 */
+	uint32_t		 alloc_len;
+
+	/*
+	 * The amount of data that was put into the buffer (on reads) or
+	 * the amount of data we have put onto the src_list so far (on
+	 * writes).
+	 */
+	uint32_t		 fill_len;
+
+	/*
+	 * The amount of data that was not transferred.
+	 */
+	uint32_t		 resid;
+
+	/*
+	 * Starting byte offset on the reader.
+	 */
+	uint64_t		 src_start_offset;
+	
+	/*
+	 * CCB used for pass(4) device targets.
+	 */
+	union ccb		 ccb;
+
+	/*
+	 * Number of scatter/gather segments.
+	 */
+	int			 sg_count;
+
+	/*
+	 * Set if we had to tack on an extra buffer to round the transfer
+	 * up to a sector size.
+	 */
+	int			 extra_buf;
+
+	/*
+	 * Scatter/gather list used generally when we're the writer for a
+	 * pass(4) device. 
+	 */
+	bus_dma_segment_t	*segs;
+
+	/*
+	 * Scatter/gather list used generally when we're the writer for a
+	 * file or block device;
+	 */
+	struct iovec		*iovec;
+};
+
+union camdd_buf_types {
+	struct camdd_buf_indirect	indirect;
+	struct camdd_buf_data		data;
+};
+
+typedef enum {
+	CAMDD_STATUS_NONE,
+	CAMDD_STATUS_OK,
+	CAMDD_STATUS_SHORT_IO,
+	CAMDD_STATUS_EOF,
+	CAMDD_STATUS_ERROR
+} camdd_buf_status;
+
+struct camdd_buf {
+	camdd_buf_type		 buf_type;
+	union camdd_buf_types	 buf_type_spec;
+
+	camdd_buf_status	 status;
+
+	uint64_t		 lba;
+	size_t			 len;
+
+	/*
+	 * A reference count of how many indirect buffers point to this
+	 * buffer.
+	 */
+	int			 refcount;
+
+	/*
+	 * A link back to our parent device.
+	 */
+	struct camdd_dev	*dev;
+	STAILQ_ENTRY(camdd_buf)  links;
+	STAILQ_ENTRY(camdd_buf)  work_links;
+
+	/*
+	 * A count of the buffers on the src_list.
+	 */
+	int			 src_count;
+
+	/*
+	 * List of buffers from our partner thread that are the components
+	 * of this buffer for the I/O.  Uses src_links.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 src_list;
+	STAILQ_ENTRY(camdd_buf)  src_links;
+};
+
+#define	NUM_DEV_TYPES	2
+
+struct camdd_dev_pass {
+	int			 scsi_dev_type;
+	struct cam_device	*dev;
+	uint64_t		 max_sector;
+	uint32_t		 block_len;
+	uint32_t		 cpi_maxio;
+};
+
+typedef enum {
+	CAMDD_FILE_NONE,
+	CAMDD_FILE_REG,
+	CAMDD_FILE_STD,
+	CAMDD_FILE_PIPE,
+	CAMDD_FILE_DISK,
+	CAMDD_FILE_TAPE,
+	CAMDD_FILE_TTY,
+	CAMDD_FILE_MEM
+} camdd_file_type;
+
+typedef enum {
+	CAMDD_FF_NONE 		= 0x00,
+	CAMDD_FF_CAN_SEEK	= 0x01
+} camdd_file_flags;
+
+struct camdd_dev_file {
+	int			 fd;
+	struct stat		 sb;
+	char			 filename[MAXPATHLEN + 1];
+	camdd_file_type		 file_type;
+	camdd_file_flags	 file_flags;
+	uint8_t			*tmp_buf;
+};
+
+struct camdd_dev_block {
+	int			 fd;
+	uint64_t		 size_bytes;
+	uint32_t		 block_len;
+};
+
+union camdd_dev_spec {
+	struct camdd_dev_pass	pass;
+	struct camdd_dev_file	file;
+	struct camdd_dev_block	block;
+};
+
+typedef enum {
+	CAMDD_DEV_FLAG_NONE		= 0x00,
+	CAMDD_DEV_FLAG_EOF		= 0x01,
+	CAMDD_DEV_FLAG_PEER_EOF		= 0x02,
+	CAMDD_DEV_FLAG_ACTIVE		= 0x04,
+	CAMDD_DEV_FLAG_EOF_SENT		= 0x08,
+	CAMDD_DEV_FLAG_EOF_QUEUED	= 0x10
+} camdd_dev_flags;
+
+struct camdd_dev {
+	camdd_dev_type		 dev_type;
+	union camdd_dev_spec	 dev_spec;
+	camdd_dev_flags		 flags;
+	char			 device_name[MAXPATHLEN+1];
+	uint32_t		 blocksize;
+	uint32_t		 sector_size;
+	uint64_t		 max_sector;
+	uint64_t		 sector_io_limit;
+	int			 min_cmd_size;
+	int			 write_dev;
+	int			 retry_count;
+	int			 io_timeout;
+	int			 debug;
+	uint64_t		 start_offset_bytes;
+	uint64_t		 next_io_pos_bytes;
+	uint64_t		 next_peer_pos_bytes;
+	uint64_t		 next_completion_pos_bytes;
+	uint64_t		 peer_bytes_queued;
+	uint64_t		 bytes_transferred;
+	uint32_t		 target_queue_depth;
+	uint32_t		 cur_active_io;
+	uint8_t			*extra_buf;
+	uint32_t		 extra_buf_len;
+	struct camdd_dev	*peer_dev;
+	pthread_mutex_t		 mutex;
+	pthread_cond_t		 cond;
+	int			 kq;
+
+	int			 (*run)(struct camdd_dev *dev);
+	int			 (*fetch)(struct camdd_dev *dev);
+
+	/*
+	 * Buffers that are available for I/O.  Uses links.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 free_queue;
+
+	/*
+	 * Free indirect buffers.  These are used for breaking a large
+	 * buffer into multiple pieces.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
+
+	/*
+	 * Buffers that have been queued to the kernel.  Uses links.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 active_queue;
+
+	/*
+	 * Will generally contain one of our buffers that is waiting for enough
+	 * I/O from our partner thread to be able to execute.  This will
+	 * generally happen when our per-I/O-size is larger than the
+	 * partner thread's per-I/O-size.  Uses links.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 pending_queue;
+
+	/*
+	 * Number of buffers on the pending queue
+	 */
+	int			 num_pending_queue;
+
+	/*
+	 * Buffers that are filled and ready to execute.  This is used when
+	 * our partner (reader) thread sends us blocks that are larger than
+	 * our blocksize, and so we have to split them into multiple pieces.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 run_queue;
+
+	/*
+	 * Number of buffers on the run queue.
+	 */
+	int			 num_run_queue;
+
+	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
+
+	int			 num_reorder_queue;
+
+	/*
+	 * Buffers that have been queued to us by our partner thread
+	 * (generally the reader thread) to be written out.  Uses
+	 * work_links.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 work_queue;
+
+	/*
+	 * Buffers that have been completed by our partner thread.  Uses
+	 * work_links.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
+
+	/*
+	 * Number of buffers on the peer done queue.
+	 */
+	uint32_t		 num_peer_done_queue;
+
+	/*
+	 * A list of buffers that we have queued to our peer thread.  Uses
+	 * links.
+	 */
+	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
+
+	/*
+	 * Number of buffers on the peer work queue.
+	 */
+	uint32_t		 num_peer_work_queue;
+};
+
+static sem_t camdd_sem;
+static int need_exit = 0;
+static int error_exit = 0;
+static int need_status = 0;
+
+#ifndef min
+#define	min(a, b) (((a) < (b)) ? (a) : (b))	/* NB: evaluates args twice */
+#endif
+
+/*
+ * XXX KDM private copy of timespecsub().  This is normally defined in
+ * sys/time.h, but is only enabled in the kernel.  If that definition is
+ * enabled in userland, it breaks the build of libnetbsd.
+ */
+#ifndef timespecsub
+#define	timespecsub(vvp, uvp)						\
+	do {								\
+		(vvp)->tv_sec -= (uvp)->tv_sec;				\
+		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
+		if ((vvp)->tv_nsec < 0) {				\
+			(vvp)->tv_sec--;				\
+			(vvp)->tv_nsec += 1000000000;			\
+		}							\
+	} while (0)
+#endif
+
+
+/* Generically useful offsets into the peripheral private area */
+#define ppriv_ptr0 periph_priv.entries[0].ptr
+#define ppriv_ptr1 periph_priv.entries[1].ptr
+#define ppriv_field0 periph_priv.entries[0].field
+#define ppriv_field1 periph_priv.entries[1].field
+
+#define	ccb_buf	ppriv_ptr0
+
+#define	CAMDD_FILE_DEFAULT_BLOCK	524288
+#define	CAMDD_FILE_DEFAULT_DEPTH	1
+#define	CAMDD_PASS_MAX_BLOCK		1048576
+#define	CAMDD_PASS_DEFAULT_DEPTH	6
+#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
+
+static int parse_btl(char *tstr, int *bus, int *target, int *lun,
+		     camdd_argmask *arglst);
+void camdd_free_dev(struct camdd_dev *dev);
+struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
+				  struct kevent *new_ke, int num_ke,
+				  int retry_count, int timeout);
+static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
+					 camdd_buf_type buf_type);
+void camdd_release_buf(struct camdd_buf *buf);
+struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
+int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
+			uint32_t sector_size, uint32_t *num_sectors_used,
+			int *double_buf_needed);
+uint32_t camdd_buf_get_len(struct camdd_buf *buf);
+void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
+int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
+		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
+struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
+				   int retry_count, int timeout);
+struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
+				   struct camdd_io_opts *io_opts,
+				   camdd_argmask arglist, int probe_retry_count,
+				   int probe_timeout, int io_retry_count,
+				   int io_timeout);
+void *camdd_file_worker(void *arg);
+camdd_buf_status camdd_ccb_status(union ccb *ccb);
+int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
+int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
+void camdd_peer_done(struct camdd_buf *buf);
+void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
+			int *error_count);
+int camdd_pass_fetch(struct camdd_dev *dev);
+int camdd_file_run(struct camdd_dev *dev);
+int camdd_pass_run(struct camdd_dev *dev);
+int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
+int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
+void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
+		     uint32_t *peer_depth, uint32_t *our_bytes,
+		     uint32_t *peer_bytes);
+void *camdd_worker(void *arg);
+void camdd_sig_handler(int sig);
+void camdd_print_status(struct camdd_dev *camdd_dev,
+			struct camdd_dev *other_dev,
+			struct timespec *start_time);
+int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
+	     uint64_t max_io, int retry_count, int timeout);
+int camdd_parse_io_opts(char *args, int is_write,
+			struct camdd_io_opts *io_opts);
+void usage(void);
+
+/*
+ * Parse out a bus, or a bus, target and lun in the following
+ * format:
+ * bus
+ * bus:target
+ * bus:target:lun
+ *
+ * Returns the number of parsed components, or 0.
+ */
+static int
+parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
+{
+	char *tmpstr, *last;
+	int convs = 0;
+
+	/* isspace() is only defined for unsigned char values (and EOF). */
+	while (isspace((unsigned char)*tstr))
+		tstr++;
+	tmpstr = strtok_r(tstr, ":", &last);	/* reentrant, unlike strtok() */
+	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
+		*bus = strtol(tmpstr, NULL, 0);
+		*arglst |= CAMDD_ARG_BUS;
+		convs++;
+		tmpstr = strtok_r(NULL, ":", &last);
+		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
+			*target = strtol(tmpstr, NULL, 0);
+			*arglst |= CAMDD_ARG_TARGET;
+			convs++;
+			tmpstr = strtok_r(NULL, ":", &last);
+			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
+				*lun = strtol(tmpstr, NULL, 0);
+				*arglst |= CAMDD_ARG_LUN;
+				convs++;
+			}
+		}
+	}
+
+	return convs;
+}
+
+/*
+ * XXX KDM clean up and free all of the buffers on the queue!
+ */
+void
+camdd_free_dev(struct camdd_dev *dev)
+{
+	if (dev == NULL)
+		return;
+
+	switch (dev->dev_type) {
+	case CAMDD_DEV_FILE: {
+		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
+
+		if (file_dev->fd != -1)
+			close(file_dev->fd);
+		free(file_dev->tmp_buf);
+		break;
+	}
+	case CAMDD_DEV_PASS: {
+		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
+
+		if (pass_dev->dev != NULL)
+			cam_close_device(pass_dev->dev);
+		break;
+	}
+	default:
+		break;
+	}
+
+	free(dev);
+}
+
+/*
+ * Allocate and initialize a device: queues, mutex, condition variable and a
+ * kqueue with the caller's events plus our own.  Returns NULL on failure.
+ */
+struct camdd_dev *
+camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
+		int retry_count, int timeout)
+{
+	struct camdd_dev *dev = NULL;
+	struct kevent *ke = NULL;
+	size_t ke_size;
+	int retval = 0;
+
+	dev = calloc(1, sizeof(*dev));
+	if (dev == NULL) {
+		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
+		goto bailout;
+	}
+
+	dev->dev_type = dev_type;
+	dev->io_timeout = timeout;
+	dev->retry_count = retry_count;
+	STAILQ_INIT(&dev->free_queue);
+	STAILQ_INIT(&dev->free_indirect_queue);
+	STAILQ_INIT(&dev->active_queue);
+	STAILQ_INIT(&dev->pending_queue);
+	STAILQ_INIT(&dev->run_queue);
+	STAILQ_INIT(&dev->reorder_queue);
+	STAILQ_INIT(&dev->work_queue);
+	STAILQ_INIT(&dev->peer_done_queue);
+	STAILQ_INIT(&dev->peer_work_queue);
+	retval = pthread_mutex_init(&dev->mutex, NULL);
+	if (retval != 0) {
+		warnc(retval, "%s: failed to initialize mutex", __func__);
+		goto bailout;
+	}
+	retval = pthread_cond_init(&dev->cond, NULL);
+	if (retval != 0) {
+		warnc(retval, "%s: failed to initialize condition variable",
+		      __func__);
+		goto bailout_mutex;
+	}
+	dev->kq = kqueue();
+	if (dev->kq == -1) {
+		warn("%s: Unable to create kqueue", __func__);
+		goto bailout_cond;
+	}
+
+	ke_size = sizeof(struct kevent) * (num_ke + 4);
+	ke = calloc(1, ke_size);
+	if (ke == NULL) {
+		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
+		goto bailout_kq;
+	}
+	if (num_ke > 0)
+		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
+	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
+	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
+	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
+	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
+	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
+	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
+	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
+	if (retval == -1)
+		warn("%s: Unable to register kevents", __func__);
+	free(ke);	/* only needed for the kevent() call above */
+	if (retval != -1)
+		return (dev);
+
+bailout_kq:
+	close(dev->kq);
+bailout_cond:
+	pthread_cond_destroy(&dev->cond);
+bailout_mutex:
+	pthread_mutex_destroy(&dev->mutex);
+bailout:
+	free(dev);
+	return (NULL);
+}
+
+static struct camdd_buf *
+camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
+{
+	struct camdd_buf *buf = NULL;
+	uint8_t *data_ptr = NULL;
+
+	/*
+	 * We only need to allocate data space for data buffers.
+	 */
+	switch (buf_type) {
+	case CAMDD_BUF_DATA:
+		data_ptr = malloc(dev->blocksize);
+		if (data_ptr == NULL) {
+			warn("unable to allocate %u bytes", dev->blocksize);
+			goto bailout_error;
+		}
+		break;
+	default:
+		break;
+	}
+	
+	buf = malloc(sizeof(*buf));
+	if (buf == NULL) {
+		warn("unable to allocate %zu bytes", sizeof(*buf));
+		goto bailout_error;
+	}
+
+	bzero(buf, sizeof(*buf));
+	buf->buf_type = buf_type;
+	buf->dev = dev;
+	switch (buf_type) {
+	case CAMDD_BUF_DATA: {
+		struct camdd_buf_data *data;
+
+		data = &buf->buf_type_spec.data;
+
+		data->alloc_len = dev->blocksize;
+		data->buf = data_ptr;
+		break;
+	}
+	case CAMDD_BUF_INDIRECT:
+		break;
+	default:
+		break;
+	}
+	STAILQ_INIT(&buf->src_list);
+
+	return (buf);
+
+bailout_error:
+	if (data_ptr != NULL)
+		free(data_ptr);
+
+	if (buf != NULL)
+		free(buf);
+
+	return (NULL);
+}
+
+void
+camdd_release_buf(struct camdd_buf *buf)
+{
+	struct camdd_dev *dev;
+
+	dev = buf->dev;
+
+	switch (buf->buf_type) {
+	case CAMDD_BUF_DATA: {
+		struct camdd_buf_data *data;
+
+		data = &buf->buf_type_spec.data;
+
+		if (data->segs != NULL) {
+			if (data->extra_buf != 0) {
+				void *extra_buf;
+
+				extra_buf = (void *)
+				    data->segs[data->sg_count - 1].ds_addr;
+				free(extra_buf);
+				data->extra_buf = 0;
+			}
+			free(data->segs);
+			data->segs = NULL;
+			data->sg_count = 0;
+		} else if (data->iovec != NULL) {
+			if (data->extra_buf != 0) {
+				free(data->iovec[data->sg_count - 1].iov_base);
+				data->extra_buf = 0;
+			}
+			free(data->iovec);
+			data->iovec = NULL;
+			data->sg_count = 0;
+		}
+		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
+		break;
+	}
+	case CAMDD_BUF_INDIRECT:
+		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
+		break;
+	default:
+		err(1, "%s: Invalid buffer type %d for released buffer",
+		    __func__, buf->buf_type);
+		break;
+	}
+}
+
+struct camdd_buf *
+camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
+{
+	struct camdd_buf *buf = NULL;
+
+	switch (buf_type) {
+	case CAMDD_BUF_DATA:
+		buf = STAILQ_FIRST(&dev->free_queue);
+		if (buf != NULL) {
+			struct camdd_buf_data *data;
+			uint8_t *data_ptr;
+			uint32_t alloc_len;
+
+			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
+			data = &buf->buf_type_spec.data;
+			data_ptr = data->buf;
+			alloc_len = data->alloc_len;
+			bzero(buf, sizeof(*buf));
+			data->buf = data_ptr;
+			data->alloc_len = alloc_len;
+		}
+		break;
+	case CAMDD_BUF_INDIRECT:
+		buf = STAILQ_FIRST(&dev->free_indirect_queue);
+		if (buf != NULL) {
+			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
+
+			bzero(buf, sizeof(*buf));
+		}
+		break;
+	default:
+		warnx("Unknown buffer type %d requested", buf_type);
+		break;
+	}
+
+
+	if (buf == NULL)
+		return (camdd_alloc_buf(dev, buf_type));
+	else {
+		STAILQ_INIT(&buf->src_list);
+		buf->dev = dev;
+		buf->buf_type = buf_type;
+
+		return (buf);
+	}
+}
+
+int
+camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
+		    uint32_t *num_sectors_used, int *double_buf_needed)
+{
+	struct camdd_buf *tmp_buf;
+	struct camdd_buf_data *data;
+	uint8_t *extra_buf = NULL;
+	size_t extra_buf_len = 0;
+	int i, retval = 0;
+
+	data = &buf->buf_type_spec.data;
+
+	data->sg_count = buf->src_count;
+	/*
+	 * Compose a scatter/gather list from all of the buffers in the list.
+	 * If the length of the buffer isn't a multiple of the sector size,
+	 * we'll have to add an extra buffer.  This should only happen
+	 * at the end of a transfer.
+	 */
+	if ((data->fill_len % sector_size) != 0) {
+		extra_buf_len = sector_size - (data->fill_len % sector_size);
+		extra_buf = calloc(extra_buf_len, 1);
+		if (extra_buf == NULL) {
+			warn("%s: unable to allocate %zu bytes for extra "
+			    "buffer space", __func__, extra_buf_len);
+			retval = 1;
+			goto bailout;
+		}
+		data->extra_buf = 1;
+		data->sg_count++;
+	}
+	if (iovec == 0) {
+		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
+		if (data->segs == NULL) {
+			warn("%s: unable to allocate %zu bytes for S/G list",
+			    __func__, sizeof(bus_dma_segment_t) *
+			    data->sg_count);
+			retval = 1;
+			goto bailout;
+		}
+
+	} else {
+		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
+		if (data->iovec == NULL) {
+			warn("%s: unable to allocate %zu bytes for S/G list",
+			    __func__, sizeof(struct iovec) * data->sg_count);
+			retval = 1;
+			goto bailout;
+		}
+	}
+
+	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
+	     i < buf->src_count && tmp_buf != NULL; i++,
+	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
+
+		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
+			struct camdd_buf_data *tmp_data;
+
+			tmp_data = &tmp_buf->buf_type_spec.data;
+			if (iovec == 0) {
+				data->segs[i].ds_addr =
+				    (bus_addr_t) tmp_data->buf;
+				data->segs[i].ds_len = tmp_data->fill_len -
+				    tmp_data->resid;
+			} else {
+				data->iovec[i].iov_base = tmp_data->buf;
+				data->iovec[i].iov_len = tmp_data->fill_len -
+				    tmp_data->resid;
+			}
+			if (((tmp_data->fill_len - tmp_data->resid) %
+			     sector_size) != 0)
+				*double_buf_needed = 1;
+		} else {
+			struct camdd_buf_indirect *tmp_ind;
+
+			tmp_ind = &tmp_buf->buf_type_spec.indirect;
+			if (iovec == 0) {
+				data->segs[i].ds_addr =
+				    (bus_addr_t)tmp_ind->start_ptr;
+				data->segs[i].ds_len = tmp_ind->len;
+			} else {
+				data->iovec[i].iov_base = tmp_ind->start_ptr;
+				data->iovec[i].iov_len = tmp_ind->len;
+			}
+			if ((tmp_ind->len % sector_size) != 0)
+				*double_buf_needed = 1;
+		}
+	}
+
+	if (extra_buf != NULL) {
+		if (iovec == 0) {
+			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
+			data->segs[i].ds_len = extra_buf_len;
+		} else {
+			data->iovec[i].iov_base = extra_buf;
+			data->iovec[i].iov_len = extra_buf_len;
+		}
+		i++;
+	}
+	if ((tmp_buf != NULL) || (i != data->sg_count)) {
+		warnx("buffer source count does not match "
+		      "number of buffers in list!");
+		retval = 1;
+		goto bailout;
+	}
+
+bailout:
+	if (retval == 0) {
+		*num_sectors_used = (data->fill_len + extra_buf_len) /
+		    sector_size;
+	}
+	return (retval);
+}
+
+/*
+ * Return the length recorded for a buffer: the fill length of a data
+ * buffer, or the segment length of any other (indirect) buffer.
+ */
+uint32_t
+camdd_buf_get_len(struct camdd_buf *buf)
+{
+	/* Data buffers keep track of how many bytes have been filled in. */
+	if (buf->buf_type == CAMDD_BUF_DATA)
+		return (buf->buf_type_spec.data.fill_len);
+
+	/* Every other buffer type is handled as an indirect buffer. */
+	return (buf->buf_type_spec.indirect.len);
+}
+
+/*
+ * Append child_buf to the source list of the data buffer buf.  The source
+ * count is incremented and the child's length is added to the parent's
+ * fill length.
+ */
+void
+camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
+{
+	/* Only data buffers collect child buffers. */
+	assert(buf->buf_type == CAMDD_BUF_DATA);
+
+	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
+	buf->src_count += 1;
+
+	buf->buf_type_spec.data.fill_len += camdd_buf_get_len(child_buf);
+}
+
+/*
+ * Indices into the req_status_items[] table.  The order of these values
+ * must match the order of the entries in that table, since the table is
+ * indexed directly with them.
+ */
+typedef enum {
+	CAMDD_TS_MAX_BLK,	/* "max_blk" entry */
+	CAMDD_TS_MIN_BLK,	/* "min_blk" entry */
+	CAMDD_TS_BLK_GRAN,	/* "blk_gran" entry */
+	CAMDD_TS_EFF_IOSIZE	/* "max_effective_iosize" entry */
+} camdd_status_item_index;
+
+/*
+ * Tape status entries that camdd_probe_tape() looks up by name.  The entry
+ * pointers start out NULL and are filled in by camdd_probe_tape().  The
+ * table is indexed by camdd_status_item_index, so the two must stay in the
+ * same order.
+ */
+static struct camdd_status_items {
+	const char *name;		/* status entry name to search for */
+	struct mt_status_entry *entry;	/* result of the lookup */
+} req_status_items[] = {
+	{ "max_blk", NULL },
+	{ "min_blk", NULL },
+	{ "blk_gran", NULL },
+	{ "max_effective_iosize", NULL }
+};
+
+/*
+ * Fetch the block limits of a tape drive from its extended status XML.
+ *
+ * fd         - open descriptor for the tape device
+ * filename   - device name, used only in diagnostics
+ * max_iosize - set to the "max_effective_iosize" status value
+ * max_blk    - set to the "max_blk" status value
+ * min_blk    - set to the "min_blk" status value
+ * blk_gran   - set to the "blk_gran" status value
+ *
+ * Returns 0 on success and 1 if the status could not be parsed or the
+ * status data reports an error.  Failure to fetch the XML string, or a
+ * missing status entry, terminates the program.
+ */
+int
+camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
+		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
+{
+	struct mt_status_data status_data;
+	char *xml_str = NULL;
+	unsigned int idx;
+	int retval;
+
+	if (mt_get_xml_str(fd, MTIOCEXTGET, &xml_str) != 0)
+		err(1, "Couldn't get XML string from %s", filename);
+
+	if (mt_get_status(xml_str, &status_data) != XML_STATUS_OK) {
+		warn("couldn't get status for %s", filename);
+		retval = 1;
+		goto bailout;
+	}
+	retval = 0;
+
+	if (status_data.error != 0) {
+		warnx("%s", status_data.error_str);
+		retval = 1;
+		goto bailout;
+	}
+
+	/* Resolve every entry we need; all of them are mandatory. */
+	for (idx = 0;
+	     idx < sizeof(req_status_items) / sizeof(req_status_items[0]);
+	     idx++) {
+		struct camdd_status_items *item = &req_status_items[idx];
+
+		item->entry = mt_status_entry_find(&status_data,
+		    __DECONST(char *, item->name));
+		if (item->entry == NULL)
+			errx(1, "Cannot find status entry %s", item->name);
+	}
+
+	*max_iosize =
+	    req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
+	*max_blk = req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
+	*min_blk = req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
+	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
+
+bailout:
+	free(xml_str);
+	mt_status_free(&status_data);
+
+	return (retval);
+}
+
+/*
+ * Build a camdd device for a descriptor that is accessed with ordinary file
+ * I/O rather than through the pass(4) driver.
+ *
+ * fd          - open descriptor for the file or device
+ * io_opts     - user-supplied I/O options (device name, blocksize, queue
+ *               depth, offset)
+ * retry_count - handed to camdd_alloc_dev()
+ * timeout     - handed to camdd_alloc_dev()
+ *
+ * Unless fd is stdin or stdout, the descriptor is examined with fstat(2)
+ * (plus FIODTYPE for character devices) to classify it, and the blocksize,
+ * sector size and maximum sector are adjusted for regular files, tapes and
+ * disks.
+ *
+ * Returns the new device, or NULL if the device could not be allocated,
+ * fstat(2) failed, or an offset was requested for a descriptor we cannot
+ * seek on.  Several unrecoverable conditions exit via err(3)/errx(3).
+ */
+struct camdd_dev *
+camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
+    int timeout)
+{
+	struct camdd_dev *dev = NULL;
+	struct camdd_dev_file *file_dev;
+	uint64_t blocksize = io_opts->blocksize;
+
+	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
+	if (dev == NULL)
+		goto bailout;
+
+	file_dev = &dev->dev_spec.file;
+	file_dev->fd = fd;
+	strlcpy(file_dev->filename, io_opts->dev_name,
+	    sizeof(file_dev->filename));
+	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
+	if (blocksize == 0)
+		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
+	else
+		dev->blocksize = blocksize;
+
+	/* File I/O is synchronous, so any other queue depth is ignored. */
+	if ((io_opts->queue_depth != 0)
+	 && (io_opts->queue_depth != 1)) {
+		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
+		    "command supported", (uintmax_t)io_opts->queue_depth,
+		    io_opts->dev_name);
+	}
+	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
+	dev->run = camdd_file_run;
+	dev->fetch = NULL;
+
+	/*
+	 * We can effectively access files on byte boundaries.  We'll reset
+	 * this for devices like disks that can be accessed on sector
+	 * boundaries.
+	 */
+	dev->sector_size = 1;
+
+	if ((fd != STDIN_FILENO)
+	 && (fd != STDOUT_FILENO)) {
+		int retval;
+
+		retval = fstat(fd, &file_dev->sb);
+		if (retval != 0) {
+			/*
+			 * Release the partially initialized device and
+			 * report the failure to the caller with NULL.
+			 */
+			warn("Cannot stat %s", dev->device_name);
+			goto bailout_error;
+		}
+		if (S_ISREG(file_dev->sb.st_mode)) {
+			file_dev->file_type = CAMDD_FILE_REG;
+		} else if (S_ISCHR(file_dev->sb.st_mode)) {
+			int type;
+
+			if (ioctl(fd, FIODTYPE, &type) == -1)
+				err(1, "FIODTYPE ioctl failed on %s",
+				    dev->device_name);
+			else {
+				if (type & D_TAPE)
+					file_dev->file_type = CAMDD_FILE_TAPE;
+				else if (type & D_DISK)
+					file_dev->file_type = CAMDD_FILE_DISK;
+				else if (type & D_MEM)
+					file_dev->file_type = CAMDD_FILE_MEM;
+				else if (type & D_TTY)
+					file_dev->file_type = CAMDD_FILE_TTY;
+			}
+		} else if (S_ISDIR(file_dev->sb.st_mode)) {
+			errx(1, "cannot operate on directory %s",
+			    dev->device_name);
+		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
+			file_dev->file_type = CAMDD_FILE_PIPE;
+		} else
+			errx(1, "Cannot determine file type for %s",
+			    dev->device_name);
+
+		switch (file_dev->file_type) {
+		case CAMDD_FILE_REG:
+			/* With a sector size of 1, max_sector is in bytes. */
+			if (file_dev->sb.st_size != 0)
+				dev->max_sector = file_dev->sb.st_size - 1;
+			else
+				dev->max_sector = 0;
+			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
+			break;
+		case CAMDD_FILE_TAPE: {
+			uint64_t max_iosize, max_blk, min_blk, blk_gran;
+			/*
+			 * Check block limits and maximum effective iosize.
+			 * Make sure the blocksize is within the block
+			 * limits (and a multiple of the minimum blocksize)
+			 * and that the blocksize is <= maximum effective
+			 * iosize.
+			 */
+			retval = camdd_probe_tape(fd, dev->device_name,
+			    &max_iosize, &max_blk, &min_blk, &blk_gran);
+			if (retval != 0)
+				errx(1, "Unable to probe tape %s",
+				    dev->device_name);
+
+			/*
+			 * The blocksize needs to be <= the maximum
+			 * effective I/O size of the tape device.  Note
+			 * that this also takes into account the maximum
+			 * blocksize reported by READ BLOCK LIMITS.
+			 */
+			if (dev->blocksize > max_iosize) {
+				warnx("Blocksize %u too big for %s, limiting "
+				    "to %ju", dev->blocksize, dev->device_name,
+				    (uintmax_t)max_iosize);
+				dev->blocksize = max_iosize;
+			}
+
+			/*
+			 * The blocksize needs to be at least min_blk;
+			 */
+			if (dev->blocksize < min_blk) {
+				warnx("Blocksize %u too small for %s, "
+				    "increasing to %ju", dev->blocksize,
+				    dev->device_name, (uintmax_t)min_blk);
+				dev->blocksize = min_blk;
+			}
+
+			/*
+			 * And the blocksize needs to be a multiple of
+			 * the block granularity.
+			 */
+			if ((blk_gran != 0)
+			 && (dev->blocksize % (1 << blk_gran))) {
+				warnx("Blocksize %u for %s not a multiple of "
+				    "%d, adjusting to %d", dev->blocksize,
+				    dev->device_name, (1 << blk_gran),
+				    dev->blocksize & ~((1 << blk_gran) - 1));
+				dev->blocksize &= ~((1 << blk_gran) - 1);
+			}
+
+			if (dev->blocksize == 0) {
+				errx(1, "Unable to derive valid blocksize for "
+				    "%s", dev->device_name);
+			}
+
+			/*
+			 * For tape drives, set the sector size to the
+			 * blocksize so that we make sure not to write
+			 * less than the blocksize out to the drive.
+			 */
+			dev->sector_size = dev->blocksize;
+			break;
+		}
+		case CAMDD_FILE_DISK: {
+			off_t media_size;
+			unsigned int sector_size;
+
+			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
+
+			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
+				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
+				    dev->device_name);
+			}
+
+			if (sector_size == 0) {
+				errx(1, "DIOCGSECTORSIZE ioctl returned "
+				    "invalid sector size %u for %s",
+				    sector_size, dev->device_name);
+			}
+
+			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
+				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
+				    dev->device_name);
+			}
+
+			if (media_size == 0) {
+				errx(1, "DIOCGMEDIASIZE ioctl returned "
+				    "invalid media size %ju for %s",
+				    (uintmax_t)media_size, dev->device_name);
+			}
+
+			if (dev->blocksize % sector_size) {
+				errx(1, "%s blocksize %u not a multiple of "
+				    "sector size %u", dev->device_name,
+				    dev->blocksize, sector_size);
+			}
+
+			dev->sector_size = sector_size;
+			dev->max_sector = (media_size / sector_size) - 1;
+			break;
+		}
+		case CAMDD_FILE_MEM:
+			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* An offset only makes sense when the descriptor can seek. */
+	if ((io_opts->offset != 0)
+	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
+		warnx("Offset %ju specified for %s, but we cannot seek on %s",
+		    (uintmax_t)io_opts->offset, io_opts->dev_name,
+		    io_opts->dev_name);
+		goto bailout_error;
+	}
+#if 0
+	else if ((io_opts->offset != 0)
+		&& ((io_opts->offset % dev->sector_size) != 0)) {
+		warnx("Offset %ju for %s is not a multiple of the "
+		      "sector size %u", io_opts->offset, 
+		      io_opts->dev_name, dev->sector_size);
+		goto bailout_error;
+	} else {
+		dev->start_offset_bytes = io_opts->offset;
+	}
+#endif
+
+bailout:
+	return (dev);
+
+bailout_error:
+	camdd_free_dev(dev);
+	return (NULL);
+}
+
+/*
+ * Probe a SCSI device that will be accessed through the pass(4) driver:
+ * - Check the inquiry data, make sure we're talking to a device type that
+ *   we handle here -- direct, WORM, CD, optical, RBC.
+ * - Get the capacity and block size with READ CAPACITY (10), and with
+ *   READ CAPACITY (16) when the 10 byte command reports a last block of
+ *   2^32-1.
+ * - Send an XPT_PATH_INQ to learn the controller's maximum I/O size.
+ * - Derive a default blocksize and apply the user's I/O options.
+ * Note that no TEST UNIT READY is sent.
+ *
+ * cam_dev            - open CAM device
+ * io_opts            - user-supplied blocksize, queue depth, offset and
+ *                      minimum command size
+ * arglist            - CAMDD_ARG_ERR_RECOVER enables pass(4) error recovery
+ *                      for the probe commands
+ * probe_retry_count,
+ * probe_timeout      - retry count and timeout for the probe commands; a
+ *                      timeout of 0 selects 5000
+ * io_retry_count,
+ * io_timeout         - handed to camdd_alloc_dev()
+ *
+ * Returns the new device, or NULL on failure.  An unsupported device type
+ * exits via errx(3).
+ */
+struct camdd_dev *
+camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
+		 camdd_argmask arglist, int probe_retry_count,
+		 int probe_timeout, int io_retry_count, int io_timeout)
+{
+	union ccb *ccb;
+	uint64_t maxsector;
+	uint32_t cpi_maxio, max_iosize, pass_numblocks;
+	uint32_t block_len;
+	struct scsi_read_capacity_data rcap;
+	struct scsi_read_capacity_data_long rcaplong;
+	struct camdd_dev *dev;
+	struct camdd_dev_pass *pass_dev;
+	struct kevent ke;
+	int scsi_dev_type;
+
+	dev = NULL;
+
+	scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
+	maxsector = 0;
+	block_len = 0;
+
+	/*
+	 * For devices that support READ CAPACITY, we'll attempt to get the
+	 * capacity.  Otherwise, we really don't support tape or other
+	 * devices via SCSI passthrough, so just return an error in that case.
+	 */
+	switch (scsi_dev_type) {
+	case T_DIRECT:
+	case T_WORM:
+	case T_CDROM:
+	case T_OPTICAL:
+	case T_RBC:
+		break;
+	default:
+		errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
+		break; /*NOTREACHED*/
+	}
+
+	ccb = cam_getccb(cam_dev);
+
+	if (ccb == NULL) {
+		warnx("%s: error allocating ccb", __func__);
+		goto bailout;
+	}
+
+	/* Clear everything in the SCSI I/O CCB except the header. */
+	bzero(&(&ccb->ccb_h)[1],
+	      sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
+
+	scsi_read_capacity(&ccb->csio,
+			   /*retries*/ probe_retry_count,
+			   /*cbfcnp*/ NULL,
+			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
+			   &rcap,
+			   SSD_FULL_SIZE,
+			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
+
+	/* Disable freezing the device queue */
+	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
+
+	if (arglist & CAMDD_ARG_ERR_RECOVER)
+		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
+
+	if (cam_send_ccb(cam_dev, ccb) < 0) {
+		warn("error sending READ CAPACITY command");
+
+		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
+				CAM_EPF_ALL, stderr);
+
+		goto bailout;
+	}
+
+	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
+		goto bailout;
+	}
+
+	maxsector = scsi_4btoul(rcap.addr);
+	block_len = scsi_4btoul(rcap.length);
+
+	/*
+	 * A last block of 2^32-1 means that the true capacity is over 2TB,
+	 * and we need to issue the long READ CAPACITY to get the real
+	 * capacity.  Otherwise, we're all set.
+	 */
+	if (maxsector != 0xffffffff)
+		goto rcap_done;
+
+	scsi_read_capacity_16(&ccb->csio,
+			      /*retries*/ probe_retry_count,
+			      /*cbfcnp*/ NULL,
+			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
+			      /*lba*/ 0,
+			      /*reladdr*/ 0,
+			      /*pmi*/ 0,
+			      (uint8_t *)&rcaplong,
+			      sizeof(rcaplong),
+			      /*sense_len*/ SSD_FULL_SIZE,
+			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
+
+	/* Disable freezing the device queue */
+	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
+
+	if (arglist & CAMDD_ARG_ERR_RECOVER)
+		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
+
+	if (cam_send_ccb(cam_dev, ccb) < 0) {
+		warn("error sending READ CAPACITY (16) command");
+
+		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
+				CAM_EPF_ALL, stderr);
+
+		goto bailout;
+	}
+
+	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
+		goto bailout;
+	}
+
+	maxsector = scsi_8btou64(rcaplong.addr);
+	block_len = scsi_4btoul(rcaplong.length);
+
+rcap_done:
+
+	/* Reuse the same CCB for the path inquiry. */
+	bzero(&(&ccb->ccb_h)[1],
+	      sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
+
+	ccb->ccb_h.func_code = XPT_PATH_INQ;
+	ccb->ccb_h.flags = CAM_DIR_NONE;
+	ccb->ccb_h.retry_count = 1;
+
+	if (cam_send_ccb(cam_dev, ccb) < 0) {
+		warn("error sending XPT_PATH_INQ CCB");
+
+		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
+				CAM_EPF_ALL, stderr);
+		goto bailout;
+	}
+
+	/* The device's kqueue watches the pass(4) descriptor for reads. */
+	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
+
+	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
+			      io_timeout);
+	if (dev == NULL)
+		goto bailout;
+
+	pass_dev = &dev->dev_spec.pass;
+	pass_dev->scsi_dev_type = scsi_dev_type;
+	pass_dev->dev = cam_dev;
+	pass_dev->max_sector = maxsector;
+	pass_dev->block_len = block_len;
+	pass_dev->cpi_maxio = ccb->cpi.maxio;
+	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
+		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
+	dev->max_sector = maxsector;
+
+	/*
+	 * Determine the optimal blocksize to use for this device.
+	 */
+
+	/*
+	 * If the controller has not specified a maximum I/O size,
+	 * just go with 128K as a somewhat conservative value.
+	 */
+	if (pass_dev->cpi_maxio == 0)
+		cpi_maxio = 131072;
+	else
+		cpi_maxio = pass_dev->cpi_maxio;
+
+	/*
+	 * If the controller has a large maximum I/O size, limit it
+	 * to something smaller so that the kernel doesn't have trouble
+	 * allocating buffers to copy data in and out for us.
+	 * XXX KDM this is until we have unmapped I/O support in the kernel.
+	 */
+	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
+
+	/*
+	 * If we weren't able to get a block size for some reason,
+	 * default to 512 bytes.
+	 */
+	block_len = pass_dev->block_len;
+	if (block_len == 0)
+		block_len = 512;
+
+	/*
+	 * Use the defaulted block length as the sector size so that the
+	 * sector size is never zero; it is used as a divisor below.
+	 */
+	dev->sector_size = block_len;
+
+	/*
+	 * Figure out how many blocksize chunks will fit in the
+	 * maximum I/O size.
+	 */
+	pass_numblocks = max_iosize / block_len;
+
+	/*
+	 * And finally, multiply the number of blocks by the LBA
+	 * length to get our maximum block size;
+	 */
+	dev->blocksize = pass_numblocks * block_len;
+
+	/* A user-specified blocksize overrides the derived one. */
+	if (io_opts->blocksize != 0) {
+		if ((io_opts->blocksize % dev->sector_size) != 0) {
+			warnx("Blocksize %ju for %s is not a multiple of "
+			      "sector size %u", (uintmax_t)io_opts->blocksize,
+			      dev->device_name, dev->sector_size);
+			goto bailout_error;
+		}
+		dev->blocksize = io_opts->blocksize;
+	}
+	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
+	if (io_opts->queue_depth != 0)
+		dev->target_queue_depth = io_opts->queue_depth;
+
+	if (io_opts->offset != 0) {
+		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
+			warnx("Offset %ju is past the end of device %s",
+			    (uintmax_t)io_opts->offset, dev->device_name);
+			goto bailout_error;
+		}
+#if 0
+		else if ((io_opts->offset % dev->sector_size) != 0) {
+			warnx("Offset %ju for %s is not a multiple of the "
+			      "sector size %u", io_opts->offset, 
+			      dev->device_name, dev->sector_size);
+			goto bailout_error;
+		}
+		dev->start_offset_bytes = io_opts->offset;
+#endif
+	}
+
+	dev->min_cmd_size = io_opts->min_cmd_size;
+
+	dev->run = camdd_pass_run;
+	dev->fetch = camdd_pass_fetch;
+
+bailout:
+	/* dev is still NULL here on every failure path that jumps in. */
+	cam_freeccb(ccb);
+
+	return (dev);
+
+bailout_error:
+	cam_freeccb(ccb);
+
+	camdd_free_dev(dev);
+
+	return (NULL);
+}
+
+/*
+ * Thread body for one side (reader or writer) of a copy.
+ *
+ * arg is the struct camdd_dev this thread services.  dev->mutex is held for
+ * the whole loop except while the thread is inside kevent().  Each pass
+ * through the loop:
+ *  - on the reader side (write_dev == 0), queues more I/O with
+ *    camdd_queue(dev, NULL) while the combined queue depth or the number
+ *    of queued bytes is below the targets computed in the loop condition;
+ *  - starts queued I/O through dev->run() while cur_active_io is below
+ *    target_queue_depth;
+ *  - checks whether either side has reached EOF and everything has
+ *    drained, and if so exits;
+ *  - waits in kevent() and dispatches on the filter of the returned event.
+ *
+ * On exit the ACTIVE flag is cleared, need_exit is set and camdd_sem is
+ * posted.  Errors are reported through the global error_exit.  Always
+ * returns NULL.
+ */
+void *
+camdd_worker(void *arg)
+{
+	struct camdd_dev *dev = arg;
+	struct camdd_buf *buf;
+	struct timespec ts, *kq_ts;
+
+	/* A zero timeout; passing it to kevent() makes the call a poll. */
+	ts.tv_sec = 0;
+	ts.tv_nsec = 0;
+
+	pthread_mutex_lock(&dev->mutex);
+
+	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
+
+	for (;;) {
+		struct kevent ke;
+		int retval = 0;
+
+		/*
+		 * XXX KDM check the reorder queue depth?
+		 */
+		if (dev->write_dev == 0) {
+			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
+			uint32_t target_depth = dev->target_queue_depth;
+			uint32_t peer_target_depth =
+			    dev->peer_dev->target_queue_depth;
+			uint32_t peer_blocksize = dev->peer_dev->blocksize;
+
+			camdd_get_depth(dev, &our_depth, &peer_depth,
+					&our_bytes, &peer_bytes);
+
+#if 0
+			while (((our_depth < target_depth)
+			     && (peer_depth < peer_target_depth))
+			    || ((peer_bytes + our_bytes) <
+				 (peer_blocksize * 2))) {
+#endif
+			/*
+			 * Keep queueing while the combined depth is below
+			 * the combined target, or fewer than three peer
+			 * blocks worth of bytes are queued.
+			 */
+			while (((our_depth + peer_depth) <
+			        (target_depth + peer_target_depth))
+			    || ((peer_bytes + our_bytes) <
+				(peer_blocksize * 3))) {
+
+				/*
+				 * A return of 1 just ends this fill loop;
+				 * any other non-zero value is an error.
+				 */
+				retval = camdd_queue(dev, NULL);
+				if (retval == 1)
+					break;
+				else if (retval != 0) {
+					error_exit = 1;
+					goto bailout;
+				}
+
+				camdd_get_depth(dev, &our_depth, &peer_depth,
+						&our_bytes, &peer_bytes);
+			}
+		}
+		/*
+		 * See if we have any I/O that is ready to execute.
+		 */
+		buf = STAILQ_FIRST(&dev->run_queue);
+		if (buf != NULL) {
+			while (dev->target_queue_depth > dev->cur_active_io) {
+				/*
+				 * -1 from the run method is recorded as
+				 * EOF plus an error exit; any other
+				 * non-zero value just stops issuing I/O.
+				 */
+				retval = dev->run(dev);
+				if (retval == -1) {
+					dev->flags |= CAMDD_DEV_FLAG_EOF;
+					error_exit = 1;
+					break;
+				} else if (retval != 0) {
+					break;
+				}
+			}
+		}
+
+		/*
+		 * We've reached EOF, or our partner has reached EOF.
+		 */
+		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
+		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
+			if (dev->write_dev != 0) {
+				/*
+				 * The writer is done once nothing is left
+				 * to queue, run, or complete.
+				 */
+			 	if ((STAILQ_EMPTY(&dev->work_queue))
+				 && (dev->num_run_queue == 0)
+				 && (dev->cur_active_io == 0)) {
+					goto bailout;
+				}
+			} else {
+				/*
+				 * If we're the reader, and the writer
+				 * got EOF, he is already done.  If we got
+				 * the EOF, then we need to wait until
+				 * everything is flushed out for the writer.
+				 */
+				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
+					goto bailout;
+				} else if ((dev->num_peer_work_queue == 0)
+					&& (dev->num_peer_done_queue == 0)
+					&& (dev->cur_active_io == 0)
+					&& (dev->num_run_queue == 0)) {
+					goto bailout;
+				}
+			}
+			/*
+			 * XXX KDM need to do something about the pending
+			 * queue and cleanup resources.
+			 */
+		} 
+
+		/*
+		 * A reader with no I/O in flight and less than one peer
+		 * block queued to the writer only polls for events, so it
+		 * can go straight back to queueing more I/O.  In every
+		 * other case block until an event arrives.
+		 */
+		if ((dev->write_dev == 0)
+		 && (dev->cur_active_io == 0)
+		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
+			kq_ts = &ts;
+		else
+			kq_ts = NULL;
+
+		/*
+		 * Run kevent to see if there are events to process.  The
+		 * mutex is dropped around the call so the peer thread can
+		 * take it while we wait.
+		 */
+		pthread_mutex_unlock(&dev->mutex);
+		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
+		pthread_mutex_lock(&dev->mutex);
+		if (retval == -1) {
+			warn("%s: error returned from kevent",__func__);
+			goto bailout;
+		} else if (retval != 0) {
+			switch (ke.filter) {
+			case EVFILT_READ:
+				/* Collect completed I/O, if supported. */
+				if (dev->fetch != NULL) {
+					retval = dev->fetch(dev);
+					if (retval == -1) {
+						error_exit = 1;
+						goto bailout;
+					}
+				}
+				break;
+			case EVFILT_SIGNAL:
+				/*
+				 * We register for this so we don't get
+				 * an error as a result of a SIGINFO or a
+				 * SIGINT.  It will actually get handled
+				 * by the signal handler.  If we get a
+				 * SIGINT, bail out without printing an
+				 * error message.  Any other signals
+				 * will result in the error message above.
+				 */
+				if (ke.ident == SIGINT)
+					goto bailout;
+				break;
+			case EVFILT_USER:
+				retval = 0;
+				/*
+				 * Check to see if the other thread has
+				 * queued any I/O for us to do.  (In this
+				 * case we're the writer.)
+				 */
+				for (buf = STAILQ_FIRST(&dev->work_queue);
+				     buf != NULL;
+				     buf = STAILQ_FIRST(&dev->work_queue)) {
+					STAILQ_REMOVE_HEAD(&dev->work_queue,
+							   work_links);
+					retval = camdd_queue(dev, buf);
+					/*
+					 * We keep going unless we get an
+					 * actual error.  If we get EOF, we
+					 * still want to remove the buffers
+					 * from the queue and send them back
+					 * to the reader thread.
+					 */
+					if (retval == -1) {
+						error_exit = 1;
+						goto bailout;
+					} else
+						retval = 0;
+				}
+
+				/*
+				 * Next check to see if the other thread has
+				 * queued any completed buffers back to us.
+				 * (In this case we're the reader.)
+				 */
+				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
+				     buf != NULL;
+				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
+					STAILQ_REMOVE_HEAD(
+					    &dev->peer_done_queue, work_links);
+					dev->num_peer_done_queue--;
+					camdd_peer_done(buf);
+				}
+				break;
+			default:
+				warnx("%s: unknown kevent filter %d",
+				      __func__, ke.filter);
+				break;
+			}
+		}
+	}
+
+bailout:
+
+	/* Let the peer see that this thread no longer accepts work. */
+	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
+
+	/* XXX KDM cleanup resources here? */
+
+	pthread_mutex_unlock(&dev->mutex);
+
+	/* Record that a worker has finished and post the semaphore. */
+	need_exit = 1;
+	sem_post(&camdd_sem);
+
+	return (NULL);
+}
+
+/*
+ * Map the completion status of a CCB onto a camdd buffer status.
+ *
+ * A successfully completed request is OK when there is no residual, a
+ * short I/O when only part of the transfer is residual, and EOF when
+ * nothing was transferred.  A SCSI status error is OK only for the
+ * non-error SCSI status codes listed below.  Everything else is an error.
+ */
+camdd_buf_status
+camdd_ccb_status(union ccb *ccb)
+{
+	cam_status cam_stat;
+
+	cam_stat = ccb->ccb_h.status & CAM_STATUS_MASK;
+
+	if (cam_stat == CAM_REQ_CMP) {
+		/* Classify a completed request by its residual. */
+		if (ccb->csio.resid == 0)
+			return (CAMDD_STATUS_OK);
+		if (ccb->csio.dxfer_len > ccb->csio.resid)
+			return (CAMDD_STATUS_SHORT_IO);
+		return (CAMDD_STATUS_EOF);
+	}
+
+	if (cam_stat == CAM_SCSI_STATUS_ERROR) {
+		switch (ccb->csio.scsi_status) {
+		case SCSI_STATUS_OK:
+		case SCSI_STATUS_COND_MET:
+		case SCSI_STATUS_INTERMED:
+		case SCSI_STATUS_INTERMED_COND_MET:
+			return (CAMDD_STATUS_OK);
+		default:
+			/*
+			 * Check condition, busy, queue full, reservation
+			 * conflict, terminated and anything unknown.
+			 */
+			return (CAMDD_STATUS_ERROR);
+		}
+	}
+
+	/* Any other CAM status is treated as a failure. */
+	return (CAMDD_STATUS_ERROR);
+}
+
+/*
+ * Queue a buffer to our peer's work thread for writing.
+ *
+ * Buffers are handed to the writer strictly in ascending offset order.  A
+ * buffer that is not the next expected one is parked on the reorder queue
+ * (kept sorted by LBA) and 0 is returned.  Otherwise the buffer, plus any
+ * now-sequential buffers from the reorder queue, is recorded on our
+ * peer_work_queue and moved to the peer's work_queue, and the peer is
+ * woken with an EVFILT_USER event.
+ *
+ * Called with dev->mutex held; the mutex is dropped and reacquired while
+ * the peer's mutex is held.
+ *
+ * Returns 0 for success, -1 if the kevent() notification failed, and 1 if
+ * the other thread is no longer active or the only buffers to send were
+ * redundant EOF buffers (in both cases the buffers are released).
+ */
+int
+camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
+{
+	struct kevent ke;
+	STAILQ_HEAD(, camdd_buf) local_queue;
+	struct camdd_buf *buf1, *buf2;
+	struct camdd_buf_data *data = NULL;
+	uint64_t peer_bytes_queued = 0;
+	int active = 1;
+	int retval = 0;
+
+	STAILQ_INIT(&local_queue);
+
+	/*
+	 * Since we're the reader, we need to queue our I/O to the writer
+	 * in sequential order in order to make sure it gets written out
+	 * in sequential order.
+	 *
+	 * Check the next expected I/O starting offset.  If this doesn't
+	 * match, put it on the reorder queue.
+	 */
+	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
+
+		/*
+		 * If there is nothing on the queue, there is no sorting
+		 * needed.
+		 */
+		if (STAILQ_EMPTY(&dev->reorder_queue)) {
+			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
+			dev->num_reorder_queue++;
+			goto bailout;
+		}
+
+		/*
+		 * Sort in ascending order by starting LBA.  There should
+		 * be no identical LBAs.
+		 */
+		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
+		     buf1 = buf2) {
+			buf2 = STAILQ_NEXT(buf1, links);
+			if (buf->lba < buf1->lba) {
+				/*
+				 * If we're less than the first one, then
+				 * we insert at the head of the list
+				 * because this has to be the first element
+				 * on the list.
+				 */
+				STAILQ_INSERT_HEAD(&dev->reorder_queue,
+						   buf, links);
+				dev->num_reorder_queue++;
+				break;
+			} else if (buf->lba > buf1->lba) {
+				/*
+				 * Insert after buf1 if it is the last
+				 * element or the next element starts past
+				 * us; otherwise keep walking the list.
+				 */
+				if (buf2 == NULL) {
+					STAILQ_INSERT_TAIL(&dev->reorder_queue, 
+					    buf, links);
+					dev->num_reorder_queue++;
+					break;
+				} else if (buf->lba < buf2->lba) {
+					STAILQ_INSERT_AFTER(&dev->reorder_queue,
+					    buf1, buf, links);
+					dev->num_reorder_queue++;
+					break;
+				}
+			} else {
+				errx(1, "Found buffers with duplicate LBA %ju!",
+				     buf->lba);
+			}
+		}
+		goto bailout;
+	} else {
+
+		/*
+		 * We're the next expected I/O completion, so put ourselves
+		 * on the local queue to be sent to the writer.  We use
+		 * work_links here so that we can queue this to the
+		 * peer_work_queue before taking the buffer off of the
+		 * local_queue.
+		 */
+		dev->next_completion_pos_bytes += buf->len;
+		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
+
+		/*
+		 * Go through the reorder queue looking for more sequential
+		 * I/O and add it to the local queue.
+		 */
+		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
+		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
+			/*
+			 * As soon as we see an I/O that is out of sequence,
+			 * we're done.
+			 */
+			if ((buf1->lba * dev->sector_size) !=
+			     dev->next_completion_pos_bytes)
+				break;
+
+			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
+			dev->num_reorder_queue--;
+			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
+			dev->next_completion_pos_bytes += buf1->len;
+		}
+	}
+
+	/*
+	 * Setup the event to let the other thread know that it has work
+	 * pending.
+	 */
+	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
+	       NOTE_TRIGGER, 0, NULL);
+
+	/*
+	 * Put this on our shadow queue so that we know what we've queued
+	 * to the other thread.
+	 */
+	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
+		if (buf1->buf_type != CAMDD_BUF_DATA) {
+			errx(1, "%s: should have a data buffer, not an "
+			    "indirect buffer", __func__);
+		}
+		data = &buf1->buf_type_spec.data;
+
+		/*
+		 * We only need to send one EOF to the writer, and don't
+		 * need to continue sending EOFs after that.
+		 */
+		if (buf1->status == CAMDD_STATUS_EOF) {
+			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
+				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
+				    work_links);
+				camdd_release_buf(buf1);
+				retval = 1;
+				continue;
+			}
+			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
+		}
+
+
+		/*
+		 * Account for the buffer both in this call's running total
+		 * (used to undo the accounting if the peer turns out to be
+		 * inactive) and in the device-wide counters.
+		 */
+		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
+		peer_bytes_queued += (data->fill_len - data->resid);
+		dev->peer_bytes_queued += (data->fill_len - data->resid);
+		dev->num_peer_work_queue++;
+	}
+
+	/* Every buffer may have been dropped as a redundant EOF. */
+	if (STAILQ_FIRST(&local_queue) == NULL)
+		goto bailout;
+
+	/*
+	 * Drop our mutex and pick up the other thread's mutex.  We need to
+	 * do this to avoid deadlocks.
+	 */
+	pthread_mutex_unlock(&dev->mutex);
+	pthread_mutex_lock(&dev->peer_dev->mutex);
+
+	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
+		/*
+		 * Put the buffers on the other thread's incoming work queue.
+		 */
+		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
+		     buf1 = STAILQ_FIRST(&local_queue)) {
+			STAILQ_REMOVE_HEAD(&local_queue, work_links);
+			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
+					   work_links);
+		}
+		/*
+		 * Send an event to the other thread's kqueue to let it know
+		 * that there is something on the work queue.
+		 */
+		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
+		if (retval == -1)
+			warn("%s: unable to add peer work_queue kevent",
+			     __func__);
+		else
+			retval = 0;
+	} else
+		active = 0;
+
+	pthread_mutex_unlock(&dev->peer_dev->mutex);
+	pthread_mutex_lock(&dev->mutex);
+
+	/*
+	 * If the other side isn't active, run through the queue and
+	 * release all of the buffers, undoing the shadow queue accounting
+	 * done above.
+	 */
+	if (active == 0) {
+		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
+		     buf1 = STAILQ_FIRST(&local_queue)) {
+			STAILQ_REMOVE_HEAD(&local_queue, work_links);
+			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
+				      links);
+			dev->num_peer_work_queue--;
+			camdd_release_buf(buf1);
+		}
+		dev->peer_bytes_queued -= peer_bytes_queued;
+		retval = 1;
+	}
+
+bailout:
+	return (retval);
+}
+
+/*
+ * Hand a buffer back to the reader thread once we have finished writing
+ * it.
+ *
+ * Called with dev->mutex held.  The mutex is released while the peer's
+ * mutex is held and is reacquired before returning.
+ *
+ * Returns 0 on success, or -1 if the peer could not be notified.
+ */
+int
+camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
+{
+	struct camdd_dev *peer = dev->peer_dev;
+	struct kevent done_ev;
+	int rc;
+
+	/*
+	 * Prepare the user event that tells the peer thread a buffer has
+	 * been completed.
+	 */
+	EV_SET(&done_ev, (uintptr_t)&peer->peer_done_queue, EVFILT_USER, 0,
+	       NOTE_TRIGGER, 0, NULL);
+
+	/*
+	 * Swap locks: drop ours and take the peer's before manipulating
+	 * the peer's done queue.
+	 */
+	pthread_mutex_unlock(&dev->mutex);
+	pthread_mutex_lock(&peer->mutex);
+
+	/* The completed buffer goes on the tail of the peer's done queue. */
+	STAILQ_INSERT_TAIL(&peer->peer_done_queue, peer_buf, work_links);
+	peer->num_peer_done_queue++;
+
+	/* Wake the peer thread so it notices the new entry. */
+	rc = kevent(peer->kq, &done_ev, 1, NULL, 0, NULL);
+	if (rc != -1)
+		rc = 0;
+	else
+		warn("%s: unable to add peer_done_queue kevent", __func__);
+
+	/* Swap the locks back before returning to the caller. */
+	pthread_mutex_unlock(&peer->mutex);
+	pthread_mutex_lock(&dev->mutex);
+
+	return (rc);
+}
+
+/*
+ * Reclaim a buffer that the writer thread has finished with and returned
+ * to the reader thread.  The buffer is removed from the owning device's
+ * peer work queue, the queued byte count is reduced, a peer EOF is
+ * recorded if the buffer carries EOF status, and the buffer is put on the
+ * free queue.
+ */
+void
+camdd_peer_done(struct camdd_buf *buf)
+{
+	struct camdd_dev *owner = buf->dev;
+	struct camdd_buf_data *buf_data;
+
+	/* Only data buffers are ever sent to the peer. */
+	if (buf->buf_type != CAMDD_BUF_DATA) {
+		errx(1, "%s: should have a data buffer, not an "
+		    "indirect buffer", __func__);
+	}
+	buf_data = &buf->buf_type_spec.data;
+
+	/* Undo the accounting done when the buffer was queued. */
+	STAILQ_REMOVE(&owner->peer_work_queue, buf, camdd_buf, links);
+	owner->num_peer_work_queue -= 1;
+	owner->peer_bytes_queued -= (buf_data->fill_len - buf_data->resid);
+
+	/* An EOF status on the returned buffer means the peer hit EOF. */
+	if (buf->status == CAMDD_STATUS_EOF)
+		owner->flags |= CAMDD_DEV_FLAG_PEER_EOF;
+
+	STAILQ_INSERT_TAIL(&owner->free_queue, buf, links);
+}
+
+/*
+ * Finish processing a completed buffer.  Assumes caller holds the lock
+ * for this device.
+ *
+ * On the reader side the buffer is queued to the writer, and *error_count
+ * is incremented if that does not return 0.  On the writer side every
+ * source buffer attached to buf inherits buf's status and is returned to
+ * the reader (directly for data buffers, or through the indirect buffer's
+ * source once its reference count drops to zero), and buf itself goes
+ * back on the free queue.
+ */
+void
+camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
+		   int *error_count)
+{
+	struct camdd_buf *child, *child_next;
+
+	/* Reader: pass the completed I/O along to the writer. */
+	if (dev->write_dev == 0) {
+		if (camdd_queue_peer_buf(dev, buf) != 0)
+			(*error_count)++;
+		return;
+	}
+
+	/* Writer: release every source buffer that fed this write. */
+	STAILQ_FOREACH_SAFE(child, &buf->src_list, src_links, child_next) {
+		STAILQ_REMOVE(&buf->src_list, child, camdd_buf, src_links);
+
+		child->status = buf->status;
+
+		if (child->buf_type != CAMDD_BUF_DATA) {
+			struct camdd_buf *parent;
+
+			parent = child->buf_type_spec.indirect.src_buf;
+			parent->refcount--;
+			/*
+			 * XXX KDM we probably need to account for
+			 * exactly how many bytes we were able to
+			 * write.  Allocate the residual to the
+			 * first N buffers?  Or just track the
+			 * number of bytes written?  Right now the reader
+			 * doesn't do anything with a residual.
+			 */
+			parent->status = buf->status;
+			if (parent->refcount <= 0)
+				camdd_complete_peer_buf(dev, parent);
+			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
+					   child, links);
+		} else {
+			/* Data buffers go straight back to the reader. */
+			camdd_complete_peer_buf(dev, child);
+		}
+	}
+
+	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
+}
+
+/*
+ * Drain every completed CCB from the pass(4) device.
+ *
+ * The device mutex is released on entry, taken around the processing of
+ * each fetched CCB, and taken again before returning.
+ *
+ * Returns -1 if any fetched command was counted as an error, otherwise the
+ * number of commands fetched (0 when nothing was available).
+ */
+int
+camdd_pass_fetch(struct camdd_dev *dev)
+{
+	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
+	union ccb done_ccb;
+	int fetched = 0, errors = 0;
+
+	pthread_mutex_unlock(&dev->mutex);
+
+	for (;;) {
+		struct camdd_buf *done_buf;
+		struct camdd_buf_data *done_data;
+
+		/*
+		 * Any ioctl failure ends the loop.
+		 * XXX KDM we don't distinguish between EFAULT and ENOENT.
+		 */
+		if (ioctl(pass_dev->dev->fd, CAMIOGET, &done_ccb) == -1)
+			break;
+
+		fetched++;
+
+		/* The CCB carries a pointer back to the buffer it belongs to. */
+		done_buf = done_ccb.ccb_h.ccb_buf;
+		done_data = &done_buf->buf_type_spec.data;
+
+		/*
+		 * Refresh the buffer's own CCB so it holds the status,
+		 * sense data, etc. that came back.
+		 */
+		bcopy(&done_ccb, &done_data->ccb, sizeof(done_ccb));
+
+		pthread_mutex_lock(&dev->mutex);
+
+		/* This I/O is no longer outstanding. */
+		STAILQ_REMOVE(&dev->active_queue, done_buf, camdd_buf, links);
+		dev->cur_active_io--;
+
+		if ((done_ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+			cam_error_print(pass_dev->dev, &done_ccb, CAM_ESF_ALL,
+					CAM_EPF_ALL, stderr);
+		}
+
+		done_data->resid = done_ccb.csio.resid;
+		dev->bytes_transferred +=
+		    done_ccb.csio.dxfer_len - done_ccb.csio.resid;
+
+		/* Only derive a status from the CCB if none is set yet. */
+		if (done_buf->status == CAMDD_STATUS_NONE)
+			done_buf->status = camdd_ccb_status(&done_ccb);
+
+		switch (done_buf->status) {
+		case CAMDD_STATUS_ERROR:
+			errors++;
+			break;
+		case CAMDD_STATUS_EOF:
+			/*
+			 * Once we queue this buffer to our partner thread,
+			 * he will know that we've hit EOF.
+			 */
+			dev->flags |= CAMDD_DEV_FLAG_EOF;
+			break;
+		default:
+			break;
+		}
+
+		camdd_complete_buf(dev, done_buf, &errors);
+
+		/* Drop the lock again before the next ioctl. */
+		pthread_mutex_unlock(&dev->mutex);
+	}
+
+	pthread_mutex_lock(&dev->mutex);
+
+	return ((errors > 0) ? -1 : fetched);
+}
+
+/*
+ * Execute the buffer at the head of the run queue against a file device.
+ *
+ * Called with the device mutex held.  The mutex is dropped around the
+ * read/write system call and is held again on every return path.  Any
+ * buffer taken from the run queue is handed to camdd_complete_buf()
+ * before returning.
+ *
+ * Returns -1 for error, 0 for success/continue, and 1 for resource
+ * shortage/stop processing.  Note that EOF is counted as an error here,
+ * so it also produces -1.
+ */
+int
+camdd_file_run(struct camdd_dev *dev)
+{
+	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
+	struct camdd_buf_data *data;
+	struct camdd_buf *buf;
+	off_t io_offset;
+	int retval = 0, write_dev = dev->write_dev;
+	int error_count = 0, no_resources = 0, double_buf_needed = 0;
+	uint32_t num_sectors = 0, db_len = 0;
+
+	buf = STAILQ_FIRST(&dev->run_queue);
+	if (buf == NULL) {
+		no_resources = 1;
+		goto bailout;
+	} else if ((dev->write_dev == 0)
+		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
+				  CAMDD_DEV_FLAG_EOF_SENT))) {
+		/*
+		 * The reader has already seen EOF; don't issue any more
+		 * reads, just complete this buffer with EOF status.
+		 */
+		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
+		dev->num_run_queue--;
+		buf->status = CAMDD_STATUS_EOF;
+		error_count++;
+		goto bailout;
+	}
+
+	/*
+	 * If we're writing, we need to go through the source buffer list
+	 * and create an S/G list.
+	 */
+	if (write_dev != 0) {
+		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
+		    dev->sector_size, &num_sectors, &double_buf_needed);
+		if (retval != 0) {
+			/*
+			 * The bailout path completes this buffer, which
+			 * links it onto the free queue through the same
+			 * list entry the run queue uses.  Take it off the
+			 * run queue first so it is never on both lists.
+			 *
+			 * NOTE(review): the buffer is completed with
+			 * whatever status it already has; confirm whether
+			 * it should be flagged as an error instead.
+			 */
+			STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
+			dev->num_run_queue--;
+			no_resources = 1;
+			goto bailout;
+		}
+	}
+
+	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
+	dev->num_run_queue--;
+
+	data = &buf->buf_type_spec.data;
+
+	/*
+	 * pread(2) and pwrite(2) offsets are byte offsets.
+	 */
+	io_offset = buf->lba * dev->sector_size;
+
+	/*
+	 * Unlock the mutex while we read or write.
+	 */
+	pthread_mutex_unlock(&dev->mutex);
+
+	/*
+	 * Note that we don't need to double buffer if we're the reader
+	 * because in that case, we have allocated a single buffer of
+	 * sufficient size to do the read.  This copy is necessary on
+	 * writes because if one of the components of the S/G list is not
+	 * a sector size multiple, the kernel will reject the write.  This
+	 * is unfortunate but not surprising.  So this will make sure that
+	 * we're using a single buffer that is a multiple of the sector size.
+	 *
+	 * NOTE(review): the write calls below choose the double buffer
+	 * whenever double_buf_needed is set, but this copy only happens
+	 * when sg_count > 1.  Confirm camdd_buf_sg_create() never requests
+	 * double buffering for a single segment.
+	 */
+	if ((double_buf_needed != 0)
+	 && (data->sg_count > 1)
+	 && (write_dev != 0)) {
+		uint32_t cur_offset;
+		int i;
+
+		if (file_dev->tmp_buf == NULL)
+			file_dev->tmp_buf = calloc(dev->blocksize, 1);
+		if (file_dev->tmp_buf == NULL) {
+			buf->status = CAMDD_STATUS_ERROR;
+			error_count++;
+			/*
+			 * The bailout path and our caller both expect
+			 * the device mutex to be held.
+			 */
+			pthread_mutex_lock(&dev->mutex);
+			goto bailout;
+		}
+		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
+			bcopy(data->iovec[i].iov_base,
+			    &file_dev->tmp_buf[cur_offset],
+			    data->iovec[i].iov_len);
+			cur_offset += data->iovec[i].iov_len;
+		}
+		db_len = cur_offset;
+	}
+
+	/*
+	 * Seekable files use positioned I/O at io_offset; everything else
+	 * is read or written sequentially.
+	 */
+	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
+		if (write_dev == 0) {
+			/*
+			 * XXX KDM is there any way we would need a S/G
+			 * list here?
+			 */
+			retval = pread(file_dev->fd, data->buf,
+			    buf->len, io_offset);
+		} else {
+			if (double_buf_needed != 0) {
+				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
+				    db_len, io_offset);
+			} else if (data->sg_count == 0) {
+				retval = pwrite(file_dev->fd, data->buf,
+				    data->fill_len, io_offset);
+			} else {
+				retval = pwritev(file_dev->fd, data->iovec,
+				    data->sg_count, io_offset);
+			}
+		}
+	} else {
+		if (write_dev == 0) {
+			/*
+			 * XXX KDM is there any way we would need a S/G
+			 * list here?
+			 */
+			retval = read(file_dev->fd, data->buf, buf->len);
+		} else {
+			if (double_buf_needed != 0) {
+				retval = write(file_dev->fd, file_dev->tmp_buf,
+				    db_len);
+			} else if (data->sg_count == 0) {
+				retval = write(file_dev->fd, data->buf,
+				    data->fill_len);
+			} else {
+				retval = writev(file_dev->fd, data->iovec,
+				    data->sg_count);
+			}
+		}
+	}
+
+	/* We're done, re-acquire the lock */
+	pthread_mutex_lock(&dev->mutex);
+
+	if (retval >= (ssize_t)data->fill_len) {
+		/*
+		 * If the bytes transferred is more than the request size,
+		 * that indicates an overrun, which should only happen at
+		 * the end of a transfer if we have to round up to a sector
+		 * boundary.
+		 */
+		if (buf->status == CAMDD_STATUS_NONE)
+			buf->status = CAMDD_STATUS_OK;
+		data->resid = 0;
+		dev->bytes_transferred += retval;
+	} else if (retval == -1) {
+		warn("Error %s %s", (write_dev) ? "writing to" :
+		    "reading from", file_dev->filename);
+
+		buf->status = CAMDD_STATUS_ERROR;
+		data->resid = data->fill_len;
+		error_count++;
+
+		/* The request details below are only printed in debug mode. */
+		if (dev->debug == 0)
+			goto bailout;
+
+		if ((double_buf_needed != 0)
+		 && (write_dev != 0)) {
+			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
+			    "offset %ju\n", __func__, file_dev->fd,
+			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
+			    (uintmax_t)io_offset);
+		} else if (data->sg_count == 0) {
+			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
+			    "offset %ju\n", __func__, file_dev->fd, data->buf,
+			    data->fill_len, (uintmax_t)buf->lba,
+			    (uintmax_t)io_offset);
+		} else {
+			int i;
+
+			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
+			    "offset %ju\n", __func__, file_dev->fd, 
+			    data->fill_len, (uintmax_t)buf->lba,
+			    (uintmax_t)io_offset);
+
+			for (i = 0; i < data->sg_count; i++) {
+				fprintf(stderr, "index %d ptr %p len %zu\n",
+				    i, data->iovec[i].iov_base,
+				    data->iovec[i].iov_len);
+			}
+		}
+	} else if (retval == 0) {
+		/* A zero-length transfer is treated as end of file. */
+		buf->status = CAMDD_STATUS_EOF;
+		if (dev->debug != 0)
+			printf("%s: got EOF from %s!\n", __func__,
+			    file_dev->filename);
+		data->resid = data->fill_len;
+		error_count++;
+	} else if (retval < (ssize_t)data->fill_len) {
+		/* Short transfer: record how much is left over. */
+		if (buf->status == CAMDD_STATUS_NONE)
+			buf->status = CAMDD_STATUS_SHORT_IO;
+		data->resid = data->fill_len - retval;
+		dev->bytes_transferred += retval;
+	}
+
+bailout:
+	if (buf != NULL) {
+		if (buf->status == CAMDD_STATUS_EOF) {
+			struct camdd_buf *buf2;
+
+			/*
+			 * Once we've hit EOF, everything still waiting on
+			 * the run queue is marked EOF as well.
+			 */
+			dev->flags |= CAMDD_DEV_FLAG_EOF;
+			STAILQ_FOREACH(buf2, &dev->run_queue, links)
+				buf2->status = CAMDD_STATUS_EOF;
+		}
+
+		camdd_complete_buf(dev, buf, &error_count);
+	}
+
+	if (error_count != 0)
+		return (-1);
+	else if (no_resources != 0)
+		return (1);
+	else
+		return (0);
+}
+
+/*
+ * Issue the buffer at the head of the run queue to the pass(4) device.
+ *
+ * Returns 0 when the CCB was queued, 1 when the run queue is empty (stop
+ * processing), and -1 when the S/G list could not be built or the
+ * CAMIOQUEUE ioctl failed.  The device mutex is dropped around the ioctl
+ * and held again on return.
+ */
+int
+camdd_pass_run(struct camdd_dev *dev)
+{
+	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
+	struct camdd_buf *io_buf;
+	struct camdd_buf_data *io_data;
+	union ccb *io_ccb;
+	uint32_t block_count, sg_sectors = 0;
+	int writing = dev->write_dev;
+	int need_double_buf = 0;
+	int queue_failed;
+
+	io_buf = STAILQ_FIRST(&dev->run_queue);
+	if (io_buf == NULL)
+		return (1);
+
+	/*
+	 * Writes are assembled from a list of source buffers, so an S/G
+	 * list has to be built first.  On failure the buffer stays on the
+	 * run queue.
+	 */
+	if (writing != 0) {
+		if (camdd_buf_sg_create(io_buf, /*iovec*/ 0, dev->sector_size,
+		    &sg_sectors, &need_double_buf) != 0)
+			return (-1);
+	}
+
+	STAILQ_REMOVE(&dev->run_queue, io_buf, camdd_buf, links);
+	dev->num_run_queue--;
+
+	io_data = &io_buf->buf_type_spec.data;
+	io_ccb = &io_data->ccb;
+
+	/* Clear everything in the SCSI I/O CCB that follows the header. */
+	bzero(&(&io_ccb->ccb_h)[1],
+	      sizeof(struct ccb_scsiio) - sizeof(struct ccb_hdr));
+
+	/*
+	 * Writes use the sector count computed while building the S/G
+	 * list; reads derive it from the fill length.  This may be less
+	 * than a full block at the end of an I/O stream or of a device.
+	 */
+	if (writing == 0)
+		block_count = io_data->fill_len / pass_dev->block_len;
+	else
+		block_count = sg_sectors;
+
+	scsi_read_write(&io_ccb->csio,
+			/*retries*/ dev->retry_count,
+			/*cbfcnp*/ NULL,
+			/*tag_action*/ MSG_SIMPLE_Q_TAG,
+			/*readop*/ (writing == 0) ? SCSI_RW_READ :
+				   SCSI_RW_WRITE,
+			/*byte2*/ 0,
+			/*minimum_cmd_size*/ dev->min_cmd_size,
+			/*lba*/ io_buf->lba,
+			/*block_count*/ block_count,
+			/*data_ptr*/ (io_data->sg_count != 0) ?
+				     (uint8_t *)io_data->segs : io_data->buf,
+			/*dxfer_len*/ (block_count * pass_dev->block_len),
+			/*sense_len*/ SSD_FULL_SIZE,
+			/*timeout*/ dev->io_timeout);
+
+	/* Never freeze the device queue on error. */
+	io_ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
+
+	/* Error recovery is only requested when retries are allowed. */
+	if (dev->retry_count != 0)
+		io_ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
+
+	/* With an S/G list, the data pointer refers to the segment array. */
+	if (io_data->sg_count != 0) {
+		io_ccb->csio.sglist_cnt = io_data->sg_count;
+		io_ccb->ccb_h.flags |= CAM_DATA_SG;
+	}
+
+	/*
+	 * Stash the buffer pointer in the CCB.  The kernel hands it back
+	 * with the completed CCB, which is how the completion is matched
+	 * to its buffer.
+	 */
+	io_ccb->ccb_h.ccb_buf = io_buf;
+
+	/*
+	 * Queue the CCB to the pass(4) driver without holding our mutex.
+	 */
+	pthread_mutex_unlock(&dev->mutex);
+	queue_failed = (ioctl(pass_dev->dev->fd, CAMIOQUEUE, io_ccb) == -1);
+	pthread_mutex_lock(&dev->mutex);
+
+	if (queue_failed) {
+		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
+		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
+		warn("%s: CCB address is %p", __func__, io_ccb);
+
+		STAILQ_INSERT_TAIL(&dev->free_queue, io_buf, links);
+		return (-1);
+	}
+
+	dev->cur_active_io++;
+	STAILQ_INSERT_TAIL(&dev->active_queue, io_buf, links);
+
+	return (0);
+}
+
+/*
+ * Work out the LBA and byte length of the next I/O for this device and
+ * advance the device's next I/O position by that length.
+ *
+ * Returns 0 normally.  Returns 1 when a limit was hit: either the start
+ * is already past the last allowed sector (*len is set to 0), or the
+ * request was trimmed so that it ends at the last allowed sector.
+ */
+int
+camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
+{
+	uint32_t num_blocks;
+	uint64_t max_sector;
+	int hit_limit = 0;
+
+	/* Start with a full block at the current position. */
+	*lba = dev->next_io_pos_bytes / dev->sector_size;
+	*len = dev->blocksize;
+	num_blocks = *len / dev->sector_size;
+
+	/*
+	 * Both limits being 0 means there is no limit at all.  This can
+	 * happen if we're writing to a file in a filesystem, or reading
+	 * from something like /dev/zero.
+	 */
+	if ((dev->max_sector == 0)
+	 && (dev->sector_io_limit == 0))
+		goto advance;
+
+	/* Use whichever limit is set, or the smaller one if both are. */
+	if (dev->max_sector == 0)
+		max_sector = dev->sector_io_limit;
+	else if (dev->sector_io_limit == 0)
+		max_sector = dev->max_sector;
+	else
+		max_sector = min(dev->sector_io_limit, dev->max_sector);
+
+	if (*lba > max_sector) {
+		/*
+		 * We're starting off past the end of the device, so
+		 * there is nothing to transfer; the caller just needs to
+		 * send an EOF notification.
+		 */
+		*len = 0;
+		hit_limit = 1;
+	} else if (((*lba + num_blocks) > max_sector + 1)
+		|| ((*lba + num_blocks) < *lba)) {
+		/*
+		 * The request would run past the end (or wrap around),
+		 * so trim it down to finish at the last sector.
+		 */
+		num_blocks = (max_sector + 1) - *lba;
+		*len = num_blocks * dev->sector_size;
+		hit_limit = 1;
+	}
+
+advance:
+	dev->next_io_pos_bytes += *len;
+
+	return (hit_limit);
+}
+
+/*
+ * Queue work for this device.
+ *
+ * For a reader (dev->write_dev == 0) read_buf is not used: a new data
+ * buffer is obtained, given the next LBA/length, and placed on the run
+ * queue.
+ *
+ * For a writer, read_buf is a buffer filled by the reader.  It is attached
+ * to a pending write buffer (directly, or through indirect buffers when it
+ * has to be split across several write buffers), and any write buffer that
+ * becomes full, or has to be flushed because of EOF, is moved to the run
+ * queue.
+ *
+ * Returns 0 for success, 1 for EOF detected, and -1 for failure.
+ * NOTE(review): the writer paths also return 1 when camdd_get_buf()
+ * fails, so 1 does not always mean EOF.
+ */
+int
+camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
+{
+	struct camdd_buf *buf = NULL;
+	struct camdd_buf_data *data;
+	struct camdd_dev_pass *pass_dev;
+	size_t new_len;
+	struct camdd_buf_data *rb_data;
+	int is_write = dev->write_dev;
+	int eof_flush_needed = 0;
+	int retval = 0;
+	int error;
+
+	pass_dev = &dev->dev_spec.pass;
+
+	/*
+	 * If we've gotten EOF or our partner has, we should not continue
+	 * queueing I/O.  If we're a writer, though, we should continue
+	 * to write any buffers that don't have EOF status.
+	 */
+	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
+	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
+	  && (is_write == 0))) {
+		/*
+		 * Tell the worker thread that we have seen EOF.
+		 */
+		retval = 1;
+
+		/*
+		 * If we're the writer, send the buffer back with EOF status.
+		 */
+		if (is_write) {
+			read_buf->status = CAMDD_STATUS_EOF;
+			
+			/* NOTE(review): the result is stored but never checked. */
+			error = camdd_complete_peer_buf(dev, read_buf);
+		}
+		goto bailout;
+	}
+
+	/*
+	 * Reader: set up the next read and put it on the run queue.
+	 */
+	if (is_write == 0) {
+		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
+		if (buf == NULL) {
+			retval = -1;
+			goto bailout;
+		}
+		data = &buf->buf_type_spec.data;
+
+		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
+		if (retval != 0) {
+			buf->status = CAMDD_STATUS_EOF;
+
+			/*
+			 * Nothing left to read and an EOF buffer has
+			 * already been queued or sent, so this one isn't
+			 * needed.
+			 */
+		 	if ((buf->len == 0)
+			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
+			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
+				camdd_release_buf(buf);
+				goto bailout;
+			}
+			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
+		}
+
+		data->fill_len = buf->len;
+		data->src_start_offset = buf->lba * dev->sector_size;
+
+		/*
+		 * Put this on the run queue.
+		 */
+		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+		dev->num_run_queue++;
+
+		/* We're done. */
+		goto bailout;
+	}
+
+	/*
+	 * Everything from here on is the writer path.
+	 */
+
+	/*
+	 * Check for new EOF status from the reader.
+	 */
+	if ((read_buf->status == CAMDD_STATUS_EOF)
+	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
+		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
+		/*
+		 * With nothing pending and no data in this buffer there is
+		 * nothing to flush; otherwise remember that whatever we
+		 * have must be written out even if it isn't a full block.
+		 */
+		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
+		 && (read_buf->len == 0)) {
+			camdd_complete_peer_buf(dev, read_buf);
+			retval = 1;
+			goto bailout;
+		} else
+			eof_flush_needed = 1;
+	}
+
+	/*
+	 * See if we have a buffer we're composing with pieces from our
+	 * partner thread.
+	 */
+	buf = STAILQ_FIRST(&dev->pending_queue);
+	if (buf == NULL) {
+		uint64_t lba;
+		ssize_t len;
+
+		retval = camdd_get_next_lba_len(dev, &lba, &len);
+		if (retval != 0) {
+			read_buf->status = CAMDD_STATUS_EOF;
+
+			/*
+			 * No room left on the destination at all: flag
+			 * EOF on this device and return the buffer.
+			 */
+			if (len == 0) {
+				dev->flags |= CAMDD_DEV_FLAG_EOF;
+				error = camdd_complete_peer_buf(dev, read_buf);
+				goto bailout;
+			}
+		}
+
+		/*
+		 * If we don't have a pending buffer, we need to grab a new
+		 * one from the free list or allocate another one.
+		 */
+		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
+		if (buf == NULL) {
+			retval = 1;
+			goto bailout;
+		}
+
+		buf->lba = lba;
+		buf->len = len;
+
+		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
+		dev->num_pending_queue++;
+	}
+
+	data = &buf->buf_type_spec.data;
+
+	rb_data = &read_buf->buf_type_spec.data;
+
+	/*
+	 * The reader's data is expected to arrive in order; in debug mode,
+	 * complain if this buffer doesn't start where the last one ended.
+	 */
+	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
+	 && (dev->debug != 0)) {
+		printf("%s: WARNING: reader offset %#jx != expected offset "
+		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
+		    (uintmax_t)dev->next_peer_pos_bytes);
+	}
+	dev->next_peer_pos_bytes = rb_data->src_start_offset +
+	    (rb_data->fill_len - rb_data->resid);
+
+	/*
+	 * new_len is what the pending write buffer would hold if all of
+	 * the valid data in read_buf were added to it.
+	 */
+	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
+	if (new_len < buf->len) {
+		/*
+		 * There are three cases here:
+		 * 1. We need more data to fill up a block, so we put 
+		 *    this I/O on the queue and wait for more I/O.
+		 * 2. We have a pending buffer in the queue that is
+		 *    smaller than our blocksize, but we got an EOF.  So we
+		 *    need to go ahead and flush the write out.
+		 * 3. We got an error.
+		 */
+
+		/*
+		 * Increment our fill length.
+		 */
+		data->fill_len += (rb_data->fill_len - rb_data->resid);
+
+		/*
+		 * Add the new read buffer to the list for writing.
+		 */
+		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
+
+		/* Increment the count */
+		buf->src_count++;
+
+		if (eof_flush_needed == 0) {
+			/*
+			 * We need to exit, because we don't have enough
+			 * data yet.
+			 */
+			goto bailout;
+		} else {
+			/*
+			 * Take the buffer off of the pending queue.
+			 */
+			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
+				      links);
+			dev->num_pending_queue--;
+
+			/*
+			 * If we need an EOF flush, but there is no data
+			 * to flush, go ahead and return this buffer.
+			 *
+			 * NOTE(review): a NULL error_count is passed
+			 * here; this is only reached on the writer path.
+			 */
+			if (data->fill_len == 0) {
+				camdd_complete_buf(dev, buf, /*error_count*/0);
+				retval = 1;
+				goto bailout;
+			}
+
+			/*
+			 * Put this on the next queue for execution.
+			 */
+			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+			dev->num_run_queue++;
+		}
+	} else if (new_len == buf->len) {
+		/*
+		 * We have enough data to completely fill one block,
+		 * so we're ready to issue the I/O.
+		 */
+
+		/*
+		 * Take the buffer off of the pending queue.
+		 */
+		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
+		dev->num_pending_queue--;
+
+		/*
+		 * Add the new read buffer to the list for writing.
+		 */
+		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);
+
+		/* Increment the count */
+		buf->src_count++;
+
+		/*
+		 * Increment our fill length.
+		 */
+		data->fill_len += (rb_data->fill_len - rb_data->resid);
+
+		/*
+		 * Put this on the next queue for execution.
+		 */
+		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+		dev->num_run_queue++;
+	} else {
+		/*
+		 * read_buf holds more than the pending write buffer can
+		 * take.  It is split up with indirect buffers, each of
+		 * which references a slice of read_buf and holds a
+		 * reference on it.
+		 */
+		struct camdd_buf *idb;
+		struct camdd_buf_indirect *indirect;
+		uint32_t len_to_go, cur_offset;
+
+		
+		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
+		if (idb == NULL) {
+			retval = 1;
+			goto bailout;
+		}
+		indirect = &idb->buf_type_spec.indirect;
+		indirect->src_buf = read_buf;
+		read_buf->refcount++;
+		indirect->offset = 0;
+		indirect->start_ptr = rb_data->buf;
+		/*
+		 * We've already established that there is more
+		 * data in read_buf than we have room for in our
+		 * current write request.  So this particular chunk
+		 * of the request should just be the remainder
+		 * needed to fill up a block.
+		 */
+		indirect->len = buf->len - (data->fill_len - data->resid);
+
+		camdd_buf_add_child(buf, idb);
+
+		/*
+		 * This buffer is ready to execute, so we can take
+		 * it off the pending queue and put it on the run
+		 * queue.
+		 */
+		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
+			      links);
+		dev->num_pending_queue--;
+		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
+		dev->num_run_queue++;
+
+		/* Offset into read_buf of the first byte not yet assigned. */
+		cur_offset = indirect->offset + indirect->len;
+
+		/*
+		 * The resulting I/O would be too large to fit in
+		 * one block.  We need to split this I/O into
+		 * multiple pieces.  Allocate as many buffers as needed.
+		 */
+		for (len_to_go = rb_data->fill_len - rb_data->resid -
+		     indirect->len; len_to_go > 0;) {
+			struct camdd_buf *new_buf;
+			struct camdd_buf_data *new_data;
+			uint64_t lba;
+			ssize_t len;
+
+			retval = camdd_get_next_lba_len(dev, &lba, &len);
+			if ((retval != 0)
+			 && (len == 0)) {
+				/*
+				 * The device has already been marked
+				 * as EOF, and there is no space left.
+				 */
+				goto bailout;
+			}
+
+			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
+			if (new_buf == NULL) {
+				retval = 1;
+				goto bailout;
+			}
+
+			new_buf->lba = lba;
+			new_buf->len = len;
+
+			/*
+			 * NOTE(review): if this allocation fails, new_buf
+			 * is not on any queue and is not released here.
+			 */
+			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
+			if (idb == NULL) {
+				retval = 1;
+				goto bailout;
+			}
+
+			indirect = &idb->buf_type_spec.indirect;
+
+			indirect->src_buf = read_buf;
+			read_buf->refcount++;
+			indirect->offset = cur_offset;
+			indirect->start_ptr = rb_data->buf + cur_offset;
+			/* Take as much as fits in the new write buffer. */
+			indirect->len = min(len_to_go, new_buf->len);
+#if 0
+			if (((indirect->len % dev->sector_size) != 0)
+			 || ((indirect->offset % dev->sector_size) != 0)) {
+				warnx("offset %ju len %ju not aligned with "
+				    "sector size %u", indirect->offset,
+				    (uintmax_t)indirect->len, dev->sector_size);
+			}
+#endif
+			cur_offset += indirect->len;
+			len_to_go -= indirect->len;
+
+			camdd_buf_add_child(new_buf, idb);
+
+			new_data = &new_buf->buf_type_spec.data;
+
+			/*
+			 * A full buffer (or any buffer when flushing for
+			 * EOF) is ready to run; a partial one waits on the
+			 * pending queue for more data.
+			 *
+			 * NOTE(review): the partial-buffer test compares
+			 * against buf->len rather than new_buf->len;
+			 * confirm that is intended.
+			 */
+			if ((new_data->fill_len == new_buf->len)
+			 || (eof_flush_needed != 0)) {
+				STAILQ_INSERT_TAIL(&dev->run_queue,
+						   new_buf, links);
+				dev->num_run_queue++;
+			} else if (new_data->fill_len < buf->len) {
+				STAILQ_INSERT_TAIL(&dev->pending_queue,
+					   	new_buf, links);
+				dev->num_pending_queue++;
+			} else {
+				warnx("%s: too much data in new "
+				      "buffer!", __func__);
+				retval = 1;
+				goto bailout;
+			}
+		}
+	}
+
+bailout:
+	return (retval);
+}
+
+/*
+ * Report how much work is outstanding on this device and its peer.
+ *
+ * our_depth:  active I/Os plus buffers on the run queue.
+ * peer_depth: peer work queue count minus peer done queue count, or 0
+ *             if the done count is not smaller.
+ * our_bytes:  our_depth multiplied by the device blocksize.
+ * peer_bytes: the device's count of bytes queued to the peer.
+ */
+void
+camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
+		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
+{
+	*our_depth = dev->cur_active_io + dev->num_run_queue;
+
+	/* Clamp at zero rather than letting the subtraction go negative. */
+	*peer_depth = (dev->num_peer_work_queue > dev->num_peer_done_queue) ?
+	    dev->num_peer_work_queue - dev->num_peer_done_queue : 0;
+
+	*our_bytes = *our_depth * dev->blocksize;
+	*peer_bytes = dev->peer_bytes_queued;
+}
+
+/*
+ * Signal handler: SIGINFO requests a status report, any other signal
+ * requests an exit with an error status.  Either way the semaphore is
+ * posted so the waiter notices the flag.
+ */
+void
+camdd_sig_handler(int sig)
+{
+	switch (sig) {
+	case SIGINFO:
+		need_status = 1;
+		break;
+	default:
+		need_exit = 1;
+		error_exit = 1;
+		break;
+	}
+
+	sem_post(&camdd_sem);
+}
+
+/*
+ * Print transfer statistics for both devices to stderr: bytes moved on
+ * each side, elapsed time since start_time, and the resulting throughput.
+ *
+ * Throughput is based on the smaller of the two devices' byte counts.
+ * If the current time cannot be read, a warning is printed and nothing
+ * else is reported.
+ */
+void
+camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev, 
+		   struct timespec *start_time)
+{
+	struct timespec done_time;
+	uint64_t total_ns;
+	long double mb_sec, total_sec;
+	int error = 0;
+
+	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
+	if (error != 0) {
+		warn("Unable to get done time");
+		return;
+	}
+
+	/* done_time now holds the elapsed time. */
+	timespecsub(&done_time, start_time);
+	
+	/*
+	 * Widen the seconds before scaling so the multiplication is done
+	 * in 64 bits even where time_t is only 32 bits wide.
+	 */
+	total_ns = ((uint64_t)done_time.tv_sec * 1000000000) +
+	    done_time.tv_nsec;
+	total_sec = total_ns;
+	total_sec /= 1000000000;
+
+	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
+		"%.4Lf seconds elapsed\n",
+		(uintmax_t)camdd_dev->bytes_transferred,
+		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
+		camdd_dev->device_name,
+		(uintmax_t)other_dev->bytes_transferred,
+		(other_dev->write_dev == 0) ? "read from" : "written to",
+		other_dev->device_name, total_sec);
+
+	/* bytes -> MB, then divide by the elapsed time in seconds. */
+	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
+	mb_sec /= 1024 * 1024;
+	mb_sec *= 1000000000;
+	mb_sec /= total_ns;
+	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
+}
+
+/*
+ * Set up the input and output devices described by io_opts, start one
+ * worker thread per device, and wait until the threads finish.  While
+ * waiting, a status request prints the current statistics and an exit
+ * request (or a failed sem_wait()) flags both devices as EOF and wakes
+ * their threads.
+ *
+ * io_opts/num_io_opts: the I/O paths; exactly two are required.
+ * max_io:      if non-zero, used to compute each device's sector I/O limit.
+ * retry_count: passed to the device probe routines as the I/O retry count.
+ * timeout:     passed to the device probe routines as the I/O timeout.
+ *
+ * Returns 0 on success and non-zero on failure (the local error value
+ * plus the global error_exit flag).
+ */
+int
+camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
+	 int retry_count, int timeout)
+{
+	struct cam_device *new_cam_dev = NULL;
+	struct camdd_dev *devs[2];
+	struct timespec start_time;
+	pthread_t threads[2];
+	int unit = 0;
+	int error = 0;
+	int i;
+
+	/*
+	 * Clear the device array before anything can jump to bailout,
+	 * since the cleanup there walks the array.
+	 */
+	bzero(devs, sizeof(devs));
+
+	if (num_io_opts != 2) {
+		warnx("Must have one input and one output path");
+		error = 1;
+		goto bailout;
+	}
+
+	for (i = 0; i < num_io_opts; i++) {
+		switch (io_opts[i].dev_type) {
+		case CAMDD_DEV_PASS: {
+			camdd_argmask new_arglist = CAMDD_ARG_NONE;
+			int bus = 0, target = 0, lun = 0;
+			const char *device = NULL;
+			char name[30];
+			int rv;
+
+			if (isdigit((unsigned char)io_opts[i].dev_name[0])) {
+				/* device specified as bus:target[:lun] */
+				rv = parse_btl(io_opts[i].dev_name, &bus,
+				    &target, &lun, &new_arglist);
+				if (rv < 2) {
+					warnx("numeric device specification "
+					     "must be either bus:target, or "
+					     "bus:target:lun");
+					error = 1;
+					goto bailout;
+				}
+				/* default to 0 if lun was not specified */
+				if ((new_arglist & CAMDD_ARG_LUN) == 0) {
+					lun = 0;
+					new_arglist |= CAMDD_ARG_LUN;
+				}
+			} else {
+				if (cam_get_device(io_opts[i].dev_name, name,
+						   sizeof name, &unit) == -1) {
+					warnx("%s", cam_errbuf);
+					error = 1;
+					goto bailout;
+				}
+				/*
+				 * The name is only needed for the open
+				 * call below, so use the local buffer
+				 * rather than a heap copy that would have
+				 * to be checked and freed.
+				 */
+				device = name;
+				new_arglist |= CAMDD_ARG_DEVICE |CAMDD_ARG_UNIT;
+			}
+
+			if (new_arglist & (CAMDD_ARG_BUS | CAMDD_ARG_TARGET))
+				new_cam_dev = cam_open_btl(bus, target, lun,
+				    O_RDWR, NULL);
+			else
+				new_cam_dev = cam_open_spec_device(device, unit,
+				    O_RDWR, NULL);
+			if (new_cam_dev == NULL) {
+				warnx("%s", cam_errbuf);
+				error = 1;
+				goto bailout;
+			}
+
+			devs[i] = camdd_probe_pass(new_cam_dev,
+			    /*io_opts*/ &io_opts[i],
+			    CAMDD_ARG_ERR_RECOVER, 
+			    /*probe_retry_count*/ 3,
+			    /*probe_timeout*/ 5000,
+			    /*io_retry_count*/ retry_count,
+			    /*io_timeout*/ timeout);
+			if (devs[i] == NULL) {
+				warn("Unable to probe device %s%u",
+				     new_cam_dev->device_name,
+				     new_cam_dev->dev_unit_num);
+				error = 1;
+				goto bailout;
+			}
+			break;
+		}
+		case CAMDD_DEV_FILE: {
+			int fd = -1;
+
+			/* A name starting with '-' selects stdin/stdout. */
+			if (io_opts[i].dev_name[0] == '-') {
+				if (io_opts[i].write_dev != 0)
+					fd = STDOUT_FILENO;
+				else
+					fd = STDIN_FILENO;
+			} else {
+				if (io_opts[i].write_dev != 0) {
+					fd = open(io_opts[i].dev_name,
+					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
+				} else {
+					fd = open(io_opts[i].dev_name,
+					    O_RDONLY);
+				}
+			}
+			if (fd == -1) {
+				warn("error opening file %s",
+				    io_opts[i].dev_name);
+				error = 1;
+				goto bailout;
+			}
+
+			devs[i] = camdd_probe_file(fd, &io_opts[i],
+			    retry_count, timeout);
+			if (devs[i] == NULL) {
+				error = 1;
+				goto bailout;
+			}
+
+			break;
+		}
+		default:
+			warnx("Unknown device type %d (%s)",
+			    io_opts[i].dev_type, io_opts[i].dev_name);
+			error = 1;
+			goto bailout;
+		}
+
+		devs[i]->write_dev = io_opts[i].write_dev;
+
+		devs[i]->start_offset_bytes = io_opts[i].offset;
+
+		/*
+		 * With a maximum I/O amount, the last sector we may touch
+		 * is the starting sector plus the number of sectors in
+		 * max_io, minus one.
+		 */
+		if (max_io != 0) {
+			devs[i]->sector_io_limit =
+			    (devs[i]->start_offset_bytes /
+			    devs[i]->sector_size) +
+			    (max_io / devs[i]->sector_size) - 1;
+		}
+
+		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
+		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
+	}
+
+	/* Cross-link the two devices so each can queue work to the other. */
+	devs[0]->peer_dev = devs[1];
+	devs[1]->peer_dev = devs[0];
+	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
+	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
+
+	sem_init(&camdd_sem, /*pshared*/ 0, 0);
+
+	signal(SIGINFO, camdd_sig_handler);
+	signal(SIGINT, camdd_sig_handler);
+
+	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
+	if (error != 0) {
+		warn("Unable to get start time");
+		goto bailout;
+	}
+
+	for (i = 0; i < num_io_opts; i++) {
+		error = pthread_create(&threads[i], NULL, camdd_worker,
+				       (void *)devs[i]);
+		if (error != 0) {
+			warnc(error, "pthread_create() failed");
+			goto bailout;
+		}
+	}
+
+	/*
+	 * Sleep on the semaphore until something posts it, then act on
+	 * whichever request flag is set.
+	 */
+	for (;;) {
+		if ((sem_wait(&camdd_sem) == -1)
+		 || (need_exit != 0)) {
+			struct kevent ke;
+
+			/*
+			 * Flag each device as EOF and trigger its user
+			 * event so the thread wakes up.
+			 */
+			for (i = 0; i < num_io_opts; i++) {
+				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
+				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
+
+				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
+
+				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
+						NULL);
+				if (error == -1)
+					warn("%s: unable to wake up thread",
+					    __func__);
+				error = 0;
+			}
+			break;
+		} else if (need_status != 0) {
+			camdd_print_status(devs[0], devs[1], &start_time);
+			need_status = 0;
+		}
+	} 
+	for (i = 0; i < num_io_opts; i++) {
+		pthread_join(threads[i], NULL);
+	}
+
+	camdd_print_status(devs[0], devs[1], &start_time);
+
+bailout:
+
+	/*
+	 * Walk the whole array rather than num_io_opts entries, so a bad
+	 * count can never index past it.  Unused slots are NULL.
+	 */
+	for (i = 0; i < (int)(sizeof(devs) / sizeof(devs[0])); i++)
+		camdd_free_dev(devs[i]);
+
+	return (error + error_exit);
+}
+
+/*
+ * Print the command line synopsis and option descriptions to stderr.
+ */
+void
+usage(void)
+{
+	static const char usage_text[] =
+"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
+"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
+"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
+"              <-i|-o file=/dev/nsa0,bs=512K>\n"
+"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
+"Option description\n"
+"-i <arg=val>  Specify input device/file and parameters\n"
+"-o <arg=val>  Specify output device/file and parameters\n"
+"Input and Output parameters\n"
+"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
+"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
+"              or - for stdin/stdout\n"
+"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
+"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
+"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
+"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
+"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
+"Optional arguments\n"
+"-C retry_cnt  Specify a retry count for pass(4) devices\n"
+"-E            Enable CAM error recovery for pass(4) devices\n"
+"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
+"              using K, G, M, etc. suffixes\n"
+"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
+"-v            Enable verbose error recovery\n"
+"-h            Print this message\n";
+
+	/* The text has no conversions in it, so emit it verbatim. */
+	fputs(usage_text, stderr);
+}
+
+
+/*
+ * Parse a comma separated list of name=value I/O parameters (the argument
+ * to -i or -o) into io_opts.
+ *
+ * args:     the parameter string; it is copied, not modified.
+ * is_write: stored in io_opts->write_dev.
+ * io_opts:  filled in with the parsed settings.
+ *
+ * Parameter names are matched case-insensitively on their leading
+ * characters.  Empty fields (two adjacent commas) are skipped, and an
+ * unrecognized parameter only produces a warning.
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+int
+camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
+{
+	char *tmpstr, *tmpstr2;
+	char *orig_tmpstr = NULL;
+	int retval = 0;
+
+	io_opts->write_dev = is_write;
+
+	tmpstr = strdup(args);
+	if (tmpstr == NULL) {
+		warn("strdup failed");
+		retval = 1;
+		goto bailout;
+	}
+	/* strsep(3) advances tmpstr, so keep the original pointer to free. */
+	orig_tmpstr = tmpstr;
+	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
+		char *name, *value;
+
+		/*
+		 * If the user creates an empty parameter by putting in two
+		 * commas, skip over it and look for the next field.
+		 */
+		if (*tmpstr2 == '\0')
+			continue;
+
+		name = strsep(&tmpstr2, "=");
+		if (*name == '\0') {
+			warnx("Got empty I/O parameter name");
+			retval = 1;
+			goto bailout;
+		}
+		/*
+		 * Everything after the first '=' is the value.  Don't
+		 * split again, or a value that itself contains '=' (a
+		 * file name, for instance) would be silently cut short.
+		 */
+		value = tmpstr2;
+		if ((value == NULL)
+		 || (*value == '\0')) {
+			warnx("Empty I/O parameter value for %s", name);
+			retval = 1;
+			goto bailout;
+		}
+		if (strncasecmp(name, "file", 4) == 0) {
+			io_opts->dev_type = CAMDD_DEV_FILE;
+			io_opts->dev_name = strdup(value);
+			if (io_opts->dev_name == NULL) {
+				warn("Error allocating memory");
+				retval = 1;
+				goto bailout;
+			}
+		} else if (strncasecmp(name, "pass", 4) == 0) {
+			io_opts->dev_type = CAMDD_DEV_PASS;
+			io_opts->dev_name = strdup(value);
+			if (io_opts->dev_name == NULL) {
+				warn("Error allocating memory");
+				retval = 1;
+				goto bailout;
+			}
+		} else if ((strncasecmp(name, "bs", 2) == 0)
+			|| (strncasecmp(name, "blocksize", 9) == 0)) {
+			retval = expand_number(value, &io_opts->blocksize);
+			if (retval == -1) {
+				warn("expand_number(3) failed on %s=%s", name,
+				    value);
+				retval = 1;
+				goto bailout;
+			}
+		} else if (strncasecmp(name, "depth", 5) == 0) {
+			char *endptr;
+
+			io_opts->queue_depth = strtoull(value, &endptr, 0);
+			if (*endptr != '\0') {
+				warnx("invalid queue depth %s", value);
+				retval = 1;
+				goto bailout;
+			}
+		} else if (strncasecmp(name, "mcs", 3) == 0) {
+			char *endptr;
+
+			/* Only values from 0 through 16 are accepted. */
+			io_opts->min_cmd_size = strtol(value, &endptr, 0);
+			if ((*endptr != '\0')
+			 || ((io_opts->min_cmd_size > 16)
+			  || (io_opts->min_cmd_size < 0))) {
+				warnx("invalid minimum cmd size %s", value);
+				retval = 1;
+				goto bailout;
+			}
+		} else if (strncasecmp(name, "offset", 6) == 0) {
+			retval = expand_number(value, &io_opts->offset);
+			if (retval == -1) {
+				warn("expand_number(3) failed on %s=%s", name,
+				    value);
+				retval = 1;
+				goto bailout;
+			}
+		} else if (strncasecmp(name, "debug", 5) == 0) {
+			char *endptr;
+
+			io_opts->debug = strtoull(value, &endptr, 0);
+			if (*endptr != '\0') {
+				warnx("invalid debug level %s", value);
+				retval = 1;
+				goto bailout;
+			}
+		} else {
+			warnx("Unrecognized parameter %s=%s", name, value);
+		}
+	}
+bailout:
+	free(orig_tmpstr);
+
+	return (retval);
+}
+
+/*
+ * Parse the command line and run camdd_rw() on the -i and -o option sets.
+ * Exits 1 on a usage or parse error, else with the camdd_rw() return value.
+ */
+int
+main(int argc, char **argv)
+{
+	int c;
+	camdd_argmask arglist = CAMDD_ARG_NONE;
+	int timeout = 0, retry_count = 1, error = 0;
+	uint64_t max_io = 0;
+	struct camdd_io_opts *opt_list = NULL;
+	char *endptr;
+	long long val;
+
+	if (argc == 1) {
+		usage();
+		exit(1);
+	}
+
+	opt_list = calloc(2, sizeof(struct camdd_io_opts));
+	if (opt_list == NULL)
+		err(1, "Unable to allocate option list");
+
+	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
+		switch (c) {
+		case 'C':
+			/* (int)val != val catches counts too big for an int */
+			val = strtoll(optarg, &endptr, 0);
+			if ((endptr == optarg) || (*endptr != '\0')
+			 || (val < 0) || (val != (int)val))
+				errx(1, "invalid retry count %s", optarg);
+			retry_count = (int)val;
+			arglist |= CAMDD_ARG_RETRIES;
+			break;
+		case 'E':
+			arglist |= CAMDD_ARG_ERR_RECOVER;
+			break;
+		case 'i':
+		case 'o':
+			/* opt_list[0] is the input side, [1] the output */
+			if (opt_list[c == 'o'].dev_type != CAMDD_DEV_NONE)
+				errx(1, "Only one input and output path "
+				    "allowed");
+			error = camdd_parse_io_opts(optarg, c == 'o',
+			    &opt_list[c == 'o']);
+			if (error != 0)
+				goto bailout;
+			break;
+		case 'm':
+			error = expand_number(optarg, &max_io);
+			if (error == -1) {
+				warn("invalid maximum I/O amount %s", optarg);
+				error = 1;
+				goto bailout;
+			}
+			break;
+		case 't':
+			val = strtoll(optarg, &endptr, 0);
+			if ((endptr == optarg) || (*endptr != '\0')
+			 || (val < 0) || (val != (int)val)
+			 || (val * 1000 != (int)(val * 1000)))
+				errx(1, "invalid timeout %s", optarg);
+			/* Seconds to ms; checked above to fit in an int */
+			timeout = (int)(val * 1000);
+			arglist |= CAMDD_ARG_TIMEOUT;
+			break;
+		case 'v':
+			arglist |= CAMDD_ARG_VERBOSE;
+			break;
+		case 'h':
+		default:
+			usage();
+			exit(1);
+			break; /*NOTREACHED*/
+		}
+	}
+
+	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
+	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
+		errx(1, "Must specify both -i and -o");
+
+	/* Set the timeout if the user hasn't specified one. */
+	if (timeout == 0)
+		timeout = CAMDD_PASS_RW_TIMEOUT;
+
+	error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
+
+bailout:
+	free(opt_list);
+	exit(error);
+}