1 files changed, 706 insertions, 95 deletions
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index d00d279..f8a871b 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/condvar.h>
+#include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
@@ -53,8 +54,12 @@ __FBSDID("$FreeBSD$");
 #include <sys/callout.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/uma.h>
 #include <sys/lock.h>
 #include <sys/sema.h>
+#include <sys/sglist.h>
+#include <machine/bus.h>
+#include <sys/bus_dma.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
@@ -66,7 +71,6 @@ __FBSDID("$FreeBSD$");
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_message.h>
 
-
 #include <dev/hyperv/include/hyperv.h>
 #include "hv_vstorage.h"
 
@@ -77,8 +81,29 @@ __FBSDID("$FreeBSD$");
 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
 #define STORVSC_MAX_TARGETS		(2)
 
+#define STORVSC_WIN7_MAJOR 4
+#define STORVSC_WIN7_MINOR 2
+
+#define STORVSC_WIN8_MAJOR 5
+#define STORVSC_WIN8_MINOR 1
+
+#define HV_ALIGN(x, a) roundup2(x, a)
+
 struct storvsc_softc;
 
+struct hv_sgl_node {
+	LIST_ENTRY(hv_sgl_node) link;
+	struct sglist *sgl_data;
+};
+
+struct hv_sgl_page_pool{
+	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
+	LIST_HEAD(, hv_sgl_node) free_sgl_list;
+	boolean_t                is_init;
+} g_hv_sgl_page_pool;
+
+#define STORVSC_MAX_SG_PAGE_CNT STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT
+
 enum storvsc_request_type {
 	WRITE_TYPE,
 	READ_TYPE,
@@ -96,20 +121,24 @@ struct hv_storvsc_request {
 	struct storvsc_softc *softc;
 	struct callout callout;
 	struct sema synch_sema; /*Synchronize the request/response if needed */
+	struct sglist *bounce_sgl;
+	unsigned int bounce_sgl_count;
+	uint64_t not_aligned_seg_bits;
 };
 
 struct storvsc_softc {
 	struct hv_device		*hs_dev;
-        LIST_HEAD(, hv_storvsc_request) hs_free_list;
-        struct mtx      		hs_lock;
-        struct storvsc_driver_props     *hs_drv_props;
-        int 				hs_unit;
-        uint32_t         		hs_frozen;
-        struct cam_sim  		*hs_sim;
-        struct cam_path 		*hs_path;
+	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
+	struct mtx			hs_lock;
+	struct storvsc_driver_props	*hs_drv_props;
+	int 				hs_unit;
+	uint32_t			hs_frozen;
+	struct cam_sim			*hs_sim;
+	struct cam_path 		*hs_path;
 	uint32_t			hs_num_out_reqs;
 	boolean_t			hs_destroy;
 	boolean_t			hs_drain_notify;
+	boolean_t			hs_open_multi_channel;
 	struct sema 			hs_drain_sema;	
 	struct hv_storvsc_request	hs_init_req;
 	struct hv_storvsc_request	hs_reset_req;
@@ -124,7 +153,7 @@ struct storvsc_softc {
  * The first can be tested by "sg_senddiag -vv /dev/daX",
  * and the second and third can be done by
  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
- */ 
+ */
 #define HVS_TIMEOUT_TEST 0
 
 /*
@@ -138,7 +167,7 @@ struct storvsc_driver_props {
 	char		*drv_name;
 	char		*drv_desc;
 	uint8_t		drv_max_luns_per_target;
-	uint8_t		drv_max_ios_per_target; 
+	uint8_t		drv_max_ios_per_target;
 	uint32_t	drv_ringbuffer_size;
 };
 
@@ -150,6 +179,8 @@ enum hv_storage_type {
 
 #define HS_MAX_ADAPTERS 10
 
+#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
+
 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
 static const hv_guid gStorVscDeviceType={
 	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
@@ -171,13 +202,16 @@ static struct storvsc_driver_props g_drv_props_table[] = {
 	 STORVSC_RINGBUFFER_SIZE}
 };
 
+static int storvsc_current_major;
+static int storvsc_current_minor;
+
 /* static functions */
 static int storvsc_probe(device_t dev);
 static int storvsc_attach(device_t dev);
 static int storvsc_detach(device_t dev);
 static void storvsc_poll(struct cam_sim * sim);
 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
-static void create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
+static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
 static void hv_storvsc_on_channel_callback(void *context);
@@ -186,6 +220,14 @@ static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
 					struct hv_storvsc_request *request);
 static int hv_storvsc_connect_vsp(struct hv_device *device);
 static void storvsc_io_done(struct hv_storvsc_request *reqp);
+static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+				bus_dma_segment_t *orig_sgl,
+				unsigned int orig_sgl_count,
+				uint64_t seg_bits);
+void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+				unsigned int dest_sgl_count,
+				struct sglist* src_sgl,
+				uint64_t seg_bits);
 
 static device_method_t storvsc_methods[] = {
 	/* Device interface */
@@ -207,7 +249,7 @@ MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
 
 
 /**
- * The host is capable of sending messages to us that are 
+ * The host is capable of sending messages to us that are
  * completely unsolicited. So, we need to address the race
  * condition where we may be in the process of unloading the
  * driver when the host may send us an unsolicited message.
@@ -223,7 +265,7 @@ MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
  *    destroyed.
  *
  * 3. Once the device is marked as being destroyed, we only
- *    permit incoming traffic to properly account for 
+ *    permit incoming traffic to properly account for
  *    packets already sent out.
  */
 static inline struct storvsc_softc *
@@ -260,6 +302,113 @@ get_stor_device(struct hv_device *device,
 }
 
 /**
+ * @brief Callback handler, will be invoked when receive mutil-channel offer
+ *
+ * @param context  new multi-channel
+ */
+static void
+storvsc_handle_sc_creation(void *context)
+{
+	hv_vmbus_channel *new_channel;
+	struct hv_device *device;
+	struct storvsc_softc *sc;
+	struct vmstor_chan_props props;
+	int ret = 0;
+
+	new_channel = (hv_vmbus_channel *)context;
+	device = new_channel->primary_channel->device;
+	sc = get_stor_device(device, TRUE);
+	if (sc == NULL)
+		return;
+
+	if (FALSE == sc->hs_open_multi_channel)
+		return;
+	
+	memset(&props, 0, sizeof(props));
+
+	ret = hv_vmbus_channel_open(new_channel,
+	    sc->hs_drv_props->drv_ringbuffer_size,
+  	    sc->hs_drv_props->drv_ringbuffer_size,
+	    (void *)&props,
+	    sizeof(struct vmstor_chan_props),
+	    hv_storvsc_on_channel_callback,
+	    new_channel);
+
+	return;
+}
+
+/**
+ * @brief Send multi-channel creation request to host
+ *
+ * @param device  a Hyper-V device pointer
+ * @param max_chans  the max channels supported by vmbus
+ */
+static void
+storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
+{
+	struct storvsc_softc *sc;
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;	
+	int request_channels_cnt = 0;
+	int ret;
+
+	/* get multichannels count that need to create */
+	request_channels_cnt = MIN(max_chans, mp_ncpus);
+
+	sc = get_stor_device(dev, TRUE);
+	if (sc == NULL) {
+		printf("Storvsc_error: get sc failed while send mutilchannel "
+		    "request\n");
+		return;
+	}
+
+	request = &sc->hs_init_req;
+
+	/* Establish a handler for multi-channel */
+	dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
+
+	/* request the host to create multi-channel */
+	memset(request, 0, sizeof(struct hv_storvsc_request));
+	
+	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
+
+	vstor_packet = &request->vstor_packet;
+	
+	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+	vstor_packet->u.multi_channels_cnt = request_channels_cnt;
+
+	ret = hv_vmbus_channel_send_packet(
+	    dev->channel,
+	    vstor_packet,
+	    sizeof(struct vstor_packet),
+	    (uint64_t)(uintptr_t)request,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+
+	/* wait for 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
+	if (ret != 0) {		
+		printf("Storvsc_error: create multi-channel timeout, %d\n",
+		    ret);
+		return;
+	}
+
+	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+	    vstor_packet->status != 0) {		
+		printf("Storvsc_error: create multi-channel invalid operation "
+		    "(%d) or statue (%u)\n",
+		    vstor_packet->operation, vstor_packet->status);
+		return;
+	}
+
+	sc->hs_open_multi_channel = TRUE;
+
+	if (bootverbose)
+		printf("Storvsc create multi-channel success!\n");
+}
+
+/**
  * @brief initialize channel connection to parent partition
  *
  * @param dev  a Hyper-V device pointer
@@ -272,11 +421,15 @@ hv_storvsc_channel_init(struct hv_device *dev)
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 	struct storvsc_softc *sc;
+	uint16_t max_chans = 0;
+	boolean_t support_multichannel = FALSE;
+
+	max_chans = 0;
+	support_multichannel = FALSE;
 
 	sc = get_stor_device(dev, TRUE);
-	if (sc == NULL) {
-		return ENODEV;
-	}
+	if (sc == NULL)
+		return (ENODEV);
 
 	request = &sc->hs_init_req;
 	memset(request, 0, sizeof(struct hv_storvsc_request));
@@ -300,15 +453,13 @@ hv_storvsc_channel_init(struct hv_device *dev)
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
-
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
 
-	if (ret != 0) {
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
+	if (ret != 0)
 		goto cleanup;
-	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 		vstor_packet->status != 0) {
@@ -321,7 +472,8 @@ hv_storvsc_channel_init(struct hv_device *dev)
 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
-	vstor_packet->u.version.major_minor = VMSTOR_PROTOCOL_VERSION_CURRENT;
+	vstor_packet->u.version.major_minor =
+	    VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor);
 
 	/* revision is only significant for Windows guests */
 	vstor_packet->u.version.revision = 0;
@@ -334,21 +486,19 @@ hv_storvsc_channel_init(struct hv_device *dev)
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
-	if (ret) {
+	if (ret)
 		goto cleanup;
-	}
 
 	/* TODO: Check returned version */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
-		vstor_packet->status != 0) {
+		vstor_packet->status != 0)
 		goto cleanup;
-	}
 
 	/**
 	 * Query channel properties
@@ -365,22 +515,30 @@ hv_storvsc_channel_init(struct hv_device *dev)
 				HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 				HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if ( ret != 0) {
+	if ( ret != 0)
 		goto cleanup;
-	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
 
 	/* TODO: Check returned version */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
-		vstor_packet->status != 0) {
+	    vstor_packet->status != 0) {
 		goto cleanup;
 	}
 
+	/* multi-channels feature is supported by WIN8 and above version */
+	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
+	if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
+	    (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
+	    (vstor_packet->u.chan_props.flags &
+	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
+		support_multichannel = TRUE;
+	}
+
 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
@@ -397,16 +555,22 @@ hv_storvsc_channel_init(struct hv_device *dev)
 		goto cleanup;
 	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
-		vstor_packet->status != 0) {
+	    vstor_packet->status != 0)
 		goto cleanup;
-	}
+
+	/*
+	 * If multi-channel is supported, send multichannel create
+	 * request to host.
+	 */
+	if (support_multichannel)
+		storvsc_send_multichannel_request(dev, max_chans);
 
 cleanup:
 	sema_destroy(&request->synch_sema);
@@ -443,8 +607,7 @@ hv_storvsc_connect_vsp(struct hv_device *dev)
 		(void *)&props,
 		sizeof(struct vmstor_chan_props),
 		hv_storvsc_on_channel_callback,
-		dev);
-
+		dev->channel);
 
 	if (ret != 0) {
 		return ret;
@@ -490,7 +653,7 @@ hv_storvsc_host_reset(struct hv_device *dev)
 		goto cleanup;
 	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */
 
 	if (ret) {
 		goto cleanup;
@@ -498,7 +661,7 @@ hv_storvsc_host_reset(struct hv_device *dev)
 
 
 	/*
-	 * At this point, all outstanding requests in the adapter 
+	 * At this point, all outstanding requests in the adapter
 	 * should have been flushed out and return to us
 	 */
 
@@ -521,6 +684,7 @@ hv_storvsc_io_request(struct hv_device *device,
 {
 	struct storvsc_softc *sc;
 	struct vstor_packet *vstor_packet = &request->vstor_packet;
+	struct hv_vmbus_channel* outgoing_channel = NULL;
 	int ret = 0;
 
 	sc = get_stor_device(device, TRUE);
@@ -539,19 +703,20 @@ hv_storvsc_io_request(struct hv_device *device,
 
 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
 
+	outgoing_channel = vmbus_select_outgoing_channel(device->channel);
 
 	mtx_unlock(&request->softc->hs_lock);
 	if (request->data_buf.length) {
 		ret = hv_vmbus_channel_send_packet_multipagebuffer(
-				device->channel,
+				outgoing_channel,
 				&request->data_buf,
-				vstor_packet, 
-				sizeof(struct vstor_packet), 
+				vstor_packet,
+				sizeof(struct vstor_packet),
 				(uint64_t)(uintptr_t)request);
 
 	} else {
 		ret = hv_vmbus_channel_send_packet(
-			device->channel,
+			outgoing_channel,
 			vstor_packet,
 			sizeof(struct vstor_packet),
 			(uint64_t)(uintptr_t)request,
@@ -610,7 +775,8 @@ static void
 hv_storvsc_on_channel_callback(void *context)
 {
 	int ret = 0;
-	struct hv_device *device = (struct hv_device *)context;
+	hv_vmbus_channel *channel = (hv_vmbus_channel *)context;
+	struct hv_device *device = NULL;
 	struct storvsc_softc *sc;
 	uint32_t bytes_recvd;
 	uint64_t request_id;
@@ -618,15 +784,22 @@ hv_storvsc_on_channel_callback(void *context)
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 
+	if (channel->primary_channel != NULL){
+		device = channel->primary_channel->device;
+	} else {
+		device = channel->device;
+	}
+
+	KASSERT(device, ("device is NULL"));
+
 	sc = get_stor_device(device, FALSE);
 	if (sc == NULL) {
+		printf("Storvsc_error: get stor device failed.\n");
 		return;
 	}
 
-	KASSERT(device, ("device"));
-
 	ret = hv_vmbus_channel_recv_packet(
-			device->channel,
+			channel,
 			packet,
 			roundup2(sizeof(struct vstor_packet), 8),
 			&bytes_recvd,
@@ -634,21 +807,28 @@ hv_storvsc_on_channel_callback(void *context)
 
 	while ((ret == 0) && (bytes_recvd > 0)) {
 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
-		KASSERT(request, ("request"));
 
 		if ((request == &sc->hs_init_req) ||
 			(request == &sc->hs_reset_req)) {
 			memcpy(&request->vstor_packet, packet,
 				   sizeof(struct vstor_packet));
-			sema_post(&request->synch_sema); 
+			sema_post(&request->synch_sema);
 		} else {
 			vstor_packet = (struct vstor_packet *)packet;
 			switch(vstor_packet->operation) {
 			case VSTOR_OPERATION_COMPLETEIO:
+				if (request == NULL)
+					panic("VMBUS: storvsc received a "
+					    "packet with NULL request id in "
+					    "COMPLETEIO operation.");
+
 				hv_storvsc_on_iocompletion(sc,
 							vstor_packet, request);
 				break;
 			case VSTOR_OPERATION_REMOVEDEVICE:
+			case VSTOR_OPERATION_ENUMERATE_BUS:
+				printf("VMBUS: storvsc operation %d not "
+				    "implemented.\n", vstor_packet->operation);
 				/* TODO: implement */
 				break;
 			default:
@@ -656,7 +836,7 @@ hv_storvsc_on_channel_callback(void *context)
 			}			
 		}
 		ret = hv_vmbus_channel_recv_packet(
-				device->channel,
+				channel,
 				packet,
 				roundup2(sizeof(struct vstor_packet), 8),
 				&bytes_recvd,
@@ -680,7 +860,16 @@ storvsc_probe(device_t dev)
 {
 	int ata_disk_enable = 0;
 	int ret	= ENXIO;
-
+	
+	if ((HV_VMBUS_VERSION_WIN8 == hv_vmbus_protocal_version) ||
+	    (HV_VMBUS_VERSION_WIN8_1 == hv_vmbus_protocal_version)){
+		storvsc_current_major = STORVSC_WIN8_MAJOR;
+		storvsc_current_minor = STORVSC_WIN8_MINOR;
+	} else {
+		storvsc_current_major = STORVSC_WIN7_MAJOR;
+		storvsc_current_minor = STORVSC_WIN7_MINOR;
+	}
+	
 	switch (storvsc_get_storage_type(dev)) {
 	case DRIVER_BLKVSC:
 		if(bootverbose)
@@ -721,9 +910,11 @@ storvsc_attach(device_t dev)
 	enum hv_storage_type stor_type;
 	struct storvsc_softc *sc;
 	struct cam_devq *devq;
-	int ret, i;
+	int ret, i, j;
 	struct hv_storvsc_request *reqp;
 	struct root_hold_token *root_mount_token = NULL;
+	struct hv_sgl_node *sgl_node = NULL;
+	void *tmp_buff = NULL;
 
 	/*
 	 * We need to serialize storvsc attach calls.
@@ -764,8 +955,41 @@ storvsc_attach(device_t dev)
 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
 	}
 
+	/* create sg-list page pool */
+	if (FALSE == g_hv_sgl_page_pool.is_init) {
+		g_hv_sgl_page_pool.is_init = TRUE;
+		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
+		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
+
+		/*
+		 * Pre-create SG list, each SG list with
+		 * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each
+		 * segment has one page buffer
+		 */
+		for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
+	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
+			    M_DEVBUF, M_WAITOK|M_ZERO);
+
+			sgl_node->sgl_data =
+			    sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
+			    M_WAITOK|M_ZERO);
+
+			for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
+				tmp_buff = malloc(PAGE_SIZE,
+				    M_DEVBUF, M_WAITOK|M_ZERO);
+
+				sgl_node->sgl_data->sg_segs[j].ss_paddr =
+				    (vm_paddr_t)tmp_buff;
+			}
+
+			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
+			    sgl_node, link);
+		}
+	}
+
 	sc->hs_destroy = FALSE;
 	sc->hs_drain_notify = FALSE;
+	sc->hs_open_multi_channel = FALSE;
 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
 
 	ret = hv_storvsc_connect_vsp(hv_dev);
@@ -834,6 +1058,20 @@ cleanup:
 		LIST_REMOVE(reqp, link);
 		free(reqp, M_DEVBUF);
 	}
+
+	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+		LIST_REMOVE(sgl_node, link);
+		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
+			if (NULL !=
+			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+			}
+		}
+		sglist_free(sgl_node->sgl_data);
+		free(sgl_node, M_DEVBUF);
+	}
+
 	return (ret);
 }
 
@@ -853,6 +1091,8 @@ storvsc_detach(device_t dev)
 	struct storvsc_softc *sc = device_get_softc(dev);
 	struct hv_storvsc_request *reqp = NULL;
 	struct hv_device *hv_device = vmbus_get_devctx(dev);
+	struct hv_sgl_node *sgl_node = NULL;
+	int j = 0;
 
 	mtx_lock(&hv_device->channel->inbound_lock);
 	sc->hs_destroy = TRUE;
@@ -884,6 +1124,20 @@ storvsc_detach(device_t dev)
 		free(reqp, M_DEVBUF);
 	}
 	mtx_unlock(&sc->hs_lock);
+
+	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+		LIST_REMOVE(sgl_node, link);
+		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
+			if (NULL !=
+			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+			}
+		}
+		sglist_free(sgl_node->sgl_data);
+		free(sgl_node, M_DEVBUF);
+	}
+	
 	return (0);
 }
 
@@ -939,7 +1193,7 @@ storvsc_timeout_test(struct hv_storvsc_request *reqp,
 				ticks, __func__, (ret == 0)?
 				"IO return detected" :
 				"IO return not detected");
-		/* 
+		/*
 		 * Now both the timer handler and io done are running
 		 * simultaneously. We want to confirm the io done always
 		 * finishes after the timer handler exits. So reqp used by
@@ -1023,7 +1277,7 @@ storvsc_poll(struct cam_sim *sim)
 
 	mtx_assert(&sc->hs_lock, MA_OWNED);
 	mtx_unlock(&sc->hs_lock);
-	hv_storvsc_on_channel_callback(sc->hs_dev);
+	hv_storvsc_on_channel_callback(sc->hs_dev->channel);
 	mtx_lock(&sc->hs_lock);
 }
 
@@ -1151,9 +1405,13 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
 
 		bzero(reqp, sizeof(struct hv_storvsc_request));
 		reqp->softc = sc;
-
-		ccb->ccb_h.status |= CAM_SIM_QUEUED;	    
-		create_storvsc_request(ccb, reqp);
+		
+		ccb->ccb_h.status |= CAM_SIM_QUEUED;
+		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
+			ccb->ccb_h.status = CAM_REQ_INVALID;
+			xpt_done(ccb);
+			return;
+		}
 
 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
 			callout_init(&reqp->callout, CALLOUT_MPSAFE);
@@ -1194,6 +1452,212 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
 }
 
 /**
+ * @brief destroy bounce buffer
+ *
+ * This function is responsible for destroy a Scatter/Gather list
+ * that create by storvsc_create_bounce_buffer()
+ *
+ * @param sgl- the Scatter/Gather need be destroy
+ * @param sg_count- page count of the SG list.
+ *
+ */
+static void
+storvsc_destroy_bounce_buffer(struct sglist *sgl)
+{
+	struct hv_sgl_node *sgl_node = NULL;
+
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
+	LIST_REMOVE(sgl_node, link);
+	if (NULL == sgl_node) {
+		printf("storvsc error: not enough in use sgl\n");
+		return;
+	}
+	sgl_node->sgl_data = sgl;
+	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
+}
+
+/**
+ * @brief create bounce buffer
+ *
+ * This function is responsible for create a Scatter/Gather list,
+ * which hold several pages that can be aligned with page size.
+ *
+ * @param seg_count- SG-list segments count
+ * @param write - if WRITE_TYPE, set SG list page used size to 0,
+ * otherwise set used size to page size.
+ *
+ * return NULL if create failed
+ */
+static struct sglist *
+storvsc_create_bounce_buffer(uint16_t seg_count, int write)
+{
+	int i = 0;
+	struct sglist *bounce_sgl = NULL;
+	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
+	struct hv_sgl_node *sgl_node = NULL;	
+
+	/* get struct sglist from free_sgl_list */
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+	LIST_REMOVE(sgl_node, link);
+	if (NULL == sgl_node) {
+		printf("storvsc error: not enough free sgl\n");
+		return NULL;
+	}
+	bounce_sgl = sgl_node->sgl_data;
+	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
+
+	bounce_sgl->sg_maxseg = seg_count;
+
+	if (write == WRITE_TYPE)
+		bounce_sgl->sg_nseg = 0;
+	else
+		bounce_sgl->sg_nseg = seg_count;
+
+	for (i = 0; i < seg_count; i++)
+	        bounce_sgl->sg_segs[i].ss_len = buf_len;
+
+	return bounce_sgl;
+}
+
+/**
+ * @brief copy data from SG list to bounce buffer
+ *
+ * This function is responsible for copy data from one SG list's segments
+ * to another SG list which used as bounce buffer.
+ *
+ * @param bounce_sgl - the destination SG list
+ * @param orig_sgl - the segment of the source SG list.
+ * @param orig_sgl_count - the count of segments.
+ * @param orig_sgl_count - indicate which segment need bounce buffer,
+ *  set 1 means need.
+ *
+ */
+static void
+storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+			       bus_dma_segment_t *orig_sgl,
+			       unsigned int orig_sgl_count,
+			       uint64_t seg_bits)
+{
+	int src_sgl_idx = 0;
+
+	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
+		if (seg_bits & (1 << src_sgl_idx)) {
+			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
+			    (void*)orig_sgl[src_sgl_idx].ds_addr,
+			    orig_sgl[src_sgl_idx].ds_len);
+
+			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
+			    orig_sgl[src_sgl_idx].ds_len;
+		}
+	}
+}
+
+/**
+ * @brief copy data from SG list which used as bounce to another SG list
+ *
+ * This function is responsible for copy data from one SG list with bounce
+ * buffer to another SG list's segments.
+ *
+ * @param dest_sgl - the destination SG list's segments
+ * @param dest_sgl_count - the count of destination SG list's segment.
+ * @param src_sgl - the source SG list.
+ * @param seg_bits - indicate which segment used bounce buffer of src SG-list.
+ *
+ */
+void
+storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+				    unsigned int dest_sgl_count,
+				    struct sglist* src_sgl,
+				    uint64_t seg_bits)
+{
+	int sgl_idx = 0;
+	
+	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
+		if (seg_bits & (1 << sgl_idx)) {
+			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
+			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
+			    src_sgl->sg_segs[sgl_idx].ss_len);
+		}
+	}
+}
+
+/**
+ * @brief check SG list with bounce buffer or not
+ *
+ * This function is responsible for check if need bounce buffer for SG list.
+ *
+ * @param sgl - the SG list's segments
+ * @param sg_count - the count of SG list's segment.
+ * @param bits - segmengs number that need bounce buffer
+ *
+ * return -1 if SG list needless bounce buffer
+ */
+static int
+storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
+				unsigned int sg_count,
+				uint64_t *bits)
+{
+	int i = 0;
+	int offset = 0;
+	uint64_t phys_addr = 0;
+	uint64_t tmp_bits = 0;
+	boolean_t found_hole = FALSE;
+	boolean_t pre_aligned = TRUE;
+
+	if (sg_count < 2){
+		return -1;
+	}
+
+	*bits = 0;
+	
+	phys_addr = vtophys(sgl[0].ds_addr);
+	offset =  phys_addr - trunc_page(phys_addr);
+
+	if (offset != 0) {
+		pre_aligned = FALSE;
+		tmp_bits |= 1;
+	}
+
+	for (i = 1; i < sg_count; i++) {
+		phys_addr = vtophys(sgl[i].ds_addr);
+		offset =  phys_addr - trunc_page(phys_addr);
+
+		if (offset == 0) {
+			if (FALSE == pre_aligned){
+				/*
+				 * This segment is aligned, if the previous
+				 * one is not aligned, find a hole
+				 */
+				found_hole = TRUE;
+			}
+			pre_aligned = TRUE;
+		} else {
+			tmp_bits |= 1 << i;
+			if (!pre_aligned) {
+				if (phys_addr != vtophys(sgl[i-1].ds_addr +
+				    sgl[i-1].ds_len)) {
+					/*
+					 * Check whether connect to previous
+					 * segment,if not, find the hole
+					 */
+					found_hole = TRUE;
+				}
+			} else {
+				found_hole = TRUE;
+			}
+			pre_aligned = FALSE;
+		}
+	}
+
+	if (!found_hole) {
+		return (-1);
+	} else {
+		*bits = tmp_bits;
+		return 0;
+	}
+}
+
+/**
  * @brief Fill in a request structure based on a CAM control block
  *
  * Fills in a request structure based on the contents of a CAM control
@@ -1203,7 +1667,7 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
  * @param ccb pointer to a CAM contorl block
  * @param reqp pointer to a request structure
  */
-static void
+static int
 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 {
 	struct ccb_scsiio *csio = &ccb->csio;
@@ -1211,6 +1675,7 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 	uint32_t bytes_to_copy = 0;
 	uint32_t pfn_num = 0;
 	uint32_t pfn;
+	uint64_t not_aligned_seg_bits = 0;
 	
 	/* refer to struct vmscsi_req for meanings of these two fields */
 	reqp->vstor_packet.u.vm_srb.port =
@@ -1231,48 +1696,172 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 	}
 
 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
-    	case CAM_DIR_OUT: 
-    		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
-    		break;
-    	case CAM_DIR_IN:
-    		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
-    		break;
-    	case CAM_DIR_NONE:
-    		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
-    		break;
-    	default:
-    		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
-    		break;
+	case CAM_DIR_OUT:
+		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;	
+		break;
+	case CAM_DIR_IN:
+		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
+		break;
+	case CAM_DIR_NONE:
+		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+		break;
+	default:
+		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+		break;
 	}
 
 	reqp->sense_data     = &csio->sense_data;
 	reqp->sense_info_len = csio->sense_len;
 
 	reqp->ccb = ccb;
-	/*
-	KASSERT((ccb->ccb_h.flags & CAM_SCATTER_VALID) == 0,
-			("ccb is scatter gather valid\n"));
-	*/
-	if (csio->dxfer_len != 0) {
-		reqp->data_buf.length = csio->dxfer_len;
+
+	if (0 == csio->dxfer_len) {
+		return (0);
+	}
+
+	reqp->data_buf.length = csio->dxfer_len;
+
+	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
+	case CAM_DATA_VADDR:
+	{
 		bytes_to_copy = csio->dxfer_len;
 		phys_addr = vtophys(csio->data_ptr);
-		reqp->data_buf.offset = phys_addr - trunc_page(phys_addr);
+		reqp->data_buf.offset = phys_addr & PAGE_MASK;
+		
+		while (bytes_to_copy != 0) {
+			int bytes, page_offset;
+			phys_addr =
+			    vtophys(&csio->data_ptr[reqp->data_buf.length -
+			    bytes_to_copy]);
+			pfn = phys_addr >> PAGE_SHIFT;
+			reqp->data_buf.pfn_array[pfn_num] = pfn;
+			page_offset = phys_addr & PAGE_MASK;
+
+			bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
+
+			bytes_to_copy -= bytes;
+			pfn_num++;
+		}
+		break;
 	}
 
-	while (bytes_to_copy != 0) {
-		int bytes, page_offset;
-		phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length -
-		                                    bytes_to_copy]);
-		pfn = phys_addr >> PAGE_SHIFT;
-		reqp->data_buf.pfn_array[pfn_num] = pfn;
-		page_offset = phys_addr - trunc_page(phys_addr);
+	case CAM_DATA_SG:
+	{
+		int i = 0;
+		int offset = 0;
+		int ret;
+
+		bus_dma_segment_t *storvsc_sglist =
+		    (bus_dma_segment_t *)ccb->csio.data_ptr;
+		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
+
+		printf("Storvsc: get SG I/O operation, %d\n",
+		    reqp->vstor_packet.u.vm_srb.data_in);
+
+		if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){
+			printf("Storvsc: %d segments is too much, "
+			    "only support %d segments\n",
+			    storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
+			return (EINVAL);
+		}
+
+		/*
+		 * We create our own bounce buffer function currently. Idealy
+		 * we should use BUS_DMA(9) framework. But with current BUS_DMA
+		 * code there is no callback API to check the page alignment of
+		 * middle segments before busdma can decide if a bounce buffer
+		 * is needed for particular segment. There is callback,
+		 * "bus_dma_filter_t *filter", but the parrameters are not
+		 * sufficient for storvsc driver.
+		 * TODO:
+		 *	Add page alignment check in BUS_DMA(9) callback. Once
+		 *	this is complete, switch the following code to use
+		 *	BUS_DMA(9) for storvsc bounce buffer support.
+		 */
+		/* check if we need to create bounce buffer */
+		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
+		    storvsc_sg_count, &not_aligned_seg_bits);
+		if (ret != -1) {
+			reqp->bounce_sgl =
+			    storvsc_create_bounce_buffer(storvsc_sg_count,
+			    reqp->vstor_packet.u.vm_srb.data_in);
+			if (NULL == reqp->bounce_sgl) {
+				printf("Storvsc_error: "
+				    "create bounce buffer failed.\n");
+				return (ENOMEM);
+			}
+
+			reqp->bounce_sgl_count = storvsc_sg_count;
+			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
+
+			/*
+			 * if it is write, we need copy the original data
+			 *to bounce buffer
+			 */
+			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+				storvsc_copy_sgl_to_bounce_buf(
+				    reqp->bounce_sgl,
+				    storvsc_sglist,
+				    storvsc_sg_count,
+				    reqp->not_aligned_seg_bits);
+			}
+
+			/* transfer virtual address to physical frame number */
+			if (reqp->not_aligned_seg_bits & 0x1){
+ 				phys_addr =
+				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
+			}else{
+ 				phys_addr =
+					vtophys(storvsc_sglist[0].ds_addr);
+			}
+			reqp->data_buf.offset = phys_addr & PAGE_MASK;
+
+			pfn = phys_addr >> PAGE_SHIFT;
+			reqp->data_buf.pfn_array[0] = pfn;
+			
+			for (i = 1; i < storvsc_sg_count; i++) {
+				if (reqp->not_aligned_seg_bits & (1 << i)) {
+					phys_addr =
+					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
+				} else {
+					phys_addr =
+					    vtophys(storvsc_sglist[i].ds_addr);
+				}
+
+				pfn = phys_addr >> PAGE_SHIFT;
+				reqp->data_buf.pfn_array[i] = pfn;
+			}
+		} else {
+			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
+
+			reqp->data_buf.offset = phys_addr & PAGE_MASK;
 
-		bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
+			for (i = 0; i < storvsc_sg_count; i++) {
+				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
+				pfn = phys_addr >> PAGE_SHIFT;
+				reqp->data_buf.pfn_array[i] = pfn;
+			}
 
-		bytes_to_copy -= bytes;
-		pfn_num++;
+			/* check the last segment cross boundary or not */
+			offset = phys_addr & PAGE_MASK;
+			if (offset) {
+				phys_addr =
+				    vtophys(storvsc_sglist[i-1].ds_addr +
+				    PAGE_SIZE - offset);
+				pfn = phys_addr >> PAGE_SHIFT;
+				reqp->data_buf.pfn_array[i] = pfn;
+			}
+			
+			reqp->bounce_sgl_count = 0;
+		}
+		break;
+	}
+	default:
+		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
+		return(EINVAL);
 	}
+
+	return(0);
 }
 
 /**
@@ -1291,7 +1880,29 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
 	struct ccb_scsiio *csio = &ccb->csio;
 	struct storvsc_softc *sc = reqp->softc;
 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
-	
+	bus_dma_segment_t *ori_sglist = NULL;
+	int ori_sg_count = 0;
+
+	/* destroy bounce buffer if it is used */
+	if (reqp->bounce_sgl_count) {
+		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
+		ori_sg_count = ccb->csio.sglist_cnt;
+
+		/*
+		 * If it is READ operation, we should copy back the data
+		 * to original SG list.
+		 */
+		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
+			    ori_sg_count,
+			    reqp->bounce_sgl,
+			    reqp->not_aligned_seg_bits);
+		}
+
+		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
+		reqp->bounce_sgl_count = 0;
+	}
+		
 	if (reqp->retries > 0) {
 		mtx_lock(&sc->hs_lock);
 #if HVS_TIMEOUT_TEST
@@ -1309,7 +1920,7 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
 		mtx_unlock(&sc->hs_lock);
 	}
 
-	/* 
+	/*
 	 * callout_drain() will wait for the timer handler to finish
 	 * if it is running. So we don't need any lock to synchronize
 	 * between this routine and the timer handler.