12 files changed, 1764 insertions, 474 deletions
diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h
index 8a45d89..5360b7c 100644
--- a/sys/dev/hyperv/include/hyperv.h
+++ b/sys/dev/hyperv/include/hyperv.h
@@ -46,6 +46,7 @@
 #include <sys/systm.h>
 #include <sys/lock.h>
 #include <sys/sema.h>
+#include <sys/smp.h>
 #include <sys/mutex.h>
 #include <sys/bus.h>
 #include <vm/vm.h>
@@ -63,11 +64,22 @@ typedef uint8_t	hv_bool_uint8_t;
 #define HV_ERROR_MACHINE_LOCKED	0x800704F7
 
 /*
- * A revision number of vmbus that is used for ensuring both ends on a
- * partition are using compatible versions.
+ * VMBUS version is 32 bit, upper 16 bit for major_number and lower
+ * 16 bit for minor_number.
+ *
+ * 0.13  --  Windows Server 2008
+ * 1.1   --  Windows 7
+ * 2.4   --  Windows 8
+ * 3.0   --  Windows 8.1
  */
+#define HV_VMBUS_VERSION_WS2008		((0 << 16) | (13))
+#define HV_VMBUS_VERSION_WIN7		((1 << 16) | (1))
+#define HV_VMBUS_VERSION_WIN8		((2 << 16) | (4))
+#define HV_VMBUS_VERSION_WIN8_1		((3 << 16) | (0))
+
+#define HV_VMBUS_VERSION_INVALID	(-1)	/* no version negotiated */
 
-#define HV_VMBUS_REVISION_NUMBER	13
+#define HV_VMBUS_VERSION_CURRENT	HV_VMBUS_VERSION_WIN8_1
 
 /*
  * Make maximum size of pipe payload of 16K
@@ -112,6 +124,18 @@ typedef struct hv_guid {
 	 unsigned char data[16];
 } __packed hv_guid;
 
+#define HV_NIC_GUID							\
+	.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,	\
+		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
+
+#define HV_IDE_GUID							\
+	.data = {0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,	\
+		 0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5}
+
+#define HV_SCSI_GUID							\
+	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,	\
+		 0xb6, 0x05, 0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f}
+
 /*
  * At the center of the Channel Management library is
  * the Channel Offer. This struct contains the
@@ -147,7 +171,11 @@ typedef struct hv_vmbus_channel_offer {
 		} __packed pipe;
 	} u;
 
-	uint32_t	padding;
+	/*
+	 * Sub_channel_index, newly added in Win8.
+	 */
+	uint16_t	sub_channel_index;
+	uint16_t	padding;
 
 } __packed hv_vmbus_channel_offer;
 
@@ -344,7 +372,25 @@ typedef struct {
 	hv_vmbus_channel_offer		offer;
 	uint32_t			child_rel_id;
 	uint8_t				monitor_id;
-	hv_bool_uint8_t			monitor_allocated;
+	/*
+	 * This field has been split into a bit field on Win7
+	 * and higher.
+	 */
+	uint8_t				monitor_allocated:1;
+	uint8_t				reserved:7;
+	/*
+	 * Following fields were added in win7 and higher.
+	 * Make sure to check the version before accessing these fields.
+	 *
+	 * If "is_dedicated_interrupt" is set, we must not set the
+	 * associated bit in the channel bitmap while sending the
+	 * interrupt to the host.
+	 *
+	 * connection_id is used in signaling the host.
+	 */
+	uint16_t			is_dedicated_interrupt:1;
+	uint16_t			reserved1:15;
+	uint32_t			connection_id;
 } __packed hv_vmbus_channel_offer_channel;
 
 /*
@@ -394,9 +440,11 @@ typedef struct
     hv_gpadl_handle	ring_buffer_gpadl_handle;
 
     /*
-     * GPADL for the channel's server context save area.
+     * Before win8, all incoming channel interrupts are only
+     * delivered on cpu 0. Setting this value to 0 would
+     * preserve the earlier behavior.
      */
-    hv_gpadl_handle	server_context_area_gpadl_handle;
+    uint32_t		target_vcpu;
 
     /*
      * The upstream ring buffer begins at offset zero in the memory described
@@ -646,14 +694,42 @@ typedef struct {
 } hv_vmbus_ring_buffer_info;
 
 typedef void (*hv_vmbus_pfn_channel_callback)(void *context);
+typedef void (*hv_vmbus_sc_creation_callback)(void *context);
 
 typedef enum {
 	HV_CHANNEL_OFFER_STATE,
 	HV_CHANNEL_OPENING_STATE,
 	HV_CHANNEL_OPEN_STATE,
+	HV_CHANNEL_OPENED_STATE,
 	HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE,
 } hv_vmbus_channel_state;
 
+/*
+ *  Connection identifier type
+ */
+typedef union {
+	uint32_t		as_uint32_t;
+	struct {
+		uint32_t	id:24;
+		uint32_t	reserved:8;
+	} u;
+
+} __packed hv_vmbus_connection_id;
+
+/*
+ * Definition of the hv_vmbus_signal_event hypercall input structure
+ */
+typedef struct {
+	hv_vmbus_connection_id	connection_id;
+	uint16_t		flag_number;
+	uint16_t		rsvd_z;
+} __packed hv_vmbus_input_signal_event;
+
+typedef struct {
+	uint64_t			align8;
+	hv_vmbus_input_signal_event	event;
+} __packed hv_vmbus_input_signal_event_buffer;
+
 typedef struct hv_vmbus_channel {
 	TAILQ_ENTRY(hv_vmbus_channel)	list_entry;
 	struct hv_device*		device;
@@ -688,8 +764,82 @@ typedef struct hv_vmbus_channel {
 	hv_vmbus_pfn_channel_callback	on_channel_callback;
 	void*				channel_callback_context;
 
+	/*
+	 * If batched_reading is set to "true", mask the interrupt
+	 * and read until the channel is empty.
+	 * If batched_reading is set to "false", the channel is not
+	 * going to perform batched reading.
+	 *
+	 * Batched reading is enabled by default; specific
+	 * drivers that don't want this behavior can turn it off.
+	 */
+	boolean_t			batched_reading;
+
+	boolean_t			is_dedicated_interrupt;
+
+	/*
+	 * Used as an input param for HV_CALL_SIGNAL_EVENT hypercall.
+	 */
+	hv_vmbus_input_signal_event_buffer	signal_event_buffer;
+	/*
+	 * 8-byte aligned address within the buffer above
+	 */
+	hv_vmbus_input_signal_event	*signal_event_param;
+
+	/*
+	 * From Win8, this field specifies the target virtual processor
+	 * on which to deliver the interrupt from the host to guest.
+	 * Before Win8, all channel interrupts would only be
+	 * delivered on cpu 0. Setting this value to 0 would preserve
+	 * the earlier behavior.
+	 */
+	uint32_t			target_vcpu;
+	/* The corresponding CPUID in the guest */
+	uint32_t			target_cpu;
+
+	/*
+	 * Support for multi-channels.
+	 * The initial offer is considered the primary channel and this
+	 * offer message will indicate if the host supports multi-channels.
+	 * The guest is free to ask for multi-channels to be offered and can
+	 * open these multi-channels as a normal "primary" channel. However,
+	 * all multi-channels will have the same type and instance guids as the
+	 * primary channel. Requests sent on a given channel will result in a
+	 * response on the same channel.
+	 */
+
+	/*
+	 * Multi-channel creation callback. This callback will be called in
+	 * process context when a Multi-channel offer is received from the host.
+	 * The guest can open the Multi-channel in the context of this callback.
+	 */
+	hv_vmbus_sc_creation_callback	sc_creation_callback;
+
+	struct mtx			sc_lock;
+
+	/*
+	 * Link list of all the multi-channels if this is a primary channel
+	 */
+	TAILQ_HEAD(, hv_vmbus_channel)	sc_list_anchor;
+	TAILQ_ENTRY(hv_vmbus_channel)	sc_list_entry;
+
+	/*
+	 * The primary channel this sub-channel belongs to.
+	 * This will be NULL for the primary channel.
+	 */
+	struct hv_vmbus_channel		*primary_channel;
+	/*
+	 * Support per channel state for use by vmbus drivers.
+	 */
+	void				*per_channel_state;
 } hv_vmbus_channel;
 
+static inline void
+hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state)
+{
+	channel->batched_reading = state;	/* TRUE = batched reading */
+}
+
 typedef struct hv_device {
 	hv_guid		    class_id;
 	hv_guid		    device_id;
@@ -760,6 +910,8 @@ int		hv_vmbus_channel_teardown_gpdal(
 				hv_vmbus_channel*	channel,
 				uint32_t		gpadl_handle);
 
+struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
+
 /*
  * Work abstraction defines
  */
@@ -819,6 +971,7 @@ typedef struct hv_vmbus_service {
 
 extern uint8_t* receive_buffer[];
 extern hv_vmbus_service service_table[];
+extern uint32_t hv_vmbus_protocal_version;
 
 void hv_kvp_callback(void *context);
 int hv_kvp_init(hv_vmbus_service *serv);
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index d00d279..f8a871b 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/proc.h>
 #include <sys/condvar.h>
+#include <sys/time.h>
 #include <sys/systm.h>
 #include <sys/sockio.h>
 #include <sys/mbuf.h>
@@ -53,8 +54,12 @@ __FBSDID("$FreeBSD$");
 #include <sys/callout.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/uma.h>
 #include <sys/lock.h>
 #include <sys/sema.h>
+#include <sys/sglist.h>
+#include <machine/bus.h>
+#include <sys/bus_dma.h>
 
 #include <cam/cam.h>
 #include <cam/cam_ccb.h>
@@ -66,7 +71,6 @@ __FBSDID("$FreeBSD$");
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_message.h>
 
-
 #include <dev/hyperv/include/hyperv.h>
 #include "hv_vstorage.h"
 
@@ -77,8 +81,29 @@ __FBSDID("$FreeBSD$");
 #define BLKVSC_MAX_IO_REQUESTS		STORVSC_MAX_IO_REQUESTS
 #define STORVSC_MAX_TARGETS		(2)
 
+#define STORVSC_WIN7_MAJOR 4
+#define STORVSC_WIN7_MINOR 2
+
+#define STORVSC_WIN8_MAJOR 5
+#define STORVSC_WIN8_MINOR 1
+
+#define HV_ALIGN(x, a) roundup2(x, a)
+
 struct storvsc_softc;
 
+struct hv_sgl_node {
+	LIST_ENTRY(hv_sgl_node) link;
+	struct sglist *sgl_data;
+};
+
+struct hv_sgl_page_pool{
+	LIST_HEAD(, hv_sgl_node) in_use_sgl_list;
+	LIST_HEAD(, hv_sgl_node) free_sgl_list;
+	boolean_t                is_init;
+} g_hv_sgl_page_pool;
+
+#define STORVSC_MAX_SG_PAGE_CNT (STORVSC_MAX_IO_REQUESTS * HV_MAX_MULTIPAGE_BUFFER_COUNT)
+
 enum storvsc_request_type {
 	WRITE_TYPE,
 	READ_TYPE,
@@ -96,20 +121,24 @@ struct hv_storvsc_request {
 	struct storvsc_softc *softc;
 	struct callout callout;
 	struct sema synch_sema; /*Synchronize the request/response if needed */
+	struct sglist *bounce_sgl;
+	unsigned int bounce_sgl_count;
+	uint64_t not_aligned_seg_bits;
 };
 
 struct storvsc_softc {
 	struct hv_device		*hs_dev;
-        LIST_HEAD(, hv_storvsc_request) hs_free_list;
-        struct mtx      		hs_lock;
-        struct storvsc_driver_props     *hs_drv_props;
-        int 				hs_unit;
-        uint32_t         		hs_frozen;
-        struct cam_sim  		*hs_sim;
-        struct cam_path 		*hs_path;
+	LIST_HEAD(, hv_storvsc_request)	hs_free_list;
+	struct mtx			hs_lock;
+	struct storvsc_driver_props	*hs_drv_props;
+	int 				hs_unit;
+	uint32_t			hs_frozen;
+	struct cam_sim			*hs_sim;
+	struct cam_path 		*hs_path;
 	uint32_t			hs_num_out_reqs;
 	boolean_t			hs_destroy;
 	boolean_t			hs_drain_notify;
+	boolean_t			hs_open_multi_channel;
 	struct sema 			hs_drain_sema;	
 	struct hv_storvsc_request	hs_init_req;
 	struct hv_storvsc_request	hs_reset_req;
@@ -124,7 +153,7 @@ struct storvsc_softc {
  * The first can be tested by "sg_senddiag -vv /dev/daX",
  * and the second and third can be done by
  * "sg_wr_mode -v -p 08 -c 0,1a -m 0,ff /dev/daX".
- */ 
+ */
 #define HVS_TIMEOUT_TEST 0
 
 /*
@@ -138,7 +167,7 @@ struct storvsc_driver_props {
 	char		*drv_name;
 	char		*drv_desc;
 	uint8_t		drv_max_luns_per_target;
-	uint8_t		drv_max_ios_per_target; 
+	uint8_t		drv_max_ios_per_target;
 	uint32_t	drv_ringbuffer_size;
 };
 
@@ -150,6 +179,8 @@ enum hv_storage_type {
 
 #define HS_MAX_ADAPTERS 10
 
+#define HV_STORAGE_SUPPORTS_MULTI_CHANNEL 0x1
+
 /* {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f} */
 static const hv_guid gStorVscDeviceType={
 	.data = {0xd9, 0x63, 0x61, 0xba, 0xa1, 0x04, 0x29, 0x4d,
@@ -171,13 +202,16 @@ static struct storvsc_driver_props g_drv_props_table[] = {
 	 STORVSC_RINGBUFFER_SIZE}
 };
 
+static int storvsc_current_major;
+static int storvsc_current_minor;
+
 /* static functions */
 static int storvsc_probe(device_t dev);
 static int storvsc_attach(device_t dev);
 static int storvsc_detach(device_t dev);
 static void storvsc_poll(struct cam_sim * sim);
 static void storvsc_action(struct cam_sim * sim, union ccb * ccb);
-static void create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
+static int create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp);
 static void storvsc_free_request(struct storvsc_softc *sc, struct hv_storvsc_request *reqp);
 static enum hv_storage_type storvsc_get_storage_type(device_t dev);
 static void hv_storvsc_on_channel_callback(void *context);
@@ -186,6 +220,14 @@ static void hv_storvsc_on_iocompletion( struct storvsc_softc *sc,
 					struct hv_storvsc_request *request);
 static int hv_storvsc_connect_vsp(struct hv_device *device);
 static void storvsc_io_done(struct hv_storvsc_request *reqp);
+static void storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+				bus_dma_segment_t *orig_sgl,
+				unsigned int orig_sgl_count,
+				uint64_t seg_bits);
+void storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+				unsigned int dest_sgl_count,
+				struct sglist* src_sgl,
+				uint64_t seg_bits);
 
 static device_method_t storvsc_methods[] = {
 	/* Device interface */
@@ -207,7 +249,7 @@ MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
 
 
 /**
- * The host is capable of sending messages to us that are 
+ * The host is capable of sending messages to us that are
  * completely unsolicited. So, we need to address the race
  * condition where we may be in the process of unloading the
  * driver when the host may send us an unsolicited message.
@@ -223,7 +265,7 @@ MODULE_DEPEND(storvsc, vmbus, 1, 1, 1);
  *    destroyed.
  *
  * 3. Once the device is marked as being destroyed, we only
- *    permit incoming traffic to properly account for 
+ *    permit incoming traffic to properly account for
  *    packets already sent out.
  */
 static inline struct storvsc_softc *
@@ -260,6 +302,113 @@ get_stor_device(struct hv_device *device,
 }
 
 /**
+ * @brief Callback handler invoked when a multi-channel offer is received
+ *
+ * @param context  new multi-channel
+ */
+static void
+storvsc_handle_sc_creation(void *context)
+{
+	hv_vmbus_channel *new_channel;
+	struct hv_device *device;
+	struct storvsc_softc *sc;
+	struct vmstor_chan_props props;
+	int ret = 0;
+
+	new_channel = (hv_vmbus_channel *)context;
+	device = new_channel->primary_channel->device;
+	sc = get_stor_device(device, TRUE);
+	if (sc == NULL)
+		return;
+
+	/* Ignore the offer unless multi-channel creation was acknowledged. */
+	if (FALSE == sc->hs_open_multi_channel)
+		return;
+
+	memset(&props, 0, sizeof(props));
+	ret = hv_vmbus_channel_open(new_channel,
+	    sc->hs_drv_props->drv_ringbuffer_size,
+	    sc->hs_drv_props->drv_ringbuffer_size,
+	    (void *)&props,
+	    sizeof(struct vmstor_chan_props),
+	    hv_storvsc_on_channel_callback,
+	    new_channel);
+	if (ret != 0)
+		printf("Storvsc_error: open sub-channel failed, %d\n", ret);
+}
+
+/**
+ * @brief Send multi-channel creation request to host
+ *
+ * @param dev  a Hyper-V device pointer
+ * @param max_chans  the max channels supported by vmbus
+ */
+static void
+storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
+{
+	struct storvsc_softc *sc;
+	struct hv_storvsc_request *request;
+	struct vstor_packet *vstor_packet;
+	int request_channels_cnt = 0;
+	int ret;
+
+	/* Request no more channels than max_chans or the CPU count. */
+	request_channels_cnt = MIN(max_chans, mp_ncpus);
+
+	sc = get_stor_device(dev, TRUE);
+	if (sc == NULL) {
+		printf("Storvsc_error: get sc failed while send multichannel "
+		    "request\n");
+		return;
+	}
+
+	request = &sc->hs_init_req;
+	/* Establish a handler for multi-channel */
+	dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
+
+	/* request the host to create multi-channel */
+	memset(request, 0, sizeof(struct hv_storvsc_request));
+	sema_init(&request->synch_sema, 0, ("stor_synch_sema"));
+
+	vstor_packet = &request->vstor_packet;
+	vstor_packet->operation = VSTOR_OPERATION_CREATE_MULTI_CHANNELS;
+	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
+	vstor_packet->u.multi_channels_cnt = request_channels_cnt;
+
+	ret = hv_vmbus_channel_send_packet(
+	    dev->channel,
+	    vstor_packet,
+	    sizeof(struct vstor_packet),
+	    (uint64_t)(uintptr_t)request,
+	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+	    HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret != 0) {
+		printf("Storvsc_error: send multichannel request: %d\n", ret);
+		return;
+	}
+
+	/* The send succeeded; wait up to 5 seconds for the completion. */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
+	if (ret != 0) {
+		printf("Storvsc_error: create multi-channel timeout, %d\n",
+		    ret);
+		return;
+	}
+
+	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
+	    vstor_packet->status != 0) {
+		printf("Storvsc_error: create multi-channel invalid operation "
+		    "(%d) or status (%u)\n",
+		    vstor_packet->operation, vstor_packet->status);
+		return;
+	}
+
+	sc->hs_open_multi_channel = TRUE;
+	if (bootverbose)
+		printf("Storvsc create multi-channel success!\n");
+}
+
+/**
  * @brief initialize channel connection to parent partition
  *
  * @param dev  a Hyper-V device pointer
@@ -272,11 +421,15 @@ hv_storvsc_channel_init(struct hv_device *dev)
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 	struct storvsc_softc *sc;
+	uint16_t max_chans = 0;
+	boolean_t support_multichannel = FALSE;
+
+	max_chans = 0;
+	support_multichannel = FALSE;
 
 	sc = get_stor_device(dev, TRUE);
-	if (sc == NULL) {
-		return ENODEV;
-	}
+	if (sc == NULL)
+		return (ENODEV);
 
 	request = &sc->hs_init_req;
 	memset(request, 0, sizeof(struct hv_storvsc_request));
@@ -300,15 +453,13 @@ hv_storvsc_channel_init(struct hv_device *dev)
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
-
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
 
-	if (ret != 0) {
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
+	if (ret != 0)
 		goto cleanup;
-	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
 		vstor_packet->status != 0) {
@@ -321,7 +472,8 @@ hv_storvsc_channel_init(struct hv_device *dev)
 	vstor_packet->operation = VSTOR_OPERATION_QUERYPROTOCOLVERSION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
 
-	vstor_packet->u.version.major_minor = VMSTOR_PROTOCOL_VERSION_CURRENT;
+	vstor_packet->u.version.major_minor =
+	    VMSTOR_PROTOCOL_VERSION(storvsc_current_major, storvsc_current_minor);
 
 	/* revision is only significant for Windows guests */
 	vstor_packet->u.version.revision = 0;
@@ -334,21 +486,19 @@ hv_storvsc_channel_init(struct hv_device *dev)
 			HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 			HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
-	if (ret) {
+	if (ret)
 		goto cleanup;
-	}
 
 	/* TODO: Check returned version */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
-		vstor_packet->status != 0) {
+		vstor_packet->status != 0)
 		goto cleanup;
-	}
 
 	/**
 	 * Query channel properties
@@ -365,22 +515,30 @@ hv_storvsc_channel_init(struct hv_device *dev)
 				HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
 				HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if ( ret != 0) {
+	if ( ret != 0)
 		goto cleanup;
-	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
 
 	/* TODO: Check returned version */
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
-		vstor_packet->status != 0) {
+	    vstor_packet->status != 0) {
 		goto cleanup;
 	}
 
+	/* multi-channels feature is supported by WIN8 and above version */
+	max_chans = vstor_packet->u.chan_props.max_channel_cnt;
+	if ((hv_vmbus_protocal_version != HV_VMBUS_VERSION_WIN7) &&
+	    (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) &&
+	    (vstor_packet->u.chan_props.flags &
+	     HV_STORAGE_SUPPORTS_MULTI_CHANNEL)) {
+		support_multichannel = TRUE;
+	}
+
 	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 	vstor_packet->operation = VSTOR_OPERATION_ENDINITIALIZATION;
 	vstor_packet->flags = REQUEST_COMPLETION_FLAG;
@@ -397,16 +555,22 @@ hv_storvsc_channel_init(struct hv_device *dev)
 		goto cleanup;
 	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	/* wait 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz);
 
-	if (ret != 0) {
+	if (ret != 0)
 		goto cleanup;
-	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETEIO ||
-		vstor_packet->status != 0) {
+	    vstor_packet->status != 0)
 		goto cleanup;
-	}
+
+	/*
+	 * If multi-channel is supported, send multichannel create
+	 * request to host.
+	 */
+	if (support_multichannel)
+		storvsc_send_multichannel_request(dev, max_chans);
 
 cleanup:
 	sema_destroy(&request->synch_sema);
@@ -443,8 +607,7 @@ hv_storvsc_connect_vsp(struct hv_device *dev)
 		(void *)&props,
 		sizeof(struct vmstor_chan_props),
 		hv_storvsc_on_channel_callback,
-		dev);
-
+		dev->channel);
 
 	if (ret != 0) {
 		return ret;
@@ -490,7 +653,7 @@ hv_storvsc_host_reset(struct hv_device *dev)
 		goto cleanup;
 	}
 
-	ret = sema_timedwait(&request->synch_sema, 500); /* KYS 5 seconds */
+	ret = sema_timedwait(&request->synch_sema, 5 * hz); /* KYS 5 seconds */
 
 	if (ret) {
 		goto cleanup;
@@ -498,7 +661,7 @@ hv_storvsc_host_reset(struct hv_device *dev)
 
 
 	/*
-	 * At this point, all outstanding requests in the adapter 
+	 * At this point, all outstanding requests in the adapter
 	 * should have been flushed out and return to us
 	 */
 
@@ -521,6 +684,7 @@ hv_storvsc_io_request(struct hv_device *device,
 {
 	struct storvsc_softc *sc;
 	struct vstor_packet *vstor_packet = &request->vstor_packet;
+	struct hv_vmbus_channel* outgoing_channel = NULL;
 	int ret = 0;
 
 	sc = get_stor_device(device, TRUE);
@@ -539,19 +703,20 @@ hv_storvsc_io_request(struct hv_device *device,
 
 	vstor_packet->operation = VSTOR_OPERATION_EXECUTESRB;
 
+	outgoing_channel = vmbus_select_outgoing_channel(device->channel);
 
 	mtx_unlock(&request->softc->hs_lock);
 	if (request->data_buf.length) {
 		ret = hv_vmbus_channel_send_packet_multipagebuffer(
-				device->channel,
+				outgoing_channel,
 				&request->data_buf,
-				vstor_packet, 
-				sizeof(struct vstor_packet), 
+				vstor_packet,
+				sizeof(struct vstor_packet),
 				(uint64_t)(uintptr_t)request);
 
 	} else {
 		ret = hv_vmbus_channel_send_packet(
-			device->channel,
+			outgoing_channel,
 			vstor_packet,
 			sizeof(struct vstor_packet),
 			(uint64_t)(uintptr_t)request,
@@ -610,7 +775,8 @@ static void
 hv_storvsc_on_channel_callback(void *context)
 {
 	int ret = 0;
-	struct hv_device *device = (struct hv_device *)context;
+	hv_vmbus_channel *channel = (hv_vmbus_channel *)context;
+	struct hv_device *device = NULL;
 	struct storvsc_softc *sc;
 	uint32_t bytes_recvd;
 	uint64_t request_id;
@@ -618,15 +784,22 @@ hv_storvsc_on_channel_callback(void *context)
 	struct hv_storvsc_request *request;
 	struct vstor_packet *vstor_packet;
 
+	if (channel->primary_channel != NULL){
+		device = channel->primary_channel->device;
+	} else {
+		device = channel->device;
+	}
+
+	KASSERT(device, ("device is NULL"));
+
 	sc = get_stor_device(device, FALSE);
 	if (sc == NULL) {
+		printf("Storvsc_error: get stor device failed.\n");
 		return;
 	}
 
-	KASSERT(device, ("device"));
-
 	ret = hv_vmbus_channel_recv_packet(
-			device->channel,
+			channel,
 			packet,
 			roundup2(sizeof(struct vstor_packet), 8),
 			&bytes_recvd,
@@ -634,21 +807,28 @@ hv_storvsc_on_channel_callback(void *context)
 
 	while ((ret == 0) && (bytes_recvd > 0)) {
 		request = (struct hv_storvsc_request *)(uintptr_t)request_id;
-		KASSERT(request, ("request"));
 
 		if ((request == &sc->hs_init_req) ||
 			(request == &sc->hs_reset_req)) {
 			memcpy(&request->vstor_packet, packet,
 				   sizeof(struct vstor_packet));
-			sema_post(&request->synch_sema); 
+			sema_post(&request->synch_sema);
 		} else {
 			vstor_packet = (struct vstor_packet *)packet;
 			switch(vstor_packet->operation) {
 			case VSTOR_OPERATION_COMPLETEIO:
+				if (request == NULL)
+					panic("VMBUS: storvsc received a "
+					    "packet with NULL request id in "
+					    "COMPLETEIO operation.");
+
 				hv_storvsc_on_iocompletion(sc,
 							vstor_packet, request);
 				break;
 			case VSTOR_OPERATION_REMOVEDEVICE:
+			case VSTOR_OPERATION_ENUMERATE_BUS:
+				printf("VMBUS: storvsc operation %d not "
+				    "implemented.\n", vstor_packet->operation);
 				/* TODO: implement */
 				break;
 			default:
@@ -656,7 +836,7 @@ hv_storvsc_on_channel_callback(void *context)
 			}			
 		}
 		ret = hv_vmbus_channel_recv_packet(
-				device->channel,
+				channel,
 				packet,
 				roundup2(sizeof(struct vstor_packet), 8),
 				&bytes_recvd,
@@ -680,7 +860,16 @@ storvsc_probe(device_t dev)
 {
 	int ata_disk_enable = 0;
 	int ret	= ENXIO;
-
+	
+	if ((HV_VMBUS_VERSION_WIN8 == hv_vmbus_protocal_version) ||
+	    (HV_VMBUS_VERSION_WIN8_1 == hv_vmbus_protocal_version)){
+		storvsc_current_major = STORVSC_WIN8_MAJOR;
+		storvsc_current_minor = STORVSC_WIN8_MINOR;
+	} else {
+		storvsc_current_major = STORVSC_WIN7_MAJOR;
+		storvsc_current_minor = STORVSC_WIN7_MINOR;
+	}
+	
 	switch (storvsc_get_storage_type(dev)) {
 	case DRIVER_BLKVSC:
 		if(bootverbose)
@@ -721,9 +910,11 @@ storvsc_attach(device_t dev)
 	enum hv_storage_type stor_type;
 	struct storvsc_softc *sc;
 	struct cam_devq *devq;
-	int ret, i;
+	int ret, i, j;
 	struct hv_storvsc_request *reqp;
 	struct root_hold_token *root_mount_token = NULL;
+	struct hv_sgl_node *sgl_node = NULL;
+	void *tmp_buff = NULL;
 
 	/*
 	 * We need to serialize storvsc attach calls.
@@ -764,8 +955,41 @@ storvsc_attach(device_t dev)
 		LIST_INSERT_HEAD(&sc->hs_free_list, reqp, link);
 	}
 
+	/* create sg-list page pool */
+	if (FALSE == g_hv_sgl_page_pool.is_init) {
+		g_hv_sgl_page_pool.is_init = TRUE;
+		LIST_INIT(&g_hv_sgl_page_pool.in_use_sgl_list);
+		LIST_INIT(&g_hv_sgl_page_pool.free_sgl_list);
+
+		/*
+		 * Pre-create SG list, each SG list with
+		 * HV_MAX_MULTIPAGE_BUFFER_COUNT segments, each
+		 * segment has one page buffer
+		 */
+		for (i = 0; i < STORVSC_MAX_IO_REQUESTS; i++) {
+	        	sgl_node = malloc(sizeof(struct hv_sgl_node),
+			    M_DEVBUF, M_WAITOK|M_ZERO);
+
+			sgl_node->sgl_data =
+			    sglist_alloc(HV_MAX_MULTIPAGE_BUFFER_COUNT,
+			    M_WAITOK|M_ZERO);
+
+			for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
+				tmp_buff = malloc(PAGE_SIZE,
+				    M_DEVBUF, M_WAITOK|M_ZERO);
+
+				sgl_node->sgl_data->sg_segs[j].ss_paddr =
+				    (vm_paddr_t)tmp_buff;
+			}
+
+			LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list,
+			    sgl_node, link);
+		}
+	}
+
 	sc->hs_destroy = FALSE;
 	sc->hs_drain_notify = FALSE;
+	sc->hs_open_multi_channel = FALSE;
 	sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
 
 	ret = hv_storvsc_connect_vsp(hv_dev);
@@ -834,6 +1058,20 @@ cleanup:
 		LIST_REMOVE(reqp, link);
 		free(reqp, M_DEVBUF);
 	}
+
+	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+		LIST_REMOVE(sgl_node, link);
+		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++) {
+			if (NULL !=
+			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+			}
+		}
+		sglist_free(sgl_node->sgl_data);
+		free(sgl_node, M_DEVBUF);
+	}
+
 	return (ret);
 }
 
@@ -853,6 +1091,8 @@ storvsc_detach(device_t dev)
 	struct storvsc_softc *sc = device_get_softc(dev);
 	struct hv_storvsc_request *reqp = NULL;
 	struct hv_device *hv_device = vmbus_get_devctx(dev);
+	struct hv_sgl_node *sgl_node = NULL;
+	int j = 0;
 
 	mtx_lock(&hv_device->channel->inbound_lock);
 	sc->hs_destroy = TRUE;
@@ -884,6 +1124,20 @@ storvsc_detach(device_t dev)
 		free(reqp, M_DEVBUF);
 	}
 	mtx_unlock(&sc->hs_lock);
+
+	while (!LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
+		sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+		LIST_REMOVE(sgl_node, link);
+		for (j = 0; j < HV_MAX_MULTIPAGE_BUFFER_COUNT; j++){
+			if (NULL !=
+			    (void*)sgl_node->sgl_data->sg_segs[j].ss_paddr) {
+				free((void*)sgl_node->sgl_data->sg_segs[j].ss_paddr, M_DEVBUF);
+			}
+		}
+		sglist_free(sgl_node->sgl_data);
+		free(sgl_node, M_DEVBUF);
+	}
+	
 	return (0);
 }
 
@@ -939,7 +1193,7 @@ storvsc_timeout_test(struct hv_storvsc_request *reqp,
 				ticks, __func__, (ret == 0)?
 				"IO return detected" :
 				"IO return not detected");
-		/* 
+		/*
 		 * Now both the timer handler and io done are running
 		 * simultaneously. We want to confirm the io done always
 		 * finishes after the timer handler exits. So reqp used by
@@ -1023,7 +1277,7 @@ storvsc_poll(struct cam_sim *sim)
 
 	mtx_assert(&sc->hs_lock, MA_OWNED);
 	mtx_unlock(&sc->hs_lock);
-	hv_storvsc_on_channel_callback(sc->hs_dev);
+	hv_storvsc_on_channel_callback(sc->hs_dev->channel);
 	mtx_lock(&sc->hs_lock);
 }
 
@@ -1151,9 +1405,13 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
 
 		bzero(reqp, sizeof(struct hv_storvsc_request));
 		reqp->softc = sc;
-
-		ccb->ccb_h.status |= CAM_SIM_QUEUED;	    
-		create_storvsc_request(ccb, reqp);
+		
+		ccb->ccb_h.status |= CAM_SIM_QUEUED;
+		if ((res = create_storvsc_request(ccb, reqp)) != 0) {
+			ccb->ccb_h.status = CAM_REQ_INVALID;
+			xpt_done(ccb);
+			return;
+		}
 
 		if (ccb->ccb_h.timeout != CAM_TIME_INFINITY) {
 			callout_init(&reqp->callout, CALLOUT_MPSAFE);
@@ -1194,6 +1452,212 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
 }
 
 /**
+ * @brief release a bounce buffer back to the global pool
+ *
+ * Takes the first node off the pool's in-use list, attaches the given
+ * Scatter/Gather list to it and puts the node on the pool's free list.
+ * If the in-use list is empty, only an error is logged.
+ *
+ * @param sgl - the Scatter/Gather list handed out earlier by
+ * storvsc_create_bounce_buffer()
+ */
+static void
+storvsc_destroy_bounce_buffer(struct sglist *sgl)
+{
+	struct hv_sgl_node *sgl_node = NULL;
+
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
+	if (NULL == sgl_node) {
+		printf("storvsc error: not enough in use sgl\n");
+		return;
+	}
+	LIST_REMOVE(sgl_node, link);
+	sgl_node->sgl_data = sgl;
+	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
+}
+
+/**
+ * @brief take a bounce buffer from the global pool
+ *
+ * Moves the first node of the pool's free list to the in-use list and
+ * returns its Scatter/Gather list, set up for seg_count segments.
+ *
+ * @param seg_count - number of SG-list segments to set up
+ * @param write - if WRITE_TYPE, the segment count and every segment
+ * length start at 0; otherwise all seg_count segments are PAGE_SIZE long.
+ *
+ * @return the SG list, or NULL if the free list is empty
+ */
+static struct sglist *
+storvsc_create_bounce_buffer(uint16_t seg_count, int write)
+{
+	int i = 0;
+	struct sglist *bounce_sgl = NULL;
+	unsigned int buf_len = ((write == WRITE_TYPE) ? 0 : PAGE_SIZE);
+	struct hv_sgl_node *sgl_node = NULL;
+
+	/* get struct sglist from free_sgl_list; check before unlinking */
+	sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+	if (NULL == sgl_node) {
+		printf("storvsc error: not enough free sgl\n");
+		return NULL;
+	}
+	LIST_REMOVE(sgl_node, link);
+	bounce_sgl = sgl_node->sgl_data;
+	LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
+
+	bounce_sgl->sg_maxseg = seg_count;
+
+	if (write == WRITE_TYPE)
+		bounce_sgl->sg_nseg = 0;
+	else
+		bounce_sgl->sg_nseg = seg_count;
+
+	for (i = 0; i < seg_count; i++)
+		bounce_sgl->sg_segs[i].ss_len = buf_len;
+
+	return bounce_sgl;
+}
+
+/**
+ * @brief copy data from SG list to bounce buffer
+ *
+ * Copies each flagged segment of the original SG list into the segment
+ * with the same index in the bounce SG list and records its length there.
+ *
+ * @param bounce_sgl - the destination SG list
+ * @param orig_sgl - the segments of the source SG list.
+ * @param orig_sgl_count - the count of source segments.
+ * @param seg_bits - bitmap of segments that need the bounce buffer;
+ *  bit i set means segment i is copied.
+ *
+ */
+static void
+storvsc_copy_sgl_to_bounce_buf(struct sglist *bounce_sgl,
+			       bus_dma_segment_t *orig_sgl,
+			       unsigned int orig_sgl_count,
+			       uint64_t seg_bits)
+{
+	unsigned int src_sgl_idx = 0;
+
+	for (src_sgl_idx = 0; src_sgl_idx < orig_sgl_count; src_sgl_idx++) {
+		/* 64-bit one: "1 << 31" would overflow a plain int. */
+		if (seg_bits & ((uint64_t)1 << src_sgl_idx)) {
+			memcpy((void*)bounce_sgl->sg_segs[src_sgl_idx].ss_paddr,
+			    (void*)orig_sgl[src_sgl_idx].ds_addr,
+			    orig_sgl[src_sgl_idx].ds_len);
+			bounce_sgl->sg_segs[src_sgl_idx].ss_len =
+			    orig_sgl[src_sgl_idx].ds_len;
+		}
+	}
+}
+
+/**
+ * @brief copy data from the bounce buffer back to the original SG list
+ *
+ * Copies each flagged segment of the bounce SG list into the segment
+ * with the same index in the destination, using the bounce segment length.
+ *
+ * @param dest_sgl - the destination SG list's segments
+ * @param dest_sgl_count - the count of destination SG list's segment.
+ * @param src_sgl - the source (bounce) SG list.
+ * @param seg_bits - bitmap of segments that used the bounce buffer;
+ *  the mask is built from a 64-bit one since "1 << 31" overflows an int.
+ */
+void
+storvsc_copy_from_bounce_buf_to_sgl(bus_dma_segment_t *dest_sgl,
+				    unsigned int dest_sgl_count,
+				    struct sglist* src_sgl,
+				    uint64_t seg_bits)
+{
+	unsigned int sgl_idx = 0;
+
+	for (sgl_idx = 0; sgl_idx < dest_sgl_count; sgl_idx++) {
+		if (seg_bits & ((uint64_t)1 << sgl_idx)) {
+			memcpy((void*)(dest_sgl[sgl_idx].ds_addr),
+			    (void*)(src_sgl->sg_segs[sgl_idx].ss_paddr),
+			    src_sgl->sg_segs[sgl_idx].ss_len);
+		}
+	}
+}
+
+/**
+ * @brief check whether an SG list needs a bounce buffer
+ *
+ * A bounce buffer is needed when the alignment walk below finds a hole.
+ *
+ * @param sgl - the SG list's segments
+ * @param sg_count - the count of SG list's segment.
+ * @param bits - out: bitmap of the segments that are not page aligned;
+ *  cleared on entry and only filled in when 0 is returned.
+ * @return 0 if a bounce buffer is needed, -1 if it is not
+ */
+static int
+storvsc_check_bounce_buffer_sgl(bus_dma_segment_t *sgl,
+				unsigned int sg_count,
+				uint64_t *bits)
+{
+	int i = 0;
+	int offset = 0;
+	uint64_t phys_addr = 0;
+	uint64_t tmp_bits = 0;
+	boolean_t found_hole = FALSE;
+	boolean_t pre_aligned = TRUE;
+
+	*bits = 0;
+
+	if (sg_count < 2){
+		return -1;
+	}
+
+	phys_addr = vtophys(sgl[0].ds_addr);
+	offset = phys_addr - trunc_page(phys_addr);
+
+	if (offset != 0) {
+		pre_aligned = FALSE;
+		tmp_bits |= 1;
+	}
+
+	for (i = 1; i < sg_count; i++) {
+		phys_addr = vtophys(sgl[i].ds_addr);
+		offset = phys_addr - trunc_page(phys_addr);
+
+		if (offset == 0) {
+			if (FALSE == pre_aligned){
+				/*
+				 * This segment is aligned; if the previous
+				 * one was not aligned, we found a hole.
+				 */
+				found_hole = TRUE;
+			}
+			pre_aligned = TRUE;
+		} else {
+			tmp_bits |= (uint64_t)1 << i;
+			if (!pre_aligned) {
+				if (phys_addr != vtophys(sgl[i-1].ds_addr +
+				    sgl[i-1].ds_len)) {
+					/*
+					 * This segment does not continue the
+					 * previous one physically: a hole.
+					 */
+					found_hole = TRUE;
+				}
+			} else {
+				found_hole = TRUE;
+			}
+			pre_aligned = FALSE;
+		}
+	}
+
+	if (!found_hole) {
+		return (-1);
+	} else {
+		*bits = tmp_bits;
+		return 0;
+	}
+}
+
+/**
  * @brief Fill in a request structure based on a CAM control block
  *
  * Fills in a request structure based on the contents of a CAM control
@@ -1203,7 +1667,7 @@ storvsc_action(struct cam_sim *sim, union ccb *ccb)
  * @param ccb pointer to a CAM contorl block
  * @param reqp pointer to a request structure
  */
-static void
+static int
 create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 {
 	struct ccb_scsiio *csio = &ccb->csio;
@@ -1211,6 +1675,7 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 	uint32_t bytes_to_copy = 0;
 	uint32_t pfn_num = 0;
 	uint32_t pfn;
+	uint64_t not_aligned_seg_bits = 0;
 	
 	/* refer to struct vmscsi_req for meanings of these two fields */
 	reqp->vstor_packet.u.vm_srb.port =
@@ -1231,48 +1696,172 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
 	}
 
 	switch (ccb->ccb_h.flags & CAM_DIR_MASK) {
-    	case CAM_DIR_OUT: 
-    		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;
-    		break;
-    	case CAM_DIR_IN:
-    		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
-    		break;
-    	case CAM_DIR_NONE:
-    		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
-    		break;
-    	default:
-    		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
-    		break;
+	case CAM_DIR_OUT:
+		reqp->vstor_packet.u.vm_srb.data_in = WRITE_TYPE;	
+		break;
+	case CAM_DIR_IN:
+		reqp->vstor_packet.u.vm_srb.data_in = READ_TYPE;
+		break;
+	case CAM_DIR_NONE:
+		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+		break;
+	default:
+		reqp->vstor_packet.u.vm_srb.data_in = UNKNOWN_TYPE;
+		break;
 	}
 
 	reqp->sense_data     = &csio->sense_data;
 	reqp->sense_info_len = csio->sense_len;
 
 	reqp->ccb = ccb;
-	/*
-	KASSERT((ccb->ccb_h.flags & CAM_SCATTER_VALID) == 0,
-			("ccb is scatter gather valid\n"));
-	*/
-	if (csio->dxfer_len != 0) {
-		reqp->data_buf.length = csio->dxfer_len;
+
+	if (0 == csio->dxfer_len) {
+		return (0);
+	}
+
+	reqp->data_buf.length = csio->dxfer_len;
+
+	switch (ccb->ccb_h.flags & CAM_DATA_MASK) {
+	case CAM_DATA_VADDR:
+	{
 		bytes_to_copy = csio->dxfer_len;
 		phys_addr = vtophys(csio->data_ptr);
-		reqp->data_buf.offset = phys_addr - trunc_page(phys_addr);
+		reqp->data_buf.offset = phys_addr & PAGE_MASK;
+		
+		while (bytes_to_copy != 0) {
+			int bytes, page_offset;
+			phys_addr =
+			    vtophys(&csio->data_ptr[reqp->data_buf.length -
+			    bytes_to_copy]);
+			pfn = phys_addr >> PAGE_SHIFT;
+			reqp->data_buf.pfn_array[pfn_num] = pfn;
+			page_offset = phys_addr & PAGE_MASK;
+
+			bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
+
+			bytes_to_copy -= bytes;
+			pfn_num++;
+		}
+		break;
 	}
 
-	while (bytes_to_copy != 0) {
-		int bytes, page_offset;
-		phys_addr = vtophys(&csio->data_ptr[reqp->data_buf.length -
-		                                    bytes_to_copy]);
-		pfn = phys_addr >> PAGE_SHIFT;
-		reqp->data_buf.pfn_array[pfn_num] = pfn;
-		page_offset = phys_addr - trunc_page(phys_addr);
+	case CAM_DATA_SG:
+	{
+		int i = 0;
+		int offset = 0;
+		int ret;
+
+		bus_dma_segment_t *storvsc_sglist =
+		    (bus_dma_segment_t *)ccb->csio.data_ptr;
+		u_int16_t storvsc_sg_count = ccb->csio.sglist_cnt;
+
+		printf("Storvsc: get SG I/O operation, %d\n",
+		    reqp->vstor_packet.u.vm_srb.data_in);
+
+		if (storvsc_sg_count > HV_MAX_MULTIPAGE_BUFFER_COUNT){
+			printf("Storvsc: %d segments is too much, "
+			    "only support %d segments\n",
+			    storvsc_sg_count, HV_MAX_MULTIPAGE_BUFFER_COUNT);
+			return (EINVAL);
+		}
+
+		/*
+		 * We create our own bounce buffer function currently. Ideally
+		 * we should use the BUS_DMA(9) framework. But with the current
+		 * BUS_DMA code there is no callback API to check the page
+		 * alignment of middle segments before busdma can decide if a
+		 * bounce buffer is needed for a particular segment. There is a
+		 * callback, "bus_dma_filter_t *filter", but its parameters are
+		 * not sufficient for the storvsc driver.
+		 * TODO:
+		 *	Add page alignment check in BUS_DMA(9) callback. Once
+		 *	this is complete, switch the following code to use
+		 *	BUS_DMA(9) for storvsc bounce buffer support.
+		 */
+		/* check if we need to create bounce buffer */
+		ret = storvsc_check_bounce_buffer_sgl(storvsc_sglist,
+		    storvsc_sg_count, &not_aligned_seg_bits);
+		if (ret != -1) {
+			reqp->bounce_sgl =
+			    storvsc_create_bounce_buffer(storvsc_sg_count,
+			    reqp->vstor_packet.u.vm_srb.data_in);
+			if (NULL == reqp->bounce_sgl) {
+				printf("Storvsc_error: "
+				    "create bounce buffer failed.\n");
+				return (ENOMEM);
+			}
+
+			reqp->bounce_sgl_count = storvsc_sg_count;
+			reqp->not_aligned_seg_bits = not_aligned_seg_bits;
+
+			/*
+			 * If it is a write, we need to copy the original data
+			 * to the bounce buffer.
+			 */
+			if (WRITE_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+				storvsc_copy_sgl_to_bounce_buf(
+				    reqp->bounce_sgl,
+				    storvsc_sglist,
+				    storvsc_sg_count,
+				    reqp->not_aligned_seg_bits);
+			}
+
+			/* transfer virtual address to physical frame number */
+			if (reqp->not_aligned_seg_bits & 0x1){
+ 				phys_addr =
+				    vtophys(reqp->bounce_sgl->sg_segs[0].ss_paddr);
+			}else{
+ 				phys_addr =
+					vtophys(storvsc_sglist[0].ds_addr);
+			}
+			reqp->data_buf.offset = phys_addr & PAGE_MASK;
+
+			pfn = phys_addr >> PAGE_SHIFT;
+			reqp->data_buf.pfn_array[0] = pfn;
+			
+			for (i = 1; i < storvsc_sg_count; i++) {
+				if (reqp->not_aligned_seg_bits & (1 << i)) {
+					phys_addr =
+					    vtophys(reqp->bounce_sgl->sg_segs[i].ss_paddr);
+				} else {
+					phys_addr =
+					    vtophys(storvsc_sglist[i].ds_addr);
+				}
+
+				pfn = phys_addr >> PAGE_SHIFT;
+				reqp->data_buf.pfn_array[i] = pfn;
+			}
+		} else {
+			phys_addr = vtophys(storvsc_sglist[0].ds_addr);
+
+			reqp->data_buf.offset = phys_addr & PAGE_MASK;
 
-		bytes = min(PAGE_SIZE - page_offset, bytes_to_copy);
+			for (i = 0; i < storvsc_sg_count; i++) {
+				phys_addr = vtophys(storvsc_sglist[i].ds_addr);
+				pfn = phys_addr >> PAGE_SHIFT;
+				reqp->data_buf.pfn_array[i] = pfn;
+			}
 
-		bytes_to_copy -= bytes;
-		pfn_num++;
+			/* check the last segment cross boundary or not */
+			offset = phys_addr & PAGE_MASK;
+			if (offset) {
+				phys_addr =
+				    vtophys(storvsc_sglist[i-1].ds_addr +
+				    PAGE_SIZE - offset);
+				pfn = phys_addr >> PAGE_SHIFT;
+				reqp->data_buf.pfn_array[i] = pfn;
+			}
+			
+			reqp->bounce_sgl_count = 0;
+		}
+		break;
+	}
+	default:
+		printf("Unknow flags: %d\n", ccb->ccb_h.flags);
+		return(EINVAL);
 	}
+
+	return(0);
 }
 
 /**
@@ -1291,7 +1880,29 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
 	struct ccb_scsiio *csio = &ccb->csio;
 	struct storvsc_softc *sc = reqp->softc;
 	struct vmscsi_req *vm_srb = &reqp->vstor_packet.u.vm_srb;
-	
+	bus_dma_segment_t *ori_sglist = NULL;
+	int ori_sg_count = 0;
+
+	/* destroy bounce buffer if it is used */
+	if (reqp->bounce_sgl_count) {
+		ori_sglist = (bus_dma_segment_t *)ccb->csio.data_ptr;
+		ori_sg_count = ccb->csio.sglist_cnt;
+
+		/*
+		 * If it is READ operation, we should copy back the data
+		 * to original SG list.
+		 */
+		if (READ_TYPE == reqp->vstor_packet.u.vm_srb.data_in) {
+			storvsc_copy_from_bounce_buf_to_sgl(ori_sglist,
+			    ori_sg_count,
+			    reqp->bounce_sgl,
+			    reqp->not_aligned_seg_bits);
+		}
+
+		storvsc_destroy_bounce_buffer(reqp->bounce_sgl);
+		reqp->bounce_sgl_count = 0;
+	}
+		
 	if (reqp->retries > 0) {
 		mtx_lock(&sc->hs_lock);
 #if HVS_TIMEOUT_TEST
@@ -1309,7 +1920,7 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
 		mtx_unlock(&sc->hs_lock);
 	}
 
-	/* 
+	/*
 	 * callout_drain() will wait for the timer handler to finish
 	 * if it is running. So we don't need any lock to synchronize
 	 * between this routine and the timer handler.
diff --git a/sys/dev/hyperv/storvsc/hv_vstorage.h b/sys/dev/hyperv/storvsc/hv_vstorage.h
index 2632676..deb9183 100644
--- a/sys/dev/hyperv/storvsc/hv_vstorage.h
+++ b/sys/dev/hyperv/storvsc/hv_vstorage.h
@@ -53,7 +53,7 @@
  * V1 RC > 2008/1/31          2.0
  */
 
-#define VMSTOR_PROTOCOL_VERSION_CURRENT	VMSTOR_PROTOCOL_VERSION(2, 0)
+#define VMSTOR_PROTOCOL_VERSION_CURRENT	VMSTOR_PROTOCOL_VERSION(5, 1)
 
 /**
  *  Packet structure ops describing virtual storage requests.
@@ -69,7 +69,10 @@ enum vstor_packet_ops {
 	VSTOR_OPERATION_ENDINITIALIZATION     = 8,
 	VSTOR_OPERATION_QUERYPROTOCOLVERSION  = 9,
 	VSTOR_OPERATION_QUERYPROPERTIES       = 10,
-	VSTOR_OPERATION_MAXIMUM               = 10
+	VSTOR_OPERATION_ENUMERATE_BUS         = 11,
+	VSTOR_OPERATION_FCHBA_DATA            = 12,
+	VSTOR_OPERATION_CREATE_MULTI_CHANNELS = 13,
+	VSTOR_OPERATION_MAXIMUM               = 13
 };
 
 
@@ -123,10 +126,12 @@ struct vmstor_chan_props {
 	uint8_t  path_id;
 	uint8_t  target_id;
 
+	uint16_t max_channel_cnt;
+
 	/**
 	 * Note: port number is only really known on the client side
 	 */
-	uint32_t port;
+	uint16_t port;
 	uint32_t flags;
 	uint32_t max_transfer_bytes;
 
@@ -193,6 +198,11 @@ struct vstor_packet {
 	     * Used during version negotiations.
 	     */
 	    struct vmstor_proto_ver version;
+
+	    /**
+             * Number of multichannels to create
+	     */
+	    uint16_t multi_channels_cnt;
 	} u;
 
 } __packed;
diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c
index 848d364..4598510 100644
--- a/sys/dev/hyperv/utilities/hv_kvp.c
+++ b/sys/dev/hyperv/utilities/hv_kvp.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/_null.h>
 #include <sys/signal.h>
 #include <sys/syslog.h>
+#include <sys/systm.h>
 #include <sys/mutex.h>
 #include <net/if_arp.h>
 
@@ -232,7 +233,7 @@ hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp,
 	 */
 	if ((icframe_vercnt >= 2) && (negop->icversion_data[1].major == 3)) {
 		icframe_vercnt = 3;
-		if (icmsg_vercnt >= 2)
+		if (icmsg_vercnt > 2)
 			icmsg_vercnt = 4;
 		else
 			icmsg_vercnt = 3;
@@ -734,8 +735,8 @@ hv_kvp_process_request(void *context)
 		recvlen = 0;
 		ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE,
 			&recvlen, &requestid);
-		hv_kvp_log_info("%s: read: context %p, pending_cnt %ju ret =%d, recvlen=%d\n",
-			__func__, context, pending_cnt, ret, recvlen);
+		hv_kvp_log_info("%s: read: context %p, pending_cnt %llu ret =%d, recvlen=%d\n",
+			__func__, context, (unsigned long long)pending_cnt, ret, recvlen);
 	} 
 }
 
@@ -813,9 +814,9 @@ static void
 hv_kvp_dev_destroy(void)
 {
 
-        if (daemon_task != NULL) {
+	if (daemon_task != NULL) {
 		PROC_LOCK(daemon_task);
-        	kern_psignal(daemon_task, SIGKILL);
+		kern_psignal(daemon_task, SIGKILL);
 		PROC_UNLOCK(daemon_task);
 	}
 	
diff --git a/sys/dev/hyperv/utilities/hv_util.c b/sys/dev/hyperv/utilities/hv_util.c
index 3e545cf..dc4b1e2 100644
--- a/sys/dev/hyperv/utilities/hv_util.c
+++ b/sys/dev/hyperv/utilities/hv_util.c
@@ -408,6 +408,15 @@ hv_util_attach(device_t dev)
 	    }
 	}
 
+	/*
+	 * These services are not performance critical and do not need
+	 * batched reading. Furthermore, some services such as KVP can
+	 * only handle one message from the host at a time.
+	 * Turn off batched reading for all util drivers before we open the
+	 * channel.
+	 */
+	hv_set_channel_read_state(hv_dev->channel, FALSE);
+
 	ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE,
 		    4 * PAGE_SIZE, NULL, 0,
 		    service->callback, hv_dev->channel);
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c
index 103260a..94137fb 100644
--- a/sys/dev/hyperv/vmbus/hv_channel.c
+++ b/sys/dev/hyperv/vmbus/hv_channel.c
@@ -75,7 +75,7 @@ vmbus_channel_set_event(hv_vmbus_channel *channel)
 			(uint32_t *)&monitor_page->
 				trigger_group[channel->monitor_group].u.pending);
 	} else {
-		hv_vmbus_set_event(channel->offer_msg.child_rel_id);
+		hv_vmbus_set_event(channel);
 	}
 
 }
@@ -99,6 +99,18 @@ hv_vmbus_channel_open(
 	hv_vmbus_channel_open_channel*	open_msg;
 	hv_vmbus_channel_msg_info* 	open_info;
 
+	mtx_lock(&new_channel->sc_lock);
+	if (new_channel->state == HV_CHANNEL_OPEN_STATE) {
+	    new_channel->state = HV_CHANNEL_OPENING_STATE;
+	} else {
+	    mtx_unlock(&new_channel->sc_lock);
+	    if(bootverbose)
+		printf("VMBUS: Trying to open channel <%p> which in "
+		    "%d state.\n", new_channel, new_channel->state);
+	    return (EINVAL);
+	}
+	mtx_unlock(&new_channel->sc_lock);
+
 	new_channel->on_channel_callback = pfn_on_channel_callback;
 	new_channel->channel_callback_context = context;
 
@@ -162,7 +174,7 @@ hv_vmbus_channel_open(
 		new_channel->ring_buffer_gpadl_handle;
 	open_msg->downstream_ring_buffer_page_offset = send_ring_buffer_size
 		>> PAGE_SHIFT;
-	open_msg->server_context_area_gpadl_handle = 0;
+	open_msg->target_vcpu = new_channel->target_vcpu;
 
 	if (user_data_len)
 		memcpy(open_msg->user_data, user_data, user_data_len);
@@ -182,10 +194,14 @@ hv_vmbus_channel_open(
 
 	ret = sema_timedwait(&open_info->wait_sema, 500); /* KYS 5 seconds */
 
-	if (ret)
+	if (ret) {
+	    if(bootverbose)
+		printf("VMBUS: channel <%p> open timeout.\n", new_channel);
 	    goto cleanup;
+	}
 
 	if (open_info->response.open_result.status == 0) {
+	    new_channel->state = HV_CHANNEL_OPENED_STATE;
 	    if(bootverbose)
 		printf("VMBUS: channel <%p> open success.\n", new_channel);
 	} else {
@@ -497,16 +513,20 @@ cleanup:
 	return (ret);
 }
 
-/**
- * @brief Close the specified channel
- */
-void
-hv_vmbus_channel_close(hv_vmbus_channel *channel)
+static void
+hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
 {
 	int ret = 0;
 	hv_vmbus_channel_close_channel* msg;
 	hv_vmbus_channel_msg_info* info;
 
+	channel->state = HV_CHANNEL_OPEN_STATE;
+	channel->sc_creation_callback = NULL;
+
+	/*
+	 * Grab the lock to prevent race condition when a packet received
+	 * and unloading driver is in the process.
+	 */
 	mtx_lock(&channel->inbound_lock);
 	channel->on_channel_callback = NULL;
 	mtx_unlock(&channel->inbound_lock);
@@ -545,23 +565,37 @@ hv_vmbus_channel_close(hv_vmbus_channel *channel)
 	    M_DEVBUF);
 
 	free(info, M_DEVBUF);
+}
+
+/**
+ * @brief Close a primary channel together with its opened sub-channels
+ */
+void
+hv_vmbus_channel_close(hv_vmbus_channel *channel)
+{
+	hv_vmbus_channel*	sub_channel;
+
+	if (channel->primary_channel != NULL) {
+		/*
+		 * A sub-channel is never closed on its own; it is only
+		 * closed when its primary channel is closed.
+		 */
+		return;
+	}
 
 	/*
-	 *  If we are closing the channel during an error path in
-	 *  opening the channel, don't free the channel
-	 *  since the caller will free the channel
+	 * Close every sub-channel that is in the opened state first.
 	 */
-	if (channel->state == HV_CHANNEL_OPEN_STATE) {
-		mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
-		TAILQ_REMOVE(
-			&hv_vmbus_g_connection.channel_anchor,
-			channel,
-			list_entry);
-		mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
-
-		hv_vmbus_free_vmbus_channel(channel);
+	TAILQ_FOREACH(sub_channel, &channel->sc_list_anchor,
+	    sc_list_entry) {
+		if (sub_channel->state != HV_CHANNEL_OPENED_STATE)
+			continue;
+		hv_vmbus_channel_close_internal(sub_channel);
 	}
-
+	/*
+	 * Then close the primary channel.
+	 */
+	hv_vmbus_channel_close_internal(channel);
 }
 
 /**
@@ -581,6 +615,7 @@ hv_vmbus_channel_send_packet(
 	uint32_t		packet_len;
 	uint64_t		aligned_data;
 	uint32_t		packet_len_aligned;
+	boolean_t		need_sig;
 	hv_vmbus_sg_buffer_list	buffer_list[3];
 
 	packet_len = sizeof(hv_vm_packet_descriptor) + buffer_len;
@@ -604,12 +639,11 @@ hv_vmbus_channel_send_packet(
 	buffer_list[2].data = &aligned_data;
 	buffer_list[2].length = packet_len_aligned - packet_len;
 
-	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
+	    &need_sig);
 
 	/* TODO: We should determine if this is optional */
-	if (ret == 0
-		&& !hv_vmbus_get_ring_buffer_interrupt_mask(
-			&channel->outbound)) {
+	if (ret == 0 && need_sig) {
 		vmbus_channel_set_event(channel);
 	}
 
@@ -632,6 +666,7 @@ hv_vmbus_channel_send_packet_pagebuffer(
 
 	int					ret = 0;
 	int					i = 0;
+	boolean_t				need_sig;
 	uint32_t				packet_len;
 	uint32_t				packetLen_aligned;
 	hv_vmbus_sg_buffer_list			buffer_list[3];
@@ -675,11 +710,11 @@ hv_vmbus_channel_send_packet_pagebuffer(
 	buffer_list[2].data = &alignedData;
 	buffer_list[2].length = packetLen_aligned - packet_len;
 
-	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
+	    &need_sig);
 
 	/* TODO: We should determine if this is optional */
-	if (ret == 0 &&
-		!hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) {
+	if (ret == 0 && need_sig) {
 		vmbus_channel_set_event(channel);
 	}
 
@@ -700,6 +735,7 @@ hv_vmbus_channel_send_packet_multipagebuffer(
 
 	int			ret = 0;
 	uint32_t		desc_size;
+	boolean_t		need_sig;
 	uint32_t		packet_len;
 	uint32_t		packet_len_aligned;
 	uint32_t		pfn_count;
@@ -750,11 +786,11 @@ hv_vmbus_channel_send_packet_multipagebuffer(
 	buffer_list[2].data = &aligned_data;
 	buffer_list[2].length = packet_len_aligned - packet_len;
 
-	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3);
+	ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
+	    &need_sig);
 
 	/* TODO: We should determine if this is optional */
-	if (ret == 0 &&
-	    !hv_vmbus_get_ring_buffer_interrupt_mask(&channel->outbound)) {
+	if (ret == 0 && need_sig) {
 	    vmbus_channel_set_event(channel);
 	}
 
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
index 011e305..d13ece5 100644
--- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -26,6 +26,9 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <sys/param.h>
 #include <sys/mbuf.h>
 
@@ -50,6 +53,8 @@ static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
 static void vmbus_channel_process_offer(void *context);
+struct hv_vmbus_channel*
+    vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
 
 /**
  * Channel message dispatch table
@@ -233,6 +238,9 @@ hv_vmbus_allocate_channel(void)
 	    return (NULL);
 
 	mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
+	mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
+
+	TAILQ_INIT(&channel->sc_list_anchor);
 
 	channel->control_work_queue = hv_work_queue_create("control");
 
@@ -262,6 +270,7 @@ ReleaseVmbusChannel(void *context)
 void
 hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
 {
+	mtx_destroy(&channel->sc_lock);
 	mtx_destroy(&channel->inbound_lock);
 	/*
 	 * We have to release the channel's workqueue/thread in
@@ -279,10 +288,10 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
 static void
 vmbus_channel_process_offer(void *context)
 {
-	int			ret;
 	hv_vmbus_channel*	new_channel;
 	boolean_t		f_new;
 	hv_vmbus_channel*	channel;
+	int			ret;
 
 	new_channel = (hv_vmbus_channel*) context;
 	f_new = TRUE;
@@ -291,38 +300,76 @@ vmbus_channel_process_offer(void *context)
 	/*
 	 * Make sure this is a new offer
 	 */
-	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 
 	TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
 	    list_entry)
 	{
-	    if (!memcmp(
-		&channel->offer_msg.offer.interface_type,
-		&new_channel->offer_msg.offer.interface_type,
-		sizeof(hv_guid))
-		&& !memcmp(
-		    &channel->offer_msg.offer.interface_instance,
+		if (memcmp(&channel->offer_msg.offer.interface_type,
+		    &new_channel->offer_msg.offer.interface_type,
+		    sizeof(hv_guid)) == 0 &&
+		    memcmp(&channel->offer_msg.offer.interface_instance,
 		    &new_channel->offer_msg.offer.interface_instance,
-		    sizeof(hv_guid))) {
-		f_new = FALSE;
-		break;
-	    }
+		    sizeof(hv_guid)) == 0) {
+			f_new = FALSE;
+			break;
+		}
 	}
 
 	if (f_new) {
-	    /* Insert at tail */
-	    TAILQ_INSERT_TAIL(
-		&hv_vmbus_g_connection.channel_anchor,
-		new_channel,
-		list_entry);
+		/* Insert at tail */
+		TAILQ_INSERT_TAIL(
+		    &hv_vmbus_g_connection.channel_anchor,
+		    new_channel,
+		    list_entry);
 	}
-	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+	/*XXX add new channel to percpu_list */
 
 	if (!f_new) {
+		/*
+		 * Check if this is a sub channel.
+		 */
+		if (new_channel->offer_msg.offer.sub_channel_index != 0) {
+			/*
+			 * It is a sub channel offer, process it.
+			 */
+			new_channel->primary_channel = channel;
+			mtx_lock(&channel->sc_lock);
+			TAILQ_INSERT_TAIL(
+			    &channel->sc_list_anchor,
+			    new_channel,
+			    sc_list_entry);
+			mtx_unlock(&channel->sc_lock);
+
+			/* Insert new channel into channel_anchor. */
+			printf("Storvsc get multi-channel offer, rel=%u.\n",
+			    new_channel->offer_msg.child_rel_id);	
+			mtx_lock(&hv_vmbus_g_connection.channel_lock);
+			TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
+			    new_channel, list_entry);				
+			mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+
+			if(bootverbose)
+				printf("VMBUS: new multi-channel offer <%p>.\n",
+				    new_channel);
+
+			/*XXX add it to percpu_list */
+
+			new_channel->state = HV_CHANNEL_OPEN_STATE;
+			if (channel->sc_creation_callback != NULL) {
+				channel->sc_creation_callback(new_channel);
+			}
+			return;
+		}
+
 	    hv_vmbus_free_vmbus_channel(new_channel);
 	    return;
 	}
 
+	new_channel->state = HV_CHANNEL_OPEN_STATE;
+
 	/*
 	 * Start the process of binding this offer to the driver
 	 * (We need to set the device field before calling
@@ -333,35 +380,86 @@ vmbus_channel_process_offer(void *context)
 	    new_channel->offer_msg.offer.interface_instance, new_channel);
 
 	/*
-	 *  TODO - the HV_CHANNEL_OPEN_STATE flag should not be set below
-	 *  but in the "open" channel request. The ret != 0 logic below
-	 *  doesn't take into account that a channel
-	 *  may have been opened successfully
-	 */
-
-	/*
 	 * Add the new device to the bus. This will kick off device-driver
 	 * binding which eventually invokes the device driver's AddDevice()
 	 * method.
 	 */
 	ret = hv_vmbus_child_device_register(new_channel->device);
 	if (ret != 0) {
-	    mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
-	    TAILQ_REMOVE(
-		&hv_vmbus_g_connection.channel_anchor,
-		new_channel,
-		list_entry);
-	    mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
-	    hv_vmbus_free_vmbus_channel(new_channel);
-	} else {
-	    /*
-	     * This state is used to indicate a successful open
-	     * so that when we do close the channel normally,
-	     * we can clean up properly
-	     */
-	    new_channel->state = HV_CHANNEL_OPEN_STATE;
+		mtx_lock(&hv_vmbus_g_connection.channel_lock);
+		TAILQ_REMOVE(
+		    &hv_vmbus_g_connection.channel_anchor,
+		    new_channel,
+		    list_entry);
+		mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+		hv_vmbus_free_vmbus_channel(new_channel);
+	}
+}
+
+/**
+ * GUIDs of performance-critical device classes, indexed by the PERF_CHN_*
+ * enum below; their channel interrupts are spread across the online CPUs.
+ */
+static const hv_guid high_perf_devices[] = {
+	{HV_NIC_GUID, },
+	{HV_IDE_GUID, },
+	{HV_SCSI_GUID, },
+};
+
+enum {
+	PERF_CHN_NIC = 0,
+	PERF_CHN_IDE,
+	PERF_CHN_SCSI,
+	MAX_PERF_CHN,
+};
+
+/*
+ * Round-robin counter used to pick the next CPU for a perf channel.
+ */
+static uint32_t next_vcpu;
+
+/**
+ * Bind a channel to a CPU. Starting with Win8, incoming channel interrupt
+ * load can be distributed by binding a channel to a VCPU; a simple round
+ * robin scheme is used. Channels that are not performance critical (and
+ * all channels under the WS2008/Win7 protocols) go to cpu 0; performance
+ * critical ones (IDE, SCSI, Network) are spread over the available CPUs.
+ * @param channel - channel whose target_cpu/target_vcpu are filled in
+ * @param guid - device type GUID matched against high_perf_devices[]
+ */
+static void
+vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
+{
+	uint32_t current_cpu;
+	int i;
+	boolean_t is_perf_channel = FALSE;
+
+	for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
+		if (memcmp(guid->data, high_perf_devices[i].data,
+		    sizeof(hv_guid)) == 0) {
+			is_perf_channel = TRUE;
+			break;
+		}
+	}
 
+	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
+	    (!is_perf_channel)) {
+		/* Host's view of guest cpu */
+		channel->target_vcpu = 0;
+		/* Guest's own view of cpu */
+		channel->target_cpu = 0;
+		return;
 	}
+	/* mp_ncpus should have the number of cpus currently online */
+	current_cpu = (++next_vcpu % mp_ncpus);
+	channel->target_cpu = current_cpu;
+	channel->target_vcpu =
+	    hv_vmbus_g_context.hv_vcpu_index[current_cpu];
+	if (bootverbose)
+		printf("VMBUS: Total online cpus %d, assign perf channel %d "
+		    "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu,
+		    current_cpu);
 }
 
 /**
@@ -391,6 +489,38 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
 	if (new_channel == NULL)
 	    return;
 
+	/*
+	 * By default we setup state to enable batched
+	 * reading. A specific service can choose to
+	 * disable this prior to opening the channel.
+	 */
+	new_channel->batched_reading = TRUE;
+
+	new_channel->signal_event_param =
+	    (hv_vmbus_input_signal_event *)
+	    (HV_ALIGN_UP((unsigned long)
+		&new_channel->signal_event_buffer,
+		HV_HYPERCALL_PARAM_ALIGN));
+
+ 	new_channel->signal_event_param->connection_id.as_uint32_t = 0;	
+	new_channel->signal_event_param->connection_id.u.id =
+	    HV_VMBUS_EVENT_CONNECTION_ID;
+	new_channel->signal_event_param->flag_number = 0;
+	new_channel->signal_event_param->rsvd_z = 0;
+
+	if (hv_vmbus_protocal_version != HV_VMBUS_VERSION_WS2008) {
+		new_channel->is_dedicated_interrupt =
+		    (offer->is_dedicated_interrupt != 0);
+		new_channel->signal_event_param->connection_id.u.id =
+		    offer->connection_id;
+	}
+
+	/*
+	 * Bind the channel to a chosen cpu.
+	 */
+	vmbus_channel_select_cpu(new_channel,
+	    &offer->offer.interface_type);
+
 	memcpy(&new_channel->offer_msg, offer,
 	    sizeof(hv_vmbus_channel_offer_channel));
 	new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
@@ -666,7 +796,7 @@ hv_vmbus_release_unattached_channels(void)
 {
 	hv_vmbus_channel *channel;
 
-	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 
 	while (!TAILQ_EMPTY(&hv_vmbus_g_connection.channel_anchor)) {
 	    channel = TAILQ_FIRST(&hv_vmbus_g_connection.channel_anchor);
@@ -676,5 +806,61 @@ hv_vmbus_release_unattached_channels(void)
 	    hv_vmbus_child_device_unregister(channel->device);
 	    hv_vmbus_free_vmbus_channel(channel);
 	}
-	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+}
+
+/**
+ * @brief Select the best outgoing channel
+ *
+ * Picks the opened sub-channel whose target_vcpu is numerically closest
+ * to the vcpu of the calling cpu; an exact match is returned at once and
+ * a tie goes to the later list entry. With no sub-channels (or a cpu id
+ * outside the vcpu map) the primary channel is returned.
+ *
+ * @param primary - primary channel
+ */
+struct hv_vmbus_channel *
+vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
+{
+	hv_vmbus_channel *best = primary;
+	hv_vmbus_channel *sub = NULL;
+	int this_cpu = PCPU_GET(cpuid);
+	int best_dist, sub_dist, here;
+
+	/* Without sub-channels there is nothing to choose from. */
+	if (TAILQ_EMPTY(&primary->sc_list_anchor))
+		return (best);
+	if (this_cpu >= MAXCPU)
+		return (best);
+
+	/* Hyper-V vcpu index of the cpu we are running on. */
+	here = hv_vmbus_g_context.hv_vcpu_index[this_cpu];
+
+	TAILQ_FOREACH(sub, &primary->sc_list_anchor, sc_list_entry) {
+		/* Only sub-channels that are already open are candidates. */
+		if (sub->state != HV_CHANNEL_OPENED_STATE)
+			continue;
+
+		/* A sub-channel bound to our own vcpu cannot be beaten. */
+		if (sub->target_vcpu == here)
+			return (sub);
+
+		/* Absolute vcpu distance of the best candidate so far ... */
+		if (best->target_vcpu > here)
+			best_dist = best->target_vcpu - here;
+		else
+			best_dist = here - best->target_vcpu;
+
+		/* ... and of this sub-channel. */
+		if (sub->target_vcpu > here)
+			sub_dist = sub->target_vcpu - here;
+		else
+			sub_dist = here - sub->target_vcpu;
+
+		/* Switch unless the current best is strictly closer. */
+		if (best_dist >= sub_dist)
+			best = sub;
+	}
+
+	return (best);
 }
diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c
index c8e0b48..cc83037 100644
--- a/sys/dev/hyperv/vmbus/hv_connection.c
+++ b/sys/dev/hyperv/vmbus/hv_connection.c
@@ -26,6 +26,9 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
 #include <sys/param.h>
 #include <sys/malloc.h>
 #include <sys/systm.h>
@@ -45,14 +48,113 @@ hv_vmbus_connection hv_vmbus_g_connection =
 	{ .connect_state = HV_DISCONNECTED,
 	  .next_gpadl_handle = 0xE1E10, };
 
+uint32_t hv_vmbus_protocal_version = HV_VMBUS_VERSION_WS2008;
+
+/* Return the next older VMBUS version to try, or INVALID when none is left. */
+static uint32_t
+hv_vmbus_get_next_version(uint32_t current_ver)
+{
+	uint32_t older;
+
+	/* Walk the ladder WIN8_1 -> WIN8 -> WIN7 -> WS2008 one step down. */
+	if (current_ver == HV_VMBUS_VERSION_WIN8_1)
+		older = HV_VMBUS_VERSION_WIN8;
+	else if (current_ver == HV_VMBUS_VERSION_WIN8)
+		older = HV_VMBUS_VERSION_WIN7;
+	else if (current_ver == HV_VMBUS_VERSION_WIN7)
+		older = HV_VMBUS_VERSION_WS2008;
+	else
+		/* WS2008 is the floor; unknown versions have no successor. */
+		older = HV_VMBUS_VERSION_INVALID;
+	return (older);
+}
+
+/**
+ * Offer one VMBUS protocol version to the host and wait for its verdict.
+ *
+ * @param msg_info - request/response buffer owned by the caller
+ * @param version  - VMBUS version to request
+ *
+ * @return 0 if the host accepted the version (connect_state is then
+ *	HV_CONNECTED), ECONNREFUSED if it answered but rejected it, else
+ *	the error from hv_vmbus_post_message() or sema_timedwait().
+ */
+static int
+hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
+	uint32_t version)
+{
+	int					ret = 0;
+	hv_vmbus_channel_initiate_contact	*msg;
+
+	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
+	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
+
+	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
+	msg->vmbus_version_requested = version;
+
+	/* Physical addresses of the interrupt page and both monitor pages. */
+	msg->interrupt_page = hv_get_phys_addr(
+		hv_vmbus_g_connection.interrupt_page);
+	msg->monitor_page_1 = hv_get_phys_addr(
+		hv_vmbus_g_connection.monitor_pages);
+	msg->monitor_page_2 = hv_get_phys_addr(
+		((uint8_t *) hv_vmbus_g_connection.monitor_pages + PAGE_SIZE));
+
+	/**
+	 * Add to list before we send the request since we may receive the
+	 * response before returning from this routine
+	 */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor,
+		msg_info, msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	ret = hv_vmbus_post_message(
+		msg,
+		sizeof(hv_vmbus_channel_initiate_contact));
+
+	/**
+	 * Wait for the connection response (KYS 5 seconds), but only if the
+	 * request was actually posted.
+	 */
+	if (ret == 0)
+		ret = sema_timedwait(&msg_info->wait_sema, 500);
+
+	/* The request is finished either way: take it off the list again. */
+	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+	TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
+		msg_info, msg_list_entry);
+	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+
+	/*
+	 * A post failure or a timeout means no response was received.  The
+	 * response buffer must not be inspected then, and the error (the
+	 * caller checks for EWOULDBLOCK) must not be turned into
+	 * ECONNREFUSED.
+	 */
+	if (ret != 0)
+		return (ret);
+
+	/**
+	 * Check if successful
+	 */
+	if (msg_info->response.version_response.version_supported) {
+		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
+	} else {
+		ret = ECONNREFUSED;
+	}
+
+	return (ret);
+}
+
 /**
  * Send a connect request on the partition service connection
  */
 int
 hv_vmbus_connect(void) {
 	int					ret = 0;
+	uint32_t				version;
 	hv_vmbus_channel_msg_info*		msg_info = NULL;
-	hv_vmbus_channel_initiate_contact*	msg;
 
 	/**
 	 * Make sure we are not connecting or connected
@@ -74,7 +176,7 @@ hv_vmbus_connect(void) {
 
 	TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
 	mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
-		NULL, MTX_SPIN);
+		NULL, MTX_DEF);
 
 	/**
 	 * Setup the vmbus event connection for channel interrupt abstraction
@@ -130,71 +232,30 @@ hv_vmbus_connect(void) {
 	    goto cleanup;
 	}
 
-	sema_init(&msg_info->wait_sema, 0, "Msg Info Sema");
-	msg = (hv_vmbus_channel_initiate_contact*) msg_info->msg;
-
-	msg->header.message_type = HV_CHANNEL_MESSAGE_INITIATED_CONTACT;
-	msg->vmbus_version_requested = HV_VMBUS_REVISION_NUMBER;
-
-	msg->interrupt_page = hv_get_phys_addr(
-		hv_vmbus_g_connection.interrupt_page);
-
-	msg->monitor_page_1 = hv_get_phys_addr(
-		hv_vmbus_g_connection.monitor_pages);
-
-	msg->monitor_page_2 =
-		hv_get_phys_addr(
-			((uint8_t *) hv_vmbus_g_connection.monitor_pages
-			+ PAGE_SIZE));
-
-	/**
-	 * Add to list before we send the request since we may receive the
-	 * response before returning from this routine
+	/*
+	 * Find the highest vmbus version number we can support.
 	 */
-	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
-
-	TAILQ_INSERT_TAIL(
-		&hv_vmbus_g_connection.channel_msg_anchor,
-		msg_info,
-		msg_list_entry);
-
-	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
-
-	ret = hv_vmbus_post_message(
-		msg,
-		sizeof(hv_vmbus_channel_initiate_contact));
-
-	if (ret != 0) {
-		mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
-		TAILQ_REMOVE(
-			&hv_vmbus_g_connection.channel_msg_anchor,
-			msg_info,
-			msg_list_entry);
-		mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
-		goto cleanup;
-	}
+	version = HV_VMBUS_VERSION_CURRENT;
+
+	do {
+		ret = hv_vmbus_negotiate_version(msg_info, version);
+		if (ret == EWOULDBLOCK) {
+			/*
+			 * We timed out.
+			 */
+			goto cleanup;
+		}
 
-	/**
-	 * Wait for the connection response
-	 */
-	ret = sema_timedwait(&msg_info->wait_sema, 500); /* KYS 5 seconds */
+		if (hv_vmbus_g_connection.connect_state == HV_CONNECTED)
+			break;
 
-	mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
-	TAILQ_REMOVE(
-		&hv_vmbus_g_connection.channel_msg_anchor,
-		msg_info,
-		msg_list_entry);
-	mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+		version = hv_vmbus_get_next_version(version);
+	} while (version != HV_VMBUS_VERSION_INVALID);
 
-	/**
-	 * Check if successful
-	 */
-	if (msg_info->response.version_response.version_supported) {
-		hv_vmbus_g_connection.connect_state = HV_CONNECTED;
-	} else {
-		ret = ECONNREFUSED;
-		goto cleanup;
-	}
+	hv_vmbus_protocal_version = version;
+	if (bootverbose)
+		printf("VMBUS: Portocal Version: %d.%d\n",
+		    version >> 16, version & 0xFFFF);
 
 	sema_destroy(&msg_info->wait_sema);
 	free(msg_info, M_DEVBUF);
@@ -286,7 +347,7 @@ hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
 	 *  and channels are accessed without the need to take this lock or search
 	 *  the list.
 	 */
-	mtx_lock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_lock(&hv_vmbus_g_connection.channel_lock);
 	TAILQ_FOREACH(channel,
 		&hv_vmbus_g_connection.channel_anchor, list_entry) {
 
@@ -295,7 +356,7 @@ hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
 		break;
 	    }
 	}
-	mtx_unlock_spin(&hv_vmbus_g_connection.channel_lock);
+	mtx_unlock(&hv_vmbus_g_connection.channel_lock);
 
 	return (foundChannel);
 }
@@ -306,7 +367,10 @@ hv_vmbus_get_channel_from_rel_id(uint32_t rel_id) {
 static void
 VmbusProcessChannelEvent(uint32_t relid) 
 {
+	void* arg;
+	uint32_t bytes_to_read;
 	hv_vmbus_channel* channel;
+	boolean_t is_batched_reading;
 
 	/**
 	 * Find the channel based on this relid and invokes
@@ -327,31 +391,98 @@ VmbusProcessChannelEvent(uint32_t relid)
 	 * callback to NULL. This closes the window.
 	 */
 
-	mtx_lock(&channel->inbound_lock);
+	/*
+	 * Disable the lock due to newly added WITNESS check in r277723.
+	 * Will seek other way to avoid race condition.
+	 * -- whu
+	 */
+	// mtx_lock(&channel->inbound_lock);
 	if (channel->on_channel_callback != NULL) {
-		channel->on_channel_callback(channel->channel_callback_context);
+		arg = channel->channel_callback_context;
+		is_batched_reading = channel->batched_reading;
+		/*
+		 * Optimize host to guest signaling by ensuring:
+		 * 1. While reading the channel, we disable interrupts from
+		 *    host.
+		 * 2. Ensure that we process all posted messages from the host
+		 *    before returning from this callback.
+		 * 3. Once we return, enable signaling from the host. Once this
+		 *    state is set we check to see if additional packets are
+		 *    available to read. In this case we repeat the process.
+		 */
+		do {
+			if (is_batched_reading)
+				hv_ring_buffer_read_begin(&channel->inbound);
+
+			channel->on_channel_callback(arg);
+
+			if (is_batched_reading)
+				bytes_to_read =
+				    hv_ring_buffer_read_end(&channel->inbound);
+			else
+				bytes_to_read = 0;
+		} while (is_batched_reading && (bytes_to_read != 0));
 	}
-	mtx_unlock(&channel->inbound_lock);
+	// mtx_unlock(&channel->inbound_lock);
 }
 
+#ifdef HV_DEBUG_INTR
+extern uint32_t hv_intr_count;
+extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
+extern uint32_t hv_vmbus_intr_cpu[MAXCPU];
+#endif
+
 /**
  * Handler for events
  */
 void
 hv_vmbus_on_events(void *arg) 
 {
-	int dword;
 	int bit;
+	int cpu;
+	int dword;
+	void *page_addr;
+	uint32_t* recv_interrupt_page = NULL;
 	int rel_id;
-	int maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
+	int maxdword;
+	hv_vmbus_synic_event_flags *event;
 	/* int maxdword = PAGE_SIZE >> 3; */
 
-	/*
-	 * receive size is 1/2 page and divide that by 4 bytes
-	 */
-
-	uint32_t* recv_interrupt_page =
-	    hv_vmbus_g_connection.recv_interrupt_page;
+	cpu = (int)(long)arg;
+	KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
+	    "cpu out of range!"));
+
+#ifdef HV_DEBUG_INTR
+	int i;
+	hv_vmbus_swintr_event_cpu[cpu]++;
+	if (hv_intr_count % 10000 == 0) {
+                printf("VMBUS: Total interrupt %d\n", hv_intr_count);
+                for (i = 0; i < mp_ncpus; i++)
+                        printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n",
+			    i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]);
+        }
+#endif
+
+	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
+		maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
+		/*
+		 * receive size is 1/2 page and divide that by 4 bytes
+		 */
+		recv_interrupt_page =
+		    hv_vmbus_g_connection.recv_interrupt_page;
+	} else {
+		/*
+		 * On Host with Win8 or above, the event page can be
+		 * checked directly to get the id of the channel
+		 * that has the pending interrupt.
+		 */
+		maxdword = HV_EVENT_FLAGS_DWORD_COUNT;
+		page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
+		event = (hv_vmbus_synic_event_flags *)
+		    page_addr + HV_VMBUS_MESSAGE_SINT;
+		recv_interrupt_page = event->flags32;
+	}
 
 	/*
 	 * Check events
@@ -416,16 +547,16 @@ int hv_vmbus_post_message(void *buffer, size_t bufferLen) {
  * Send an event notification to the parent
  */
 int
-hv_vmbus_set_event(uint32_t child_rel_id) {
+hv_vmbus_set_event(hv_vmbus_channel *channel) {
 	int ret = 0;
+	uint32_t child_rel_id = channel->offer_msg.child_rel_id;
 
 	/* Each uint32_t represents 32 channels */
 
 	synch_set_bit(child_rel_id & 31,
 		(((uint32_t *)hv_vmbus_g_connection.send_interrupt_page
 			+ (child_rel_id >> 5))));
-	ret = hv_vmbus_signal_event();
+	ret = hv_vmbus_signal_event(channel->signal_event_param);
 
 	return (ret);
 }
-
diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c
index 80a1f42..84e2a5e 100644
--- a/sys/dev/hyperv/vmbus/hv_hv.c
+++ b/sys/dev/hyperv/vmbus/hv_hv.c
@@ -67,8 +67,6 @@ static inline void do_cpuid_inline(unsigned int op, unsigned int *eax,
 hv_vmbus_context hv_vmbus_g_context = {
 	.syn_ic_initialized = FALSE,
 	.hypercall_page = NULL,
-	.signal_event_param = NULL,
-	.signal_event_buffer = NULL,
 };
 
 static struct timecounter hv_timecounter = {
@@ -256,28 +254,6 @@ hv_vmbus_init(void)
 
 	hv_vmbus_g_context.hypercall_page = virt_addr;
 
-	/*
-	 * Setup the global signal event param for the signal event hypercall
-	 */
-	hv_vmbus_g_context.signal_event_buffer =
-	    malloc(sizeof(hv_vmbus_input_signal_event_buffer), M_DEVBUF,
-		M_ZERO | M_NOWAIT);
-	KASSERT(hv_vmbus_g_context.signal_event_buffer != NULL,
-	    ("Error VMBUS: Failed to allocate signal_event_buffer\n"));
-	if (hv_vmbus_g_context.signal_event_buffer == NULL)
-	    goto cleanup;
-
-	hv_vmbus_g_context.signal_event_param =
-	    (hv_vmbus_input_signal_event*)
-	    (HV_ALIGN_UP((unsigned long)
-		hv_vmbus_g_context.signal_event_buffer,
-		HV_HYPERCALL_PARAM_ALIGN));
-	hv_vmbus_g_context.signal_event_param->connection_id.as_uint32_t = 0;
-	hv_vmbus_g_context.signal_event_param->connection_id.u.id =
-	    HV_VMBUS_EVENT_CONNECTION_ID;
-	hv_vmbus_g_context.signal_event_param->flag_number = 0;
-	hv_vmbus_g_context.signal_event_param->rsvd_z = 0;
-	
 	tc_init(&hv_timecounter); /* register virtual timecount */
 	
 	return (0);
@@ -303,12 +279,6 @@ hv_vmbus_cleanup(void)
 {
 	hv_vmbus_x64_msr_hypercall_contents hypercall_msr;
 
-	if (hv_vmbus_g_context.signal_event_buffer != NULL) {
-	    free(hv_vmbus_g_context.signal_event_buffer, M_DEVBUF);
-	    hv_vmbus_g_context.signal_event_buffer = NULL;
-	    hv_vmbus_g_context.signal_event_param = NULL;
-	}
-
 	if (hv_vmbus_g_context.guest_id == HV_FREEBSD_GUEST_ID) {
 	    if (hv_vmbus_g_context.hypercall_page != NULL) {
 		hypercall_msr.as_uint64_t = 0;
@@ -370,13 +340,13 @@ hv_vmbus_post_msg_via_msg_ipc(
  * event IPC. (This involves a hypercall.)
  */
 hv_vmbus_status
-hv_vmbus_signal_event()
+hv_vmbus_signal_event(void *con_id)
 {
 	hv_vmbus_status status;
 
 	status = hv_vmbus_do_hypercall(
 		    HV_CALL_SIGNAL_EVENT,
-		    hv_vmbus_g_context.signal_event_param,
+		    con_id,
 		    0) & 0xFFFF;
 
 	return (status);
@@ -390,6 +360,7 @@ hv_vmbus_synic_init(void *arg)
 
 {
 	int			cpu;
+	uint64_t		hv_vcpu_index;
 	hv_vmbus_synic_simp	simp;
 	hv_vmbus_synic_siefp	siefp;
 	hv_vmbus_synic_scontrol sctrl;
@@ -403,23 +374,14 @@ hv_vmbus_synic_init(void *arg)
 	    return;
 
 	/*
-	 * KYS: Looks like we can only initialize on cpu0; don't we support
-	 * SMP guests?
-	 *
-	 * TODO: Need to add SMP support for FreeBSD V9
-	 */
-
-	if (cpu != 0)
-	    return;
-
-	/*
 	 * TODO: Check the version
 	 */
 	version = rdmsr(HV_X64_MSR_SVERSION);
-
 	
-	hv_vmbus_g_context.syn_ic_msg_page[cpu] = setup_args->page_buffers[0];
-	hv_vmbus_g_context.syn_ic_event_page[cpu] = setup_args->page_buffers[1];
+	hv_vmbus_g_context.syn_ic_msg_page[cpu] =
+	    setup_args->page_buffers[2 * cpu];
+	hv_vmbus_g_context.syn_ic_event_page[cpu] =
+	    setup_args->page_buffers[2 * cpu + 1];
 
 	/*
 	 * Setup the Synic's message page
@@ -443,9 +405,10 @@ hv_vmbus_synic_init(void *arg)
 	wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
 
 	/*HV_SHARED_SINT_IDT_VECTOR + 0x20; */
+	shared_sint.as_uint64_t = 0;
 	shared_sint.u.vector = setup_args->vector;
 	shared_sint.u.masked = FALSE;
-	shared_sint.u.auto_eoi = FALSE;
+	shared_sint.u.auto_eoi = TRUE;
 
 	wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
 	    shared_sint.as_uint64_t);
@@ -458,6 +421,13 @@ hv_vmbus_synic_init(void *arg)
 
 	hv_vmbus_g_context.syn_ic_initialized = TRUE;
 
+	/*
+	 * Set up the cpuid mapping from Hyper-V to FreeBSD.
+	 * The array is indexed using FreeBSD cpuid.
+	 */
+	hv_vcpu_index = rdmsr(HV_X64_MSR_VP_INDEX);
+	hv_vmbus_g_context.hv_vcpu_index[cpu] = (uint32_t)hv_vcpu_index;
+
 	return;
 }
 
@@ -469,14 +439,10 @@ void hv_vmbus_synic_cleanup(void *arg)
 	hv_vmbus_synic_sint	shared_sint;
 	hv_vmbus_synic_simp	simp;
 	hv_vmbus_synic_siefp	siefp;
-	int			cpu = PCPU_GET(cpuid);
 
 	if (!hv_vmbus_g_context.syn_ic_initialized)
 	    return;
 
-	if (cpu != 0)
-	    return; /* TODO: XXXKYS: SMP? */
-
 	shared_sint.as_uint64_t = rdmsr(
 	    HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT);
 
diff --git a/sys/dev/hyperv/vmbus/hv_ring_buffer.c b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
index f7c1965..0e51ef7 100644
--- a/sys/dev/hyperv/vmbus/hv_ring_buffer.c
+++ b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
@@ -26,6 +26,8 @@
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/lock.h>
@@ -144,6 +146,69 @@ get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
 	return (uint64_t) ring_info->ring_buffer->write_index << 32;
 }
 
+/* Set the ring's interrupt_mask before a batched read begins. */
+void
+hv_ring_buffer_read_begin(hv_vmbus_ring_buffer_info *ring_info)
+{
+	ring_info->ring_buffer->interrupt_mask = 1;
+	mb();	/* full barrier issued right after the mask store */
+}
+
+/*
+ * Finish a batched read: clear interrupt_mask, then report how many
+ * bytes are still readable so the caller can detect a race and loop.
+ */
+uint32_t
+hv_ring_buffer_read_end(hv_vmbus_ring_buffer_info *ring_info)
+{
+	uint32_t avail_read;
+	uint32_t avail_write;
+
+	/* Unmask first, with a full barrier before the ring is re-checked. */
+	ring_info->ring_buffer->interrupt_mask = 0;
+	mb();
+
+	/* Anything still readable here raced with us and must be processed. */
+	get_ring_buffer_avail_bytes(ring_info, &avail_read, &avail_write);
+	return (avail_read);
+}
+
+/*
+ * When we write to the ring buffer, check if the host needs to
+ * be signaled. Here are the details of this protocol:
+ *
+ *	1. The host guarantees that while it is draining the
+ *	   ring buffer, it will set the interrupt_mask to
+ *	   indicate it does not need to be interrupted when
+ *	   new data is placed.
+ *
+ *	2. The host guarantees that it will completely drain
+ *	   the ring buffer before exiting the read loop. Further,
+ *	   once the ring buffer is empty, it will clear the
+ *	   interrupt_mask and re-check to see if new data has
+ *	   arrived.
+ */
+static boolean_t
+hv_ring_buffer_needsig_on_write(
+	uint32_t			old_write_location,
+	hv_vmbus_ring_buffer_info*	rbi)
+{
+	mb();	/* full barrier before sampling interrupt_mask */
+	if (rbi->ring_buffer->interrupt_mask)
+		return (FALSE);
+
+	/* Read memory barrier before sampling read_index */
+	rmb();
+	/*
+	 * The only case in which we need to signal is when the ring
+	 * transitions from being empty to non-empty.
+	 */
+	if (old_write_location == rbi->ring_buffer->read_index)
+		return (TRUE);
+
+	return (FALSE);
+}
+
 static uint32_t	copy_to_ring_buffer(
 			hv_vmbus_ring_buffer_info*	ring_info,
 			uint32_t			start_write_offset,
@@ -204,11 +269,13 @@ int
 hv_ring_buffer_write(
 	hv_vmbus_ring_buffer_info*	out_ring_info,
 	hv_vmbus_sg_buffer_list		sg_buffers[],
-	uint32_t			sg_buffer_count)
+	uint32_t			sg_buffer_count,
+	boolean_t			*need_sig)
 {
 	int i = 0;
 	uint32_t byte_avail_to_write;
 	uint32_t byte_avail_to_read;
+	uint32_t old_write_location;
 	uint32_t total_bytes_to_write = 0;
 
 	volatile uint32_t next_write_location;
@@ -242,6 +309,8 @@ hv_ring_buffer_write(
 	 */
 	next_write_location = get_next_write_location(out_ring_info);
 
+	old_write_location = next_write_location;
+
 	for (i = 0; i < sg_buffer_count; i++) {
 	    next_write_location = copy_to_ring_buffer(out_ring_info,
 		next_write_location, (char *) sg_buffers[i].data,
@@ -258,9 +327,9 @@ hv_ring_buffer_write(
 		(char *) &prev_indices, sizeof(uint64_t));
 
 	/*
-	 * Make sure we flush all writes before updating the writeIndex
+	 * Full memory barrier before updating the write index.
 	 */
-	wmb();
+	mb();
 
 	/*
 	 * Now, update the write location
@@ -269,6 +338,9 @@ hv_ring_buffer_write(
 
 	mtx_unlock_spin(&out_ring_info->ring_lock);
 
+	*need_sig = hv_ring_buffer_needsig_on_write(old_write_location,
+	    out_ring_info);
+
 	return (0);
 }
 
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
index ca28fd5..91813bb 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
@@ -53,22 +53,17 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/stdarg.h>
 #include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/segments.h>
 #include <sys/pcpu.h>
+#include <machine/apicvar.h>
 
 #include "hv_vmbus_priv.h"
 
 
 #define VMBUS_IRQ	0x5
 
-static struct intr_event *hv_msg_intr_event;
-static struct intr_event *hv_event_intr_event;
-static void *msg_swintr;
-static void *event_swintr;
 static device_t vmbus_devp;
-static void *vmbus_cookiep;
-static int vmbus_rid;
-struct resource *intr_res;
-static int vmbus_irq = VMBUS_IRQ;
 static int vmbus_inited;
 static hv_setup_args setup_args; /* only CPU 0 supported at this time */
 
@@ -77,14 +72,17 @@ static hv_setup_args setup_args; /* only CPU 0 supported at this time */
  * the hypervisor.
  */
 static void
-vmbus_msg_swintr(void *dummy)
+vmbus_msg_swintr(void *arg)
 {
 	int 			cpu;
 	void*			page_addr;
 	hv_vmbus_message*	msg;
 	hv_vmbus_message*	copied;
 
-	cpu = PCPU_GET(cpuid);
+	cpu = (int)(long)arg;
+	KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
+	    "cpu out of range!"));
+
 	page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
 	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
 
@@ -130,17 +128,8 @@ vmbus_msg_swintr(void *dummy)
  *
  * The purpose of this routine is to determine the type of VMBUS protocol
  * message to process - an event or a channel message.
- * As this is an interrupt filter routine, the function runs in a very
- * restricted envinronment.  From the manpage for bus_setup_intr(9)
- *
- *   In this restricted environment, care must be taken to account for all
- *   races.  A careful analysis of races should be done as well.  It is gener-
- *   ally cheaper to take an extra interrupt, for example, than to protect
- *   variables with spinlocks.	Read, modify, write cycles of hardware regis-
- *   ters need to be carefully analyzed if other threads are accessing the
- *   same registers.
  */
-static int
+static inline int
 hv_vmbus_isr(void *unused) 
 {
 	int				cpu;
@@ -149,8 +138,6 @@ hv_vmbus_isr(void *unused)
 	void*				page_addr;
 
 	cpu = PCPU_GET(cpuid);
-	/* (Temporary limit) */
-	KASSERT(cpu == 0, ("hv_vmbus_isr: Interrupt on CPU other than zero"));
 
 	/*
 	 * The Windows team has advised that we check for events
@@ -162,9 +149,21 @@ hv_vmbus_isr(void *unused)
 	event = (hv_vmbus_synic_event_flags*)
 		    page_addr + HV_VMBUS_MESSAGE_SINT;
 
-	/* Since we are a child, we only need to check bit 0 */
-	if (synch_test_and_clear_bit(0, &event->flags32[0])) {
-		swi_sched(event_swintr, 0);
+	if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
+	    (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
+		/* Since we are a child, we only need to check bit 0 */
+		if (synch_test_and_clear_bit(0, &event->flags32[0])) {
+			swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
+		}
+	} else {
+		/*
+		 * On host with Win8 or above, we can directly look at
+		 * the event page. If bit n is set, we have an interrupt 
+		 * on the channel with id n.
+		 * Directly schedule the event software interrupt on
+		 * current cpu.
+		 */
+		swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
 	}
 
 	/* Check if there are actual msgs to be process */
@@ -172,12 +171,47 @@ hv_vmbus_isr(void *unused)
 	msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
 
 	if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
-		swi_sched(msg_swintr, 0);
+		swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
 	}
 
 	return FILTER_HANDLED;
 }
 
+#ifdef HV_DEBUG_INTR 
+uint32_t hv_intr_count = 0;
+#endif
+uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
+uint32_t hv_vmbus_intr_cpu[MAXCPU];
+
+/* Vector-level interrupt entry: runs hv_vmbus_isr() with preemption off. */
+void
+hv_vector_handler(struct trapframe *trap_frame)
+{
+	/*
+	 * Disable preemption.
+	 */
+	critical_enter();
+
+#ifdef HV_DEBUG_INTR
+	/*
+	 * Do a little interrupt counting.
+	 */
+	{
+		int cpu = PCPU_GET(cpuid);
+
+		hv_vmbus_intr_cpu[cpu]++;
+		hv_intr_count++;
+	}
+#endif
+
+	hv_vmbus_isr(NULL);
+
+	/*
+	 * Enable preemption.
+	 */
+	critical_exit();
+}
+
 static int
 vmbus_read_ivar(
 	device_t	dev,
@@ -316,6 +350,81 @@ vmbus_probe(device_t dev) {
 	return (BUS_PROBE_NOWILDCARD);
 }
 
+#ifdef HYPERV
+extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
+
+/**
+ * @brief Find a free IDT slot and setup the interrupt handler.
+ *
+ * @return the vector that was claimed, or 0 when no free slot exists.
+ */
+static int
+vmbus_vector_alloc(void)
+{
+	struct gate_descriptor *gate;
+	uintptr_t handler;
+	int vec;
+
+	/*
+	 * Scan downwards from just below APIC_SPURIOUS_INT to APIC_IPI_INTS
+	 * for a gate that still points at 'rsvd', and install the
+	 * 'hv_vmbus_callback' handler in the first such slot.
+	 */
+	for (vec = APIC_SPURIOUS_INT - 1; vec >= APIC_IPI_INTS; vec--) {
+		gate = &idt[vec];
+		handler = ((long)gate->gd_hioffset << 16 | gate->gd_looffset);
+		if (handler != (uintptr_t)&IDTVEC(rsvd))
+			continue;
+#ifdef __i386__
+		setidt(vec, IDTVEC(hv_vmbus_callback), SDT_SYS386IGT,
+		    SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#else
+		setidt(vec, IDTVEC(hv_vmbus_callback), SDT_SYSIGT,
+		    SEL_KPL, 0);
+#endif
+		return (vec);
+	}
+	return (0);
+}
+
+/** @brief Restore the IDT slot to rsvd; a vector of 0 (none) is a no-op. */
+static void
+vmbus_vector_free(int vector)
+{
+	uintptr_t func;
+	struct gate_descriptor *ip;
+
+	if (vector == 0)
+		return;
+	KASSERT(vector >= APIC_IPI_INTS && vector < APIC_SPURIOUS_INT,
+	    ("invalid vector %d", vector));
+	ip = &idt[vector];
+	func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset);
+	KASSERT(func == (uintptr_t)&IDTVEC(hv_vmbus_callback),
+	    ("invalid vector %d", vector));
+#ifdef __i386__	/* i386 gates need a code selector, as in vmbus_vector_alloc */
+	setidt(vector, IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+#else
+	setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
+#endif
+}
+
+#else /* HYPERV */
+
+static int
+vmbus_vector_alloc(void)	/* stub used when HYPERV is not defined */
+{
+	return(0);	/* 0 == no vector; vmbus_bus_init() bails out on it */
+}
+
+static void
+vmbus_vector_free(int vector)	/* stub used when HYPERV is not defined */
+{
+}
+
+#endif /* HYPERV */
+
 /**
  * @brief Main vmbus driver initialization routine.
  *
@@ -331,22 +440,7 @@ vmbus_probe(device_t dev) {
 static int
 vmbus_bus_init(void)
 {
-	struct ioapic_intsrc {
-		struct intsrc io_intsrc;
-		u_int io_irq;
-		u_int io_intpin:8;
-		u_int io_vector:8;
-		u_int io_cpu:8;
-		u_int io_activehi:1;
-		u_int io_edgetrigger:1;
-		u_int io_masked:1;
-		int io_bus:4;
-		uint32_t io_lowreg;
-	};
-	int i, ret;
-	unsigned int vector = 0;
-	struct intsrc *isrc;
-	struct ioapic_intsrc *intpin;
+	int i, j, n, ret;
 
 	if (vmbus_inited)
 		return (0);
@@ -361,80 +455,100 @@ vmbus_bus_init(void)
 		return (ret);
 	}
 
-	ret = swi_add(&hv_msg_intr_event, "hv_msg", vmbus_msg_swintr,
-	    NULL, SWI_CLOCK, 0, &msg_swintr);
-
-	if (ret)
-	    goto cleanup;
-
 	/*
-	 * Message SW interrupt handler checks a per-CPU page and
-	 * thus the thread needs to be bound to CPU-0 - which is where
-	 * all interrupts are processed.
+	 * Find a free IDT slot for vmbus callback.
 	 */
-	ret = intr_event_bind(hv_msg_intr_event, 0);
-
-	if (ret)
-		goto cleanup1;
+	hv_vmbus_g_context.hv_cb_vector = vmbus_vector_alloc();
 
-	ret = swi_add(&hv_event_intr_event, "hv_event", hv_vmbus_on_events,
-	    NULL, SWI_CLOCK, 0, &event_swintr);
-
-	if (ret)
-		goto cleanup1;
+	if (hv_vmbus_g_context.hv_cb_vector == 0) {
+		if(bootverbose)
+			printf("Error VMBUS: Cannot find free IDT slot for "
+			    "vmbus callback!\n");
+		goto cleanup;
+	}
 
-	intr_res = bus_alloc_resource(vmbus_devp,
-	    SYS_RES_IRQ, &vmbus_rid, vmbus_irq, vmbus_irq, 1, RF_ACTIVE);
+	if(bootverbose)
+		printf("VMBUS: vmbus callback vector %d\n",
+		    hv_vmbus_g_context.hv_cb_vector);
 
-	if (intr_res == NULL) {
-		ret = ENOMEM; /* XXXKYS: Need a better errno */
-		goto cleanup2;
+	/*
+	 * Notify the hypervisor of our vector.
+	 */
+	setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
+
+	CPU_FOREACH(j) {
+		hv_vmbus_intr_cpu[j] = 0;
+		hv_vmbus_swintr_event_cpu[j] = 0;
+		hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
+		hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
+		hv_vmbus_g_context.event_swintr[j] = NULL;
+		hv_vmbus_g_context.msg_swintr[j] = NULL;
+
+		for (i = 0; i < 2; i++)
+			setup_args.page_buffers[2 * j + i] = NULL;
 	}
 
 	/*
-	 * Setup interrupt filter handler
+	 * Per cpu setup.
 	 */
-	ret = bus_setup_intr(vmbus_devp, intr_res,
-	    INTR_TYPE_NET | INTR_MPSAFE, hv_vmbus_isr, NULL,
-	    NULL, &vmbus_cookiep);
-
-	if (ret != 0)
-		goto cleanup3;
-
-	ret = bus_bind_intr(vmbus_devp, intr_res, 0);
-	if (ret != 0)
-		goto cleanup4;
-
-	isrc = intr_lookup_source(vmbus_irq);
-	if ((isrc == NULL) || (isrc->is_event == NULL)) {
-		ret = EINVAL;
-		goto cleanup4;
-	}
+	CPU_FOREACH(j) {
+		/*
+		 * Setup software interrupt thread and handler for msg handling.
+		 */
+		ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
+		    "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
+		    &hv_vmbus_g_context.msg_swintr[j]);
+		if (ret) {
+			if(bootverbose)
+				printf("VMBUS: failed to setup msg swi for "
+				    "cpu %d\n", j);
+			goto cleanup1;
+		}
 
-	/* vector = isrc->is_event->ie_vector; */
-	intpin = (struct ioapic_intsrc *)isrc;
-	vector = intpin->io_vector;
+		/*
+		 * Bind the swi thread to the cpu.
+		 */
+		ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
+		    j);
+	 	if (ret) {
+			if(bootverbose)
+				printf("VMBUS: failed to bind msg swi thread "
+				    "to cpu %d\n", j);
+			goto cleanup1;
+		}
 
-	if(bootverbose)
-		printf("VMBUS: irq 0x%x vector 0x%x\n", vmbus_irq, vector);
+		/*
+		 * Setup software interrupt thread and handler for
+		 * event handling.
+		 */
+		ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j],
+		    "hv_event", hv_vmbus_on_events, (void *)(long)j,
+		    SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]);
+		if (ret) {
+			if(bootverbose)
+				printf("VMBUS: failed to setup event swi for "
+				    "cpu %d\n", j);
+			goto cleanup1;
+		}
 
-	/**
-	 * Notify the hypervisor of our irq.
-	 */
-	setup_args.vector = vector;
-	for(i = 0; i < 2; i++) {
-		setup_args.page_buffers[i] =
+		/*
+		 * Prepare the per cpu msg and event pages to be used on each cpu.
+		 */
+		for(i = 0; i < 2; i++) {
+			setup_args.page_buffers[2 * j + i] =
 				malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
-		if (setup_args.page_buffers[i] == NULL) {
-			KASSERT(setup_args.page_buffers[i] != NULL,
+			if (setup_args.page_buffers[2 * j + i] == NULL) {
+				KASSERT(setup_args.page_buffers[2 * j + i] != NULL,
 					("Error VMBUS: malloc failed!"));
-			if (i > 0)
-				free(setup_args.page_buffers[0], M_DEVBUF);
-			goto cleanup4;
+				goto cleanup1;
+			}
 		}
 	}
 
-	/* only CPU #0 supported at this time */
+	if (bootverbose)
+		printf("VMBUS: Calling smp_rendezvous, smp_started = %d\n",
+		    smp_started);
+
 	smp_rendezvous(NULL, hv_vmbus_synic_init, NULL, &setup_args);
 
 	/*
@@ -443,26 +557,32 @@ vmbus_bus_init(void)
 	ret = hv_vmbus_connect();
 
 	if (ret != 0)
-	    goto cleanup4;
+		goto cleanup1;
 
 	hv_vmbus_request_channel_offers();
 	return (ret);
 
-	cleanup4:
-
+	cleanup1:
 	/*
-	 * remove swi, bus and intr resource
+	 * Free pages alloc'ed
 	 */
-	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
+	for (n = 0; n < 2 * MAXCPU; n++)
+		if (setup_args.page_buffers[n] != NULL)
+			free(setup_args.page_buffers[n], M_DEVBUF);
 
-	cleanup3:
-	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
-
-	cleanup2:
-	swi_remove(event_swintr);
+	/*
+	 * remove swi and vmbus callback vector;
+	 */
+	CPU_FOREACH(j) {
+		if (hv_vmbus_g_context.msg_swintr[j] != NULL)
+			swi_remove(hv_vmbus_g_context.msg_swintr[j]);
+		if (hv_vmbus_g_context.event_swintr[j] != NULL)
+			swi_remove(hv_vmbus_g_context.event_swintr[j]);
+		hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;	
+		hv_vmbus_g_context.hv_event_intr_event[j] = NULL;	
+	}
 
-	cleanup1:
-	swi_remove(msg_swintr);
+	vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
 
 	cleanup:
 	hv_vmbus_cleanup();
@@ -515,20 +635,24 @@ vmbus_bus_exit(void)
 
 	smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
 
-	for(i = 0; i < 2; i++) {
+	for(i = 0; i < 2 * MAXCPU; i++) {
 		if (setup_args.page_buffers[i] != 0)
 			free(setup_args.page_buffers[i], M_DEVBUF);
 	}
 
 	hv_vmbus_cleanup();
 
-	/* remove swi, bus and intr resource */
-	bus_teardown_intr(vmbus_devp, intr_res, vmbus_cookiep);
-
-	bus_release_resource(vmbus_devp, SYS_RES_IRQ, vmbus_rid, intr_res);
+	/* remove swi */
+	CPU_FOREACH(i) {
+		if (hv_vmbus_g_context.msg_swintr[i] != NULL)
+			swi_remove(hv_vmbus_g_context.msg_swintr[i]);
+		if (hv_vmbus_g_context.event_swintr[i] != NULL)
+			swi_remove(hv_vmbus_g_context.event_swintr[i]);
+		hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;	
+		hv_vmbus_g_context.hv_event_intr_event[i] = NULL;	
+	}
 
-	swi_remove(msg_swintr);
-	swi_remove(event_swintr);
+	vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
 
 	return;
 }
@@ -603,6 +727,6 @@ devclass_t vmbus_devclass;
 DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0);
 MODULE_VERSION(vmbus,1);
 
-/* TODO: We want to be earlier than SI_SUB_VFS */
-SYSINIT(vmb_init, SI_SUB_VFS, SI_ORDER_MIDDLE, vmbus_init, NULL);
+/* We want to be started after SMP is initialized */
+SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL);
 
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
index 6bc875d..faa6dec 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
@@ -181,49 +181,30 @@ enum {
 
 #define HV_HYPERCALL_PARAM_ALIGN sizeof(uint64_t)
 
-/*
- *  Connection identifier type
- */
-typedef union {
-	uint32_t		as_uint32_t;
-	struct {
-		uint32_t	id:24;
-		uint32_t	reserved:8;
-	} u;
-
-} __packed hv_vmbus_connection_id;
-
-/*
- * Definition of the hv_vmbus_signal_event hypercall input structure
- */
-typedef struct {
-	hv_vmbus_connection_id	connection_id;
-	uint16_t		flag_number;
-	uint16_t		rsvd_z;
-} __packed hv_vmbus_input_signal_event;
-
-typedef struct {
-	uint64_t			align8;
-	hv_vmbus_input_signal_event	event;
-} __packed hv_vmbus_input_signal_event_buffer;
-
 typedef struct {
 	uint64_t	guest_id;
 	void*		hypercall_page;
 	hv_bool_uint8_t	syn_ic_initialized;
+
+	hv_vmbus_handle	syn_ic_msg_page[MAXCPU];
+	hv_vmbus_handle	syn_ic_event_page[MAXCPU];
 	/*
-	 * This is used as an input param to HV_CALL_SIGNAL_EVENT hypercall.
-	 * The input param is immutable  in our usage and
-	 * must be dynamic mem (vs stack or global).
+	 * For FreeBSD cpuid to Hyper-V vcpuid mapping.
 	 */
-	hv_vmbus_input_signal_event_buffer	*signal_event_buffer;
+	uint32_t	hv_vcpu_index[MAXCPU];
 	/*
-	 * 8-bytes aligned of the buffer above
+	 * Each cpu has its own software interrupt handler for channel
+	 * event and msg handling.
 	 */
-	hv_vmbus_input_signal_event		*signal_event_param;
-
-	hv_vmbus_handle	syn_ic_msg_page[MAXCPU];
-	hv_vmbus_handle	syn_ic_event_page[MAXCPU];
+	struct intr_event		*hv_event_intr_event[MAXCPU];
+	struct intr_event		*hv_msg_intr_event[MAXCPU];
+	void				*event_swintr[MAXCPU];
+	void				*msg_swintr[MAXCPU];
+	/*
+	 * Host uses this vector to interrupt the guest for vmbus channel
+	 * event and msg.
+	 */
+	unsigned int			hv_cb_vector;
 } hv_vmbus_context;
 
 /*
@@ -368,7 +349,8 @@ typedef struct {
 	TAILQ_HEAD(, hv_vmbus_channel_msg_info)	channel_msg_anchor;
 	struct mtx				channel_msg_lock;
 	/**
-	 * List of channels
+	 * List of primary channels. Sub channels will be linked
+	 * under their primary channel.
 	 */
 	TAILQ_HEAD(, hv_vmbus_channel)		channel_anchor;
 	struct mtx				channel_lock;
@@ -560,6 +542,8 @@ typedef union {
 	uint32_t	flags32[HV_EVENT_FLAGS_DWORD_COUNT];
 } hv_vmbus_synic_event_flags;
 
+/* MSR used to provide vcpu index */
+#define	HV_X64_MSR_VP_INDEX   (0x40000002)
 
 /*
  * Define synthetic interrupt controller model specific registers
@@ -618,7 +602,8 @@ void			hv_ring_buffer_cleanup(
 int			hv_ring_buffer_write(
 				hv_vmbus_ring_buffer_info	*ring_info,
 				hv_vmbus_sg_buffer_list		sg_buffers[],
-				uint32_t			sg_buff_count);
+				uint32_t			sg_buff_count,
+				boolean_t			*need_sig);
 
 int			hv_ring_buffer_peek(
 				hv_vmbus_ring_buffer_info	*ring_info,
@@ -638,6 +623,12 @@ void			hv_vmbus_dump_ring_info(
 				hv_vmbus_ring_buffer_info	*ring_info,
 				char				*prefix);
 
+void			hv_ring_buffer_read_begin(
+				hv_vmbus_ring_buffer_info	*ring_info);
+
+uint32_t		hv_ring_buffer_read_end(
+				hv_vmbus_ring_buffer_info	*ring_info);
+
 hv_vmbus_channel*	hv_vmbus_allocate_channel(void);
 void			hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel);
 void			hv_vmbus_on_channel_message(void *context);
@@ -652,7 +643,7 @@ uint16_t		hv_vmbus_post_msg_via_msg_ipc(
 				void			*payload,
 				size_t			payload_size);
 
-uint16_t		hv_vmbus_signal_event(void);
+uint16_t		hv_vmbus_signal_event(void *con_id);
 void			hv_vmbus_synic_init(void *irq_arg);
 void			hv_vmbus_synic_cleanup(void *arg);
 int			hv_vmbus_query_hypervisor_presence(void);
@@ -674,7 +665,7 @@ hv_vmbus_channel*	hv_vmbus_get_channel_from_rel_id(uint32_t rel_id);
 int			hv_vmbus_connect(void);
 int			hv_vmbus_disconnect(void);
 int			hv_vmbus_post_message(void *buffer, size_t buf_size);
-int			hv_vmbus_set_event(uint32_t child_rel_id);
+int			hv_vmbus_set_event(hv_vmbus_channel *channel);
 void			hv_vmbus_on_events(void *);
 
 
@@ -718,7 +709,7 @@ static inline  uint64_t hv_generate_guest_id(
 
 typedef struct {
 	unsigned int	vector;
-	void		*page_buffers[2];
+	void		*page_buffers[2 * MAXCPU];
 } hv_setup_args;
 
 #endif  /* __HYPERV_PRIV_H__ */