author | Renato Botelho <renato@netgate.com> | 2016-06-14 14:37:21 -0500
committer | Renato Botelho <renato@netgate.com> | 2016-06-14 14:37:21 -0500
commit | b8632c4f34175c7018be77059ab229e755eb67e0 (patch)
tree | 712b8119449ce1d7585aef984d17257bea58bf58 /sys/dev/hyperv
parent | 47dfb8d658406ebf07225c0104ebe4be06ae405f (diff)
parent | 494811e2fb5cf62d613082ffb6e26922a0b5b2e6 (diff)
download | FreeBSD-src-b8632c4f34175c7018be77059ab229e755eb67e0.zip FreeBSD-src-b8632c4f34175c7018be77059ab229e755eb67e0.tar.gz
Merge remote-tracking branch 'origin/stable/10' into devel
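A recurring theme in the hyperv.h hunk below is the removal of the driver-private hv_work_queue/hv_work_item wrappers in favor of stock taskqueue(9) objects embedded directly in the channel (the new rxq/channel_task members). The following is a minimal sketch of that pattern, not code from this commit; the demo_* names are invented for illustration:

/* Hedged sketch of the taskqueue(9) pattern; all demo_* names are hypothetical. */
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/priority.h>
#include <sys/taskqueue.h>

struct demo_channel {
	struct taskqueue	*rxq;		/* like hv_vmbus_channel.rxq */
	struct task		channel_task;	/* like ...channel_task */
};

static void
demo_channel_task(void *xchan, int pending __unused)
{
	/* Deferred channel processing runs in the taskqueue thread. */
}

static void
demo_channel_setup(struct demo_channel *chan)
{
	/* One kernel thread servicing this channel's deferred work. */
	chan->rxq = taskqueue_create("demo_rx", M_WAITOK,
	    taskqueue_thread_enqueue, &chan->rxq);
	taskqueue_start_threads(&chan->rxq, 1, PI_NET, "demo rx");
	TASK_INIT(&chan->channel_task, 0, demo_channel_task, chan);
}

static void
demo_channel_intr(struct demo_channel *chan)
{
	/* Replaces the removed hv_queue_work_item(): defer to the queue. */
	taskqueue_enqueue(chan->rxq, &chan->channel_task);
}

Compared with the removed hv_work_queue abstraction, this drops the extra proc/sema bookkeeping and lets the standard taskqueue thread (optionally CPU-bound, as netvsc_attach does below) do the work.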
Diffstat (limited to 'sys/dev/hyperv')
-rw-r--r-- | sys/dev/hyperv/include/hyperv.h | 56
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_net_vsc.c | 48
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_net_vsc.h | 99
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c | 1826
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_rndis_filter.c | 24
-rw-r--r-- | sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c | 17
-rw-r--r-- | sys/dev/hyperv/utilities/hv_heartbeat.c | 129
-rw-r--r-- | sys/dev/hyperv/utilities/hv_kvp.c | 556
-rw-r--r-- | sys/dev/hyperv/utilities/hv_kvp.h | 13
-rw-r--r-- | sys/dev/hyperv/utilities/hv_shutdown.c | 151
-rw-r--r-- | sys/dev/hyperv/utilities/hv_timesync.c | 216
-rw-r--r-- | sys/dev/hyperv/utilities/hv_util.c | 415
-rw-r--r-- | sys/dev/hyperv/utilities/hv_util.h | 55
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_channel.c | 111
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_channel_mgmt.c | 231
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_connection.c | 165
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_hv.c | 6
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 104
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_vmbus_priv.h | 14
19 files changed, 2400 insertions, 1836 deletions
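One change worth calling out before the patch itself: the hv_net_vsc.c hunks replace the ffs()/synch_test_and_set_bit() send-section allocator with ffsl() and atomic_testandset_long(). A minimal sketch of that lock-free bitmap claim loop follows; it is illustrative only (bitmap_alloc is an invented name, not from the diff):

/* Illustrative sketch of the claim loop; not code from the diff. */
#include <sys/param.h>		/* NBBY */
#include <sys/libkern.h>	/* ffsl() */
#include <machine/atomic.h>	/* atomic_testandset_long() */

#define DEMO_BITS_PER_LONG	(sizeof(long) * NBBY)

static long
bitmap_alloc(volatile u_long *bitmap, int words)
{
	int i, idx;

	for (i = 0; i < words; i++) {
		idx = ffsl(~bitmap[i]);		/* 1-based first clear bit */
		if (idx == 0)
			continue;		/* word is fully allocated */
		idx--;
		/*
		 * Claim the bit atomically; if another CPU set it first,
		 * move on and try the next word, as the driver does.
		 */
		if (atomic_testandset_long(&bitmap[i], idx))
			continue;
		return ((long)i * DEMO_BITS_PER_LONG + idx);
	}
	return (-1);			/* nothing free */
}

Using ffsl() instead of ffs() matters because the bitmap words are longs: on 64-bit machines ffs() would only scan the low 32 bits of each word.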
diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h index 1a45b7b..f45543b 100644 --- a/sys/dev/hyperv/include/hyperv.h +++ b/sys/dev/hyperv/include/hyperv.h @@ -755,6 +755,8 @@ typedef struct hv_vmbus_channel { struct mtx inbound_lock; + struct taskqueue * rxq; + struct task channel_task; hv_vmbus_pfn_channel_callback on_channel_callback; void* channel_callback_context; @@ -906,30 +908,6 @@ int hv_vmbus_channel_teardown_gpdal( struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary); -/* - * Work abstraction defines - */ -typedef struct hv_work_queue { - struct taskqueue* queue; - struct proc* proc; - struct sema* work_sema; -} hv_work_queue; - -typedef struct hv_work_item { - struct task work; - void (*callback)(void *); - void* context; - hv_work_queue* wq; -} hv_work_item; - -struct hv_work_queue* hv_work_queue_create(char* name); - -void hv_work_queue_close(struct hv_work_queue* wq); - -int hv_queue_work_item( - hv_work_queue* wq, - void (*callback)(void *), - void* context); /** * @brief Get physical address from virtual */ @@ -941,35 +919,5 @@ hv_get_phys_addr(void *virt) return (ret); } - -/** - * KVP related structures - * - */ -typedef struct hv_vmbus_service { - hv_guid guid; /* Hyper-V GUID */ - char *name; /* name of service */ - boolean_t enabled; /* service enabled */ - hv_work_queue *work_queue; /* background work queue */ - - /* - * function to initialize service - */ - int (*init)(struct hv_vmbus_service *); - - /* - * function to process Hyper-V messages - */ - void (*callback)(void *); -} hv_vmbus_service; - -extern uint8_t* receive_buffer[]; -extern hv_vmbus_service service_table[]; extern uint32_t hv_vmbus_protocal_version; - -void hv_kvp_callback(void *context); -int hv_kvp_init(hv_vmbus_service *serv); -void hv_kvp_deinit(void); - #endif /* __HYPERV_H__ */ - diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index 64e7578..9a89b62 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -73,10 +73,7 @@ hv_nv_alloc_net_device(struct hv_device *device) netvsc_dev *net_dev; hn_softc_t *sc = device_get_softc(device->device); - net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_NOWAIT | M_ZERO); - if (net_dev == NULL) { - return (NULL); - } + net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO); net_dev->dev = device; net_dev->destroy = FALSE; @@ -135,15 +132,15 @@ hv_nv_get_next_send_section(netvsc_dev *net_dev) int i; for (i = 0; i < bitsmap_words; i++) { - idx = ffs(~bitsmap[i]); + idx = ffsl(~bitsmap[i]); if (0 == idx) continue; idx--; - if (i * BITS_PER_LONG + idx >= net_dev->send_section_count) - return (ret); + KASSERT(i * BITS_PER_LONG + idx < net_dev->send_section_count, + ("invalid i %d and idx %lu", i, idx)); - if (synch_test_and_set_bit(idx, &bitsmap[i])) + if (atomic_testandset_long(&bitsmap[i], idx)) continue; ret = i * BITS_PER_LONG + idx; @@ -223,11 +220,7 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device) init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; net_dev->rx_sections = malloc(net_dev->rx_section_count * - sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_NOWAIT); - if (net_dev->rx_sections == NULL) { - ret = EINVAL; - goto cleanup; - } + sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK); memcpy(net_dev->rx_sections, init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); @@ -325,11 +318,7 @@ 
hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device) BITS_PER_LONG); net_dev->send_section_bitsmap = malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC, - M_NOWAIT | M_ZERO); - if (NULL == net_dev->send_section_bitsmap) { - ret = ENOMEM; - goto cleanup; - } + M_WAITOK | M_ZERO); goto exit; @@ -788,8 +777,27 @@ hv_nv_on_send_completion(netvsc_dev *net_dev, if (NULL != net_vsc_pkt) { if (net_vsc_pkt->send_buf_section_idx != NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { - synch_change_bit(net_vsc_pkt->send_buf_section_idx, - net_dev->send_section_bitsmap); + u_long mask; + int idx; + + idx = net_vsc_pkt->send_buf_section_idx / + BITS_PER_LONG; + KASSERT(idx < net_dev->bitsmap_words, + ("invalid section index %u", + net_vsc_pkt->send_buf_section_idx)); + mask = 1UL << + (net_vsc_pkt->send_buf_section_idx % + BITS_PER_LONG); + + KASSERT(net_dev->send_section_bitsmap[idx] & + mask, + ("index bitmap 0x%lx, section index %u, " + "bitmap idx %d, bitmask 0x%lx", + net_dev->send_section_bitsmap[idx], + net_vsc_pkt->send_buf_section_idx, + idx, mask)); + atomic_clear_long( + &net_dev->send_section_bitsmap[idx], mask); } /* Notify the layer above us */ diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h index e684cc5..95dee17 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.h +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h @@ -39,9 +39,11 @@ #define __HV_NET_VSC_H__ #include <sys/param.h> +#include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/queue.h> +#include <sys/taskqueue.h> #include <sys/sx.h> #include <machine/bus.h> @@ -56,6 +58,8 @@ #include <dev/hyperv/include/hyperv.h> +#define HN_USE_TXDESC_BUFRING + MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) @@ -988,8 +992,67 @@ typedef struct { hv_bool_uint8_t link_state; } netvsc_device_info; +#ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); +#else +struct buf_ring; +#endif + +struct hn_rx_ring { + struct lro_ctrl hn_lro; + + /* Trust csum verification on host side */ + int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ + + u_long hn_csum_ip; + u_long hn_csum_tcp; + u_long hn_csum_udp; + u_long hn_csum_trusted; + u_long hn_lro_tried; + u_long hn_small_pkts; +} __aligned(CACHE_LINE_SIZE); + +#define HN_TRUST_HCSUM_IP 0x0001 +#define HN_TRUST_HCSUM_TCP 0x0002 +#define HN_TRUST_HCSUM_UDP 0x0004 + +struct hn_tx_ring { +#ifndef HN_USE_TXDESC_BUFRING + struct mtx hn_txlist_spin; + struct hn_txdesc_list hn_txlist; +#else + struct buf_ring *hn_txdesc_br; +#endif + int hn_txdesc_cnt; + int hn_txdesc_avail; + int hn_has_txeof; + + int hn_sched_tx; + void (*hn_txeof)(struct hn_tx_ring *); + struct taskqueue *hn_tx_taskq; + struct task hn_tx_task; + struct task hn_txeof_task; + + struct mtx hn_tx_lock; + struct hn_softc *hn_sc; + + int hn_direct_tx_size; + int hn_tx_chimney_size; + bus_dma_tag_t hn_tx_data_dtag; + uint64_t hn_csum_assist; + + u_long hn_no_txdescs; + u_long hn_send_failed; + u_long hn_txdma_failed; + u_long hn_tx_collapsed; + u_long hn_tx_chimney; + + /* Rarely used stuffs */ + struct hn_txdesc *hn_txdesc; + bus_dma_tag_t hn_tx_rndis_dtag; + struct sysctl_oid *hn_tx_sysctl_tree; +} __aligned(CACHE_LINE_SIZE); /* * Device-specific softc structure @@ -1009,44 +1072,22 @@ typedef struct hn_softc { struct hv_device *hn_dev_obj; netvsc_dev *net_dev; - int hn_txdesc_cnt; - struct hn_txdesc *hn_txdesc; - bus_dma_tag_t hn_tx_data_dtag; - bus_dma_tag_t hn_tx_rndis_dtag; - int hn_tx_chimney_size; - int hn_tx_chimney_max; + int 
hn_rx_ring_cnt; + struct hn_rx_ring *hn_rx_ring; - struct mtx hn_txlist_spin; - struct hn_txdesc_list hn_txlist; - int hn_txdesc_avail; - int hn_txeof; - - struct lro_ctrl hn_lro; - int hn_lro_hiwat; - - /* Trust tcp segments verification on host side */ - int hn_trust_hosttcp; - - u_long hn_csum_ip; - u_long hn_csum_tcp; - u_long hn_csum_trusted; - u_long hn_lro_tried; - u_long hn_small_pkts; - u_long hn_no_txdescs; - u_long hn_send_failed; - u_long hn_txdma_failed; - u_long hn_tx_collapsed; - u_long hn_tx_chimney; + int hn_tx_ring_cnt; + struct hn_tx_ring *hn_tx_ring; + int hn_tx_chimney_max; + struct taskqueue *hn_tx_taskq; + struct sysctl_oid *hn_tx_sysctl_tree; } hn_softc_t; - /* * Externs */ extern int hv_promisc_mode; void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status); -void netvsc_xmit_completion(void *context); void hv_nv_on_receive_completion(struct hv_device *device, uint64_t tid, uint32_t status); netvsc_dev *hv_nv_on_device_add(struct hv_device *device, diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index b3360ea..0f4425e 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -66,10 +66,12 @@ __FBSDID("$FreeBSD$"); #include <sys/module.h> #include <sys/kernel.h> #include <sys/socket.h> +#include <sys/proc.h> #include <sys/queue.h> #include <sys/lock.h> #include <sys/sx.h> #include <sys/sysctl.h> +#include <sys/buf_ring.h> #include <net/if.h> #include <net/if_arp.h> @@ -132,6 +134,8 @@ __FBSDID("$FreeBSD$"); /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 +#define HN_LROENT_CNT_DEF 128 + #define HN_RNDIS_MSG_LEN \ (sizeof(rndis_msg) + \ RNDIS_VLAN_PPI_SIZE + \ @@ -146,10 +150,14 @@ __FBSDID("$FreeBSD$"); #define HN_TX_DATA_SEGCNT_MAX \ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) +#define HN_DIRECT_TX_SIZE_DEF 128 + struct hn_txdesc { +#ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; +#endif struct mbuf *m; - struct hn_softc *sc; + struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ netvsc_packet netvsc_pkt; /* XXX to be removed */ @@ -165,23 +173,18 @@ struct hn_txdesc { #define HN_TXD_FLAG_DMAMAP 0x2 /* - * A unified flag for all outbound check sum flags is useful, - * and it helps avoiding unnecessary check sum calculation in - * network forwarding scenario. + * Only enable UDP checksum offloading when it is on 2012R2 or + * later. UDP checksum offloading doesn't work on earlier + * Windows releases. */ -#define HV_CSUM_FOR_OUTBOUND \ - (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO| \ - CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \ - CSUM_IP6_TSO|CSUM_IP6_ISCSI) - -/* XXX move to netinet/tcp_lro.h */ -#define HN_LRO_HIWAT_MAX 65535 -#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX +#define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) +#define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) + +#define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. 
*/ -#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu) -#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \ - ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \ - (hiwat) <= HN_LRO_HIWAT_MAX) +#define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) + +#define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when @@ -205,19 +208,71 @@ struct hn_txdesc { int hv_promisc_mode = 0; /* normal mode by default */ +SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD, NULL, "Hyper-V network interface"); + /* Trust tcp segements verification on host side. */ -static int hn_trust_hosttcp = 0; -TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp); +static int hn_trust_hosttcp = 1; +SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, + &hn_trust_hosttcp, 0, + "Trust tcp segement verification on host side, " + "when csum info is missing (global setting)"); + +/* Trust udp datagrams verification on host side. */ +static int hn_trust_hostudp = 1; +SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, + &hn_trust_hostudp, 0, + "Trust udp datagram verification on host side, " + "when csum info is missing (global setting)"); + +/* Trust ip packets verification on host side. */ +static int hn_trust_hostip = 1; +SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, + &hn_trust_hostip, 0, + "Trust ip packet verification on host side, " + "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; -TUNABLE_INT("dev.hn.tso_maxlen", &hn_tso_maxlen); +SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, + &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; -TUNABLE_INT("dev.hn.tx_chimney_size", &hn_tx_chimney_size); +SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, + &hn_tx_chimney_size, 0, "Chimney send packet size limit"); + +/* Limit the size of packet for direct transmission */ +static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; +SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, + &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); + +#if defined(INET) || defined(INET6) +#if __FreeBSD_version >= 1100095 +static int hn_lro_entry_count = HN_LROENT_CNT_DEF; +SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, + &hn_lro_entry_count, 0, "LRO entry count"); +#endif +#endif + +static int hn_share_tx_taskq = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, + &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); + +static struct taskqueue *hn_tx_taskq; + +#ifndef HN_USE_TXDESC_BUFRING +static int hn_use_txdesc_bufring = 0; +#else +static int hn_use_txdesc_bufring = 1; +#endif +SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, + &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); + +static int hn_bind_tx_taskq = -1; +SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, + &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); /* * Forward declarations @@ -226,82 +281,37 @@ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); -static void hn_start_locked(struct ifnet *ifp); +static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); +static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct 
ifmediareq *ifmr); -#ifdef HN_LRO_HIWAT -static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS); +#if __FreeBSD_version >= 1100099 +static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif +static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); -static int hn_check_iplen(const struct mbuf *, int); -static int hn_create_tx_ring(struct hn_softc *sc); -static void hn_destroy_tx_ring(struct hn_softc *sc); - -static __inline void -hn_set_lro_hiwat(struct hn_softc *sc, int hiwat) -{ - sc->hn_lro_hiwat = hiwat; -#ifdef HN_LRO_HIWAT - sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; -#endif -} - -/* - * NetVsc get message transport protocol type - */ -static uint32_t get_transport_proto_type(struct mbuf *m_head) -{ - uint32_t ret_val = TRANSPORT_TYPE_NOT_IP; - uint16_t ether_type = 0; - int ether_len = 0; - struct ether_vlan_header *eh; -#ifdef INET - struct ip *iph; -#endif -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - - eh = mtod(m_head, struct ether_vlan_header*); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { - ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - ether_type = eh->evl_proto; - } else { - ether_len = ETHER_HDR_LEN; - ether_type = eh->evl_encap_proto; - } - - switch (ntohs(ether_type)) { -#ifdef INET6 - case ETHERTYPE_IPV6: - ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len); - - if (IPPROTO_TCP == ip6->ip6_nxt) { - ret_val = TRANSPORT_TYPE_IPV6_TCP; - } else if (IPPROTO_UDP == ip6->ip6_nxt) { - ret_val = TRANSPORT_TYPE_IPV6_UDP; - } - break; +#if __FreeBSD_version < 1100095 +static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); +#else +static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); #endif -#ifdef INET - case ETHERTYPE_IP: - iph = (struct ip *)(m_head->m_data + ether_len); - - if (IPPROTO_TCP == iph->ip_p) { - ret_val = TRANSPORT_TYPE_IPV4_TCP; - } else if (IPPROTO_UDP == iph->ip_p) { - ret_val = TRANSPORT_TYPE_IPV4_UDP; - } - break; -#endif - default: - ret_val = TRANSPORT_TYPE_NOT_IP; - break; - } - - return (ret_val); -} +static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_check_iplen(const struct mbuf *, int); +static int hn_create_tx_ring(struct hn_softc *, int); +static void hn_destroy_tx_ring(struct hn_tx_ring *); +static int hn_create_tx_data(struct hn_softc *); +static void hn_destroy_tx_data(struct hn_softc *); +static void hn_start_taskfunc(void *, int); +static void hn_start_txeof_taskfunc(void *, int); +static void hn_stop_tx_tasks(struct hn_softc *); +static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); +static void hn_create_rx_data(struct hn_softc *sc); +static void hn_destroy_rx_data(struct hn_softc *sc); +static void hn_set_tx_chimney_size(struct hn_softc *, int); static int hn_ifmedia_upd(struct ifnet *ifp __unused) @@ -353,6 +363,19 @@ netvsc_probe(device_t dev) return (ENXIO); } +static void +hn_cpuset_setthread_task(void *xmask, int pending __unused) +{ + cpuset_t *mask = xmask; + int error; + + error = cpuset_setthread(curthread->td_tid, mask); + if (error) { + panic("curthread=%ju: can't pin; error=%d", + (uintmax_t)curthread->td_tid, error); + } +} + /* * Standard attach entry point. 
* @@ -367,8 +390,6 @@ netvsc_attach(device_t dev) hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; - struct sysctl_oid_list *child; - struct sysctl_ctx_list *ctx; int error; #if __FreeBSD_version >= 1100045 int tso_maxlen; @@ -382,13 +403,28 @@ netvsc_attach(device_t dev) bzero(sc, sizeof(hn_softc_t)); sc->hn_unit = unit; sc->hn_dev = dev; - sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF; - sc->hn_trust_hosttcp = hn_trust_hosttcp; - - error = hn_create_tx_ring(sc); - if (error) - goto failed; + if (hn_tx_taskq == NULL) { + sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, + taskqueue_thread_enqueue, &sc->hn_tx_taskq); + taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", + device_get_nameunit(dev)); + if (hn_bind_tx_taskq >= 0) { + int cpu = hn_bind_tx_taskq; + struct task cpuset_task; + cpuset_t cpu_set; + + if (cpu > mp_ncpus - 1) + cpu = mp_ncpus - 1; + CPU_SETOF(cpu, &cpu_set); + TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, + &cpu_set); + taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task); + taskqueue_drain(sc->hn_tx_taskq, &cpuset_task); + } + } else { + sc->hn_tx_taskq = hn_tx_taskq; + } NV_LOCK_INIT(sc, "NetVSCLock"); sc->hn_dev_obj = device_ctx; @@ -396,6 +432,12 @@ netvsc_attach(device_t dev) ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; + error = hn_create_tx_data(sc); + if (error) + goto failed; + + hn_create_rx_data(sc); + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_dunit = unit; ifp->if_dname = NETVSC_DEVNAME; @@ -426,15 +468,7 @@ netvsc_attach(device_t dev) ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; - /* - * Only enable UDP checksum offloading when it is on 2012R2 or - * later. UDP checksum offloading doesn't work on earlier - * Windows releases. 
- */ - if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) - ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; - else - ifp->if_hwassist = CSUM_TCP | CSUM_TSO; + ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; error = hv_rf_on_device_add(device_ctx, &device_info); if (error) @@ -444,15 +478,6 @@ netvsc_attach(device_t dev) sc->hn_carrier = 1; } -#if defined(INET) || defined(INET6) - tcp_lro_init(&sc->hn_lro); - /* Driver private LRO settings */ - sc->hn_lro.ifp = ifp; -#ifdef HN_LRO_HIWAT - sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; -#endif -#endif /* INET || INET6 */ - #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) @@ -472,87 +497,14 @@ netvsc_attach(device_t dev) #endif sc->hn_tx_chimney_max = sc->net_dev->send_section_size; - sc->hn_tx_chimney_size = sc->hn_tx_chimney_max; + hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_tx_chimney_max) - sc->hn_tx_chimney_size = hn_tx_chimney_size; - - ctx = device_get_sysctl_ctx(dev); - child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); - - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued", - CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued"); - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed", - CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried", - CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries"); -#ifdef HN_LRO_HIWAT - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat", - CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl, - "I", "LRO high watermark"); -#endif - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp", - CTLFLAG_RW, &sc->hn_trust_hosttcp, 0, - "Trust tcp segement verification on host side, " - "when csum info is missing"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip", - CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp", - CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted", - CTLFLAG_RW, &sc->hn_csum_trusted, - "# of TCP segements that we trust host's csum verification"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts", - CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs", - CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed", - CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed", - CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed", - CTLFLAG_RW, &sc->hn_tx_collapsed, "# of TX mbuf collapsed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney", - CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send"); - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", - CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs"); - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", - CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs"); - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", - CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, - "Chimney send packet size upper boundary"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", - CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl, - "I", "Chimney send packet size limit"); - - if (unit == 0) { - struct sysctl_ctx_list *dc_ctx; - struct sysctl_oid_list *dc_child; - devclass_t dc; - - /* - * Add 
sysctl nodes for devclass - */ - dc = device_get_devclass(dev); - dc_ctx = devclass_get_sysctl_ctx(dc); - dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc)); - - SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp", - CTLFLAG_RD, &hn_trust_hosttcp, 0, - "Trust tcp segement verification on host side, " - "when csum info is missing (global setting)"); - SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tx_chimney_size", - CTLFLAG_RD, &hn_tx_chimney_size, 0, - "Chimney send packet size limit"); -#if __FreeBSD_version >= 1100045 - SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tso_maxlen", - CTLFLAG_RD, &hn_tso_maxlen, 0, "TSO burst limit"); -#endif - } + hn_set_tx_chimney_size(sc, hn_tx_chimney_size); return (0); failed: - hn_destroy_tx_ring(sc); + hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); @@ -583,11 +535,14 @@ netvsc_detach(device_t dev) hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); + hn_stop_tx_tasks(sc); + ifmedia_removeall(&sc->hn_media); -#if defined(INET) || defined(INET6) - tcp_lro_free(&sc->hn_lro); -#endif - hn_destroy_tx_ring(sc); + hn_destroy_rx_data(sc); + hn_destroy_tx_data(sc); + + if (sc->hn_tx_taskq != hn_tx_taskq) + taskqueue_free(sc->hn_tx_taskq); return (0); } @@ -602,13 +557,13 @@ netvsc_shutdown(device_t dev) } static __inline int -hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd, +hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; - error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap, + error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; @@ -618,13 +573,13 @@ hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd, return ENOBUFS; else *m_head = m = m_new; - sc->hn_tx_collapsed++; + txr->hn_tx_collapsed++; - error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, + error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { - bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap, + bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } @@ -632,20 +587,20 @@ hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd, } static __inline void -hn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd) +hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { - bus_dmamap_sync(sc->hn_tx_data_dtag, + bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); - bus_dmamap_unload(sc->hn_tx_data_dtag, + bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int -hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd) +hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, @@ -655,7 +610,7 @@ hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd) if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; - hn_txdesc_dmamap_unload(sc, txd); + hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; @@ -663,33 +618,45 @@ hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd) txd->flags |= HN_TXD_FLAG_ONLIST; - mtx_lock_spin(&sc->hn_txlist_spin); - KASSERT(sc->hn_txdesc_avail >= 0 && - sc->hn_txdesc_avail < sc->hn_txdesc_cnt, - ("txdesc_put: 
invalid txd avail %d", sc->hn_txdesc_avail)); - sc->hn_txdesc_avail++; - SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link); - mtx_unlock_spin(&sc->hn_txlist_spin); +#ifndef HN_USE_TXDESC_BUFRING + mtx_lock_spin(&txr->hn_txlist_spin); + KASSERT(txr->hn_txdesc_avail >= 0 && + txr->hn_txdesc_avail < txr->hn_txdesc_cnt, + ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); + txr->hn_txdesc_avail++; + SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); + mtx_unlock_spin(&txr->hn_txlist_spin); +#else + atomic_add_int(&txr->hn_txdesc_avail, 1); + buf_ring_enqueue(txr->hn_txdesc_br, txd); +#endif return 1; } static __inline struct hn_txdesc * -hn_txdesc_get(struct hn_softc *sc) +hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; - mtx_lock_spin(&sc->hn_txlist_spin); - txd = SLIST_FIRST(&sc->hn_txlist); +#ifndef HN_USE_TXDESC_BUFRING + mtx_lock_spin(&txr->hn_txlist_spin); + txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { - KASSERT(sc->hn_txdesc_avail > 0, - ("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail)); - sc->hn_txdesc_avail--; - SLIST_REMOVE_HEAD(&sc->hn_txlist, link); + KASSERT(txr->hn_txdesc_avail > 0, + ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); + txr->hn_txdesc_avail--; + SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } - mtx_unlock_spin(&sc->hn_txlist_spin); + mtx_unlock_spin(&txr->hn_txlist_spin); +#else + txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); +#endif if (txd != NULL) { +#ifdef HN_USE_TXDESC_BUFRING + atomic_subtract_int(&txr->hn_txdesc_avail, 1); +#endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; @@ -707,213 +674,133 @@ hn_txdesc_hold(struct hn_txdesc *txd) atomic_add_int(&txd->refs, 1); } -/* - * Send completion processing - * - * Note: It looks like offset 0 of buf is reserved to hold the softc - * pointer. The sc pointer is not currently needed in this function, and - * it is not presently populated by the TX function. - */ -void -netvsc_xmit_completion(void *context) +static void +hn_tx_done(void *xpkt) { - netvsc_packet *packet = context; + netvsc_packet *packet = xpkt; struct hn_txdesc *txd; - struct hn_softc *sc; + struct hn_tx_ring *txr; txd = (struct hn_txdesc *)(uintptr_t) packet->compl.send.send_completion_tid; - sc = txd->sc; - sc->hn_txeof = 1; - hn_txdesc_put(sc, txd); + txr = txd->txr; + txr->hn_has_txeof = 1; + hn_txdesc_put(txr, txd); } void netvsc_channel_rollup(struct hv_device *device_ctx) { struct hn_softc *sc = device_get_softc(device_ctx->device); - struct ifnet *ifp; + struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; /* TODO: vRSS */ +#if defined(INET) || defined(INET6) + struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */ + struct lro_ctrl *lro = &rxr->hn_lro; + struct lro_entry *queued; + + while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { + SLIST_REMOVE_HEAD(&lro->lro_active, next); + tcp_lro_flush(lro, queued); + } +#endif - if (!sc->hn_txeof) + if (!txr->hn_has_txeof) return; - sc->hn_txeof = 0; - ifp = sc->hn_ifp; - NV_LOCK(sc); - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - hn_start_locked(ifp); - NV_UNLOCK(sc); + txr->hn_has_txeof = 0; + txr->hn_txeof(txr); } /* - * Start a transmit of one or more packets + * NOTE: + * If this function fails, then both txd and m_head0 will be freed. 
*/ -static void -hn_start_locked(struct ifnet *ifp) +static int +hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { - hn_softc_t *sc = ifp->if_softc; - struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); - netvsc_dev *net_dev = sc->net_dev; + bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; + int error, nsegs, i; + struct mbuf *m_head = *m_head0; netvsc_packet *packet; - struct mbuf *m_head, *m; - struct ether_vlan_header *eh; rndis_msg *rndis_mesg; rndis_packet *rndis_pkt; rndis_per_packet_info *rppi; - ndis_8021q_info *rppi_vlan_info; - rndis_tcp_ip_csum_info *csum_info; - rndis_tcp_tso_info *tso_info; - int ether_len; - uint32_t rndis_msg_size = 0; - uint32_t trans_proto_type; - uint32_t send_buf_section_idx = - NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; - - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; - - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; - int error, nsegs, i, send_failed = 0; - struct hn_txdesc *txd; + uint32_t rndis_msg_size; - IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; + packet = &txd->netvsc_pkt; + packet->is_data_pkt = TRUE; + packet->tot_data_buf_len = m_head->m_pkthdr.len; - txd = hn_txdesc_get(sc); - if (txd == NULL) { - sc->hn_no_txdescs++; - IF_PREPEND(&ifp->if_snd, m_head); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - break; - } + /* + * extension points to the area reserved for the + * rndis_filter_packet, which is placed just after + * the netvsc_packet (and rppi struct, if present; + * length is updated later). + */ + rndis_mesg = txd->rndis_msg; + /* XXX not necessary */ + memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); + rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; - packet = &txd->netvsc_pkt; - /* XXX not necessary */ - memset(packet, 0, sizeof(*packet)); + rndis_pkt = &rndis_mesg->msg.packet; + rndis_pkt->data_offset = sizeof(rndis_packet); + rndis_pkt->data_length = packet->tot_data_buf_len; + rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); - packet->is_data_pkt = TRUE; + rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); - /* Initialize it from the mbuf */ - packet->tot_data_buf_len = m_head->m_pkthdr.len; + if (m_head->m_flags & M_VLANTAG) { + ndis_8021q_info *rppi_vlan_info; - /* - * extension points to the area reserved for the - * rndis_filter_packet, which is placed just after - * the netvsc_packet (and rppi struct, if present; - * length is updated later). - */ - rndis_mesg = txd->rndis_msg; - /* XXX not necessary */ - memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN); - rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG; + rndis_msg_size += RNDIS_VLAN_PPI_SIZE; + rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, + ieee_8021q_info); - rndis_pkt = &rndis_mesg->msg.packet; - rndis_pkt->data_offset = sizeof(rndis_packet); - rndis_pkt->data_length = packet->tot_data_buf_len; - rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet); + rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + + rppi->per_packet_info_offset); + rppi_vlan_info->u1.s1.vlan_id = + m_head->m_pkthdr.ether_vtag & 0xfff; + } - rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet); + if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { + rndis_tcp_tso_info *tso_info; + struct ether_vlan_header *eh; + int ether_len; /* - * If the Hyper-V infrastructure needs to embed a VLAN tag, - * initialize netvsc_packet and rppi struct values as needed. 
+ * XXX need m_pullup and use mtodo */ - if (m_head->m_flags & M_VLANTAG) { - /* - * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag - * into the frame. - */ - rndis_msg_size += RNDIS_VLAN_PPI_SIZE; - - rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE, - ieee_8021q_info); - - /* VLAN info immediately follows rppi struct */ - rppi_vlan_info = (ndis_8021q_info *)((char*)rppi + - rppi->per_packet_info_offset); - /* FreeBSD does not support CFI or priority */ - rppi_vlan_info->u1.s1.vlan_id = - m_head->m_pkthdr.ether_vtag & 0xfff; - } - - /* Only check the flags for outbound and ignore the ones for inbound */ - if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) { - goto pre_send; - } - eh = mtod(m_head, struct ether_vlan_header*); - if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; - } else { + else ether_len = ETHER_HDR_LEN; - } - - trans_proto_type = get_transport_proto_type(m_head); - if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) { - goto pre_send; - } - - /* - * TSO packet needless to setup the send side checksum - * offload. - */ - if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { - goto do_tso; - } - /* setup checksum offload */ - rndis_msg_size += RNDIS_CSUM_PPI_SIZE; - rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, - tcpip_chksum_info); - csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi + - rppi->per_packet_info_offset); - - if (trans_proto_type & (TYPE_IPV4 << 16)) { - csum_info->xmit.is_ipv4 = 1; - } else { - csum_info->xmit.is_ipv6 = 1; - } - - if (trans_proto_type & TYPE_TCP) { - csum_info->xmit.tcp_csum = 1; - csum_info->xmit.tcp_header_offset = 0; - } else if (trans_proto_type & TYPE_UDP) { - csum_info->xmit.udp_csum = 1; - } - - goto pre_send; - -do_tso: - /* setup TCP segmentation offload */ rndis_msg_size += RNDIS_TSO_PPI_SIZE; rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE, tcp_large_send_info); - - tso_info = (rndis_tcp_tso_info *)((char *)rppi + + + tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + rppi->per_packet_info_offset); tso_info->lso_v2_xmit.type = RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; - + #ifdef INET - if (trans_proto_type & (TYPE_IPV4 << 16)) { + if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); - + tso_info->lso_v2_xmit.ip_version = RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip->ip_len = 0; ip->ip_sum = 0; - + th->th_sum = in_pseudo(ip->ip_src.s_addr, - ip->ip_dst.s_addr, - htons(IPPROTO_TCP)); + ip->ip_dst.s_addr, htons(IPPROTO_TCP)); } #endif #if defined(INET6) && defined(INET) @@ -921,8 +808,8 @@ do_tso: #endif #ifdef INET6 { - struct ip6_hdr *ip6 = - (struct ip6_hdr *)(m_head->m_data + ether_len); + struct ip6_hdr *ip6 = (struct ip6_hdr *) + (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); tso_info->lso_v2_xmit.ip_version = @@ -933,146 +820,233 @@ do_tso: #endif tso_info->lso_v2_xmit.tcp_header_offset = 0; tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz; + } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { + rndis_tcp_ip_csum_info *csum_info; -pre_send: - rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; - packet->tot_data_buf_len = rndis_mesg->msg_len; - - /* send packet with send buffer */ - if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) { - send_buf_section_idx 
= - hv_nv_get_next_send_section(net_dev); - if (send_buf_section_idx != - NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { - char *dest = ((char *)net_dev->send_buf + - send_buf_section_idx * - net_dev->send_section_size); - - memcpy(dest, rndis_mesg, rndis_msg_size); - dest += rndis_msg_size; - for (m = m_head; m != NULL; m = m->m_next) { - if (m->m_len) { - memcpy(dest, - (void *)mtod(m, vm_offset_t), - m->m_len); - dest += m->m_len; - } - } + rndis_msg_size += RNDIS_CSUM_PPI_SIZE; + rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE, + tcpip_chksum_info); + csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + + rppi->per_packet_info_offset); - packet->send_buf_section_idx = - send_buf_section_idx; - packet->send_buf_section_size = - packet->tot_data_buf_len; - packet->page_buf_count = 0; - sc->hn_tx_chimney++; - goto do_send; - } - } + csum_info->xmit.is_ipv4 = 1; + if (m_head->m_pkthdr.csum_flags & CSUM_IP) + csum_info->xmit.ip_header_csum = 1; - error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs); - if (error) { - int freed; + if (m_head->m_pkthdr.csum_flags & CSUM_TCP) { + csum_info->xmit.tcp_csum = 1; + csum_info->xmit.tcp_header_offset = 0; + } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) { + csum_info->xmit.udp_csum = 1; + } + } - /* - * This mbuf is not linked w/ the txd yet, so free - * it now. - */ - m_freem(m_head); - freed = hn_txdesc_put(sc, txd); - KASSERT(freed != 0, - ("fail to free txd upon txdma error")); + rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size; + packet->tot_data_buf_len = rndis_mesg->msg_len; - sc->hn_txdma_failed++; - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - continue; + /* + * Chimney send, if the packet could fit into one chimney buffer. + */ + if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) { + netvsc_dev *net_dev = txr->hn_sc->net_dev; + uint32_t send_buf_section_idx; + + send_buf_section_idx = + hv_nv_get_next_send_section(net_dev); + if (send_buf_section_idx != + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) { + uint8_t *dest = ((uint8_t *)net_dev->send_buf + + (send_buf_section_idx * + net_dev->send_section_size)); + + memcpy(dest, rndis_mesg, rndis_msg_size); + dest += rndis_msg_size; + m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); + + packet->send_buf_section_idx = send_buf_section_idx; + packet->send_buf_section_size = + packet->tot_data_buf_len; + packet->page_buf_count = 0; + txr->hn_tx_chimney++; + goto done; } + } - packet->page_buf_count = nsegs + - HV_RF_NUM_TX_RESERVED_PAGE_BUFS; - - /* send packet with page buffer */ - packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr); - packet->page_buffers[0].offset = - txd->rndis_msg_paddr & PAGE_MASK; - packet->page_buffers[0].length = rndis_msg_size; + error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); + if (error) { + int freed; /* - * Fill the page buffers with mbuf info starting at index - * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. + * This mbuf is not linked w/ the txd yet, so free it now. 
*/ - for (i = 0; i < nsegs; ++i) { - hv_vmbus_page_buffer *pb = &packet->page_buffers[ - i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; + m_freem(m_head); + *m_head0 = NULL; - pb->pfn = atop(segs[i].ds_addr); - pb->offset = segs[i].ds_addr & PAGE_MASK; - pb->length = segs[i].ds_len; - } + freed = hn_txdesc_put(txr, txd); + KASSERT(freed != 0, + ("fail to free txd upon txdma error")); + + txr->hn_txdma_failed++; + if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); + return error; + } + *m_head0 = m_head; + + packet->page_buf_count = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS; + + /* send packet with page buffer */ + packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr); + packet->page_buffers[0].offset = txd->rndis_msg_paddr & PAGE_MASK; + packet->page_buffers[0].length = rndis_msg_size; - packet->send_buf_section_idx = - NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; - packet->send_buf_section_size = 0; + /* + * Fill the page buffers with mbuf info starting at index + * HV_RF_NUM_TX_RESERVED_PAGE_BUFS. + */ + for (i = 0; i < nsegs; ++i) { + hv_vmbus_page_buffer *pb = &packet->page_buffers[ + i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS]; -do_send: - txd->m = m_head; + pb->pfn = atop(segs[i].ds_addr); + pb->offset = segs[i].ds_addr & PAGE_MASK; + pb->length = segs[i].ds_len; + } + + packet->send_buf_section_idx = + NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX; + packet->send_buf_section_size = 0; +done: + txd->m = m_head; + + /* Set the completion routine */ + packet->compl.send.on_send_completion = hn_tx_done; + packet->compl.send.send_completion_context = packet; + packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd; + + return 0; +} - /* Set the completion routine */ - packet->compl.send.on_send_completion = netvsc_xmit_completion; - packet->compl.send.send_completion_context = packet; - packet->compl.send.send_completion_tid = - (uint64_t)(uintptr_t)txd; +/* + * NOTE: + * If this function fails, then txd will be freed, but the mbuf + * associated w/ the txd will _not_ be freed. + */ +static int +hn_send_pkt(struct ifnet *ifp, struct hv_device *device_ctx, + struct hn_tx_ring *txr, struct hn_txdesc *txd) +{ + int error, send_failed = 0; again: + /* + * Make sure that txd is not freed before ETHER_BPF_MTAP. + */ + hn_txdesc_hold(txd); + error = hv_nv_on_send(device_ctx, &txd->netvsc_pkt); + if (!error) { + ETHER_BPF_MTAP(ifp, txd->m); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + } + hn_txdesc_put(txr, txd); + + if (__predict_false(error)) { + int freed; + /* - * Make sure that txd is not freed before ETHER_BPF_MTAP. + * This should "really rarely" happen. + * + * XXX Too many RX to be acked or too many sideband + * commands to run? Ask netvsc_channel_rollup() + * to kick start later. */ - hn_txdesc_hold(txd); - error = hv_nv_on_send(device_ctx, packet); - if (!error) { - ETHER_BPF_MTAP(ifp, m_head); - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + txr->hn_has_txeof = 1; + if (!send_failed) { + txr->hn_send_failed++; + send_failed = 1; + /* + * Try sending again after set hn_has_txeof; + * in case that we missed the last + * netvsc_channel_rollup(). + */ + goto again; } - hn_txdesc_put(sc, txd); + if_printf(ifp, "send failed\n"); - if (__predict_false(error)) { - int freed; + /* + * Caller will perform further processing on the + * associated mbuf, so don't free it in hn_txdesc_put(); + * only unload it from the DMA map in hn_txdesc_put(), + * if it was loaded. 
+ */ + txd->m = NULL; + freed = hn_txdesc_put(txr, txd); + KASSERT(freed != 0, + ("fail to free txd upon send error")); - /* - * This should "really rarely" happen. - * - * XXX Too many RX to be acked or too many sideband - * commands to run? Ask netvsc_channel_rollup() - * to kick start later. - */ - sc->hn_txeof = 1; - if (!send_failed) { - sc->hn_send_failed++; - send_failed = 1; - /* - * Try sending again after set hn_txeof; - * in case that we missed the last - * netvsc_channel_rollup(). - */ - goto again; - } - if_printf(ifp, "send failed\n"); + txr->hn_send_failed++; + } + return error; +} + +/* + * Start a transmit of one or more packets + */ +static int +hn_start_locked(struct hn_tx_ring *txr, int len) +{ + struct hn_softc *sc = txr->hn_sc; + struct ifnet *ifp = sc->hn_ifp; + struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); + + KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); + mtx_assert(&txr->hn_tx_lock, MA_OWNED); + + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return 0; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + struct hn_txdesc *txd; + struct mbuf *m_head; + int error; + + IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + if (len > 0 && m_head->m_pkthdr.len > len) { /* - * This mbuf will be prepended, don't free it - * in hn_txdesc_put(); only unload it from the - * DMA map in hn_txdesc_put(), if it was loaded. + * This sending could be time consuming; let callers + * dispatch this packet sending (and sending of any + * following up packets) to tx taskqueue. */ - txd->m = NULL; - freed = hn_txdesc_put(sc, txd); - KASSERT(freed != 0, - ("fail to free txd upon send error")); - - sc->hn_send_failed++; - IF_PREPEND(&ifp->if_snd, m_head); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + return 1; + } + + txd = hn_txdesc_get(txr); + if (txd == NULL) { + txr->hn_no_txdescs++; + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + break; + } + + error = hn_encap(txr, txd, &m_head); + if (error) { + /* Both txd and m_head are freed */ + continue; + } + + error = hn_send_pkt(ifp, device_ctx, txr, txd); + if (__predict_false(error)) { + /* txd is freed, but m_head is not */ + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } + return 0; } /* @@ -1162,11 +1136,11 @@ int netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info) { - hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device); + struct hn_softc *sc = device_get_softc(device_ctx->device); + struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */ struct mbuf *m_new; struct ifnet *ifp; - device_t dev = device_ctx->device; - int size, do_lro = 0; + int size, do_lro = 0, do_csum = 1; if (sc == NULL) { return (0); /* TODO: KYS how can this be! */ @@ -1192,7 +1166,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, memcpy(mtod(m_new, void *), packet->data, packet->tot_data_buf_len); m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; - sc->hn_small_pkts++; + rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. 
For packets 2K or less, @@ -1208,7 +1182,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { - device_printf(dev, "alloc mbuf failed.\n"); + if_printf(ifp, "alloc mbuf failed.\n"); return (0); } @@ -1216,21 +1190,28 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, } m_new->m_pkthdr.rcvif = ifp; + if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) + do_csum = 0; + /* receive side checksum offload */ - if (NULL != csum_info) { + if (csum_info != NULL) { /* IP csum offload */ - if (csum_info->receive.ip_csum_succeeded) { + if (csum_info->receive.ip_csum_succeeded && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); - sc->hn_csum_ip++; + rxr->hn_csum_ip++; } - /* TCP csum offload */ - if (csum_info->receive.tcp_csum_succeeded) { + /* TCP/UDP csum offload */ + if ((csum_info->receive.tcp_csum_succeeded || + csum_info->receive.udp_csum_succeeded) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; - sc->hn_csum_tcp++; + if (csum_info->receive.tcp_csum_succeeded) + rxr->hn_csum_tcp++; + else + rxr->hn_csum_udp++; } if (csum_info->receive.ip_csum_succeeded && @@ -1261,8 +1242,10 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { - if (sc->hn_trust_hosttcp) { - sc->hn_csum_trusted++; + if (do_csum && + (rxr->hn_trust_hcsum & + HN_TRUST_HCSUM_TCP)) { + rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); @@ -1270,6 +1253,21 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, } /* Rely on SW csum verification though... */ do_lro = 1; + } else if (pr == IPPROTO_UDP) { + if (do_csum && + (rxr->hn_trust_hcsum & + HN_TRUST_HCSUM_UDP)) { + rxr->hn_csum_trusted++; + m_new->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | + CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m_new->m_pkthdr.csum_data = 0xffff; + } + } else if (pr != IPPROTO_DONE && do_csum && + (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { + rxr->hn_csum_trusted++; + m_new->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } @@ -1289,10 +1287,10 @@ skip: if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) - struct lro_ctrl *lro = &sc->hn_lro; + struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { - sc->hn_lro_tried++; + rxr->hn_lro_tried++; if (tcp_lro_rx(lro, m_new, 0) == 0) { /* DONE! */ return 0; @@ -1308,18 +1306,8 @@ skip: } void -netvsc_recv_rollup(struct hv_device *device_ctx) +netvsc_recv_rollup(struct hv_device *device_ctx __unused) { -#if defined(INET) || defined(INET6) - hn_softc_t *sc = device_get_softc(device_ctx->device); - struct lro_ctrl *lro = &sc->hn_lro; - struct lro_entry *queued; - - while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { - SLIST_REMOVE_HEAD(&lro->lro_active, next); - tcp_lro_flush(lro, queued); - } -#endif } /* @@ -1377,12 +1365,23 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; + +#if __FreeBSD_version >= 1100099 /* - * Make sure that LRO high watermark is still valid, - * after MTU change (the 2*MTU limit). + * Make sure that LRO aggregation length limit is still + * valid, after the MTU change. 
*/ - if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat)) - hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp)); + NV_LOCK(sc); + if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < + HN_LRO_LENLIM_MIN(ifp)) { + int i; + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + sc->hn_rx_ring[i].hn_lro.lro_length_lim = + HN_LRO_LENLIM_MIN(ifp); + } + } + NV_UNLOCK(sc); +#endif do { NV_LOCK(sc); @@ -1422,8 +1421,10 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } sc->hn_tx_chimney_max = sc->net_dev->send_section_size; - if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max) - sc->hn_tx_chimney_size = sc->hn_tx_chimney_max; + if (sc->hn_tx_ring[0].hn_tx_chimney_size > + sc->hn_tx_chimney_max) + hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max); + hn_ifinit_locked(sc); NV_LOCK(sc); @@ -1483,47 +1484,43 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = 0; break; case SIOCSIFCAP: + NV_LOCK(sc); + mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { - if (IFCAP_TXCSUM & ifp->if_capenable) { - ifp->if_capenable &= ~IFCAP_TXCSUM; - ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); + ifp->if_capenable ^= IFCAP_TXCSUM; + if (ifp->if_capenable & IFCAP_TXCSUM) { + ifp->if_hwassist |= + sc->hn_tx_ring[0].hn_csum_assist; } else { - ifp->if_capenable |= IFCAP_TXCSUM; - /* - * Only enable UDP checksum offloading on - * Windows Server 2012R2 or later releases. - */ - if (hv_vmbus_protocal_version >= - HV_VMBUS_VERSION_WIN8_1) { - ifp->if_hwassist |= - (CSUM_TCP | CSUM_UDP); - } else { - ifp->if_hwassist |= CSUM_TCP; - } + ifp->if_hwassist &= + ~sc->hn_tx_ring[0].hn_csum_assist; } } - if (mask & IFCAP_RXCSUM) { - if (IFCAP_RXCSUM & ifp->if_capenable) { - ifp->if_capenable &= ~IFCAP_RXCSUM; - } else { - ifp->if_capenable |= IFCAP_RXCSUM; - } - } + if (mask & IFCAP_RXCSUM) + ifp->if_capenable ^= IFCAP_RXCSUM; + if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; - ifp->if_hwassist ^= CSUM_IP_TSO; + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= CSUM_IP_TSO; + else + ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; - ifp->if_hwassist ^= CSUM_IP6_TSO; + if (ifp->if_capenable & IFCAP_TSO6) + ifp->if_hwassist |= CSUM_IP6_TSO; + else + ifp->if_hwassist &= ~CSUM_IP6_TSO; } + NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: @@ -1566,7 +1563,8 @@ hn_stop(hn_softc_t *sc) if (bootverbose) printf(" Closing Device ...\n"); - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + atomic_clear_int(&ifp->if_drv_flags, + (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; @@ -1579,16 +1577,56 @@ hn_stop(hn_softc_t *sc) static void hn_start(struct ifnet *ifp) { - hn_softc_t *sc; + struct hn_softc *sc = ifp->if_softc; + struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; - sc = ifp->if_softc; - NV_LOCK(sc); - if (sc->temp_unusable) { - NV_UNLOCK(sc); - return; + if (txr->hn_sched_tx) + goto do_sched; + + if (mtx_trylock(&txr->hn_tx_lock)) { + int sched; + + sched = hn_start_locked(txr, txr->hn_direct_tx_size); + mtx_unlock(&txr->hn_tx_lock); + if (!sched) + return; + } +do_sched: + taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); +} + +static void +hn_start_txeof(struct hn_tx_ring *txr) +{ + struct hn_softc *sc = txr->hn_sc; + struct ifnet *ifp = sc->hn_ifp; + + KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); + + if (txr->hn_sched_tx) + goto do_sched; + + if (mtx_trylock(&txr->hn_tx_lock)) { + int sched; + + 
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + sched = hn_start_locked(txr, txr->hn_direct_tx_size); + mtx_unlock(&txr->hn_tx_lock); + if (sched) { + taskqueue_enqueue(txr->hn_tx_taskq, + &txr->hn_tx_task); + } + } else { +do_sched: + /* + * Release the OACTIVE earlier, with the hope, that + * others could catch up. The task will clear the + * flag again with the hn_tx_lock to avoid possible + * races. + */ + atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } - hn_start_locked(ifp); - NV_UNLOCK(sc); } /* @@ -1615,8 +1653,8 @@ hn_ifinit_locked(hn_softc_t *sc) } else { sc->hn_initdone = 1; } - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } @@ -1659,26 +1697,90 @@ hn_watchdog(struct ifnet *ifp) } #endif -#ifdef HN_LRO_HIWAT +#if __FreeBSD_version >= 1100099 + +static int +hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + unsigned int lenlim; + int error, i; + + lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; + error = sysctl_handle_int(oidp, &lenlim, 0, req); + if (error || req->newptr == NULL) + return error; + + if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || + lenlim > TCP_LRO_LENGTH_MAX) + return EINVAL; + + NV_LOCK(sc); + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) + sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; + NV_UNLOCK(sc); + return 0; +} + static int -hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS) +hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; - int hiwat, error; + int ackcnt, error, i; - hiwat = sc->hn_lro_hiwat; - error = sysctl_handle_int(oidp, &hiwat, 0, req); + /* + * lro_ackcnt_lim is append count limit, + * +1 to turn it into aggregation limit. + */ + ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; + error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; - if (!HN_LRO_HIWAT_ISVALID(sc, hiwat)) + if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; - if (sc->hn_lro_hiwat != hiwat) - hn_set_lro_hiwat(sc, hiwat); + /* + * Convert aggregation limit back to append + * count limit. 
+ */ + --ackcnt; + NV_LOCK(sc); + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) + sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; + NV_UNLOCK(sc); + return 0; +} + +#endif + +static int +hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int hcsum = arg2; + int on, error, i; + + on = 0; + if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) + on = 1; + + error = sysctl_handle_int(oidp, &on, 0, req); + if (error || req->newptr == NULL) + return error; + + NV_LOCK(sc); + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; + + if (on) + rxr->hn_trust_hcsum |= hcsum; + else + rxr->hn_trust_hcsum &= ~hcsum; + } + NV_UNLOCK(sc); return 0; } -#endif /* HN_LRO_HIWAT */ static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) @@ -1686,7 +1788,7 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) struct hn_softc *sc = arg1; int chimney_size, error; - chimney_size = sc->hn_tx_chimney_size; + chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size; error = sysctl_handle_int(oidp, &chimney_size, 0, req); if (error || req->newptr == NULL) return error; @@ -1694,8 +1796,138 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS) if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0) return EINVAL; - if (sc->hn_tx_chimney_size != chimney_size) - sc->hn_tx_chimney_size = chimney_size; + hn_set_tx_chimney_size(sc, chimney_size); + return 0; +} + +#if __FreeBSD_version < 1100095 +static int +hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int ofs = arg2, i, error; + struct hn_rx_ring *rxr; + uint64_t stat; + + stat = 0; + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + rxr = &sc->hn_rx_ring[i]; + stat += *((int *)((uint8_t *)rxr + ofs)); + } + + error = sysctl_handle_64(oidp, &stat, 0, req); + if (error || req->newptr == NULL) + return error; + + /* Zero out this stat. */ + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + rxr = &sc->hn_rx_ring[i]; + *((int *)((uint8_t *)rxr + ofs)) = 0; + } + return 0; +} +#else +static int +hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int ofs = arg2, i, error; + struct hn_rx_ring *rxr; + uint64_t stat; + + stat = 0; + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + rxr = &sc->hn_rx_ring[i]; + stat += *((uint64_t *)((uint8_t *)rxr + ofs)); + } + + error = sysctl_handle_64(oidp, &stat, 0, req); + if (error || req->newptr == NULL) + return error; + + /* Zero out this stat. */ + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + rxr = &sc->hn_rx_ring[i]; + *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; + } + return 0; +} + +#endif + +static int +hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int ofs = arg2, i, error; + struct hn_rx_ring *rxr; + u_long stat; + + stat = 0; + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + rxr = &sc->hn_rx_ring[i]; + stat += *((u_long *)((uint8_t *)rxr + ofs)); + } + + error = sysctl_handle_long(oidp, &stat, 0, req); + if (error || req->newptr == NULL) + return error; + + /* Zero out this stat. 
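+	 * Reads of these statistics sysctls return the sum across all
+	 * RX rings; any successful write resets the per-ring counters.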
*/ + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + rxr = &sc->hn_rx_ring[i]; + *((u_long *)((uint8_t *)rxr + ofs)) = 0; + } + return 0; +} + +static int +hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int ofs = arg2, i, error; + struct hn_tx_ring *txr; + u_long stat; + + stat = 0; + for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { + txr = &sc->hn_tx_ring[i]; + stat += *((u_long *)((uint8_t *)txr + ofs)); + } + + error = sysctl_handle_long(oidp, &stat, 0, req); + if (error || req->newptr == NULL) + return error; + + /* Zero out this stat. */ + for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { + txr = &sc->hn_tx_ring[i]; + *((u_long *)((uint8_t *)txr + ofs)) = 0; + } + return 0; +} + +static int +hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int ofs = arg2, i, error, conf; + struct hn_tx_ring *txr; + + txr = &sc->hn_tx_ring[0]; + conf = *((int *)((uint8_t *)txr + ofs)); + + error = sysctl_handle_int(oidp, &conf, 0, req); + if (error || req->newptr == NULL) + return error; + + NV_LOCK(sc); + for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { + txr = &sc->hn_tx_ring[i]; + *((int *)((uint8_t *)txr + ofs)) = conf; + } + NV_UNLOCK(sc); + return 0; } @@ -1786,17 +2018,191 @@ hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error) *paddr = segs->ds_addr; } +static void +hn_create_rx_data(struct hn_softc *sc) +{ + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; + device_t dev = sc->hn_dev; +#if defined(INET) || defined(INET6) +#if __FreeBSD_version >= 1100095 + int lroent_cnt; +#endif +#endif + int i; + + sc->hn_rx_ring_cnt = 1; /* TODO: vRSS */ + sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, + M_NETVSC, M_WAITOK | M_ZERO); + +#if defined(INET) || defined(INET6) +#if __FreeBSD_version >= 1100095 + lroent_cnt = hn_lro_entry_count; + if (lroent_cnt < TCP_LRO_ENTRIES) + lroent_cnt = TCP_LRO_ENTRIES; + device_printf(dev, "LRO: entry count %d\n", lroent_cnt); +#endif +#endif /* INET || INET6 */ + + for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { + struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; + + if (hn_trust_hosttcp) + rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; + if (hn_trust_hostudp) + rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; + if (hn_trust_hostip) + rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; + + /* + * Initialize LRO. 
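+	 * On FreeBSD >= 1100095, tcp_lro_init_args() sizes the LRO
+	 * entry table up front; older kernels fall back to
+	 * tcp_lro_init() and assign the ifp by hand.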
+	 */
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+		tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 0);
+#else
+		tcp_lro_init(&rxr->hn_lro);
+		rxr->hn_lro.ifp = sc->hn_ifp;
+#endif
+#if __FreeBSD_version >= 1100099
+		rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
+		rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
+#endif
+#endif	/* INET || INET6 */
+	}
+
+	ctx = device_get_sysctl_ctx(dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
+	    CTLTYPE_U64 | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
+#if __FreeBSD_version < 1100095
+	    hn_rx_stat_int_sysctl,
+#else
+	    hn_rx_stat_u64_sysctl,
+#endif
+	    "LU", "LRO queued");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
+	    CTLTYPE_U64 | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
+#if __FreeBSD_version < 1100095
+	    hn_rx_stat_int_sysctl,
+#else
+	    hn_rx_stat_u64_sysctl,
+#endif
+	    "LU", "LRO flushed");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_lro_tried),
+	    hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
+#if __FreeBSD_version >= 1100099
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
+	    CTLTYPE_UINT | CTLFLAG_RW, sc, 0, hn_lro_lenlim_sysctl, "IU",
+	    "Max # of data bytes to be aggregated by LRO");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
+	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_ackcnt_sysctl, "I",
+	    "Max # of ACKs to be aggregated by LRO");
+#endif
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
+	    CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_TCP,
+	    hn_trust_hcsum_sysctl, "I",
+	    "Trust TCP segment verification on host side, "
+	    "when csum info is missing");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
+	    CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_UDP,
+	    hn_trust_hcsum_sysctl, "I",
+	    "Trust UDP datagram verification on host side, "
+	    "when csum info is missing");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
+	    CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_IP,
+	    hn_trust_hcsum_sysctl, "I",
+	    "Trust IP packet verification on host side, "
+	    "when csum info is missing");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_ip),
+	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_tcp),
+	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_udp),
+	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_csum_trusted),
+	    hn_rx_stat_ulong_sysctl, "LU",
+	    "# of packets that we trust host's csum verification");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_rx_ring, hn_small_pkts),
+	    hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
+}
+
+static void
+hn_destroy_rx_data(struct hn_softc *sc)
+{
+#if defined(INET) || defined(INET6)
+	int i;
+#endif
+
+	if (sc->hn_rx_ring_cnt == 0)
+		return;
+
+#if defined(INET) || defined(INET6)
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		tcp_lro_free(&sc->hn_rx_ring[i].hn_lro);
+#endif
+	free(sc->hn_rx_ring, M_NETVSC);
+	sc->hn_rx_ring = NULL;
+
+
sc->hn_rx_ring_cnt = 0; +} + static int -hn_create_tx_ring(struct hn_softc *sc) +hn_create_tx_ring(struct hn_softc *sc, int id) { + struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; bus_dma_tag_t parent_dtag; int error, i; - sc->hn_txdesc_cnt = HN_TX_DESC_CNT; - sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt, + txr->hn_sc = sc; + +#ifndef HN_USE_TXDESC_BUFRING + mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); +#endif + mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); + + txr->hn_txdesc_cnt = HN_TX_DESC_CNT; + txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); - SLIST_INIT(&sc->hn_txlist); - mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); +#ifndef HN_USE_TXDESC_BUFRING + SLIST_INIT(&txr->hn_txlist); +#else + txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, + M_WAITOK, &txr->hn_tx_lock); +#endif + + txr->hn_tx_taskq = sc->hn_tx_taskq; + TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); + TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); + + txr->hn_direct_tx_size = hn_direct_tx_size; + if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1) + txr->hn_csum_assist = HN_CSUM_ASSIST; + else + txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; + + /* + * Always schedule transmission instead of trying to do direct + * transmission. This one gives the best performance so far. + */ + txr->hn_sched_tx = 1; + + txr->hn_txeof = hn_start_txeof; /* TODO: if_transmit */ parent_dtag = bus_get_dma_tag(sc->hn_dev); @@ -1813,7 +2219,7 @@ hn_create_tx_ring(struct hn_softc *sc) 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ - &sc->hn_tx_rndis_dtag); + &txr->hn_tx_rndis_dtag); if (error) { device_printf(sc->hn_dev, "failed to create rndis dmatag\n"); return error; @@ -1832,21 +2238,21 @@ hn_create_tx_ring(struct hn_softc *sc) 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ - &sc->hn_tx_data_dtag); + &txr->hn_tx_data_dtag); if (error) { device_printf(sc->hn_dev, "failed to create data dmatag\n"); return error; } - for (i = 0; i < sc->hn_txdesc_cnt; ++i) { - struct hn_txdesc *txd = &sc->hn_txdesc[i]; + for (i = 0; i < txr->hn_txdesc_cnt; ++i) { + struct hn_txdesc *txd = &txr->hn_txdesc[i]; - txd->sc = sc; + txd->txr = txr; /* * Allocate and load RNDIS messages. */ - error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag, + error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_msg, BUS_DMA_WAITOK | BUS_DMA_COHERENT, &txd->rndis_msg_dmap); @@ -1856,7 +2262,7 @@ hn_create_tx_ring(struct hn_softc *sc) return error; } - error = bus_dmamap_load(sc->hn_tx_rndis_dtag, + error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap, txd->rndis_msg, HN_RNDIS_MSG_LEN, hn_dma_map_paddr, &txd->rndis_msg_paddr, @@ -1864,59 +2270,277 @@ hn_create_tx_ring(struct hn_softc *sc) if (error) { device_printf(sc->hn_dev, "failed to load rndis_msg, %d\n", i); - bus_dmamem_free(sc->hn_tx_rndis_dtag, + bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* DMA map for TX data. 
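	 * Each TX descriptor ends up with two DMA maps: the RNDIS
	 * message map preloaded above, and this data map, which is
	 * loaded later for each packet's mbuf chain.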
*/ - error = bus_dmamap_create(sc->hn_tx_data_dtag, 0, + error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(sc->hn_dev, "failed to allocate tx data dmamap\n"); - bus_dmamap_unload(sc->hn_tx_rndis_dtag, + bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); - bus_dmamem_free(sc->hn_tx_rndis_dtag, + bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, txd->rndis_msg_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; - SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link); +#ifndef HN_USE_TXDESC_BUFRING + SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); +#else + buf_ring_enqueue(txr->hn_txdesc_br, txd); +#endif + } + txr->hn_txdesc_avail = txr->hn_txdesc_cnt; + + if (sc->hn_tx_sysctl_tree != NULL) { + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; + char name[16]; + + /* + * Create per TX ring sysctl tree: + * dev.hn.UNIT.tx.RINGID + */ + ctx = device_get_sysctl_ctx(sc->hn_dev); + child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); + + snprintf(name, sizeof(name), "%d", id); + txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, + name, CTLFLAG_RD, 0, ""); + + if (txr->hn_tx_sysctl_tree != NULL) { + child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); + + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", + CTLFLAG_RD, &txr->hn_txdesc_avail, 0, + "# of available TX descs"); + } } - sc->hn_txdesc_avail = sc->hn_txdesc_cnt; return 0; } static void -hn_destroy_tx_ring(struct hn_softc *sc) +hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) +{ + struct hn_tx_ring *txr = txd->txr; + + KASSERT(txd->m == NULL, ("still has mbuf installed")); + KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); + + bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); + bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, + txd->rndis_msg_dmap); + bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); +} + +static void +hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; - while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) { - KASSERT(txd->m == NULL, ("still has mbuf installed")); - KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, - ("still dma mapped")); - SLIST_REMOVE_HEAD(&sc->hn_txlist, link); + if (txr->hn_txdesc == NULL) + return; + +#ifndef HN_USE_TXDESC_BUFRING + while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { + SLIST_REMOVE_HEAD(&txr->hn_txlist, link); + hn_txdesc_dmamap_destroy(txd); + } +#else + while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) + hn_txdesc_dmamap_destroy(txd); +#endif + + if (txr->hn_tx_data_dtag != NULL) + bus_dma_tag_destroy(txr->hn_tx_data_dtag); + if (txr->hn_tx_rndis_dtag != NULL) + bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); + +#ifdef HN_USE_TXDESC_BUFRING + buf_ring_free(txr->hn_txdesc_br, M_NETVSC); +#endif + + free(txr->hn_txdesc, M_NETVSC); + txr->hn_txdesc = NULL; + +#ifndef HN_USE_TXDESC_BUFRING + mtx_destroy(&txr->hn_txlist_spin); +#endif + mtx_destroy(&txr->hn_tx_lock); +} + +static int +hn_create_tx_data(struct hn_softc *sc) +{ + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; + int i; + + sc->hn_tx_ring_cnt = 1; /* TODO: vRSS */ + sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, + M_NETVSC, M_WAITOK | M_ZERO); + + ctx = device_get_sysctl_ctx(sc->hn_dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); + + /* Create dev.hn.UNIT.tx sysctl tree */ + sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", + CTLFLAG_RD, 0, ""); + + for (i = 0; i < 
sc->hn_tx_ring_cnt; ++i) { + int error; + + error = hn_create_tx_ring(sc, i); + if (error) + return error; + } + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", + CTLTYPE_ULONG | CTLFLAG_RW, sc, + __offsetof(struct hn_tx_ring, hn_no_txdescs), + hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", + CTLTYPE_ULONG | CTLFLAG_RW, sc, + __offsetof(struct hn_tx_ring, hn_send_failed), + hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", + CTLTYPE_ULONG | CTLFLAG_RW, sc, + __offsetof(struct hn_tx_ring, hn_txdma_failed), + hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", + CTLTYPE_ULONG | CTLFLAG_RW, sc, + __offsetof(struct hn_tx_ring, hn_tx_collapsed), + hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", + CTLTYPE_ULONG | CTLFLAG_RW, sc, + __offsetof(struct hn_tx_ring, hn_tx_chimney), + hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", + CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, + "# of total TX descs"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", + CTLFLAG_RD, &sc->hn_tx_chimney_max, 0, + "Chimney send packet size upper boundary"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", + CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl, + "I", "Chimney send packet size limit"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", + CTLTYPE_INT | CTLFLAG_RW, sc, + __offsetof(struct hn_tx_ring, hn_direct_tx_size), + hn_tx_conf_int_sysctl, "I", + "Size of the packet for direct transmission"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", + CTLTYPE_INT | CTLFLAG_RW, sc, + __offsetof(struct hn_tx_ring, hn_sched_tx), + hn_tx_conf_int_sysctl, "I", + "Always schedule transmission " + "instead of doing direct transmission"); + + return 0; +} + +static void +hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size) +{ + int i; - bus_dmamap_unload(sc->hn_tx_rndis_dtag, - txd->rndis_msg_dmap); - bus_dmamem_free(sc->hn_tx_rndis_dtag, - txd->rndis_msg, txd->rndis_msg_dmap); + NV_LOCK(sc); + for (i = 0; i < sc->hn_tx_ring_cnt; ++i) + sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size; + NV_UNLOCK(sc); +} + +static void +hn_destroy_tx_data(struct hn_softc *sc) +{ + int i; + + if (sc->hn_tx_ring_cnt == 0) + return; + + for (i = 0; i < sc->hn_tx_ring_cnt; ++i) + hn_destroy_tx_ring(&sc->hn_tx_ring[i]); + + free(sc->hn_tx_ring, M_NETVSC); + sc->hn_tx_ring = NULL; + + sc->hn_tx_ring_cnt = 0; +} + +static void +hn_start_taskfunc(void *xtxr, int pending __unused) +{ + struct hn_tx_ring *txr = xtxr; + + mtx_lock(&txr->hn_tx_lock); + hn_start_locked(txr, 0); + mtx_unlock(&txr->hn_tx_lock); +} + +static void +hn_start_txeof_taskfunc(void *xtxr, int pending __unused) +{ + struct hn_tx_ring *txr = xtxr; + + mtx_lock(&txr->hn_tx_lock); + atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); + hn_start_locked(txr, 0); + mtx_unlock(&txr->hn_tx_lock); +} + +static void +hn_stop_tx_tasks(struct hn_softc *sc) +{ + int i; + + for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { + struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; + + taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); + taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); + } +} - bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap); +static void +hn_tx_taskq_create(void *arg __unused) +{ + if 
(!hn_share_tx_taskq) + return; + + hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, + taskqueue_thread_enqueue, &hn_tx_taskq); + taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); + if (hn_bind_tx_taskq >= 0) { + int cpu = hn_bind_tx_taskq; + struct task cpuset_task; + cpuset_t cpu_set; + + if (cpu > mp_ncpus - 1) + cpu = mp_ncpus - 1; + CPU_SETOF(cpu, &cpu_set); + TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set); + taskqueue_enqueue(hn_tx_taskq, &cpuset_task); + taskqueue_drain(hn_tx_taskq, &cpuset_task); } +} +SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, + hn_tx_taskq_create, NULL); - if (sc->hn_tx_data_dtag != NULL) - bus_dma_tag_destroy(sc->hn_tx_data_dtag); - if (sc->hn_tx_rndis_dtag != NULL) - bus_dma_tag_destroy(sc->hn_tx_rndis_dtag); - free(sc->hn_txdesc, M_NETVSC); - mtx_destroy(&sc->hn_txlist_spin); +static void +hn_tx_taskq_destroy(void *arg __unused) +{ + if (hn_tx_taskq != NULL) + taskqueue_free(hn_tx_taskq); } +SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, + hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c index 29d8c8f..31ddbc0 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.c +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c @@ -136,12 +136,9 @@ hv_get_rndis_device(void) { rndis_device *device; - device = malloc(sizeof(rndis_device), M_NETVSC, M_NOWAIT | M_ZERO); - if (device == NULL) { - return (NULL); - } + device = malloc(sizeof(rndis_device), M_NETVSC, M_WAITOK | M_ZERO); - mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_SPIN | MTX_RECURSE); + mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_DEF); /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ STAILQ_INIT(&device->myrequest_list); @@ -172,10 +169,7 @@ hv_rndis_request(rndis_device *device, uint32_t message_type, rndis_msg *rndis_mesg; rndis_set_request *set; - request = malloc(sizeof(rndis_request), M_NETVSC, M_NOWAIT | M_ZERO); - if (request == NULL) { - return (NULL); - } + request = malloc(sizeof(rndis_request), M_NETVSC, M_WAITOK | M_ZERO); sema_init(&request->wait_sema, 0, "rndis sema"); @@ -194,9 +188,9 @@ hv_rndis_request(rndis_device *device, uint32_t message_type, set->request_id += 1; /* Add to the request list */ - mtx_lock_spin(&device->req_lock); + mtx_lock(&device->req_lock); STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry); - mtx_unlock_spin(&device->req_lock); + mtx_unlock(&device->req_lock); return (request); } @@ -207,14 +201,14 @@ hv_rndis_request(rndis_device *device, uint32_t message_type, static inline void hv_put_rndis_request(rndis_device *device, rndis_request *request) { - mtx_lock_spin(&device->req_lock); + mtx_lock(&device->req_lock); /* Fixme: Has O(n) performance */ /* * XXXKYS: Use Doubly linked lists. 
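	 * STAILQ_REMOVE() has to walk from the head to find the
	 * predecessor of the node being removed, hence the O(n) cost
	 * noted above; a TAILQ would make this removal O(1).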
*/ STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_, mylist_entry); - mtx_unlock_spin(&device->req_lock); + mtx_unlock(&device->req_lock); sema_destroy(&request->wait_sema); free(request, M_NETVSC); @@ -271,7 +265,7 @@ hv_rf_receive_response(rndis_device *device, rndis_msg *response) rndis_request *next_request; boolean_t found = FALSE; - mtx_lock_spin(&device->req_lock); + mtx_lock(&device->req_lock); request = STAILQ_FIRST(&device->myrequest_list); while (request != NULL) { /* @@ -286,7 +280,7 @@ hv_rf_receive_response(rndis_device *device, rndis_msg *response) next_request = STAILQ_NEXT(request, mylist_entry); request = next_request; } - mtx_unlock_spin(&device->req_lock); + mtx_unlock(&device->req_lock); if (found) { if (response->msg_len <= sizeof(rndis_msg)) { diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c index a780f9e..27fb3fd 100644 --- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c +++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c @@ -856,8 +856,8 @@ hv_storvsc_rescan_target(struct storvsc_softc *sc) if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid, CAM_LUN_WILDCARD) != CAM_REQ_CMP) { - printf("unable to create path for rescan, pathid: %d," - "targetid: %d\n", pathid, targetid); + printf("unable to create path for rescan, pathid: %u," + "targetid: %u\n", pathid, targetid); xpt_free_ccb(ccb); return; } @@ -1561,13 +1561,12 @@ static void storvsc_destroy_bounce_buffer(struct sglist *sgl) { struct hv_sgl_node *sgl_node = NULL; - - sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); - LIST_REMOVE(sgl_node, link); - if (NULL == sgl_node) { + if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) { printf("storvsc error: not enough in use sgl\n"); return; } + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list); + LIST_REMOVE(sgl_node, link); sgl_node->sgl_data = sgl; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link); } @@ -1593,12 +1592,12 @@ storvsc_create_bounce_buffer(uint16_t seg_count, int write) struct hv_sgl_node *sgl_node = NULL; /* get struct sglist from free_sgl_list */ - sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); - LIST_REMOVE(sgl_node, link); - if (NULL == sgl_node) { + if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) { printf("storvsc error: not enough free sgl\n"); return NULL; } + sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list); + LIST_REMOVE(sgl_node, link); bounce_sgl = sgl_node->sgl_data; LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link); diff --git a/sys/dev/hyperv/utilities/hv_heartbeat.c b/sys/dev/hyperv/utilities/hv_heartbeat.c new file mode 100644 index 0000000..c1b6da5 --- /dev/null +++ b/sys/dev/hyperv/utilities/hv_heartbeat.c @@ -0,0 +1,129 @@ +/*- + * Copyright (c) 2014 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/timetc.h> +#include <sys/syscallsubr.h> + +#include <dev/hyperv/include/hyperv.h> +#include "hv_util.h" + +/* Heartbeat Service */ +static hv_guid service_guid = { .data = + {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, + 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d} }; + +/** + * Process heartbeat message + */ +static void +hv_heartbeat_cb(void *context) +{ + uint8_t* buf; + hv_vmbus_channel* channel; + uint32_t recvlen; + uint64_t requestid; + int ret; + + struct hv_vmbus_heartbeat_msg_data* heartbeat_msg; + struct hv_vmbus_icmsg_hdr* icmsghdrp; + hv_util_sc *softc; + + softc = (hv_util_sc*)context; + buf = softc->receive_buffer;; + channel = softc->hv_dev->channel; + + ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen, + &requestid); + + if ((ret == 0) && recvlen > 0) { + + icmsghdrp = (struct hv_vmbus_icmsg_hdr *) + &buf[sizeof(struct hv_vmbus_pipe_hdr)]; + + if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { + hv_negotiate_version(icmsghdrp, NULL, buf); + + } else { + heartbeat_msg = + (struct hv_vmbus_heartbeat_msg_data *) + &buf[sizeof(struct hv_vmbus_pipe_hdr) + + sizeof(struct hv_vmbus_icmsg_hdr)]; + + heartbeat_msg->seq_num += 1; + } + + icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | + HV_ICMSGHDRFLAG_RESPONSE; + + hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); + } +} + +static int +hv_heartbeat_probe(device_t dev) +{ + const char *p = vmbus_get_type(dev); + if (!memcmp(p, &service_guid, sizeof(hv_guid))) { + device_set_desc(dev, "Hyper-V Heartbeat Service"); + return BUS_PROBE_DEFAULT; + } + + return ENXIO; +} + +static int +hv_heartbeat_attach(device_t dev) +{ + hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev); + + softc->callback = hv_heartbeat_cb; + + return hv_util_attach(dev); +} + +static device_method_t heartbeat_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, hv_heartbeat_probe), + DEVMETHOD(device_attach, hv_heartbeat_attach), + DEVMETHOD(device_detach, hv_util_detach), + { 0, 0 } +}; + +static driver_t heartbeat_driver = { "hvheartbeat", heartbeat_methods, sizeof(hv_util_sc)}; + +static devclass_t heartbeat_devclass; + +DRIVER_MODULE(hv_heartbeat, vmbus, heartbeat_driver, heartbeat_devclass, NULL, NULL); +MODULE_VERSION(hv_heartbeat, 1); +MODULE_DEPEND(hv_heartbeat, vmbus, 1, 1, 1); diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c index 58d565c4..8517918 100644 --- a/sys/dev/hyperv/utilities/hv_kvp.c +++ b/sys/dev/hyperv/utilities/hv_kvp.c @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include <dev/hyperv/include/hyperv.h> #include 
<dev/hyperv/netvsc/hv_net_vsc.h> +#include "hv_util.h" #include "unicode.h" #include "hv_kvp.h" @@ -74,8 +75,6 @@ __FBSDID("$FreeBSD$"); /* hv_kvp debug control */ static int hv_kvp_log = 0; -SYSCTL_INT(_dev, OID_AUTO, hv_kvp_log, CTLFLAG_RW, &hv_kvp_log, 0, - "hv_kvp log"); #define hv_kvp_log_error(...) do { \ if (hv_kvp_log > 0) \ @@ -87,6 +86,10 @@ SYSCTL_INT(_dev, OID_AUTO, hv_kvp_log, CTLFLAG_RW, &hv_kvp_log, 0, log(LOG_INFO, "hv_kvp: " __VA_ARGS__); \ } while (0) +static hv_guid service_guid = { .data = + {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, + 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6} }; + /* character device prototypes */ static d_open_t hv_kvp_dev_open; static d_close_t hv_kvp_dev_close; @@ -94,12 +97,6 @@ static d_read_t hv_kvp_dev_daemon_read; static d_write_t hv_kvp_dev_daemon_write; static d_poll_t hv_kvp_dev_daemon_poll; -/* hv_kvp prototypes */ -static int hv_kvp_req_in_progress(void); -static void hv_kvp_transaction_init(uint32_t, hv_vmbus_channel *, uint64_t, uint8_t *); -static void hv_kvp_send_msg_to_daemon(void); -static void hv_kvp_process_request(void *context); - /* hv_kvp character device structure */ static struct cdevsw hv_kvp_cdevsw = { @@ -111,70 +108,67 @@ static struct cdevsw hv_kvp_cdevsw = .d_poll = hv_kvp_dev_daemon_poll, .d_name = "hv_kvp_dev", }; -static struct cdev *hv_kvp_dev; -static struct hv_kvp_msg *hv_kvp_dev_buf; -struct proc *daemon_task; -static struct selinfo hv_kvp_selinfo; /* * Global state to track and synchronize multiple * KVP transaction requests from the host. */ -static struct { - - /* Pre-allocated work item for queue */ - hv_work_item work_item; +typedef struct hv_kvp_sc { + struct hv_util_sc util_sc; - /* Unless specified the pending mutex should be + /* Unless specified the pending mutex should be * used to alter the values of the following paramters: * 1. req_in_progress * 2. req_timed_out - * 3. pending_reqs. */ - struct mtx pending_mutex; - + struct mtx pending_mutex; + + struct task task; + /* To track if transaction is active or not */ - boolean_t req_in_progress; + boolean_t req_in_progress; /* Tracks if daemon did not reply back in time */ - boolean_t req_timed_out; + boolean_t req_timed_out; /* Tracks if daemon is serving a request currently */ boolean_t daemon_busy; - /* Count of KVP requests from Hyper-V. 
*/ - uint64_t pending_reqs; - - - /* Length of host message */ - uint32_t host_msg_len; - /* Pointer to channel */ - hv_vmbus_channel *channelp; + /* Length of host message */ + uint32_t host_msg_len; /* Host message id */ - uint64_t host_msg_id; - + uint64_t host_msg_id; + /* Current kvp message from the host */ - struct hv_kvp_msg *host_kvp_msg; - + struct hv_kvp_msg *host_kvp_msg; + /* Current kvp message for daemon */ - struct hv_kvp_msg daemon_kvp_msg; - + struct hv_kvp_msg daemon_kvp_msg; + /* Rcv buffer for communicating with the host*/ - uint8_t *rcv_buf; - + uint8_t *rcv_buf; + /* Device semaphore to control communication */ - struct sema dev_sema; - + struct sema dev_sema; + /* Indicates if daemon registered with driver */ - boolean_t register_done; - + boolean_t register_done; + /* Character device status */ - boolean_t dev_accessed; -} kvp_globals; + boolean_t dev_accessed; + + struct cdev *hv_kvp_dev; + + struct proc *daemon_task; -/* global vars */ -MALLOC_DECLARE(M_HV_KVP_DEV_BUF); -MALLOC_DEFINE(M_HV_KVP_DEV_BUF, "hv_kvp_dev buffer", "buffer for hv_kvp_dev module"); + struct selinfo hv_kvp_selinfo; +} hv_kvp_sc; + +/* hv_kvp prototypes */ +static int hv_kvp_req_in_progress(hv_kvp_sc *sc); +static void hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t, uint64_t, uint8_t *); +static void hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc); +static void hv_kvp_process_request(void *context, int pending); /* * hv_kvp low level functions @@ -184,10 +178,10 @@ MALLOC_DEFINE(M_HV_KVP_DEV_BUF, "hv_kvp_dev buffer", "buffer for hv_kvp_dev modu * Check if kvp transaction is in progres */ static int -hv_kvp_req_in_progress(void) +hv_kvp_req_in_progress(hv_kvp_sc *sc) { - return (kvp_globals.req_in_progress); + return (sc->req_in_progress); } @@ -195,18 +189,17 @@ hv_kvp_req_in_progress(void) * This routine is called whenever a message is received from the host */ static void -hv_kvp_transaction_init(uint32_t rcv_len, hv_vmbus_channel *rcv_channel, +hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t rcv_len, uint64_t request_id, uint8_t *rcv_buf) { - + /* Store all the relevant message details in the global structure */ /* Do not need to use mutex for req_in_progress here */ - kvp_globals.req_in_progress = true; - kvp_globals.host_msg_len = rcv_len; - kvp_globals.channelp = rcv_channel; - kvp_globals.host_msg_id = request_id; - kvp_globals.rcv_buf = rcv_buf; - kvp_globals.host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[ + sc->req_in_progress = true; + sc->host_msg_len = rcv_len; + sc->host_msg_id = request_id; + sc->rcv_buf = rcv_buf; + sc->host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[ sizeof(struct hv_vmbus_pipe_hdr) + sizeof(struct hv_vmbus_icmsg_hdr)]; } @@ -258,12 +251,12 @@ hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp, * Convert ip related info in umsg from utf8 to utf16 and store in hmsg */ static int -hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg, +hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg, struct hv_kvp_ip_msg *host_ip_msg) { int err_ip, err_subnet, err_gway, err_dns, err_adap; int UNUSED_FLAG = 1; - + utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.ip_addr, MAX_IP_ADDR_SIZE, (char *)umsg->body.kvp_ip_val.ip_addr, @@ -294,7 +287,7 @@ hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg, strlen((char *)umsg->body.kvp_ip_val.adapter_id), UNUSED_FLAG, &err_adap); - + host_ip_msg->kvp_ip_val.dhcp_enabled = umsg->body.kvp_ip_val.dhcp_enabled; host_ip_msg->kvp_ip_val.addr_family = umsg->body.kvp_ip_val.addr_family; @@ -389,7 +382,7 @@ 
hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, MAX_IP_ADDR_SIZE, UNUSED_FLAG, &err_subnet); - + utf16_to_utf8((char *)umsg->body.kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, (uint16_t *)host_ip_msg->kvp_ip_val.gate_way, MAX_GATEWAY_SIZE, @@ -411,16 +404,13 @@ hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg, * Ensure utf16_utf8 takes care of the additional string terminating char!! */ static void -hv_kvp_convert_hostmsg_to_usermsg(void) +hv_kvp_convert_hostmsg_to_usermsg(struct hv_kvp_msg *hmsg, struct hv_kvp_msg *umsg) { int utf_err = 0; uint32_t value_type; - struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *) - kvp_globals.host_kvp_msg; - - struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg; - struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg; + struct hv_kvp_ip_msg *host_ip_msg; + host_ip_msg = (struct hv_kvp_ip_msg*)hmsg; memset(umsg, 0, sizeof(struct hv_kvp_msg)); umsg->kvp_hdr.operation = hmsg->kvp_hdr.operation; @@ -525,14 +515,12 @@ hv_kvp_convert_hostmsg_to_usermsg(void) * Prepare a host kvp msg based on user kvp msg (utf8 to utf16) */ static int -hv_kvp_convert_usermsg_to_hostmsg(void) +hv_kvp_convert_usermsg_to_hostmsg(struct hv_kvp_msg *umsg, struct hv_kvp_msg *hmsg) { int hkey_len = 0, hvalue_len = 0, utf_err = 0; struct hv_kvp_exchg_msg_value *host_exchg_data; char *key_name, *value; - struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg; - struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg; struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)hmsg; switch (hmsg->kvp_hdr.operation) { @@ -564,7 +552,7 @@ hv_kvp_convert_usermsg_to_hostmsg(void) if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); - + return (KVP_SUCCESS); case HV_KVP_OP_GET: @@ -580,9 +568,9 @@ hv_kvp_convert_usermsg_to_hostmsg(void) /* Use values by string */ host_exchg_data->value_type = HV_REG_SZ; - if ((hkey_len < 0) || (hvalue_len < 0)) + if ((hkey_len < 0) || (hvalue_len < 0)) return (HV_KVP_E_FAIL); - + return (KVP_SUCCESS); default: @@ -595,22 +583,22 @@ hv_kvp_convert_usermsg_to_hostmsg(void) * Send the response back to the host. 
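 * The reply reuses the receive buffer: the icmsg header is stamped
 * TRANSACTION | RESPONSE and the buffer is sent back on the same
 * channel the request arrived on.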
*/ static void -hv_kvp_respond_host(int error) +hv_kvp_respond_host(hv_kvp_sc *sc, int error) { struct hv_vmbus_icmsg_hdr *hv_icmsg_hdrp; hv_icmsg_hdrp = (struct hv_vmbus_icmsg_hdr *) - &kvp_globals.rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)]; + &sc->rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)]; if (error) error = HV_KVP_E_FAIL; hv_icmsg_hdrp->status = error; hv_icmsg_hdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; - - error = hv_vmbus_channel_send_packet(kvp_globals.channelp, - kvp_globals.rcv_buf, - kvp_globals.host_msg_len, kvp_globals.host_msg_id, + + error = hv_vmbus_channel_send_packet(sc->util_sc.hv_dev->channel, + sc->rcv_buf, + sc->host_msg_len, sc->host_msg_id, HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); if (error) @@ -624,16 +612,19 @@ hv_kvp_respond_host(int error) * and the host */ static void -hv_kvp_send_msg_to_daemon(void) +hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc) { + struct hv_kvp_msg *hmsg = sc->host_kvp_msg; + struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg; + /* Prepare kvp_msg to be sent to user */ - hv_kvp_convert_hostmsg_to_usermsg(); + hv_kvp_convert_hostmsg_to_usermsg(hmsg, umsg); /* Send the msg to user via function deamon_read - setting sema */ - sema_post(&kvp_globals.dev_sema); + sema_post(&sc->dev_sema); /* We should wake up the daemon, in case it's doing poll() */ - selwakeup(&hv_kvp_selinfo); + selwakeup(&sc->hv_kvp_selinfo); } @@ -642,98 +633,83 @@ hv_kvp_send_msg_to_daemon(void) * and interact with daemon */ static void -hv_kvp_process_request(void *context) +hv_kvp_process_request(void *context, int pending) { uint8_t *kvp_buf; - hv_vmbus_channel *channel = context; + hv_vmbus_channel *channel; uint32_t recvlen = 0; uint64_t requestid; struct hv_vmbus_icmsg_hdr *icmsghdrp; int ret = 0; - uint64_t pending_cnt = 1; - + hv_kvp_sc *sc; + hv_kvp_log_info("%s: entering hv_kvp_process_request\n", __func__); - kvp_buf = receive_buffer[HV_KVP]; + + sc = (hv_kvp_sc*)context; + kvp_buf = sc->util_sc.receive_buffer;; + channel = sc->util_sc.hv_dev->channel; + ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE, &recvlen, &requestid); - /* - * We start counting only after the daemon registers - * and therefore there could be requests pending in - * the VMBus that are not reflected in pending_cnt. - * Therefore we continue reading as long as either of - * the below conditions is true. - */ + while ((ret == 0) && (recvlen > 0)) { + + icmsghdrp = (struct hv_vmbus_icmsg_hdr *) + &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)]; + + hv_kvp_transaction_init(sc, recvlen, requestid, kvp_buf); + if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { + hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf); + hv_kvp_respond_host(sc, ret); + + /* + * It is ok to not acquire the mutex before setting + * req_in_progress here because negotiation is the + * first thing that happens and hence there is no + * chance of a race condition. 
+	 */
+
+			sc->req_in_progress = false;
+			hv_kvp_log_info("%s: version negotiated\n", __func__);
+
+		} else {
+			if (!sc->daemon_busy) {
+
+				hv_kvp_log_info("%s: issuing query to daemon\n", __func__);
+				mtx_lock(&sc->pending_mutex);
+				sc->req_timed_out = false;
+				sc->daemon_busy = true;
+				mtx_unlock(&sc->pending_mutex);
-	while ((pending_cnt>0) || ((ret == 0) && (recvlen > 0))) {
-
-		if ((ret == 0) && (recvlen>0)) {
-
-			icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
-				&kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)];
-
-			hv_kvp_transaction_init(recvlen, channel, requestid, kvp_buf);
-			if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
-				hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf);
-				hv_kvp_respond_host(ret);
-
-				/*
-				 * It is ok to not acquire the mutex before setting
-				 * req_in_progress here because negotiation is the
-				 * first thing that happens and hence there is no
-				 * chance of a race condition.
-				 */
-
-				kvp_globals.req_in_progress = false;
-				hv_kvp_log_info("%s :version negotiated\n", __func__);
-
-			} else {
-				if (!kvp_globals.daemon_busy) {
-
-					hv_kvp_log_info("%s: issuing qury to daemon\n", __func__);
-					mtx_lock(&kvp_globals.pending_mutex);
-					kvp_globals.req_timed_out = false;
-					kvp_globals.daemon_busy = true;
-					mtx_unlock(&kvp_globals.pending_mutex);
-
-					hv_kvp_send_msg_to_daemon();
-					hv_kvp_log_info("%s: waiting for daemon\n", __func__);
-				}
-
-				/* Wait 5 seconds for daemon to respond back */
-				tsleep(&kvp_globals, 0, "kvpworkitem", 5 * hz);
-				hv_kvp_log_info("%s: came out of wait\n", __func__);
+				hv_kvp_send_msg_to_daemon(sc);
+				hv_kvp_log_info("%s: waiting for daemon\n", __func__);
			}
+
+			/* Wait 5 seconds for daemon to respond back */
+			tsleep(sc, 0, "kvpworkitem", 5 * hz);
+			hv_kvp_log_info("%s: came out of wait\n", __func__);
		}
-		mtx_lock(&kvp_globals.pending_mutex);
-
+		mtx_lock(&sc->pending_mutex);
+
		/* Notice that once req_timed_out is set to true
		 * it will remain true until the next request is
		 * sent to the daemon. The response from daemon
-		 * is forwarded to host only when this flag is
-		 * false.
+		 * is forwarded to host only when this flag is
+		 * false.
		 */
-		kvp_globals.req_timed_out = true;
+		sc->req_timed_out = true;

		/*
		 * Cancel request if so need be.
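		 * If the daemon missed the 5 second window above, fail the
		 * transaction back to the host here; req_timed_out remains
		 * set, so a late reply from the daemon is dropped rather
		 * than being forwarded to the host.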
	 */
-	if (hv_kvp_req_in_progress()) {
-		hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__);
-		hv_kvp_respond_host(HV_KVP_E_FAIL);
-		kvp_globals.req_in_progress = false;
-	}
-
-	/*
-	 * Decrement pending request count and
-	 */
-	if (kvp_globals.pending_reqs>0) {
-		kvp_globals.pending_reqs = kvp_globals.pending_reqs - 1;
+		if (hv_kvp_req_in_progress(sc)) {
+			hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__);
+			hv_kvp_respond_host(sc, HV_KVP_E_FAIL);
+			sc->req_in_progress = false;
		}
-	pending_cnt = kvp_globals.pending_reqs;
-
-	mtx_unlock(&kvp_globals.pending_mutex);
+
+		mtx_unlock(&sc->pending_mutex);

		/*
		 * Try reading next buffer
@@ -741,109 +717,43 @@ hv_kvp_process_request(void *context)
		recvlen = 0;
		ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE,
			&recvlen, &requestid);
-		hv_kvp_log_info("%s: read: context %p, pending_cnt %llu ret =%d, recvlen=%d\n",
-			__func__, context, (unsigned long long)pending_cnt, ret, recvlen);
-	}
+		hv_kvp_log_info("%s: read: context %p, ret =%d, recvlen=%d\n",
+			__func__, context, ret, recvlen);
+	}
}

/*
 * Callback routine that gets called whenever there is a message from host
 */
-void
+static void
hv_kvp_callback(void *context)
{
-	uint64_t pending_cnt = 0;
-
-	if (kvp_globals.register_done == false) {
-
-		kvp_globals.channelp = context;
-	} else {
-
-		mtx_lock(&kvp_globals.pending_mutex);
-		kvp_globals.pending_reqs = kvp_globals.pending_reqs + 1;
-		pending_cnt = kvp_globals.pending_reqs;
-		mtx_unlock(&kvp_globals.pending_mutex);
-		if (pending_cnt == 1) {
-			hv_kvp_log_info("%s: Queuing work item\n", __func__);
-			hv_queue_work_item(
-					service_table[HV_KVP].work_queue,
-					hv_kvp_process_request,
-					context
-					);
-		}
-	}
-}
-
-
-/*
- * This function is called by the hv_kvp_init -
- * creates character device hv_kvp_dev
- * allocates memory to hv_kvp_dev_buf
- *
- */
-static int
-hv_kvp_dev_init(void)
-{
-	int error = 0;
-
-	/* initialize semaphore */
-	sema_init(&kvp_globals.dev_sema, 0, "hv_kvp device semaphore");
-	/* create character device */
-	error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
-			&hv_kvp_dev,
-			&hv_kvp_cdevsw,
-			0,
-			UID_ROOT,
-			GID_WHEEL,
-			0640,
-			"hv_kvp_dev");
-
-	if (error != 0)
-		return (error);
-
+	hv_kvp_sc *sc = (hv_kvp_sc*)context;
	/*
-	 * Malloc with M_WAITOK flag will never fail.
-	 */
-	hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_HV_KVP_DEV_BUF, M_WAITOK |
-		M_ZERO);
-
-	return (0);
-}
-
-
-/*
- * This function is called by the hv_kvp_deinit -
- * destroy character device
- */
-static void
-hv_kvp_dev_destroy(void)
-{
-
-	if (daemon_task != NULL) {
-		PROC_LOCK(daemon_task);
-		kern_psignal(daemon_task, SIGKILL);
-		PROC_UNLOCK(daemon_task);
+	 The first request from the host will not be handled until the daemon
+	 is registered; when the callback is triggered without a registered
+	 daemon, it simply returns.
+	 When a new daemon registers, this callback is triggered from the
+	 _write op.
+ */ + if (sc->register_done) { + hv_kvp_log_info("%s: Queuing work item\n", __func__); + taskqueue_enqueue(taskqueue_thread, &sc->task); } - - destroy_dev(hv_kvp_dev); - free(hv_kvp_dev_buf, M_HV_KVP_DEV_BUF); - return; } - static int hv_kvp_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { - + hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; + hv_kvp_log_info("%s: Opened device \"hv_kvp_device\" successfully.\n", __func__); - if (kvp_globals.dev_accessed) + if (sc->dev_accessed) return (-EBUSY); - - daemon_task = curproc; - kvp_globals.dev_accessed = true; - kvp_globals.daemon_busy = false; + + sc->daemon_task = curproc; + sc->dev_accessed = true; + sc->daemon_busy = false; return (0); } @@ -852,10 +762,11 @@ static int hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused, struct thread *td __unused) { + hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; hv_kvp_log_info("%s: Closing device \"hv_kvp_device\".\n", __func__); - kvp_globals.dev_accessed = false; - kvp_globals.register_done = false; + sc->dev_accessed = false; + sc->register_done = false; return (0); } @@ -865,18 +776,21 @@ hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __un * acts as a send to daemon */ static int -hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __unused) +hv_kvp_dev_daemon_read(struct cdev *dev, struct uio *uio, int ioflag __unused) { size_t amt; int error = 0; + struct hv_kvp_msg *hv_kvp_dev_buf; + hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1; /* Check hv_kvp daemon registration status*/ - if (!kvp_globals.register_done) + if (!sc->register_done) return (KVP_ERROR); - sema_wait(&kvp_globals.dev_sema); + sema_wait(&sc->dev_sema); - memcpy(hv_kvp_dev_buf, &kvp_globals.daemon_kvp_msg, sizeof(struct hv_kvp_msg)); + hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK); + memcpy(hv_kvp_dev_buf, &sc->daemon_kvp_msg, sizeof(struct hv_kvp_msg)); amt = MIN(uio->uio_resid, uio->uio_offset >= BUFFERSIZE + 1 ? 
0 : BUFFERSIZE + 1 - uio->uio_offset);
@@ -884,6 +798,7 @@ hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __
	if ((error = uiomove(hv_kvp_dev_buf, amt, uio)) != 0)
		hv_kvp_log_info("%s: hv_kvp uiomove read failed!\n", __func__);

+	free(hv_kvp_dev_buf, M_TEMP);
	return (error);
}

@@ -893,29 +808,30 @@ hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __
 * acts as a receive from daemon
 */
static int
-hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag __unused)
+hv_kvp_dev_daemon_write(struct cdev *dev, struct uio *uio, int ioflag __unused)
{
	size_t amt;
	int error = 0;
+	struct hv_kvp_msg *hv_kvp_dev_buf;
+	hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;

	uio->uio_offset = 0;
+	hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK);

	amt = MIN(uio->uio_resid, BUFFERSIZE);
	error = uiomove(hv_kvp_dev_buf, amt, uio);

-	if (error != 0)
+	if (error != 0) {
+		free(hv_kvp_dev_buf, M_TEMP);
		return (error);
+	}
+	memcpy(&sc->daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg));

-	memcpy(&kvp_globals.daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg));
-
-	if (kvp_globals.register_done == false) {
-		if (kvp_globals.daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) {
-
-			kvp_globals.register_done = true;
-			if (kvp_globals.channelp) {
-
-				hv_kvp_callback(kvp_globals.channelp);
-			}
+	free(hv_kvp_dev_buf, M_TEMP);
+	if (sc->register_done == false) {
+		if (sc->daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) {
+			sc->register_done = true;
+			hv_kvp_callback(dev->si_drv1);
		} else {
			hv_kvp_log_info("%s, KVP Registration Failed\n", __func__);
@@ -923,18 +839,20 @@ hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag _
		}
	} else {
-		mtx_lock(&kvp_globals.pending_mutex);
+		mtx_lock(&sc->pending_mutex);

-		if(!kvp_globals.req_timed_out) {
+		if (!sc->req_timed_out) {
+			struct hv_kvp_msg *hmsg = sc->host_kvp_msg;
+			struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg;

-			hv_kvp_convert_usermsg_to_hostmsg();
-			hv_kvp_respond_host(KVP_SUCCESS);
-			wakeup(&kvp_globals);
-			kvp_globals.req_in_progress = false;
+			hv_kvp_convert_usermsg_to_hostmsg(umsg, hmsg);
+			hv_kvp_respond_host(sc, KVP_SUCCESS);
+			wakeup(sc);
+			sc->req_in_progress = false;
		}

-		kvp_globals.daemon_busy = false;
-		mtx_unlock(&kvp_globals.pending_mutex);
+		sc->daemon_busy = false;
+		mtx_unlock(&sc->pending_mutex);
	}

	return (error);
}

@@ -946,66 +864,106 @@ hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag _
 * for daemon to read.
 */
static int
-hv_kvp_dev_daemon_poll(struct cdev *dev __unused, int events, struct thread *td)
+hv_kvp_dev_daemon_poll(struct cdev *dev, int events, struct thread *td)
{
	int revents = 0;
+	hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;

-	mtx_lock(&kvp_globals.pending_mutex);
+	mtx_lock(&sc->pending_mutex);
	/*
	 * We check the global flag daemon_busy for the data availability
	 * for userland to read. daemon_busy is set to true before the
	 * driver has data for the daemon to read. It is set to false
	 * after the daemon sends the response back to the driver.
	 */
-	if (kvp_globals.daemon_busy == true)
+	if (sc->daemon_busy == true)
		revents = POLLIN;
	else
-		selrecord(td, &hv_kvp_selinfo);
+		selrecord(td, &sc->hv_kvp_selinfo);

-	mtx_unlock(&kvp_globals.pending_mutex);
+	mtx_unlock(&sc->pending_mutex);

	return (revents);
}

-
-/*
- * hv_kvp initialization function
- * called from hv_util service.
- * - */ -int -hv_kvp_init(hv_vmbus_service *srv) +static int +hv_kvp_probe(device_t dev) { - int error = 0; - hv_work_queue *work_queue = NULL; - - memset(&kvp_globals, 0, sizeof(kvp_globals)); - - work_queue = hv_work_queue_create("KVP Service"); - if (work_queue == NULL) { - hv_kvp_log_info("%s: Work queue alloc failed\n", __func__); - error = ENOMEM; - hv_kvp_log_error("%s: ENOMEM\n", __func__); - goto Finish; + const char *p = vmbus_get_type(dev); + if (!memcmp(p, &service_guid, sizeof(hv_guid))) { + device_set_desc(dev, "Hyper-V KVP Service"); + return BUS_PROBE_DEFAULT; } - srv->work_queue = work_queue; - error = hv_kvp_dev_init(); - mtx_init(&kvp_globals.pending_mutex, "hv-kvp pending mutex", - NULL, MTX_DEF); - kvp_globals.pending_reqs = 0; + return ENXIO; +} + +static int +hv_kvp_attach(device_t dev) +{ + int error; + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; + + hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev); + sc->util_sc.callback = hv_kvp_callback; + sema_init(&sc->dev_sema, 0, "hv_kvp device semaphore"); + mtx_init(&sc->pending_mutex, "hv-kvp pending mutex", + NULL, MTX_DEF); -Finish: - return (error); -} + ctx = device_get_sysctl_ctx(dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_kvp_log", + CTLFLAG_RW, &hv_kvp_log, 0, "Hyperv KVP service log level"); -void -hv_kvp_deinit(void) + TASK_INIT(&sc->task, 0, hv_kvp_process_request, sc); + + /* create character device */ + error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, + &sc->hv_kvp_dev, + &hv_kvp_cdevsw, + 0, + UID_ROOT, + GID_WHEEL, + 0640, + "hv_kvp_dev"); + + if (error != 0) + return (error); + sc->hv_kvp_dev->si_drv1 = sc; + + return hv_util_attach(dev); +} + +static int +hv_kvp_detach(device_t dev) { - hv_kvp_dev_destroy(); - mtx_destroy(&kvp_globals.pending_mutex); + hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev); - return; + if (sc->daemon_task != NULL) { + PROC_LOCK(sc->daemon_task); + kern_psignal(sc->daemon_task, SIGKILL); + PROC_UNLOCK(sc->daemon_task); + } + + destroy_dev(sc->hv_kvp_dev); + return hv_util_detach(dev); } + +static device_method_t kvp_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, hv_kvp_probe), + DEVMETHOD(device_attach, hv_kvp_attach), + DEVMETHOD(device_detach, hv_kvp_detach), + { 0, 0 } +}; + +static driver_t kvp_driver = { "hvkvp", kvp_methods, sizeof(hv_kvp_sc)}; + +static devclass_t kvp_devclass; + +DRIVER_MODULE(hv_kvp, vmbus, kvp_driver, kvp_devclass, NULL, NULL); +MODULE_VERSION(hv_kvp, 1); +MODULE_DEPEND(hv_kvp, vmbus, 1, 1, 1); diff --git a/sys/dev/hyperv/utilities/hv_kvp.h b/sys/dev/hyperv/utilities/hv_kvp.h index b67373fa..b62149e 100644 --- a/sys/dev/hyperv/utilities/hv_kvp.h +++ b/sys/dev/hyperv/utilities/hv_kvp.h @@ -238,17 +238,4 @@ struct hv_kvp_ip_msg { struct hv_kvp_ipaddr_value kvp_ip_val; } __attribute__((packed)); - -#define HV_SHUT_DOWN 0 -#define HV_TIME_SYNCH 1 -#define HV_HEART_BEAT 2 -#define HV_KVP 3 -#define HV_MAX_UTIL_SERVICES 4 - -#define HV_WLTIMEDELTA 116444736000000000L /* in 100ns unit */ -#define HV_ICTIMESYNCFLAG_PROBE 0 -#define HV_ICTIMESYNCFLAG_SYNC 1 -#define HV_ICTIMESYNCFLAG_SAMPLE 2 -#define HV_NANO_SEC_PER_SEC 1000000000 - #endif /* _KVP_H */ diff --git a/sys/dev/hyperv/utilities/hv_shutdown.c b/sys/dev/hyperv/utilities/hv_shutdown.c new file mode 100644 index 0000000..20bc65e --- /dev/null +++ b/sys/dev/hyperv/utilities/hv_shutdown.c @@ -0,0 +1,151 @@ +/*- + * Copyright (c) 2014 Microsoft Corp. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * A common driver for all hyper-V util services. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/reboot.h> +#include <sys/timetc.h> +#include <sys/syscallsubr.h> + +#include <dev/hyperv/include/hyperv.h> +#include "hv_util.h" + +static hv_guid service_guid = { .data = + {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, + 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB} }; + +/** + * Shutdown + */ +static void +hv_shutdown_cb(void *context) +{ + uint8_t* buf; + hv_vmbus_channel* channel; + uint8_t execute_shutdown = 0; + hv_vmbus_icmsg_hdr* icmsghdrp; + uint32_t recv_len; + uint64_t request_id; + int ret; + hv_vmbus_shutdown_msg_data* shutdown_msg; + hv_util_sc *softc; + + softc = (hv_util_sc*)context; + buf = softc->receive_buffer;; + channel = softc->hv_dev->channel; + ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, + &recv_len, &request_id); + + if ((ret == 0) && recv_len > 0) { + + icmsghdrp = (struct hv_vmbus_icmsg_hdr *) + &buf[sizeof(struct hv_vmbus_pipe_hdr)]; + + if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { + hv_negotiate_version(icmsghdrp, NULL, buf); + + } else { + shutdown_msg = + (struct hv_vmbus_shutdown_msg_data *) + &buf[sizeof(struct hv_vmbus_pipe_hdr) + + sizeof(struct hv_vmbus_icmsg_hdr)]; + + switch (shutdown_msg->flags) { + case 0: + case 1: + icmsghdrp->status = HV_S_OK; + execute_shutdown = 1; + if(bootverbose) + printf("Shutdown request received -" + " graceful shutdown initiated\n"); + break; + default: + icmsghdrp->status = HV_E_FAIL; + execute_shutdown = 0; + printf("Shutdown request received -" + " Invalid request\n"); + break; + } + } + + icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | + HV_ICMSGHDRFLAG_RESPONSE; + + hv_vmbus_channel_send_packet(channel, buf, + recv_len, request_id, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); + } + + if (execute_shutdown) + shutdown_nice(RB_POWEROFF); +} + +static int +hv_shutdown_probe(device_t dev) +{ + const char *p = vmbus_get_type(dev); + if (!memcmp(p, &service_guid, sizeof(hv_guid))) { + device_set_desc(dev, "Hyper-V Shutdown Service"); + return BUS_PROBE_DEFAULT; + } + + return ENXIO; +} + +static int 
+hv_shutdown_attach(device_t dev)
+{
+	hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev);
+
+	softc->callback = hv_shutdown_cb;
+
+	return hv_util_attach(dev);
+}
+
+static device_method_t shutdown_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, hv_shutdown_probe),
+	DEVMETHOD(device_attach, hv_shutdown_attach),
+	DEVMETHOD(device_detach, hv_util_detach),
+	{ 0, 0 }
+};
+
+static driver_t shutdown_driver = { "hvshutdown", shutdown_methods, sizeof(hv_util_sc)};
+
+static devclass_t shutdown_devclass;
+
+DRIVER_MODULE(hv_shutdown, vmbus, shutdown_driver, shutdown_devclass, NULL, NULL);
+MODULE_VERSION(hv_shutdown, 1);
+MODULE_DEPEND(hv_shutdown, vmbus, 1, 1, 1);
diff --git a/sys/dev/hyperv/utilities/hv_timesync.c b/sys/dev/hyperv/utilities/hv_timesync.c
new file mode 100644
index 0000000..d1ea904
--- /dev/null
+++ b/sys/dev/hyperv/utilities/hv_timesync.c
@@ -0,0 +1,216 @@
+/*-
+ * Copyright (c) 2014 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * A driver for the Hyper-V time synchronization service.
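
An aside on the probe routines in these util sub-drivers: vmbus_get_type(dev) returns the channel's 16-byte class-ID GUID, and probe simply memcmp()s it against the driver's service_guid. The byte arrays look scrambled because the first three GUID fields travel little-endian on the VMBus wire. A small, hypothetical userland helper (print_hv_guid is not part of the tree) that decodes the shutdown GUID above back to its canonical spelling:

    #include <stdio.h>
    #include <stdint.h>

    /* Decode a GUID stored in Hyper-V wire order (data1..data3 little-endian). */
    static void
    print_hv_guid(const uint8_t d[16])
    {
        printf("%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-"
            "%02x%02x%02x%02x%02x%02x\n",
            d[3], d[2], d[1], d[0],     /* data1, byte-swapped */
            d[5], d[4],                 /* data2, byte-swapped */
            d[7], d[6],                 /* data3, byte-swapped */
            d[8], d[9],                 /* data4, stored as-is */
            d[10], d[11], d[12], d[13], d[14], d[15]);
    }

    int
    main(void)
    {
        const uint8_t shutdown_guid[16] = {
            0x31, 0x60, 0x0B, 0x0E, 0x13, 0x52, 0x34, 0x49,
            0x81, 0x8B, 0x38, 0xD9, 0x0C, 0xED, 0x39, 0xDB
        };

        /* Prints 0e0b6031-5213-4934-818b-38d90ced39db. */
        print_hv_guid(shutdown_guid);
        return (0);
    }
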
+ */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/reboot.h> +#include <sys/timetc.h> +#include <sys/syscallsubr.h> + +#include <dev/hyperv/include/hyperv.h> +#include "hv_util.h" + +#define HV_WLTIMEDELTA 116444736000000000L /* in 100ns unit */ +#define HV_ICTIMESYNCFLAG_PROBE 0 +#define HV_ICTIMESYNCFLAG_SYNC 1 +#define HV_ICTIMESYNCFLAG_SAMPLE 2 +#define HV_NANO_SEC_PER_SEC 1000000000 + +/* Time Sync data */ +typedef struct { + uint64_t data; +} time_sync_data; + + /* Time Synch Service */ +static hv_guid service_guid = {.data = + {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, + 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf } }; + +struct hv_ictimesync_data { + uint64_t parenttime; + uint64_t childtime; + uint64_t roundtriptime; + uint8_t flags; +} __packed; + +typedef struct hv_timesync_sc { + hv_util_sc util_sc; + struct task task; + time_sync_data time_msg; +} hv_timesync_sc; + +/** + * Set host time based on time sync message from host + */ +static void +hv_set_host_time(void *context, int pending) +{ + hv_timesync_sc *softc = (hv_timesync_sc*)context; + uint64_t hosttime = softc->time_msg.data; + struct timespec guest_ts, host_ts; + uint64_t host_tns; + int64_t diff; + int error; + + host_tns = (hosttime - HV_WLTIMEDELTA) * 100; + host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC); + host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC); + + nanotime(&guest_ts); + + diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec; + + /* + * If host differs by 5 seconds then make the guest catch up + */ + if (diff > 5 || diff < -5) { + error = kern_clock_settime(curthread, CLOCK_REALTIME, + &host_ts); + } +} + +/** + * @brief Synchronize time with host after reboot, restore, etc. + * + * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. + * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time + * message after the timesync channel is opened. Since the hv_utils module is + * loaded after hv_vmbus, the first message is usually missed. The other + * thing is, systime is automatically set to emulated hardware clock which may + * not be UTC time or in the same time zone. So, to override these effects, we + * use the first 50 time samples for initial system time setting. 
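
A note on the arithmetic in hv_set_host_time() above: the host's parenttime is a Windows file time, counting 100 ns units since 1601-01-01 UTC, so subtracting HV_WLTIMEDELTA rebases it to the Unix epoch and multiplying by 100 yields nanoseconds. A minimal userland sketch of the same conversion (hv_to_timespec is an illustrative name, not a kernel function):

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    #define HV_WLTIMEDELTA      116444736000000000ULL /* 100ns units, 1601 -> 1970 */
    #define HV_NANO_SEC_PER_SEC 1000000000ULL

    /* Rebase a Hyper-V timestamp to the Unix epoch, as hv_set_host_time() does. */
    static struct timespec
    hv_to_timespec(uint64_t hosttime)
    {
        uint64_t host_tns = (hosttime - HV_WLTIMEDELTA) * 100;
        struct timespec ts;

        ts.tv_sec = (time_t)(host_tns / HV_NANO_SEC_PER_SEC);
        ts.tv_nsec = (long)(host_tns % HV_NANO_SEC_PER_SEC);
        return (ts);
    }

    int
    main(void)
    {
        /* 2016-01-01 00:00:00 UTC expressed as a Windows file time. */
        uint64_t sample = HV_WLTIMEDELTA + 1451606400ULL * 10000000ULL;
        struct timespec ts = hv_to_timespec(sample);

        printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec); /* 1451606400.000000000 */
        return (0);
    }
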
+ */ +static inline +void hv_adj_guesttime(hv_timesync_sc *sc, uint64_t hosttime, uint8_t flags) +{ + sc->time_msg.data = hosttime; + + if (((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) || + ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0)) { + taskqueue_enqueue(taskqueue_thread, &sc->task); + } +} + +/** + * Time Sync Channel message handler + */ +static void +hv_timesync_cb(void *context) +{ + hv_vmbus_channel* channel; + hv_vmbus_icmsg_hdr* icmsghdrp; + uint32_t recvlen; + uint64_t requestId; + int ret; + uint8_t* time_buf; + struct hv_ictimesync_data* timedatap; + hv_timesync_sc *softc; + + softc = (hv_timesync_sc*)context; + channel = softc->util_sc.hv_dev->channel; + time_buf = softc->util_sc.receive_buffer; + + ret = hv_vmbus_channel_recv_packet(channel, time_buf, + PAGE_SIZE, &recvlen, &requestId); + + if ((ret == 0) && recvlen > 0) { + icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[ + sizeof(struct hv_vmbus_pipe_hdr)]; + + if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { + hv_negotiate_version(icmsghdrp, NULL, time_buf); + } else { + timedatap = (struct hv_ictimesync_data *) &time_buf[ + sizeof(struct hv_vmbus_pipe_hdr) + + sizeof(struct hv_vmbus_icmsg_hdr)]; + hv_adj_guesttime(softc, timedatap->parenttime, timedatap->flags); + } + + icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION + | HV_ICMSGHDRFLAG_RESPONSE; + + hv_vmbus_channel_send_packet(channel, time_buf, + recvlen, requestId, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); + } +} + +static int +hv_timesync_probe(device_t dev) +{ + const char *p = vmbus_get_type(dev); + if (!memcmp(p, &service_guid, sizeof(hv_guid))) { + device_set_desc(dev, "Hyper-V Time Synch Service"); + return BUS_PROBE_DEFAULT; + } + + return ENXIO; +} + +static int +hv_timesync_attach(device_t dev) +{ + hv_timesync_sc *softc = device_get_softc(dev); + + softc->util_sc.callback = hv_timesync_cb; + TASK_INIT(&softc->task, 1, hv_set_host_time, softc); + + return hv_util_attach(dev); +} + +static int +hv_timesync_detach(device_t dev) +{ + hv_timesync_sc *softc = device_get_softc(dev); + taskqueue_drain(taskqueue_thread, &softc->task); + + return hv_util_detach(dev); +} + +static device_method_t timesync_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, hv_timesync_probe), + DEVMETHOD(device_attach, hv_timesync_attach), + DEVMETHOD(device_detach, hv_timesync_detach), + { 0, 0 } +}; + +static driver_t timesync_driver = { "hvtimesync", timesync_methods, sizeof(hv_timesync_sc)}; + +static devclass_t timesync_devclass; + +DRIVER_MODULE(hv_timesync, vmbus, timesync_driver, timesync_devclass, NULL, NULL); +MODULE_VERSION(hv_timesync, 1); +MODULE_DEPEND(hv_timesync, vmbus, 1, 1, 1); diff --git a/sys/dev/hyperv/utilities/hv_util.c b/sys/dev/hyperv/utilities/hv_util.c index dc4b1e2..7d19b3f 100644 --- a/sys/dev/hyperv/utilities/hv_util.c +++ b/sys/dev/hyperv/utilities/hv_util.c @@ -40,85 +40,9 @@ #include <sys/syscallsubr.h> #include <dev/hyperv/include/hyperv.h> -#include "hv_kvp.h" +#include "hv_util.h" -/* Time Sync data */ -typedef struct { - uint64_t data; -} time_sync_data; - -static void hv_shutdown_cb(void *context); -static void hv_heartbeat_cb(void *context); -static void hv_timesync_cb(void *context); - -static int hv_timesync_init(hv_vmbus_service *serv); - -/* - * Note: GUID codes below are predefined by the host hypervisor - * (Hyper-V and Azure)interface and required for correct operation. 
- */ -hv_vmbus_service service_table[] = { - /* Shutdown Service */ - { .guid.data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49, - 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB}, - .name = "Hyper-V Shutdown Service\n", - .enabled = TRUE, - .callback = hv_shutdown_cb, - }, - - /* Time Synch Service */ - { .guid.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49, - 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf}, - .name = "Hyper-V Time Synch Service\n", - .enabled = TRUE, - .init = hv_timesync_init, - .callback = hv_timesync_cb, - }, - - /* Heartbeat Service */ - { .guid.data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e, - 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d}, - .name = "Hyper-V Heartbeat Service\n", - .enabled = TRUE, - .callback = hv_heartbeat_cb, - }, - - /* KVP (Key Value Pair) Service */ - { .guid.data = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d, - 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6}, - .name = "Hyper-V KVP Service\n", - .enabled = TRUE, - .init = hv_kvp_init, - .callback = hv_kvp_callback, - }, -}; - -/* - * Receive buffer pointers. There is one buffer per utility service. The - * buffer is allocated during attach(). - */ -uint8_t *receive_buffer[HV_MAX_UTIL_SERVICES]; - -static boolean_t destroyed_kvp = FALSE; - -struct hv_ictimesync_data { - uint64_t parenttime; - uint64_t childtime; - uint64_t roundtriptime; - uint8_t flags; -} __packed; - -static int -hv_timesync_init(hv_vmbus_service *serv) -{ - - serv->work_queue = hv_work_queue_create("Time Sync"); - if (serv->work_queue == NULL) - return (ENOMEM); - return (0); -} - -static void +void hv_negotiate_version( struct hv_vmbus_icmsg_hdr* icmsghdrp, struct hv_vmbus_icmsg_negotiate* negop, @@ -147,267 +71,19 @@ hv_negotiate_version( negop->icmsg_vercnt = 1; } - -/** - * Set host time based on time sync message from host - */ -static void -hv_set_host_time(void *context) -{ - time_sync_data* time_msg = (time_sync_data*) context; - uint64_t hosttime = time_msg->data; - struct timespec guest_ts, host_ts; - uint64_t host_tns; - int64_t diff; - int error; - - host_tns = (hosttime - HV_WLTIMEDELTA) * 100; - host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC); - host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC); - - nanotime(&guest_ts); - - diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec; - - /* - * If host differs by 5 seconds then make the guest catch up - */ - if (diff > 5 || diff < -5) { - error = kern_clock_settime(curthread, CLOCK_REALTIME, - &host_ts); - } - - /* - * Free the hosttime that was allocated in hv_adj_guesttime() - */ - free(time_msg, M_DEVBUF); -} - -/** - * @brief Synchronize time with host after reboot, restore, etc. - * - * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM. - * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time - * message after the timesync channel is opened. Since the hv_utils module is - * loaded after hv_vmbus, the first message is usually missed. The other - * thing is, systime is automatically set to emulated hardware clock which may - * not be UTC time or in the same time zone. So, to override these effects, we - * use the first 50 time samples for initial system time setting. 
- */ -static inline -void hv_adj_guesttime(uint64_t hosttime, uint8_t flags) -{ - time_sync_data* time_msg; - - time_msg = malloc(sizeof(time_sync_data), M_DEVBUF, M_NOWAIT); - - if (time_msg == NULL) - return; - - time_msg->data = hosttime; - - if ((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) { - hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue, - hv_set_host_time, time_msg); - } else if ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0) { - hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue, - hv_set_host_time, time_msg); - } else { - free(time_msg, M_DEVBUF); - } -} - -/** - * Time Sync Channel message handler - */ -static void -hv_timesync_cb(void *context) -{ - hv_vmbus_channel* channel = context; - hv_vmbus_icmsg_hdr* icmsghdrp; - uint32_t recvlen; - uint64_t requestId; - int ret; - uint8_t* time_buf; - struct hv_ictimesync_data* timedatap; - - time_buf = receive_buffer[HV_TIME_SYNCH]; - - ret = hv_vmbus_channel_recv_packet(channel, time_buf, - PAGE_SIZE, &recvlen, &requestId); - - if ((ret == 0) && recvlen > 0) { - icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[ - sizeof(struct hv_vmbus_pipe_hdr)]; - - if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { - hv_negotiate_version(icmsghdrp, NULL, time_buf); - } else { - timedatap = (struct hv_ictimesync_data *) &time_buf[ - sizeof(struct hv_vmbus_pipe_hdr) + - sizeof(struct hv_vmbus_icmsg_hdr)]; - hv_adj_guesttime(timedatap->parenttime, timedatap->flags); - } - - icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION - | HV_ICMSGHDRFLAG_RESPONSE; - - hv_vmbus_channel_send_packet(channel, time_buf, - recvlen, requestId, - HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); - } -} - -/** - * Shutdown - */ -static void -hv_shutdown_cb(void *context) -{ - uint8_t* buf; - hv_vmbus_channel* channel = context; - uint8_t execute_shutdown = 0; - hv_vmbus_icmsg_hdr* icmsghdrp; - uint32_t recv_len; - uint64_t request_id; - int ret; - hv_vmbus_shutdown_msg_data* shutdown_msg; - - buf = receive_buffer[HV_SHUT_DOWN]; - - ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, - &recv_len, &request_id); - - if ((ret == 0) && recv_len > 0) { - - icmsghdrp = (struct hv_vmbus_icmsg_hdr *) - &buf[sizeof(struct hv_vmbus_pipe_hdr)]; - - if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { - hv_negotiate_version(icmsghdrp, NULL, buf); - - } else { - shutdown_msg = - (struct hv_vmbus_shutdown_msg_data *) - &buf[sizeof(struct hv_vmbus_pipe_hdr) + - sizeof(struct hv_vmbus_icmsg_hdr)]; - - switch (shutdown_msg->flags) { - case 0: - case 1: - icmsghdrp->status = HV_S_OK; - execute_shutdown = 1; - if(bootverbose) - printf("Shutdown request received -" - " graceful shutdown initiated\n"); - break; - default: - icmsghdrp->status = HV_E_FAIL; - execute_shutdown = 0; - printf("Shutdown request received -" - " Invalid request\n"); - break; - } - } - - icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | - HV_ICMSGHDRFLAG_RESPONSE; - - hv_vmbus_channel_send_packet(channel, buf, - recv_len, request_id, - HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); - } - - if (execute_shutdown) - shutdown_nice(RB_POWEROFF); -} - -/** - * Process heartbeat message - */ -static void -hv_heartbeat_cb(void *context) -{ - uint8_t* buf; - hv_vmbus_channel* channel = context; - uint32_t recvlen; - uint64_t requestid; - int ret; - - struct hv_vmbus_heartbeat_msg_data* heartbeat_msg; - struct hv_vmbus_icmsg_hdr* icmsghdrp; - - buf = receive_buffer[HV_HEART_BEAT]; - - ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen, - &requestid); - - if ((ret == 0) && recvlen > 0) { - - icmsghdrp = 
(struct hv_vmbus_icmsg_hdr *) - &buf[sizeof(struct hv_vmbus_pipe_hdr)]; - - if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) { - hv_negotiate_version(icmsghdrp, NULL, buf); - - } else { - heartbeat_msg = - (struct hv_vmbus_heartbeat_msg_data *) - &buf[sizeof(struct hv_vmbus_pipe_hdr) + - sizeof(struct hv_vmbus_icmsg_hdr)]; - - heartbeat_msg->seq_num += 1; - } - - icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | - HV_ICMSGHDRFLAG_RESPONSE; - - hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid, - HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); - } -} - - -static int -hv_util_probe(device_t dev) -{ - int i; - int rtn_value = ENXIO; - - for (i = 0; i < HV_MAX_UTIL_SERVICES; i++) { - const char *p = vmbus_get_type(dev); - if (service_table[i].enabled && !memcmp(p, &service_table[i].guid, sizeof(hv_guid))) { - device_set_softc(dev, (void *) (&service_table[i])); - rtn_value = BUS_PROBE_DEFAULT; - } - } - - return rtn_value; -} - -static int +int hv_util_attach(device_t dev) { - struct hv_device* hv_dev; - struct hv_vmbus_service* service; - int ret; - size_t receive_buffer_offset; + struct hv_device* hv_dev; + struct hv_util_sc* softc; + int ret; hv_dev = vmbus_get_devctx(dev); - service = device_get_softc(dev); - receive_buffer_offset = service - &service_table[0]; - device_printf(dev, "Hyper-V Service attaching: %s\n", service->name); - receive_buffer[receive_buffer_offset] = + softc = device_get_softc(dev); + softc->hv_dev = hv_dev; + softc->receive_buffer = malloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); - if (service->init != NULL) { - ret = service->init(service); - if (ret) { - ret = ENODEV; - goto error0; - } - } - /* * These services are not performance critical and do not need * batched reading. Furthermore, some services such as KVP can @@ -418,83 +94,30 @@ hv_util_attach(device_t dev) hv_set_channel_read_state(hv_dev->channel, FALSE); ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE, - 4 * PAGE_SIZE, NULL, 0, - service->callback, hv_dev->channel); + 4 * PAGE_SIZE, NULL, 0, + softc->callback, softc); if (ret) - goto error0; + goto error0; return (0); - error0: - - free(receive_buffer[receive_buffer_offset], M_DEVBUF); - receive_buffer[receive_buffer_offset] = NULL; - +error0: + free(softc->receive_buffer, M_DEVBUF); return (ret); } -static int +int hv_util_detach(device_t dev) { - struct hv_device* hv_dev; - struct hv_vmbus_service* service; - size_t receive_buffer_offset; - - if (!destroyed_kvp) { - hv_kvp_deinit(); - destroyed_kvp = TRUE; - } + struct hv_device* hv_dev; + struct hv_util_sc* softc; hv_dev = vmbus_get_devctx(dev); hv_vmbus_channel_close(hv_dev->channel); - service = device_get_softc(dev); - receive_buffer_offset = service - &service_table[0]; + softc = device_get_softc(dev); - if (service->work_queue != NULL) - hv_work_queue_close(service->work_queue); - - free(receive_buffer[receive_buffer_offset], M_DEVBUF); - receive_buffer[receive_buffer_offset] = NULL; + free(softc->receive_buffer, M_DEVBUF); return (0); } - -static void -hv_util_init(void) -{ -} - -static int -hv_util_modevent(module_t mod, int event, void *arg) -{ - switch (event) { - case MOD_LOAD: - break; - case MOD_UNLOAD: - break; - default: - break; - } - return (0); -} - -static device_method_t util_methods[] = { - /* Device interface */ - DEVMETHOD(device_probe, hv_util_probe), - DEVMETHOD(device_attach, hv_util_attach), - DEVMETHOD(device_detach, hv_util_detach), - DEVMETHOD(device_shutdown, bus_generic_shutdown), - { 0, 0 } } -; - -static driver_t util_driver = { "hyperv-utils", 
util_methods, 0 }; - -static devclass_t util_devclass; - -DRIVER_MODULE(hv_utils, vmbus, util_driver, util_devclass, hv_util_modevent, 0); -MODULE_VERSION(hv_utils, 1); -MODULE_DEPEND(hv_utils, vmbus, 1, 1, 1); - -SYSINIT(hv_util_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, - hv_util_init, NULL); diff --git a/sys/dev/hyperv/utilities/hv_util.h b/sys/dev/hyperv/utilities/hv_util.h new file mode 100644 index 0000000..708dca8 --- /dev/null +++ b/sys/dev/hyperv/utilities/hv_util.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _HVUTIL_H_
+#define _HVUTIL_H_
+
+/**
+ * hv_util related structures
+ *
+ */
+typedef struct hv_util_sc {
+	/*
+	 * function to process Hyper-V messages
+	 */
+	void (*callback)(void *);
+
+	struct hv_device*	hv_dev;
+	uint8_t			*receive_buffer;
+} hv_util_sc;
+
+void hv_negotiate_version(
+	struct hv_vmbus_icmsg_hdr*		icmsghdrp,
+	struct hv_vmbus_icmsg_negotiate*	negop,
+	uint8_t*				buf);
+
+int hv_util_attach(device_t dev);
+int hv_util_detach(device_t dev);
+#endif
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c
index 7037768..bb777cc 100644
--- a/sys/dev/hyperv/vmbus/hv_channel.c
+++ b/sys/dev/hyperv/vmbus/hv_channel.c
@@ -52,6 +52,7 @@ static int vmbus_channel_create_gpadl_header(
 		uint32_t*	message_count);
 
 static void vmbus_channel_set_event(hv_vmbus_channel* channel);
+static void VmbusProcessChannelEvent(void* channel, int pending);
 
 /**
  * @brief Trigger an event notification on the specified channel
@@ -68,9 +69,7 @@ vmbus_channel_set_event(hv_vmbus_channel *channel)
 		+ ((channel->offer_msg.child_rel_id >> 5))));
 
 	monitor_page = (hv_vmbus_monitor_page *)
-	    hv_vmbus_g_connection.monitor_pages;
-
-	monitor_page++; /* Get the child to parent monitor page */
+	    hv_vmbus_g_connection.monitor_page_2;
 
 	synch_set_bit(channel->monitor_bit,
 		(uint32_t *)&monitor_page->
@@ -115,6 +114,9 @@ hv_vmbus_channel_open(
 	new_channel->on_channel_callback = pfn_on_channel_callback;
 	new_channel->channel_callback_context = context;
 
+	new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu];
+	TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel);
+
 	/* Allocate the ring buffer */
 	out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
 	    M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
@@ -518,6 +520,7 @@ static void
 hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
 {
 	int ret = 0;
+	struct taskqueue *rxq = channel->rxq;
 	hv_vmbus_channel_close_channel* msg;
 	hv_vmbus_channel_msg_info* info;
 
@@ -525,6 +528,11 @@ hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
 	channel->sc_creation_callback = NULL;
 
 	/*
+	 * Set rxq to NULL so that no more requests get scheduled.
+	 */
+	channel->rxq = NULL;
+	taskqueue_drain(rxq, &channel->channel_task);
+	/*
 	 * Grab the lock to prevent race condition when a packet received
 	 * and unloading driver is in the process.
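
The close path added in this hunk is a two-step teardown: unpublish channel->rxq first, so hv_vmbus_on_events() (later in this diff) stops enqueueing the channel task, then taskqueue_drain(9) waits out any instance that is already queued or running. Condensed into one helper, assuming the hv_vmbus_channel layout this diff adds to hyperv.h (channel_stop_rx is an illustrative name):

    /* Stop event delivery for a channel before tearing it down. */
    static void
    channel_stop_rx(hv_vmbus_channel *channel)
    {
        struct taskqueue *rxq = channel->rxq;

        /* 1. Unpublish the queue; the event path checks rxq and skips
         *    channels that are closed or closing. */
        channel->rxq = NULL;

        /* 2. Wait for any queued or in-flight channel_task to finish. */
        taskqueue_drain(rxq, &channel->channel_task);
    }
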
*/ @@ -666,11 +674,11 @@ hv_vmbus_channel_send_packet_pagebuffer( { int ret = 0; - int i = 0; boolean_t need_sig; uint32_t packet_len; + uint32_t page_buflen; uint32_t packetLen_aligned; - hv_vmbus_sg_buffer_list buffer_list[3]; + hv_vmbus_sg_buffer_list buffer_list[4]; hv_vmbus_channel_packet_page_buffer desc; uint32_t descSize; uint64_t alignedData = 0; @@ -682,36 +690,33 @@ hv_vmbus_channel_send_packet_pagebuffer( * Adjust the size down since hv_vmbus_channel_packet_page_buffer * is the largest size we support */ - descSize = sizeof(hv_vmbus_channel_packet_page_buffer) - - ((HV_MAX_PAGE_BUFFER_COUNT - page_count) * - sizeof(hv_vmbus_page_buffer)); - packet_len = descSize + buffer_len; + descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range); + page_buflen = sizeof(hv_vmbus_page_buffer) * page_count; + packet_len = descSize + page_buflen + buffer_len; packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t)); /* Setup the descriptor */ desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT; desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; - desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */ + /* in 8-bytes granularity */ + desc.data_offset8 = (descSize + page_buflen) >> 3; desc.length8 = (uint16_t) (packetLen_aligned >> 3); desc.transaction_id = request_id; desc.range_count = page_count; - for (i = 0; i < page_count; i++) { - desc.range[i].length = page_buffers[i].length; - desc.range[i].offset = page_buffers[i].offset; - desc.range[i].pfn = page_buffers[i].pfn; - } - buffer_list[0].data = &desc; buffer_list[0].length = descSize; - buffer_list[1].data = buffer; - buffer_list[1].length = buffer_len; + buffer_list[1].data = page_buffers; + buffer_list[1].length = page_buflen; - buffer_list[2].data = &alignedData; - buffer_list[2].length = packetLen_aligned - packet_len; + buffer_list[2].data = buffer; + buffer_list[2].length = buffer_len; - ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3, + buffer_list[3].data = &alignedData; + buffer_list[3].length = packetLen_aligned - packet_len; + + ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4, &need_sig); /* TODO: We should determine if this is optional */ @@ -880,3 +885,67 @@ hv_vmbus_channel_recv_packet_raw( return (0); } + + +/** + * Process a channel event notification + */ +static void +VmbusProcessChannelEvent(void* context, int pending) +{ + void* arg; + uint32_t bytes_to_read; + hv_vmbus_channel* channel = (hv_vmbus_channel*)context; + boolean_t is_batched_reading; + + /** + * Find the channel based on this relid and invokes + * the channel callback to process the event + */ + + if (channel == NULL) { + return; + } + /** + * To deal with the race condition where we might + * receive a packet while the relevant driver is + * being unloaded, dispatch the callback while + * holding the channel lock. The unloading driver + * will acquire the same channel lock to set the + * callback to NULL. This closes the window. + */ + + /* + * Disable the lock due to newly added WITNESS check in r277723. + * Will seek other way to avoid race condition. + * -- whu + */ + // mtx_lock(&channel->inbound_lock); + if (channel->on_channel_callback != NULL) { + arg = channel->channel_callback_context; + is_batched_reading = channel->batched_reading; + /* + * Optimize host to guest signaling by ensuring: + * 1. While reading the channel, we disable interrupts from + * host. + * 2. Ensure that we process all posted messages from the host + * before returning from this callback. + * 3. 
Once we return, enable signaling from the host. Once this + * state is set we check to see if additional packets are + * available to read. In this case we repeat the process. + */ + do { + if (is_batched_reading) + hv_ring_buffer_read_begin(&channel->inbound); + + channel->on_channel_callback(arg); + + if (is_batched_reading) + bytes_to_read = + hv_ring_buffer_read_end(&channel->inbound); + else + bytes_to_read = 0; + } while (is_batched_reading && (bytes_to_read != 0)); + } + // mtx_unlock(&channel->inbound_lock); +} diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c index 4ccb647..ab6e8ad 100644 --- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c +++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c @@ -39,8 +39,10 @@ __FBSDID("$FreeBSD$"); */ static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_offer_internal(void* context); static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr); +static void vmbus_channel_on_offer_rescind_internal(void* context); static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr); static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr); @@ -52,41 +54,46 @@ static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr); hv_vmbus_channel_msg_table_entry g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = { { HV_CHANNEL_MESSAGE_INVALID, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_OFFER_CHANNEL, - 0, vmbus_channel_on_offer }, + vmbus_channel_on_offer }, { HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER, - 0, vmbus_channel_on_offer_rescind }, + vmbus_channel_on_offer_rescind }, { HV_CHANNEL_MESSAGE_REQUEST_OFFERS, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED, - 1, vmbus_channel_on_offers_delivered }, + vmbus_channel_on_offers_delivered }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT, - 1, vmbus_channel_on_open_result }, + vmbus_channel_on_open_result }, { HV_CHANNEL_MESSAGE_CLOSE_CHANNEL, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGEL_GPADL_HEADER, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_GPADL_BODY, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_GPADL_CREATED, - 1, vmbus_channel_on_gpadl_created }, + vmbus_channel_on_gpadl_created }, { HV_CHANNEL_MESSAGE_GPADL_TEARDOWN, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_GPADL_TORNDOWN, - 1, vmbus_channel_on_gpadl_torndown }, + vmbus_channel_on_gpadl_torndown }, { HV_CHANNEL_MESSAGE_REL_ID_RELEASED, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_INITIATED_CONTACT, - 0, NULL }, + NULL }, { HV_CHANNEL_MESSAGE_VERSION_RESPONSE, - 1, vmbus_channel_on_version_response }, + vmbus_channel_on_version_response }, { HV_CHANNEL_MESSAGE_UNLOAD, - 0, NULL } + NULL } }; +typedef struct hv_work_item { + struct task work; + void (*callback)(void *); + void* context; +} hv_work_item; /** * Implementation of the work abstraction. @@ -96,120 +103,30 @@ work_item_callback(void *work, int pending) { struct hv_work_item *w = (struct hv_work_item *)work; - /* - * Serialize work execution. 
- */ - if (w->wq->work_sema != NULL) { - sema_wait(w->wq->work_sema); - } - w->callback(w->context); - if (w->wq->work_sema != NULL) { - sema_post(w->wq->work_sema); - } - free(w, M_DEVBUF); } -struct hv_work_queue* -hv_work_queue_create(char* name) -{ - static unsigned int qid = 0; - char qname[64]; - int pri; - struct hv_work_queue* wq; - - wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO); - KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n")); - if (wq == NULL) - return (NULL); - - /* - * We use work abstraction to handle messages - * coming from the host and these are typically offers. - * Some FreeBsd drivers appear to have a concurrency issue - * where probe/attach needs to be serialized. We ensure that - * by having only one thread process work elements in a - * specific queue by serializing work execution. - * - */ - if (strcmp(name, "vmbusQ") == 0) { - pri = PI_DISK; - } else { /* control */ - pri = PI_NET; - /* - * Initialize semaphore for this queue by pointing - * to the globale semaphore used for synchronizing all - * control messages. - */ - wq->work_sema = &hv_vmbus_g_connection.control_sema; - } - - sprintf(qname, "hv_%s_%u", name, qid); - - /* - * Fixme: FreeBSD 8.2 has a different prototype for - * taskqueue_create(), and for certain other taskqueue functions. - * We need to research the implications of these changes. - * Fixme: Not sure when the changes were introduced. - */ - wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue, - &wq->queue - #if __FreeBSD_version < 800000 - , &wq->proc - #endif - ); - - if (wq->queue == NULL) { - free(wq, M_DEVBUF); - return (NULL); - } - - if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) { - taskqueue_free(wq->queue); - free(wq, M_DEVBUF); - return (NULL); - } - - qid++; - - return (wq); -} - -void -hv_work_queue_close(struct hv_work_queue *wq) -{ - /* - * KYS: Need to drain the taskqueue - * before we close the hv_work_queue. 
- */
-    /*KYS: taskqueue_drain(wq->tq, ); */
-    taskqueue_free(wq->queue);
-    free(wq, M_DEVBUF);
-}
-
 /**
  * @brief Create work item
  */
-int
+static int
 hv_queue_work_item(
-	struct hv_work_queue	*wq,
	void			(*callback)(void *),
	void			*context)
 {
	struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
-	    M_DEVBUF, M_NOWAIT | M_ZERO);
+	    M_DEVBUF, M_NOWAIT);
	KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
	if (w == NULL)
	    return (ENOMEM);

	w->callback = callback;
	w->context = context;
-	w->wq = wq;

	TASK_INIT(&w->work, 0, work_item_callback, w);

-	return (taskqueue_enqueue(wq->queue, &w->work));
+	return (taskqueue_enqueue(taskqueue_thread, &w->work));
 }
 
 
@@ -224,10 +141,7 @@ hv_vmbus_allocate_channel(void)
	channel = (hv_vmbus_channel*) malloc(
					sizeof(hv_vmbus_channel),
					M_DEVBUF,
-					M_NOWAIT | M_ZERO);
-	KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!"));
-	if (channel == NULL)
-	    return (NULL);
+					M_WAITOK | M_ZERO);
 
	mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
	mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
@@ -238,16 +152,6 @@ }
 
 /**
- * @brief Release the vmbus channel object itself
- */
-static inline void
-ReleaseVmbusChannel(void *context)
-{
-	hv_vmbus_channel* channel = (hv_vmbus_channel*) context;
-	free(channel, M_DEVBUF);
-}
-
-/**
  * @brief Release the resources used by the vmbus channel object
  */
 void
@@ -255,13 +159,8 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
 {
	mtx_destroy(&channel->sc_lock);
	mtx_destroy(&channel->inbound_lock);
-	/*
-	 * We have to release the channel's workqueue/thread in
-	 * the vmbus's workqueue/thread context
-	 * ie we can't destroy ourselves
-	 */
-	hv_queue_work_item(hv_vmbus_g_connection.work_queue,
-	    ReleaseVmbusChannel, (void *) channel);
+
+	free(channel, M_DEVBUF);
 }
 
 /**
@@ -459,7 +358,7 @@ static void
 vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
 {
	hv_vmbus_channel_offer_channel* offer;
-	hv_vmbus_channel* new_channel;
+	hv_vmbus_channel_offer_channel* copied;
 
	offer = (hv_vmbus_channel_offer_channel*) hdr;
 
@@ -469,10 +368,25 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
	guidType = &offer->offer.interface_type;
	guidInstance = &offer->offer.interface_instance;
 
+	/* Copy the offer data. */
+	copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
+	if (copied == NULL) {
+		printf("failed to allocate memory\n");
+		return;
+	}
+
+	memcpy(copied, hdr, sizeof(*copied));
+	hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
+}
+
+static void
+vmbus_channel_on_offer_internal(void* context)
+{
+	hv_vmbus_channel* new_channel;
+
+	hv_vmbus_channel_offer_channel* offer = (hv_vmbus_channel_offer_channel*)context;
	/* Allocate the channel object and save this offer */
	new_channel = hv_vmbus_allocate_channel();
-	if (new_channel == NULL)
-	    return;
 
	/*
	 * By default we setup state to enable batched
@@ -512,6 +426,8 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
	new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
 
	vmbus_channel_process_offer(new_channel);
+
+	free(offer, M_DEVBUF);
 }
 
 /**
@@ -529,13 +445,20 @@ vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
	rescind = (hv_vmbus_channel_rescind_offer*) hdr;
 
	channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
-	if (channel == NULL) 
+	if (channel == NULL)
		return;
 
-	hv_vmbus_child_device_unregister(channel->device);
-	mtx_lock(&hv_vmbus_g_connection.channel_lock);
+	hv_queue_work_item(vmbus_channel_on_offer_rescind_internal, channel);
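
The rework of vmbus_channel_on_offer() above is the classic copy-then-defer idiom: the message swi handler may not sleep, so it snapshots the offer with M_NOWAIT and pushes the sleepable part (M_WAITOK channel allocation, device attach) onto taskqueue_thread. A self-contained sketch of the same pattern with stock taskqueue(9), using illustrative names (deferred_msg, defer_msg):

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/errno.h>
    #include <sys/kernel.h>
    #include <sys/malloc.h>
    #include <sys/taskqueue.h>

    struct deferred_msg {
        struct task  task;
        size_t       len;
        uint8_t      payload[256];  /* private snapshot of the message */
    };

    static void
    deferred_msg_fn(void *context, int pending __unused)
    {
        struct deferred_msg *dm = context;

        /* Runs on taskqueue_thread, where sleeping is allowed. */
        /* ... process dm->payload / dm->len ... */
        free(dm, M_DEVBUF);
    }

    /* Callable from swi/interrupt context: snapshot and defer. */
    static int
    defer_msg(const void *msg, size_t len)
    {
        struct deferred_msg *dm;

        if (len > sizeof(dm->payload))
            return (EINVAL);
        dm = malloc(sizeof(*dm), M_DEVBUF, M_NOWAIT);
        if (dm == NULL)
            return (ENOMEM);
        memcpy(dm->payload, msg, len);
        dm->len = len;
        TASK_INIT(&dm->task, 0, deferred_msg_fn, dm);
        return (taskqueue_enqueue(taskqueue_thread, &dm->task));
    }
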
hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL; - mtx_unlock(&hv_vmbus_g_connection.channel_lock); +} + +static void +vmbus_channel_on_offer_rescind_internal(void *context) +{ + hv_vmbus_channel* channel; + + channel = (hv_vmbus_channel*)context; + hv_vmbus_child_device_unregister(channel->device); } /** @@ -712,35 +635,6 @@ vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr) } /** - * @brief Handler for channel protocol messages. - * - * This is invoked in the vmbus worker thread context. - */ -void -hv_vmbus_on_channel_message(void *context) -{ - hv_vmbus_message* msg; - hv_vmbus_channel_msg_header* hdr; - int size; - - msg = (hv_vmbus_message*) context; - hdr = (hv_vmbus_channel_msg_header*) msg->u.payload; - size = msg->header.payload_size; - - if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) { - free(msg, M_DEVBUF); - return; - } - - if (g_channel_message_table[hdr->message_type].messageHandler) { - g_channel_message_table[hdr->message_type].messageHandler(hdr); - } - - /* Free the msg that was allocated in VmbusOnMsgDPC() */ - free(msg, M_DEVBUF); -} - -/** * @brief Send a request to get all our pending offers. */ int @@ -765,8 +659,7 @@ hv_vmbus_request_channel_offers(void) ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header)); - if (msg_info) - free(msg_info, M_DEVBUF); + free(msg_info, M_DEVBUF); return (ret); } diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c index cfdc9bb..fb1879d 100644 --- a/sys/dev/hyperv/vmbus/hv_connection.c +++ b/sys/dev/hyperv/vmbus/hv_connection.c @@ -90,12 +90,10 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info, hv_vmbus_g_connection.interrupt_page); msg->monitor_page_1 = hv_get_phys_addr( - hv_vmbus_g_connection.monitor_pages); + hv_vmbus_g_connection.monitor_page_1); - msg->monitor_page_2 = - hv_get_phys_addr( - ((uint8_t *) hv_vmbus_g_connection.monitor_pages - + PAGE_SIZE)); + msg->monitor_page_2 = hv_get_phys_addr( + hv_vmbus_g_connection.monitor_page_2); /** * Add to list before we send the request since we may receive the @@ -168,8 +166,6 @@ hv_vmbus_connect(void) { * Initialize the vmbus connection */ hv_vmbus_g_connection.connect_state = HV_CONNECTING; - hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ"); - sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema"); TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor); mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg", @@ -183,18 +179,9 @@ hv_vmbus_connect(void) { * Setup the vmbus event connection for channel interrupt abstraction * stuff */ - hv_vmbus_g_connection.interrupt_page = contigmalloc( + hv_vmbus_g_connection.interrupt_page = malloc( PAGE_SIZE, M_DEVBUF, - M_NOWAIT | M_ZERO, 0UL, - BUS_SPACE_MAXADDR, - PAGE_SIZE, 0); - KASSERT(hv_vmbus_g_connection.interrupt_page != NULL, - ("Error VMBUS: malloc failed to allocate Channel" - " Request Event message!")); - if (hv_vmbus_g_connection.interrupt_page == NULL) { - ret = ENOMEM; - goto cleanup; - } + M_WAITOK | M_ZERO); hv_vmbus_g_connection.recv_interrupt_page = hv_vmbus_g_connection.interrupt_page; @@ -207,31 +194,19 @@ hv_vmbus_connect(void) { * Set up the monitor notification facility. 
The 1st page for * parent->child and the 2nd page for child->parent */ - hv_vmbus_g_connection.monitor_pages = contigmalloc( - 2 * PAGE_SIZE, + hv_vmbus_g_connection.monitor_page_1 = malloc( + PAGE_SIZE, M_DEVBUF, - M_NOWAIT | M_ZERO, - 0UL, - BUS_SPACE_MAXADDR, + M_WAITOK | M_ZERO); + hv_vmbus_g_connection.monitor_page_2 = malloc( PAGE_SIZE, - 0); - KASSERT(hv_vmbus_g_connection.monitor_pages != NULL, - ("Error VMBUS: malloc failed to allocate Monitor Pages!")); - if (hv_vmbus_g_connection.monitor_pages == NULL) { - ret = ENOMEM; - goto cleanup; - } + M_DEVBUF, + M_WAITOK | M_ZERO); msg_info = (hv_vmbus_channel_msg_info*) malloc(sizeof(hv_vmbus_channel_msg_info) + sizeof(hv_vmbus_channel_initiate_contact), - M_DEVBUF, M_NOWAIT | M_ZERO); - KASSERT(msg_info != NULL, - ("Error VMBUS: malloc failed for Initiate Contact message!")); - if (msg_info == NULL) { - ret = ENOMEM; - goto cleanup; - } + M_DEVBUF, M_WAITOK | M_ZERO); hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) * HV_CHANNEL_MAX_COUNT, @@ -273,26 +248,16 @@ hv_vmbus_connect(void) { hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; - hv_work_queue_close(hv_vmbus_g_connection.work_queue); - sema_destroy(&hv_vmbus_g_connection.control_sema); mtx_destroy(&hv_vmbus_g_connection.channel_lock); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); if (hv_vmbus_g_connection.interrupt_page != NULL) { - contigfree( - hv_vmbus_g_connection.interrupt_page, - PAGE_SIZE, - M_DEVBUF); + free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); hv_vmbus_g_connection.interrupt_page = NULL; } - if (hv_vmbus_g_connection.monitor_pages != NULL) { - contigfree( - hv_vmbus_g_connection.monitor_pages, - 2 * PAGE_SIZE, - M_DEVBUF); - hv_vmbus_g_connection.monitor_pages = NULL; - } + free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF); + free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF); if (msg_info) { sema_destroy(&msg_info->wait_sema); @@ -309,108 +274,29 @@ hv_vmbus_connect(void) { int hv_vmbus_disconnect(void) { int ret = 0; - hv_vmbus_channel_unload* msg; - - msg = malloc(sizeof(hv_vmbus_channel_unload), - M_DEVBUF, M_NOWAIT | M_ZERO); - KASSERT(msg != NULL, - ("Error VMBUS: malloc failed to allocate Channel Unload Msg!")); - if (msg == NULL) - return (ENOMEM); - - msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD; + hv_vmbus_channel_unload msg; - ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload)); + msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD; + ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload)); - contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF); + free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF); mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock); - hv_work_queue_close(hv_vmbus_g_connection.work_queue); - sema_destroy(&hv_vmbus_g_connection.control_sema); - free(hv_vmbus_g_connection.channels, M_DEVBUF); hv_vmbus_g_connection.connect_state = HV_DISCONNECTED; - free(msg, M_DEVBUF); - return (ret); } /** - * Process a channel event notification - */ -static void -VmbusProcessChannelEvent(uint32_t relid) -{ - void* arg; - uint32_t bytes_to_read; - hv_vmbus_channel* channel; - boolean_t is_batched_reading; - - /** - * Find the channel based on this relid and invokes - * the channel callback to process the event - */ - - channel = hv_vmbus_g_connection.channels[relid]; - - if (channel == NULL) { - return; - } - /** - * To deal with the race condition where we might - * receive a packet while the relevant driver is - * being unloaded, dispatch the callback while - * holding the 
channel lock. The unloading driver - * will acquire the same channel lock to set the - * callback to NULL. This closes the window. - */ - - /* - * Disable the lock due to newly added WITNESS check in r277723. - * Will seek other way to avoid race condition. - * -- whu - */ - // mtx_lock(&channel->inbound_lock); - if (channel->on_channel_callback != NULL) { - arg = channel->channel_callback_context; - is_batched_reading = channel->batched_reading; - /* - * Optimize host to guest signaling by ensuring: - * 1. While reading the channel, we disable interrupts from - * host. - * 2. Ensure that we process all posted messages from the host - * before returning from this callback. - * 3. Once we return, enable signaling from the host. Once this - * state is set we check to see if additional packets are - * available to read. In this case we repeat the process. - */ - do { - if (is_batched_reading) - hv_ring_buffer_read_begin(&channel->inbound); - - channel->on_channel_callback(arg); - - if (is_batched_reading) - bytes_to_read = - hv_ring_buffer_read_end(&channel->inbound); - else - bytes_to_read = 0; - } while (is_batched_reading && (bytes_to_read != 0)); - } - // mtx_unlock(&channel->inbound_lock); -} - -/** * Handler for events */ void -hv_vmbus_on_events(void *arg) +hv_vmbus_on_events(int cpu) { int bit; - int cpu; int dword; void *page_addr; uint32_t* recv_interrupt_page = NULL; @@ -419,7 +305,6 @@ hv_vmbus_on_events(void *arg) hv_vmbus_synic_event_flags *event; /* int maxdword = PAGE_SIZE >> 3; */ - cpu = (int)(long)arg; KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " "cpu out of range!")); @@ -461,8 +346,14 @@ hv_vmbus_on_events(void *arg) */ continue; } else { - VmbusProcessChannelEvent(rel_id); - + hv_vmbus_channel * channel = hv_vmbus_g_connection.channels[rel_id]; + /* if channel is closed or closing */ + if (channel == NULL || channel->rxq == NULL) + continue; + + if (channel->batched_reading) + hv_ring_buffer_read_begin(&channel->inbound); + taskqueue_enqueue_fast(channel->rxq, &channel->channel_task); } } } diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c index ca5641f..6afc2b8 100644 --- a/sys/dev/hyperv/vmbus/hv_hv.c +++ b/sys/dev/hyperv/vmbus/hv_hv.c @@ -189,11 +189,7 @@ hv_vmbus_init(void) * See if the hypercall page is already set */ hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL); - virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); - KASSERT(virt_addr != NULL, - ("Error VMBUS: malloc failed to allocate page during init!")); - if (virt_addr == NULL) - goto cleanup; + virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); hypercall_msr.u.enable = 1; hypercall_msr.u.guest_physical_address = diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c index 66a3f39..c8d6894 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c +++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c @@ -83,8 +83,6 @@ vmbus_msg_swintr(void *arg) hv_vmbus_channel_msg_table_entry *entry; hv_vmbus_channel_msg_type msg_type; hv_vmbus_message* msg; - hv_vmbus_message* copied; - static bool warned = false; cpu = (int)(long)arg; KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: " @@ -100,31 +98,15 @@ vmbus_msg_swintr(void *arg) hdr = (hv_vmbus_channel_msg_header *)msg->u.payload; msg_type = hdr->message_type; - if (msg_type >= HV_CHANNEL_MESSAGE_COUNT && !warned) { - warned = true; + if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) { printf("VMBUS: unknown message type = %d\n", msg_type); goto handled; } entry = 
&g_channel_message_table[msg_type]; - if (entry->handler_no_sleep) + if (entry->messageHandler) entry->messageHandler(hdr); - else { - - copied = malloc(sizeof(hv_vmbus_message), - M_DEVBUF, M_NOWAIT); - KASSERT(copied != NULL, - ("Error VMBUS: malloc failed to allocate" - " hv_vmbus_message!")); - if (copied == NULL) - continue; - - memcpy(copied, msg, sizeof(hv_vmbus_message)); - hv_queue_work_item(hv_vmbus_g_connection.work_queue, - hv_vmbus_on_channel_message, - copied); - } handled: msg->header.message_type = HV_MESSAGE_TYPE_NONE; @@ -177,7 +159,7 @@ hv_vmbus_isr(struct trapframe *frame) (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { /* Since we are a child, we only need to check bit 0 */ if (synch_test_and_clear_bit(0, &event->flags32[0])) { - swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); + hv_vmbus_on_events(cpu); } } else { /* @@ -187,7 +169,7 @@ hv_vmbus_isr(struct trapframe *frame) * Directly schedule the event software interrupt on * current cpu. */ - swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0); + hv_vmbus_on_events(cpu); } /* Check if there are actual msgs to be process */ @@ -225,7 +207,6 @@ hv_vmbus_isr(struct trapframe *frame) return (FILTER_HANDLED); } -uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; u_long *hv_vmbus_intr_cpu[MAXCPU]; void @@ -310,12 +291,7 @@ hv_vmbus_child_device_create( * Allocate the new child device */ child_dev = malloc(sizeof(hv_device), M_DEVBUF, - M_NOWAIT | M_ZERO); - KASSERT(child_dev != NULL, - ("Error VMBUS: malloc failed to allocate hv_device!")); - - if (child_dev == NULL) - return (NULL); + M_WAITOK | M_ZERO); child_dev->channel = channel; memcpy(&child_dev->class_id, &type, sizeof(hv_guid)); @@ -455,6 +431,19 @@ vmbus_vector_free(int vector) #endif /* HYPERV */ +static void +vmbus_cpuset_setthread_task(void *xmask, int pending __unused) +{ + cpuset_t *mask = xmask; + int error; + + error = cpuset_setthread(curthread->td_tid, mask); + if (error) { + panic("curthread=%ju: can't pin; error=%d", + (uintmax_t)curthread->td_tid, error); + } +} + /** * @brief Main vmbus driver initialization routine. * @@ -472,6 +461,7 @@ vmbus_bus_init(void) { int i, j, n, ret; char buf[MAXCOMLEN + 1]; + cpuset_t cpu_mask; if (vmbus_inited) return (0); @@ -508,10 +498,7 @@ vmbus_bus_init(void) setup_args.vector = hv_vmbus_g_context.hv_cb_vector; CPU_FOREACH(j) { - hv_vmbus_swintr_event_cpu[j] = 0; - hv_vmbus_g_context.hv_event_intr_event[j] = NULL; hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; - hv_vmbus_g_context.event_swintr[j] = NULL; hv_vmbus_g_context.msg_swintr[j] = NULL; snprintf(buf, sizeof(buf), "cpu%d:hyperv", j); @@ -525,6 +512,26 @@ vmbus_bus_init(void) * Per cpu setup. */ CPU_FOREACH(j) { + struct task cpuset_task; + + /* + * Setup taskqueue to handle events + */ + hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK, + taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]); + if (hv_vmbus_g_context.hv_event_queue[j] == NULL) { + if (bootverbose) + printf("VMBUS: failed to setup taskqueue\n"); + goto cleanup1; + } + taskqueue_start_threads(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET, + "hvevent%d", j); + + CPU_SETOF(j, &cpu_mask); + TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask); + taskqueue_enqueue(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task); + taskqueue_drain(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task); + /* * Setup software interrupt thread and handler for msg handling. 
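
The per-CPU loop above uses a trick worth noting: stable/10 taskqueue(9) has no call to bind a queue's thread to a CPU, so the code enqueues a one-shot task that runs cpuset_setthread() on the taskqueue thread itself, then drains it (the task and mask live on the caller's stack, so the drain is what keeps them valid). Factored out under the same assumptions (pin_taskqueue_to_cpu is an illustrative name):

    /* Pin tq's thread to 'cpu'; assumes vmbus_cpuset_setthread_task()
     * from this diff plus <sys/cpuset.h>. */
    static void
    pin_taskqueue_to_cpu(struct taskqueue *tq, int cpu)
    {
        struct task cpuset_task;
        cpuset_t cpu_mask;

        CPU_SETOF(cpu, &cpu_mask);
        TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask);
        taskqueue_enqueue(tq, &cpuset_task);
        /* The task executes on tq's own thread, pinning it. */
        taskqueue_drain(tq, &cpuset_task);
    }
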
*/ @@ -543,7 +550,7 @@ vmbus_bus_init(void) */ ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j], j); - if (ret) { + if (ret) { if(bootverbose) printf("VMBUS: failed to bind msg swi thread " "to cpu %d\n", j); @@ -551,30 +558,11 @@ vmbus_bus_init(void) } /* - * Setup software interrupt thread and handler for - * event handling. - */ - ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j], - "hv_event", hv_vmbus_on_events, (void *)(long)j, - SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]); - if (ret) { - if(bootverbose) - printf("VMBUS: failed to setup event swi for " - "cpu %d\n", j); - goto cleanup1; - } - - /* * Prepare the per cpu msg and event pages to be called on each cpu. */ for(i = 0; i < 2; i++) { setup_args.page_buffers[2 * j + i] = - malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO); - if (setup_args.page_buffers[2 * j + i] == NULL) { - KASSERT(setup_args.page_buffers[2 * j + i] != NULL, - ("Error VMBUS: malloc failed!")); - goto cleanup1; - } + malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO); } } @@ -607,12 +595,11 @@ vmbus_bus_init(void) * remove swi and vmbus callback vector; */ CPU_FOREACH(j) { + if (hv_vmbus_g_context.hv_event_queue[j] != NULL) + taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]); if (hv_vmbus_g_context.msg_swintr[j] != NULL) swi_remove(hv_vmbus_g_context.msg_swintr[j]); - if (hv_vmbus_g_context.event_swintr[j] != NULL) - swi_remove(hv_vmbus_g_context.event_swintr[j]); hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; - hv_vmbus_g_context.hv_event_intr_event[j] = NULL; } vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); @@ -677,12 +664,11 @@ vmbus_bus_exit(void) /* remove swi */ CPU_FOREACH(i) { + if (hv_vmbus_g_context.hv_event_queue[i] != NULL) + taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]); if (hv_vmbus_g_context.msg_swintr[i] != NULL) swi_remove(hv_vmbus_g_context.msg_swintr[i]); - if (hv_vmbus_g_context.event_swintr[i] != NULL) - swi_remove(hv_vmbus_g_context.event_swintr[i]); hv_vmbus_g_context.hv_msg_intr_event[i] = NULL; - hv_vmbus_g_context.hv_event_intr_event[i] = NULL; } vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector); diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h index 13a35c4..5f62072 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h +++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h @@ -202,9 +202,8 @@ typedef struct { * Each cpu has its own software interrupt handler for channel * event and msg handling. */ - struct intr_event *hv_event_intr_event[MAXCPU]; + struct taskqueue *hv_event_queue[MAXCPU]; struct intr_event *hv_msg_intr_event[MAXCPU]; - void *event_swintr[MAXCPU]; void *msg_swintr[MAXCPU]; /* * Host use this vector to intrrupt guest for vmbus channel @@ -351,7 +350,8 @@ typedef struct { * notification and 2nd is child->parent * notification */ - void *monitor_pages; + void *monitor_page_1; + void *monitor_page_2; TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor; struct mtx channel_msg_lock; /** @@ -363,10 +363,8 @@ typedef struct { /** * channel table for fast lookup through id. 
- */ 
+ */
	hv_vmbus_channel **channels;
-	hv_vmbus_handle	work_queue;
-	struct sema	control_sema;
 } hv_vmbus_connection;
 
 typedef union {
@@ -633,7 +631,6 @@ typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg);
 
 typedef struct hv_vmbus_channel_msg_table_entry {
	hv_vmbus_channel_msg_type	messageType;
-	bool	handler_no_sleep; /* true: the handler doesn't sleep */
	vmbus_msg_handler	messageHandler;
 } hv_vmbus_channel_msg_table_entry;
 
@@ -683,7 +680,6 @@ uint32_t	hv_ring_buffer_read_end(
 
 hv_vmbus_channel*	hv_vmbus_allocate_channel(void);
 void			hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel);
-void			hv_vmbus_on_channel_message(void *context);
 int			hv_vmbus_request_channel_offers(void);
 void			hv_vmbus_release_unattached_channels(void);
 int			hv_vmbus_init(void);
@@ -717,7 +713,7 @@ int			hv_vmbus_connect(void);
 int			hv_vmbus_disconnect(void);
 int			hv_vmbus_post_message(void *buffer, size_t buf_size);
 int			hv_vmbus_set_event(hv_vmbus_channel *channel);
-void			hv_vmbus_on_events(void *);
+void			hv_vmbus_on_events(int cpu);
 
 /**
  * Event Timer interfaces
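
Closing note on the hv_channel.c hunk earlier in this diff: hv_vmbus_channel_send_packet_pagebuffer() used to copy every page range into a stack descriptor sized for HV_MAX_PAGE_BUFFER_COUNT; it now hands the caller's page array to hv_ring_buffer_write() as its own gather segment, so the descriptor shrinks to __offsetof(..., range) and data_offset8 must cover header plus page array. A userland check of that layout arithmetic, using stand-in types (the real VMBus structures may differ in detail):

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    /* Stand-ins for hv_vmbus_page_buffer / hv_vmbus_channel_packet_page_buffer. */
    typedef struct {
        uint32_t length, offset;
        uint64_t pfn;
    } page_buffer;

    typedef struct {
        uint16_t    type, data_offset8, length8, flags;
        uint64_t    transaction_id;
        uint32_t    reserved, range_count;
        page_buffer range[32];
    } packet_page_buffer;

    int
    main(void)
    {
        uint32_t page_count = 3, buffer_len = 100;
        uint32_t desc_size = offsetof(packet_page_buffer, range);
        uint32_t page_buflen = sizeof(page_buffer) * page_count;
        uint32_t packet_len = desc_size + page_buflen + buffer_len;
        uint32_t aligned = (packet_len + 7) & ~7u;  /* HV_ALIGN_UP(len, 8) */

        /* With the values above: data_offset8=9, length8=22. */
        printf("data_offset8=%u length8=%u\n",
            (desc_size + page_buflen) >> 3, aligned >> 3);
        return (0);
    }
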