path: root/sys/dev/hyperv
author     Renato Botelho <renato@netgate.com>  2016-06-14 14:37:21 -0500
committer  Renato Botelho <renato@netgate.com>  2016-06-14 14:37:21 -0500
commit     b8632c4f34175c7018be77059ab229e755eb67e0 (patch)
tree       712b8119449ce1d7585aef984d17257bea58bf58 /sys/dev/hyperv
parent     47dfb8d658406ebf07225c0104ebe4be06ae405f (diff)
parent     494811e2fb5cf62d613082ffb6e26922a0b5b2e6 (diff)
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys/dev/hyperv')
-rw-r--r--  sys/dev/hyperv/include/hyperv.h                  |   56
-rw-r--r--  sys/dev/hyperv/netvsc/hv_net_vsc.c               |   48
-rw-r--r--  sys/dev/hyperv/netvsc/hv_net_vsc.h               |   99
-rw-r--r--  sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c    | 1826
-rw-r--r--  sys/dev/hyperv/netvsc/hv_rndis_filter.c          |   24
-rw-r--r--  sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c  |   17
-rw-r--r--  sys/dev/hyperv/utilities/hv_heartbeat.c          |  129
-rw-r--r--  sys/dev/hyperv/utilities/hv_kvp.c                |  556
-rw-r--r--  sys/dev/hyperv/utilities/hv_kvp.h                |   13
-rw-r--r--  sys/dev/hyperv/utilities/hv_shutdown.c           |  151
-rw-r--r--  sys/dev/hyperv/utilities/hv_timesync.c           |  216
-rw-r--r--  sys/dev/hyperv/utilities/hv_util.c               |  415
-rw-r--r--  sys/dev/hyperv/utilities/hv_util.h               |   55
-rw-r--r--  sys/dev/hyperv/vmbus/hv_channel.c                |  111
-rw-r--r--  sys/dev/hyperv/vmbus/hv_channel_mgmt.c           |  231
-rw-r--r--  sys/dev/hyperv/vmbus/hv_connection.c             |  165
-rw-r--r--  sys/dev/hyperv/vmbus/hv_hv.c                     |    6
-rw-r--r--  sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c      |  104
-rw-r--r--  sys/dev/hyperv/vmbus/hv_vmbus_priv.h             |   14
19 files changed, 2400 insertions(+), 1836 deletions(-)
diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h
index 1a45b7b..f45543b 100644
--- a/sys/dev/hyperv/include/hyperv.h
+++ b/sys/dev/hyperv/include/hyperv.h
@@ -755,6 +755,8 @@ typedef struct hv_vmbus_channel {
struct mtx inbound_lock;
+ struct taskqueue * rxq;
+ struct task channel_task;
hv_vmbus_pfn_channel_callback on_channel_callback;
void* channel_callback_context;
@@ -906,30 +908,6 @@ int hv_vmbus_channel_teardown_gpdal(
struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
-/*
- * Work abstraction defines
- */
-typedef struct hv_work_queue {
- struct taskqueue* queue;
- struct proc* proc;
- struct sema* work_sema;
-} hv_work_queue;
-
-typedef struct hv_work_item {
- struct task work;
- void (*callback)(void *);
- void* context;
- hv_work_queue* wq;
-} hv_work_item;
-
-struct hv_work_queue* hv_work_queue_create(char* name);
-
-void hv_work_queue_close(struct hv_work_queue* wq);
-
-int hv_queue_work_item(
- hv_work_queue* wq,
- void (*callback)(void *),
- void* context);
/**
* @brief Get physical address from virtual
*/
@@ -941,35 +919,5 @@ hv_get_phys_addr(void *virt)
return (ret);
}
-
-/**
- * KVP related structures
- *
- */
-typedef struct hv_vmbus_service {
- hv_guid guid; /* Hyper-V GUID */
- char *name; /* name of service */
- boolean_t enabled; /* service enabled */
- hv_work_queue *work_queue; /* background work queue */
-
- /*
- * function to initialize service
- */
- int (*init)(struct hv_vmbus_service *);
-
- /*
- * function to process Hyper-V messages
- */
- void (*callback)(void *);
-} hv_vmbus_service;
-
-extern uint8_t* receive_buffer[];
-extern hv_vmbus_service service_table[];
extern uint32_t hv_vmbus_protocal_version;
-
-void hv_kvp_callback(void *context);
-int hv_kvp_init(hv_vmbus_service *serv);
-void hv_kvp_deinit(void);
-
#endif /* __HYPERV_H__ */
-
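Note: the hunks above retire the driver-private hv_work_queue wrapper (and the KVP service glue built on it) in favor of stock taskqueue(9), per the new rxq/channel_task fields. A minimal sketch of the replacement pattern — only the field names come from this commit; the create/enqueue details and the task function name are illustrative:

    #include <sys/param.h>
    #include <sys/priority.h>
    #include <sys/taskqueue.h>

    /* Setup: one thread servicing this channel's callbacks.
     * vmbus_channel_task is a placeholder for the callback task function. */
    channel->rxq = taskqueue_create_fast("hv chan", M_WAITOK,
        taskqueue_thread_enqueue, &channel->rxq);
    taskqueue_start_threads(&channel->rxq, 1, PI_NET, "hv chan rx");
    TASK_INIT(&channel->channel_task, 0, vmbus_channel_task, channel);

    /* Producer (interrupt path): schedule the channel callback. */
    taskqueue_enqueue(channel->rxq, &channel->channel_task);

    /* Teardown: wait out queued work, then destroy the queue. */
    taskqueue_drain(channel->rxq, &channel->channel_task);
    taskqueue_free(channel->rxq);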
diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c
index 64e7578..9a89b62 100644
--- a/sys/dev/hyperv/netvsc/hv_net_vsc.c
+++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c
@@ -73,10 +73,7 @@ hv_nv_alloc_net_device(struct hv_device *device)
netvsc_dev *net_dev;
hn_softc_t *sc = device_get_softc(device->device);
- net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_NOWAIT | M_ZERO);
- if (net_dev == NULL) {
- return (NULL);
- }
+ net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO);
net_dev->dev = device;
net_dev->destroy = FALSE;
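Note: this hunk (and the two like it below) swaps M_NOWAIT for M_WAITOK in the attach path. malloc(9) with M_WAITOK sleeps until memory is available and never returns NULL, so the failure branches become dead code and are dropped; the trade-off is that it is legal only in a sleepable context. In brief:

    /* M_WAITOK: may sleep, cannot fail -- no NULL check needed. */
    net_dev = malloc(sizeof(netvsc_dev), M_NETVSC, M_WAITOK | M_ZERO);

    /* M_NOWAIT: never sleeps, so the NULL case must be handled. */
    tmp = malloc(sizeof(netvsc_dev), M_NETVSC, M_NOWAIT | M_ZERO);
    if (tmp == NULL)
            return (NULL);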
@@ -135,15 +132,15 @@ hv_nv_get_next_send_section(netvsc_dev *net_dev)
int i;
for (i = 0; i < bitsmap_words; i++) {
- idx = ffs(~bitsmap[i]);
+ idx = ffsl(~bitsmap[i]);
if (0 == idx)
continue;
idx--;
- if (i * BITS_PER_LONG + idx >= net_dev->send_section_count)
- return (ret);
+ KASSERT(i * BITS_PER_LONG + idx < net_dev->send_section_count,
+ ("invalid i %d and idx %lu", i, idx));
- if (synch_test_and_set_bit(idx, &bitsmap[i]))
+ if (atomic_testandset_long(&bitsmap[i], idx))
continue;
ret = i * BITS_PER_LONG + idx;
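Note: two independent fixes land in this hunk. ffs() takes an int, so on LP64 it never examined the upper 32 bits of the u_long bitmap words; ffsl() scans the full word. And the out-of-range case becomes a KASSERT rather than a silent failure return, since send_section_count sized the bitmap in the first place. The resulting lock-free allocation loop in isolation (a sketch; the real code returns through `ret`):

    static long
    alloc_send_section(u_long *bitsmap, int bitsmap_words, u_int section_count)
    {
            int i, idx;

            for (i = 0; i < bitsmap_words; i++) {
                    idx = ffsl(~bitsmap[i]);  /* 1-based bit; 0 if word full */
                    if (idx == 0)
                            continue;
                    idx--;
                    KASSERT(i * BITS_PER_LONG + idx < section_count,
                        ("bitmap larger than section count"));
                    if (atomic_testandset_long(&bitsmap[i], idx))
                            continue;         /* raced with another CPU */
                    return ((long)i * BITS_PER_LONG + idx);
            }
            return (-1);                      /* all sections busy */
    }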
@@ -223,11 +220,7 @@ hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device)
init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections;
net_dev->rx_sections = malloc(net_dev->rx_section_count *
- sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_NOWAIT);
- if (net_dev->rx_sections == NULL) {
- ret = EINVAL;
- goto cleanup;
- }
+ sizeof(nvsp_1_rx_buf_section), M_NETVSC, M_WAITOK);
memcpy(net_dev->rx_sections,
init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections,
net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section));
@@ -325,11 +318,7 @@ hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device)
BITS_PER_LONG);
net_dev->send_section_bitsmap =
malloc(net_dev->bitsmap_words * sizeof(long), M_NETVSC,
- M_NOWAIT | M_ZERO);
- if (NULL == net_dev->send_section_bitsmap) {
- ret = ENOMEM;
- goto cleanup;
- }
+ M_WAITOK | M_ZERO);
goto exit;
@@ -788,8 +777,27 @@ hv_nv_on_send_completion(netvsc_dev *net_dev,
if (NULL != net_vsc_pkt) {
if (net_vsc_pkt->send_buf_section_idx !=
NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
- synch_change_bit(net_vsc_pkt->send_buf_section_idx,
- net_dev->send_section_bitsmap);
+ u_long mask;
+ int idx;
+
+ idx = net_vsc_pkt->send_buf_section_idx /
+ BITS_PER_LONG;
+ KASSERT(idx < net_dev->bitsmap_words,
+ ("invalid section index %u",
+ net_vsc_pkt->send_buf_section_idx));
+ mask = 1UL <<
+ (net_vsc_pkt->send_buf_section_idx %
+ BITS_PER_LONG);
+
+ KASSERT(net_dev->send_section_bitsmap[idx] &
+ mask,
+ ("index bitmap 0x%lx, section index %u, "
+ "bitmap idx %d, bitmask 0x%lx",
+ net_dev->send_section_bitsmap[idx],
+ net_vsc_pkt->send_buf_section_idx,
+ idx, mask));
+ atomic_clear_long(
+ &net_dev->send_section_bitsmap[idx], mask);
}
/* Notify the layer above us */
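Note: the free side becomes explicit as well. synch_change_bit() toggled the bit, so a double free would silently re-mark the section as busy; the new code asserts the bit is still set and then clears it atomically. Reduced to its core:

    /* Free send section 'sect': locate word and bit, assert, clear. */
    idx  = sect / BITS_PER_LONG;
    mask = 1UL << (sect % BITS_PER_LONG);
    KASSERT(net_dev->send_section_bitsmap[idx] & mask,
        ("section %u already free", sect));
    atomic_clear_long(&net_dev->send_section_bitsmap[idx], mask);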
diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h
index e684cc5..95dee17 100644
--- a/sys/dev/hyperv/netvsc/hv_net_vsc.h
+++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h
@@ -39,9 +39,11 @@
#define __HV_NET_VSC_H__
#include <sys/param.h>
+#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/queue.h>
+#include <sys/taskqueue.h>
#include <sys/sx.h>
#include <machine/bus.h>
@@ -56,6 +58,8 @@
#include <dev/hyperv/include/hyperv.h>
+#define HN_USE_TXDESC_BUFRING
+
MALLOC_DECLARE(M_NETVSC);
#define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF)
@@ -988,8 +992,67 @@ typedef struct {
hv_bool_uint8_t link_state;
} netvsc_device_info;
+#ifndef HN_USE_TXDESC_BUFRING
struct hn_txdesc;
SLIST_HEAD(hn_txdesc_list, hn_txdesc);
+#else
+struct buf_ring;
+#endif
+
+struct hn_rx_ring {
+ struct lro_ctrl hn_lro;
+
+ /* Trust csum verification on host side */
+ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */
+
+ u_long hn_csum_ip;
+ u_long hn_csum_tcp;
+ u_long hn_csum_udp;
+ u_long hn_csum_trusted;
+ u_long hn_lro_tried;
+ u_long hn_small_pkts;
+} __aligned(CACHE_LINE_SIZE);
+
+#define HN_TRUST_HCSUM_IP 0x0001
+#define HN_TRUST_HCSUM_TCP 0x0002
+#define HN_TRUST_HCSUM_UDP 0x0004
+
+struct hn_tx_ring {
+#ifndef HN_USE_TXDESC_BUFRING
+ struct mtx hn_txlist_spin;
+ struct hn_txdesc_list hn_txlist;
+#else
+ struct buf_ring *hn_txdesc_br;
+#endif
+ int hn_txdesc_cnt;
+ int hn_txdesc_avail;
+ int hn_has_txeof;
+
+ int hn_sched_tx;
+ void (*hn_txeof)(struct hn_tx_ring *);
+ struct taskqueue *hn_tx_taskq;
+ struct task hn_tx_task;
+ struct task hn_txeof_task;
+
+ struct mtx hn_tx_lock;
+ struct hn_softc *hn_sc;
+
+ int hn_direct_tx_size;
+ int hn_tx_chimney_size;
+ bus_dma_tag_t hn_tx_data_dtag;
+ uint64_t hn_csum_assist;
+
+ u_long hn_no_txdescs;
+ u_long hn_send_failed;
+ u_long hn_txdma_failed;
+ u_long hn_tx_collapsed;
+ u_long hn_tx_chimney;
+
+ /* Rarely used stuffs */
+ struct hn_txdesc *hn_txdesc;
+ bus_dma_tag_t hn_tx_rndis_dtag;
+ struct sysctl_oid *hn_tx_sysctl_tree;
+} __aligned(CACHE_LINE_SIZE);
/*
* Device-specific softc structure
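Note: each ring struct is padded to a cache line (__aligned(CACHE_LINE_SIZE)) so that, when the rings sit in an array, one ring's hot counters never share a line with its neighbor's. How the arrays are allocated is not visible in these hunks; a plausible sketch, assuming hn_create_rx_data() does the obvious thing:

    /* Assumed allocation shape (not shown in this diff). */
    sc->hn_rx_ring_cnt = 1;   /* single ring until vRSS lands, per the TODOs */
    sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
        M_NETVSC, M_WAITOK | M_ZERO);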
@@ -1009,44 +1072,22 @@ typedef struct hn_softc {
struct hv_device *hn_dev_obj;
netvsc_dev *net_dev;
- int hn_txdesc_cnt;
- struct hn_txdesc *hn_txdesc;
- bus_dma_tag_t hn_tx_data_dtag;
- bus_dma_tag_t hn_tx_rndis_dtag;
- int hn_tx_chimney_size;
- int hn_tx_chimney_max;
+ int hn_rx_ring_cnt;
+ struct hn_rx_ring *hn_rx_ring;
- struct mtx hn_txlist_spin;
- struct hn_txdesc_list hn_txlist;
- int hn_txdesc_avail;
- int hn_txeof;
-
- struct lro_ctrl hn_lro;
- int hn_lro_hiwat;
-
- /* Trust tcp segments verification on host side */
- int hn_trust_hosttcp;
-
- u_long hn_csum_ip;
- u_long hn_csum_tcp;
- u_long hn_csum_trusted;
- u_long hn_lro_tried;
- u_long hn_small_pkts;
- u_long hn_no_txdescs;
- u_long hn_send_failed;
- u_long hn_txdma_failed;
- u_long hn_tx_collapsed;
- u_long hn_tx_chimney;
+ int hn_tx_ring_cnt;
+ struct hn_tx_ring *hn_tx_ring;
+ int hn_tx_chimney_max;
+ struct taskqueue *hn_tx_taskq;
+ struct sysctl_oid *hn_tx_sysctl_tree;
} hn_softc_t;
-
/*
* Externs
*/
extern int hv_promisc_mode;
void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status);
-void netvsc_xmit_completion(void *context);
void hv_nv_on_receive_completion(struct hv_device *device,
uint64_t tid, uint32_t status);
netvsc_dev *hv_nv_on_device_add(struct hv_device *device,
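Note: with the softc split, state that used to be flat softc fields is reached through the new ring arrays; both rollup paths currently hard-code ring 0, as the "TODO: vRSS" comments in the .c hunks note. A sketch of the new access pattern:

    struct hn_tx_ring *txr = &sc->hn_tx_ring[0];    /* TODO: vRSS */
    struct hn_rx_ring *rxr = &sc->hn_rx_ring[0];    /* TODO: vRSS */

    txr->hn_tx_chimney_size = sc->hn_tx_chimney_max;
    rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP | HN_TRUST_HCSUM_UDP;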
diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
index b3360ea..0f4425e 100644
--- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
@@ -66,10 +66,12 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/kernel.h>
#include <sys/socket.h>
+#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
+#include <sys/buf_ring.h>
#include <net/if.h>
#include <net/if_arp.h>
@@ -132,6 +134,8 @@ __FBSDID("$FreeBSD$");
/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT 512
+#define HN_LROENT_CNT_DEF 128
+
#define HN_RNDIS_MSG_LEN \
(sizeof(rndis_msg) + \
RNDIS_VLAN_PPI_SIZE + \
@@ -146,10 +150,14 @@ __FBSDID("$FreeBSD$");
#define HN_TX_DATA_SEGCNT_MAX \
(NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS)
+#define HN_DIRECT_TX_SIZE_DEF 128
+
struct hn_txdesc {
+#ifndef HN_USE_TXDESC_BUFRING
SLIST_ENTRY(hn_txdesc) link;
+#endif
struct mbuf *m;
- struct hn_softc *sc;
+ struct hn_tx_ring *txr;
int refs;
uint32_t flags; /* HN_TXD_FLAG_ */
netvsc_packet netvsc_pkt; /* XXX to be removed */
@@ -165,23 +173,18 @@ struct hn_txdesc {
#define HN_TXD_FLAG_DMAMAP 0x2
/*
- * A unified flag for all outbound check sum flags is useful,
- * and it helps avoiding unnecessary check sum calculation in
- * network forwarding scenario.
+ * Only enable UDP checksum offloading when it is on 2012R2 or
+ * later. UDP checksum offloading doesn't work on earlier
+ * Windows releases.
*/
-#define HV_CSUM_FOR_OUTBOUND \
- (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO| \
- CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \
- CSUM_IP6_TSO|CSUM_IP6_ISCSI)
-
-/* XXX move to netinet/tcp_lro.h */
-#define HN_LRO_HIWAT_MAX 65535
-#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX
+#define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP)
+#define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP)
+
+#define HN_LRO_LENLIM_DEF (25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
-#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu)
-#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \
- ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \
- (hiwat) <= HN_LRO_HIWAT_MAX)
+#define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu)
+
+#define HN_LRO_ACKCNT_DEF 1
/*
* Be aware that this sleepable mutex will exhibit WITNESS errors when
@@ -205,19 +208,71 @@ struct hn_txdesc {
int hv_promisc_mode = 0; /* normal mode by default */
+SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD, NULL, "Hyper-V network interface");
+
/* Trust tcp segements verification on host side. */
-static int hn_trust_hosttcp = 0;
-TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp);
+static int hn_trust_hosttcp = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
+ &hn_trust_hosttcp, 0,
+ "Trust tcp segement verification on host side, "
+ "when csum info is missing (global setting)");
+
+/* Trust udp datagrams verification on host side. */
+static int hn_trust_hostudp = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
+ &hn_trust_hostudp, 0,
+ "Trust udp datagram verification on host side, "
+ "when csum info is missing (global setting)");
+
+/* Trust ip packets verification on host side. */
+static int hn_trust_hostip = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
+ &hn_trust_hostip, 0,
+ "Trust ip packet verification on host side, "
+ "when csum info is missing (global setting)");
#if __FreeBSD_version >= 1100045
/* Limit TSO burst size */
static int hn_tso_maxlen = 0;
-TUNABLE_INT("dev.hn.tso_maxlen", &hn_tso_maxlen);
+SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
+ &hn_tso_maxlen, 0, "TSO burst limit");
#endif
/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
-TUNABLE_INT("dev.hn.tx_chimney_size", &hn_tx_chimney_size);
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
+ &hn_tx_chimney_size, 0, "Chimney send packet size limit");
+
+/* Limit the size of packet for direct transmission */
+static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
+SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
+ &hn_direct_tx_size, 0, "Size of the packet for direct transmission");
+
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
+SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
+ &hn_lro_entry_count, 0, "LRO entry count");
+#endif
+#endif
+
+static int hn_share_tx_taskq = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN,
+ &hn_share_tx_taskq, 0, "Enable shared TX taskqueue");
+
+static struct taskqueue *hn_tx_taskq;
+
+#ifndef HN_USE_TXDESC_BUFRING
+static int hn_use_txdesc_bufring = 0;
+#else
+static int hn_use_txdesc_bufring = 1;
+#endif
+SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
+ &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
+
+static int hn_bind_tx_taskq = -1;
+SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
+ &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");
/*
* Forward declarations
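Note: replacing TUNABLE_INT("dev.hn.*") with SYSCTL_INT(..., CTLFLAG_RDTUN, ...) moves the knobs under hw.hn and makes each one a boot-time tunable that is also readable (but not writable) via sysctl at runtime. They are set from loader.conf, e.g. (values purely illustrative):

    # /boot/loader.conf
    hw.hn.trust_hosttcp=0       # don't trust host-side TCP csum verification
    hw.hn.tx_chimney_size=2048  # cap chimney (copy-based) sends at 2048 bytes
    hw.hn.bind_tx_taskq=0       # pin the TX taskqueue thread to CPU 0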
@@ -226,82 +281,37 @@ static void hn_stop(hn_softc_t *sc);
static void hn_ifinit_locked(hn_softc_t *sc);
static void hn_ifinit(void *xsc);
static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static void hn_start_locked(struct ifnet *ifp);
+static int hn_start_locked(struct hn_tx_ring *txr, int len);
static void hn_start(struct ifnet *ifp);
+static void hn_start_txeof(struct hn_tx_ring *);
static int hn_ifmedia_upd(struct ifnet *ifp);
static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
-#ifdef HN_LRO_HIWAT
-static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS);
+#if __FreeBSD_version >= 1100099
+static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
+static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS);
-static int hn_check_iplen(const struct mbuf *, int);
-static int hn_create_tx_ring(struct hn_softc *sc);
-static void hn_destroy_tx_ring(struct hn_softc *sc);
-
-static __inline void
-hn_set_lro_hiwat(struct hn_softc *sc, int hiwat)
-{
- sc->hn_lro_hiwat = hiwat;
-#ifdef HN_LRO_HIWAT
- sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
-#endif
-}
-
-/*
- * NetVsc get message transport protocol type
- */
-static uint32_t get_transport_proto_type(struct mbuf *m_head)
-{
- uint32_t ret_val = TRANSPORT_TYPE_NOT_IP;
- uint16_t ether_type = 0;
- int ether_len = 0;
- struct ether_vlan_header *eh;
-#ifdef INET
- struct ip *iph;
-#endif
-#ifdef INET6
- struct ip6_hdr *ip6;
-#endif
-
- eh = mtod(m_head, struct ether_vlan_header*);
- if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
- ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
- ether_type = eh->evl_proto;
- } else {
- ether_len = ETHER_HDR_LEN;
- ether_type = eh->evl_encap_proto;
- }
-
- switch (ntohs(ether_type)) {
-#ifdef INET6
- case ETHERTYPE_IPV6:
- ip6 = (struct ip6_hdr *)(m_head->m_data + ether_len);
-
- if (IPPROTO_TCP == ip6->ip6_nxt) {
- ret_val = TRANSPORT_TYPE_IPV6_TCP;
- } else if (IPPROTO_UDP == ip6->ip6_nxt) {
- ret_val = TRANSPORT_TYPE_IPV6_UDP;
- }
- break;
+#if __FreeBSD_version < 1100095
+static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
+#else
+static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
-#ifdef INET
- case ETHERTYPE_IP:
- iph = (struct ip *)(m_head->m_data + ether_len);
-
- if (IPPROTO_TCP == iph->ip_p) {
- ret_val = TRANSPORT_TYPE_IPV4_TCP;
- } else if (IPPROTO_UDP == iph->ip_p) {
- ret_val = TRANSPORT_TYPE_IPV4_UDP;
- }
- break;
-#endif
- default:
- ret_val = TRANSPORT_TYPE_NOT_IP;
- break;
- }
-
- return (ret_val);
-}
+static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_check_iplen(const struct mbuf *, int);
+static int hn_create_tx_ring(struct hn_softc *, int);
+static void hn_destroy_tx_ring(struct hn_tx_ring *);
+static int hn_create_tx_data(struct hn_softc *);
+static void hn_destroy_tx_data(struct hn_softc *);
+static void hn_start_taskfunc(void *, int);
+static void hn_start_txeof_taskfunc(void *, int);
+static void hn_stop_tx_tasks(struct hn_softc *);
+static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **);
+static void hn_create_rx_data(struct hn_softc *sc);
+static void hn_destroy_rx_data(struct hn_softc *sc);
+static void hn_set_tx_chimney_size(struct hn_softc *, int);
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
@@ -353,6 +363,19 @@ netvsc_probe(device_t dev)
return (ENXIO);
}
+static void
+hn_cpuset_setthread_task(void *xmask, int pending __unused)
+{
+ cpuset_t *mask = xmask;
+ int error;
+
+ error = cpuset_setthread(curthread->td_tid, mask);
+ if (error) {
+ panic("curthread=%ju: can't pin; error=%d",
+ (uintmax_t)curthread->td_tid, error);
+ }
+}
+
/*
* Standard attach entry point.
*
@@ -367,8 +390,6 @@ netvsc_attach(device_t dev)
hn_softc_t *sc;
int unit = device_get_unit(dev);
struct ifnet *ifp = NULL;
- struct sysctl_oid_list *child;
- struct sysctl_ctx_list *ctx;
int error;
#if __FreeBSD_version >= 1100045
int tso_maxlen;
@@ -382,13 +403,28 @@ netvsc_attach(device_t dev)
bzero(sc, sizeof(hn_softc_t));
sc->hn_unit = unit;
sc->hn_dev = dev;
- sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF;
- sc->hn_trust_hosttcp = hn_trust_hosttcp;
-
- error = hn_create_tx_ring(sc);
- if (error)
- goto failed;
+ if (hn_tx_taskq == NULL) {
+ sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
+ taskqueue_thread_enqueue, &sc->hn_tx_taskq);
+ taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx",
+ device_get_nameunit(dev));
+ if (hn_bind_tx_taskq >= 0) {
+ int cpu = hn_bind_tx_taskq;
+ struct task cpuset_task;
+ cpuset_t cpu_set;
+
+ if (cpu > mp_ncpus - 1)
+ cpu = mp_ncpus - 1;
+ CPU_SETOF(cpu, &cpu_set);
+ TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task,
+ &cpu_set);
+ taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task);
+ taskqueue_drain(sc->hn_tx_taskq, &cpuset_task);
+ }
+ } else {
+ sc->hn_tx_taskq = hn_tx_taskq;
+ }
NV_LOCK_INIT(sc, "NetVSCLock");
sc->hn_dev_obj = device_ctx;
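Note: the pinning above relies on a small trick: a task is, by definition, executed by the taskqueue's own thread, so enqueueing a task that calls cpuset_setthread(curthread->td_tid, ...) pins exactly the right thread. taskqueue_drain() then blocks until it has run, which matters because cpu_set lives on the attach stack. Stripped down:

    cpuset_t cpu_set;
    struct task cpuset_task;

    CPU_SETOF(cpu, &cpu_set);
    TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set);
    taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task);
    /* Must drain before returning: cpuset_task/cpu_set are stack variables. */
    taskqueue_drain(sc->hn_tx_taskq, &cpuset_task);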
@@ -396,6 +432,12 @@ netvsc_attach(device_t dev)
ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
ifp->if_softc = sc;
+ error = hn_create_tx_data(sc);
+ if (error)
+ goto failed;
+
+ hn_create_rx_data(sc);
+
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_dunit = unit;
ifp->if_dname = NETVSC_DEVNAME;
@@ -426,15 +468,7 @@ netvsc_attach(device_t dev)
ifp->if_capenable |=
IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
IFCAP_LRO;
- /*
- * Only enable UDP checksum offloading when it is on 2012R2 or
- * later. UDP checksum offloading doesn't work on earlier
- * Windows releases.
- */
- if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
- ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO;
- else
- ifp->if_hwassist = CSUM_TCP | CSUM_TSO;
+ ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO;
error = hv_rf_on_device_add(device_ctx, &device_info);
if (error)
@@ -444,15 +478,6 @@ netvsc_attach(device_t dev)
sc->hn_carrier = 1;
}
-#if defined(INET) || defined(INET6)
- tcp_lro_init(&sc->hn_lro);
- /* Driver private LRO settings */
- sc->hn_lro.ifp = ifp;
-#ifdef HN_LRO_HIWAT
- sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
-#endif
-#endif /* INET || INET6 */
-
#if __FreeBSD_version >= 1100045
tso_maxlen = hn_tso_maxlen;
if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET)
@@ -472,87 +497,14 @@ netvsc_attach(device_t dev)
#endif
sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
- sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
+ hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
if (hn_tx_chimney_size > 0 &&
hn_tx_chimney_size < sc->hn_tx_chimney_max)
- sc->hn_tx_chimney_size = hn_tx_chimney_size;
-
- ctx = device_get_sysctl_ctx(dev);
- child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
-
- SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued",
- CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued");
- SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed",
- CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried",
- CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries");
-#ifdef HN_LRO_HIWAT
- SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat",
- CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl,
- "I", "LRO high watermark");
-#endif
- SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp",
- CTLFLAG_RW, &sc->hn_trust_hosttcp, 0,
- "Trust tcp segement verification on host side, "
- "when csum info is missing");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip",
- CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp",
- CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted",
- CTLFLAG_RW, &sc->hn_csum_trusted,
- "# of TCP segements that we trust host's csum verification");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts",
- CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs",
- CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed",
- CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed",
- CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed",
- CTLFLAG_RW, &sc->hn_tx_collapsed, "# of TX mbuf collapsed");
- SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney",
- CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send");
- SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
- CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs");
- SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
- CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs");
- SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
- CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
- "Chimney send packet size upper boundary");
- SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
- CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
- "I", "Chimney send packet size limit");
-
- if (unit == 0) {
- struct sysctl_ctx_list *dc_ctx;
- struct sysctl_oid_list *dc_child;
- devclass_t dc;
-
- /*
- * Add sysctl nodes for devclass
- */
- dc = device_get_devclass(dev);
- dc_ctx = devclass_get_sysctl_ctx(dc);
- dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc));
-
- SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp",
- CTLFLAG_RD, &hn_trust_hosttcp, 0,
- "Trust tcp segement verification on host side, "
- "when csum info is missing (global setting)");
- SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tx_chimney_size",
- CTLFLAG_RD, &hn_tx_chimney_size, 0,
- "Chimney send packet size limit");
-#if __FreeBSD_version >= 1100045
- SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tso_maxlen",
- CTLFLAG_RD, &hn_tso_maxlen, 0, "TSO burst limit");
-#endif
- }
+ hn_set_tx_chimney_size(sc, hn_tx_chimney_size);
return (0);
failed:
- hn_destroy_tx_ring(sc);
+ hn_destroy_tx_data(sc);
if (ifp != NULL)
if_free(ifp);
return (error);
@@ -583,11 +535,14 @@ netvsc_detach(device_t dev)
hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);
+ hn_stop_tx_tasks(sc);
+
ifmedia_removeall(&sc->hn_media);
-#if defined(INET) || defined(INET6)
- tcp_lro_free(&sc->hn_lro);
-#endif
- hn_destroy_tx_ring(sc);
+ hn_destroy_rx_data(sc);
+ hn_destroy_tx_data(sc);
+
+ if (sc->hn_tx_taskq != hn_tx_taskq)
+ taskqueue_free(sc->hn_tx_taskq);
return (0);
}
@@ -602,13 +557,13 @@ netvsc_shutdown(device_t dev)
}
static __inline int
-hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
+hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
struct mbuf *m = *m_head;
int error;
- error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap,
+ error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
m, segs, nsegs, BUS_DMA_NOWAIT);
if (error == EFBIG) {
struct mbuf *m_new;
@@ -618,13 +573,13 @@ hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
return ENOBUFS;
else
*m_head = m = m_new;
- sc->hn_tx_collapsed++;
+ txr->hn_tx_collapsed++;
- error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag,
+ error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
}
if (!error) {
- bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap,
+ bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
BUS_DMASYNC_PREWRITE);
txd->flags |= HN_TXD_FLAG_DMAMAP;
}
@@ -632,20 +587,20 @@ hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
}
static __inline void
-hn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd)
+hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
if (txd->flags & HN_TXD_FLAG_DMAMAP) {
- bus_dmamap_sync(sc->hn_tx_data_dtag,
+ bus_dmamap_sync(txr->hn_tx_data_dtag,
txd->data_dmap, BUS_DMASYNC_POSTWRITE);
- bus_dmamap_unload(sc->hn_tx_data_dtag,
+ bus_dmamap_unload(txr->hn_tx_data_dtag,
txd->data_dmap);
txd->flags &= ~HN_TXD_FLAG_DMAMAP;
}
}
static __inline int
-hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
+hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
@@ -655,7 +610,7 @@ hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
if (atomic_fetchadd_int(&txd->refs, -1) != 1)
return 0;
- hn_txdesc_dmamap_unload(sc, txd);
+ hn_txdesc_dmamap_unload(txr, txd);
if (txd->m != NULL) {
m_freem(txd->m);
txd->m = NULL;
@@ -663,33 +618,45 @@ hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
txd->flags |= HN_TXD_FLAG_ONLIST;
- mtx_lock_spin(&sc->hn_txlist_spin);
- KASSERT(sc->hn_txdesc_avail >= 0 &&
- sc->hn_txdesc_avail < sc->hn_txdesc_cnt,
- ("txdesc_put: invalid txd avail %d", sc->hn_txdesc_avail));
- sc->hn_txdesc_avail++;
- SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
- mtx_unlock_spin(&sc->hn_txlist_spin);
+#ifndef HN_USE_TXDESC_BUFRING
+ mtx_lock_spin(&txr->hn_txlist_spin);
+ KASSERT(txr->hn_txdesc_avail >= 0 &&
+ txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
+ ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
+ txr->hn_txdesc_avail++;
+ SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+ mtx_unlock_spin(&txr->hn_txlist_spin);
+#else
+ atomic_add_int(&txr->hn_txdesc_avail, 1);
+ buf_ring_enqueue(txr->hn_txdesc_br, txd);
+#endif
return 1;
}
static __inline struct hn_txdesc *
-hn_txdesc_get(struct hn_softc *sc)
+hn_txdesc_get(struct hn_tx_ring *txr)
{
struct hn_txdesc *txd;
- mtx_lock_spin(&sc->hn_txlist_spin);
- txd = SLIST_FIRST(&sc->hn_txlist);
+#ifndef HN_USE_TXDESC_BUFRING
+ mtx_lock_spin(&txr->hn_txlist_spin);
+ txd = SLIST_FIRST(&txr->hn_txlist);
if (txd != NULL) {
- KASSERT(sc->hn_txdesc_avail > 0,
- ("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail));
- sc->hn_txdesc_avail--;
- SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+ KASSERT(txr->hn_txdesc_avail > 0,
+ ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
+ txr->hn_txdesc_avail--;
+ SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
}
- mtx_unlock_spin(&sc->hn_txlist_spin);
+ mtx_unlock_spin(&txr->hn_txlist_spin);
+#else
+ txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
+#endif
if (txd != NULL) {
+#ifdef HN_USE_TXDESC_BUFRING
+ atomic_subtract_int(&txr->hn_txdesc_avail, 1);
+#endif
KASSERT(txd->m == NULL && txd->refs == 0 &&
(txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd"));
txd->flags &= ~HN_TXD_FLAG_ONLIST;
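Note: under HN_USE_TXDESC_BUFRING the descriptor free list is a buf_ring(9): enqueue is multi-producer safe (completions can run concurrently with the sender), while dequeue uses the cheaper single-consumer variant because only one thread transmits on a given ring, so the spin mutex disappears entirely. Ring setup is not part of these hunks; a sketch of the assumed shape (buf_ring wants a power-of-two capacity, which HN_TX_DESC_CNT = 512 satisfies):

    #include <sys/buf_ring.h>

    /* Setup (assumed): ring sized to the descriptor count. */
    txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC,
        M_WAITOK, &txr->hn_tx_lock);

    /* Free a descriptor (any context): */
    buf_ring_enqueue(txr->hn_txdesc_br, txd);

    /* Grab a descriptor (single consumer -- this ring's TX path only): */
    txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);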
@@ -707,213 +674,133 @@ hn_txdesc_hold(struct hn_txdesc *txd)
atomic_add_int(&txd->refs, 1);
}
-/*
- * Send completion processing
- *
- * Note: It looks like offset 0 of buf is reserved to hold the softc
- * pointer. The sc pointer is not currently needed in this function, and
- * it is not presently populated by the TX function.
- */
-void
-netvsc_xmit_completion(void *context)
+static void
+hn_tx_done(void *xpkt)
{
- netvsc_packet *packet = context;
+ netvsc_packet *packet = xpkt;
struct hn_txdesc *txd;
- struct hn_softc *sc;
+ struct hn_tx_ring *txr;
txd = (struct hn_txdesc *)(uintptr_t)
packet->compl.send.send_completion_tid;
- sc = txd->sc;
- sc->hn_txeof = 1;
- hn_txdesc_put(sc, txd);
+ txr = txd->txr;
+ txr->hn_has_txeof = 1;
+ hn_txdesc_put(txr, txd);
}
void
netvsc_channel_rollup(struct hv_device *device_ctx)
{
struct hn_softc *sc = device_get_softc(device_ctx->device);
- struct ifnet *ifp;
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; /* TODO: vRSS */
+#if defined(INET) || defined(INET6)
+ struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */
+ struct lro_ctrl *lro = &rxr->hn_lro;
+ struct lro_entry *queued;
+
+ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
+ SLIST_REMOVE_HEAD(&lro->lro_active, next);
+ tcp_lro_flush(lro, queued);
+ }
+#endif
- if (!sc->hn_txeof)
+ if (!txr->hn_has_txeof)
return;
- sc->hn_txeof = 0;
- ifp = sc->hn_ifp;
- NV_LOCK(sc);
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
- hn_start_locked(ifp);
- NV_UNLOCK(sc);
+ txr->hn_has_txeof = 0;
+ txr->hn_txeof(txr);
}
/*
- * Start a transmit of one or more packets
+ * NOTE:
+ * If this function fails, then both txd and m_head0 will be freed.
*/
-static void
-hn_start_locked(struct ifnet *ifp)
+static int
+hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
{
- hn_softc_t *sc = ifp->if_softc;
- struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
- netvsc_dev *net_dev = sc->net_dev;
+ bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
+ int error, nsegs, i;
+ struct mbuf *m_head = *m_head0;
netvsc_packet *packet;
- struct mbuf *m_head, *m;
- struct ether_vlan_header *eh;
rndis_msg *rndis_mesg;
rndis_packet *rndis_pkt;
rndis_per_packet_info *rppi;
- ndis_8021q_info *rppi_vlan_info;
- rndis_tcp_ip_csum_info *csum_info;
- rndis_tcp_tso_info *tso_info;
- int ether_len;
- uint32_t rndis_msg_size = 0;
- uint32_t trans_proto_type;
- uint32_t send_buf_section_idx =
- NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
-
- if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
- IFF_DRV_RUNNING)
- return;
-
- while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
- bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
- int error, nsegs, i, send_failed = 0;
- struct hn_txdesc *txd;
+ uint32_t rndis_msg_size;
- IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
- if (m_head == NULL)
- break;
+ packet = &txd->netvsc_pkt;
+ packet->is_data_pkt = TRUE;
+ packet->tot_data_buf_len = m_head->m_pkthdr.len;
- txd = hn_txdesc_get(sc);
- if (txd == NULL) {
- sc->hn_no_txdescs++;
- IF_PREPEND(&ifp->if_snd, m_head);
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
- break;
- }
+ /*
+ * extension points to the area reserved for the
+ * rndis_filter_packet, which is placed just after
+ * the netvsc_packet (and rppi struct, if present;
+ * length is updated later).
+ */
+ rndis_mesg = txd->rndis_msg;
+ /* XXX not necessary */
+ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN);
+ rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
- packet = &txd->netvsc_pkt;
- /* XXX not necessary */
- memset(packet, 0, sizeof(*packet));
+ rndis_pkt = &rndis_mesg->msg.packet;
+ rndis_pkt->data_offset = sizeof(rndis_packet);
+ rndis_pkt->data_length = packet->tot_data_buf_len;
+ rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet);
- packet->is_data_pkt = TRUE;
+ rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet);
- /* Initialize it from the mbuf */
- packet->tot_data_buf_len = m_head->m_pkthdr.len;
+ if (m_head->m_flags & M_VLANTAG) {
+ ndis_8021q_info *rppi_vlan_info;
- /*
- * extension points to the area reserved for the
- * rndis_filter_packet, which is placed just after
- * the netvsc_packet (and rppi struct, if present;
- * length is updated later).
- */
- rndis_mesg = txd->rndis_msg;
- /* XXX not necessary */
- memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN);
- rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
+ rndis_msg_size += RNDIS_VLAN_PPI_SIZE;
+ rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE,
+ ieee_8021q_info);
- rndis_pkt = &rndis_mesg->msg.packet;
- rndis_pkt->data_offset = sizeof(rndis_packet);
- rndis_pkt->data_length = packet->tot_data_buf_len;
- rndis_pkt->per_pkt_info_offset = sizeof(rndis_packet);
+ rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi +
+ rppi->per_packet_info_offset);
+ rppi_vlan_info->u1.s1.vlan_id =
+ m_head->m_pkthdr.ether_vtag & 0xfff;
+ }
- rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet);
+ if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
+ rndis_tcp_tso_info *tso_info;
+ struct ether_vlan_header *eh;
+ int ether_len;
/*
- * If the Hyper-V infrastructure needs to embed a VLAN tag,
- * initialize netvsc_packet and rppi struct values as needed.
+ * XXX need m_pullup and use mtodo
*/
- if (m_head->m_flags & M_VLANTAG) {
- /*
- * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag
- * into the frame.
- */
- rndis_msg_size += RNDIS_VLAN_PPI_SIZE;
-
- rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE,
- ieee_8021q_info);
-
- /* VLAN info immediately follows rppi struct */
- rppi_vlan_info = (ndis_8021q_info *)((char*)rppi +
- rppi->per_packet_info_offset);
- /* FreeBSD does not support CFI or priority */
- rppi_vlan_info->u1.s1.vlan_id =
- m_head->m_pkthdr.ether_vtag & 0xfff;
- }
-
- /* Only check the flags for outbound and ignore the ones for inbound */
- if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) {
- goto pre_send;
- }
-
eh = mtod(m_head, struct ether_vlan_header*);
- if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
- } else {
+ else
ether_len = ETHER_HDR_LEN;
- }
-
- trans_proto_type = get_transport_proto_type(m_head);
- if (TRANSPORT_TYPE_NOT_IP == trans_proto_type) {
- goto pre_send;
- }
-
- /*
- * TSO packet needless to setup the send side checksum
- * offload.
- */
- if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
- goto do_tso;
- }
- /* setup checksum offload */
- rndis_msg_size += RNDIS_CSUM_PPI_SIZE;
- rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE,
- tcpip_chksum_info);
- csum_info = (rndis_tcp_ip_csum_info *)((char*)rppi +
- rppi->per_packet_info_offset);
-
- if (trans_proto_type & (TYPE_IPV4 << 16)) {
- csum_info->xmit.is_ipv4 = 1;
- } else {
- csum_info->xmit.is_ipv6 = 1;
- }
-
- if (trans_proto_type & TYPE_TCP) {
- csum_info->xmit.tcp_csum = 1;
- csum_info->xmit.tcp_header_offset = 0;
- } else if (trans_proto_type & TYPE_UDP) {
- csum_info->xmit.udp_csum = 1;
- }
-
- goto pre_send;
-
-do_tso:
- /* setup TCP segmentation offload */
rndis_msg_size += RNDIS_TSO_PPI_SIZE;
rppi = hv_set_rppi_data(rndis_mesg, RNDIS_TSO_PPI_SIZE,
tcp_large_send_info);
-
- tso_info = (rndis_tcp_tso_info *)((char *)rppi +
+
+ tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi +
rppi->per_packet_info_offset);
tso_info->lso_v2_xmit.type =
RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
-
+
#ifdef INET
- if (trans_proto_type & (TYPE_IPV4 << 16)) {
+ if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
struct ip *ip =
(struct ip *)(m_head->m_data + ether_len);
unsigned long iph_len = ip->ip_hl << 2;
struct tcphdr *th =
(struct tcphdr *)((caddr_t)ip + iph_len);
-
+
tso_info->lso_v2_xmit.ip_version =
RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
ip->ip_len = 0;
ip->ip_sum = 0;
-
+
th->th_sum = in_pseudo(ip->ip_src.s_addr,
- ip->ip_dst.s_addr,
- htons(IPPROTO_TCP));
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
}
#endif
#if defined(INET6) && defined(INET)
@@ -921,8 +808,8 @@ do_tso:
#endif
#ifdef INET6
{
- struct ip6_hdr *ip6 =
- (struct ip6_hdr *)(m_head->m_data + ether_len);
+ struct ip6_hdr *ip6 = (struct ip6_hdr *)
+ (m_head->m_data + ether_len);
struct tcphdr *th = (struct tcphdr *)(ip6 + 1);
tso_info->lso_v2_xmit.ip_version =
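Note: for TSO the driver must hand the host a template header: ip_len and ip_sum zeroed, and the TCP checksum seeded with only the pseudo-header sum (no length term), which the host then completes for every segment it generates. The IPv4 case in isolation:

    struct ip *ip = (struct ip *)(m_head->m_data + ether_len);
    struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));

    ip->ip_len = 0;                 /* host fills per-segment length */
    ip->ip_sum = 0;                 /* host computes the IP checksum */
    th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
        htons(IPPROTO_TCP));        /* pseudo-header sum only, no length */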
@@ -933,146 +820,233 @@ do_tso:
#endif
tso_info->lso_v2_xmit.tcp_header_offset = 0;
tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz;
+ } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
+ rndis_tcp_ip_csum_info *csum_info;
-pre_send:
- rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size;
- packet->tot_data_buf_len = rndis_mesg->msg_len;
-
- /* send packet with send buffer */
- if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) {
- send_buf_section_idx =
- hv_nv_get_next_send_section(net_dev);
- if (send_buf_section_idx !=
- NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
- char *dest = ((char *)net_dev->send_buf +
- send_buf_section_idx *
- net_dev->send_section_size);
-
- memcpy(dest, rndis_mesg, rndis_msg_size);
- dest += rndis_msg_size;
- for (m = m_head; m != NULL; m = m->m_next) {
- if (m->m_len) {
- memcpy(dest,
- (void *)mtod(m, vm_offset_t),
- m->m_len);
- dest += m->m_len;
- }
- }
+ rndis_msg_size += RNDIS_CSUM_PPI_SIZE;
+ rppi = hv_set_rppi_data(rndis_mesg, RNDIS_CSUM_PPI_SIZE,
+ tcpip_chksum_info);
+ csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi +
+ rppi->per_packet_info_offset);
- packet->send_buf_section_idx =
- send_buf_section_idx;
- packet->send_buf_section_size =
- packet->tot_data_buf_len;
- packet->page_buf_count = 0;
- sc->hn_tx_chimney++;
- goto do_send;
- }
- }
+ csum_info->xmit.is_ipv4 = 1;
+ if (m_head->m_pkthdr.csum_flags & CSUM_IP)
+ csum_info->xmit.ip_header_csum = 1;
- error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs);
- if (error) {
- int freed;
+ if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
+ csum_info->xmit.tcp_csum = 1;
+ csum_info->xmit.tcp_header_offset = 0;
+ } else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
+ csum_info->xmit.udp_csum = 1;
+ }
+ }
- /*
- * This mbuf is not linked w/ the txd yet, so free
- * it now.
- */
- m_freem(m_head);
- freed = hn_txdesc_put(sc, txd);
- KASSERT(freed != 0,
- ("fail to free txd upon txdma error"));
+ rndis_mesg->msg_len = packet->tot_data_buf_len + rndis_msg_size;
+ packet->tot_data_buf_len = rndis_mesg->msg_len;
- sc->hn_txdma_failed++;
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- continue;
+ /*
+ * Chimney send, if the packet could fit into one chimney buffer.
+ */
+ if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) {
+ netvsc_dev *net_dev = txr->hn_sc->net_dev;
+ uint32_t send_buf_section_idx;
+
+ send_buf_section_idx =
+ hv_nv_get_next_send_section(net_dev);
+ if (send_buf_section_idx !=
+ NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
+ uint8_t *dest = ((uint8_t *)net_dev->send_buf +
+ (send_buf_section_idx *
+ net_dev->send_section_size));
+
+ memcpy(dest, rndis_mesg, rndis_msg_size);
+ dest += rndis_msg_size;
+ m_copydata(m_head, 0, m_head->m_pkthdr.len, dest);
+
+ packet->send_buf_section_idx = send_buf_section_idx;
+ packet->send_buf_section_size =
+ packet->tot_data_buf_len;
+ packet->page_buf_count = 0;
+ txr->hn_tx_chimney++;
+ goto done;
}
+ }
- packet->page_buf_count = nsegs +
- HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
-
- /* send packet with page buffer */
- packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr);
- packet->page_buffers[0].offset =
- txd->rndis_msg_paddr & PAGE_MASK;
- packet->page_buffers[0].length = rndis_msg_size;
+ error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
+ if (error) {
+ int freed;
/*
- * Fill the page buffers with mbuf info starting at index
- * HV_RF_NUM_TX_RESERVED_PAGE_BUFS.
+ * This mbuf is not linked w/ the txd yet, so free it now.
*/
- for (i = 0; i < nsegs; ++i) {
- hv_vmbus_page_buffer *pb = &packet->page_buffers[
- i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS];
+ m_freem(m_head);
+ *m_head0 = NULL;
- pb->pfn = atop(segs[i].ds_addr);
- pb->offset = segs[i].ds_addr & PAGE_MASK;
- pb->length = segs[i].ds_len;
- }
+ freed = hn_txdesc_put(txr, txd);
+ KASSERT(freed != 0,
+ ("fail to free txd upon txdma error"));
+
+ txr->hn_txdma_failed++;
+ if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
+ return error;
+ }
+ *m_head0 = m_head;
+
+ packet->page_buf_count = nsegs + HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
+
+ /* send packet with page buffer */
+ packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr);
+ packet->page_buffers[0].offset = txd->rndis_msg_paddr & PAGE_MASK;
+ packet->page_buffers[0].length = rndis_msg_size;
- packet->send_buf_section_idx =
- NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
- packet->send_buf_section_size = 0;
+ /*
+ * Fill the page buffers with mbuf info starting at index
+ * HV_RF_NUM_TX_RESERVED_PAGE_BUFS.
+ */
+ for (i = 0; i < nsegs; ++i) {
+ hv_vmbus_page_buffer *pb = &packet->page_buffers[
+ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS];
-do_send:
- txd->m = m_head;
+ pb->pfn = atop(segs[i].ds_addr);
+ pb->offset = segs[i].ds_addr & PAGE_MASK;
+ pb->length = segs[i].ds_len;
+ }
+
+ packet->send_buf_section_idx =
+ NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
+ packet->send_buf_section_size = 0;
+done:
+ txd->m = m_head;
+
+ /* Set the completion routine */
+ packet->compl.send.on_send_completion = hn_tx_done;
+ packet->compl.send.send_completion_context = packet;
+ packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)txd;
+
+ return 0;
+}
- /* Set the completion routine */
- packet->compl.send.on_send_completion = netvsc_xmit_completion;
- packet->compl.send.send_completion_context = packet;
- packet->compl.send.send_completion_tid =
- (uint64_t)(uintptr_t)txd;
+/*
+ * NOTE:
+ * If this function fails, then txd will be freed, but the mbuf
+ * associated w/ the txd will _not_ be freed.
+ */
+static int
+hn_send_pkt(struct ifnet *ifp, struct hv_device *device_ctx,
+ struct hn_tx_ring *txr, struct hn_txdesc *txd)
+{
+ int error, send_failed = 0;
again:
+ /*
+ * Make sure that txd is not freed before ETHER_BPF_MTAP.
+ */
+ hn_txdesc_hold(txd);
+ error = hv_nv_on_send(device_ctx, &txd->netvsc_pkt);
+ if (!error) {
+ ETHER_BPF_MTAP(ifp, txd->m);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ }
+ hn_txdesc_put(txr, txd);
+
+ if (__predict_false(error)) {
+ int freed;
+
/*
- * Make sure that txd is not freed before ETHER_BPF_MTAP.
+ * This should "really rarely" happen.
+ *
+ * XXX Too many RX to be acked or too many sideband
+ * commands to run? Ask netvsc_channel_rollup()
+ * to kick start later.
*/
- hn_txdesc_hold(txd);
- error = hv_nv_on_send(device_ctx, packet);
- if (!error) {
- ETHER_BPF_MTAP(ifp, m_head);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ txr->hn_has_txeof = 1;
+ if (!send_failed) {
+ txr->hn_send_failed++;
+ send_failed = 1;
+ /*
+ * Try sending again after set hn_has_txeof;
+ * in case that we missed the last
+ * netvsc_channel_rollup().
+ */
+ goto again;
}
- hn_txdesc_put(sc, txd);
+ if_printf(ifp, "send failed\n");
- if (__predict_false(error)) {
- int freed;
+ /*
+ * Caller will perform further processing on the
+ * associated mbuf, so don't free it in hn_txdesc_put();
+ * only unload it from the DMA map in hn_txdesc_put(),
+ * if it was loaded.
+ */
+ txd->m = NULL;
+ freed = hn_txdesc_put(txr, txd);
+ KASSERT(freed != 0,
+ ("fail to free txd upon send error"));
- /*
- * This should "really rarely" happen.
- *
- * XXX Too many RX to be acked or too many sideband
- * commands to run? Ask netvsc_channel_rollup()
- * to kick start later.
- */
- sc->hn_txeof = 1;
- if (!send_failed) {
- sc->hn_send_failed++;
- send_failed = 1;
- /*
- * Try sending again after set hn_txeof;
- * in case that we missed the last
- * netvsc_channel_rollup().
- */
- goto again;
- }
- if_printf(ifp, "send failed\n");
+ txr->hn_send_failed++;
+ }
+ return error;
+}
+
+/*
+ * Start a transmit of one or more packets
+ */
+static int
+hn_start_locked(struct hn_tx_ring *txr, int len)
+{
+ struct hn_softc *sc = txr->hn_sc;
+ struct ifnet *ifp = sc->hn_ifp;
+ struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
+
+ KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+ mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+
+ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+ IFF_DRV_RUNNING)
+ return 0;
+
+ while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+ struct hn_txdesc *txd;
+ struct mbuf *m_head;
+ int error;
+
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
+ if (m_head == NULL)
+ break;
+ if (len > 0 && m_head->m_pkthdr.len > len) {
/*
- * This mbuf will be prepended, don't free it
- * in hn_txdesc_put(); only unload it from the
- * DMA map in hn_txdesc_put(), if it was loaded.
+ * This sending could be time consuming; let callers
+ * dispatch this packet sending (and sending of any
+ * following up packets) to tx taskqueue.
*/
- txd->m = NULL;
- freed = hn_txdesc_put(sc, txd);
- KASSERT(freed != 0,
- ("fail to free txd upon send error"));
-
- sc->hn_send_failed++;
- IF_PREPEND(&ifp->if_snd, m_head);
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ return 1;
+ }
+
+ txd = hn_txdesc_get(txr);
+ if (txd == NULL) {
+ txr->hn_no_txdescs++;
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+ break;
+ }
+
+ error = hn_encap(txr, txd, &m_head);
+ if (error) {
+ /* Both txd and m_head are freed */
+ continue;
+ }
+
+ error = hn_send_pkt(ifp, device_ctx, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m_head is not */
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
break;
}
}
+ return 0;
}
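Note: the hold/put pair in hn_send_pkt() is what makes the ETHER_BPF_MTAP() call safe. The send completion can fire (and call hn_txdesc_put()) before hv_nv_on_send() even returns, so the sender takes its own reference first; the descriptor is recycled only after both sides drop theirs. The core of the pattern:

    hn_txdesc_hold(txd);             /* +1: completion alone can't free txd */
    error = hv_nv_on_send(device_ctx, &txd->netvsc_pkt);
    if (!error)
            ETHER_BPF_MTAP(ifp, txd->m); /* txd->m still valid under our ref */
    hn_txdesc_put(txr, txd);         /* -1: frees iff completion already ran */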
/*
@@ -1162,11 +1136,11 @@ int
netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
rndis_tcp_ip_csum_info *csum_info)
{
- hn_softc_t *sc = (hn_softc_t *)device_get_softc(device_ctx->device);
+ struct hn_softc *sc = device_get_softc(device_ctx->device);
+ struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */
struct mbuf *m_new;
struct ifnet *ifp;
- device_t dev = device_ctx->device;
- int size, do_lro = 0;
+ int size, do_lro = 0, do_csum = 1;
if (sc == NULL) {
return (0); /* TODO: KYS how can this be! */
@@ -1192,7 +1166,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
memcpy(mtod(m_new, void *), packet->data,
packet->tot_data_buf_len);
m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len;
- sc->hn_small_pkts++;
+ rxr->hn_small_pkts++;
} else {
/*
* Get an mbuf with a cluster. For packets 2K or less,
@@ -1208,7 +1182,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
if (m_new == NULL) {
- device_printf(dev, "alloc mbuf failed.\n");
+ if_printf(ifp, "alloc mbuf failed.\n");
return (0);
}
@@ -1216,21 +1190,28 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
}
m_new->m_pkthdr.rcvif = ifp;
+ if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
+ do_csum = 0;
+
/* receive side checksum offload */
- if (NULL != csum_info) {
+ if (csum_info != NULL) {
/* IP csum offload */
- if (csum_info->receive.ip_csum_succeeded) {
+ if (csum_info->receive.ip_csum_succeeded && do_csum) {
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID);
- sc->hn_csum_ip++;
+ rxr->hn_csum_ip++;
}
- /* TCP csum offload */
- if (csum_info->receive.tcp_csum_succeeded) {
+ /* TCP/UDP csum offload */
+ if ((csum_info->receive.tcp_csum_succeeded ||
+ csum_info->receive.udp_csum_succeeded) && do_csum) {
m_new->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
- sc->hn_csum_tcp++;
+ if (csum_info->receive.tcp_csum_succeeded)
+ rxr->hn_csum_tcp++;
+ else
+ rxr->hn_csum_udp++;
}
if (csum_info->receive.ip_csum_succeeded &&
@@ -1261,8 +1242,10 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
pr = hn_check_iplen(m_new, hoff);
if (pr == IPPROTO_TCP) {
- if (sc->hn_trust_hosttcp) {
- sc->hn_csum_trusted++;
+ if (do_csum &&
+ (rxr->hn_trust_hcsum &
+ HN_TRUST_HCSUM_TCP)) {
+ rxr->hn_csum_trusted++;
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
@@ -1270,6 +1253,21 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
}
/* Rely on SW csum verification though... */
do_lro = 1;
+ } else if (pr == IPPROTO_UDP) {
+ if (do_csum &&
+ (rxr->hn_trust_hcsum &
+ HN_TRUST_HCSUM_UDP)) {
+ rxr->hn_csum_trusted++;
+ m_new->m_pkthdr.csum_flags |=
+ (CSUM_IP_CHECKED | CSUM_IP_VALID |
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ m_new->m_pkthdr.csum_data = 0xffff;
+ }
+ } else if (pr != IPPROTO_DONE && do_csum &&
+ (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
+ rxr->hn_csum_trusted++;
+ m_new->m_pkthdr.csum_flags |=
+ (CSUM_IP_CHECKED | CSUM_IP_VALID);
}
}
}
@@ -1289,10 +1287,10 @@ skip:
if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
- struct lro_ctrl *lro = &sc->hn_lro;
+ struct lro_ctrl *lro = &rxr->hn_lro;
if (lro->lro_cnt) {
- sc->hn_lro_tried++;
+ rxr->hn_lro_tried++;
if (tcp_lro_rx(lro, m_new, 0) == 0) {
/* DONE! */
return 0;
@@ -1308,18 +1306,8 @@ skip:
}
void
-netvsc_recv_rollup(struct hv_device *device_ctx)
+netvsc_recv_rollup(struct hv_device *device_ctx __unused)
{
-#if defined(INET) || defined(INET6)
- hn_softc_t *sc = device_get_softc(device_ctx->device);
- struct lro_ctrl *lro = &sc->hn_lro;
- struct lro_entry *queued;
-
- while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
- SLIST_REMOVE_HEAD(&lro->lro_active, next);
- tcp_lro_flush(lro, queued);
- }
-#endif
}
/*
@@ -1377,12 +1365,23 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
/* Obtain and record requested MTU */
ifp->if_mtu = ifr->ifr_mtu;
+
+#if __FreeBSD_version >= 1100099
/*
- * Make sure that LRO high watermark is still valid,
- * after MTU change (the 2*MTU limit).
+ * Make sure that LRO aggregation length limit is still
+ * valid, after the MTU change.
*/
- if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat))
- hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp));
+ NV_LOCK(sc);
+ if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
+ HN_LRO_LENLIM_MIN(ifp)) {
+ int i;
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ sc->hn_rx_ring[i].hn_lro.lro_length_lim =
+ HN_LRO_LENLIM_MIN(ifp);
+ }
+ }
+ NV_UNLOCK(sc);
+#endif
do {
NV_LOCK(sc);
@@ -1422,8 +1421,10 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
- if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max)
- sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
+ if (sc->hn_tx_ring[0].hn_tx_chimney_size >
+ sc->hn_tx_chimney_max)
+ hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
+
hn_ifinit_locked(sc);
NV_LOCK(sc);
@@ -1483,47 +1484,43 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = 0;
break;
case SIOCSIFCAP:
+ NV_LOCK(sc);
+
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_TXCSUM) {
- if (IFCAP_TXCSUM & ifp->if_capenable) {
- ifp->if_capenable &= ~IFCAP_TXCSUM;
- ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
+ ifp->if_capenable ^= IFCAP_TXCSUM;
+ if (ifp->if_capenable & IFCAP_TXCSUM) {
+ ifp->if_hwassist |=
+ sc->hn_tx_ring[0].hn_csum_assist;
} else {
- ifp->if_capenable |= IFCAP_TXCSUM;
- /*
- * Only enable UDP checksum offloading on
- * Windows Server 2012R2 or later releases.
- */
- if (hv_vmbus_protocal_version >=
- HV_VMBUS_VERSION_WIN8_1) {
- ifp->if_hwassist |=
- (CSUM_TCP | CSUM_UDP);
- } else {
- ifp->if_hwassist |= CSUM_TCP;
- }
+ ifp->if_hwassist &=
+ ~sc->hn_tx_ring[0].hn_csum_assist;
}
}
- if (mask & IFCAP_RXCSUM) {
- if (IFCAP_RXCSUM & ifp->if_capenable) {
- ifp->if_capenable &= ~IFCAP_RXCSUM;
- } else {
- ifp->if_capenable |= IFCAP_RXCSUM;
- }
- }
+ if (mask & IFCAP_RXCSUM)
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+
if (mask & IFCAP_LRO)
ifp->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
- ifp->if_hwassist ^= CSUM_IP_TSO;
+ if (ifp->if_capenable & IFCAP_TSO4)
+ ifp->if_hwassist |= CSUM_IP_TSO;
+ else
+ ifp->if_hwassist &= ~CSUM_IP_TSO;
}
if (mask & IFCAP_TSO6) {
ifp->if_capenable ^= IFCAP_TSO6;
- ifp->if_hwassist ^= CSUM_IP6_TSO;
+ if (ifp->if_capenable & IFCAP_TSO6)
+ ifp->if_hwassist |= CSUM_IP6_TSO;
+ else
+ ifp->if_hwassist &= ~CSUM_IP6_TSO;
}
+ NV_UNLOCK(sc);
error = 0;
break;
case SIOCADDMULTI:
@@ -1566,7 +1563,8 @@ hn_stop(hn_softc_t *sc)
if (bootverbose)
printf(" Closing Device ...\n");
- ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+ atomic_clear_int(&ifp->if_drv_flags,
+ (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
if_link_state_change(ifp, LINK_STATE_DOWN);
sc->hn_initdone = 0;
@@ -1579,16 +1577,56 @@ hn_stop(hn_softc_t *sc)
static void
hn_start(struct ifnet *ifp)
{
- hn_softc_t *sc;
+ struct hn_softc *sc = ifp->if_softc;
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
- sc = ifp->if_softc;
- NV_LOCK(sc);
- if (sc->temp_unusable) {
- NV_UNLOCK(sc);
- return;
+ if (txr->hn_sched_tx)
+ goto do_sched;
+
+ if (mtx_trylock(&txr->hn_tx_lock)) {
+ int sched;
+
+ sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+ mtx_unlock(&txr->hn_tx_lock);
+ if (!sched)
+ return;
+ }
+do_sched:
+ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
+}
+
+static void
+hn_start_txeof(struct hn_tx_ring *txr)
+{
+ struct hn_softc *sc = txr->hn_sc;
+ struct ifnet *ifp = sc->hn_ifp;
+
+ KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+
+ if (txr->hn_sched_tx)
+ goto do_sched;
+
+ if (mtx_trylock(&txr->hn_tx_lock)) {
+ int sched;
+
+ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+ sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+ mtx_unlock(&txr->hn_tx_lock);
+ if (sched) {
+ taskqueue_enqueue(txr->hn_tx_taskq,
+ &txr->hn_tx_task);
+ }
+ } else {
+do_sched:
+ /*
+ * Release the OACTIVE earlier, with the hope, that
+ * others could catch up. The task will clear the
+ * flag again with the hn_tx_lock to avoid possible
+ * races.
+ */
+ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
- hn_start_locked(ifp);
- NV_UNLOCK(sc);
}
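Note: if_start is now opportunistic: grab the ring lock with mtx_trylock() and transmit inline, bounded by hn_direct_tx_size, and fall back to the per-ring taskqueue when the lock is contended, when hn_sched_tx forces deferral, or when hn_start_locked() returns nonzero because it hit a packet too large to send inline. Distilled from hn_start() above:

    if (!txr->hn_sched_tx && mtx_trylock(&txr->hn_tx_lock)) {
            int sched;

            sched = hn_start_locked(txr, txr->hn_direct_tx_size);
            mtx_unlock(&txr->hn_tx_lock);
            if (!sched)
                    return;          /* everything went out inline */
    }
    taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);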
/*
@@ -1615,8 +1653,8 @@ hn_ifinit_locked(hn_softc_t *sc)
} else {
sc->hn_initdone = 1;
}
- ifp->if_drv_flags |= IFF_DRV_RUNNING;
- ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
if_link_state_change(ifp, LINK_STATE_UP);
}
@@ -1659,26 +1697,90 @@ hn_watchdog(struct ifnet *ifp)
}
#endif
-#ifdef HN_LRO_HIWAT
+#if __FreeBSD_version >= 1100099
+
+static int
+hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ unsigned int lenlim;
+ int error, i;
+
+ lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
+ error = sysctl_handle_int(oidp, &lenlim, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
+ lenlim > TCP_LRO_LENGTH_MAX)
+ return EINVAL;
+
+ NV_LOCK(sc);
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+ sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
+ NV_UNLOCK(sc);
+ return 0;
+}
+
static int
-hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS)
+hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
- int hiwat, error;
+ int ackcnt, error, i;
- hiwat = sc->hn_lro_hiwat;
- error = sysctl_handle_int(oidp, &hiwat, 0, req);
+ /*
+ * lro_ackcnt_lim is append count limit,
+ * +1 to turn it into aggregation limit.
+ */
+ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
+ error = sysctl_handle_int(oidp, &ackcnt, 0, req);
if (error || req->newptr == NULL)
return error;
- if (!HN_LRO_HIWAT_ISVALID(sc, hiwat))
+ if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
return EINVAL;
- if (sc->hn_lro_hiwat != hiwat)
- hn_set_lro_hiwat(sc, hiwat);
+ /*
+ * Convert aggregation limit back to append
+ * count limit.
+ */
+ --ackcnt;
+ NV_LOCK(sc);
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+ sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
+ NV_UNLOCK(sc);
+ return 0;
+}
+
+#endif
+
+static int
+hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int hcsum = arg2;
+ int on, error, i;
+
+ on = 0;
+ if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
+ on = 1;
+
+ error = sysctl_handle_int(oidp, &on, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ NV_LOCK(sc);
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
+
+ if (on)
+ rxr->hn_trust_hcsum |= hcsum;
+ else
+ rxr->hn_trust_hcsum &= ~hcsum;
+ }
+ NV_UNLOCK(sc);
return 0;
}
-#endif /* HN_LRO_HIWAT */
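
These handlers fan a single value out to every RX ring.  From userland
they are driven through the dev.hn.UNIT sysctl tree created later in
this commit; a small sketch, assuming unit 0 and noting that
lro_length_lim only exists when __FreeBSD_version >= 1100099:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		unsigned int lim;
		size_t len = sizeof(lim);
		int on = 1;

		/* Read the shared LRO length limit (first ring's value). */
		if (sysctlbyname("dev.hn.0.lro_length_lim", &lim, &len,
		    NULL, 0) == 0)
			printf("lro_length_lim: %u\n", lim);

		/* Trust host-side TCP checksum verification on all rings. */
		if (sysctlbyname("dev.hn.0.trust_hosttcp", NULL, NULL,
		    &on, sizeof(on)) != 0)
			perror("trust_hosttcp");
		return (0);
	}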
static int
hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
@@ -1686,7 +1788,7 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int chimney_size, error;
- chimney_size = sc->hn_tx_chimney_size;
+ chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size;
error = sysctl_handle_int(oidp, &chimney_size, 0, req);
if (error || req->newptr == NULL)
return error;
@@ -1694,8 +1796,138 @@ hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0)
return EINVAL;
- if (sc->hn_tx_chimney_size != chimney_size)
- sc->hn_tx_chimney_size = chimney_size;
+ hn_set_tx_chimney_size(sc, chimney_size);
+ return 0;
+}
+
+#if __FreeBSD_version < 1100095
+static int
+hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int ofs = arg2, i, error;
+ struct hn_rx_ring *rxr;
+ uint64_t stat;
+
+ stat = 0;
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ rxr = &sc->hn_rx_ring[i];
+ stat += *((int *)((uint8_t *)rxr + ofs));
+ }
+
+ error = sysctl_handle_64(oidp, &stat, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ /* Zero out this stat. */
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ rxr = &sc->hn_rx_ring[i];
+ *((int *)((uint8_t *)rxr + ofs)) = 0;
+ }
+ return 0;
+}
+#else
+static int
+hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int ofs = arg2, i, error;
+ struct hn_rx_ring *rxr;
+ uint64_t stat;
+
+ stat = 0;
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ rxr = &sc->hn_rx_ring[i];
+ stat += *((uint64_t *)((uint8_t *)rxr + ofs));
+ }
+
+ error = sysctl_handle_64(oidp, &stat, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ /* Zero out this stat. */
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ rxr = &sc->hn_rx_ring[i];
+ *((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
+ }
+ return 0;
+}
+
+#endif
+
+static int
+hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int ofs = arg2, i, error;
+ struct hn_rx_ring *rxr;
+ u_long stat;
+
+ stat = 0;
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ rxr = &sc->hn_rx_ring[i];
+ stat += *((u_long *)((uint8_t *)rxr + ofs));
+ }
+
+ error = sysctl_handle_long(oidp, &stat, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ /* Zero out this stat. */
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ rxr = &sc->hn_rx_ring[i];
+ *((u_long *)((uint8_t *)rxr + ofs)) = 0;
+ }
+ return 0;
+}
+
+static int
+hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int ofs = arg2, i, error;
+ struct hn_tx_ring *txr;
+ u_long stat;
+
+ stat = 0;
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ txr = &sc->hn_tx_ring[i];
+ stat += *((u_long *)((uint8_t *)txr + ofs));
+ }
+
+ error = sysctl_handle_long(oidp, &stat, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ /* Zero out this stat. */
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ txr = &sc->hn_tx_ring[i];
+ *((u_long *)((uint8_t *)txr + ofs)) = 0;
+ }
+ return 0;
+}
+
+static int
+hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int ofs = arg2, i, error, conf;
+ struct hn_tx_ring *txr;
+
+ txr = &sc->hn_tx_ring[0];
+ conf = *((int *)((uint8_t *)txr + ofs));
+
+ error = sysctl_handle_int(oidp, &conf, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ NV_LOCK(sc);
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ txr = &sc->hn_tx_ring[i];
+ *((int *)((uint8_t *)txr + ofs)) = conf;
+ }
+ NV_UNLOCK(sc);
+
return 0;
}
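
All of the stat handlers above share one idiom: arg2 carries a field
offset into the per-ring structure, so a single handler can aggregate
(and zero) any statistic across rings.  A reduced sketch of the idiom,
with 'struct ring' as a hypothetical stand-in:

	#include <sys/types.h>

	struct ring {
		u_long	pkts;
		u_long	drops;
	};

	static u_long
	sum_rings(struct ring *rings, int cnt, int ofs)
	{
		u_long total = 0;
		int i;

		for (i = 0; i < cnt; ++i)
			total += *(u_long *)((uint8_t *)&rings[i] + ofs);
		return (total);
	}

	/*
	 * Registered roughly as:
	 * SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "pkts",
	 *     CTLTYPE_ULONG | CTLFLAG_RW, sc, __offsetof(struct ring, pkts),
	 *     handler, "LU", "...");
	 */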
@@ -1786,17 +2018,191 @@ hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
*paddr = segs->ds_addr;
}
+static void
+hn_create_rx_data(struct hn_softc *sc)
+{
+ struct sysctl_oid_list *child;
+ struct sysctl_ctx_list *ctx;
+ device_t dev = sc->hn_dev;
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+ int lroent_cnt;
+#endif
+#endif
+ int i;
+
+ sc->hn_rx_ring_cnt = 1; /* TODO: vRSS */
+ sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
+ M_NETVSC, M_WAITOK | M_ZERO);
+
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+ lroent_cnt = hn_lro_entry_count;
+ if (lroent_cnt < TCP_LRO_ENTRIES)
+ lroent_cnt = TCP_LRO_ENTRIES;
+ device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
+#endif
+#endif /* INET || INET6 */
+
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
+
+ if (hn_trust_hosttcp)
+ rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
+ if (hn_trust_hostudp)
+ rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
+ if (hn_trust_hostip)
+ rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
+
+ /*
+ * Initialize LRO.
+ */
+#if defined(INET) || defined(INET6)
+#if __FreeBSD_version >= 1100095
+ tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, 0);
+#else
+ tcp_lro_init(&rxr->hn_lro);
+ rxr->hn_lro.ifp = sc->hn_ifp;
+#endif
+#if __FreeBSD_version >= 1100099
+ rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
+ rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
+#endif
+#endif /* INET || INET6 */
+ }
+
+ ctx = device_get_sysctl_ctx(dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
+ CTLTYPE_U64 | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
+#if __FreeBSD_version < 1100095
+ hn_rx_stat_int_sysctl,
+#else
+ hn_rx_stat_u64_sysctl,
+#endif
+ "LU", "LRO queued");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
+ CTLTYPE_U64 | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
+#if __FreeBSD_version < 1100095
+ hn_rx_stat_int_sysctl,
+#else
+ hn_rx_stat_u64_sysctl,
+#endif
+ "LU", "LRO flushed");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_lro_tried),
+ hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
+#if __FreeBSD_version >= 1100099
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
+ CTLTYPE_UINT | CTLFLAG_RW, sc, 0, hn_lro_lenlim_sysctl, "IU",
+ "Max # of data bytes to be aggregated by LRO");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
+ CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_ackcnt_sysctl, "I",
+ "Max # of ACKs to be aggregated by LRO");
+#endif
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
+ CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_TCP,
+ hn_trust_hcsum_sysctl, "I",
+	    "Trust tcp segment verification on host side, "
+ "when csum info is missing");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
+ CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_UDP,
+ hn_trust_hcsum_sysctl, "I",
+ "Trust udp datagram verification on host side, "
+ "when csum info is missing");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
+ CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_IP,
+ hn_trust_hcsum_sysctl, "I",
+ "Trust ip packet verification on host side, "
+ "when csum info is missing");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_csum_ip),
+ hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_csum_tcp),
+ hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_csum_udp),
+ hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_csum_trusted),
+ hn_rx_stat_ulong_sysctl, "LU",
+ "# of packets that we trust host's csum verification");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_rx_ring, hn_small_pkts),
+ hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
+}
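
For context, a per-ring LRO block initialized above is typically driven
on the receive path roughly as follows (a sketch, assuming INET and the
pre-1100095 API; rxr, ifp and m come from the surrounding RX handler):

	if (rxr->hn_lro.lro_cnt != 0 && tcp_lro_rx(&rxr->hn_lro, m, 0) == 0)
		return;			/* mbuf consumed and aggregated */
	(*ifp->if_input)(ifp, m);	/* no LRO: hand the mbuf up */
	/* ...and once per RX burst, flush the held LRO entries. */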
+
+static void
+hn_destroy_rx_data(struct hn_softc *sc)
+{
+#if defined(INET) || defined(INET6)
+ int i;
+#endif
+
+ if (sc->hn_rx_ring_cnt == 0)
+ return;
+
+#if defined(INET) || defined(INET6)
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+ tcp_lro_free(&sc->hn_rx_ring[i].hn_lro);
+#endif
+ free(sc->hn_rx_ring, M_NETVSC);
+ sc->hn_rx_ring = NULL;
+
+ sc->hn_rx_ring_cnt = 0;
+}
+
static int
-hn_create_tx_ring(struct hn_softc *sc)
+hn_create_tx_ring(struct hn_softc *sc, int id)
{
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
bus_dma_tag_t parent_dtag;
int error, i;
- sc->hn_txdesc_cnt = HN_TX_DESC_CNT;
- sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt,
+ txr->hn_sc = sc;
+
+#ifndef HN_USE_TXDESC_BUFRING
+ mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+#endif
+ mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
+
+ txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
+ txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
M_NETVSC, M_WAITOK | M_ZERO);
- SLIST_INIT(&sc->hn_txlist);
- mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+#ifndef HN_USE_TXDESC_BUFRING
+ SLIST_INIT(&txr->hn_txlist);
+#else
+ txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC,
+ M_WAITOK, &txr->hn_tx_lock);
+#endif
+
+ txr->hn_tx_taskq = sc->hn_tx_taskq;
+ TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
+ TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
+
+ txr->hn_direct_tx_size = hn_direct_tx_size;
+ if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
+ txr->hn_csum_assist = HN_CSUM_ASSIST;
+ else
+ txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
+
+ /*
+ * Always schedule transmission instead of trying to do direct
+	 * transmission.  This gives the best performance so far.
+ */
+ txr->hn_sched_tx = 1;
+
+ txr->hn_txeof = hn_start_txeof; /* TODO: if_transmit */
parent_dtag = bus_get_dma_tag(sc->hn_dev);
@@ -1813,7 +2219,7 @@ hn_create_tx_ring(struct hn_softc *sc)
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
- &sc->hn_tx_rndis_dtag);
+ &txr->hn_tx_rndis_dtag);
if (error) {
device_printf(sc->hn_dev, "failed to create rndis dmatag\n");
return error;
@@ -1832,21 +2238,21 @@ hn_create_tx_ring(struct hn_softc *sc)
0, /* flags */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
- &sc->hn_tx_data_dtag);
+ &txr->hn_tx_data_dtag);
if (error) {
device_printf(sc->hn_dev, "failed to create data dmatag\n");
return error;
}
- for (i = 0; i < sc->hn_txdesc_cnt; ++i) {
- struct hn_txdesc *txd = &sc->hn_txdesc[i];
+ for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
+ struct hn_txdesc *txd = &txr->hn_txdesc[i];
- txd->sc = sc;
+ txd->txr = txr;
/*
* Allocate and load RNDIS messages.
*/
- error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag,
+ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
(void **)&txd->rndis_msg,
BUS_DMA_WAITOK | BUS_DMA_COHERENT,
&txd->rndis_msg_dmap);
@@ -1856,7 +2262,7 @@ hn_create_tx_ring(struct hn_softc *sc)
return error;
}
- error = bus_dmamap_load(sc->hn_tx_rndis_dtag,
+ error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
txd->rndis_msg_dmap,
txd->rndis_msg, HN_RNDIS_MSG_LEN,
hn_dma_map_paddr, &txd->rndis_msg_paddr,
@@ -1864,59 +2270,277 @@ hn_create_tx_ring(struct hn_softc *sc)
if (error) {
device_printf(sc->hn_dev,
"failed to load rndis_msg, %d\n", i);
- bus_dmamem_free(sc->hn_tx_rndis_dtag,
+ bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_msg, txd->rndis_msg_dmap);
return error;
}
/* DMA map for TX data. */
- error = bus_dmamap_create(sc->hn_tx_data_dtag, 0,
+ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
&txd->data_dmap);
if (error) {
device_printf(sc->hn_dev,
"failed to allocate tx data dmamap\n");
- bus_dmamap_unload(sc->hn_tx_rndis_dtag,
+ bus_dmamap_unload(txr->hn_tx_rndis_dtag,
txd->rndis_msg_dmap);
- bus_dmamem_free(sc->hn_tx_rndis_dtag,
+ bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_msg, txd->rndis_msg_dmap);
return error;
}
/* All set, put it to list */
txd->flags |= HN_TXD_FLAG_ONLIST;
- SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
+#ifndef HN_USE_TXDESC_BUFRING
+ SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+#else
+ buf_ring_enqueue(txr->hn_txdesc_br, txd);
+#endif
+ }
+ txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
+
+ if (sc->hn_tx_sysctl_tree != NULL) {
+ struct sysctl_oid_list *child;
+ struct sysctl_ctx_list *ctx;
+ char name[16];
+
+ /*
+ * Create per TX ring sysctl tree:
+ * dev.hn.UNIT.tx.RINGID
+ */
+ ctx = device_get_sysctl_ctx(sc->hn_dev);
+ child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
+
+ snprintf(name, sizeof(name), "%d", id);
+ txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
+ name, CTLFLAG_RD, 0, "");
+
+ if (txr->hn_tx_sysctl_tree != NULL) {
+ child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
+
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
+ CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
+ "# of available TX descs");
+ }
}
- sc->hn_txdesc_avail = sc->hn_txdesc_cnt;
return 0;
}
static void
-hn_destroy_tx_ring(struct hn_softc *sc)
+hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
+{
+ struct hn_tx_ring *txr = txd->txr;
+
+ KASSERT(txd->m == NULL, ("still has mbuf installed"));
+ KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
+
+ bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap);
+ bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg,
+ txd->rndis_msg_dmap);
+ bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
+}
+
+static void
+hn_destroy_tx_ring(struct hn_tx_ring *txr)
{
struct hn_txdesc *txd;
- while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) {
- KASSERT(txd->m == NULL, ("still has mbuf installed"));
- KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
- ("still dma mapped"));
- SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+ if (txr->hn_txdesc == NULL)
+ return;
+
+#ifndef HN_USE_TXDESC_BUFRING
+ while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
+ SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
+ hn_txdesc_dmamap_destroy(txd);
+ }
+#else
+ while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL)
+ hn_txdesc_dmamap_destroy(txd);
+#endif
+
+ if (txr->hn_tx_data_dtag != NULL)
+ bus_dma_tag_destroy(txr->hn_tx_data_dtag);
+ if (txr->hn_tx_rndis_dtag != NULL)
+ bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
+
+#ifdef HN_USE_TXDESC_BUFRING
+ buf_ring_free(txr->hn_txdesc_br, M_NETVSC);
+#endif
+
+ free(txr->hn_txdesc, M_NETVSC);
+ txr->hn_txdesc = NULL;
+
+#ifndef HN_USE_TXDESC_BUFRING
+ mtx_destroy(&txr->hn_txlist_spin);
+#endif
+ mtx_destroy(&txr->hn_tx_lock);
+}
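
With HN_USE_TXDESC_BUFRING defined, descriptor get/put over the buf_ring
allocated in hn_create_tx_ring() reduces to the sketch below.  The
dequeue is the single-consumer variant, so consumers must be serialized
(e.g. by the ring's TX lock); the helper names are hypothetical:

	#include <sys/buf_ring.h>

	static struct hn_txdesc *
	txd_get(struct hn_tx_ring *txr)
	{
		return (buf_ring_dequeue_sc(txr->hn_txdesc_br));
	}

	static void
	txd_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
	{
		buf_ring_enqueue(txr->hn_txdesc_br, txd);
	}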
+
+static int
+hn_create_tx_data(struct hn_softc *sc)
+{
+ struct sysctl_oid_list *child;
+ struct sysctl_ctx_list *ctx;
+ int i;
+
+ sc->hn_tx_ring_cnt = 1; /* TODO: vRSS */
+ sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
+ M_NETVSC, M_WAITOK | M_ZERO);
+
+ ctx = device_get_sysctl_ctx(sc->hn_dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
+
+ /* Create dev.hn.UNIT.tx sysctl tree */
+ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
+ CTLFLAG_RD, 0, "");
+
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ int error;
+
+ error = hn_create_tx_ring(sc, i);
+ if (error)
+ return error;
+ }
+
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_tx_ring, hn_no_txdescs),
+ hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_tx_ring, hn_send_failed),
+ hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_tx_ring, hn_txdma_failed),
+ hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_tx_ring, hn_tx_collapsed),
+ hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
+ CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ __offsetof(struct hn_tx_ring, hn_tx_chimney),
+ hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
+ CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
+ "# of total TX descs");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
+ CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
+ "Chimney send packet size upper boundary");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
+ CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
+ "I", "Chimney send packet size limit");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
+ CTLTYPE_INT | CTLFLAG_RW, sc,
+ __offsetof(struct hn_tx_ring, hn_direct_tx_size),
+ hn_tx_conf_int_sysctl, "I",
+ "Size of the packet for direct transmission");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
+ CTLTYPE_INT | CTLFLAG_RW, sc,
+ __offsetof(struct hn_tx_ring, hn_sched_tx),
+ hn_tx_conf_int_sysctl, "I",
+ "Always schedule transmission "
+ "instead of doing direct transmission");
+
+ return 0;
+}
+
+static void
+hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size)
+{
+ int i;
- bus_dmamap_unload(sc->hn_tx_rndis_dtag,
- txd->rndis_msg_dmap);
- bus_dmamem_free(sc->hn_tx_rndis_dtag,
- txd->rndis_msg, txd->rndis_msg_dmap);
+ NV_LOCK(sc);
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+ sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size;
+ NV_UNLOCK(sc);
+}
+
+static void
+hn_destroy_tx_data(struct hn_softc *sc)
+{
+ int i;
+
+ if (sc->hn_tx_ring_cnt == 0)
+ return;
+
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+ hn_destroy_tx_ring(&sc->hn_tx_ring[i]);
+
+ free(sc->hn_tx_ring, M_NETVSC);
+ sc->hn_tx_ring = NULL;
+
+ sc->hn_tx_ring_cnt = 0;
+}
+
+static void
+hn_start_taskfunc(void *xtxr, int pending __unused)
+{
+ struct hn_tx_ring *txr = xtxr;
+
+ mtx_lock(&txr->hn_tx_lock);
+ hn_start_locked(txr, 0);
+ mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
+{
+ struct hn_tx_ring *txr = xtxr;
+
+ mtx_lock(&txr->hn_tx_lock);
+ atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
+ hn_start_locked(txr, 0);
+ mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_stop_tx_tasks(struct hn_softc *sc)
+{
+ int i;
+
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+
+ taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
+ taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
+ }
+}
- bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap);
+static void
+hn_tx_taskq_create(void *arg __unused)
+{
+ if (!hn_share_tx_taskq)
+ return;
+
+ hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
+ taskqueue_thread_enqueue, &hn_tx_taskq);
+ taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx");
+ if (hn_bind_tx_taskq >= 0) {
+ int cpu = hn_bind_tx_taskq;
+ struct task cpuset_task;
+ cpuset_t cpu_set;
+
+ if (cpu > mp_ncpus - 1)
+ cpu = mp_ncpus - 1;
+ CPU_SETOF(cpu, &cpu_set);
+ TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set);
+ taskqueue_enqueue(hn_tx_taskq, &cpuset_task);
+ taskqueue_drain(hn_tx_taskq, &cpuset_task);
}
+}
+SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST,
+ hn_tx_taskq_create, NULL);
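
The CPU-binding trick above works because the one-shot task runs on the
taskqueue's own thread, so the thread can pin itself; the subsequent
taskqueue_drain() makes the binding synchronous.  The callback body is
not part of this diff; a sketch of what it plausibly does, modeled on
the kernel's cpuset_setthread():

	static void
	hn_cpuset_setthread_task(void *xmask, int pending __unused)
	{
		cpuset_t *mask = xmask;

		/* curthread is the taskqueue thread while this runs. */
		(void)cpuset_setthread(curthread->td_tid, mask);
	}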
- if (sc->hn_tx_data_dtag != NULL)
- bus_dma_tag_destroy(sc->hn_tx_data_dtag);
- if (sc->hn_tx_rndis_dtag != NULL)
- bus_dma_tag_destroy(sc->hn_tx_rndis_dtag);
- free(sc->hn_txdesc, M_NETVSC);
- mtx_destroy(&sc->hn_txlist_spin);
+static void
+hn_tx_taskq_destroy(void *arg __unused)
+{
+ if (hn_tx_taskq != NULL)
+ taskqueue_free(hn_tx_taskq);
}
+SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST,
+ hn_tx_taskq_destroy, NULL);
static device_method_t netvsc_methods[] = {
/* Device interface */
diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c
index 29d8c8f..31ddbc0 100644
--- a/sys/dev/hyperv/netvsc/hv_rndis_filter.c
+++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c
@@ -136,12 +136,9 @@ hv_get_rndis_device(void)
{
rndis_device *device;
- device = malloc(sizeof(rndis_device), M_NETVSC, M_NOWAIT | M_ZERO);
- if (device == NULL) {
- return (NULL);
- }
+ device = malloc(sizeof(rndis_device), M_NETVSC, M_WAITOK | M_ZERO);
- mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_SPIN | MTX_RECURSE);
+ mtx_init(&device->req_lock, "HV-FRL", NULL, MTX_DEF);
/* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */
STAILQ_INIT(&device->myrequest_list);
@@ -172,10 +169,7 @@ hv_rndis_request(rndis_device *device, uint32_t message_type,
rndis_msg *rndis_mesg;
rndis_set_request *set;
- request = malloc(sizeof(rndis_request), M_NETVSC, M_NOWAIT | M_ZERO);
- if (request == NULL) {
- return (NULL);
- }
+ request = malloc(sizeof(rndis_request), M_NETVSC, M_WAITOK | M_ZERO);
sema_init(&request->wait_sema, 0, "rndis sema");
@@ -194,9 +188,9 @@ hv_rndis_request(rndis_device *device, uint32_t message_type,
set->request_id += 1;
/* Add to the request list */
- mtx_lock_spin(&device->req_lock);
+ mtx_lock(&device->req_lock);
STAILQ_INSERT_TAIL(&device->myrequest_list, request, mylist_entry);
- mtx_unlock_spin(&device->req_lock);
+ mtx_unlock(&device->req_lock);
return (request);
}
@@ -207,14 +201,14 @@ hv_rndis_request(rndis_device *device, uint32_t message_type,
static inline void
hv_put_rndis_request(rndis_device *device, rndis_request *request)
{
- mtx_lock_spin(&device->req_lock);
+ mtx_lock(&device->req_lock);
/* Fixme: Has O(n) performance */
/*
* XXXKYS: Use Doubly linked lists.
*/
STAILQ_REMOVE(&device->myrequest_list, request, rndis_request_,
mylist_entry);
- mtx_unlock_spin(&device->req_lock);
+ mtx_unlock(&device->req_lock);
sema_destroy(&request->wait_sema);
free(request, M_NETVSC);
@@ -271,7 +265,7 @@ hv_rf_receive_response(rndis_device *device, rndis_msg *response)
rndis_request *next_request;
boolean_t found = FALSE;
- mtx_lock_spin(&device->req_lock);
+ mtx_lock(&device->req_lock);
request = STAILQ_FIRST(&device->myrequest_list);
while (request != NULL) {
/*
@@ -286,7 +280,7 @@ hv_rf_receive_response(rndis_device *device, rndis_msg *response)
next_request = STAILQ_NEXT(request, mylist_entry);
request = next_request;
}
- mtx_unlock_spin(&device->req_lock);
+ mtx_unlock(&device->req_lock);
if (found) {
if (response->msg_len <= sizeof(rndis_msg)) {
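
The conversion in this file swaps the recursive spin mutex for a regular
(adaptive) mutex, changing the lock/unlock calls in step.  In miniature,
assuming the same lock name:

	static void
	req_lock_example(void)
	{
		struct mtx req_lock;

		mtx_init(&req_lock, "HV-FRL", NULL, MTX_DEF);
		mtx_lock(&req_lock);	/* was mtx_lock_spin() */
		/* ... walk or edit the request list ... */
		mtx_unlock(&req_lock);	/* was mtx_unlock_spin() */
		mtx_destroy(&req_lock);
	}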
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index a780f9e..27fb3fd 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -856,8 +856,8 @@ hv_storvsc_rescan_target(struct storvsc_softc *sc)
if (xpt_create_path(&ccb->ccb_h.path, NULL, pathid, targetid,
CAM_LUN_WILDCARD) != CAM_REQ_CMP) {
- printf("unable to create path for rescan, pathid: %d,"
- "targetid: %d\n", pathid, targetid);
+ printf("unable to create path for rescan, pathid: %u,"
+ "targetid: %u\n", pathid, targetid);
xpt_free_ccb(ccb);
return;
}
@@ -1561,13 +1561,12 @@ static void
storvsc_destroy_bounce_buffer(struct sglist *sgl)
{
struct hv_sgl_node *sgl_node = NULL;
-
- sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
- LIST_REMOVE(sgl_node, link);
- if (NULL == sgl_node) {
+ if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
printf("storvsc error: not enough in use sgl\n");
return;
}
+ sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
+ LIST_REMOVE(sgl_node, link);
sgl_node->sgl_data = sgl;
LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
}
@@ -1593,12 +1592,12 @@ storvsc_create_bounce_buffer(uint16_t seg_count, int write)
struct hv_sgl_node *sgl_node = NULL;
/* get struct sglist from free_sgl_list */
- sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
- LIST_REMOVE(sgl_node, link);
- if (NULL == sgl_node) {
+ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
printf("storvsc error: not enough free sgl\n");
return NULL;
}
+ sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+ LIST_REMOVE(sgl_node, link);
bounce_sgl = sgl_node->sgl_data;
LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);
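
Both storvsc fixes above converge on the same safe "pop" idiom: test for
emptiness before LIST_FIRST()/LIST_REMOVE(), never after the removal.
As a fragment, with 'head', 'node' and 'link' as stand-ins:

	if (LIST_EMPTY(&head)) {
		printf("list unexpectedly empty\n");
		return (NULL);
	}
	node = LIST_FIRST(&head);
	LIST_REMOVE(node, link);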
diff --git a/sys/dev/hyperv/utilities/hv_heartbeat.c b/sys/dev/hyperv/utilities/hv_heartbeat.c
new file mode 100644
index 0000000..c1b6da5
--- /dev/null
+++ b/sys/dev/hyperv/utilities/hv_heartbeat.c
@@ -0,0 +1,129 @@
+/*-
+ * Copyright (c) 2014 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/timetc.h>
+#include <sys/syscallsubr.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include "hv_util.h"
+
+/* Heartbeat Service */
+static hv_guid service_guid = { .data =
+ {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
+ 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d} };
+
+/**
+ * Process heartbeat message
+ */
+static void
+hv_heartbeat_cb(void *context)
+{
+ uint8_t* buf;
+ hv_vmbus_channel* channel;
+ uint32_t recvlen;
+ uint64_t requestid;
+ int ret;
+
+ struct hv_vmbus_heartbeat_msg_data* heartbeat_msg;
+ struct hv_vmbus_icmsg_hdr* icmsghdrp;
+ hv_util_sc *softc;
+
+ softc = (hv_util_sc*)context;
+	buf = softc->receive_buffer;
+ channel = softc->hv_dev->channel;
+
+ ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen,
+ &requestid);
+
+ if ((ret == 0) && recvlen > 0) {
+
+ icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr)];
+
+ if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+ hv_negotiate_version(icmsghdrp, NULL, buf);
+
+ } else {
+ heartbeat_msg =
+ (struct hv_vmbus_heartbeat_msg_data *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr) +
+ sizeof(struct hv_vmbus_icmsg_hdr)];
+
+ heartbeat_msg->seq_num += 1;
+ }
+
+ icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
+ HV_ICMSGHDRFLAG_RESPONSE;
+
+ hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+ }
+}
+
+static int
+hv_heartbeat_probe(device_t dev)
+{
+ const char *p = vmbus_get_type(dev);
+ if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
+ device_set_desc(dev, "Hyper-V Heartbeat Service");
+ return BUS_PROBE_DEFAULT;
+ }
+
+ return ENXIO;
+}
+
+static int
+hv_heartbeat_attach(device_t dev)
+{
+ hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev);
+
+ softc->callback = hv_heartbeat_cb;
+
+ return hv_util_attach(dev);
+}
+
+static device_method_t heartbeat_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, hv_heartbeat_probe),
+ DEVMETHOD(device_attach, hv_heartbeat_attach),
+ DEVMETHOD(device_detach, hv_util_detach),
+ { 0, 0 }
+};
+
+static driver_t heartbeat_driver = { "hvheartbeat", heartbeat_methods, sizeof(hv_util_sc)};
+
+static devclass_t heartbeat_devclass;
+
+DRIVER_MODULE(hv_heartbeat, vmbus, heartbeat_driver, heartbeat_devclass, NULL, NULL);
+MODULE_VERSION(hv_heartbeat, 1);
+MODULE_DEPEND(hv_heartbeat, vmbus, 1, 1, 1);
diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c
index 58d565c4..8517918 100644
--- a/sys/dev/hyperv/utilities/hv_kvp.c
+++ b/sys/dev/hyperv/utilities/hv_kvp.c
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/netvsc/hv_net_vsc.h>
+#include "hv_util.h"
#include "unicode.h"
#include "hv_kvp.h"
@@ -74,8 +75,6 @@ __FBSDID("$FreeBSD$");
/* hv_kvp debug control */
static int hv_kvp_log = 0;
-SYSCTL_INT(_dev, OID_AUTO, hv_kvp_log, CTLFLAG_RW, &hv_kvp_log, 0,
- "hv_kvp log");
#define hv_kvp_log_error(...) do { \
if (hv_kvp_log > 0) \
@@ -87,6 +86,10 @@ SYSCTL_INT(_dev, OID_AUTO, hv_kvp_log, CTLFLAG_RW, &hv_kvp_log, 0,
log(LOG_INFO, "hv_kvp: " __VA_ARGS__); \
} while (0)
+static hv_guid service_guid = { .data =
+ {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
+ 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6} };
+
/* character device prototypes */
static d_open_t hv_kvp_dev_open;
static d_close_t hv_kvp_dev_close;
@@ -94,12 +97,6 @@ static d_read_t hv_kvp_dev_daemon_read;
static d_write_t hv_kvp_dev_daemon_write;
static d_poll_t hv_kvp_dev_daemon_poll;
-/* hv_kvp prototypes */
-static int hv_kvp_req_in_progress(void);
-static void hv_kvp_transaction_init(uint32_t, hv_vmbus_channel *, uint64_t, uint8_t *);
-static void hv_kvp_send_msg_to_daemon(void);
-static void hv_kvp_process_request(void *context);
-
/* hv_kvp character device structure */
static struct cdevsw hv_kvp_cdevsw =
{
@@ -111,70 +108,67 @@ static struct cdevsw hv_kvp_cdevsw =
.d_poll = hv_kvp_dev_daemon_poll,
.d_name = "hv_kvp_dev",
};
-static struct cdev *hv_kvp_dev;
-static struct hv_kvp_msg *hv_kvp_dev_buf;
-struct proc *daemon_task;
-static struct selinfo hv_kvp_selinfo;
/*
* Global state to track and synchronize multiple
* KVP transaction requests from the host.
*/
-static struct {
-
- /* Pre-allocated work item for queue */
- hv_work_item work_item;
+typedef struct hv_kvp_sc {
+ struct hv_util_sc util_sc;
- /* Unless specified the pending mutex should be
+ /* Unless specified the pending mutex should be
	 * used to alter the values of the following parameters:
* 1. req_in_progress
* 2. req_timed_out
- * 3. pending_reqs.
*/
- struct mtx pending_mutex;
-
+ struct mtx pending_mutex;
+
+ struct task task;
+
/* To track if transaction is active or not */
- boolean_t req_in_progress;
+ boolean_t req_in_progress;
/* Tracks if daemon did not reply back in time */
- boolean_t req_timed_out;
+ boolean_t req_timed_out;
/* Tracks if daemon is serving a request currently */
boolean_t daemon_busy;
- /* Count of KVP requests from Hyper-V. */
- uint64_t pending_reqs;
-
-
- /* Length of host message */
- uint32_t host_msg_len;
- /* Pointer to channel */
- hv_vmbus_channel *channelp;
+ /* Length of host message */
+ uint32_t host_msg_len;
/* Host message id */
- uint64_t host_msg_id;
-
+ uint64_t host_msg_id;
+
/* Current kvp message from the host */
- struct hv_kvp_msg *host_kvp_msg;
-
+ struct hv_kvp_msg *host_kvp_msg;
+
/* Current kvp message for daemon */
- struct hv_kvp_msg daemon_kvp_msg;
-
+ struct hv_kvp_msg daemon_kvp_msg;
+
/* Rcv buffer for communicating with the host*/
- uint8_t *rcv_buf;
-
+ uint8_t *rcv_buf;
+
/* Device semaphore to control communication */
- struct sema dev_sema;
-
+ struct sema dev_sema;
+
/* Indicates if daemon registered with driver */
- boolean_t register_done;
-
+ boolean_t register_done;
+
/* Character device status */
- boolean_t dev_accessed;
-} kvp_globals;
+ boolean_t dev_accessed;
+
+ struct cdev *hv_kvp_dev;
+
+ struct proc *daemon_task;
-/* global vars */
-MALLOC_DECLARE(M_HV_KVP_DEV_BUF);
-MALLOC_DEFINE(M_HV_KVP_DEV_BUF, "hv_kvp_dev buffer", "buffer for hv_kvp_dev module");
+ struct selinfo hv_kvp_selinfo;
+} hv_kvp_sc;
+
+/* hv_kvp prototypes */
+static int hv_kvp_req_in_progress(hv_kvp_sc *sc);
+static void hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t, uint64_t, uint8_t *);
+static void hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc);
+static void hv_kvp_process_request(void *context, int pending);
/*
* hv_kvp low level functions
@@ -184,10 +178,10 @@ MALLOC_DEFINE(M_HV_KVP_DEV_BUF, "hv_kvp_dev buffer", "buffer for hv_kvp_dev modu
 * Check if kvp transaction is in progress
*/
static int
-hv_kvp_req_in_progress(void)
+hv_kvp_req_in_progress(hv_kvp_sc *sc)
{
- return (kvp_globals.req_in_progress);
+ return (sc->req_in_progress);
}
@@ -195,18 +189,17 @@ hv_kvp_req_in_progress(void)
* This routine is called whenever a message is received from the host
*/
static void
-hv_kvp_transaction_init(uint32_t rcv_len, hv_vmbus_channel *rcv_channel,
+hv_kvp_transaction_init(hv_kvp_sc *sc, uint32_t rcv_len,
uint64_t request_id, uint8_t *rcv_buf)
{
-
+
/* Store all the relevant message details in the global structure */
/* Do not need to use mutex for req_in_progress here */
- kvp_globals.req_in_progress = true;
- kvp_globals.host_msg_len = rcv_len;
- kvp_globals.channelp = rcv_channel;
- kvp_globals.host_msg_id = request_id;
- kvp_globals.rcv_buf = rcv_buf;
- kvp_globals.host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[
+ sc->req_in_progress = true;
+ sc->host_msg_len = rcv_len;
+ sc->host_msg_id = request_id;
+ sc->rcv_buf = rcv_buf;
+ sc->host_kvp_msg = (struct hv_kvp_msg *)&rcv_buf[
sizeof(struct hv_vmbus_pipe_hdr) +
sizeof(struct hv_vmbus_icmsg_hdr)];
}
@@ -258,12 +251,12 @@ hv_kvp_negotiate_version(struct hv_vmbus_icmsg_hdr *icmsghdrp,
* Convert ip related info in umsg from utf8 to utf16 and store in hmsg
*/
static int
-hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg,
+hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg,
struct hv_kvp_ip_msg *host_ip_msg)
{
int err_ip, err_subnet, err_gway, err_dns, err_adap;
int UNUSED_FLAG = 1;
-
+
utf8_to_utf16((uint16_t *)host_ip_msg->kvp_ip_val.ip_addr,
MAX_IP_ADDR_SIZE,
(char *)umsg->body.kvp_ip_val.ip_addr,
@@ -294,7 +287,7 @@ hv_kvp_convert_utf8_ipinfo_to_utf16(struct hv_kvp_msg *umsg,
strlen((char *)umsg->body.kvp_ip_val.adapter_id),
UNUSED_FLAG,
&err_adap);
-
+
host_ip_msg->kvp_ip_val.dhcp_enabled = umsg->body.kvp_ip_val.dhcp_enabled;
host_ip_msg->kvp_ip_val.addr_family = umsg->body.kvp_ip_val.addr_family;
@@ -389,7 +382,7 @@ hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg,
MAX_IP_ADDR_SIZE,
UNUSED_FLAG,
&err_subnet);
-
+
utf16_to_utf8((char *)umsg->body.kvp_ip_val.gate_way, MAX_GATEWAY_SIZE,
(uint16_t *)host_ip_msg->kvp_ip_val.gate_way,
MAX_GATEWAY_SIZE,
@@ -411,16 +404,13 @@ hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg,
* Ensure utf16_utf8 takes care of the additional string terminating char!!
*/
static void
-hv_kvp_convert_hostmsg_to_usermsg(void)
+hv_kvp_convert_hostmsg_to_usermsg(struct hv_kvp_msg *hmsg, struct hv_kvp_msg *umsg)
{
int utf_err = 0;
uint32_t value_type;
- struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)
- kvp_globals.host_kvp_msg;
-
- struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg;
- struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg;
+ struct hv_kvp_ip_msg *host_ip_msg;
+ host_ip_msg = (struct hv_kvp_ip_msg*)hmsg;
memset(umsg, 0, sizeof(struct hv_kvp_msg));
umsg->kvp_hdr.operation = hmsg->kvp_hdr.operation;
@@ -525,14 +515,12 @@ hv_kvp_convert_hostmsg_to_usermsg(void)
* Prepare a host kvp msg based on user kvp msg (utf8 to utf16)
*/
static int
-hv_kvp_convert_usermsg_to_hostmsg(void)
+hv_kvp_convert_usermsg_to_hostmsg(struct hv_kvp_msg *umsg, struct hv_kvp_msg *hmsg)
{
int hkey_len = 0, hvalue_len = 0, utf_err = 0;
struct hv_kvp_exchg_msg_value *host_exchg_data;
char *key_name, *value;
- struct hv_kvp_msg *umsg = &kvp_globals.daemon_kvp_msg;
- struct hv_kvp_msg *hmsg = kvp_globals.host_kvp_msg;
struct hv_kvp_ip_msg *host_ip_msg = (struct hv_kvp_ip_msg *)hmsg;
switch (hmsg->kvp_hdr.operation) {
@@ -564,7 +552,7 @@ hv_kvp_convert_usermsg_to_hostmsg(void)
if ((hkey_len < 0) || (hvalue_len < 0))
return (HV_KVP_E_FAIL);
-
+
return (KVP_SUCCESS);
case HV_KVP_OP_GET:
@@ -580,9 +568,9 @@ hv_kvp_convert_usermsg_to_hostmsg(void)
/* Use values by string */
host_exchg_data->value_type = HV_REG_SZ;
- if ((hkey_len < 0) || (hvalue_len < 0))
+ if ((hkey_len < 0) || (hvalue_len < 0))
return (HV_KVP_E_FAIL);
-
+
return (KVP_SUCCESS);
default:
@@ -595,22 +583,22 @@ hv_kvp_convert_usermsg_to_hostmsg(void)
* Send the response back to the host.
*/
static void
-hv_kvp_respond_host(int error)
+hv_kvp_respond_host(hv_kvp_sc *sc, int error)
{
struct hv_vmbus_icmsg_hdr *hv_icmsg_hdrp;
hv_icmsg_hdrp = (struct hv_vmbus_icmsg_hdr *)
- &kvp_globals.rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)];
+ &sc->rcv_buf[sizeof(struct hv_vmbus_pipe_hdr)];
if (error)
error = HV_KVP_E_FAIL;
hv_icmsg_hdrp->status = error;
hv_icmsg_hdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
-
- error = hv_vmbus_channel_send_packet(kvp_globals.channelp,
- kvp_globals.rcv_buf,
- kvp_globals.host_msg_len, kvp_globals.host_msg_id,
+
+ error = hv_vmbus_channel_send_packet(sc->util_sc.hv_dev->channel,
+ sc->rcv_buf,
+ sc->host_msg_len, sc->host_msg_id,
HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
if (error)
@@ -624,16 +612,19 @@ hv_kvp_respond_host(int error)
* and the host
*/
static void
-hv_kvp_send_msg_to_daemon(void)
+hv_kvp_send_msg_to_daemon(hv_kvp_sc *sc)
{
+ struct hv_kvp_msg *hmsg = sc->host_kvp_msg;
+ struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg;
+
/* Prepare kvp_msg to be sent to user */
- hv_kvp_convert_hostmsg_to_usermsg();
+ hv_kvp_convert_hostmsg_to_usermsg(hmsg, umsg);
	/* Send the msg to user via function hv_kvp_dev_daemon_read - setting sema */
- sema_post(&kvp_globals.dev_sema);
+ sema_post(&sc->dev_sema);
/* We should wake up the daemon, in case it's doing poll() */
- selwakeup(&hv_kvp_selinfo);
+ selwakeup(&sc->hv_kvp_selinfo);
}
@@ -642,98 +633,83 @@ hv_kvp_send_msg_to_daemon(void)
* and interact with daemon
*/
static void
-hv_kvp_process_request(void *context)
+hv_kvp_process_request(void *context, int pending)
{
uint8_t *kvp_buf;
- hv_vmbus_channel *channel = context;
+ hv_vmbus_channel *channel;
uint32_t recvlen = 0;
uint64_t requestid;
struct hv_vmbus_icmsg_hdr *icmsghdrp;
int ret = 0;
- uint64_t pending_cnt = 1;
-
+ hv_kvp_sc *sc;
+
hv_kvp_log_info("%s: entering hv_kvp_process_request\n", __func__);
- kvp_buf = receive_buffer[HV_KVP];
+
+ sc = (hv_kvp_sc*)context;
+	kvp_buf = sc->util_sc.receive_buffer;
+ channel = sc->util_sc.hv_dev->channel;
+
ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE,
&recvlen, &requestid);
- /*
- * We start counting only after the daemon registers
- * and therefore there could be requests pending in
- * the VMBus that are not reflected in pending_cnt.
- * Therefore we continue reading as long as either of
- * the below conditions is true.
- */
+ while ((ret == 0) && (recvlen > 0)) {
+
+ icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
+ &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)];
+
+ hv_kvp_transaction_init(sc, recvlen, requestid, kvp_buf);
+ if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+ hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf);
+ hv_kvp_respond_host(sc, ret);
+
+ /*
+ * It is ok to not acquire the mutex before setting
+ * req_in_progress here because negotiation is the
+ * first thing that happens and hence there is no
+ * chance of a race condition.
+ */
+
+ sc->req_in_progress = false;
+			hv_kvp_log_info("%s: version negotiated\n", __func__);
+
+ } else {
+ if (!sc->daemon_busy) {
+
+				hv_kvp_log_info("%s: issuing query to daemon\n", __func__);
+ mtx_lock(&sc->pending_mutex);
+ sc->req_timed_out = false;
+ sc->daemon_busy = true;
+ mtx_unlock(&sc->pending_mutex);
- while ((pending_cnt>0) || ((ret == 0) && (recvlen > 0))) {
-
- if ((ret == 0) && (recvlen>0)) {
-
- icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
- &kvp_buf[sizeof(struct hv_vmbus_pipe_hdr)];
-
- hv_kvp_transaction_init(recvlen, channel, requestid, kvp_buf);
- if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
- hv_kvp_negotiate_version(icmsghdrp, NULL, kvp_buf);
- hv_kvp_respond_host(ret);
-
- /*
- * It is ok to not acquire the mutex before setting
- * req_in_progress here because negotiation is the
- * first thing that happens and hence there is no
- * chance of a race condition.
- */
-
- kvp_globals.req_in_progress = false;
- hv_kvp_log_info("%s :version negotiated\n", __func__);
-
- } else {
- if (!kvp_globals.daemon_busy) {
-
- hv_kvp_log_info("%s: issuing qury to daemon\n", __func__);
- mtx_lock(&kvp_globals.pending_mutex);
- kvp_globals.req_timed_out = false;
- kvp_globals.daemon_busy = true;
- mtx_unlock(&kvp_globals.pending_mutex);
-
- hv_kvp_send_msg_to_daemon();
- hv_kvp_log_info("%s: waiting for daemon\n", __func__);
- }
-
- /* Wait 5 seconds for daemon to respond back */
- tsleep(&kvp_globals, 0, "kvpworkitem", 5 * hz);
- hv_kvp_log_info("%s: came out of wait\n", __func__);
+ hv_kvp_send_msg_to_daemon(sc);
+ hv_kvp_log_info("%s: waiting for daemon\n", __func__);
}
+
+ /* Wait 5 seconds for daemon to respond back */
+ tsleep(sc, 0, "kvpworkitem", 5 * hz);
+ hv_kvp_log_info("%s: came out of wait\n", __func__);
}
- mtx_lock(&kvp_globals.pending_mutex);
-
+ mtx_lock(&sc->pending_mutex);
+
/* Notice that once req_timed_out is set to true
* it will remain true until the next request is
* sent to the daemon. The response from daemon
- * is forwarded to host only when this flag is
- * false.
+ * is forwarded to host only when this flag is
+ * false.
*/
- kvp_globals.req_timed_out = true;
+ sc->req_timed_out = true;
/*
* Cancel request if so need be.
*/
- if (hv_kvp_req_in_progress()) {
+ if (hv_kvp_req_in_progress(sc)) {
hv_kvp_log_info("%s: request was still active after wait so failing\n", __func__);
- hv_kvp_respond_host(HV_KVP_E_FAIL);
- kvp_globals.req_in_progress = false;
- }
-
- /*
- * Decrement pending request count and
- */
- if (kvp_globals.pending_reqs>0) {
- kvp_globals.pending_reqs = kvp_globals.pending_reqs - 1;
+ hv_kvp_respond_host(sc, HV_KVP_E_FAIL);
+ sc->req_in_progress = false;
}
- pending_cnt = kvp_globals.pending_reqs;
-
- mtx_unlock(&kvp_globals.pending_mutex);
+
+ mtx_unlock(&sc->pending_mutex);
/*
* Try reading next buffer
@@ -741,109 +717,43 @@ hv_kvp_process_request(void *context)
recvlen = 0;
ret = hv_vmbus_channel_recv_packet(channel, kvp_buf, 2 * PAGE_SIZE,
&recvlen, &requestid);
- hv_kvp_log_info("%s: read: context %p, pending_cnt %llu ret =%d, recvlen=%d\n",
- __func__, context, (unsigned long long)pending_cnt, ret, recvlen);
- }
+ hv_kvp_log_info("%s: read: context %p, ret =%d, recvlen=%d\n",
+ __func__, context, ret, recvlen);
+ }
}
/*
* Callback routine that gets called whenever there is a message from host
*/
-void
+static void
hv_kvp_callback(void *context)
{
- uint64_t pending_cnt = 0;
-
- if (kvp_globals.register_done == false) {
-
- kvp_globals.channelp = context;
- } else {
-
- mtx_lock(&kvp_globals.pending_mutex);
- kvp_globals.pending_reqs = kvp_globals.pending_reqs + 1;
- pending_cnt = kvp_globals.pending_reqs;
- mtx_unlock(&kvp_globals.pending_mutex);
- if (pending_cnt == 1) {
- hv_kvp_log_info("%s: Queuing work item\n", __func__);
- hv_queue_work_item(
- service_table[HV_KVP].work_queue,
- hv_kvp_process_request,
- context
- );
- }
- }
-}
-
-
-/*
- * This function is called by the hv_kvp_init -
- * creates character device hv_kvp_dev
- * allocates memory to hv_kvp_dev_buf
- *
- */
-static int
-hv_kvp_dev_init(void)
-{
- int error = 0;
-
- /* initialize semaphore */
- sema_init(&kvp_globals.dev_sema, 0, "hv_kvp device semaphore");
- /* create character device */
- error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
- &hv_kvp_dev,
- &hv_kvp_cdevsw,
- 0,
- UID_ROOT,
- GID_WHEEL,
- 0640,
- "hv_kvp_dev");
-
- if (error != 0)
- return (error);
-
+ hv_kvp_sc *sc = (hv_kvp_sc*)context;
/*
- * Malloc with M_WAITOK flag will never fail.
- */
- hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_HV_KVP_DEV_BUF, M_WAITOK |
- M_ZERO);
-
- return (0);
-}
-
-
-/*
- * This function is called by the hv_kvp_deinit -
- * destroy character device
- */
-static void
-hv_kvp_dev_destroy(void)
-{
-
- if (daemon_task != NULL) {
- PROC_LOCK(daemon_task);
- kern_psignal(daemon_task, SIGKILL);
- PROC_UNLOCK(daemon_task);
+	The first request from the host will not be handled until the daemon
+	has registered.  When the callback is triggered without a registered
+	daemon, it simply returns.  Once a new daemon registers, this
+	callback is triggered from the _write op.
+ */
+ if (sc->register_done) {
+ hv_kvp_log_info("%s: Queuing work item\n", __func__);
+ taskqueue_enqueue(taskqueue_thread, &sc->task);
}
-
- destroy_dev(hv_kvp_dev);
- free(hv_kvp_dev_buf, M_HV_KVP_DEV_BUF);
- return;
}
-
static int
hv_kvp_dev_open(struct cdev *dev, int oflags, int devtype,
struct thread *td)
{
-
+ hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
+
hv_kvp_log_info("%s: Opened device \"hv_kvp_device\" successfully.\n", __func__);
- if (kvp_globals.dev_accessed)
+ if (sc->dev_accessed)
return (-EBUSY);
-
- daemon_task = curproc;
- kvp_globals.dev_accessed = true;
- kvp_globals.daemon_busy = false;
+
+ sc->daemon_task = curproc;
+ sc->dev_accessed = true;
+ sc->daemon_busy = false;
return (0);
}
@@ -852,10 +762,11 @@ static int
hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __unused,
struct thread *td __unused)
{
+ hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
hv_kvp_log_info("%s: Closing device \"hv_kvp_device\".\n", __func__);
- kvp_globals.dev_accessed = false;
- kvp_globals.register_done = false;
+ sc->dev_accessed = false;
+ sc->register_done = false;
return (0);
}
@@ -865,18 +776,21 @@ hv_kvp_dev_close(struct cdev *dev __unused, int fflag __unused, int devtype __un
* acts as a send to daemon
*/
static int
-hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __unused)
+hv_kvp_dev_daemon_read(struct cdev *dev, struct uio *uio, int ioflag __unused)
{
size_t amt;
int error = 0;
+ struct hv_kvp_msg *hv_kvp_dev_buf;
+ hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
/* Check hv_kvp daemon registration status*/
- if (!kvp_globals.register_done)
+ if (!sc->register_done)
return (KVP_ERROR);
- sema_wait(&kvp_globals.dev_sema);
+ sema_wait(&sc->dev_sema);
- memcpy(hv_kvp_dev_buf, &kvp_globals.daemon_kvp_msg, sizeof(struct hv_kvp_msg));
+ hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK);
+ memcpy(hv_kvp_dev_buf, &sc->daemon_kvp_msg, sizeof(struct hv_kvp_msg));
amt = MIN(uio->uio_resid, uio->uio_offset >= BUFFERSIZE + 1 ? 0 :
BUFFERSIZE + 1 - uio->uio_offset);
@@ -884,6 +798,7 @@ hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __
if ((error = uiomove(hv_kvp_dev_buf, amt, uio)) != 0)
hv_kvp_log_info("%s: hv_kvp uiomove read failed!\n", __func__);
+ free(hv_kvp_dev_buf, M_TEMP);
return (error);
}
@@ -893,29 +808,30 @@ hv_kvp_dev_daemon_read(struct cdev *dev __unused, struct uio *uio, int ioflag __
 * acts as a receive from daemon
*/
static int
-hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag __unused)
+hv_kvp_dev_daemon_write(struct cdev *dev, struct uio *uio, int ioflag __unused)
{
size_t amt;
int error = 0;
+ struct hv_kvp_msg *hv_kvp_dev_buf;
+ hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
uio->uio_offset = 0;
+ hv_kvp_dev_buf = malloc(sizeof(*hv_kvp_dev_buf), M_TEMP, M_WAITOK);
amt = MIN(uio->uio_resid, BUFFERSIZE);
error = uiomove(hv_kvp_dev_buf, amt, uio);
- if (error != 0)
+ if (error != 0) {
+ free(hv_kvp_dev_buf, M_TEMP);
return (error);
+ }
+ memcpy(&sc->daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg));
- memcpy(&kvp_globals.daemon_kvp_msg, hv_kvp_dev_buf, sizeof(struct hv_kvp_msg));
-
- if (kvp_globals.register_done == false) {
- if (kvp_globals.daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) {
-
- kvp_globals.register_done = true;
- if (kvp_globals.channelp) {
-
- hv_kvp_callback(kvp_globals.channelp);
- }
+ free(hv_kvp_dev_buf, M_TEMP);
+ if (sc->register_done == false) {
+ if (sc->daemon_kvp_msg.kvp_hdr.operation == HV_KVP_OP_REGISTER) {
+ sc->register_done = true;
+ hv_kvp_callback(dev->si_drv1);
}
else {
hv_kvp_log_info("%s, KVP Registration Failed\n", __func__);
@@ -923,18 +839,20 @@ hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag _
}
} else {
- mtx_lock(&kvp_globals.pending_mutex);
+ mtx_lock(&sc->pending_mutex);
- if(!kvp_globals.req_timed_out) {
+ if(!sc->req_timed_out) {
+ struct hv_kvp_msg *hmsg = sc->host_kvp_msg;
+ struct hv_kvp_msg *umsg = &sc->daemon_kvp_msg;
- hv_kvp_convert_usermsg_to_hostmsg();
- hv_kvp_respond_host(KVP_SUCCESS);
- wakeup(&kvp_globals);
- kvp_globals.req_in_progress = false;
+ hv_kvp_convert_usermsg_to_hostmsg(umsg, hmsg);
+ hv_kvp_respond_host(sc, KVP_SUCCESS);
+ wakeup(sc);
+ sc->req_in_progress = false;
}
- kvp_globals.daemon_busy = false;
- mtx_unlock(&kvp_globals.pending_mutex);
+ sc->daemon_busy = false;
+ mtx_unlock(&sc->pending_mutex);
}
return (error);
@@ -946,66 +864,106 @@ hv_kvp_dev_daemon_write(struct cdev *dev __unused, struct uio *uio, int ioflag _
* for daemon to read.
*/
static int
-hv_kvp_dev_daemon_poll(struct cdev *dev __unused, int events, struct thread *td)
+hv_kvp_dev_daemon_poll(struct cdev *dev, int events, struct thread *td)
{
int revents = 0;
+ hv_kvp_sc *sc = (hv_kvp_sc*)dev->si_drv1;
- mtx_lock(&kvp_globals.pending_mutex);
+ mtx_lock(&sc->pending_mutex);
/*
	 * We check the global flag daemon_busy for data availability for
	 * userland to read.  daemon_busy is set to true before the driver
	 * has data for the daemon to read.  It is set to false after the
	 * daemon sends the response back to the driver.
*/
- if (kvp_globals.daemon_busy == true)
+ if (sc->daemon_busy == true)
revents = POLLIN;
else
- selrecord(td, &hv_kvp_selinfo);
+ selrecord(td, &sc->hv_kvp_selinfo);
- mtx_unlock(&kvp_globals.pending_mutex);
+ mtx_unlock(&sc->pending_mutex);
return (revents);
}
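
The userland half of this interface follows from the handlers above: the
daemon's first write registers it (HV_KVP_OP_REGISTER), after which it
poll()s for POLLIN (reported while daemon_busy is set), reads the host
request and writes the reply.  A sketch, with the buffer size an
assumption (>= BUFFERSIZE + 1 from hv_kvp.h):

	#include <sys/types.h>
	#include <fcntl.h>
	#include <poll.h>
	#include <unistd.h>

	static int
	kvp_daemon_loop(void)
	{
		char buf[4096];
		struct pollfd pfd;
		int fd;

		fd = open("/dev/hv_kvp_dev", O_RDWR);
		if (fd < 0)
			return (-1);
		/* ... write the HV_KVP_OP_REGISTER message first ... */
		for (;;) {
			pfd.fd = fd;
			pfd.events = POLLIN;
			if (poll(&pfd, 1, -1) < 0)
				break;
			if (read(fd, buf, sizeof(buf)) < 0)	/* request */
				break;
			/* ... handle the operation, build the reply ... */
			if (write(fd, buf, sizeof(buf)) < 0)	/* reply */
				break;
		}
		close(fd);
		return (0);
	}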
-
-/*
- * hv_kvp initialization function
- * called from hv_util service.
- *
- */
-int
-hv_kvp_init(hv_vmbus_service *srv)
+static int
+hv_kvp_probe(device_t dev)
{
- int error = 0;
- hv_work_queue *work_queue = NULL;
-
- memset(&kvp_globals, 0, sizeof(kvp_globals));
-
- work_queue = hv_work_queue_create("KVP Service");
- if (work_queue == NULL) {
- hv_kvp_log_info("%s: Work queue alloc failed\n", __func__);
- error = ENOMEM;
- hv_kvp_log_error("%s: ENOMEM\n", __func__);
- goto Finish;
+ const char *p = vmbus_get_type(dev);
+ if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
+ device_set_desc(dev, "Hyper-V KVP Service");
+ return BUS_PROBE_DEFAULT;
}
- srv->work_queue = work_queue;
- error = hv_kvp_dev_init();
- mtx_init(&kvp_globals.pending_mutex, "hv-kvp pending mutex",
- NULL, MTX_DEF);
- kvp_globals.pending_reqs = 0;
+ return ENXIO;
+}
+
+static int
+hv_kvp_attach(device_t dev)
+{
+ int error;
+ struct sysctl_oid_list *child;
+ struct sysctl_ctx_list *ctx;
+
+ hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev);
+ sc->util_sc.callback = hv_kvp_callback;
+ sema_init(&sc->dev_sema, 0, "hv_kvp device semaphore");
+ mtx_init(&sc->pending_mutex, "hv-kvp pending mutex",
+ NULL, MTX_DEF);
-Finish:
- return (error);
-}
+ ctx = device_get_sysctl_ctx(dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_kvp_log",
+ CTLFLAG_RW, &hv_kvp_log, 0, "Hyperv KVP service log level");
-void
-hv_kvp_deinit(void)
+ TASK_INIT(&sc->task, 0, hv_kvp_process_request, sc);
+
+ /* create character device */
+ error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK,
+ &sc->hv_kvp_dev,
+ &hv_kvp_cdevsw,
+ 0,
+ UID_ROOT,
+ GID_WHEEL,
+ 0640,
+ "hv_kvp_dev");
+
+ if (error != 0)
+ return (error);
+ sc->hv_kvp_dev->si_drv1 = sc;
+
+ return hv_util_attach(dev);
+}
+
+static int
+hv_kvp_detach(device_t dev)
{
- hv_kvp_dev_destroy();
- mtx_destroy(&kvp_globals.pending_mutex);
+ hv_kvp_sc *sc = (hv_kvp_sc*)device_get_softc(dev);
- return;
+ if (sc->daemon_task != NULL) {
+ PROC_LOCK(sc->daemon_task);
+ kern_psignal(sc->daemon_task, SIGKILL);
+ PROC_UNLOCK(sc->daemon_task);
+ }
+
+ destroy_dev(sc->hv_kvp_dev);
+ return hv_util_detach(dev);
}
+
+static device_method_t kvp_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, hv_kvp_probe),
+ DEVMETHOD(device_attach, hv_kvp_attach),
+ DEVMETHOD(device_detach, hv_kvp_detach),
+ { 0, 0 }
+};
+
+static driver_t kvp_driver = { "hvkvp", kvp_methods, sizeof(hv_kvp_sc)};
+
+static devclass_t kvp_devclass;
+
+DRIVER_MODULE(hv_kvp, vmbus, kvp_driver, kvp_devclass, NULL, NULL);
+MODULE_VERSION(hv_kvp, 1);
+MODULE_DEPEND(hv_kvp, vmbus, 1, 1, 1);
diff --git a/sys/dev/hyperv/utilities/hv_kvp.h b/sys/dev/hyperv/utilities/hv_kvp.h
index b67373fa..b62149e 100644
--- a/sys/dev/hyperv/utilities/hv_kvp.h
+++ b/sys/dev/hyperv/utilities/hv_kvp.h
@@ -238,17 +238,4 @@ struct hv_kvp_ip_msg {
struct hv_kvp_ipaddr_value kvp_ip_val;
} __attribute__((packed));
-
-#define HV_SHUT_DOWN 0
-#define HV_TIME_SYNCH 1
-#define HV_HEART_BEAT 2
-#define HV_KVP 3
-#define HV_MAX_UTIL_SERVICES 4
-
-#define HV_WLTIMEDELTA 116444736000000000L /* in 100ns unit */
-#define HV_ICTIMESYNCFLAG_PROBE 0
-#define HV_ICTIMESYNCFLAG_SYNC 1
-#define HV_ICTIMESYNCFLAG_SAMPLE 2
-#define HV_NANO_SEC_PER_SEC 1000000000
-
#endif /* _KVP_H */
diff --git a/sys/dev/hyperv/utilities/hv_shutdown.c b/sys/dev/hyperv/utilities/hv_shutdown.c
new file mode 100644
index 0000000..20bc65e
--- /dev/null
+++ b/sys/dev/hyperv/utilities/hv_shutdown.c
@@ -0,0 +1,151 @@
+/*-
+ * Copyright (c) 2014 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Hyper-V Shutdown Service.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/reboot.h>
+#include <sys/timetc.h>
+#include <sys/syscallsubr.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include "hv_util.h"
+
+static hv_guid service_guid = { .data =
+ {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49,
+ 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB} };
+
+/**
+ * Shutdown
+ */
+static void
+hv_shutdown_cb(void *context)
+{
+ uint8_t* buf;
+ hv_vmbus_channel* channel;
+ uint8_t execute_shutdown = 0;
+ hv_vmbus_icmsg_hdr* icmsghdrp;
+ uint32_t recv_len;
+ uint64_t request_id;
+ int ret;
+ hv_vmbus_shutdown_msg_data* shutdown_msg;
+ hv_util_sc *softc;
+
+ softc = (hv_util_sc*)context;
+	buf = softc->receive_buffer;
+ channel = softc->hv_dev->channel;
+ ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE,
+ &recv_len, &request_id);
+
+ if ((ret == 0) && recv_len > 0) {
+
+ icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr)];
+
+ if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+ hv_negotiate_version(icmsghdrp, NULL, buf);
+
+ } else {
+ shutdown_msg =
+ (struct hv_vmbus_shutdown_msg_data *)
+ &buf[sizeof(struct hv_vmbus_pipe_hdr) +
+ sizeof(struct hv_vmbus_icmsg_hdr)];
+
+ switch (shutdown_msg->flags) {
+ case 0:
+ case 1:
+ icmsghdrp->status = HV_S_OK;
+ execute_shutdown = 1;
+ if(bootverbose)
+ printf("Shutdown request received -"
+ " graceful shutdown initiated\n");
+ break;
+ default:
+ icmsghdrp->status = HV_E_FAIL;
+ execute_shutdown = 0;
+ printf("Shutdown request received -"
+ " Invalid request\n");
+ break;
+ }
+ }
+
+ icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
+ HV_ICMSGHDRFLAG_RESPONSE;
+
+ hv_vmbus_channel_send_packet(channel, buf,
+ recv_len, request_id,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+ }
+
+ if (execute_shutdown)
+ shutdown_nice(RB_POWEROFF);
+}
+
+static int
+hv_shutdown_probe(device_t dev)
+{
+ const char *p = vmbus_get_type(dev);
+ if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
+ device_set_desc(dev, "Hyper-V Shutdown Service");
+ return BUS_PROBE_DEFAULT;
+ }
+
+ return ENXIO;
+}
+
+static int
+hv_shutdown_attach(device_t dev)
+{
+ hv_util_sc *softc = (hv_util_sc*)device_get_softc(dev);
+
+ softc->callback = hv_shutdown_cb;
+
+ return hv_util_attach(dev);
+}
+
+static device_method_t shutdown_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, hv_shutdown_probe),
+ DEVMETHOD(device_attach, hv_shutdown_attach),
+ DEVMETHOD(device_detach, hv_util_detach),
+ { 0, 0 }
+};
+
+static driver_t shutdown_driver = { "hvshutdown", shutdown_methods, sizeof(hv_util_sc) };
+
+static devclass_t shutdown_devclass;
+
+DRIVER_MODULE(hv_shutdown, vmbus, shutdown_driver, shutdown_devclass, NULL, NULL);
+MODULE_VERSION(hv_shutdown, 1);
+MODULE_DEPEND(hv_shutdown, vmbus, 1, 1, 1);
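
hv_shutdown_cb() above is one instance of the request/response cycle that every util service in this patch follows: receive a packet into the per-device buffer, either answer a version negotiation or handle the service payload, mark the header TRANSACTION | RESPONSE, and send the same buffer back to the host. A minimal sketch of that shared cycle, assuming the types and calls shown in this diff; my_service_handle_payload() is a hypothetical stand-in for the service-specific step:

/*
 * Sketch of the IC request/response cycle shared by the util services.
 * The reply deliberately reuses the received buffer and length; only
 * the header flags and status change.
 */
static void
util_service_cb(hv_util_sc *sc)
{
	uint8_t *buf = sc->receive_buffer;
	hv_vmbus_channel *chan = sc->hv_dev->channel;
	hv_vmbus_icmsg_hdr *hdr;
	uint32_t len;
	uint64_t rid;

	if (hv_vmbus_channel_recv_packet(chan, buf, PAGE_SIZE, &len,
	    &rid) != 0 || len == 0)
		return;

	hdr = (hv_vmbus_icmsg_hdr *)&buf[sizeof(struct hv_vmbus_pipe_hdr)];
	if (hdr->icmsgtype == HV_ICMSGTYPE_NEGOTIATE)
		hv_negotiate_version(hdr, NULL, buf);
	else
		my_service_handle_payload(buf, hdr);	/* hypothetical */

	/* Every request is acknowledged, even a rejected one. */
	hdr->icflags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE;
	hv_vmbus_channel_send_packet(chan, buf, len, rid,
	    HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
}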
diff --git a/sys/dev/hyperv/utilities/hv_timesync.c b/sys/dev/hyperv/utilities/hv_timesync.c
new file mode 100644
index 0000000..d1ea904
--- /dev/null
+++ b/sys/dev/hyperv/utilities/hv_timesync.c
@@ -0,0 +1,216 @@
+/*-
+ * Copyright (c) 2014 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * The Hyper-V time synchronization service, built on the common hv_util framework.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/reboot.h>
+#include <sys/timetc.h>
+#include <sys/syscallsubr.h>
+
+#include <dev/hyperv/include/hyperv.h>
+#include "hv_util.h"
+
+#define HV_WLTIMEDELTA 116444736000000000L /* in 100ns unit */
+#define HV_ICTIMESYNCFLAG_PROBE 0
+#define HV_ICTIMESYNCFLAG_SYNC 1
+#define HV_ICTIMESYNCFLAG_SAMPLE 2
+#define HV_NANO_SEC_PER_SEC 1000000000
+
+/* Time Sync data */
+typedef struct {
+ uint64_t data;
+} time_sync_data;
+
+/* Time Synch Service */
+static hv_guid service_guid = {.data =
+ {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
+ 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf } };
+
+struct hv_ictimesync_data {
+ uint64_t parenttime;
+ uint64_t childtime;
+ uint64_t roundtriptime;
+ uint8_t flags;
+} __packed;
+
+typedef struct hv_timesync_sc {
+ hv_util_sc util_sc;
+ struct task task;
+ time_sync_data time_msg;
+} hv_timesync_sc;
+
+/**
+ * Set guest time based on time sync message from host
+ */
+static void
+hv_set_host_time(void *context, int pending)
+{
+ hv_timesync_sc *softc = (hv_timesync_sc*)context;
+ uint64_t hosttime = softc->time_msg.data;
+ struct timespec guest_ts, host_ts;
+ uint64_t host_tns;
+ int64_t diff;
+ int error;
+
+ host_tns = (hosttime - HV_WLTIMEDELTA) * 100;
+ host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC);
+ host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC);
+
+ nanotime(&guest_ts);
+
+ diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec;
+
+ /*
+	 * If the host clock differs from the guest clock by more than
+	 * 5 seconds, step the guest clock to match the host.
+ */
+ if (diff > 5 || diff < -5) {
+ error = kern_clock_settime(curthread, CLOCK_REALTIME,
+ &host_ts);
+ }
+}
+
+/**
+ * @brief Synchronize time with host after reboot, restore, etc.
+ *
+ * The ICTIMESYNCFLAG_SYNC flag bit indicates a reboot or restore event of
+ * the VM. After a reboot, ICTIMESYNCFLAG_SYNC is included in the first time
+ * message sent once the timesync channel is opened. Since the hv_utils
+ * module is loaded after hv_vmbus, that first message is usually missed.
+ * Also, the system time is automatically set from the emulated hardware
+ * clock, which may not be in UTC or in the same time zone. To override
+ * these effects, we use the first 50 time samples for the initial setting.
+ */
+static inline void
+hv_adj_guesttime(hv_timesync_sc *sc, uint64_t hosttime, uint8_t flags)
+{
+ sc->time_msg.data = hosttime;
+
+ if (((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) ||
+ ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0)) {
+ taskqueue_enqueue(taskqueue_thread, &sc->task);
+ }
+}
+
+/**
+ * Time Sync Channel message handler
+ */
+static void
+hv_timesync_cb(void *context)
+{
+ hv_vmbus_channel* channel;
+ hv_vmbus_icmsg_hdr* icmsghdrp;
+ uint32_t recvlen;
+ uint64_t requestId;
+ int ret;
+ uint8_t* time_buf;
+ struct hv_ictimesync_data* timedatap;
+ hv_timesync_sc *softc;
+
+ softc = (hv_timesync_sc*)context;
+ channel = softc->util_sc.hv_dev->channel;
+ time_buf = softc->util_sc.receive_buffer;
+
+ ret = hv_vmbus_channel_recv_packet(channel, time_buf,
+ PAGE_SIZE, &recvlen, &requestId);
+
+ if ((ret == 0) && recvlen > 0) {
+ icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[
+ sizeof(struct hv_vmbus_pipe_hdr)];
+
+ if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
+ hv_negotiate_version(icmsghdrp, NULL, time_buf);
+ } else {
+ timedatap = (struct hv_ictimesync_data *) &time_buf[
+ sizeof(struct hv_vmbus_pipe_hdr) +
+ sizeof(struct hv_vmbus_icmsg_hdr)];
+ hv_adj_guesttime(softc, timedatap->parenttime, timedatap->flags);
+ }
+
+ icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION
+ | HV_ICMSGHDRFLAG_RESPONSE;
+
+ hv_vmbus_channel_send_packet(channel, time_buf,
+ recvlen, requestId,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
+ }
+}
+
+static int
+hv_timesync_probe(device_t dev)
+{
+ const char *p = vmbus_get_type(dev);
+ if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
+ device_set_desc(dev, "Hyper-V Time Synch Service");
+ return BUS_PROBE_DEFAULT;
+ }
+
+ return ENXIO;
+}
+
+static int
+hv_timesync_attach(device_t dev)
+{
+ hv_timesync_sc *softc = device_get_softc(dev);
+
+ softc->util_sc.callback = hv_timesync_cb;
+ TASK_INIT(&softc->task, 1, hv_set_host_time, softc);
+
+ return hv_util_attach(dev);
+}
+
+static int
+hv_timesync_detach(device_t dev)
+{
+ hv_timesync_sc *softc = device_get_softc(dev);
+ taskqueue_drain(taskqueue_thread, &softc->task);
+
+ return hv_util_detach(dev);
+}
+
+static device_method_t timesync_methods[] = {
+ /* Device interface */
+ DEVMETHOD(device_probe, hv_timesync_probe),
+ DEVMETHOD(device_attach, hv_timesync_attach),
+ DEVMETHOD(device_detach, hv_timesync_detach),
+ { 0, 0 }
+};
+
+static driver_t timesync_driver = { "hvtimesync", timesync_methods, sizeof(hv_timesync_sc) };
+
+static devclass_t timesync_devclass;
+
+DRIVER_MODULE(hv_timesync, vmbus, timesync_driver, timesync_devclass, NULL, NULL);
+MODULE_VERSION(hv_timesync, 1);
+MODULE_DEPEND(hv_timesync, vmbus, 1, 1, 1);
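
The HV_WLTIMEDELTA arithmetic in hv_set_host_time() converts a Windows FILETIME-style host timestamp (100-nanosecond ticks since January 1, 1601) into a Unix timespec: 116444736000000000 ticks is exactly the 11644473600-second gap between the 1601 and 1970 epochs. A standalone sketch of the same conversion that can be checked in userland (the sample timestamp is illustrative):

#include <stdio.h>
#include <stdint.h>

#define HV_WLTIMEDELTA		116444736000000000ULL	/* in 100ns units */
#define HV_NANO_SEC_PER_SEC	1000000000ULL

int
main(void)
{
	/* 2016-01-01 00:00:00 UTC expressed as a FILETIME value. */
	uint64_t hosttime = 130960800000000000ULL;
	/* Same steps as hv_set_host_time(): rebase the epoch, scale to ns. */
	uint64_t host_tns = (hosttime - HV_WLTIMEDELTA) * 100;

	printf("tv_sec=%ju tv_nsec=%ju\n",
	    (uintmax_t)(host_tns / HV_NANO_SEC_PER_SEC),
	    (uintmax_t)(host_tns % HV_NANO_SEC_PER_SEC));
	/* Prints tv_sec=1451606400 tv_nsec=0, i.e. 2016-01-01 in Unix time. */
	return (0);
}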
diff --git a/sys/dev/hyperv/utilities/hv_util.c b/sys/dev/hyperv/utilities/hv_util.c
index dc4b1e2..7d19b3f 100644
--- a/sys/dev/hyperv/utilities/hv_util.c
+++ b/sys/dev/hyperv/utilities/hv_util.c
@@ -40,85 +40,9 @@
#include <sys/syscallsubr.h>
#include <dev/hyperv/include/hyperv.h>
-#include "hv_kvp.h"
+#include "hv_util.h"
-/* Time Sync data */
-typedef struct {
- uint64_t data;
-} time_sync_data;
-
-static void hv_shutdown_cb(void *context);
-static void hv_heartbeat_cb(void *context);
-static void hv_timesync_cb(void *context);
-
-static int hv_timesync_init(hv_vmbus_service *serv);
-
-/*
- * Note: GUID codes below are predefined by the host hypervisor
- * (Hyper-V and Azure)interface and required for correct operation.
- */
-hv_vmbus_service service_table[] = {
- /* Shutdown Service */
- { .guid.data = {0x31, 0x60, 0x0B, 0X0E, 0x13, 0x52, 0x34, 0x49,
- 0x81, 0x8B, 0x38, 0XD9, 0x0C, 0xED, 0x39, 0xDB},
- .name = "Hyper-V Shutdown Service\n",
- .enabled = TRUE,
- .callback = hv_shutdown_cb,
- },
-
- /* Time Synch Service */
- { .guid.data = {0x30, 0xe6, 0x27, 0x95, 0xae, 0xd0, 0x7b, 0x49,
- 0xad, 0xce, 0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf},
- .name = "Hyper-V Time Synch Service\n",
- .enabled = TRUE,
- .init = hv_timesync_init,
- .callback = hv_timesync_cb,
- },
-
- /* Heartbeat Service */
- { .guid.data = {0x39, 0x4f, 0x16, 0x57, 0x15, 0x91, 0x78, 0x4e,
- 0xab, 0x55, 0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d},
- .name = "Hyper-V Heartbeat Service\n",
- .enabled = TRUE,
- .callback = hv_heartbeat_cb,
- },
-
- /* KVP (Key Value Pair) Service */
- { .guid.data = {0xe7, 0xf4, 0xa0, 0xa9, 0x45, 0x5a, 0x96, 0x4d,
- 0xb8, 0x27, 0x8a, 0x84, 0x1e, 0x8c, 0x3, 0xe6},
- .name = "Hyper-V KVP Service\n",
- .enabled = TRUE,
- .init = hv_kvp_init,
- .callback = hv_kvp_callback,
- },
-};
-
-/*
- * Receive buffer pointers. There is one buffer per utility service. The
- * buffer is allocated during attach().
- */
-uint8_t *receive_buffer[HV_MAX_UTIL_SERVICES];
-
-static boolean_t destroyed_kvp = FALSE;
-
-struct hv_ictimesync_data {
- uint64_t parenttime;
- uint64_t childtime;
- uint64_t roundtriptime;
- uint8_t flags;
-} __packed;
-
-static int
-hv_timesync_init(hv_vmbus_service *serv)
-{
-
- serv->work_queue = hv_work_queue_create("Time Sync");
- if (serv->work_queue == NULL)
- return (ENOMEM);
- return (0);
-}
-
-static void
+void
hv_negotiate_version(
struct hv_vmbus_icmsg_hdr* icmsghdrp,
struct hv_vmbus_icmsg_negotiate* negop,
@@ -147,267 +71,19 @@ hv_negotiate_version(
negop->icmsg_vercnt = 1;
}
-
-/**
- * Set host time based on time sync message from host
- */
-static void
-hv_set_host_time(void *context)
-{
- time_sync_data* time_msg = (time_sync_data*) context;
- uint64_t hosttime = time_msg->data;
- struct timespec guest_ts, host_ts;
- uint64_t host_tns;
- int64_t diff;
- int error;
-
- host_tns = (hosttime - HV_WLTIMEDELTA) * 100;
- host_ts.tv_sec = (time_t)(host_tns/HV_NANO_SEC_PER_SEC);
- host_ts.tv_nsec = (long)(host_tns%HV_NANO_SEC_PER_SEC);
-
- nanotime(&guest_ts);
-
- diff = (int64_t)host_ts.tv_sec - (int64_t)guest_ts.tv_sec;
-
- /*
- * If host differs by 5 seconds then make the guest catch up
- */
- if (diff > 5 || diff < -5) {
- error = kern_clock_settime(curthread, CLOCK_REALTIME,
- &host_ts);
- }
-
- /*
- * Free the hosttime that was allocated in hv_adj_guesttime()
- */
- free(time_msg, M_DEVBUF);
-}
-
-/**
- * @brief Synchronize time with host after reboot, restore, etc.
- *
- * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
- * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
- * message after the timesync channel is opened. Since the hv_utils module is
- * loaded after hv_vmbus, the first message is usually missed. The other
- * thing is, systime is automatically set to emulated hardware clock which may
- * not be UTC time or in the same time zone. So, to override these effects, we
- * use the first 50 time samples for initial system time setting.
- */
-static inline
-void hv_adj_guesttime(uint64_t hosttime, uint8_t flags)
-{
- time_sync_data* time_msg;
-
- time_msg = malloc(sizeof(time_sync_data), M_DEVBUF, M_NOWAIT);
-
- if (time_msg == NULL)
- return;
-
- time_msg->data = hosttime;
-
- if ((flags & HV_ICTIMESYNCFLAG_SYNC) != 0) {
- hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue,
- hv_set_host_time, time_msg);
- } else if ((flags & HV_ICTIMESYNCFLAG_SAMPLE) != 0) {
- hv_queue_work_item(service_table[HV_TIME_SYNCH].work_queue,
- hv_set_host_time, time_msg);
- } else {
- free(time_msg, M_DEVBUF);
- }
-}
-
-/**
- * Time Sync Channel message handler
- */
-static void
-hv_timesync_cb(void *context)
-{
- hv_vmbus_channel* channel = context;
- hv_vmbus_icmsg_hdr* icmsghdrp;
- uint32_t recvlen;
- uint64_t requestId;
- int ret;
- uint8_t* time_buf;
- struct hv_ictimesync_data* timedatap;
-
- time_buf = receive_buffer[HV_TIME_SYNCH];
-
- ret = hv_vmbus_channel_recv_packet(channel, time_buf,
- PAGE_SIZE, &recvlen, &requestId);
-
- if ((ret == 0) && recvlen > 0) {
- icmsghdrp = (struct hv_vmbus_icmsg_hdr *) &time_buf[
- sizeof(struct hv_vmbus_pipe_hdr)];
-
- if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
- hv_negotiate_version(icmsghdrp, NULL, time_buf);
- } else {
- timedatap = (struct hv_ictimesync_data *) &time_buf[
- sizeof(struct hv_vmbus_pipe_hdr) +
- sizeof(struct hv_vmbus_icmsg_hdr)];
- hv_adj_guesttime(timedatap->parenttime, timedatap->flags);
- }
-
- icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION
- | HV_ICMSGHDRFLAG_RESPONSE;
-
- hv_vmbus_channel_send_packet(channel, time_buf,
- recvlen, requestId,
- HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
- }
-}
-
-/**
- * Shutdown
- */
-static void
-hv_shutdown_cb(void *context)
-{
- uint8_t* buf;
- hv_vmbus_channel* channel = context;
- uint8_t execute_shutdown = 0;
- hv_vmbus_icmsg_hdr* icmsghdrp;
- uint32_t recv_len;
- uint64_t request_id;
- int ret;
- hv_vmbus_shutdown_msg_data* shutdown_msg;
-
- buf = receive_buffer[HV_SHUT_DOWN];
-
- ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE,
- &recv_len, &request_id);
-
- if ((ret == 0) && recv_len > 0) {
-
- icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
- &buf[sizeof(struct hv_vmbus_pipe_hdr)];
-
- if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
- hv_negotiate_version(icmsghdrp, NULL, buf);
-
- } else {
- shutdown_msg =
- (struct hv_vmbus_shutdown_msg_data *)
- &buf[sizeof(struct hv_vmbus_pipe_hdr) +
- sizeof(struct hv_vmbus_icmsg_hdr)];
-
- switch (shutdown_msg->flags) {
- case 0:
- case 1:
- icmsghdrp->status = HV_S_OK;
- execute_shutdown = 1;
- if(bootverbose)
- printf("Shutdown request received -"
- " graceful shutdown initiated\n");
- break;
- default:
- icmsghdrp->status = HV_E_FAIL;
- execute_shutdown = 0;
- printf("Shutdown request received -"
- " Invalid request\n");
- break;
- }
- }
-
- icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
- HV_ICMSGHDRFLAG_RESPONSE;
-
- hv_vmbus_channel_send_packet(channel, buf,
- recv_len, request_id,
- HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
- }
-
- if (execute_shutdown)
- shutdown_nice(RB_POWEROFF);
-}
-
-/**
- * Process heartbeat message
- */
-static void
-hv_heartbeat_cb(void *context)
-{
- uint8_t* buf;
- hv_vmbus_channel* channel = context;
- uint32_t recvlen;
- uint64_t requestid;
- int ret;
-
- struct hv_vmbus_heartbeat_msg_data* heartbeat_msg;
- struct hv_vmbus_icmsg_hdr* icmsghdrp;
-
- buf = receive_buffer[HV_HEART_BEAT];
-
- ret = hv_vmbus_channel_recv_packet(channel, buf, PAGE_SIZE, &recvlen,
- &requestid);
-
- if ((ret == 0) && recvlen > 0) {
-
- icmsghdrp = (struct hv_vmbus_icmsg_hdr *)
- &buf[sizeof(struct hv_vmbus_pipe_hdr)];
-
- if (icmsghdrp->icmsgtype == HV_ICMSGTYPE_NEGOTIATE) {
- hv_negotiate_version(icmsghdrp, NULL, buf);
-
- } else {
- heartbeat_msg =
- (struct hv_vmbus_heartbeat_msg_data *)
- &buf[sizeof(struct hv_vmbus_pipe_hdr) +
- sizeof(struct hv_vmbus_icmsg_hdr)];
-
- heartbeat_msg->seq_num += 1;
- }
-
- icmsghdrp->icflags = HV_ICMSGHDRFLAG_TRANSACTION |
- HV_ICMSGHDRFLAG_RESPONSE;
-
- hv_vmbus_channel_send_packet(channel, buf, recvlen, requestid,
- HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0);
- }
-}
-
-
-static int
-hv_util_probe(device_t dev)
-{
- int i;
- int rtn_value = ENXIO;
-
- for (i = 0; i < HV_MAX_UTIL_SERVICES; i++) {
- const char *p = vmbus_get_type(dev);
- if (service_table[i].enabled && !memcmp(p, &service_table[i].guid, sizeof(hv_guid))) {
- device_set_softc(dev, (void *) (&service_table[i]));
- rtn_value = BUS_PROBE_DEFAULT;
- }
- }
-
- return rtn_value;
-}
-
-static int
+int
hv_util_attach(device_t dev)
{
- struct hv_device* hv_dev;
- struct hv_vmbus_service* service;
- int ret;
- size_t receive_buffer_offset;
+ struct hv_device* hv_dev;
+ struct hv_util_sc* softc;
+ int ret;
hv_dev = vmbus_get_devctx(dev);
- service = device_get_softc(dev);
- receive_buffer_offset = service - &service_table[0];
- device_printf(dev, "Hyper-V Service attaching: %s\n", service->name);
- receive_buffer[receive_buffer_offset] =
+ softc = device_get_softc(dev);
+ softc->hv_dev = hv_dev;
+ softc->receive_buffer =
malloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
- if (service->init != NULL) {
- ret = service->init(service);
- if (ret) {
- ret = ENODEV;
- goto error0;
- }
- }
-
/*
* These services are not performance critical and do not need
* batched reading. Furthermore, some services such as KVP can
@@ -418,83 +94,30 @@ hv_util_attach(device_t dev)
hv_set_channel_read_state(hv_dev->channel, FALSE);
ret = hv_vmbus_channel_open(hv_dev->channel, 4 * PAGE_SIZE,
- 4 * PAGE_SIZE, NULL, 0,
- service->callback, hv_dev->channel);
+ 4 * PAGE_SIZE, NULL, 0,
+ softc->callback, softc);
if (ret)
- goto error0;
+ goto error0;
return (0);
- error0:
-
- free(receive_buffer[receive_buffer_offset], M_DEVBUF);
- receive_buffer[receive_buffer_offset] = NULL;
-
+error0:
+ free(softc->receive_buffer, M_DEVBUF);
return (ret);
}
-static int
+int
hv_util_detach(device_t dev)
{
- struct hv_device* hv_dev;
- struct hv_vmbus_service* service;
- size_t receive_buffer_offset;
-
- if (!destroyed_kvp) {
- hv_kvp_deinit();
- destroyed_kvp = TRUE;
- }
+ struct hv_device* hv_dev;
+ struct hv_util_sc* softc;
hv_dev = vmbus_get_devctx(dev);
hv_vmbus_channel_close(hv_dev->channel);
- service = device_get_softc(dev);
- receive_buffer_offset = service - &service_table[0];
+ softc = device_get_softc(dev);
- if (service->work_queue != NULL)
- hv_work_queue_close(service->work_queue);
-
- free(receive_buffer[receive_buffer_offset], M_DEVBUF);
- receive_buffer[receive_buffer_offset] = NULL;
+ free(softc->receive_buffer, M_DEVBUF);
return (0);
}
-
-static void
-hv_util_init(void)
-{
-}
-
-static int
-hv_util_modevent(module_t mod, int event, void *arg)
-{
- switch (event) {
- case MOD_LOAD:
- break;
- case MOD_UNLOAD:
- break;
- default:
- break;
- }
- return (0);
-}
-
-static device_method_t util_methods[] = {
- /* Device interface */
- DEVMETHOD(device_probe, hv_util_probe),
- DEVMETHOD(device_attach, hv_util_attach),
- DEVMETHOD(device_detach, hv_util_detach),
- DEVMETHOD(device_shutdown, bus_generic_shutdown),
- { 0, 0 } }
-;
-
-static driver_t util_driver = { "hyperv-utils", util_methods, 0 };
-
-static devclass_t util_devclass;
-
-DRIVER_MODULE(hv_utils, vmbus, util_driver, util_devclass, hv_util_modevent, 0);
-MODULE_VERSION(hv_utils, 1);
-MODULE_DEPEND(hv_utils, vmbus, 1, 1, 1);
-
-SYSINIT(hv_util_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1,
- hv_util_init, NULL);
diff --git a/sys/dev/hyperv/utilities/hv_util.h b/sys/dev/hyperv/utilities/hv_util.h
new file mode 100644
index 0000000..708dca8
--- /dev/null
+++ b/sys/dev/hyperv/utilities/hv_util.h
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2012 Citrix Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _HVUTIL_H_
+#define _HVUTIL_H_
+
+/**
+ * hv_util related structures
+ *
+ */
+typedef struct hv_util_sc {
+ /*
+ * function to process Hyper-V messages
+ */
+ void (*callback)(void *);
+
+ struct hv_device* hv_dev;
+ uint8_t *receive_buffer;
+} hv_util_sc;
+
+void hv_negotiate_version(
+ struct hv_vmbus_icmsg_hdr* icmsghdrp,
+ struct hv_vmbus_icmsg_negotiate* negop,
+ uint8_t* buf);
+
+int hv_util_attach(device_t dev);
+int hv_util_detach(device_t dev);
+#endif
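
The contract here is that each service embeds hv_util_sc as the first member of its own softc (hv_timesync_sc above does exactly this), so the pointer returned by device_get_softc() is valid under either type and the framework can pass the softc straight into the channel callback. A sketch of what a hypothetical additional service would add; all hv_myserv names are illustrative:

/* Sketch: skeleton of a new util service built on hv_util. */
typedef struct hv_myserv_sc {
	hv_util_sc	util_sc;	/* must stay first for the cast */
	int		private_state;	/* service-specific fields follow */
} hv_myserv_sc;

static void
hv_myserv_cb(void *context)
{
	/* hv_util_attach() opened the channel with the softc as context. */
	hv_myserv_sc *sc = (hv_myserv_sc *)context;

	(void)sc->util_sc.receive_buffer;	/* per-device, 4 pages */
}

static int
hv_myserv_attach(device_t dev)
{
	hv_myserv_sc *sc = device_get_softc(dev);

	/* The callback must be set before hv_util_attach() opens the channel. */
	sc->util_sc.callback = hv_myserv_cb;
	return (hv_util_attach(dev));
}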
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c
index 7037768..bb777cc 100644
--- a/sys/dev/hyperv/vmbus/hv_channel.c
+++ b/sys/dev/hyperv/vmbus/hv_channel.c
@@ -52,6 +52,7 @@ static int vmbus_channel_create_gpadl_header(
uint32_t* message_count);
static void vmbus_channel_set_event(hv_vmbus_channel* channel);
+static void VmbusProcessChannelEvent(void* context, int pending);
/**
* @brief Trigger an event notification on the specified channel
@@ -68,9 +69,7 @@ vmbus_channel_set_event(hv_vmbus_channel *channel)
+ ((channel->offer_msg.child_rel_id >> 5))));
monitor_page = (hv_vmbus_monitor_page *)
- hv_vmbus_g_connection.monitor_pages;
-
- monitor_page++; /* Get the child to parent monitor page */
+ hv_vmbus_g_connection.monitor_page_2;
synch_set_bit(channel->monitor_bit,
(uint32_t *)&monitor_page->
@@ -115,6 +114,9 @@ hv_vmbus_channel_open(
new_channel->on_channel_callback = pfn_on_channel_callback;
new_channel->channel_callback_context = context;
+ new_channel->rxq = hv_vmbus_g_context.hv_event_queue[new_channel->target_cpu];
+ TASK_INIT(&new_channel->channel_task, 0, VmbusProcessChannelEvent, new_channel);
+
/* Allocate the ring buffer */
out = contigmalloc((send_ring_buffer_size + recv_ring_buffer_size),
M_DEVBUF, M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0);
@@ -518,6 +520,7 @@ static void
hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
{
int ret = 0;
+ struct taskqueue *rxq = channel->rxq;
hv_vmbus_channel_close_channel* msg;
hv_vmbus_channel_msg_info* info;
@@ -525,6 +528,11 @@ hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
channel->sc_creation_callback = NULL;
/*
+	 * Set rxq to NULL so that no more requests get scheduled.
+ */
+ channel->rxq = NULL;
+ taskqueue_drain(rxq, &channel->channel_task);
+ /*
* Grab the lock to prevent race condition when a packet received
* and unloading driver is in the process.
*/
@@ -666,11 +674,11 @@ hv_vmbus_channel_send_packet_pagebuffer(
{
int ret = 0;
- int i = 0;
boolean_t need_sig;
uint32_t packet_len;
+ uint32_t page_buflen;
uint32_t packetLen_aligned;
- hv_vmbus_sg_buffer_list buffer_list[3];
+ hv_vmbus_sg_buffer_list buffer_list[4];
hv_vmbus_channel_packet_page_buffer desc;
uint32_t descSize;
uint64_t alignedData = 0;
@@ -682,36 +690,33 @@ hv_vmbus_channel_send_packet_pagebuffer(
* Adjust the size down since hv_vmbus_channel_packet_page_buffer
* is the largest size we support
*/
- descSize = sizeof(hv_vmbus_channel_packet_page_buffer) -
- ((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
- sizeof(hv_vmbus_page_buffer));
- packet_len = descSize + buffer_len;
+ descSize = __offsetof(hv_vmbus_channel_packet_page_buffer, range);
+ page_buflen = sizeof(hv_vmbus_page_buffer) * page_count;
+ packet_len = descSize + page_buflen + buffer_len;
packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
/* Setup the descriptor */
desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
- desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */
+ /* in 8-bytes granularity */
+ desc.data_offset8 = (descSize + page_buflen) >> 3;
desc.length8 = (uint16_t) (packetLen_aligned >> 3);
desc.transaction_id = request_id;
desc.range_count = page_count;
- for (i = 0; i < page_count; i++) {
- desc.range[i].length = page_buffers[i].length;
- desc.range[i].offset = page_buffers[i].offset;
- desc.range[i].pfn = page_buffers[i].pfn;
- }
-
buffer_list[0].data = &desc;
buffer_list[0].length = descSize;
- buffer_list[1].data = buffer;
- buffer_list[1].length = buffer_len;
+ buffer_list[1].data = page_buffers;
+ buffer_list[1].length = page_buflen;
- buffer_list[2].data = &alignedData;
- buffer_list[2].length = packetLen_aligned - packet_len;
+ buffer_list[2].data = buffer;
+ buffer_list[2].length = buffer_len;
- ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 3,
+ buffer_list[3].data = &alignedData;
+ buffer_list[3].length = packetLen_aligned - packet_len;
+
+ ret = hv_ring_buffer_write(&channel->outbound, buffer_list, 4,
&need_sig);
/* TODO: We should determine if this is optional */
@@ -880,3 +885,67 @@ hv_vmbus_channel_recv_packet_raw(
return (0);
}
+
+
+/**
+ * Process a channel event notification
+ */
+static void
+VmbusProcessChannelEvent(void* context, int pending)
+{
+ void* arg;
+ uint32_t bytes_to_read;
+ hv_vmbus_channel* channel = (hv_vmbus_channel*)context;
+ boolean_t is_batched_reading;
+
+	/**
+	 * The channel to service is passed in directly; invoke its
+	 * callback to process the event.
+	 */
+
+ if (channel == NULL) {
+ return;
+ }
+ /**
+ * To deal with the race condition where we might
+ * receive a packet while the relevant driver is
+ * being unloaded, dispatch the callback while
+ * holding the channel lock. The unloading driver
+ * will acquire the same channel lock to set the
+ * callback to NULL. This closes the window.
+ */
+
+ /*
+ * Disable the lock due to newly added WITNESS check in r277723.
+ * Will seek other way to avoid race condition.
+ * -- whu
+ */
+ // mtx_lock(&channel->inbound_lock);
+ if (channel->on_channel_callback != NULL) {
+ arg = channel->channel_callback_context;
+ is_batched_reading = channel->batched_reading;
+ /*
+ * Optimize host to guest signaling by ensuring:
+ * 1. While reading the channel, we disable interrupts from
+ * host.
+ * 2. Ensure that we process all posted messages from the host
+ * before returning from this callback.
+ * 3. Once we return, enable signaling from the host. Once this
+ * state is set we check to see if additional packets are
+ * available to read. In this case we repeat the process.
+ */
+ do {
+ if (is_batched_reading)
+ hv_ring_buffer_read_begin(&channel->inbound);
+
+ channel->on_channel_callback(arg);
+
+ if (is_batched_reading)
+ bytes_to_read =
+ hv_ring_buffer_read_end(&channel->inbound);
+ else
+ bytes_to_read = 0;
+ } while (is_batched_reading && (bytes_to_read != 0));
+ }
+ // mtx_unlock(&channel->inbound_lock);
+}
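
VmbusProcessChannelEvent() now runs as a per-channel task on a per-cpu taskqueue rather than inside the event software interrupt, and hv_vmbus_channel_close_internal() clears channel->rxq before draining the task. Reduced to a generic sketch (the struct and helper names are illustrative; the taskqueue(9) calls are the ones used above), the lifetime rule is:

/*
 * Sketch: the enqueue/drain discipline this patch applies. The event
 * side refuses to enqueue once rxq is NULL; teardown clears rxq and
 * then drains, so no task runs after the channel is torn down.
 */
struct chan {
	struct taskqueue	*rxq;	/* NULL once the channel is closing */
	struct task		 task;
};

static void
chan_event(struct chan *c)			/* interrupt side */
{
	if (c->rxq == NULL)			/* closed or closing */
		return;
	taskqueue_enqueue_fast(c->rxq, &c->task);
}

static void
chan_close(struct chan *c)			/* teardown side */
{
	struct taskqueue *rxq = c->rxq;

	c->rxq = NULL;				/* stop new enqueues */
	taskqueue_drain(rxq, &c->task);		/* wait out in-flight work */
}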
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
index 4ccb647..ab6e8ad 100644
--- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -39,8 +39,10 @@ __FBSDID("$FreeBSD$");
*/
static void vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_offer_internal(void* context);
static void vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr);
+static void vmbus_channel_on_offer_rescind_internal(void* context);
static void vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr);
static void vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr);
@@ -52,41 +54,46 @@ static void vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr);
hv_vmbus_channel_msg_table_entry
g_channel_message_table[HV_CHANNEL_MESSAGE_COUNT] = {
{ HV_CHANNEL_MESSAGE_INVALID,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_OFFER_CHANNEL,
- 0, vmbus_channel_on_offer },
+ vmbus_channel_on_offer },
{ HV_CHANNEL_MESSAGE_RESCIND_CHANNEL_OFFER,
- 0, vmbus_channel_on_offer_rescind },
+ vmbus_channel_on_offer_rescind },
{ HV_CHANNEL_MESSAGE_REQUEST_OFFERS,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_ALL_OFFERS_DELIVERED,
- 1, vmbus_channel_on_offers_delivered },
+ vmbus_channel_on_offers_delivered },
{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_OPEN_CHANNEL_RESULT,
- 1, vmbus_channel_on_open_result },
+ vmbus_channel_on_open_result },
{ HV_CHANNEL_MESSAGE_CLOSE_CHANNEL,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGEL_GPADL_HEADER,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_GPADL_BODY,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_GPADL_CREATED,
- 1, vmbus_channel_on_gpadl_created },
+ vmbus_channel_on_gpadl_created },
{ HV_CHANNEL_MESSAGE_GPADL_TEARDOWN,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_GPADL_TORNDOWN,
- 1, vmbus_channel_on_gpadl_torndown },
+ vmbus_channel_on_gpadl_torndown },
{ HV_CHANNEL_MESSAGE_REL_ID_RELEASED,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_INITIATED_CONTACT,
- 0, NULL },
+ NULL },
{ HV_CHANNEL_MESSAGE_VERSION_RESPONSE,
- 1, vmbus_channel_on_version_response },
+ vmbus_channel_on_version_response },
{ HV_CHANNEL_MESSAGE_UNLOAD,
- 0, NULL }
+ NULL }
};
+typedef struct hv_work_item {
+ struct task work;
+ void (*callback)(void *);
+ void* context;
+} hv_work_item;
/**
* Implementation of the work abstraction.
@@ -96,120 +103,30 @@ work_item_callback(void *work, int pending)
{
struct hv_work_item *w = (struct hv_work_item *)work;
- /*
- * Serialize work execution.
- */
- if (w->wq->work_sema != NULL) {
- sema_wait(w->wq->work_sema);
- }
-
w->callback(w->context);
- if (w->wq->work_sema != NULL) {
- sema_post(w->wq->work_sema);
- }
-
free(w, M_DEVBUF);
}
-struct hv_work_queue*
-hv_work_queue_create(char* name)
-{
- static unsigned int qid = 0;
- char qname[64];
- int pri;
- struct hv_work_queue* wq;
-
- wq = malloc(sizeof(struct hv_work_queue), M_DEVBUF, M_NOWAIT | M_ZERO);
- KASSERT(wq != NULL, ("Error VMBUS: Failed to allocate work_queue\n"));
- if (wq == NULL)
- return (NULL);
-
- /*
- * We use work abstraction to handle messages
- * coming from the host and these are typically offers.
- * Some FreeBsd drivers appear to have a concurrency issue
- * where probe/attach needs to be serialized. We ensure that
- * by having only one thread process work elements in a
- * specific queue by serializing work execution.
- *
- */
- if (strcmp(name, "vmbusQ") == 0) {
- pri = PI_DISK;
- } else { /* control */
- pri = PI_NET;
- /*
- * Initialize semaphore for this queue by pointing
- * to the globale semaphore used for synchronizing all
- * control messages.
- */
- wq->work_sema = &hv_vmbus_g_connection.control_sema;
- }
-
- sprintf(qname, "hv_%s_%u", name, qid);
-
- /*
- * Fixme: FreeBSD 8.2 has a different prototype for
- * taskqueue_create(), and for certain other taskqueue functions.
- * We need to research the implications of these changes.
- * Fixme: Not sure when the changes were introduced.
- */
- wq->queue = taskqueue_create(qname, M_NOWAIT, taskqueue_thread_enqueue,
- &wq->queue
- #if __FreeBSD_version < 800000
- , &wq->proc
- #endif
- );
-
- if (wq->queue == NULL) {
- free(wq, M_DEVBUF);
- return (NULL);
- }
-
- if (taskqueue_start_threads(&wq->queue, 1, pri, "%s taskq", qname)) {
- taskqueue_free(wq->queue);
- free(wq, M_DEVBUF);
- return (NULL);
- }
-
- qid++;
-
- return (wq);
-}
-
-void
-hv_work_queue_close(struct hv_work_queue *wq)
-{
- /*
- * KYS: Need to drain the taskqueue
- * before we close the hv_work_queue.
- */
- /*KYS: taskqueue_drain(wq->tq, ); */
- taskqueue_free(wq->queue);
- free(wq, M_DEVBUF);
-}
-
/**
* @brief Create work item
*/
-int
+static int
hv_queue_work_item(
- struct hv_work_queue *wq,
void (*callback)(void *), void *context)
{
struct hv_work_item *w = malloc(sizeof(struct hv_work_item),
- M_DEVBUF, M_NOWAIT | M_ZERO);
+ M_DEVBUF, M_NOWAIT);
KASSERT(w != NULL, ("Error VMBUS: Failed to allocate WorkItem\n"));
if (w == NULL)
return (ENOMEM);
w->callback = callback;
w->context = context;
- w->wq = wq;
TASK_INIT(&w->work, 0, work_item_callback, w);
- return (taskqueue_enqueue(wq->queue, &w->work));
+ return (taskqueue_enqueue(taskqueue_thread, &w->work));
}
@@ -224,10 +141,7 @@ hv_vmbus_allocate_channel(void)
channel = (hv_vmbus_channel*) malloc(
sizeof(hv_vmbus_channel),
M_DEVBUF,
- M_NOWAIT | M_ZERO);
- KASSERT(channel != NULL, ("Error VMBUS: Failed to allocate channel!"));
- if (channel == NULL)
- return (NULL);
+ M_WAITOK | M_ZERO);
mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
@@ -238,16 +152,6 @@ hv_vmbus_allocate_channel(void)
}
/**
- * @brief Release the vmbus channel object itself
- */
-static inline void
-ReleaseVmbusChannel(void *context)
-{
- hv_vmbus_channel* channel = (hv_vmbus_channel*) context;
- free(channel, M_DEVBUF);
-}
-
-/**
* @brief Release the resources used by the vmbus channel object
*/
void
@@ -255,13 +159,8 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
{
mtx_destroy(&channel->sc_lock);
mtx_destroy(&channel->inbound_lock);
- /*
- * We have to release the channel's workqueue/thread in
- * the vmbus's workqueue/thread context
- * ie we can't destroy ourselves
- */
- hv_queue_work_item(hv_vmbus_g_connection.work_queue,
- ReleaseVmbusChannel, (void *) channel);
+
+ free(channel, M_DEVBUF);
}
/**
@@ -459,7 +358,7 @@ static void
vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
{
hv_vmbus_channel_offer_channel* offer;
- hv_vmbus_channel* new_channel;
+ hv_vmbus_channel_offer_channel* copied;
offer = (hv_vmbus_channel_offer_channel*) hdr;
@@ -469,10 +368,25 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
guidType = &offer->offer.interface_type;
guidInstance = &offer->offer.interface_instance;
+	/* Copy the offer data. */
+ copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
+ if (copied == NULL) {
+		printf("VMBUS: failed to allocate memory for offer\n");
+ return;
+ }
+
+ memcpy(copied, hdr, sizeof(*copied));
+ hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
+}
+
+static void
+vmbus_channel_on_offer_internal(void* context)
+{
+ hv_vmbus_channel* new_channel;
+
+ hv_vmbus_channel_offer_channel* offer = (hv_vmbus_channel_offer_channel*)context;
/* Allocate the channel object and save this offer */
new_channel = hv_vmbus_allocate_channel();
- if (new_channel == NULL)
- return;
/*
* By default we setup state to enable batched
@@ -512,6 +426,8 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
vmbus_channel_process_offer(new_channel);
+
+ free(offer, M_DEVBUF);
}
/**
@@ -529,13 +445,20 @@ vmbus_channel_on_offer_rescind(hv_vmbus_channel_msg_header* hdr)
rescind = (hv_vmbus_channel_rescind_offer*) hdr;
channel = hv_vmbus_g_connection.channels[rescind->child_rel_id];
- if (channel == NULL)
+ if (channel == NULL)
return;
- hv_vmbus_child_device_unregister(channel->device);
- mtx_lock(&hv_vmbus_g_connection.channel_lock);
+ hv_queue_work_item(vmbus_channel_on_offer_rescind_internal, channel);
hv_vmbus_g_connection.channels[rescind->child_rel_id] = NULL;
- mtx_unlock(&hv_vmbus_g_connection.channel_lock);
+}
+
+static void
+vmbus_channel_on_offer_rescind_internal(void *context)
+{
+ hv_vmbus_channel* channel;
+
+ channel = (hv_vmbus_channel*)context;
+ hv_vmbus_child_device_unregister(channel->device);
}
/**
@@ -712,35 +635,6 @@ vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
}
/**
- * @brief Handler for channel protocol messages.
- *
- * This is invoked in the vmbus worker thread context.
- */
-void
-hv_vmbus_on_channel_message(void *context)
-{
- hv_vmbus_message* msg;
- hv_vmbus_channel_msg_header* hdr;
- int size;
-
- msg = (hv_vmbus_message*) context;
- hdr = (hv_vmbus_channel_msg_header*) msg->u.payload;
- size = msg->header.payload_size;
-
- if (hdr->message_type >= HV_CHANNEL_MESSAGE_COUNT) {
- free(msg, M_DEVBUF);
- return;
- }
-
- if (g_channel_message_table[hdr->message_type].messageHandler) {
- g_channel_message_table[hdr->message_type].messageHandler(hdr);
- }
-
- /* Free the msg that was allocated in VmbusOnMsgDPC() */
- free(msg, M_DEVBUF);
-}
-
-/**
* @brief Send a request to get all our pending offers.
*/
int
@@ -765,8 +659,7 @@ hv_vmbus_request_channel_offers(void)
ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_msg_header));
- if (msg_info)
- free(msg_info, M_DEVBUF);
+ free(msg_info, M_DEVBUF);
return (ret);
}
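
vmbus_channel_on_offer() above copies the offer before deferring to taskqueue_thread because the header points into a per-cpu SynIC message slot that vmbus_msg_swintr() releases (message_type reset to HV_MESSAGE_TYPE_NONE) as soon as the handler returns. The copy-then-defer pattern in isolation, as a sketch of what the code above does:

/*
 * Sketch: duplicating a message that must outlive its source page.
 * The deferred handler owns the copy and frees it when done, as
 * vmbus_channel_on_offer_internal() does above.
 */
static void
defer_offer(hv_vmbus_channel_msg_header *hdr)
{
	hv_vmbus_channel_offer_channel *copied;

	copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
	if (copied == NULL) {
		printf("VMBUS: failed to allocate memory for offer\n");
		return;		/* the offer is dropped, not retried */
	}
	memcpy(copied, hdr, sizeof(*copied));
	hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
}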
diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c
index cfdc9bb..fb1879d 100644
--- a/sys/dev/hyperv/vmbus/hv_connection.c
+++ b/sys/dev/hyperv/vmbus/hv_connection.c
@@ -90,12 +90,10 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
hv_vmbus_g_connection.interrupt_page);
msg->monitor_page_1 = hv_get_phys_addr(
- hv_vmbus_g_connection.monitor_pages);
+ hv_vmbus_g_connection.monitor_page_1);
- msg->monitor_page_2 =
- hv_get_phys_addr(
- ((uint8_t *) hv_vmbus_g_connection.monitor_pages
- + PAGE_SIZE));
+ msg->monitor_page_2 = hv_get_phys_addr(
+ hv_vmbus_g_connection.monitor_page_2);
/**
* Add to list before we send the request since we may receive the
@@ -168,8 +166,6 @@ hv_vmbus_connect(void) {
* Initialize the vmbus connection
*/
hv_vmbus_g_connection.connect_state = HV_CONNECTING;
- hv_vmbus_g_connection.work_queue = hv_work_queue_create("vmbusQ");
- sema_init(&hv_vmbus_g_connection.control_sema, 1, "control_sema");
TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
@@ -183,18 +179,9 @@ hv_vmbus_connect(void) {
* Setup the vmbus event connection for channel interrupt abstraction
* stuff
*/
- hv_vmbus_g_connection.interrupt_page = contigmalloc(
+ hv_vmbus_g_connection.interrupt_page = malloc(
PAGE_SIZE, M_DEVBUF,
- M_NOWAIT | M_ZERO, 0UL,
- BUS_SPACE_MAXADDR,
- PAGE_SIZE, 0);
- KASSERT(hv_vmbus_g_connection.interrupt_page != NULL,
- ("Error VMBUS: malloc failed to allocate Channel"
- " Request Event message!"));
- if (hv_vmbus_g_connection.interrupt_page == NULL) {
- ret = ENOMEM;
- goto cleanup;
- }
+ M_WAITOK | M_ZERO);
hv_vmbus_g_connection.recv_interrupt_page =
hv_vmbus_g_connection.interrupt_page;
@@ -207,31 +194,19 @@ hv_vmbus_connect(void) {
* Set up the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
- hv_vmbus_g_connection.monitor_pages = contigmalloc(
- 2 * PAGE_SIZE,
+ hv_vmbus_g_connection.monitor_page_1 = malloc(
+ PAGE_SIZE,
M_DEVBUF,
- M_NOWAIT | M_ZERO,
- 0UL,
- BUS_SPACE_MAXADDR,
+ M_WAITOK | M_ZERO);
+ hv_vmbus_g_connection.monitor_page_2 = malloc(
PAGE_SIZE,
- 0);
- KASSERT(hv_vmbus_g_connection.monitor_pages != NULL,
- ("Error VMBUS: malloc failed to allocate Monitor Pages!"));
- if (hv_vmbus_g_connection.monitor_pages == NULL) {
- ret = ENOMEM;
- goto cleanup;
- }
+ M_DEVBUF,
+ M_WAITOK | M_ZERO);
msg_info = (hv_vmbus_channel_msg_info*)
malloc(sizeof(hv_vmbus_channel_msg_info) +
sizeof(hv_vmbus_channel_initiate_contact),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- KASSERT(msg_info != NULL,
- ("Error VMBUS: malloc failed for Initiate Contact message!"));
- if (msg_info == NULL) {
- ret = ENOMEM;
- goto cleanup;
- }
+ M_DEVBUF, M_WAITOK | M_ZERO);
hv_vmbus_g_connection.channels = malloc(sizeof(hv_vmbus_channel*) *
HV_CHANNEL_MAX_COUNT,
@@ -273,26 +248,16 @@ hv_vmbus_connect(void) {
hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
- hv_work_queue_close(hv_vmbus_g_connection.work_queue);
- sema_destroy(&hv_vmbus_g_connection.control_sema);
mtx_destroy(&hv_vmbus_g_connection.channel_lock);
mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
if (hv_vmbus_g_connection.interrupt_page != NULL) {
- contigfree(
- hv_vmbus_g_connection.interrupt_page,
- PAGE_SIZE,
- M_DEVBUF);
+ free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
hv_vmbus_g_connection.interrupt_page = NULL;
}
- if (hv_vmbus_g_connection.monitor_pages != NULL) {
- contigfree(
- hv_vmbus_g_connection.monitor_pages,
- 2 * PAGE_SIZE,
- M_DEVBUF);
- hv_vmbus_g_connection.monitor_pages = NULL;
- }
+ free(hv_vmbus_g_connection.monitor_page_1, M_DEVBUF);
+ free(hv_vmbus_g_connection.monitor_page_2, M_DEVBUF);
if (msg_info) {
sema_destroy(&msg_info->wait_sema);
@@ -309,108 +274,29 @@ hv_vmbus_connect(void) {
int
hv_vmbus_disconnect(void) {
int ret = 0;
- hv_vmbus_channel_unload* msg;
-
- msg = malloc(sizeof(hv_vmbus_channel_unload),
- M_DEVBUF, M_NOWAIT | M_ZERO);
- KASSERT(msg != NULL,
- ("Error VMBUS: malloc failed to allocate Channel Unload Msg!"));
- if (msg == NULL)
- return (ENOMEM);
-
- msg->message_type = HV_CHANNEL_MESSAGE_UNLOAD;
+ hv_vmbus_channel_unload msg;
- ret = hv_vmbus_post_message(msg, sizeof(hv_vmbus_channel_unload));
+ msg.message_type = HV_CHANNEL_MESSAGE_UNLOAD;
+ ret = hv_vmbus_post_message(&msg, sizeof(hv_vmbus_channel_unload));
- contigfree(hv_vmbus_g_connection.interrupt_page, PAGE_SIZE, M_DEVBUF);
+ free(hv_vmbus_g_connection.interrupt_page, M_DEVBUF);
mtx_destroy(&hv_vmbus_g_connection.channel_msg_lock);
- hv_work_queue_close(hv_vmbus_g_connection.work_queue);
- sema_destroy(&hv_vmbus_g_connection.control_sema);
-
free(hv_vmbus_g_connection.channels, M_DEVBUF);
hv_vmbus_g_connection.connect_state = HV_DISCONNECTED;
- free(msg, M_DEVBUF);
-
return (ret);
}
/**
- * Process a channel event notification
- */
-static void
-VmbusProcessChannelEvent(uint32_t relid)
-{
- void* arg;
- uint32_t bytes_to_read;
- hv_vmbus_channel* channel;
- boolean_t is_batched_reading;
-
- /**
- * Find the channel based on this relid and invokes
- * the channel callback to process the event
- */
-
- channel = hv_vmbus_g_connection.channels[relid];
-
- if (channel == NULL) {
- return;
- }
- /**
- * To deal with the race condition where we might
- * receive a packet while the relevant driver is
- * being unloaded, dispatch the callback while
- * holding the channel lock. The unloading driver
- * will acquire the same channel lock to set the
- * callback to NULL. This closes the window.
- */
-
- /*
- * Disable the lock due to newly added WITNESS check in r277723.
- * Will seek other way to avoid race condition.
- * -- whu
- */
- // mtx_lock(&channel->inbound_lock);
- if (channel->on_channel_callback != NULL) {
- arg = channel->channel_callback_context;
- is_batched_reading = channel->batched_reading;
- /*
- * Optimize host to guest signaling by ensuring:
- * 1. While reading the channel, we disable interrupts from
- * host.
- * 2. Ensure that we process all posted messages from the host
- * before returning from this callback.
- * 3. Once we return, enable signaling from the host. Once this
- * state is set we check to see if additional packets are
- * available to read. In this case we repeat the process.
- */
- do {
- if (is_batched_reading)
- hv_ring_buffer_read_begin(&channel->inbound);
-
- channel->on_channel_callback(arg);
-
- if (is_batched_reading)
- bytes_to_read =
- hv_ring_buffer_read_end(&channel->inbound);
- else
- bytes_to_read = 0;
- } while (is_batched_reading && (bytes_to_read != 0));
- }
- // mtx_unlock(&channel->inbound_lock);
-}
-
-/**
* Handler for events
*/
void
-hv_vmbus_on_events(void *arg)
+hv_vmbus_on_events(int cpu)
{
int bit;
- int cpu;
int dword;
void *page_addr;
uint32_t* recv_interrupt_page = NULL;
@@ -419,7 +305,6 @@ hv_vmbus_on_events(void *arg)
hv_vmbus_synic_event_flags *event;
/* int maxdword = PAGE_SIZE >> 3; */
- cpu = (int)(long)arg;
KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
"cpu out of range!"));
@@ -461,8 +346,14 @@ hv_vmbus_on_events(void *arg)
*/
continue;
} else {
- VmbusProcessChannelEvent(rel_id);
-
+			hv_vmbus_channel *channel = hv_vmbus_g_connection.channels[rel_id];
+ /* if channel is closed or closing */
+ if (channel == NULL || channel->rxq == NULL)
+ continue;
+
+ if (channel->batched_reading)
+ hv_ring_buffer_read_begin(&channel->inbound);
+ taskqueue_enqueue_fast(channel->rxq, &channel->channel_task);
}
}
}
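
The hunk above shows only the innermost dispatch; the surrounding loops in hv_vmbus_on_events() scan the receive interrupt page as an array of 32-bit words, where a set bit at (dword, bit) names channel rel_id = (dword << 5) + bit, the inverse of the child_rel_id >> 5 indexing used by vmbus_channel_set_event(). A hedged sketch of that scan, with the dispatch step elided:

/*
 * Sketch: recovering channel rel_ids from the event bitmap. Each
 * uint32_t covers 32 channels: the word index supplies the high
 * bits of the rel_id and the bit index the low five.
 */
static void
scan_events(uint32_t *recv_interrupt_page, int maxdword)
{
	uint32_t rel_id;
	int bit, dword;

	for (dword = 0; dword < maxdword; dword++) {
		if (recv_interrupt_page[dword] == 0)
			continue;
		for (bit = 0; bit < 32; bit++) {
			if (synch_test_and_clear_bit(bit,
			    &recv_interrupt_page[dword])) {
				rel_id = (dword << 5) + bit;
				/* look up channels[rel_id], enqueue its task */
			}
		}
	}
}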
diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c
index ca5641f..6afc2b8 100644
--- a/sys/dev/hyperv/vmbus/hv_hv.c
+++ b/sys/dev/hyperv/vmbus/hv_hv.c
@@ -189,11 +189,7 @@ hv_vmbus_init(void)
* See if the hypercall page is already set
*/
hypercall_msr.as_uint64_t = rdmsr(HV_X64_MSR_HYPERCALL);
- virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
- KASSERT(virt_addr != NULL,
- ("Error VMBUS: malloc failed to allocate page during init!"));
- if (virt_addr == NULL)
- goto cleanup;
+ virt_addr = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
hypercall_msr.u.enable = 1;
hypercall_msr.u.guest_physical_address =
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
index 66a3f39..c8d6894 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
@@ -83,8 +83,6 @@ vmbus_msg_swintr(void *arg)
hv_vmbus_channel_msg_table_entry *entry;
hv_vmbus_channel_msg_type msg_type;
hv_vmbus_message* msg;
- hv_vmbus_message* copied;
- static bool warned = false;
cpu = (int)(long)arg;
KASSERT(cpu <= mp_maxid, ("VMBUS: vmbus_msg_swintr: "
@@ -100,31 +98,15 @@ vmbus_msg_swintr(void *arg)
hdr = (hv_vmbus_channel_msg_header *)msg->u.payload;
msg_type = hdr->message_type;
- if (msg_type >= HV_CHANNEL_MESSAGE_COUNT && !warned) {
- warned = true;
+ if (msg_type >= HV_CHANNEL_MESSAGE_COUNT) {
printf("VMBUS: unknown message type = %d\n", msg_type);
goto handled;
}
entry = &g_channel_message_table[msg_type];
- if (entry->handler_no_sleep)
+ if (entry->messageHandler)
entry->messageHandler(hdr);
- else {
-
- copied = malloc(sizeof(hv_vmbus_message),
- M_DEVBUF, M_NOWAIT);
- KASSERT(copied != NULL,
- ("Error VMBUS: malloc failed to allocate"
- " hv_vmbus_message!"));
- if (copied == NULL)
- continue;
-
- memcpy(copied, msg, sizeof(hv_vmbus_message));
- hv_queue_work_item(hv_vmbus_g_connection.work_queue,
- hv_vmbus_on_channel_message,
- copied);
- }
handled:
msg->header.message_type = HV_MESSAGE_TYPE_NONE;
@@ -177,7 +159,7 @@ hv_vmbus_isr(struct trapframe *frame)
(hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
/* Since we are a child, we only need to check bit 0 */
if (synch_test_and_clear_bit(0, &event->flags32[0])) {
- swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
+ hv_vmbus_on_events(cpu);
}
} else {
/*
@@ -187,7 +169,7 @@ hv_vmbus_isr(struct trapframe *frame)
* Directly schedule the event software interrupt on
* current cpu.
*/
- swi_sched(hv_vmbus_g_context.event_swintr[cpu], 0);
+ hv_vmbus_on_events(cpu);
}
/* Check if there are actual msgs to be process */
@@ -225,7 +207,6 @@ hv_vmbus_isr(struct trapframe *frame)
return (FILTER_HANDLED);
}
-uint32_t hv_vmbus_swintr_event_cpu[MAXCPU];
u_long *hv_vmbus_intr_cpu[MAXCPU];
void
@@ -310,12 +291,7 @@ hv_vmbus_child_device_create(
* Allocate the new child device
*/
child_dev = malloc(sizeof(hv_device), M_DEVBUF,
- M_NOWAIT | M_ZERO);
- KASSERT(child_dev != NULL,
- ("Error VMBUS: malloc failed to allocate hv_device!"));
-
- if (child_dev == NULL)
- return (NULL);
+ M_WAITOK | M_ZERO);
child_dev->channel = channel;
memcpy(&child_dev->class_id, &type, sizeof(hv_guid));
@@ -455,6 +431,19 @@ vmbus_vector_free(int vector)
#endif /* HYPERV */
+static void
+vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
+{
+ cpuset_t *mask = xmask;
+ int error;
+
+ error = cpuset_setthread(curthread->td_tid, mask);
+ if (error) {
+ panic("curthread=%ju: can't pin; error=%d",
+ (uintmax_t)curthread->td_tid, error);
+ }
+}
+
/**
* @brief Main vmbus driver initialization routine.
*
@@ -472,6 +461,7 @@ vmbus_bus_init(void)
{
int i, j, n, ret;
char buf[MAXCOMLEN + 1];
+ cpuset_t cpu_mask;
if (vmbus_inited)
return (0);
@@ -508,10 +498,7 @@ vmbus_bus_init(void)
setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
CPU_FOREACH(j) {
- hv_vmbus_swintr_event_cpu[j] = 0;
- hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
- hv_vmbus_g_context.event_swintr[j] = NULL;
hv_vmbus_g_context.msg_swintr[j] = NULL;
snprintf(buf, sizeof(buf), "cpu%d:hyperv", j);
@@ -525,6 +512,26 @@ vmbus_bus_init(void)
* Per cpu setup.
*/
CPU_FOREACH(j) {
+ struct task cpuset_task;
+
+ /*
+ * Setup taskqueue to handle events
+ */
+ hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK,
+ taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]);
+ if (hv_vmbus_g_context.hv_event_queue[j] == NULL) {
+ if (bootverbose)
+ printf("VMBUS: failed to setup taskqueue\n");
+ goto cleanup1;
+ }
+ taskqueue_start_threads(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET,
+ "hvevent%d", j);
+
+ CPU_SETOF(j, &cpu_mask);
+ TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask);
+ taskqueue_enqueue(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task);
+ taskqueue_drain(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task);
+
/*
* Setup software interrupt thread and handler for msg handling.
*/
@@ -543,7 +550,7 @@ vmbus_bus_init(void)
*/
ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
j);
- if (ret) {
+ if (ret) {
if(bootverbose)
printf("VMBUS: failed to bind msg swi thread "
"to cpu %d\n", j);
@@ -551,30 +558,11 @@ vmbus_bus_init(void)
}
/*
- * Setup software interrupt thread and handler for
- * event handling.
- */
- ret = swi_add(&hv_vmbus_g_context.hv_event_intr_event[j],
- "hv_event", hv_vmbus_on_events, (void *)(long)j,
- SWI_CLOCK, 0, &hv_vmbus_g_context.event_swintr[j]);
- if (ret) {
- if(bootverbose)
- printf("VMBUS: failed to setup event swi for "
- "cpu %d\n", j);
- goto cleanup1;
- }
-
- /*
* Prepare the per cpu msg and event pages to be called on each cpu.
*/
for(i = 0; i < 2; i++) {
setup_args.page_buffers[2 * j + i] =
- malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT | M_ZERO);
- if (setup_args.page_buffers[2 * j + i] == NULL) {
- KASSERT(setup_args.page_buffers[2 * j + i] != NULL,
- ("Error VMBUS: malloc failed!"));
- goto cleanup1;
- }
+ malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
}
}
@@ -607,12 +595,11 @@ vmbus_bus_init(void)
* remove swi and vmbus callback vector;
*/
CPU_FOREACH(j) {
+ if (hv_vmbus_g_context.hv_event_queue[j] != NULL)
+ taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]);
if (hv_vmbus_g_context.msg_swintr[j] != NULL)
swi_remove(hv_vmbus_g_context.msg_swintr[j]);
- if (hv_vmbus_g_context.event_swintr[j] != NULL)
- swi_remove(hv_vmbus_g_context.event_swintr[j]);
hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
- hv_vmbus_g_context.hv_event_intr_event[j] = NULL;
}
vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
@@ -677,12 +664,11 @@ vmbus_bus_exit(void)
/* remove swi */
CPU_FOREACH(i) {
+ if (hv_vmbus_g_context.hv_event_queue[i] != NULL)
+ taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]);
if (hv_vmbus_g_context.msg_swintr[i] != NULL)
swi_remove(hv_vmbus_g_context.msg_swintr[i]);
- if (hv_vmbus_g_context.event_swintr[i] != NULL)
- swi_remove(hv_vmbus_g_context.event_swintr[i]);
hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
- hv_vmbus_g_context.hv_event_intr_event[i] = NULL;
}
vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
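
The cpuset_task sequence in vmbus_bus_init() is a self-pinning trick: the one-shot task executes on the new taskqueue's own thread, so curthread inside the handler is precisely the thread to bind, and the immediate drain makes the bind synchronous before any channel work can be enqueued. The trick in isolation (a sketch; error handling trimmed to a void cast):

/*
 * Sketch: pinning a taskqueue thread to one CPU from the thread
 * itself. The drain guarantees the task has run, so the on-stack
 * mask may safely go out of scope afterwards.
 */
static void
pin_task(void *xmask, int pending __unused)
{
	cpuset_t *mask = xmask;

	(void)cpuset_setthread(curthread->td_tid, mask);
}

static void
pin_taskqueue(struct taskqueue *tq, int cpu)
{
	struct task t;
	cpuset_t mask;

	CPU_SETOF(cpu, &mask);
	TASK_INIT(&t, 0, pin_task, &mask);
	taskqueue_enqueue(tq, &t);
	taskqueue_drain(tq, &t);
}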
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
index 13a35c4..5f62072 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
@@ -202,9 +202,8 @@ typedef struct {
* Each cpu has its own software interrupt handler for channel
* event and msg handling.
*/
- struct intr_event *hv_event_intr_event[MAXCPU];
+ struct taskqueue *hv_event_queue[MAXCPU];
struct intr_event *hv_msg_intr_event[MAXCPU];
- void *event_swintr[MAXCPU];
void *msg_swintr[MAXCPU];
/*
* Host use this vector to intrrupt guest for vmbus channel
@@ -351,7 +350,8 @@ typedef struct {
* notification and 2nd is child->parent
* notification
*/
- void *monitor_pages;
+ void *monitor_page_1;
+ void *monitor_page_2;
TAILQ_HEAD(, hv_vmbus_channel_msg_info) channel_msg_anchor;
struct mtx channel_msg_lock;
/**
@@ -363,10 +363,8 @@ typedef struct {
/**
* channel table for fast lookup through id.
- */
+ */
hv_vmbus_channel **channels;
- hv_vmbus_handle work_queue;
- struct sema control_sema;
} hv_vmbus_connection;
typedef union {
@@ -633,7 +631,6 @@ typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg);
typedef struct hv_vmbus_channel_msg_table_entry {
hv_vmbus_channel_msg_type messageType;
- bool handler_no_sleep; /* true: the handler doesn't sleep */
vmbus_msg_handler messageHandler;
} hv_vmbus_channel_msg_table_entry;
@@ -683,7 +680,6 @@ uint32_t hv_ring_buffer_read_end(
hv_vmbus_channel* hv_vmbus_allocate_channel(void);
void hv_vmbus_free_vmbus_channel(hv_vmbus_channel *channel);
-void hv_vmbus_on_channel_message(void *context);
int hv_vmbus_request_channel_offers(void);
void hv_vmbus_release_unattached_channels(void);
int hv_vmbus_init(void);
@@ -717,7 +713,7 @@ int hv_vmbus_connect(void);
int hv_vmbus_disconnect(void);
int hv_vmbus_post_message(void *buffer, size_t buf_size);
int hv_vmbus_set_event(hv_vmbus_channel *channel);
-void hv_vmbus_on_events(void *);
+void hv_vmbus_on_events(int cpu);
/**
* Event Timer interfaces