summaryrefslogtreecommitdiffstats
path: root/sys/dev/hyperv
diff options
context:
space:
mode:
authorRenato Botelho <renato@netgate.com>2016-06-21 07:44:54 -0300
committerRenato Botelho <renato@netgate.com>2016-06-21 07:44:54 -0300
commit1fc6b0207cc2f3cce33817706603caa41a9de24d (patch)
treed2d812b76b08f42a002621f716dd5f3199c7ca7d /sys/dev/hyperv
parentb8632c4f34175c7018be77059ab229e755eb67e0 (diff)
parentbc9e0dd07a76c4d7a1c6fcf21824ca2cecff2c6d (diff)
downloadFreeBSD-src-1fc6b0207cc2f3cce33817706603caa41a9de24d.zip
FreeBSD-src-1fc6b0207cc2f3cce33817706603caa41a9de24d.tar.gz
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys/dev/hyperv')
-rw-r--r--sys/dev/hyperv/include/hyperv.h29
-rw-r--r--sys/dev/hyperv/netvsc/hv_net_vsc.c157
-rw-r--r--sys/dev/hyperv/netvsc/hv_net_vsc.h195
-rw-r--r--sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c760
-rw-r--r--sys/dev/hyperv/netvsc/hv_rndis.h50
-rw-r--r--sys/dev/hyperv/netvsc/hv_rndis_filter.c397
-rw-r--r--sys/dev/hyperv/netvsc/hv_rndis_filter.h34
-rw-r--r--sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c58
-rw-r--r--sys/dev/hyperv/utilities/hv_heartbeat.c6
-rw-r--r--sys/dev/hyperv/utilities/hv_kvp.c35
-rw-r--r--sys/dev/hyperv/utilities/hv_kvp.h2
-rw-r--r--sys/dev/hyperv/utilities/hv_shutdown.c6
-rw-r--r--sys/dev/hyperv/utilities/hv_timesync.c6
-rw-r--r--sys/dev/hyperv/utilities/hv_util.c2
-rw-r--r--sys/dev/hyperv/utilities/hv_util.h2
-rw-r--r--sys/dev/hyperv/vmbus/amd64/hv_vector.S46
-rw-r--r--sys/dev/hyperv/vmbus/hv_channel.c150
-rw-r--r--sys/dev/hyperv/vmbus/hv_channel_mgmt.c237
-rw-r--r--sys/dev/hyperv/vmbus/hv_connection.c67
-rw-r--r--sys/dev/hyperv/vmbus/hv_et.c100
-rw-r--r--sys/dev/hyperv/vmbus/hv_hv.c190
-rw-r--r--sys/dev/hyperv/vmbus/hv_ring_buffer.c44
-rw-r--r--sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c180
-rw-r--r--sys/dev/hyperv/vmbus/hv_vmbus_priv.h47
-rw-r--r--sys/dev/hyperv/vmbus/i386/hv_vector.S49
25 files changed, 2167 insertions, 682 deletions
diff --git a/sys/dev/hyperv/include/hyperv.h b/sys/dev/hyperv/include/hyperv.h
index f45543b..aeec8ec 100644
--- a/sys/dev/hyperv/include/hyperv.h
+++ b/sys/dev/hyperv/include/hyperv.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -124,6 +124,8 @@ typedef struct hv_guid {
unsigned char data[16];
} __packed hv_guid;
+int snprintf_hv_guid(char *, size_t, const hv_guid *);
+
#define HV_NIC_GUID \
.data = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, \
0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
@@ -689,7 +691,6 @@ typedef struct {
} hv_vmbus_ring_buffer_info;
typedef void (*hv_vmbus_pfn_channel_callback)(void *context);
-typedef void (*hv_vmbus_sc_creation_callback)(void *context);
typedef enum {
HV_CHANNEL_OFFER_STATE,
@@ -753,8 +754,6 @@ typedef struct hv_vmbus_channel {
*/
hv_vmbus_ring_buffer_info inbound;
- struct mtx inbound_lock;
-
struct taskqueue * rxq;
struct task channel_task;
hv_vmbus_pfn_channel_callback on_channel_callback;
@@ -804,13 +803,6 @@ typedef struct hv_vmbus_channel {
* response on the same channel.
*/
- /*
- * Multi-channel creation callback. This callback will be called in
- * process context when a Multi-channel offer is received from the host.
- * The guest can open the Multi-channel in the context of this callback.
- */
- hv_vmbus_sc_creation_callback sc_creation_callback;
-
struct mtx sc_lock;
/*
@@ -818,18 +810,24 @@ typedef struct hv_vmbus_channel {
*/
TAILQ_HEAD(, hv_vmbus_channel) sc_list_anchor;
TAILQ_ENTRY(hv_vmbus_channel) sc_list_entry;
+ int subchan_cnt;
/*
	 * The primary channel this sub-channel belongs to.
* This will be NULL for the primary channel.
*/
struct hv_vmbus_channel *primary_channel;
+
/*
- * Support per channel state for use by vmbus drivers.
+ * Driver private data
*/
- void *per_channel_state;
+ void *hv_chan_priv1;
+ void *hv_chan_priv2;
+ void *hv_chan_priv3;
} hv_vmbus_channel;
+#define HV_VMBUS_CHAN_ISPRIMARY(chan) ((chan)->primary_channel == NULL)
+
static inline void
hv_set_channel_read_state(hv_vmbus_channel* channel, boolean_t state)
{
@@ -908,6 +906,11 @@ int hv_vmbus_channel_teardown_gpdal(
struct hv_vmbus_channel* vmbus_select_outgoing_channel(struct hv_vmbus_channel *promary);
+void vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu);
+struct hv_vmbus_channel **
+ vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt);
+void vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt);
+
/**
* @brief Get physical address from virtual
*/
diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c
index 9a89b62..a62f450 100644
--- a/sys/dev/hyperv/netvsc/hv_net_vsc.c
+++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
@@ -48,21 +48,27 @@
#include "hv_rndis.h"
#include "hv_rndis_filter.h"
+/* priv1 and priv2 are consumed by the main driver */
+#define hv_chan_rdbuf hv_chan_priv3
+
MALLOC_DEFINE(M_NETVSC, "netvsc", "Hyper-V netvsc driver");
/*
* Forward declarations
*/
-static void hv_nv_on_channel_callback(void *context);
+static void hv_nv_on_channel_callback(void *xchan);
static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device);
static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device);
static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev);
static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev);
static int hv_nv_connect_to_vsp(struct hv_device *device);
static void hv_nv_on_send_completion(netvsc_dev *net_dev,
- struct hv_device *device, hv_vm_packet_descriptor *pkt);
+ struct hv_device *device, struct hv_vmbus_channel *, hv_vm_packet_descriptor *pkt);
+static void hv_nv_on_receive_completion(struct hv_vmbus_channel *chan,
+ uint64_t tid, uint32_t status);
static void hv_nv_on_receive(netvsc_dev *net_dev,
- struct hv_device *device, hv_vm_packet_descriptor *pkt);
+ struct hv_device *device, struct hv_vmbus_channel *chan,
+ hv_vm_packet_descriptor *pkt);
/*
*
@@ -115,7 +121,7 @@ hv_nv_get_inbound_net_device(struct hv_device *device)
* permit incoming packets if and only if there
* are outstanding sends.
*/
- if (net_dev->destroy && net_dev->num_outstanding_sends == 0) {
+ if (net_dev->destroy) {
return (NULL);
}
@@ -654,6 +660,16 @@ hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
hv_nv_destroy_send_buffer(net_dev);
}
+void
+hv_nv_subchan_attach(struct hv_vmbus_channel *chan)
+{
+
+ chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK);
+ hv_vmbus_channel_open(chan, NETVSC_DEVICE_RING_BUFFER_SIZE,
+ NETVSC_DEVICE_RING_BUFFER_SIZE, NULL, 0,
+ hv_nv_on_channel_callback, chan);
+}
+
/*
* Net VSC on device add
*
@@ -662,25 +678,30 @@ hv_nv_disconnect_from_vsp(netvsc_dev *net_dev)
netvsc_dev *
hv_nv_on_device_add(struct hv_device *device, void *additional_info)
{
+ struct hv_vmbus_channel *chan = device->channel;
netvsc_dev *net_dev;
int ret = 0;
net_dev = hv_nv_alloc_net_device(device);
- if (!net_dev)
- goto cleanup;
+ if (net_dev == NULL)
+ return NULL;
/* Initialize the NetVSC channel extension */
sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");
+ chan->hv_chan_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK);
+
/*
* Open the channel
*/
- ret = hv_vmbus_channel_open(device->channel,
+ ret = hv_vmbus_channel_open(chan,
NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE,
- NULL, 0, hv_nv_on_channel_callback, device);
- if (ret != 0)
+ NULL, 0, hv_nv_on_channel_callback, chan);
+ if (ret != 0) {
+ free(chan->hv_chan_rdbuf, M_NETVSC);
goto cleanup;
+ }
/*
* Connect with the NetVsp
@@ -693,18 +714,16 @@ hv_nv_on_device_add(struct hv_device *device, void *additional_info)
close:
/* Now, we can close the channel safely */
-
- hv_vmbus_channel_close(device->channel);
+ free(chan->hv_chan_rdbuf, M_NETVSC);
+ hv_vmbus_channel_close(chan);
cleanup:
/*
* Free the packet buffers on the netvsc device packet queue.
* Release other resources.
*/
- if (net_dev) {
- sema_destroy(&net_dev->channel_init_sema);
- free(net_dev, M_NETVSC);
- }
+ sema_destroy(&net_dev->channel_init_sema);
+ free(net_dev, M_NETVSC);
return (NULL);
}
@@ -719,14 +738,7 @@ hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
netvsc_dev *net_dev = sc->net_dev;;
/* Stop outbound traffic ie sends and receives completions */
- mtx_lock(&device->channel->inbound_lock);
net_dev->destroy = TRUE;
- mtx_unlock(&device->channel->inbound_lock);
-
- /* Wait for all send completions */
- while (net_dev->num_outstanding_sends) {
- DELAY(100);
- }
hv_nv_disconnect_from_vsp(net_dev);
@@ -739,6 +751,7 @@ hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE;
}
+ free(device->channel->hv_chan_rdbuf, M_NETVSC);
hv_vmbus_channel_close(device->channel);
sema_destroy(&net_dev->channel_init_sema);
@@ -752,7 +765,8 @@ hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel)
*/
static void
hv_nv_on_send_completion(netvsc_dev *net_dev,
- struct hv_device *device, hv_vm_packet_descriptor *pkt)
+ struct hv_device *device, struct hv_vmbus_channel *chan,
+ hv_vm_packet_descriptor *pkt)
{
nvsp_msg *nvsp_msg_pkt;
netvsc_packet *net_vsc_pkt;
@@ -764,7 +778,9 @@ hv_nv_on_send_completion(netvsc_dev *net_dev,
|| nvsp_msg_pkt->hdr.msg_type
== nvsp_msg_1_type_send_rx_buf_complete
|| nvsp_msg_pkt->hdr.msg_type
- == nvsp_msg_1_type_send_send_buf_complete) {
+ == nvsp_msg_1_type_send_send_buf_complete
+ || nvsp_msg_pkt->hdr.msg_type
+ == nvsp_msg5_type_subchannel) {
/* Copy the response back */
memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt,
sizeof(nvsp_msg));
@@ -801,12 +817,10 @@ hv_nv_on_send_completion(netvsc_dev *net_dev,
}
/* Notify the layer above us */
- net_vsc_pkt->compl.send.on_send_completion(
+ net_vsc_pkt->compl.send.on_send_completion(chan,
net_vsc_pkt->compl.send.send_completion_context);
}
-
- atomic_subtract_int(&net_dev->num_outstanding_sends, 1);
}
}
@@ -816,16 +830,11 @@ hv_nv_on_send_completion(netvsc_dev *net_dev,
* Returns 0 on success, non-zero on failure.
*/
int
-hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
+hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt)
{
- netvsc_dev *net_dev;
nvsp_msg send_msg;
int ret;
- net_dev = hv_nv_get_outbound_net_device(device);
- if (!net_dev)
- return (ENODEV);
-
send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt;
if (pkt->is_data_pkt) {
/* 0 is RMC_DATA */
@@ -841,20 +850,16 @@ hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
pkt->send_buf_section_size;
if (pkt->page_buf_count) {
- ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel,
+ ret = hv_vmbus_channel_send_packet_pagebuffer(chan,
pkt->page_buffers, pkt->page_buf_count,
&send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt);
} else {
- ret = hv_vmbus_channel_send_packet(device->channel,
+ ret = hv_vmbus_channel_send_packet(chan,
&send_msg, sizeof(nvsp_msg), (uint64_t)(uintptr_t)pkt,
HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
- /* Record outstanding send only if send_packet() succeeded */
- if (ret == 0)
- atomic_add_int(&net_dev->num_outstanding_sends, 1);
-
return (ret);
}
@@ -866,7 +871,7 @@ hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt)
*/
static void
hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device,
- hv_vm_packet_descriptor *pkt)
+ struct hv_vmbus_channel *chan, hv_vm_packet_descriptor *pkt)
{
hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
nvsp_msg *nvsp_msg_pkt;
@@ -916,7 +921,7 @@ hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device,
net_vsc_pkt->tot_data_buf_len =
vm_xfer_page_pkt->ranges[i].byte_count;
- hv_rf_on_receive(net_dev, device, net_vsc_pkt);
+ hv_rf_on_receive(net_dev, device, chan, net_vsc_pkt);
if (net_vsc_pkt->status != nvsp_status_success) {
status = nvsp_status_failure;
}
@@ -927,9 +932,8 @@ hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device,
* messages (not just data messages) will trigger a response
* message back to the host.
*/
- hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id,
+ hv_nv_on_receive_completion(chan, vm_xfer_page_pkt->d.transaction_id,
status);
- hv_rf_receive_rollup(net_dev);
}
/*
@@ -937,8 +941,8 @@ hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device,
*
* Send a receive completion packet to RNDIS device (ie NetVsp)
*/
-void
-hv_nv_on_receive_completion(struct hv_device *device, uint64_t tid,
+static void
+hv_nv_on_receive_completion(struct hv_vmbus_channel *chan, uint64_t tid,
uint32_t status)
{
nvsp_msg rx_comp_msg;
@@ -953,7 +957,7 @@ hv_nv_on_receive_completion(struct hv_device *device, uint64_t tid,
retry_send_cmplt:
/* Send the completion */
- ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg,
+ ret = hv_vmbus_channel_send_packet(chan, &rx_comp_msg,
sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
if (ret == 0) {
/* success */
@@ -970,12 +974,53 @@ retry_send_cmplt:
}
/*
+ * Net VSC receiving vRSS send table from VSP
+ */
+static void
+hv_nv_send_table(struct hv_device *device, hv_vm_packet_descriptor *pkt)
+{
+ netvsc_dev *net_dev;
+ nvsp_msg *nvsp_msg_pkt;
+ int i;
+ uint32_t count, *table;
+
+ net_dev = hv_nv_get_inbound_net_device(device);
+ if (!net_dev)
+ return;
+
+ nvsp_msg_pkt =
+ (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
+
+ if (nvsp_msg_pkt->hdr.msg_type !=
+ nvsp_msg5_type_send_indirection_table) {
+ printf("Netvsc: !Warning! receive msg type not "
+ "send_indirection_table. type = %d\n",
+ nvsp_msg_pkt->hdr.msg_type);
+ return;
+ }
+
+ count = nvsp_msg_pkt->msgs.vers_5_msgs.send_table.count;
+ if (count != VRSS_SEND_TABLE_SIZE) {
+ printf("Netvsc: Received wrong send table size: %u\n", count);
+ return;
+ }
+
+ table = (uint32_t *)
+ ((unsigned long)&nvsp_msg_pkt->msgs.vers_5_msgs.send_table +
+ nvsp_msg_pkt->msgs.vers_5_msgs.send_table.offset);
+
+ for (i = 0; i < count; i++)
+ net_dev->vrss_send_table[i] = table[i];
+}
+
+/*
* Net VSC on channel callback
*/
static void
-hv_nv_on_channel_callback(void *context)
+hv_nv_on_channel_callback(void *xchan)
{
- struct hv_device *device = (struct hv_device *)context;
+ struct hv_vmbus_channel *chan = xchan;
+ struct hv_device *device = chan->device;
netvsc_dev *net_dev;
device_t dev = device->device;
uint32_t bytes_rxed;
@@ -989,20 +1034,24 @@ hv_nv_on_channel_callback(void *context)
if (net_dev == NULL)
return;
- buffer = net_dev->callback_buf;
+ buffer = chan->hv_chan_rdbuf;
do {
- ret = hv_vmbus_channel_recv_packet_raw(device->channel,
+ ret = hv_vmbus_channel_recv_packet_raw(chan,
buffer, bufferlen, &bytes_rxed, &request_id);
if (ret == 0) {
if (bytes_rxed > 0) {
desc = (hv_vm_packet_descriptor *)buffer;
switch (desc->type) {
case HV_VMBUS_PACKET_TYPE_COMPLETION:
- hv_nv_on_send_completion(net_dev, device, desc);
+ hv_nv_on_send_completion(net_dev, device,
+ chan, desc);
break;
case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
- hv_nv_on_receive(net_dev, device, desc);
+ hv_nv_on_receive(net_dev, device, chan, desc);
+ break;
+ case HV_VMBUS_PACKET_TYPE_DATA_IN_BAND:
+ hv_nv_send_table(device, desc);
break;
default:
device_printf(dev,
@@ -1036,5 +1085,5 @@ hv_nv_on_channel_callback(void *context)
if (bufferlen > NETVSC_PACKET_SIZE)
free(buffer, M_NETVSC);
- hv_rf_channel_rollup(net_dev);
+ hv_rf_channel_rollup(chan);
}
diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h
index 95dee17..7c43f64 100644
--- a/sys/dev/hyperv/netvsc/hv_net_vsc.h
+++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
@@ -86,6 +86,92 @@ MALLOC_DECLARE(M_NETVSC);
*/
#define NVSP_MAX_PACKETS_PER_RECEIVE 375
+/* vRSS stuff */
+#define RNDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
+#define RNDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
+
+#define RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
+#define RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
+
+struct rndis_obj_header {
+ uint8_t type;
+ uint8_t rev;
+ uint16_t size;
+} __packed;
+
+/* rndis_recv_scale_cap/cap_flag */
+#define RNDIS_RSS_CAPS_MESSAGE_SIGNALED_INTERRUPTS 0x01000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_ISR 0x02000000
+#define RNDIS_RSS_CAPS_CLASSIFICATION_AT_DPC 0x04000000
+#define RNDIS_RSS_CAPS_USING_MSI_X 0x08000000
+#define RNDIS_RSS_CAPS_RSS_AVAILABLE_ON_PORTS 0x10000000
+#define RNDIS_RSS_CAPS_SUPPORTS_MSI_X 0x20000000
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV4 0x00000100
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6 0x00000200
+#define RNDIS_RSS_CAPS_HASH_TYPE_TCP_IPV6_EX 0x00000400
+
+/* RNDIS_RECEIVE_SCALE_CAPABILITIES */
+struct rndis_recv_scale_cap {
+ struct rndis_obj_header hdr;
+ uint32_t cap_flag;
+ uint32_t num_int_msg;
+ uint32_t num_recv_que;
+ uint16_t num_indirect_tabent;
+} __packed;
+
+/* rndis_recv_scale_param flags */
+#define RNDIS_RSS_PARAM_FLAG_BASE_CPU_UNCHANGED 0x0001
+#define RNDIS_RSS_PARAM_FLAG_HASH_INFO_UNCHANGED 0x0002
+#define RNDIS_RSS_PARAM_FLAG_ITABLE_UNCHANGED 0x0004
+#define RNDIS_RSS_PARAM_FLAG_HASH_KEY_UNCHANGED 0x0008
+#define RNDIS_RSS_PARAM_FLAG_DISABLE_RSS 0x0010
+
+/* Hash info bits */
+#define RNDIS_HASH_FUNC_TOEPLITZ 0x00000001
+#define RNDIS_HASH_IPV4 0x00000100
+#define RNDIS_HASH_TCP_IPV4 0x00000200
+#define RNDIS_HASH_IPV6 0x00000400
+#define RNDIS_HASH_IPV6_EX 0x00000800
+#define RNDIS_HASH_TCP_IPV6 0x00001000
+#define RNDIS_HASH_TCP_IPV6_EX 0x00002000
+
+#define RNDIS_RSS_INDIRECTION_TABLE_MAX_SIZE_REVISION_2 (128 * 4)
+#define RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40
+
+#define ITAB_NUM 128
+#define HASH_KEYLEN RNDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
+
+/* RNDIS_RECEIVE_SCALE_PARAMETERS */
+typedef struct rndis_recv_scale_param_ {
+ struct rndis_obj_header hdr;
+
+ /* Qualifies the rest of the information */
+ uint16_t flag;
+
+ /* The base CPU number to do receive processing. not used */
+ uint16_t base_cpu_number;
+
+ /* This describes the hash function and type being enabled */
+ uint32_t hashinfo;
+
+ /* The size of indirection table array */
+ uint16_t indirect_tabsize;
+
+ /* The offset of the indirection table from the beginning of this
+ * structure
+ */
+ uint32_t indirect_taboffset;
+
+ /* The size of the hash secret key */
+ uint16_t hashkey_size;
+
+ /* The offset of the secret key from the beginning of this structure */
+ uint32_t hashkey_offset;
+
+ uint32_t processor_masks_offset;
+ uint32_t num_processor_masks;
+ uint32_t processor_masks_entry_size;
+} rndis_recv_scale_param;
typedef enum nvsp_msg_type_ {
nvsp_msg_type_none = 0,
@@ -146,6 +232,27 @@ typedef enum nvsp_msg_type_ {
nvsp_msg_2_type_alloc_chimney_handle,
nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+ nvsp_msg2_max = nvsp_msg_2_type_alloc_chimney_handle_complete,
+
+ /*
+ * Version 4 Messages
+ */
+ nvsp_msg4_type_send_vf_association,
+ nvsp_msg4_type_switch_data_path,
+ nvsp_msg4_type_uplink_connect_state_deprecated,
+
+ nvsp_msg4_max = nvsp_msg4_type_uplink_connect_state_deprecated,
+
+ /*
+ * Version 5 Messages
+ */
+ nvsp_msg5_type_oid_query_ex,
+ nvsp_msg5_type_oid_query_ex_comp,
+ nvsp_msg5_type_subchannel,
+ nvsp_msg5_type_send_indirection_table,
+
+ nvsp_msg5_max = nvsp_msg5_type_send_indirection_table,
} nvsp_msg_type;
typedef enum nvsp_status_ {
@@ -793,6 +900,39 @@ typedef struct nvsp_2_msg_send_vmq_rndis_pkt_complete_
uint32_t status;
} __packed nvsp_2_msg_send_vmq_rndis_pkt_complete;
+/*
+ * Version 5 messages
+ */
+enum nvsp_subchannel_operation {
+ NVSP_SUBCHANNEL_NONE = 0,
+ NVSP_SUBCHANNE_ALLOCATE,
+ NVSP_SUBCHANNE_MAX
+};
+
+typedef struct nvsp_5_subchannel_request_
+{
+ uint32_t op;
+ uint32_t num_subchannels;
+} __packed nvsp_5_subchannel_request;
+
+typedef struct nvsp_5_subchannel_complete_
+{
+ uint32_t status;
+ /* Actual number of subchannels allocated */
+ uint32_t num_subchannels;
+} __packed nvsp_5_subchannel_complete;
+
+typedef struct nvsp_5_send_indirect_table_
+{
+ /* The number of entries in the send indirection table */
+ uint32_t count;
+ /*
+	 * The offset of the send indirection table from top of
+ * this struct. The send indirection table tells which channel
+ * to put the send traffic on. Each entry is a channel number.
+ */
+ uint32_t offset;
+} __packed nvsp_5_send_indirect_table;
typedef union nvsp_1_msg_uber_ {
nvsp_1_msg_send_ndis_version send_ndis_vers;
@@ -838,11 +978,18 @@ typedef union nvsp_2_msg_uber_ {
nvsp_2_msg_alloc_chimney_handle_complete alloc_chimney_handle_complete;
} __packed nvsp_2_msg_uber;
+typedef union nvsp_5_msg_uber_
+{
+ nvsp_5_subchannel_request subchannel_request;
+ nvsp_5_subchannel_complete subchn_complete;
+ nvsp_5_send_indirect_table send_table;
+} __packed nvsp_5_msg_uber;
typedef union nvsp_all_msgs_ {
nvsp_msg_init_uber init_msgs;
nvsp_1_msg_uber vers_1_msgs;
nvsp_2_msg_uber vers_2_msgs;
+ nvsp_5_msg_uber vers_5_msgs;
} __packed nvsp_all_msgs;
/*
@@ -883,6 +1030,7 @@ typedef struct nvsp_msg_ {
#define NETVSC_MAX_CONFIGURABLE_MTU (9 * 1024)
#define NETVSC_PACKET_SIZE PAGE_SIZE
+#define VRSS_SEND_TABLE_SIZE 16
/*
* Data types
@@ -893,7 +1041,6 @@ typedef struct nvsp_msg_ {
*/
typedef struct netvsc_dev_ {
struct hv_device *dev;
- int num_outstanding_sends;
/* Send buffer allocated by us but manages by NetVSP */
void *send_buf;
@@ -924,12 +1071,15 @@ typedef struct netvsc_dev_ {
hv_bool_uint8_t destroy;
/* Negotiated NVSP version */
uint32_t nvsp_version;
-
- uint8_t callback_buf[NETVSC_PACKET_SIZE];
+
+ uint32_t num_channel;
+
+ uint32_t vrss_send_table[VRSS_SEND_TABLE_SIZE];
} netvsc_dev;
+struct hv_vmbus_channel;
-typedef void (*pfn_on_send_rx_completion)(void *);
+typedef void (*pfn_on_send_rx_completion)(struct hv_vmbus_channel *, void *);
#define NETVSC_DEVICE_RING_BUFFER_SIZE (128 * PAGE_SIZE)
#define NETVSC_PACKET_MAXPAGE 32
@@ -1000,10 +1150,12 @@ struct buf_ring;
#endif
struct hn_rx_ring {
- struct lro_ctrl hn_lro;
+ struct ifnet *hn_ifp;
+ int hn_rx_idx;
/* Trust csum verification on host side */
int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */
+ struct lro_ctrl hn_lro;
u_long hn_csum_ip;
u_long hn_csum_tcp;
@@ -1011,12 +1163,20 @@ struct hn_rx_ring {
u_long hn_csum_trusted;
u_long hn_lro_tried;
u_long hn_small_pkts;
+ u_long hn_pkts;
+ u_long hn_rss_pkts;
+
+ /* Rarely used stuffs */
+ struct sysctl_oid *hn_rx_sysctl_tree;
+ int hn_rx_flags;
} __aligned(CACHE_LINE_SIZE);
#define HN_TRUST_HCSUM_IP 0x0001
#define HN_TRUST_HCSUM_TCP 0x0002
#define HN_TRUST_HCSUM_UDP 0x0004
+#define HN_RX_FLAG_ATTACHED 0x1
+
struct hn_tx_ring {
#ifndef HN_USE_TXDESC_BUFRING
struct mtx hn_txlist_spin;
@@ -1026,7 +1186,8 @@ struct hn_tx_ring {
#endif
int hn_txdesc_cnt;
int hn_txdesc_avail;
- int hn_has_txeof;
+ u_short hn_has_txeof;
+ u_short hn_txdone_cnt;
int hn_sched_tx;
void (*hn_txeof)(struct hn_tx_ring *);
@@ -1034,8 +1195,13 @@ struct hn_tx_ring {
struct task hn_tx_task;
struct task hn_txeof_task;
+ struct buf_ring *hn_mbuf_br;
+ int hn_oactive;
+ int hn_tx_idx;
+
struct mtx hn_tx_lock;
struct hn_softc *hn_sc;
+ struct hv_vmbus_channel *hn_chan;
int hn_direct_tx_size;
int hn_tx_chimney_size;
@@ -1046,14 +1212,19 @@ struct hn_tx_ring {
u_long hn_send_failed;
u_long hn_txdma_failed;
u_long hn_tx_collapsed;
+ u_long hn_tx_chimney_tried;
u_long hn_tx_chimney;
+ u_long hn_pkts;
/* Rarely used stuffs */
struct hn_txdesc *hn_txdesc;
bus_dma_tag_t hn_tx_rndis_dtag;
struct sysctl_oid *hn_tx_sysctl_tree;
+ int hn_tx_flags;
} __aligned(CACHE_LINE_SIZE);
+#define HN_TX_FLAG_ATTACHED 0x1
+
/*
* Device-specific softc structure
*/
@@ -1073,13 +1244,18 @@ typedef struct hn_softc {
netvsc_dev *net_dev;
int hn_rx_ring_cnt;
+ int hn_rx_ring_inuse;
struct hn_rx_ring *hn_rx_ring;
int hn_tx_ring_cnt;
+ int hn_tx_ring_inuse;
struct hn_tx_ring *hn_tx_ring;
+
+ int hn_cpu;
int hn_tx_chimney_max;
struct taskqueue *hn_tx_taskq;
struct sysctl_oid *hn_tx_sysctl_tree;
+ struct sysctl_oid *hn_rx_sysctl_tree;
} hn_softc_t;
/*
@@ -1088,14 +1264,13 @@ typedef struct hn_softc {
extern int hv_promisc_mode;
void netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status);
-void hv_nv_on_receive_completion(struct hv_device *device,
- uint64_t tid, uint32_t status);
netvsc_dev *hv_nv_on_device_add(struct hv_device *device,
void *additional_info);
int hv_nv_on_device_remove(struct hv_device *device,
boolean_t destroy_channel);
-int hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt);
+int hv_nv_on_send(struct hv_vmbus_channel *chan, netvsc_packet *pkt);
int hv_nv_get_next_send_section(netvsc_dev *net_dev);
+void hv_nv_subchan_attach(struct hv_vmbus_channel *chan);
#endif /* __HV_NET_VSC_H__ */
diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
index 0f4425e..f670c12 100644
--- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
@@ -1,6 +1,6 @@
/*-
* Copyright (c) 2010-2012 Citrix Inc.
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
*
@@ -119,6 +119,8 @@ __FBSDID("$FreeBSD$");
#include "hv_rndis.h"
#include "hv_rndis_filter.h"
+#define hv_chan_rxr hv_chan_priv1
+#define hv_chan_txr hv_chan_priv2
/* Short for Hyper-V network interface */
#define NETVSC_DEVNAME "hn"
@@ -136,8 +138,11 @@ __FBSDID("$FreeBSD$");
#define HN_LROENT_CNT_DEF 128
+#define HN_RING_CNT_DEF_MAX 8
+
#define HN_RNDIS_MSG_LEN \
(sizeof(rndis_msg) + \
+ RNDIS_HASHVAL_PPI_SIZE + \
RNDIS_VLAN_PPI_SIZE + \
RNDIS_TSO_PPI_SIZE + \
RNDIS_CSUM_PPI_SIZE)
@@ -152,6 +157,8 @@ __FBSDID("$FreeBSD$");
#define HN_DIRECT_TX_SIZE_DEF 128
+#define HN_EARLY_TXEOF_THRESH 8
+
struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
SLIST_ENTRY(hn_txdesc) link;
@@ -180,6 +187,7 @@ struct hn_txdesc {
#define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP)
#define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP)
+#define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF (25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu)
@@ -208,7 +216,8 @@ struct hn_txdesc {
int hv_promisc_mode = 0; /* normal mode by default */
-SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD, NULL, "Hyper-V network interface");
+SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+ "Hyper-V network interface");
/* Trust tcp segments verification on host side. */
static int hn_trust_hosttcp = 1;
@@ -231,12 +240,10 @@ SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
"Trust ip packet verification on host side, "
"when csum info is missing (global setting)");
-#if __FreeBSD_version >= 1100045
/* Limit TSO burst size */
static int hn_tso_maxlen = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
&hn_tso_maxlen, 0, "TSO burst limit");
-#endif
/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
@@ -274,6 +281,25 @@ static int hn_bind_tx_taskq = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
&hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");
+static int hn_use_if_start = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
+ &hn_use_if_start, 0, "Use if_start TX method");
+
+static int hn_chan_cnt = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
+ &hn_chan_cnt, 0,
+ "# of channels to use; each channel has one RX ring and one TX ring");
+
+static int hn_tx_ring_cnt = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
+ &hn_tx_ring_cnt, 0, "# of TX rings to use");
+
+static int hn_tx_swq_depth = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
+ &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
+
+static u_int hn_cpu_index;
+
/*
* Forward declarations
*/
@@ -303,15 +329,45 @@ static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_check_iplen(const struct mbuf *, int);
static int hn_create_tx_ring(struct hn_softc *, int);
static void hn_destroy_tx_ring(struct hn_tx_ring *);
-static int hn_create_tx_data(struct hn_softc *);
+static int hn_create_tx_data(struct hn_softc *, int);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_start_taskfunc(void *, int);
static void hn_start_txeof_taskfunc(void *, int);
static void hn_stop_tx_tasks(struct hn_softc *);
static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **);
-static void hn_create_rx_data(struct hn_softc *sc);
+static void hn_create_rx_data(struct hn_softc *sc, int);
static void hn_destroy_rx_data(struct hn_softc *sc);
static void hn_set_tx_chimney_size(struct hn_softc *, int);
+static void hn_channel_attach(struct hn_softc *, struct hv_vmbus_channel *);
+static void hn_subchan_attach(struct hn_softc *, struct hv_vmbus_channel *);
+
+static int hn_transmit(struct ifnet *, struct mbuf *);
+static void hn_xmit_qflush(struct ifnet *);
+static int hn_xmit(struct hn_tx_ring *, int);
+static void hn_xmit_txeof(struct hn_tx_ring *);
+static void hn_xmit_taskfunc(void *, int);
+static void hn_xmit_txeof_taskfunc(void *, int);
+
+#if __FreeBSD_version >= 1100099
+static void
+hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
+{
+ int i;
+
+ for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
+ sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
+}
+#endif
+
+static int
+hn_get_txswq_depth(const struct hn_tx_ring *txr)
+{
+
+ KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
+ if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
+ return txr->hn_txdesc_cnt;
+ return hn_tx_swq_depth;
+}
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
@@ -353,7 +409,7 @@ netvsc_probe(device_t dev)
p = vmbus_get_type(dev);
if (!memcmp(p, &g_net_vsc_device_type.data, sizeof(hv_guid))) {
- device_set_desc(dev, "Synthetic Network Interface");
+ device_set_desc(dev, "Hyper-V Network Interface");
if (bootverbose)
printf("Netvsc probe... DONE \n");
@@ -386,21 +442,16 @@ static int
netvsc_attach(device_t dev)
{
struct hv_device *device_ctx = vmbus_get_devctx(dev);
+ struct hv_vmbus_channel *pri_chan;
netvsc_device_info device_info;
hn_softc_t *sc;
int unit = device_get_unit(dev);
struct ifnet *ifp = NULL;
- int error;
-#if __FreeBSD_version >= 1100045
+ int error, ring_cnt, tx_ring_cnt;
int tso_maxlen;
-#endif
sc = device_get_softc(dev);
- if (sc == NULL) {
- return (ENOMEM);
- }
- bzero(sc, sizeof(hn_softc_t));
sc->hn_unit = unit;
sc->hn_dev = dev;
@@ -431,26 +482,69 @@ netvsc_attach(device_t dev)
ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
ifp->if_softc = sc;
+ if_initname(ifp, device_get_name(dev), device_get_unit(dev));
+
+ /*
+ * Figure out the # of RX rings (ring_cnt) and the # of TX rings
+ * to use (tx_ring_cnt).
+ *
+ * NOTE:
+ * The # of RX rings to use is same as the # of channels to use.
+ */
+ ring_cnt = hn_chan_cnt;
+ if (ring_cnt <= 0) {
+ /* Default */
+ ring_cnt = mp_ncpus;
+ if (ring_cnt > HN_RING_CNT_DEF_MAX)
+ ring_cnt = HN_RING_CNT_DEF_MAX;
+ } else if (ring_cnt > mp_ncpus) {
+ ring_cnt = mp_ncpus;
+ }
+
+ tx_ring_cnt = hn_tx_ring_cnt;
+ if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
+ tx_ring_cnt = ring_cnt;
+ if (hn_use_if_start) {
+ /* ifnet.if_start only needs one TX ring. */
+ tx_ring_cnt = 1;
+ }
+
+ /*
+ * Set the leader CPU for channels.
+ */
+ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
- error = hn_create_tx_data(sc);
+ error = hn_create_tx_data(sc, tx_ring_cnt);
if (error)
goto failed;
+ hn_create_rx_data(sc, ring_cnt);
- hn_create_rx_data(sc);
-
- if_initname(ifp, device_get_name(dev), device_get_unit(dev));
- ifp->if_dunit = unit;
- ifp->if_dname = NETVSC_DEVNAME;
+ /*
+ * Associate the first TX/RX ring w/ the primary channel.
+ */
+ pri_chan = device_ctx->channel;
+ KASSERT(HV_VMBUS_CHAN_ISPRIMARY(pri_chan), ("not primary channel"));
+ KASSERT(pri_chan->offer_msg.offer.sub_channel_index == 0,
+ ("primary channel subidx %u",
+ pri_chan->offer_msg.offer.sub_channel_index));
+ hn_channel_attach(sc, pri_chan);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = hn_ioctl;
- ifp->if_start = hn_start;
ifp->if_init = hn_ifinit;
/* needed by hv_rf_on_device_add() code */
ifp->if_mtu = ETHERMTU;
- IFQ_SET_MAXLEN(&ifp->if_snd, 512);
- ifp->if_snd.ifq_drv_maxlen = 511;
- IFQ_SET_READY(&ifp->if_snd);
+ if (hn_use_if_start) {
+ int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);
+
+ ifp->if_start = hn_start;
+ IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
+ ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
+ IFQ_SET_READY(&ifp->if_snd);
+ } else {
+ ifp->if_transmit = hn_transmit;
+ ifp->if_qflush = hn_xmit_qflush;
+ }
ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
@@ -470,15 +564,58 @@ netvsc_attach(device_t dev)
IFCAP_LRO;
ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO;
- error = hv_rf_on_device_add(device_ctx, &device_info);
+ error = hv_rf_on_device_add(device_ctx, &device_info, ring_cnt);
if (error)
goto failed;
+ KASSERT(sc->net_dev->num_channel > 0 &&
+ sc->net_dev->num_channel <= sc->hn_rx_ring_inuse,
+ ("invalid channel count %u, should be less than %d",
+ sc->net_dev->num_channel, sc->hn_rx_ring_inuse));
+
+ /*
+ * Set the # of TX/RX rings that could be used according to
+ * the # of channels that host offered.
+ */
+ if (sc->hn_tx_ring_inuse > sc->net_dev->num_channel)
+ sc->hn_tx_ring_inuse = sc->net_dev->num_channel;
+ sc->hn_rx_ring_inuse = sc->net_dev->num_channel;
+ device_printf(dev, "%d TX ring, %d RX ring\n",
+ sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
+
+ if (sc->net_dev->num_channel > 1) {
+ struct hv_vmbus_channel **subchan;
+ int subchan_cnt = sc->net_dev->num_channel - 1;
+ int i;
+
+ /* Wait for sub-channels setup to complete. */
+ subchan = vmbus_get_subchan(pri_chan, subchan_cnt);
+
+ /* Attach the sub-channels. */
+ for (i = 0; i < subchan_cnt; ++i) {
+ /* NOTE: Calling order is critical. */
+ hn_subchan_attach(sc, subchan[i]);
+ hv_nv_subchan_attach(subchan[i]);
+ }
+
+ /* Release the sub-channels */
+ vmbus_rel_subchan(subchan, subchan_cnt);
+ device_printf(dev, "%d sub-channels setup done\n", subchan_cnt);
+ }
+
+#if __FreeBSD_version >= 1100099
+ if (sc->hn_rx_ring_inuse > 1) {
+ /*
+ * Reduce TCP segment aggregation limit for multiple
+ * RX rings to increase ACK timeliness.
+ */
+ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
+ }
+#endif
if (device_info.link_state == 0) {
sc->hn_carrier = 1;
}
-#if __FreeBSD_version >= 1100045
tso_maxlen = hn_tso_maxlen;
if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET)
tso_maxlen = IP_MAXPACKET;
@@ -487,14 +624,11 @@ netvsc_attach(device_t dev)
ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
ifp->if_hw_tsomax = tso_maxlen -
(ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
-#endif
ether_ifattach(ifp, device_info.mac_addr);
-#if __FreeBSD_version >= 1100045
if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax,
ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
-#endif
sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
@@ -674,8 +808,15 @@ hn_txdesc_hold(struct hn_txdesc *txd)
atomic_add_int(&txd->refs, 1);
}
+static __inline void
+hn_txeof(struct hn_tx_ring *txr)
+{
+ txr->hn_has_txeof = 0;
+ txr->hn_txeof(txr);
+}
+
static void
-hn_tx_done(void *xpkt)
+hn_tx_done(struct hv_vmbus_channel *chan, void *xpkt)
{
netvsc_packet *packet = xpkt;
struct hn_txdesc *txd;
@@ -685,17 +826,28 @@ hn_tx_done(void *xpkt)
packet->compl.send.send_completion_tid;
txr = txd->txr;
+ KASSERT(txr->hn_chan == chan,
+ ("channel mismatch, on channel%u, should be channel%u",
+ chan->offer_msg.offer.sub_channel_index,
+ txr->hn_chan->offer_msg.offer.sub_channel_index));
+
txr->hn_has_txeof = 1;
hn_txdesc_put(txr, txd);
+
+ ++txr->hn_txdone_cnt;
+ if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
+ txr->hn_txdone_cnt = 0;
+ if (txr->hn_oactive)
+ hn_txeof(txr);
+ }
}
void
-netvsc_channel_rollup(struct hv_device *device_ctx)
+netvsc_channel_rollup(struct hv_vmbus_channel *chan)
{
- struct hn_softc *sc = device_get_softc(device_ctx->device);
- struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; /* TODO: vRSS */
+ struct hn_tx_ring *txr = chan->hv_chan_txr;
#if defined(INET) || defined(INET6)
- struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */
+ struct hn_rx_ring *rxr = chan->hv_chan_rxr;
struct lro_ctrl *lro = &rxr->hn_lro;
struct lro_entry *queued;
@@ -705,11 +857,16 @@ netvsc_channel_rollup(struct hv_device *device_ctx)
}
#endif
- if (!txr->hn_has_txeof)
+ /*
+ * NOTE:
+ * 'txr' could be NULL, if multiple channels and
+ * ifnet.if_start method are enabled.
+ */
+ if (txr == NULL || !txr->hn_has_txeof)
return;
- txr->hn_has_txeof = 0;
- txr->hn_txeof(txr);
+ txr->hn_txdone_cnt = 0;
+ hn_txeof(txr);
}
/*
@@ -726,6 +883,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
rndis_msg *rndis_mesg;
rndis_packet *rndis_pkt;
rndis_per_packet_info *rppi;
+ struct rndis_hash_value *hash_value;
uint32_t rndis_msg_size;
packet = &txd->netvsc_pkt;
@@ -750,6 +908,18 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
rndis_msg_size = RNDIS_MESSAGE_SIZE(rndis_packet);
+ /*
+ * Set the hash value for this packet, so that the host could
+ * dispatch the TX done event for this packet back to this TX
+ * ring's channel.
+ */
+ rndis_msg_size += RNDIS_HASHVAL_PPI_SIZE;
+ rppi = hv_set_rppi_data(rndis_mesg, RNDIS_HASHVAL_PPI_SIZE,
+ nbl_hash_value);
+ hash_value = (struct rndis_hash_value *)((uint8_t *)rppi +
+ rppi->per_packet_info_offset);
+ hash_value->hash_value = txr->hn_tx_idx;
+
if (m_head->m_flags & M_VLANTAG) {
ndis_8021q_info *rppi_vlan_info;
@@ -851,6 +1021,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
netvsc_dev *net_dev = txr->hn_sc->net_dev;
uint32_t send_buf_section_idx;
+ txr->hn_tx_chimney_tried++;
send_buf_section_idx =
hv_nv_get_next_send_section(net_dev);
if (send_buf_section_idx !=
@@ -932,8 +1103,7 @@ done:
* associated w/ the txd will _not_ be freed.
*/
static int
-hn_send_pkt(struct ifnet *ifp, struct hv_device *device_ctx,
- struct hn_tx_ring *txr, struct hn_txdesc *txd)
+hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
int error, send_failed = 0;
@@ -942,10 +1112,17 @@ again:
* Make sure that txd is not freed before ETHER_BPF_MTAP.
*/
hn_txdesc_hold(txd);
- error = hv_nv_on_send(device_ctx, &txd->netvsc_pkt);
+ error = hv_nv_on_send(txr->hn_chan, &txd->netvsc_pkt);
if (!error) {
ETHER_BPF_MTAP(ifp, txd->m);
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if (!hn_use_if_start) {
+ if_inc_counter(ifp, IFCOUNTER_OBYTES,
+ txd->m->m_pkthdr.len);
+ if (txd->m->m_flags & M_MCAST)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+ }
+ txr->hn_pkts++;
}
hn_txdesc_put(txr, txd);
@@ -996,8 +1173,9 @@ hn_start_locked(struct hn_tx_ring *txr, int len)
{
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
- struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
+ KASSERT(hn_use_if_start,
+ ("hn_start_locked is called, when if_start is disabled"));
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
@@ -1038,7 +1216,7 @@ hn_start_locked(struct hn_tx_ring *txr, int len)
continue;
}
- error = hn_send_pkt(ifp, device_ctx, txr, txd);
+ error = hn_send_pkt(ifp, txr, txd);
if (__predict_false(error)) {
/* txd is freed, but m_head is not */
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
@@ -1057,10 +1235,6 @@ netvsc_linkstatus_callback(struct hv_device *device_obj, uint32_t status)
{
hn_softc_t *sc = device_get_softc(device_obj->device);
- if (sc == NULL) {
- return;
- }
-
if (status == 1) {
sc->hn_carrier = 1;
} else {
@@ -1133,26 +1307,18 @@ hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
* Note: This is no longer used as a callback
*/
int
-netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
- rndis_tcp_ip_csum_info *csum_info)
+netvsc_recv(struct hv_vmbus_channel *chan, netvsc_packet *packet,
+ const rndis_tcp_ip_csum_info *csum_info,
+ const struct rndis_hash_info *hash_info,
+ const struct rndis_hash_value *hash_value)
{
- struct hn_softc *sc = device_get_softc(device_ctx->device);
- struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */
+ struct hn_rx_ring *rxr = chan->hv_chan_rxr;
+ struct ifnet *ifp = rxr->hn_ifp;
struct mbuf *m_new;
- struct ifnet *ifp;
int size, do_lro = 0, do_csum = 1;
- if (sc == NULL) {
- return (0); /* TODO: KYS how can this be! */
- }
-
- ifp = sc->hn_ifp;
-
- ifp = sc->arpcom.ac_ifp;
-
- if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
return (0);
- }
/*
* Bail out if packet contains more data than configured MTU.
@@ -1161,8 +1327,10 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
return (0);
} else if (packet->tot_data_buf_len <= MHLEN) {
m_new = m_gethdr(M_NOWAIT, MT_DATA);
- if (m_new == NULL)
+ if (m_new == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
return (0);
+ }
memcpy(mtod(m_new, void *), packet->data,
packet->tot_data_buf_len);
m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len;
@@ -1182,7 +1350,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
if (m_new == NULL) {
- if_printf(ifp, "alloc mbuf failed.\n");
+ if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
return (0);
}
@@ -1251,7 +1419,6 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet,
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
}
- /* Rely on SW csum verification though... */
do_lro = 1;
} else if (pr == IPPROTO_UDP) {
if (do_csum &&
@@ -1278,12 +1445,58 @@ skip:
m_new->m_flags |= M_VLANTAG;
}
+ if (hash_info != NULL && hash_value != NULL) {
+ int hash_type = M_HASHTYPE_OPAQUE;
+
+ rxr->hn_rss_pkts++;
+ m_new->m_pkthdr.flowid = hash_value->hash_value;
+ if ((hash_info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
+ NDIS_HASH_FUNCTION_TOEPLITZ) {
+ uint32_t type =
+ (hash_info->hash_info & NDIS_HASH_TYPE_MASK);
+
+ switch (type) {
+ case NDIS_HASH_IPV4:
+ hash_type = M_HASHTYPE_RSS_IPV4;
+ break;
+
+ case NDIS_HASH_TCP_IPV4:
+ hash_type = M_HASHTYPE_RSS_TCP_IPV4;
+ break;
+
+ case NDIS_HASH_IPV6:
+ hash_type = M_HASHTYPE_RSS_IPV6;
+ break;
+
+ case NDIS_HASH_IPV6_EX:
+ hash_type = M_HASHTYPE_RSS_IPV6_EX;
+ break;
+
+ case NDIS_HASH_TCP_IPV6:
+ hash_type = M_HASHTYPE_RSS_TCP_IPV6;
+ break;
+
+ case NDIS_HASH_TCP_IPV6_EX:
+ hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
+ break;
+ }
+ }
+ M_HASHTYPE_SET(m_new, hash_type);
+ } else {
+ if (hash_value != NULL)
+ m_new->m_pkthdr.flowid = hash_value->hash_value;
+ else
+ m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
+ M_HASHTYPE_SET(m_new, M_HASHTYPE_OPAQUE);
+ }
+
/*
* Note: Moved RX completion back to hv_nv_on_receive() so all
* messages (not just data messages) will trigger a response.
*/
ifp->if_ipackets++;
+ rxr->hn_pkts++;
if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
@@ -1305,11 +1518,6 @@ skip:
return (0);
}
-void
-netvsc_recv_rollup(struct hv_device *device_ctx __unused)
-{
-}
-
/*
* Rules for using sc->temp_unusable:
* 1. sc->temp_unusable can only be read or written while holding NV_LOCK()
@@ -1373,13 +1581,8 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
*/
NV_LOCK(sc);
if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
- HN_LRO_LENLIM_MIN(ifp)) {
- int i;
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
- sc->hn_rx_ring[i].hn_lro.lro_length_lim =
- HN_LRO_LENLIM_MIN(ifp);
- }
- }
+ HN_LRO_LENLIM_MIN(ifp))
+ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
NV_UNLOCK(sc);
#endif
@@ -1412,7 +1615,8 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
NV_UNLOCK(sc);
break;
}
- error = hv_rf_on_device_add(hn_dev, &device_info);
+ error = hv_rf_on_device_add(hn_dev, &device_info,
+ sc->hn_rx_ring_inuse);
if (error) {
NV_LOCK(sc);
sc->temp_unusable = FALSE;
@@ -1555,7 +1759,7 @@ static void
hn_stop(hn_softc_t *sc)
{
struct ifnet *ifp;
- int ret;
+ int ret, i;
struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
ifp = sc->hn_ifp;
@@ -1565,6 +1769,9 @@ hn_stop(hn_softc_t *sc)
atomic_clear_int(&ifp->if_drv_flags,
(IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
+ sc->hn_tx_ring[i].hn_oactive = 0;
+
if_link_state_change(ifp, LINK_STATE_DOWN);
sc->hn_initdone = 0;
@@ -1637,7 +1844,7 @@ hn_ifinit_locked(hn_softc_t *sc)
{
struct ifnet *ifp;
struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
- int ret;
+ int ret, i;
ifp = sc->hn_ifp;
@@ -1653,7 +1860,11 @@ hn_ifinit_locked(hn_softc_t *sc)
} else {
sc->hn_initdone = 1;
}
+
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
+ sc->hn_tx_ring[i].hn_oactive = 0;
+
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
if_link_state_change(ifp, LINK_STATE_UP);
}
@@ -1704,7 +1915,7 @@ hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
unsigned int lenlim;
- int error, i;
+ int error;
lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
error = sysctl_handle_int(oidp, &lenlim, 0, req);
@@ -1716,8 +1927,7 @@ hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
return EINVAL;
NV_LOCK(sc);
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
- sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
+ hn_set_lro_lenlim(sc, lenlim);
NV_UNLOCK(sc);
return 0;
}
@@ -1746,7 +1956,7 @@ hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
*/
--ackcnt;
NV_LOCK(sc);
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+ for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
NV_UNLOCK(sc);
return 0;
@@ -1770,7 +1980,7 @@ hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
return error;
NV_LOCK(sc);
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
if (on)
@@ -1810,7 +2020,7 @@ hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
uint64_t stat;
stat = 0;
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((int *)((uint8_t *)rxr + ofs));
}
@@ -1820,7 +2030,7 @@ hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
return error;
/* Zero out this stat. */
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
rxr = &sc->hn_rx_ring[i];
*((int *)((uint8_t *)rxr + ofs)) = 0;
}
@@ -1836,7 +2046,7 @@ hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
uint64_t stat;
stat = 0;
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((uint64_t *)((uint8_t *)rxr + ofs));
}
@@ -1846,7 +2056,7 @@ hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
return error;
/* Zero out this stat. */
- for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
rxr = &sc->hn_rx_ring[i];
*((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
}
@@ -1890,7 +2100,7 @@ hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
u_long stat;
stat = 0;
- for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
txr = &sc->hn_tx_ring[i];
stat += *((u_long *)((uint8_t *)txr + ofs));
}
@@ -1900,7 +2110,7 @@ hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
return error;
/* Zero out this stat. */
- for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
txr = &sc->hn_tx_ring[i];
*((u_long *)((uint8_t *)txr + ofs)) = 0;
}
@@ -1922,7 +2132,7 @@ hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
return error;
NV_LOCK(sc);
- for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
txr = &sc->hn_tx_ring[i];
*((int *)((uint8_t *)txr + ofs)) = conf;
}
@@ -2019,7 +2229,7 @@ hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
}
static void
-hn_create_rx_data(struct hn_softc *sc)
+hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
{
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
@@ -2031,7 +2241,9 @@ hn_create_rx_data(struct hn_softc *sc)
#endif
int i;
- sc->hn_rx_ring_cnt = 1; /* TODO: vRSS */
+ sc->hn_rx_ring_cnt = ring_cnt;
+ sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
+
sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
M_NETVSC, M_WAITOK | M_ZERO);
@@ -2044,6 +2256,13 @@ hn_create_rx_data(struct hn_softc *sc)
#endif
#endif /* INET || INET6 */
+ ctx = device_get_sysctl_ctx(dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+ /* Create dev.hn.UNIT.rx sysctl tree */
+ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
@@ -2053,6 +2272,8 @@ hn_create_rx_data(struct hn_softc *sc)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
if (hn_trust_hostip)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
+ rxr->hn_ifp = sc->hn_ifp;
+ rxr->hn_rx_idx = i;
/*
* Initialize LRO.
@@ -2069,13 +2290,35 @@ hn_create_rx_data(struct hn_softc *sc)
rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
#endif
#endif /* INET || INET6 */
- }
- ctx = device_get_sysctl_ctx(dev);
- child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+ if (sc->hn_rx_sysctl_tree != NULL) {
+ char name[16];
+
+ /*
+ * Create per RX ring sysctl tree:
+ * dev.hn.UNIT.rx.RINGID
+ */
+ snprintf(name, sizeof(name), "%d", i);
+ rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
+ OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
+ if (rxr->hn_rx_sysctl_tree != NULL) {
+ SYSCTL_ADD_ULONG(ctx,
+ SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+ OID_AUTO, "packets", CTLFLAG_RW,
+ &rxr->hn_pkts, "# of packets received");
+ SYSCTL_ADD_ULONG(ctx,
+ SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+ OID_AUTO, "rss_pkts", CTLFLAG_RW,
+ &rxr->hn_rss_pkts,
+ "# of packets w/ RSS info received");
+ }
+ }
+ }
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
- CTLTYPE_U64 | CTLFLAG_RW, sc,
+ CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro.lro_queued),
#if __FreeBSD_version < 1100095
hn_rx_stat_int_sysctl,
@@ -2084,7 +2327,7 @@ hn_create_rx_data(struct hn_softc *sc)
#endif
"LU", "LRO queued");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
- CTLTYPE_U64 | CTLFLAG_RW, sc,
+ CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
#if __FreeBSD_version < 1100095
hn_rx_stat_int_sysctl,
@@ -2093,53 +2336,59 @@ hn_create_rx_data(struct hn_softc *sc)
#endif
"LU", "LRO flushed");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro_tried),
hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
#if __FreeBSD_version >= 1100099
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
- CTLTYPE_UINT | CTLFLAG_RW, sc, 0, hn_lro_lenlim_sysctl, "IU",
+ CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_lro_lenlim_sysctl, "IU",
"Max # of data bytes to be aggregated by LRO");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
- CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_ackcnt_sysctl, "I",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_lro_ackcnt_sysctl, "I",
"Max # of ACKs to be aggregated by LRO");
#endif
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
- CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_TCP,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
hn_trust_hcsum_sysctl, "I",
"Trust tcp segement verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
- CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_UDP,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
hn_trust_hcsum_sysctl, "I",
"Trust udp datagram verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
- CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_IP,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
hn_trust_hcsum_sysctl, "I",
"Trust ip packet verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_ip),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_tcp),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_udp),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_trusted),
hn_rx_stat_ulong_sysctl, "LU",
"# of packets that we trust host's csum verification");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_small_pkts),
hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
+ CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
+ CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
}
static void
@@ -2160,6 +2409,7 @@ hn_destroy_rx_data(struct hn_softc *sc)
sc->hn_rx_ring = NULL;
sc->hn_rx_ring_cnt = 0;
+ sc->hn_rx_ring_inuse = 0;
}
static int
@@ -2170,6 +2420,7 @@ hn_create_tx_ring(struct hn_softc *sc, int id)
int error, i;
txr->hn_sc = sc;
+ txr->hn_tx_idx = id;
#ifndef HN_USE_TXDESC_BUFRING
mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
@@ -2187,8 +2438,22 @@ hn_create_tx_ring(struct hn_softc *sc, int id)
#endif
txr->hn_tx_taskq = sc->hn_tx_taskq;
- TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
- TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
+
+ if (hn_use_if_start) {
+ txr->hn_txeof = hn_start_txeof;
+ TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
+ TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
+ } else {
+ int br_depth;
+
+ txr->hn_txeof = hn_xmit_txeof;
+ TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
+ TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
+
+ br_depth = hn_get_txswq_depth(txr);
+ txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC,
+ M_WAITOK, &txr->hn_tx_lock);
+ }
txr->hn_direct_tx_size = hn_direct_tx_size;
if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
@@ -2202,8 +2467,6 @@ hn_create_tx_ring(struct hn_softc *sc, int id)
*/
txr->hn_sched_tx = 1;
- txr->hn_txeof = hn_start_txeof; /* TODO: if_transmit */
-
parent_dtag = bus_get_dma_tag(sc->hn_dev);
/* DMA tag for RNDIS messages. */
@@ -2312,7 +2575,7 @@ hn_create_tx_ring(struct hn_softc *sc, int id)
snprintf(name, sizeof(name), "%d", id);
txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
- name, CTLFLAG_RD, 0, "");
+ name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
if (txr->hn_tx_sysctl_tree != NULL) {
child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
@@ -2320,6 +2583,14 @@ hn_create_tx_ring(struct hn_softc *sc, int id)
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
"# of available TX descs");
+ if (!hn_use_if_start) {
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
+ CTLFLAG_RD, &txr->hn_oactive, 0,
+ "over active");
+ }
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
+ CTLFLAG_RW, &txr->hn_pkts,
+ "# of packets transmitted");
}
}
@@ -2354,8 +2625,10 @@ hn_destroy_tx_ring(struct hn_tx_ring *txr)
hn_txdesc_dmamap_destroy(txd);
}
#else
+ mtx_lock(&txr->hn_tx_lock);
while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL)
hn_txdesc_dmamap_destroy(txd);
+ mtx_unlock(&txr->hn_tx_lock);
#endif
if (txr->hn_tx_data_dtag != NULL)
@@ -2370,6 +2643,9 @@ hn_destroy_tx_ring(struct hn_tx_ring *txr)
free(txr->hn_txdesc, M_NETVSC);
txr->hn_txdesc = NULL;
+ if (txr->hn_mbuf_br != NULL)
+ buf_ring_free(txr->hn_mbuf_br, M_NETVSC);
+
#ifndef HN_USE_TXDESC_BUFRING
mtx_destroy(&txr->hn_txlist_spin);
#endif
@@ -2377,13 +2653,15 @@ hn_destroy_tx_ring(struct hn_tx_ring *txr)
}
static int
-hn_create_tx_data(struct hn_softc *sc)
+hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
{
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
int i;
- sc->hn_tx_ring_cnt = 1; /* TODO: vRSS */
+ sc->hn_tx_ring_cnt = ring_cnt;
+ sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
+
sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
M_NETVSC, M_WAITOK | M_ZERO);
@@ -2392,7 +2670,7 @@ hn_create_tx_data(struct hn_softc *sc)
/* Create dev.hn.UNIT.tx sysctl tree */
sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
- CTLFLAG_RD, 0, "");
+ CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
int error;
@@ -2403,25 +2681,29 @@ hn_create_tx_data(struct hn_softc *sc)
}
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_no_txdescs),
hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_send_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_txdma_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_collapsed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
- CTLTYPE_ULONG | CTLFLAG_RW, sc,
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_chimney),
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+ __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
+ hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
"# of total TX descs");
@@ -2429,19 +2711,24 @@ hn_create_tx_data(struct hn_softc *sc)
CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
"Chimney send packet size upper boundary");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
- CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_tx_chimney_size_sysctl,
"I", "Chimney send packet size limit");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
- CTLTYPE_INT | CTLFLAG_RW, sc,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_direct_tx_size),
hn_tx_conf_int_sysctl, "I",
"Size of the packet for direct transmission");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
- CTLTYPE_INT | CTLFLAG_RW, sc,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_sched_tx),
hn_tx_conf_int_sysctl, "I",
"Always schedule transmission "
"instead of doing direct transmission");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
+ CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
+ CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
return 0;
}
@@ -2452,7 +2739,7 @@ hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size)
int i;
NV_LOCK(sc);
- for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size;
NV_UNLOCK(sc);
}
@@ -2472,6 +2759,7 @@ hn_destroy_tx_data(struct hn_softc *sc)
sc->hn_tx_ring = NULL;
sc->hn_tx_ring_cnt = 0;
+ sc->hn_tx_ring_inuse = 0;
}
static void
@@ -2500,7 +2788,7 @@ hn_stop_tx_tasks(struct hn_softc *sc)
{
int i;
- for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
@@ -2508,6 +2796,224 @@ hn_stop_tx_tasks(struct hn_softc *sc)
}
}
+static int
+hn_xmit(struct hn_tx_ring *txr, int len)
+{
+ struct hn_softc *sc = txr->hn_sc;
+ struct ifnet *ifp = sc->hn_ifp;
+ struct mbuf *m_head;
+
+ mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+ KASSERT(hn_use_if_start == 0,
+ ("hn_xmit is called, when if_start is enabled"));
+
+ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
+ return 0;
+
+ while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
+ struct hn_txdesc *txd;
+ int error;
+
+ if (len > 0 && m_head->m_pkthdr.len > len) {
+ /*
+ * This sending could be time consuming; let callers
+ * dispatch this packet sending (and sending of any
+ * following up packets) to tx taskqueue.
+ */
+ drbr_putback(ifp, txr->hn_mbuf_br, m_head);
+ return 1;
+ }
+
+ txd = hn_txdesc_get(txr);
+ if (txd == NULL) {
+ txr->hn_no_txdescs++;
+ drbr_putback(ifp, txr->hn_mbuf_br, m_head);
+ txr->hn_oactive = 1;
+ break;
+ }
+
+ error = hn_encap(txr, txd, &m_head);
+ if (error) {
+ /* Both txd and m_head are freed; discard */
+ drbr_advance(ifp, txr->hn_mbuf_br);
+ continue;
+ }
+
+ error = hn_send_pkt(ifp, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m_head is not */
+ drbr_putback(ifp, txr->hn_mbuf_br, m_head);
+ txr->hn_oactive = 1;
+ break;
+ }
+
+ /* Sent */
+ drbr_advance(ifp, txr->hn_mbuf_br);
+ }
+ return 0;
+}
+
+static int
+hn_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ struct hn_softc *sc = ifp->if_softc;
+ struct hn_tx_ring *txr;
+ int error, idx = 0;
+
+ /*
+ * Select the TX ring based on flowid
+ */
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
+ txr = &sc->hn_tx_ring[idx];
+
+ error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
+ if (error) {
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
+ return error;
+ }
+
+ if (txr->hn_oactive)
+ return 0;
+
+ if (txr->hn_sched_tx)
+ goto do_sched;
+
+ if (mtx_trylock(&txr->hn_tx_lock)) {
+ int sched;
+
+ sched = hn_xmit(txr, txr->hn_direct_tx_size);
+ mtx_unlock(&txr->hn_tx_lock);
+ if (!sched)
+ return 0;
+ }
+do_sched:
+ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
+ return 0;
+}
+
+static void
+hn_xmit_qflush(struct ifnet *ifp)
+{
+ struct hn_softc *sc = ifp->if_softc;
+ int i;
+
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+ struct mbuf *m;
+
+ mtx_lock(&txr->hn_tx_lock);
+ while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
+ m_freem(m);
+ mtx_unlock(&txr->hn_tx_lock);
+ }
+ if_qflush(ifp);
+}
+
+static void
+hn_xmit_txeof(struct hn_tx_ring *txr)
+{
+
+ if (txr->hn_sched_tx)
+ goto do_sched;
+
+ if (mtx_trylock(&txr->hn_tx_lock)) {
+ int sched;
+
+ txr->hn_oactive = 0;
+ sched = hn_xmit(txr, txr->hn_direct_tx_size);
+ mtx_unlock(&txr->hn_tx_lock);
+ if (sched) {
+ taskqueue_enqueue(txr->hn_tx_taskq,
+ &txr->hn_tx_task);
+ }
+ } else {
+do_sched:
+ /*
+ * Release the oactive earlier, with the hope, that
+ * others could catch up. The task will clear the
+ * oactive again with the hn_tx_lock to avoid possible
+ * races.
+ */
+ txr->hn_oactive = 0;
+ taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
+ }
+}
+
+static void
+hn_xmit_taskfunc(void *xtxr, int pending __unused)
+{
+ struct hn_tx_ring *txr = xtxr;
+
+ mtx_lock(&txr->hn_tx_lock);
+ hn_xmit(txr, 0);
+ mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
+{
+ struct hn_tx_ring *txr = xtxr;
+
+ mtx_lock(&txr->hn_tx_lock);
+ txr->hn_oactive = 0;
+ hn_xmit(txr, 0);
+ mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_channel_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan)
+{
+ struct hn_rx_ring *rxr;
+ int idx;
+
+ idx = chan->offer_msg.offer.sub_channel_index;
+
+ KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
+ ("invalid channel index %d, should > 0 && < %d",
+ idx, sc->hn_rx_ring_inuse));
+ rxr = &sc->hn_rx_ring[idx];
+ KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
+ ("RX ring %d already attached", idx));
+ rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
+
+ chan->hv_chan_rxr = rxr;
+ if (bootverbose) {
+ if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n",
+ idx, chan->offer_msg.child_rel_id);
+ }
+
+ if (idx < sc->hn_tx_ring_inuse) {
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
+
+ KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
+ ("TX ring %d already attached", idx));
+ txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
+
+ chan->hv_chan_txr = txr;
+ txr->hn_chan = chan;
+ if (bootverbose) {
+ if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n",
+ idx, chan->offer_msg.child_rel_id);
+ }
+ }
+
+ /* Bind channel to a proper CPU */
+ vmbus_channel_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
+}
+
+static void
+hn_subchan_attach(struct hn_softc *sc, struct hv_vmbus_channel *chan)
+{
+
+ KASSERT(!HV_VMBUS_CHAN_ISPRIMARY(chan),
+ ("subchannel callback on primary channel"));
+ KASSERT(chan->offer_msg.offer.sub_channel_index > 0,
+ ("invalid channel subidx %u",
+ chan->offer_msg.offer.sub_channel_index));
+ hn_channel_attach(sc, chan);
+}
+
static void
hn_tx_taskq_create(void *arg __unused)
{
diff --git a/sys/dev/hyperv/netvsc/hv_rndis.h b/sys/dev/hyperv/netvsc/hv_rndis.h
index cd46ecc..b27579d 100644
--- a/sys/dev/hyperv/netvsc/hv_rndis.h
+++ b/sys/dev/hyperv/netvsc/hv_rndis.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
@@ -167,6 +167,14 @@
#define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A
#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B
+/*
+ * For receive side scale
+ */
+/* Query only */
+#define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203
+/* Query and set */
+#define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204
+
#define RNDIS_OID_GEN_XMIT_OK 0x00020101
#define RNDIS_OID_GEN_RCV_OK 0x00020102
#define RNDIS_OID_GEN_XMIT_ERROR 0x00020103
@@ -608,6 +616,9 @@ typedef enum ndis_per_pkt_infotype_ {
max_perpkt_info
} ndis_per_pkt_infotype;
+#define nbl_hash_value pkt_cancel_id
+#define nbl_hash_info original_netbuf_list
+
typedef struct ndis_8021q_info_ {
union {
struct {
@@ -680,6 +691,28 @@ typedef struct rndis_tcp_ip_csum_info_ {
};
} rndis_tcp_ip_csum_info;
+struct rndis_hash_value {
+ uint32_t hash_value;
+} __packed;
+
+struct rndis_hash_info {
+ uint32_t hash_info;
+} __packed;
+
+#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */
+#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */
+
+/* hash function */
+#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001
+
+/* hash type */
+#define NDIS_HASH_IPV4 0x00000100
+#define NDIS_HASH_TCP_IPV4 0x00000200
+#define NDIS_HASH_IPV6 0x00000400
+#define NDIS_HASH_IPV6_EX 0x00000800
+#define NDIS_HASH_TCP_IPV6 0x00001000
+#define NDIS_HASH_TCP_IPV6_EX 0x00002000
+
typedef struct rndis_tcp_tso_info_ {
union {
struct {
@@ -713,6 +746,9 @@ typedef struct rndis_tcp_tso_info_ {
};
} rndis_tcp_tso_info;
+#define RNDIS_HASHVAL_PPI_SIZE (sizeof(rndis_per_packet_info) + \
+ sizeof(struct rndis_hash_value))
+
#define RNDIS_VLAN_PPI_SIZE (sizeof(rndis_per_packet_info) + \
sizeof(ndis_8021q_info))
@@ -1046,11 +1082,13 @@ typedef struct rndismp_rx_bufs_info_ {
/*
* Externs
*/
-int netvsc_recv(struct hv_device *device_ctx,
- netvsc_packet *packet,
- rndis_tcp_ip_csum_info *csum_info);
-void netvsc_recv_rollup(struct hv_device *device_ctx);
-void netvsc_channel_rollup(struct hv_device *device_ctx);
+struct hv_vmbus_channel;
+
+int netvsc_recv(struct hv_vmbus_channel *chan,
+ netvsc_packet *packet, const rndis_tcp_ip_csum_info *csum_info,
+ const struct rndis_hash_info *hash_info,
+ const struct rndis_hash_value *hash_value);
+void netvsc_channel_rollup(struct hv_vmbus_channel *chan);
void* hv_set_rppi_data(rndis_msg *rndis_mesg,
uint32_t rppi_size,
diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c
index 31ddbc0..8e95510 100644
--- a/sys/dev/hyperv/netvsc/hv_rndis_filter.c
+++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
@@ -45,10 +45,27 @@ __FBSDID("$FreeBSD$");
#include <vm/pmap.h>
#include <dev/hyperv/include/hyperv.h>
+#include <dev/hyperv/vmbus/hv_vmbus_priv.h>
#include "hv_net_vsc.h"
#include "hv_rndis.h"
#include "hv_rndis_filter.h"
+struct hv_rf_recvinfo {
+ const ndis_8021q_info *vlan_info;
+ const rndis_tcp_ip_csum_info *csum_info;
+ const struct rndis_hash_info *hash_info;
+ const struct rndis_hash_value *hash_value;
+};
+
+#define HV_RF_RECVINFO_VLAN 0x1
+#define HV_RF_RECVINFO_CSUM 0x2
+#define HV_RF_RECVINFO_HASHINF 0x4
+#define HV_RF_RECVINFO_HASHVAL 0x8
+#define HV_RF_RECVINFO_ALL \
+ (HV_RF_RECVINFO_VLAN | \
+ HV_RF_RECVINFO_CSUM | \
+ HV_RF_RECVINFO_HASHINF | \
+ HV_RF_RECVINFO_HASHVAL)
/*
* Forward declarations
@@ -59,6 +76,7 @@ static void hv_rf_receive_response(rndis_device *device, rndis_msg *response);
static void hv_rf_receive_indicate_status(rndis_device *device,
rndis_msg *response);
static void hv_rf_receive_data(rndis_device *device, rndis_msg *message,
+ struct hv_vmbus_channel *chan,
netvsc_packet *pkt);
static int hv_rf_query_device(rndis_device *device, uint32_t oid,
void *result, uint32_t *result_size);
@@ -68,8 +86,8 @@ static int hv_rf_set_packet_filter(rndis_device *device, uint32_t new_filter);
static int hv_rf_init_device(rndis_device *device);
static int hv_rf_open_device(rndis_device *device);
static int hv_rf_close_device(rndis_device *device);
-static void hv_rf_on_send_request_completion(void *context);
-static void hv_rf_on_send_request_halt_completion(void *context);
+static void hv_rf_on_send_request_completion(struct hv_vmbus_channel *, void *context);
+static void hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *, void *context);
int
hv_rf_send_offload_request(struct hv_device *device,
rndis_offload_params *offloads);
@@ -223,6 +241,8 @@ hv_rf_send_request(rndis_device *device, rndis_request *request,
{
int ret;
netvsc_packet *packet;
+ netvsc_dev *net_dev = device->net_dev;
+ int send_buf_section_idx;
/* Set up the packet to send it */
packet = &request->pkt;
@@ -237,6 +257,20 @@ hv_rf_send_request(rndis_device *device, rndis_request *request,
packet->page_buffers[0].offset =
(unsigned long)&request->request_msg & (PAGE_SIZE - 1);
+ if (packet->page_buffers[0].offset +
+ packet->page_buffers[0].length > PAGE_SIZE) {
+ packet->page_buf_count = 2;
+ packet->page_buffers[0].length =
+ PAGE_SIZE - packet->page_buffers[0].offset;
+ packet->page_buffers[1].pfn =
+ hv_get_phys_addr((char*)&request->request_msg +
+ packet->page_buffers[0].length) >> PAGE_SHIFT;
+ packet->page_buffers[1].offset = 0;
+ packet->page_buffers[1].length =
+ request->request_msg.msg_len -
+ packet->page_buffers[0].length;
+ }
+
packet->compl.send.send_completion_context = request; /* packet */
if (message_type != REMOTE_NDIS_HALT_MSG) {
packet->compl.send.on_send_completion =
@@ -246,11 +280,26 @@ hv_rf_send_request(rndis_device *device, rndis_request *request,
hv_rf_on_send_request_halt_completion;
}
packet->compl.send.send_completion_tid = (unsigned long)device;
- packet->send_buf_section_idx =
- NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
+ if (packet->tot_data_buf_len < net_dev->send_section_size) {
+ send_buf_section_idx = hv_nv_get_next_send_section(net_dev);
+ if (send_buf_section_idx !=
+ NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX) {
+ char *dest = ((char *)net_dev->send_buf +
+ send_buf_section_idx * net_dev->send_section_size);
+
+ memcpy(dest, &request->request_msg, request->request_msg.msg_len);
+ packet->send_buf_section_idx = send_buf_section_idx;
+ packet->send_buf_section_size = packet->tot_data_buf_len;
+ packet->page_buf_count = 0;
+ goto sendit;
+ }
+ /* Failed to allocate chimney send buffer; move on */
+ }
+ packet->send_buf_section_idx = NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;
packet->send_buf_section_size = 0;
- ret = hv_nv_on_send(device->net_dev->dev, packet);
+sendit:
+ ret = hv_nv_on_send(device->net_dev->dev->channel, packet);
return (ret);
}
@@ -373,8 +422,7 @@ hv_rf_send_offload_request(struct hv_device *device,
}
cleanup:
- if (request)
- hv_put_rndis_request(rndis_dev, request);
+ hv_put_rndis_request(rndis_dev, request);
return (ret);
}
@@ -402,17 +450,95 @@ hv_rf_receive_indicate_status(rndis_device *device, rndis_msg *response)
}
}
+static int
+hv_rf_find_recvinfo(const rndis_packet *rpkt, struct hv_rf_recvinfo *info)
+{
+ const rndis_per_packet_info *ppi;
+ uint32_t mask, len;
+
+ info->vlan_info = NULL;
+ info->csum_info = NULL;
+ info->hash_info = NULL;
+ info->hash_value = NULL;
+
+ if (rpkt->per_pkt_info_offset == 0)
+ return 0;
+
+ ppi = (const rndis_per_packet_info *)
+ ((const uint8_t *)rpkt + rpkt->per_pkt_info_offset);
+ len = rpkt->per_pkt_info_length;
+ mask = 0;
+
+ while (len != 0) {
+ const void *ppi_dptr;
+ uint32_t ppi_dlen;
+
+ if (__predict_false(ppi->size < ppi->per_packet_info_offset))
+ return EINVAL;
+ ppi_dlen = ppi->size - ppi->per_packet_info_offset;
+ ppi_dptr = (const uint8_t *)ppi + ppi->per_packet_info_offset;
+
+ switch (ppi->type) {
+ case ieee_8021q_info:
+ if (__predict_false(ppi_dlen < sizeof(ndis_8021q_info)))
+ return EINVAL;
+ info->vlan_info = ppi_dptr;
+ mask |= HV_RF_RECVINFO_VLAN;
+ break;
+
+ case tcpip_chksum_info:
+ if (__predict_false(ppi_dlen <
+ sizeof(rndis_tcp_ip_csum_info)))
+ return EINVAL;
+ info->csum_info = ppi_dptr;
+ mask |= HV_RF_RECVINFO_CSUM;
+ break;
+
+ case nbl_hash_value:
+ if (__predict_false(ppi_dlen <
+ sizeof(struct rndis_hash_value)))
+ return EINVAL;
+ info->hash_value = ppi_dptr;
+ mask |= HV_RF_RECVINFO_HASHVAL;
+ break;
+
+ case nbl_hash_info:
+ if (__predict_false(ppi_dlen <
+ sizeof(struct rndis_hash_info)))
+ return EINVAL;
+ info->hash_info = ppi_dptr;
+ mask |= HV_RF_RECVINFO_HASHINF;
+ break;
+
+ default:
+ goto skip;
+ }
+
+ if (mask == HV_RF_RECVINFO_ALL) {
+ /* All found; done */
+ break;
+ }
+skip:
+ if (__predict_false(len < ppi->size))
+ return EINVAL;
+ len -= ppi->size;
+ ppi = (const rndis_per_packet_info *)
+ ((const uint8_t *)ppi + ppi->size);
+ }
+ return 0;
+}
+
/*
* RNDIS filter receive data
*/
static void
-hv_rf_receive_data(rndis_device *device, rndis_msg *message, netvsc_packet *pkt)
+hv_rf_receive_data(rndis_device *device, rndis_msg *message,
+ struct hv_vmbus_channel *chan, netvsc_packet *pkt)
{
rndis_packet *rndis_pkt;
- ndis_8021q_info *rppi_vlan_info;
uint32_t data_offset;
- rndis_tcp_ip_csum_info *csum_info = NULL;
device_t dev = device->net_dev->dev->device;
+ struct hv_rf_recvinfo info;
rndis_pkt = &message->msg.packet;
@@ -436,22 +562,26 @@ hv_rf_receive_data(rndis_device *device, rndis_msg *message, netvsc_packet *pkt)
pkt->tot_data_buf_len = rndis_pkt->data_length;
pkt->data = (void *)((unsigned long)pkt->data + data_offset);
- rppi_vlan_info = hv_get_ppi_data(rndis_pkt, ieee_8021q_info);
- if (rppi_vlan_info) {
- pkt->vlan_tci = rppi_vlan_info->u1.s1.vlan_id;
- } else {
- pkt->vlan_tci = 0;
+ if (hv_rf_find_recvinfo(rndis_pkt, &info)) {
+ pkt->status = nvsp_status_failure;
+ device_printf(dev, "recvinfo parsing failed\n");
+ return;
}
- csum_info = hv_get_ppi_data(rndis_pkt, tcpip_chksum_info);
- netvsc_recv(device->net_dev->dev, pkt, csum_info);
+ if (info.vlan_info != NULL)
+ pkt->vlan_tci = info.vlan_info->u1.s1.vlan_id;
+ else
+ pkt->vlan_tci = 0;
+
+ netvsc_recv(chan, pkt, info.csum_info, info.hash_info, info.hash_value);
}
/*
* RNDIS filter on receive
*/
int
-hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device, netvsc_packet *pkt)
+hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device,
+ struct hv_vmbus_channel *chan, netvsc_packet *pkt)
{
rndis_device *rndis_dev;
rndis_msg *rndis_hdr;
@@ -474,7 +604,7 @@ hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device, netvsc_packet *p
/* data message */
case REMOTE_NDIS_PACKET_MSG:
- hv_rf_receive_data(rndis_dev, rndis_hdr, pkt);
+ hv_rf_receive_data(rndis_dev, rndis_hdr, chan, pkt);
break;
/* completion messages */
case REMOTE_NDIS_INITIALIZE_CMPLT:
@@ -525,6 +655,19 @@ hv_rf_query_device(rndis_device *device, uint32_t oid, void *result,
query->info_buffer_length = 0;
query->device_vc_handle = 0;
+ if (oid == RNDIS_OID_GEN_RSS_CAPABILITIES) {
+ struct rndis_recv_scale_cap *cap;
+
+ request->request_msg.msg_len +=
+ sizeof(struct rndis_recv_scale_cap);
+ query->info_buffer_length = sizeof(struct rndis_recv_scale_cap);
+ cap = (struct rndis_recv_scale_cap *)((unsigned long)query +
+ query->info_buffer_offset);
+ cap->hdr.type = RNDIS_OBJECT_TYPE_RSS_CAPABILITIES;
+ cap->hdr.rev = RNDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2;
+ cap->hdr.size = sizeof(struct rndis_recv_scale_cap);
+ }
+
ret = hv_rf_send_request(device, request, REMOTE_NDIS_QUERY_MSG);
if (ret != 0) {
/* Fixme: printf added */
@@ -579,6 +722,114 @@ hv_rf_query_device_link_status(rndis_device *device)
RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &device->link_status, &size));
}
+static uint8_t netvsc_hash_key[HASH_KEYLEN] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
+/*
+ * RNDIS set vRSS parameters
+ */
+static int
+hv_rf_set_rss_param(rndis_device *device, int num_queue)
+{
+ rndis_request *request;
+ rndis_set_request *set;
+ rndis_set_complete *set_complete;
+ rndis_recv_scale_param *rssp;
+ uint32_t extlen = sizeof(rndis_recv_scale_param) +
+ (4 * ITAB_NUM) + HASH_KEYLEN;
+ uint32_t *itab, status;
+ uint8_t *keyp;
+ int i, ret;
+
+
+ request = hv_rndis_request(device, REMOTE_NDIS_SET_MSG,
+ RNDIS_MESSAGE_SIZE(rndis_set_request) + extlen);
+ if (request == NULL) {
+ if (bootverbose)
+ printf("Netvsc: No memory to set vRSS parameters.\n");
+ ret = -1;
+ goto cleanup;
+ }
+
+ set = &request->request_msg.msg.set_request;
+ set->oid = RNDIS_OID_GEN_RSS_PARAMETERS;
+ set->info_buffer_length = extlen;
+ set->info_buffer_offset = sizeof(rndis_set_request);
+ set->device_vc_handle = 0;
+
+ /* Fill out the rssp parameter structure */
+ rssp = (rndis_recv_scale_param *)(set + 1);
+ rssp->hdr.type = RNDIS_OBJECT_TYPE_RSS_PARAMETERS;
+ rssp->hdr.rev = RNDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2;
+ rssp->hdr.size = sizeof(rndis_recv_scale_param);
+ rssp->flag = 0;
+ rssp->hashinfo = RNDIS_HASH_FUNC_TOEPLITZ | RNDIS_HASH_IPV4 |
+ RNDIS_HASH_TCP_IPV4 | RNDIS_HASH_IPV6 | RNDIS_HASH_TCP_IPV6;
+ rssp->indirect_tabsize = 4 * ITAB_NUM;
+ rssp->indirect_taboffset = sizeof(rndis_recv_scale_param);
+ rssp->hashkey_size = HASH_KEYLEN;
+ rssp->hashkey_offset = rssp->indirect_taboffset +
+ rssp->indirect_tabsize;
+
+ /* Set indirection table entries */
+ itab = (uint32_t *)(rssp + 1);
+ for (i = 0; i < ITAB_NUM; i++)
+ itab[i] = i % num_queue;
+
+ /* Set hash key values */
+ keyp = (uint8_t *)((unsigned long)rssp + rssp->hashkey_offset);
+ for (i = 0; i < HASH_KEYLEN; i++)
+ keyp[i] = netvsc_hash_key[i];
+
+ ret = hv_rf_send_request(device, request, REMOTE_NDIS_SET_MSG);
+ if (ret != 0) {
+ goto cleanup;
+ }
+
+ /*
+ * Wait for the response from the host. Another thread will signal
+ * us when the response has arrived. In the failure case,
+ * sema_timedwait() returns a non-zero status after waiting 5 seconds.
+ */
+ ret = sema_timedwait(&request->wait_sema, 5 * hz);
+ if (ret == 0) {
+ /* Response received, check status */
+ set_complete = &request->response_msg.msg.set_complete;
+ status = set_complete->status;
+ if (status != RNDIS_STATUS_SUCCESS) {
+ /* Bad response status, return error */
+ if (bootverbose)
+ printf("Netvsc: Failed to set vRSS "
+ "parameters.\n");
+ ret = -2;
+ } else {
+ if (bootverbose)
+ printf("Netvsc: Successfully set vRSS "
+ "parameters.\n");
+ }
+ } else {
+ /*
+ * We cannot deallocate the request since we may still
+ * receive a send completion for it.
+ */
+ printf("Netvsc: vRSS set timeout, id = %u, ret = %d\n",
+ request->request_msg.msg.init_request.request_id, ret);
+ goto exit;
+ }
+
+cleanup:
+ if (request != NULL) {
+ hv_put_rndis_request(device, request);
+ }
+exit:
+ return (ret);
+}
+
/*
* RNDIS filter set packet filter
* Sends an rndis request with the new filter, then waits for a response
@@ -752,10 +1003,8 @@ hv_rf_halt_device(rndis_device *device)
}
device->state = RNDIS_DEV_UNINITIALIZED;
-
- if (request != NULL) {
- hv_put_rndis_request(device, request);
- }
+
+ hv_put_rndis_request(device, request);
return (0);
}
@@ -813,12 +1062,16 @@ hv_rf_close_device(rndis_device *device)
* RNDIS filter on device add
*/
int
-hv_rf_on_device_add(struct hv_device *device, void *additl_info)
+hv_rf_on_device_add(struct hv_device *device, void *additl_info,
+ int nchan)
{
int ret;
netvsc_dev *net_dev;
rndis_device *rndis_dev;
+ nvsp_msg *init_pkt;
rndis_offload_params offloads;
+ struct rndis_recv_scale_cap rsscaps;
+ uint32_t rsscaps_size = sizeof(struct rndis_recv_scale_cap);
netvsc_device_info *dev_info = (netvsc_device_info *)additl_info;
device_t dev = device->device;
@@ -884,6 +1137,67 @@ hv_rf_on_device_add(struct hv_device *device, void *additl_info)
dev_info->link_state = rndis_dev->link_status;
+ net_dev->num_channel = 1;
+ if (net_dev->nvsp_version < NVSP_PROTOCOL_VERSION_5 || nchan == 1)
+ return (0);
+
+ memset(&rsscaps, 0, rsscaps_size);
+ ret = hv_rf_query_device(rndis_dev,
+ RNDIS_OID_GEN_RSS_CAPABILITIES,
+ &rsscaps, &rsscaps_size);
+ if ((ret != 0) || (rsscaps.num_recv_que < 2)) {
+ device_printf(dev, "hv_rf_query_device failed or "
+ "rsscaps.num_recv_que < 2 \n");
+ goto out;
+ }
+ device_printf(dev, "channel, offered %u, requested %d\n",
+ rsscaps.num_recv_que, nchan);
+ if (nchan > rsscaps.num_recv_que)
+ nchan = rsscaps.num_recv_que;
+ net_dev->num_channel = nchan;
+
+ if (net_dev->num_channel == 1) {
+ device_printf(dev, "net_dev->num_channel == 1 under VRSS\n");
+ goto out;
+ }
+
+ /* request host to create sub channels */
+ init_pkt = &net_dev->channel_init_packet;
+ memset(init_pkt, 0, sizeof(nvsp_msg));
+
+ init_pkt->hdr.msg_type = nvsp_msg5_type_subchannel;
+ init_pkt->msgs.vers_5_msgs.subchannel_request.op =
+ NVSP_SUBCHANNE_ALLOCATE;
+ init_pkt->msgs.vers_5_msgs.subchannel_request.num_subchannels =
+ net_dev->num_channel - 1;
+
+ ret = hv_vmbus_channel_send_packet(device->channel, init_pkt,
+ sizeof(nvsp_msg), (uint64_t)(uintptr_t)init_pkt,
+ HV_VMBUS_PACKET_TYPE_DATA_IN_BAND,
+ HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+ if (ret != 0) {
+ device_printf(dev, "Fail to allocate subchannel\n");
+ goto out;
+ }
+
+ sema_wait(&net_dev->channel_init_sema);
+
+ if (init_pkt->msgs.vers_5_msgs.subchn_complete.status !=
+ nvsp_status_success) {
+ ret = ENODEV;
+ device_printf(dev, "sub channel complete error\n");
+ goto out;
+ }
+
+ net_dev->num_channel = 1 +
+ init_pkt->msgs.vers_5_msgs.subchn_complete.num_subchannels;
+
+ ret = hv_rf_set_rss_param(rndis_dev, net_dev->num_channel);
+
+out:
+ if (ret)
+ net_dev->num_channel = 1;
+
return (ret);
}
@@ -938,7 +1252,8 @@ hv_rf_on_close(struct hv_device *device)
* RNDIS filter on send request completion callback
*/
static void
-hv_rf_on_send_request_completion(void *context)
+hv_rf_on_send_request_completion(struct hv_vmbus_channel *chan __unused,
+ void *context __unused)
{
}
@@ -946,7 +1261,8 @@ hv_rf_on_send_request_completion(void *context)
* RNDIS filter on send request (halt only) completion callback
*/
static void
-hv_rf_on_send_request_halt_completion(void *context)
+hv_rf_on_send_request_halt_completion(struct hv_vmbus_channel *chan __unused,
+ void *context)
{
rndis_request *request = context;
@@ -958,32 +1274,9 @@ hv_rf_on_send_request_halt_completion(void *context)
request->halt_complete_flag = 1;
}
-/*
- * RNDIS filter when "all" reception is done
- */
-void
-hv_rf_receive_rollup(netvsc_dev *net_dev)
-{
- rndis_device *rndis_dev;
-
- rndis_dev = (rndis_device *)net_dev->extension;
- netvsc_recv_rollup(rndis_dev->net_dev->dev);
-}
-
void
-hv_rf_channel_rollup(netvsc_dev *net_dev)
+hv_rf_channel_rollup(struct hv_vmbus_channel *chan)
{
- rndis_device *rndis_dev;
-
- rndis_dev = (rndis_device *)net_dev->extension;
- /*
- * This could be called pretty early, so we need
- * to make sure everything has been setup.
- */
- if (rndis_dev == NULL ||
- rndis_dev->net_dev == NULL ||
- rndis_dev->net_dev->dev == NULL)
- return;
- netvsc_channel_rollup(rndis_dev->net_dev->dev);
+ netvsc_channel_rollup(chan);
}
diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.h b/sys/dev/hyperv/netvsc/hv_rndis_filter.h
index 9d7a38d..dbaaa42 100644
--- a/sys/dev/hyperv/netvsc/hv_rndis_filter.h
+++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2010-2012 Citrix Inc.
* Copyright (c) 2012 NetApp Inc.
* All rights reserved.
@@ -63,17 +63,32 @@ typedef struct rndis_request_ {
struct sema wait_sema;
/*
- * Fixme: We assumed a fixed size response here. If we do ever
- * need to handle a bigger response, we can either define a max
- * response message or add a response buffer variable above this field
+ * The max response size is sizeof(rndis_msg) + PAGE_SIZE.
+ *
+ * XXX
+ * This is ugly and should be cleaned up once we busdma-fy
+ * RNDIS request bits.
*/
rndis_msg response_msg;
+ uint8_t buf_resp[PAGE_SIZE];
/* Simplify allocation by having a netvsc packet inline */
netvsc_packet pkt;
hv_vmbus_page_buffer buffer;
- /* Fixme: We assumed a fixed size request here. */
+
+ /*
+ * The max request size is sizeof(rndis_msg) + PAGE_SIZE.
+ *
+ * NOTE:
+ * This is required for the large request like RSS settings.
+ *
+ * XXX
+ * This is ugly and should be cleaned up once we busdma-fy
+ * RNDIS request bits.
+ */
rndis_msg request_msg;
+ uint8_t buf_req[PAGE_SIZE];
+
/* Fixme: Poor man's semaphore. */
uint32_t halt_complete_flag;
} rndis_request;
@@ -95,12 +110,13 @@ typedef struct rndis_device_ {
/*
* Externs
*/
+struct hv_vmbus_channel;
-int hv_rf_on_receive(netvsc_dev *net_dev,
- struct hv_device *device, netvsc_packet *pkt);
+int hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device,
+ struct hv_vmbus_channel *chan, netvsc_packet *pkt);
void hv_rf_receive_rollup(netvsc_dev *net_dev);
-void hv_rf_channel_rollup(netvsc_dev *net_dev);
-int hv_rf_on_device_add(struct hv_device *device, void *additl_info);
+void hv_rf_channel_rollup(struct hv_vmbus_channel *chan);
+int hv_rf_on_device_add(struct hv_device *device, void *additl_info, int nchan);
int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel);
int hv_rf_on_open(struct hv_device *device);
int hv_rf_on_close(struct hv_device *device);
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index 27fb3fd..a89a762 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -134,7 +134,6 @@ struct storvsc_softc {
uint32_t hs_num_out_reqs;
boolean_t hs_destroy;
boolean_t hs_drain_notify;
- boolean_t hs_open_multi_channel;
struct sema hs_drain_sema;
struct hv_storvsc_request hs_init_req;
struct hv_storvsc_request hs_reset_req;
@@ -324,9 +323,6 @@ get_stor_device(struct hv_device *device,
struct storvsc_softc *sc;
sc = device_get_softc(device->device);
- if (sc == NULL) {
- return NULL;
- }
if (outbound) {
/*
@@ -350,29 +346,19 @@ get_stor_device(struct hv_device *device,
return sc;
}
-/**
- * @brief Callback handler, will be invoked when receive mutil-channel offer
- *
- * @param context new multi-channel
- */
static void
-storvsc_handle_sc_creation(void *context)
+storvsc_subchan_attach(struct hv_vmbus_channel *new_channel)
{
- hv_vmbus_channel *new_channel;
struct hv_device *device;
struct storvsc_softc *sc;
struct vmstor_chan_props props;
int ret = 0;
- new_channel = (hv_vmbus_channel *)context;
- device = new_channel->primary_channel->device;
+ device = new_channel->device;
sc = get_stor_device(device, TRUE);
if (sc == NULL)
return;
- if (FALSE == sc->hs_open_multi_channel)
- return;
-
memset(&props, 0, sizeof(props));
ret = hv_vmbus_channel_open(new_channel,
@@ -395,11 +381,12 @@ storvsc_handle_sc_creation(void *context)
static void
storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
{
+ struct hv_vmbus_channel **subchan;
struct storvsc_softc *sc;
struct hv_storvsc_request *request;
struct vstor_packet *vstor_packet;
int request_channels_cnt = 0;
- int ret;
+ int ret, i;
/* get multichannels count that need to create */
request_channels_cnt = MIN(max_chans, mp_ncpus);
@@ -413,9 +400,6 @@ storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
request = &sc->hs_init_req;
- /* Establish a handler for multi-channel */
- dev->channel->sc_creation_callback = storvsc_handle_sc_creation;
-
/* request the host to create multi-channel */
memset(request, 0, sizeof(struct hv_storvsc_request));
@@ -451,7 +435,15 @@ storvsc_send_multichannel_request(struct hv_device *dev, int max_chans)
return;
}
- sc->hs_open_multi_channel = TRUE;
+ /* Wait for sub-channels setup to complete. */
+ subchan = vmbus_get_subchan(dev->channel, request_channels_cnt);
+
+ /* Attach the sub-channels. */
+ for (i = 0; i < request_channels_cnt; ++i)
+ storvsc_subchan_attach(subchan[i]);
+
+ /* Release the sub-channels. */
+ vmbus_rel_subchan(subchan, request_channels_cnt);
if (bootverbose)
printf("Storvsc create multi-channel success!\n");
@@ -883,12 +875,7 @@ hv_storvsc_on_channel_callback(void *context)
struct hv_storvsc_request *request;
struct vstor_packet *vstor_packet;
- if (channel->primary_channel != NULL){
- device = channel->primary_channel->device;
- } else {
- device = channel->device;
- }
-
+ device = channel->device;
KASSERT(device, ("device is NULL"));
sc = get_stor_device(device, FALSE);
@@ -970,6 +957,7 @@ storvsc_probe(device_t dev)
if(bootverbose)
device_printf(dev,
"Enlightened ATA/IDE detected\n");
+ device_set_desc(dev, g_drv_props_table[DRIVER_BLKVSC].drv_desc);
ret = BUS_PROBE_DEFAULT;
} else if(bootverbose)
device_printf(dev, "Emulated ATA/IDE set (hw.ata.disk_enable set)\n");
@@ -977,6 +965,7 @@ storvsc_probe(device_t dev)
case DRIVER_STORVSC:
if(bootverbose)
device_printf(dev, "Enlightened SCSI device detected\n");
+ device_set_desc(dev, g_drv_props_table[DRIVER_STORVSC].drv_desc);
ret = BUS_PROBE_DEFAULT;
break;
default:
@@ -1014,10 +1003,6 @@ storvsc_attach(device_t dev)
root_mount_token = root_mount_hold("storvsc");
sc = device_get_softc(dev);
- if (sc == NULL) {
- ret = ENOMEM;
- goto cleanup;
- }
stor_type = storvsc_get_storage_type(dev);
@@ -1026,15 +1011,12 @@ storvsc_attach(device_t dev)
goto cleanup;
}
- bzero(sc, sizeof(struct storvsc_softc));
-
/* fill in driver specific properties */
sc->hs_drv_props = &g_drv_props_table[stor_type];
/* fill in device specific properties */
sc->hs_unit = device_get_unit(dev);
sc->hs_dev = hv_dev;
- device_set_desc(dev, g_drv_props_table[stor_type].drv_desc);
LIST_INIT(&sc->hs_free_list);
mtx_init(&sc->hs_lock, "hvslck", NULL, MTX_DEF);
@@ -1081,7 +1063,6 @@ storvsc_attach(device_t dev)
sc->hs_destroy = FALSE;
sc->hs_drain_notify = FALSE;
- sc->hs_open_multi_channel = FALSE;
sema_init(&sc->hs_drain_sema, 0, "Store Drain Sema");
ret = hv_storvsc_connect_vsp(hv_dev);
@@ -1186,9 +1167,7 @@ storvsc_detach(device_t dev)
struct hv_sgl_node *sgl_node = NULL;
int j = 0;
- mtx_lock(&hv_device->channel->inbound_lock);
sc->hs_destroy = TRUE;
- mtx_unlock(&hv_device->channel->inbound_lock);
/*
* At this point, all outbound traffic should be disabled. We
@@ -2147,8 +2126,9 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
reqp->softc->hs_frozen = 0;
}
storvsc_free_request(sc, reqp);
- xpt_done(ccb);
mtx_unlock(&sc->hs_lock);
+
+ xpt_done_direct(ccb);
}
/**
diff --git a/sys/dev/hyperv/utilities/hv_heartbeat.c b/sys/dev/hyperv/utilities/hv_heartbeat.c
index c1b6da5..5f4fcf6 100644
--- a/sys/dev/hyperv/utilities/hv_heartbeat.c
+++ b/sys/dev/hyperv/utilities/hv_heartbeat.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Microsoft Corp.
+ * Copyright (c) 2014,2016 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -94,6 +94,10 @@ static int
hv_heartbeat_probe(device_t dev)
{
const char *p = vmbus_get_type(dev);
+
+ if (resource_disabled("hvheartbeat", 0))
+ return ENXIO;
+
if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
device_set_desc(dev, "Hyper-V Heartbeat Service");
return BUS_PROBE_DEFAULT;
diff --git a/sys/dev/hyperv/utilities/hv_kvp.c b/sys/dev/hyperv/utilities/hv_kvp.c
index 8517918..b1f6ec1 100644
--- a/sys/dev/hyperv/utilities/hv_kvp.c
+++ b/sys/dev/hyperv/utilities/hv_kvp.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Microsoft Corp.
+ * Copyright (c) 2014,2016 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -304,28 +304,11 @@ hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg,
{
int err_ip, err_subnet, err_gway, err_dns, err_adap;
int UNUSED_FLAG = 1;
- int guid_index;
struct hv_device *hv_dev; /* GUID Data Structure */
hn_softc_t *sc; /* hn softc structure */
char if_name[4];
- unsigned char guid_instance[40];
- char *guid_data = NULL;
char buf[39];
- struct guid_extract {
- char a1[2];
- char a2[2];
- char a3[2];
- char a4[2];
- char b1[2];
- char b2[2];
- char c1[2];
- char c2[2];
- char d[4];
- char e[12];
- };
-
- struct guid_extract *id;
device_t *devs;
int devcnt;
@@ -352,17 +335,7 @@ hv_kvp_convert_utf16_ipinfo_to_utf8(struct hv_kvp_ip_msg *host_ip_msg,
/* Trying to find GUID of Network Device */
hv_dev = sc->hn_dev_obj;
- for (guid_index = 0; guid_index < 16; guid_index++) {
- sprintf(&guid_instance[guid_index * 2], "%02x",
- hv_dev->device_id.data[guid_index]);
- }
-
- guid_data = (char *)guid_instance;
- id = (struct guid_extract *)guid_data;
- snprintf(buf, sizeof(buf), "{%.2s%.2s%.2s%.2s-%.2s%.2s-%.2s%.2s-%.4s-%s}",
- id->a4, id->a3, id->a2, id->a1,
- id->b2, id->b1, id->c2, id->c1, id->d, id->e);
- guid_data = NULL;
+ snprintf_hv_guid(buf, sizeof(buf), &hv_dev->device_id);
sprintf(if_name, "%s%d", "hn", device_get_unit(devs[devcnt]));
if (strncmp(buf, (char *)umsg->body.kvp_ip_val.adapter_id, 39) == 0) {
@@ -890,6 +863,10 @@ static int
hv_kvp_probe(device_t dev)
{
const char *p = vmbus_get_type(dev);
+
+ if (resource_disabled("hvkvp", 0))
+ return ENXIO;
+
if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
device_set_desc(dev, "Hyper-V KVP Service");
return BUS_PROBE_DEFAULT;
diff --git a/sys/dev/hyperv/utilities/hv_kvp.h b/sys/dev/hyperv/utilities/hv_kvp.h
index b62149e..6474e18 100644
--- a/sys/dev/hyperv/utilities/hv_kvp.h
+++ b/sys/dev/hyperv/utilities/hv_kvp.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Microsoft Corp.
+ * Copyright (c) 2014,2016 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/hyperv/utilities/hv_shutdown.c b/sys/dev/hyperv/utilities/hv_shutdown.c
index 20bc65e..3dfbf13 100644
--- a/sys/dev/hyperv/utilities/hv_shutdown.c
+++ b/sys/dev/hyperv/utilities/hv_shutdown.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Microsoft Corp.
+ * Copyright (c) 2014,2016 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -116,6 +116,10 @@ static int
hv_shutdown_probe(device_t dev)
{
const char *p = vmbus_get_type(dev);
+
+ if (resource_disabled("hvshutdown", 0))
+ return ENXIO;
+
if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
device_set_desc(dev, "Hyper-V Shutdown Service");
return BUS_PROBE_DEFAULT;
diff --git a/sys/dev/hyperv/utilities/hv_timesync.c b/sys/dev/hyperv/utilities/hv_timesync.c
index d1ea904..eeb0434 100644
--- a/sys/dev/hyperv/utilities/hv_timesync.c
+++ b/sys/dev/hyperv/utilities/hv_timesync.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Microsoft Corp.
+ * Copyright (c) 2014,2016 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -171,6 +171,10 @@ static int
hv_timesync_probe(device_t dev)
{
const char *p = vmbus_get_type(dev);
+
+ if (resource_disabled("hvtimesync", 0))
+ return ENXIO;
+
if (!memcmp(p, &service_guid, sizeof(hv_guid))) {
device_set_desc(dev, "Hyper-V Time Synch Service");
return BUS_PROBE_DEFAULT;
diff --git a/sys/dev/hyperv/utilities/hv_util.c b/sys/dev/hyperv/utilities/hv_util.c
index 7d19b3f..3119e3f 100644
--- a/sys/dev/hyperv/utilities/hv_util.c
+++ b/sys/dev/hyperv/utilities/hv_util.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014 Microsoft Corp.
+ * Copyright (c) 2014,2016 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/sys/dev/hyperv/utilities/hv_util.h b/sys/dev/hyperv/utilities/hv_util.h
index 708dca8..e202784 100644
--- a/sys/dev/hyperv/utilities/hv_util.h
+++ b/sys/dev/hyperv/utilities/hv_util.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
diff --git a/sys/dev/hyperv/vmbus/amd64/hv_vector.S b/sys/dev/hyperv/vmbus/amd64/hv_vector.S
new file mode 100644
index 0000000..2594483
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/amd64/hv_vector.S
@@ -0,0 +1,46 @@
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when it is running on Hyper-V.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(hv_vmbus_callback)
+ PUSH_FRAME
+ FAKE_MCOUNT(TF_RIP(%rsp))
+ movq %rsp, %rdi
+ call hv_vector_handler
+ MEXITCOUNT
+ jmp doreti
diff --git a/sys/dev/hyperv/vmbus/hv_channel.c b/sys/dev/hyperv/vmbus/hv_channel.c
index bb777cc..6da0643 100644
--- a/sys/dev/hyperv/vmbus/hv_channel.c
+++ b/sys/dev/hyperv/vmbus/hv_channel.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/sysctl.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -80,6 +81,90 @@ vmbus_channel_set_event(hv_vmbus_channel *channel)
}
+static int
+vmbus_channel_sysctl_monalloc(SYSCTL_HANDLER_ARGS)
+{
+ struct hv_vmbus_channel *chan = arg1;
+ int alloc = 0;
+
+ if (chan->offer_msg.monitor_allocated)
+ alloc = 1;
+ return sysctl_handle_int(oidp, &alloc, 0, req);
+}
+
+static void
+vmbus_channel_sysctl_create(hv_vmbus_channel* channel)
+{
+ device_t dev;
+ struct sysctl_oid *devch_sysctl;
+ struct sysctl_oid *devch_id_sysctl, *devch_sub_sysctl;
+ struct sysctl_oid *devch_id_in_sysctl, *devch_id_out_sysctl;
+ struct sysctl_ctx_list *ctx;
+ uint32_t ch_id;
+ uint16_t sub_ch_id;
+ char name[16];
+
+ hv_vmbus_channel* primary_ch = channel->primary_channel;
+
+ if (primary_ch == NULL) {
+ dev = channel->device->device;
+ ch_id = channel->offer_msg.child_rel_id;
+ } else {
+ dev = primary_ch->device->device;
+ ch_id = primary_ch->offer_msg.child_rel_id;
+ sub_ch_id = channel->offer_msg.offer.sub_channel_index;
+ }
+ ctx = device_get_sysctl_ctx(dev);
+ /* This creates dev.DEVNAME.DEVUNIT.channel tree */
+ devch_sysctl = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+ OID_AUTO, "channel", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+ /* This creates dev.DEVNAME.DEVUNIT.channel.CHANID tree */
+ snprintf(name, sizeof(name), "%d", ch_id);
+ devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(devch_sysctl),
+ OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
+ if (primary_ch != NULL) {
+ devch_sub_sysctl = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(devch_id_sysctl),
+ OID_AUTO, "sub", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+ snprintf(name, sizeof(name), "%d", sub_ch_id);
+ devch_id_sysctl = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(devch_sub_sysctl),
+ OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl),
+ OID_AUTO, "chanid", CTLFLAG_RD,
+ &channel->offer_msg.child_rel_id, 0, "channel id");
+ }
+ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
+ "cpu", CTLFLAG_RD, &channel->target_cpu, 0, "owner CPU id");
+ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(devch_id_sysctl), OID_AUTO,
+ "monitor_allocated", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ channel, 0, vmbus_channel_sysctl_monalloc, "I",
+ "is monitor allocated to this channel");
+
+ devch_id_in_sysctl = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(devch_id_sysctl),
+ OID_AUTO,
+ "in",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+ devch_id_out_sysctl = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(devch_id_sysctl),
+ OID_AUTO,
+ "out",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
+ hv_ring_buffer_stat(ctx,
+ SYSCTL_CHILDREN(devch_id_in_sysctl),
+ &(channel->inbound),
+ "inbound ring buffer stats");
+ hv_ring_buffer_stat(ctx,
+ SYSCTL_CHILDREN(devch_id_out_sysctl),
+ &(channel->outbound),
+ "outbound ring buffer stats");
+}
+
/**
* @brief Open the specified channel
*/
@@ -143,6 +228,9 @@ hv_vmbus_channel_open(
in,
recv_ring_buffer_size);
+ /* Create sysctl tree for this channel */
+ vmbus_channel_sysctl_create(new_channel);
+
/**
* Establish the gpadl for the ring buffer
*/
@@ -182,12 +270,12 @@ hv_vmbus_channel_open(
if (user_data_len)
memcpy(open_msg->user_data, user_data, user_data_len);
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_INSERT_TAIL(
&hv_vmbus_g_connection.channel_msg_anchor,
open_info,
msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
ret = hv_vmbus_post_message(
open_msg, sizeof(hv_vmbus_channel_open_channel));
@@ -214,12 +302,12 @@ hv_vmbus_channel_open(
}
cleanup:
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_REMOVE(
&hv_vmbus_g_connection.channel_msg_anchor,
open_info,
msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
sema_destroy(&open_info->wait_sema);
free(open_info, M_DEVBUF);
@@ -384,17 +472,22 @@ hv_vmbus_channel_establish_gpadl(
hv_vmbus_channel_msg_info* curr;
uint32_t next_gpadl_handle;
- next_gpadl_handle = hv_vmbus_g_connection.next_gpadl_handle;
- atomic_add_int((int*) &hv_vmbus_g_connection.next_gpadl_handle, 1);
+ next_gpadl_handle = atomic_fetchadd_int(
+ &hv_vmbus_g_connection.next_gpadl_handle, 1);
ret = vmbus_channel_create_gpadl_header(
contig_buffer, size, &msg_info, &msg_count);
- if(ret != 0) { /* if(allocation failed) return immediately */
- /* reverse atomic_add_int above */
- atomic_subtract_int((int*)
- &hv_vmbus_g_connection.next_gpadl_handle, 1);
- return ret;
+ if(ret != 0) {
+ /*
+ * XXX
+ * We can _not_ even revert the above incremental,
+ * if multiple GPADL establishments are running
+ * parallelly, decrement the global next_gpadl_handle
+ * is calling for _big_ trouble. A better solution
+ * is to have a 0-based GPADL id bitmap ...
+ */
+ return ret;
}
sema_init(&msg_info->wait_sema, 0, "Open Info Sema");
@@ -403,13 +496,13 @@ hv_vmbus_channel_establish_gpadl(
gpadl_msg->child_rel_id = channel->offer_msg.child_rel_id;
gpadl_msg->gpadl = next_gpadl_handle;
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_INSERT_TAIL(
&hv_vmbus_g_connection.channel_msg_anchor,
msg_info,
msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
ret = hv_vmbus_post_message(
gpadl_msg,
@@ -448,10 +541,10 @@ hv_vmbus_channel_establish_gpadl(
cleanup:
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
msg_info, msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
sema_destroy(&msg_info->wait_sema);
free(msg_info, M_DEVBUF);
@@ -490,10 +583,10 @@ hv_vmbus_channel_teardown_gpdal(
msg->child_rel_id = channel->offer_msg.child_rel_id;
msg->gpadl = gpadl_handle;
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_msg_anchor,
info, msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
ret = hv_vmbus_post_message(msg,
sizeof(hv_vmbus_channel_gpadl_teardown));
@@ -506,10 +599,10 @@ cleanup:
/*
* Received a torndown response
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_REMOVE(&hv_vmbus_g_connection.channel_msg_anchor,
info, msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
sema_destroy(&info->wait_sema);
free(info, M_DEVBUF);
@@ -525,20 +618,13 @@ hv_vmbus_channel_close_internal(hv_vmbus_channel *channel)
hv_vmbus_channel_msg_info* info;
channel->state = HV_CHANNEL_OPEN_STATE;
- channel->sc_creation_callback = NULL;
/*
* set rxq to NULL to avoid more requests be scheduled
*/
channel->rxq = NULL;
taskqueue_drain(rxq, &channel->channel_task);
- /*
- * Grab the lock to prevent race condition when a packet received
- * and unloading driver is in the process.
- */
- mtx_lock(&channel->inbound_lock);
channel->on_channel_callback = NULL;
- mtx_unlock(&channel->inbound_lock);
/**
* Send a closing message
@@ -857,7 +943,6 @@ hv_vmbus_channel_recv_packet_raw(
{
int ret;
uint32_t packetLen;
- uint32_t userLen;
hv_vm_packet_descriptor desc;
*buffer_actual_len = 0;
@@ -871,8 +956,6 @@ hv_vmbus_channel_recv_packet_raw(
return (0);
packetLen = desc.length8 << 3;
- userLen = packetLen - (desc.data_offset8 << 3);
-
*buffer_actual_len = packetLen;
if (packetLen > buffer_len)
@@ -915,12 +998,6 @@ VmbusProcessChannelEvent(void* context, int pending)
* callback to NULL. This closes the window.
*/
- /*
- * Disable the lock due to newly added WITNESS check in r277723.
- * Will seek other way to avoid race condition.
- * -- whu
- */
- // mtx_lock(&channel->inbound_lock);
if (channel->on_channel_callback != NULL) {
arg = channel->channel_callback_context;
is_batched_reading = channel->batched_reading;
@@ -947,5 +1024,4 @@ VmbusProcessChannelEvent(void* context, int pending)
bytes_to_read = 0;
} while (is_batched_reading && (bytes_to_read != 0));
}
- // mtx_unlock(&channel->inbound_lock);
}
diff --git a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
index ab6e8ad..00b54ed 100644
--- a/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
+++ b/sys/dev/hyperv/vmbus/hv_channel_mgmt.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -30,7 +30,10 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/mbuf.h>
+#include <sys/mutex.h>
#include "hv_vmbus_priv.h"
@@ -95,6 +98,14 @@ typedef struct hv_work_item {
void* context;
} hv_work_item;
+static struct mtx vmbus_chwait_lock;
+MTX_SYSINIT(vmbus_chwait_lk, &vmbus_chwait_lock, "vmbus primarych wait lock",
+ MTX_DEF);
+static uint32_t vmbus_chancnt;
+static uint32_t vmbus_devcnt;
+
+#define VMBUS_CHANCNT_DONE 0x80000000
+
/**
* Implementation of the work abstraction.
*/
@@ -143,9 +154,7 @@ hv_vmbus_allocate_channel(void)
M_DEVBUF,
M_WAITOK | M_ZERO);
- mtx_init(&channel->inbound_lock, "channel inbound", NULL, MTX_DEF);
mtx_init(&channel->sc_lock, "vmbus multi channel", NULL, MTX_DEF);
-
TAILQ_INIT(&channel->sc_list_anchor);
return (channel);
@@ -158,8 +167,6 @@ void
hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
{
mtx_destroy(&channel->sc_lock);
- mtx_destroy(&channel->inbound_lock);
-
free(channel, M_DEVBUF);
}
@@ -170,13 +177,10 @@ hv_vmbus_free_vmbus_channel(hv_vmbus_channel* channel)
static void
vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
{
- boolean_t f_new;
hv_vmbus_channel* channel;
int ret;
uint32_t relid;
- f_new = TRUE;
- channel = NULL;
relid = new_channel->offer_msg.child_rel_id;
/*
* Make sure this is a new offer
@@ -185,31 +189,24 @@ vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
hv_vmbus_g_connection.channels[relid] = new_channel;
TAILQ_FOREACH(channel, &hv_vmbus_g_connection.channel_anchor,
- list_entry)
- {
+ list_entry) {
if (memcmp(&channel->offer_msg.offer.interface_type,
&new_channel->offer_msg.offer.interface_type,
sizeof(hv_guid)) == 0 &&
memcmp(&channel->offer_msg.offer.interface_instance,
&new_channel->offer_msg.offer.interface_instance,
- sizeof(hv_guid)) == 0) {
- f_new = FALSE;
+ sizeof(hv_guid)) == 0)
break;
- }
}
- if (f_new) {
- /* Insert at tail */
- TAILQ_INSERT_TAIL(
- &hv_vmbus_g_connection.channel_anchor,
- new_channel,
- list_entry);
+ if (channel == NULL) {
+ /* Install the new primary channel */
+ TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
+ new_channel, list_entry);
}
mtx_unlock(&hv_vmbus_g_connection.channel_lock);
- /*XXX add new channel to percpu_list */
-
- if (!f_new) {
+ if (channel != NULL) {
/*
* Check if this is a sub channel.
*/
@@ -218,17 +215,20 @@ vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
* It is a sub channel offer, process it.
*/
new_channel->primary_channel = channel;
+ new_channel->device = channel->device;
mtx_lock(&channel->sc_lock);
- TAILQ_INSERT_TAIL(
- &channel->sc_list_anchor,
- new_channel,
- sc_list_entry);
+ TAILQ_INSERT_TAIL(&channel->sc_list_anchor,
+ new_channel, sc_list_entry);
mtx_unlock(&channel->sc_lock);
+ if (bootverbose) {
+ printf("VMBUS get multi-channel offer, "
+ "rel=%u, sub=%u\n",
+ new_channel->offer_msg.child_rel_id,
+ new_channel->offer_msg.offer.sub_channel_index);
+ }
+
/* Insert new channel into channel_anchor. */
- printf("VMBUS get multi-channel offer, rel=%u,sub=%u\n",
- new_channel->offer_msg.child_rel_id,
- new_channel->offer_msg.offer.sub_channel_index);
mtx_lock(&hv_vmbus_g_connection.channel_lock);
TAILQ_INSERT_TAIL(&hv_vmbus_g_connection.channel_anchor,
new_channel, list_entry);
@@ -239,17 +239,25 @@ vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
"its primary channel is <%p>.\n",
new_channel, new_channel->primary_channel);
- /*XXX add it to percpu_list */
-
new_channel->state = HV_CHANNEL_OPEN_STATE;
- if (channel->sc_creation_callback != NULL) {
- channel->sc_creation_callback(new_channel);
- }
+
+ /*
+ * Bump up sub-channel count and notify anyone that is
+ * interested in this sub-channel, after this sub-channel
+ * is setup.
+ */
+ mtx_lock(&channel->sc_lock);
+ channel->subchan_cnt++;
+ mtx_unlock(&channel->sc_lock);
+ wakeup(channel);
+
return;
}
- hv_vmbus_free_vmbus_channel(new_channel);
- return;
+ printf("VMBUS: duplicated primary channel%u\n",
+ new_channel->offer_msg.child_rel_id);
+ hv_vmbus_free_vmbus_channel(new_channel);
+ return;
}
new_channel->state = HV_CHANNEL_OPEN_STATE;
@@ -271,13 +279,37 @@ vmbus_channel_process_offer(hv_vmbus_channel *new_channel)
ret = hv_vmbus_child_device_register(new_channel->device);
if (ret != 0) {
mtx_lock(&hv_vmbus_g_connection.channel_lock);
- TAILQ_REMOVE(
- &hv_vmbus_g_connection.channel_anchor,
- new_channel,
- list_entry);
+ TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
+ new_channel, list_entry);
mtx_unlock(&hv_vmbus_g_connection.channel_lock);
hv_vmbus_free_vmbus_channel(new_channel);
}
+
+ mtx_lock(&vmbus_chwait_lock);
+ vmbus_devcnt++;
+ mtx_unlock(&vmbus_chwait_lock);
+ wakeup(&vmbus_devcnt);
+}
+
+void
+vmbus_channel_cpu_set(struct hv_vmbus_channel *chan, int cpu)
+{
+ KASSERT(cpu >= 0 && cpu < mp_ncpus, ("invalid cpu %d", cpu));
+
+ if (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008 ||
+ hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) {
+ /* Only cpu0 is supported */
+ cpu = 0;
+ }
+
+ chan->target_cpu = cpu;
+ chan->target_vcpu = hv_vmbus_g_context.hv_vcpu_index[cpu];
+
+ if (bootverbose) {
+ printf("vmbus_chan%u: assigned to cpu%u [vcpu%u]\n",
+ chan->offer_msg.child_rel_id,
+ chan->target_cpu, chan->target_vcpu);
+ }
}
/**
@@ -312,11 +344,12 @@ static uint32_t next_vcpu;
* distributed across all available CPUs.
*/
static void
-vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
+vmbus_channel_select_defcpu(struct hv_vmbus_channel *channel)
{
uint32_t current_cpu;
int i;
boolean_t is_perf_channel = FALSE;
+ const hv_guid *guid = &channel->offer_msg.offer.interface_type;
for (i = PERF_CHN_NIC; i < MAX_PERF_CHN; i++) {
if (memcmp(guid->data, high_perf_devices[i].data,
@@ -326,24 +359,14 @@ vmbus_channel_select_cpu(hv_vmbus_channel *channel, hv_guid *guid)
}
}
- if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
- (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7) ||
- (!is_perf_channel)) {
- /* Host's view of guest cpu */
- channel->target_vcpu = 0;
- /* Guest's own view of cpu */
- channel->target_cpu = 0;
+ if (!is_perf_channel) {
+ /* Stick to cpu0 */
+ vmbus_channel_cpu_set(channel, 0);
return;
}
/* mp_ncpus should have the number cpus currently online */
current_cpu = (++next_vcpu % mp_ncpus);
- channel->target_cpu = current_cpu;
- channel->target_vcpu =
- hv_vmbus_g_context.hv_vcpu_index[current_cpu];
- if (bootverbose)
- printf("VMBUS: Total online cpus %d, assign perf channel %d "
- "to vcpu %d, cpu %d\n", mp_ncpus, i, channel->target_vcpu,
- current_cpu);
+ vmbus_channel_cpu_set(channel, current_cpu);
}
/**
@@ -362,12 +385,6 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
offer = (hv_vmbus_channel_offer_channel*) hdr;
- hv_guid *guidType;
- hv_guid *guidInstance;
-
- guidType = &offer->offer.interface_type;
- guidInstance = &offer->offer.interface_instance;
-
// copy offer data
copied = malloc(sizeof(*copied), M_DEVBUF, M_NOWAIT);
if (copied == NULL) {
@@ -377,6 +394,11 @@ vmbus_channel_on_offer(hv_vmbus_channel_msg_header* hdr)
memcpy(copied, hdr, sizeof(*copied));
hv_queue_work_item(vmbus_channel_on_offer_internal, copied);
+
+ mtx_lock(&vmbus_chwait_lock);
+ if ((vmbus_chancnt & VMBUS_CHANCNT_DONE) == 0)
+ vmbus_chancnt++;
+ mtx_unlock(&vmbus_chwait_lock);
}
static void
@@ -414,17 +436,14 @@ vmbus_channel_on_offer_internal(void* context)
offer->connection_id;
}
- /*
- * Bind the channel to a chosen cpu.
- */
- vmbus_channel_select_cpu(new_channel,
- &offer->offer.interface_type);
-
memcpy(&new_channel->offer_msg, offer,
sizeof(hv_vmbus_channel_offer_channel));
new_channel->monitor_group = (uint8_t) offer->monitor_id / 32;
new_channel->monitor_bit = (uint8_t) offer->monitor_id % 32;
+ /* Select default cpu for this channel. */
+ vmbus_channel_select_defcpu(new_channel);
+
vmbus_channel_process_offer(new_channel);
free(offer, M_DEVBUF);
@@ -458,7 +477,10 @@ vmbus_channel_on_offer_rescind_internal(void *context)
hv_vmbus_channel* channel;
channel = (hv_vmbus_channel*)context;
- hv_vmbus_child_device_unregister(channel->device);
+ if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
+ /* Only primary channel owns the hv_device */
+ hv_vmbus_child_device_unregister(channel->device);
+ }
}
/**
@@ -468,6 +490,11 @@ vmbus_channel_on_offer_rescind_internal(void *context)
static void
vmbus_channel_on_offers_delivered(hv_vmbus_channel_msg_header* hdr)
{
+
+ mtx_lock(&vmbus_chwait_lock);
+ vmbus_chancnt |= VMBUS_CHANCNT_DONE;
+ mtx_unlock(&vmbus_chwait_lock);
+ wakeup(&vmbus_chancnt);
}
/**
@@ -490,7 +517,7 @@ vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
/*
* Find the open msg, copy the result and signal/unblock the wait event
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
msg_list_entry) {
@@ -508,7 +535,7 @@ vmbus_channel_on_open_result(hv_vmbus_channel_msg_header* hdr)
}
}
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
}
@@ -532,7 +559,7 @@ vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
/* Find the establish msg, copy the result and signal/unblock
* the wait event
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
msg_list_entry) {
request_header = (hv_vmbus_channel_msg_header*) msg_info->msg;
@@ -551,7 +578,7 @@ vmbus_channel_on_gpadl_created(hv_vmbus_channel_msg_header* hdr)
}
}
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
}
/**
@@ -576,7 +603,7 @@ vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
* wait event.
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
msg_list_entry) {
@@ -596,7 +623,7 @@ vmbus_channel_on_gpadl_torndown(hv_vmbus_channel_msg_header* hdr)
}
}
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
}
/**
@@ -616,7 +643,7 @@ vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
versionResponse = (hv_vmbus_channel_version_response*)hdr;
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_FOREACH(msg_info, &hv_vmbus_g_connection.channel_msg_anchor,
msg_list_entry) {
requestHeader = (hv_vmbus_channel_msg_header*) msg_info->msg;
@@ -630,7 +657,7 @@ vmbus_channel_on_version_response(hv_vmbus_channel_msg_header* hdr)
sema_post(&msg_info->wait_sema);
}
}
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
}
@@ -679,7 +706,10 @@ hv_vmbus_release_unattached_channels(void)
TAILQ_REMOVE(&hv_vmbus_g_connection.channel_anchor,
channel, list_entry);
- hv_vmbus_child_device_unregister(channel->device);
+ if (HV_VMBUS_CHAN_ISPRIMARY(channel)) {
+ /* Only primary channel owns the hv_device */
+ hv_vmbus_child_device_unregister(channel->device);
+ }
hv_vmbus_free_vmbus_channel(channel);
}
bzero(hv_vmbus_g_connection.channels,
@@ -742,3 +772,56 @@ vmbus_select_outgoing_channel(struct hv_vmbus_channel *primary)
return(outgoing_channel);
}
+
+void
+vmbus_scan(void)
+{
+ uint32_t chancnt;
+
+ mtx_lock(&vmbus_chwait_lock);
+ while ((vmbus_chancnt & VMBUS_CHANCNT_DONE) == 0)
+ mtx_sleep(&vmbus_chancnt, &vmbus_chwait_lock, 0, "waitch", 0);
+ chancnt = vmbus_chancnt & ~VMBUS_CHANCNT_DONE;
+
+ while (vmbus_devcnt != chancnt)
+ mtx_sleep(&vmbus_devcnt, &vmbus_chwait_lock, 0, "waitdev", 0);
+ mtx_unlock(&vmbus_chwait_lock);
+}
+
+struct hv_vmbus_channel **
+vmbus_get_subchan(struct hv_vmbus_channel *pri_chan, int subchan_cnt)
+{
+ struct hv_vmbus_channel **ret, *chan;
+ int i;
+
+ ret = malloc(subchan_cnt * sizeof(struct hv_vmbus_channel *), M_TEMP,
+ M_WAITOK);
+
+ mtx_lock(&pri_chan->sc_lock);
+
+ while (pri_chan->subchan_cnt < subchan_cnt)
+ mtx_sleep(pri_chan, &pri_chan->sc_lock, 0, "subch", 0);
+
+ i = 0;
+ TAILQ_FOREACH(chan, &pri_chan->sc_list_anchor, sc_list_entry) {
+ /* TODO: refcnt chan */
+ ret[i] = chan;
+
+ ++i;
+ if (i == subchan_cnt)
+ break;
+ }
+ KASSERT(i == subchan_cnt, ("invalid subchan count %d, should be %d",
+ pri_chan->subchan_cnt, subchan_cnt));
+
+ mtx_unlock(&pri_chan->sc_lock);
+
+ return ret;
+}
+
+void
+vmbus_rel_subchan(struct hv_vmbus_channel **subchan, int subchan_cnt __unused)
+{
+
+ free(subchan, M_TEMP);
+}
diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c
index fb1879d..0424b47 100644
--- a/sys/dev/hyperv/vmbus/hv_connection.c
+++ b/sys/dev/hyperv/vmbus/hv_connection.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -99,26 +99,26 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
* Add to list before we send the request since we may receive the
* response before returning from this routine
*/
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_INSERT_TAIL(
&hv_vmbus_g_connection.channel_msg_anchor,
msg_info,
msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
ret = hv_vmbus_post_message(
msg,
sizeof(hv_vmbus_channel_initiate_contact));
if (ret != 0) {
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_REMOVE(
&hv_vmbus_g_connection.channel_msg_anchor,
msg_info,
msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
return (ret);
}
@@ -127,12 +127,12 @@ hv_vmbus_negotiate_version(hv_vmbus_channel_msg_info *msg_info,
*/
ret = sema_timedwait(&msg_info->wait_sema, 5 * hz); /* KYS 5 seconds */
- mtx_lock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_lock(&hv_vmbus_g_connection.channel_msg_lock);
TAILQ_REMOVE(
&hv_vmbus_g_connection.channel_msg_anchor,
msg_info,
msg_list_entry);
- mtx_unlock_spin(&hv_vmbus_g_connection.channel_msg_lock);
+ mtx_unlock(&hv_vmbus_g_connection.channel_msg_lock);
/**
* Check if successful
@@ -169,7 +169,7 @@ hv_vmbus_connect(void) {
TAILQ_INIT(&hv_vmbus_g_connection.channel_msg_anchor);
mtx_init(&hv_vmbus_g_connection.channel_msg_lock, "vmbus channel msg",
- NULL, MTX_SPIN);
+ NULL, MTX_DEF);
TAILQ_INIT(&hv_vmbus_g_connection.channel_anchor);
mtx_init(&hv_vmbus_g_connection.channel_lock, "vmbus channel",
@@ -308,14 +308,18 @@ hv_vmbus_on_events(int cpu)
KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: "
"cpu out of range!"));
+ page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
+ event = (hv_vmbus_synic_event_flags *)
+ page_addr + HV_VMBUS_MESSAGE_SINT;
if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
(hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5;
/*
* receive size is 1/2 page and divide that by 4 bytes
*/
- recv_interrupt_page =
- hv_vmbus_g_connection.recv_interrupt_page;
+ if (synch_test_and_clear_bit(0, &event->flags32[0]))
+ recv_interrupt_page =
+ hv_vmbus_g_connection.recv_interrupt_page;
} else {
/*
* On Host with Win8 or above, the event page can be
@@ -323,9 +327,6 @@ hv_vmbus_on_events(int cpu)
* that has the pending interrupt.
*/
maxdword = HV_EVENT_FLAGS_DWORD_COUNT;
- page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
- event = (hv_vmbus_synic_event_flags *)
- page_addr + HV_VMBUS_MESSAGE_SINT;
recv_interrupt_page = event->flags32;
}
@@ -367,31 +368,35 @@ hv_vmbus_on_events(int cpu)
/**
* Send a msg on the vmbus's message connection
*/
-int hv_vmbus_post_message(void *buffer, size_t bufferLen) {
- int ret = 0;
+int hv_vmbus_post_message(void *buffer, size_t bufferLen)
+{
hv_vmbus_connection_id connId;
- unsigned retries = 0;
+ sbintime_t time = SBT_1MS;
+ int retries;
+ int ret;
- /* NetScaler delays from previous code were consolidated here */
- static int delayAmount[] = {100, 100, 100, 500, 500, 5000, 5000, 5000};
+ connId.as_uint32_t = 0;
+ connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
- /* for(each entry in delayAmount) try to post message,
- * delay a little bit before retrying
+ /*
+ * We retry to cope with transient failures caused by host side's
+ * insufficient resources. 20 times should suffice in practice.
*/
- for (retries = 0;
- retries < sizeof(delayAmount)/sizeof(delayAmount[0]); retries++) {
- connId.as_uint32_t = 0;
- connId.u.id = HV_VMBUS_MESSAGE_CONNECTION_ID;
- ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer, bufferLen);
- if (ret != HV_STATUS_INSUFFICIENT_BUFFERS)
- break;
- /* TODO: KYS We should use a blocking wait call */
- DELAY(delayAmount[retries]);
+ for (retries = 0; retries < 20; retries++) {
+ ret = hv_vmbus_post_msg_via_msg_ipc(connId, 1, buffer,
+ bufferLen);
+ if (ret == HV_STATUS_SUCCESS)
+ return (0);
+
+ pause_sbt("pstmsg", time, 0, C_HARDCLOCK);
+ if (time < SBT_1S * 2)
+ time *= 2;
}
- KASSERT(ret == 0, ("Error VMBUS: Message Post Failed\n"));
+ KASSERT(ret == HV_STATUS_SUCCESS,
+ ("Error VMBUS: Message Post Failed, ret=%d\n", ret));
- return (ret);
+ return (EAGAIN);
}
/**
diff --git a/sys/dev/hyperv/vmbus/hv_et.c b/sys/dev/hyperv/vmbus/hv_et.c
index d961486..440b514 100644
--- a/sys/dev/hyperv/vmbus/hv_et.c
+++ b/sys/dev/hyperv/vmbus/hv_et.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2015 Microsoft Corp.
+ * Copyright (c) 2015,2016 Microsoft Corp.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,9 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/smp.h>
@@ -40,8 +43,7 @@ __FBSDID("$FreeBSD$");
#define HV_MAX_DELTA_TICKS 0xffffffffLL
#define HV_MIN_DELTA_TICKS 1LL
-static struct eventtimer et;
-static uint64_t periodticks[MAXCPU];
+static struct eventtimer *et;
static inline uint64_t
sbintime2tick(sbintime_t time)
@@ -60,11 +62,7 @@ hv_et_start(struct eventtimer *et, sbintime_t firsttime, sbintime_t periodtime)
timer_cfg.as_uint64 = 0;
timer_cfg.auto_enable = 1;
- timer_cfg.sintx = HV_VMBUS_MESSAGE_SINT;
-
- periodticks[curcpu] = sbintime2tick(periodtime);
- if (firsttime == 0)
- firsttime = periodtime;
+ timer_cfg.sintx = HV_VMBUS_TIMER_SINT;
current = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
current += sbintime2tick(firsttime);
@@ -87,45 +85,77 @@ hv_et_stop(struct eventtimer *et)
void
hv_et_intr(struct trapframe *frame)
{
- union hv_timer_config timer_cfg;
struct trapframe *oldframe;
struct thread *td;
- if (periodticks[curcpu] != 0) {
- uint64_t tick = sbintime2tick(periodticks[curcpu]);
- timer_cfg.as_uint64 = rdmsr(HV_X64_MSR_STIMER0_CONFIG);
- timer_cfg.enable = 0;
- timer_cfg.auto_enable = 1;
- timer_cfg.periodic = 1;
- periodticks[curcpu] = 0;
-
- wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64);
- wrmsr(HV_X64_MSR_STIMER0_COUNT, tick);
- }
-
- if (et.et_active) {
+ if (et->et_active) {
td = curthread;
td->td_intr_nesting_level++;
oldframe = td->td_intr_frame;
td->td_intr_frame = frame;
- et.et_event_cb(&et, et.et_arg);
+ et->et_event_cb(et, et->et_arg);
td->td_intr_frame = oldframe;
td->td_intr_nesting_level--;
}
}
-void
-hv_et_init(void)
+static void
+hv_et_identify(driver_t *driver, device_t parent)
{
- et.et_name = "HyperV";
- et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU | ET_FLAGS_PERIODIC;
- et.et_quality = 1000;
- et.et_frequency = HV_TIMER_FREQUENCY;
- et.et_min_period = (1LL << 32) / HV_TIMER_FREQUENCY;
- et.et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY);
- et.et_start = hv_et_start;
- et.et_stop = hv_et_stop;
- et.et_priv = &et;
- et_register(&et);
+ if (device_find_child(parent, "hv_et", -1) != NULL)
+ return;
+
+ device_add_child(parent, "hv_et", -1);
+}
+
+static int
+hv_et_probe(device_t dev)
+{
+ device_set_desc(dev, "Hyper-V event timer");
+
+ return (BUS_PROBE_NOWILDCARD);
}
+static int
+hv_et_attach(device_t dev)
+{
+ /* XXX: need allocate SINT and remove global et */
+ et = device_get_softc(dev);
+
+ et->et_name = "Hyper-V";
+ et->et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU;
+ et->et_quality = 1000;
+ et->et_frequency = HV_TIMER_FREQUENCY;
+ et->et_min_period = HV_MIN_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY);
+ et->et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY);
+ et->et_start = hv_et_start;
+ et->et_stop = hv_et_stop;
+ et->et_priv = dev;
+
+ return (et_register(et));
+}
+
+static int
+hv_et_detach(device_t dev)
+{
+ return (et_deregister(et));
+}
+
+static device_method_t hv_et_methods[] = {
+ DEVMETHOD(device_identify, hv_et_identify),
+ DEVMETHOD(device_probe, hv_et_probe),
+ DEVMETHOD(device_attach, hv_et_attach),
+ DEVMETHOD(device_detach, hv_et_detach),
+
+ DEVMETHOD_END
+};
+
+static driver_t hv_et_driver = {
+ "hv_et",
+ hv_et_methods,
+ sizeof(struct eventtimer)
+};
+
+static devclass_t hv_et_devclass;
+DRIVER_MODULE(hv_et, vmbus, hv_et_driver, hv_et_devclass, NULL, 0);
+MODULE_VERSION(hv_et, 1);
diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c
index 6afc2b8..70a5608 100644
--- a/sys/dev/hyperv/vmbus/hv_hv.c
+++ b/sys/dev/hyperv/vmbus/hv_hv.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -33,6 +33,7 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/timetc.h>
@@ -47,9 +48,16 @@ __FBSDID("$FreeBSD$");
#define HV_NANOSECONDS_PER_SEC 1000000000L
+#define HYPERV_INTERFACE 0x31237648 /* HV#1 */
static u_int hv_get_timecount(struct timecounter *tc);
+u_int hyperv_features;
+u_int hyperv_recommends;
+
+static u_int hyperv_pm_features;
+static u_int hyperv_features3;
+
/**
* Globals
*/
@@ -70,47 +78,6 @@ hv_get_timecount(struct timecounter *tc)
}
/**
- * @brief Query the cpuid for presence of windows hypervisor
- */
-int
-hv_vmbus_query_hypervisor_presence(void)
-{
- if (vm_guest != VM_GUEST_HV)
- return (0);
-
- return (hv_high >= HV_X64_CPUID_MIN && hv_high <= HV_X64_CPUID_MAX);
-}
-
-/**
- * @brief Get version of the windows hypervisor
- */
-static int
-hv_vmbus_get_hypervisor_version(void)
-{
- u_int regs[4];
- unsigned int maxLeaf;
- unsigned int op;
-
- /*
- * Its assumed that this is called after confirming that
- * Viridian is present
- * Query id and revision.
- */
- op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION;
- do_cpuid(op, regs);
-
- maxLeaf = regs[0];
- op = HV_CPU_ID_FUNCTION_HV_INTERFACE;
- do_cpuid(op, regs);
-
- if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) {
- op = HV_CPU_ID_FUNCTION_MS_HV_VERSION;
- do_cpuid(op, regs);
- }
- return (maxLeaf);
-}
-
-/**
* @brief Invoke the specified hypercall
*/
static uint64_t
@@ -159,9 +126,8 @@ hv_vmbus_do_hypercall(uint64_t control, void* input, void* output)
int
hv_vmbus_init(void)
{
- int max_leaf;
hv_vmbus_x64_msr_hypercall_contents hypercall_msr;
- void* virt_addr = 0;
+ void* virt_addr = NULL;
memset(
hv_vmbus_g_context.syn_ic_event_page,
@@ -176,8 +142,6 @@ hv_vmbus_init(void)
if (vm_guest != VM_GUEST_HV)
goto cleanup;
- max_leaf = hv_vmbus_get_hypervisor_version();
-
/*
* Write our OS info
*/
@@ -207,10 +171,6 @@ hv_vmbus_init(void)
hv_vmbus_g_context.hypercall_page = virt_addr;
- tc_init(&hv_timecounter); /* register virtual timecount */
-
- hv_et_init();
-
return (0);
cleanup:
@@ -368,6 +328,9 @@ hv_vmbus_synic_init(void *arg)
wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
shared_sint.as_uint64_t);
+ wrmsr(HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT,
+ shared_sint.as_uint64_t);
+
/* Enable the global synic bit */
sctrl.as_uint64_t = rdmsr(HV_X64_MSR_SCONTROL);
sctrl.u.enable = 1;
@@ -404,12 +367,23 @@ void hv_vmbus_synic_cleanup(void *arg)
shared_sint.u.masked = 1;
/*
- * Disable the interrupt
+	 * Disable the message SINT (HV_VMBUS_MESSAGE_SINT) interrupt
*/
wrmsr(
HV_X64_MSR_SINT0 + HV_VMBUS_MESSAGE_SINT,
shared_sint.as_uint64_t);
+ shared_sint.as_uint64_t = rdmsr(
+ HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT);
+
+ shared_sint.u.masked = 1;
+
+ /*
+	 * Disable the timer SINT (HV_VMBUS_TIMER_SINT) interrupt
+ */
+ wrmsr(
+ HV_X64_MSR_SINT0 + HV_VMBUS_TIMER_SINT,
+ shared_sint.as_uint64_t);
simp.as_uint64_t = rdmsr(HV_X64_MSR_SIMP);
simp.u.simp_enabled = 0;
simp.u.base_simp_gpa = 0;
@@ -423,3 +397,117 @@ void hv_vmbus_synic_cleanup(void *arg)
wrmsr(HV_X64_MSR_SIEFP, siefp.as_uint64_t);
}
+static bool
+hyperv_identify(void)
+{
+ u_int regs[4];
+ unsigned int maxLeaf;
+ unsigned int op;
+
+ if (vm_guest != VM_GUEST_HV)
+ return (false);
+
+ op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION;
+ do_cpuid(op, regs);
+ maxLeaf = regs[0];
+ if (maxLeaf < HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS)
+ return (false);
+
+ op = HV_CPU_ID_FUNCTION_HV_INTERFACE;
+ do_cpuid(op, regs);
+ if (regs[0] != HYPERV_INTERFACE)
+ return (false);
+
+ op = HV_CPU_ID_FUNCTION_MS_HV_FEATURES;
+ do_cpuid(op, regs);
+ if ((regs[0] & HV_FEATURE_MSR_HYPERCALL) == 0) {
+ /*
+ * Hyper-V w/o Hypercall is impossible; someone
+ * is faking Hyper-V.
+ */
+ return (false);
+ }
+ hyperv_features = regs[0];
+ hyperv_pm_features = regs[2];
+ hyperv_features3 = regs[3];
+
+ op = HV_CPU_ID_FUNCTION_MS_HV_VERSION;
+ do_cpuid(op, regs);
+ printf("Hyper-V Version: %d.%d.%d [SP%d]\n",
+ regs[1] >> 16, regs[1] & 0xffff, regs[0], regs[2]);
+
+ printf(" Features=0x%b\n", hyperv_features,
+ "\020"
+ "\001VPRUNTIME" /* MSR_VP_RUNTIME */
+ "\002TMREFCNT" /* MSR_TIME_REF_COUNT */
+ "\003SYNIC" /* MSRs for SynIC */
+ "\004SYNTM" /* MSRs for SynTimer */
+ "\005APIC" /* MSR_{EOI,ICR,TPR} */
+ "\006HYPERCALL" /* MSR_{GUEST_OS_ID,HYPERCALL} */
+ "\007VPINDEX" /* MSR_VP_INDEX */
+ "\010RESET" /* MSR_RESET */
+ "\011STATS" /* MSR_STATS_ */
+ "\012REFTSC" /* MSR_REFERENCE_TSC */
+ "\013IDLE" /* MSR_GUEST_IDLE */
+ "\014TMFREQ" /* MSR_{TSC,APIC}_FREQUENCY */
+ "\015DEBUG"); /* MSR_SYNTH_DEBUG_ */
+ printf(" PM Features=max C%u, 0x%b\n",
+ HV_PM_FEATURE_CSTATE(hyperv_pm_features),
+ (hyperv_pm_features & ~HV_PM_FEATURE_CSTATE_MASK),
+ "\020"
+ "\005C3HPET"); /* HPET is required for C3 state */
+ printf(" Features3=0x%b\n", hyperv_features3,
+ "\020"
+ "\001MWAIT" /* MWAIT */
+ "\002DEBUG" /* guest debug support */
+ "\003PERFMON" /* performance monitor */
+ "\004PCPUDPE" /* physical CPU dynamic partition event */
+ "\005XMMHC" /* hypercall input through XMM regs */
+ "\006IDLE" /* guest idle support */
+ "\007SLEEP" /* hypervisor sleep support */
+ "\010NUMA" /* NUMA distance query support */
+ "\011TMFREQ" /* timer frequency query (TSC, LAPIC) */
+ "\012SYNCMC" /* inject synthetic machine checks */
+ "\013CRASH" /* MSRs for guest crash */
+ "\014DEBUGMSR" /* MSRs for guest debug */
+ "\015NPIEP" /* NPIEP */
+ "\016HVDIS"); /* disabling hypervisor */
+
+ op = HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION;
+ do_cpuid(op, regs);
+ hyperv_recommends = regs[0];
+ if (bootverbose)
+ printf(" Recommends: %08x %08x\n", regs[0], regs[1]);
+
+ op = HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS;
+ do_cpuid(op, regs);
+ if (bootverbose) {
+ printf(" Limits: Vcpu:%d Lcpu:%d Int:%d\n",
+ regs[0], regs[1], regs[2]);
+ }
+
+ if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE) {
+ op = HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE;
+ do_cpuid(op, regs);
+ if (bootverbose) {
+ printf(" HW Features: %08x AMD: %08x\n",
+ regs[0], regs[3]);
+ }
+ }
+
+ return (true);
+}
+
+static void
+hyperv_init(void *dummy __unused)
+{
+ if (!hyperv_identify())
+ return;
+
+ if (hyperv_features & HV_FEATURE_MSR_TIME_REFCNT) {
+ /* Register virtual timecount */
+ tc_init(&hv_timecounter);
+ }
+}
+SYSINIT(hyperv_initialize, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, hyperv_init,
+ NULL);
diff --git a/sys/dev/hyperv/vmbus/hv_ring_buffer.c b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
index 0e51ef7..cd82b27 100644
--- a/sys/dev/hyperv/vmbus/hv_ring_buffer.c
+++ b/sys/dev/hyperv/vmbus/hv_ring_buffer.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/sysctl.h>
#include "hv_vmbus_priv.h"
@@ -39,6 +40,47 @@ __FBSDID("$FreeBSD$");
#define HV_BYTES_AVAIL_TO_WRITE(r, w, z) ((w) >= (r))? \
((z) - ((w) - (r))):((r) - (w))
+static int
+hv_rbi_sysctl_stats(SYSCTL_HANDLER_ARGS)
+{
+ hv_vmbus_ring_buffer_info* rbi;
+ uint32_t read_index, write_index, interrupt_mask, sz;
+ uint32_t read_avail, write_avail;
+ char rbi_stats[256];
+
+ rbi = (hv_vmbus_ring_buffer_info*)arg1;
+ read_index = rbi->ring_buffer->read_index;
+ write_index = rbi->ring_buffer->write_index;
+ interrupt_mask = rbi->ring_buffer->interrupt_mask;
+ sz = rbi->ring_data_size;
+ write_avail = HV_BYTES_AVAIL_TO_WRITE(read_index,
+ write_index, sz);
+ read_avail = sz - write_avail;
+ snprintf(rbi_stats, sizeof(rbi_stats),
+ "r_idx:%d "
+ "w_idx:%d "
+ "int_mask:%d "
+ "r_avail:%d "
+ "w_avail:%d",
+ read_index, write_index, interrupt_mask,
+ read_avail, write_avail);
+
+ return (sysctl_handle_string(oidp, rbi_stats,
+ sizeof(rbi_stats), req));
+}
+
+void
+hv_ring_buffer_stat(
+ struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *tree_node,
+ hv_vmbus_ring_buffer_info *rbi,
+ const char *desc)
+{
+ SYSCTL_ADD_PROC(ctx, tree_node, OID_AUTO,
+ "ring_buffer_stats",
+ CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, rbi, 0,
+ hv_rbi_sysctl_stats, "A", desc);
+}
/**
* @brief Get number of bytes available to read and to write to
* for the specified ring buffer
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
index c8d6894..e274d59 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <machine/apicvar.h>
+#include <dev/hyperv/include/hyperv.h>
#include "hv_vmbus_priv.h"
#include <contrib/dev/acpica/include/acpi.h>
@@ -75,7 +76,7 @@ static char *vmbus_ids[] = { "VMBUS", NULL };
* the hypervisor.
*/
static void
-vmbus_msg_swintr(void *arg)
+vmbus_msg_swintr(void *arg, int pending __unused)
{
int cpu;
void* page_addr;
@@ -116,8 +117,12 @@ handled:
* message_pending and EOMing. Otherwise, the EOMing will
* not deliver any more messages
* since there is no empty slot
+ *
+ * NOTE:
+ * mb() is used here, since atomic_thread_fence_seq_cst()
+ * will become compiler fence on UP kernel.
*/
- wmb();
+ mb();
if (msg->header.message_flags.u.message_pending) {
/*
@@ -140,7 +145,6 @@ hv_vmbus_isr(struct trapframe *frame)
{
int cpu;
hv_vmbus_message* msg;
- hv_vmbus_synic_event_flags* event;
void* page_addr;
cpu = PCPU_GET(cpuid);
@@ -151,43 +155,31 @@ hv_vmbus_isr(struct trapframe *frame)
* in Windows when running as a guest in Hyper-V
*/
- page_addr = hv_vmbus_g_context.syn_ic_event_page[cpu];
- event = (hv_vmbus_synic_event_flags*)
- page_addr + HV_VMBUS_MESSAGE_SINT;
-
- if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) ||
- (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) {
- /* Since we are a child, we only need to check bit 0 */
- if (synch_test_and_clear_bit(0, &event->flags32[0])) {
- hv_vmbus_on_events(cpu);
- }
- } else {
- /*
- * On host with Win8 or above, we can directly look at
- * the event page. If bit n is set, we have an interrupt
- * on the channel with id n.
- * Directly schedule the event software interrupt on
- * current cpu.
- */
- hv_vmbus_on_events(cpu);
- }
+ hv_vmbus_on_events(cpu);
/* Check if there are actual msgs to be process */
page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu];
- msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
+ msg = (hv_vmbus_message*) page_addr + HV_VMBUS_TIMER_SINT;
/* we call eventtimer process the message */
if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) {
msg->header.message_type = HV_MESSAGE_TYPE_NONE;
+		/* call interrupt handler of event timer */
+ hv_et_intr(frame);
+
/*
* Make sure the write to message_type (ie set to
* HV_MESSAGE_TYPE_NONE) happens before we read the
* message_pending and EOMing. Otherwise, the EOMing will
* not deliver any more messages
* since there is no empty slot
+ *
+ * NOTE:
+ * mb() is used here, since atomic_thread_fence_seq_cst()
+ * will become compiler fence on UP kernel.
*/
- wmb();
+ mb();
if (msg->header.message_flags.u.message_pending) {
/*
@@ -196,12 +188,12 @@ hv_vmbus_isr(struct trapframe *frame)
*/
wrmsr(HV_X64_MSR_EOM, 0);
}
- hv_et_intr(frame);
- return (FILTER_HANDLED);
}
+ msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT;
if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) {
- swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0);
+ taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[cpu],
+ &hv_vmbus_g_context.hv_msg_task[cpu]);
}
return (FILTER_HANDLED);
@@ -279,6 +271,26 @@ vmbus_write_ivar(
return (ENOENT);
}
+static int
+vmbus_child_pnpinfo_str(device_t dev, device_t child, char *buf, size_t buflen)
+{
+ char guidbuf[40];
+ struct hv_device *dev_ctx = device_get_ivars(child);
+
+ if (dev_ctx == NULL)
+ return (0);
+
+ strlcat(buf, "classid=", buflen);
+ snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->class_id);
+ strlcat(buf, guidbuf, buflen);
+
+ strlcat(buf, " deviceid=", buflen);
+ snprintf_hv_guid(guidbuf, sizeof(guidbuf), &dev_ctx->device_id);
+ strlcat(buf, guidbuf, buflen);
+
+ return (0);
+}
+
struct hv_device*
hv_vmbus_child_device_create(
hv_guid type,
@@ -300,34 +312,34 @@ hv_vmbus_child_device_create(
return (child_dev);
}
-static void
-print_dev_guid(struct hv_device *dev)
+int
+snprintf_hv_guid(char *buf, size_t sz, const hv_guid *guid)
{
- int i;
- unsigned char guid_name[100];
- for (i = 0; i < 32; i += 2)
- sprintf(&guid_name[i], "%02x", dev->class_id.data[i / 2]);
- if(bootverbose)
- printf("VMBUS: Class ID: %s\n", guid_name);
+ int cnt;
+ const unsigned char *d = guid->data;
+
+ cnt = snprintf(buf, sz,
+ "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ d[3], d[2], d[1], d[0], d[5], d[4], d[7], d[6],
+ d[8], d[9], d[10], d[11], d[12], d[13], d[14], d[15]);
+ return (cnt);
}
int
hv_vmbus_child_device_register(struct hv_device *child_dev)
{
device_t child;
- int ret = 0;
-
- print_dev_guid(child_dev);
+ if (bootverbose) {
+ char name[40];
+ snprintf_hv_guid(name, sizeof(name), &child_dev->class_id);
+ printf("VMBUS: Class ID: %s\n", name);
+ }
child = device_add_child(vmbus_devp, NULL, -1);
child_dev->device = child;
device_set_ivars(child, child_dev);
- mtx_lock(&Giant);
- ret = device_probe_and_attach(child);
- mtx_unlock(&Giant);
-
return (0);
}
@@ -356,7 +368,6 @@ vmbus_probe(device_t dev) {
return (BUS_PROBE_DEFAULT);
}
-#ifdef HYPERV
extern inthand_t IDTVEC(rsvd), IDTVEC(hv_vmbus_callback);
/**
@@ -416,21 +427,6 @@ vmbus_vector_free(int vector)
setidt(vector, IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0);
}
-#else /* HYPERV */
-
-static int
-vmbus_vector_alloc(void)
-{
- return(0);
-}
-
-static void
-vmbus_vector_free(int vector)
-{
-}
-
-#endif /* HYPERV */
-
static void
vmbus_cpuset_setthread_task(void *xmask, int pending __unused)
{
@@ -498,9 +494,6 @@ vmbus_bus_init(void)
setup_args.vector = hv_vmbus_g_context.hv_cb_vector;
CPU_FOREACH(j) {
- hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
- hv_vmbus_g_context.msg_swintr[j] = NULL;
-
snprintf(buf, sizeof(buf), "cpu%d:hyperv", j);
intrcnt_add(buf, &hv_vmbus_intr_cpu[j]);
@@ -519,11 +512,6 @@ vmbus_bus_init(void)
*/
hv_vmbus_g_context.hv_event_queue[j] = taskqueue_create_fast("hyperv event", M_WAITOK,
taskqueue_thread_enqueue, &hv_vmbus_g_context.hv_event_queue[j]);
- if (hv_vmbus_g_context.hv_event_queue[j] == NULL) {
- if (bootverbose)
- printf("VMBUS: failed to setup taskqueue\n");
- goto cleanup1;
- }
taskqueue_start_threads(&hv_vmbus_g_context.hv_event_queue[j], 1, PI_NET,
"hvevent%d", j);
@@ -533,29 +521,20 @@ vmbus_bus_init(void)
taskqueue_drain(hv_vmbus_g_context.hv_event_queue[j], &cpuset_task);
/*
- * Setup software interrupt thread and handler for msg handling.
+ * Setup per-cpu tasks and taskqueues to handle msg.
*/
- ret = swi_add(&hv_vmbus_g_context.hv_msg_intr_event[j],
- "hv_msg", vmbus_msg_swintr, (void *)(long)j, SWI_CLOCK, 0,
- &hv_vmbus_g_context.msg_swintr[j]);
- if (ret) {
- if(bootverbose)
- printf("VMBUS: failed to setup msg swi for "
- "cpu %d\n", j);
- goto cleanup1;
- }
+ hv_vmbus_g_context.hv_msg_tq[j] = taskqueue_create_fast(
+ "hyperv msg", M_WAITOK, taskqueue_thread_enqueue,
+ &hv_vmbus_g_context.hv_msg_tq[j]);
+ taskqueue_start_threads(&hv_vmbus_g_context.hv_msg_tq[j], 1, PI_NET,
+ "hvmsg%d", j);
+ TASK_INIT(&hv_vmbus_g_context.hv_msg_task[j], 0,
+ vmbus_msg_swintr, (void *)(long)j);
- /*
- * Bind the swi thread to the cpu.
- */
- ret = intr_event_bind(hv_vmbus_g_context.hv_msg_intr_event[j],
- j);
- if (ret) {
- if(bootverbose)
- printf("VMBUS: failed to bind msg swi thread "
- "to cpu %d\n", j);
- goto cleanup1;
- }
+ CPU_SETOF(j, &cpu_mask);
+ TASK_INIT(&cpuset_task, 0, vmbus_cpuset_setthread_task, &cpu_mask);
+ taskqueue_enqueue(hv_vmbus_g_context.hv_msg_tq[j], &cpuset_task);
+ taskqueue_drain(hv_vmbus_g_context.hv_msg_tq[j], &cpuset_task);
/*
* Prepare the per cpu msg and event pages to be called on each cpu.
@@ -581,6 +560,11 @@ vmbus_bus_init(void)
goto cleanup1;
hv_vmbus_request_channel_offers();
+
+ vmbus_scan();
+ bus_generic_attach(vmbus_devp);
+ device_printf(vmbus_devp, "device scan, probe and attach done\n");
+
return (ret);
cleanup1:
@@ -595,11 +579,10 @@ vmbus_bus_init(void)
* remove swi and vmbus callback vector;
*/
CPU_FOREACH(j) {
- if (hv_vmbus_g_context.hv_event_queue[j] != NULL)
+ if (hv_vmbus_g_context.hv_event_queue[j] != NULL) {
taskqueue_free(hv_vmbus_g_context.hv_event_queue[j]);
- if (hv_vmbus_g_context.msg_swintr[j] != NULL)
- swi_remove(hv_vmbus_g_context.msg_swintr[j]);
- hv_vmbus_g_context.hv_msg_intr_event[j] = NULL;
+ hv_vmbus_g_context.hv_event_queue[j] = NULL;
+ }
}
vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
@@ -626,6 +609,7 @@ vmbus_attach(device_t dev)
if (!cold)
vmbus_bus_init();
+ bus_generic_probe(dev);
return (0);
}
@@ -656,7 +640,7 @@ vmbus_bus_exit(void)
smp_rendezvous(NULL, hv_vmbus_synic_cleanup, NULL, NULL);
for(i = 0; i < 2 * MAXCPU; i++) {
- if (setup_args.page_buffers[i] != 0)
+ if (setup_args.page_buffers[i] != NULL)
free(setup_args.page_buffers[i], M_DEVBUF);
}
@@ -664,11 +648,10 @@ vmbus_bus_exit(void)
/* remove swi */
CPU_FOREACH(i) {
- if (hv_vmbus_g_context.hv_event_queue[i] != NULL)
+ if (hv_vmbus_g_context.hv_event_queue[i] != NULL) {
taskqueue_free(hv_vmbus_g_context.hv_event_queue[i]);
- if (hv_vmbus_g_context.msg_swintr[i] != NULL)
- swi_remove(hv_vmbus_g_context.msg_swintr[i]);
- hv_vmbus_g_context.hv_msg_intr_event[i] = NULL;
+ hv_vmbus_g_context.hv_event_queue[i] = NULL;
+ }
}
vmbus_vector_free(hv_vmbus_g_context.hv_cb_vector);
@@ -733,6 +716,7 @@ static device_method_t vmbus_methods[] = {
DEVMETHOD(bus_print_child, bus_generic_print_child),
DEVMETHOD(bus_read_ivar, vmbus_read_ivar),
DEVMETHOD(bus_write_ivar, vmbus_write_ivar),
+ DEVMETHOD(bus_child_pnpinfo_str, vmbus_child_pnpinfo_str),
{ 0, 0 } };
diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
index 5f62072..f83102a 100644
--- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
+++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2009-2012,2016 Microsoft Corp.
* Copyright (c) 2012 NetApp Inc.
* Copyright (c) 2012 Citrix Inc.
* All rights reserved.
@@ -70,6 +70,7 @@ typedef uint16_t hv_vmbus_status;
* You did not supply enough message buffers to send a message.
*/
+#define HV_STATUS_SUCCESS ((uint16_t)0)
#define HV_STATUS_INSUFFICIENT_BUFFERS ((uint16_t)0x0013)
typedef void (*hv_vmbus_channel_callback)(void *context);
@@ -180,7 +181,8 @@ enum {
HV_VMBUS_EVENT_PORT_ID = 2,
HV_VMBUS_MONITOR_CONNECTION_ID = 3,
HV_VMBUS_MONITOR_PORT_ID = 3,
- HV_VMBUS_MESSAGE_SINT = 2
+ HV_VMBUS_MESSAGE_SINT = 2,
+ HV_VMBUS_TIMER_SINT = 4,
};
#define HV_PRESENT_BIT 0x80000000
@@ -203,8 +205,8 @@ typedef struct {
* event and msg handling.
*/
struct taskqueue *hv_event_queue[MAXCPU];
- struct intr_event *hv_msg_intr_event[MAXCPU];
- void *msg_swintr[MAXCPU];
+ struct taskqueue *hv_msg_tq[MAXCPU];
+ struct task hv_msg_task[MAXCPU];
/*
* Host use this vector to intrrupt guest for vmbus channel
* event and msg.
@@ -469,10 +471,28 @@ typedef enum {
HV_CPU_ID_FUNCTION_MS_HV_VERSION = 0x40000002,
HV_CPU_ID_FUNCTION_MS_HV_FEATURES = 0x40000003,
HV_CPU_ID_FUNCTION_MS_HV_ENLIGHTENMENT_INFORMATION = 0x40000004,
- HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005
-
+ HV_CPU_ID_FUNCTION_MS_HV_IMPLEMENTATION_LIMITS = 0x40000005,
+ HV_CPU_ID_FUNCTION_MS_HV_HARDWARE_FEATURE = 0x40000006
} hv_vmbus_cpuid_function;
+#define HV_FEATURE_MSR_TIME_REFCNT 0x0002 /* MSR_TIME_REF_COUNT */
+#define HV_FEATURE_MSR_SYNIC 0x0004 /* MSRs for SynIC */
+#define HV_FEATURE_MSR_SYNTIMER 0x0008 /* MSRs for SynTimer */
+#define HV_FEATURE_MSR_APIC 0x0010 /* MSR_{EOI,ICR,TPR} */
+#define HV_FEATURE_MSR_HYPERCALL 0x0020 /* MSR_{GUEST_OS_ID,HYPERCALL} */
+#define HV_FEATURE_MSR_GUEST_IDLE 0x0400 /* MSR_GUEST_IDLE */
+
+#define HV_PM_FEATURE_CSTATE_MASK 0x000f
+#define HV_PM_FEATURE_C3_HPET 0x0010 /* C3 requires HPET */
+#define HV_PM_FEATURE_CSTATE(f) ((f) & HV_PM_FEATURE_CSTATE_MASK)
+
+#define HV_FEATURE3_MWAIT 0x0001 /* MWAIT */
+#define HV_FEATURE3_XMM_HYPERCALL 0x0010 /* hypercall input through XMM regs */
+#define HV_FEATURE3_GUEST_IDLE 0x0020 /* guest idle support */
+#define HV_FEATURE3_NUMA 0x0080 /* NUMA distance query support */
+#define HV_FEATURE3_TIME_FREQ 0x0100 /* timer frequency query (TSC, LAPIC) */
+#define HV_FEATURE3_MSR_CRASH 0x0400 /* MSRs for guest crash */
+
/*
* Define the format of the SIMP register
*/
@@ -626,6 +646,9 @@ typedef enum {
extern hv_vmbus_context hv_vmbus_g_context;
extern hv_vmbus_connection hv_vmbus_g_connection;
+extern u_int hyperv_features;
+extern u_int hyperv_recommends;
+
typedef void (*vmbus_msg_handler)(hv_vmbus_channel_msg_header *msg);
typedef struct hv_vmbus_channel_msg_table_entry {
@@ -639,6 +662,14 @@ extern hv_vmbus_channel_msg_table_entry g_channel_message_table[];
/*
* Private, VM Bus functions
*/
+struct sysctl_ctx_list;
+struct sysctl_oid_list;
+
+void hv_ring_buffer_stat(
+ struct sysctl_ctx_list *ctx,
+ struct sysctl_oid_list *tree_node,
+ hv_vmbus_ring_buffer_info *rbi,
+ const char *desc);
int hv_vmbus_ring_buffer_init(
hv_vmbus_ring_buffer_info *ring_info,
@@ -694,7 +725,6 @@ uint16_t hv_vmbus_post_msg_via_msg_ipc(
uint16_t hv_vmbus_signal_event(void *con_id);
void hv_vmbus_synic_init(void *irq_arg);
void hv_vmbus_synic_cleanup(void *arg);
-int hv_vmbus_query_hypervisor_presence(void);
struct hv_device* hv_vmbus_child_device_create(
hv_guid device_type,
@@ -721,6 +751,9 @@ void hv_vmbus_on_events(int cpu);
void hv_et_init(void);
void hv_et_intr(struct trapframe*);
+/* Wait for device creation */
+void vmbus_scan(void);
+
/*
* The guest OS needs to register the guest ID with the hypervisor.
* The guest ID is a 64 bit entity and the structure of this ID is
diff --git a/sys/dev/hyperv/vmbus/i386/hv_vector.S b/sys/dev/hyperv/vmbus/i386/hv_vector.S
new file mode 100644
index 0000000..55a2613
--- /dev/null
+++ b/sys/dev/hyperv/vmbus/i386/hv_vector.S
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 2016 Microsoft Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * This is the Hyper-V vmbus channel direct callback interrupt.
+ * Only used when it is running on Hyper-V.
+ */
+ .text
+ SUPERALIGN_TEXT
+IDTVEC(hv_vmbus_callback)
+ PUSH_FRAME
+ SET_KERNEL_SREGS
+ cld
+ FAKE_MCOUNT(TF_EIP(%esp))
+ pushl %esp
+ call hv_vector_handler
+ add $4, %esp
+ MEXITCOUNT
+ jmp doreti
OpenPOWER on IntegriCloud