diff options
Diffstat (limited to 'sys/dev/hyperv/netvsc/hv_net_vsc.c')
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_net_vsc.c | 1141 |
1 files changed, 1141 insertions, 0 deletions
diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c new file mode 100644 index 0000000..aeee94d --- /dev/null +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -0,0 +1,1141 @@ +/*- + * Copyright (c) 2009-2012 Microsoft Corp. + * Copyright (c) 2010-2012 Citrix Inc. + * Copyright (c) 2012 NetApp Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * HyperV vmbus network VSC (virtual services client) module + * + */ + + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/socket.h> +#include <sys/lock.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <machine/bus.h> +#include <machine/atomic.h> + +#include <dev/hyperv/include/hyperv.h> +#include "hv_net_vsc.h" +#include "hv_rndis.h" +#include "hv_rndis_filter.h" + + +/* + * Forward declarations + */ +static void hv_nv_on_channel_callback(void *context); +static int hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device); +static int hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device); +static int hv_nv_destroy_send_buffer(netvsc_dev *net_dev); +static int hv_nv_destroy_rx_buffer(netvsc_dev *net_dev); +static int hv_nv_connect_to_vsp(struct hv_device *device); +static void hv_nv_on_send_completion(struct hv_device *device, + hv_vm_packet_descriptor *pkt); +static void hv_nv_on_receive(struct hv_device *device, + hv_vm_packet_descriptor *pkt); +static void hv_nv_send_receive_completion(struct hv_device *device, + uint64_t tid); + + +/* + * + */ +static inline netvsc_dev * +hv_nv_alloc_net_device(struct hv_device *device) +{ + netvsc_dev *net_dev; + hn_softc_t *sc = device_get_softc(device->device); + + net_dev = malloc(sizeof(netvsc_dev), M_DEVBUF, M_NOWAIT | M_ZERO); + if (net_dev == NULL) { + return (NULL); + } + + net_dev->dev = device; + net_dev->destroy = FALSE; + sc->net_dev = net_dev; + + return (net_dev); +} + +/* + * + */ +static inline netvsc_dev * +hv_nv_get_outbound_net_device(struct hv_device *device) +{ + hn_softc_t *sc = device_get_softc(device->device); + netvsc_dev *net_dev = sc->net_dev;; + + if ((net_dev != NULL) && net_dev->destroy) { + return (NULL); + } + + return (net_dev); +} + +/* + * + */ +static inline netvsc_dev * +hv_nv_get_inbound_net_device(struct hv_device *device) +{ + hn_softc_t *sc = device_get_softc(device->device); + netvsc_dev *net_dev = sc->net_dev;; + + if (net_dev == NULL) { + return (net_dev); + } + /* + * When the device is being destroyed; we only + * permit incoming packets if and only if there + * are outstanding sends. + */ + if (net_dev->destroy && net_dev->num_outstanding_sends == 0) { + return (NULL); + } + + return (net_dev); +} + +/* + * Net VSC initialize receive buffer with net VSP + * + * Net VSP: Network virtual services client, also known as the + * Hyper-V extensible switch and the synthetic data path. + */ +static int +hv_nv_init_rx_buffer_with_net_vsp(struct hv_device *device) +{ + netvsc_dev *net_dev; + nvsp_msg *init_pkt; + int ret = 0; + + net_dev = hv_nv_get_outbound_net_device(device); + if (!net_dev) { + return (ENODEV); + } + + net_dev->rx_buf = contigmalloc(net_dev->rx_buf_size, M_DEVBUF, + M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + if (net_dev->rx_buf == NULL) { + ret = ENOMEM; + goto cleanup; + } + + /* + * Establish the GPADL handle for this buffer on this channel. + * Note: This call uses the vmbus connection rather than the + * channel to establish the gpadl handle. + * GPADL: Guest physical address descriptor list. + */ + ret = hv_vmbus_channel_establish_gpadl( + device->channel, net_dev->rx_buf, + net_dev->rx_buf_size, &net_dev->rx_buf_gpadl_handle); + if (ret != 0) { + goto cleanup; + } + + /* sema_wait(&ext->channel_init_sema); KYS CHECK */ + + /* Notify the NetVsp of the gpadl handle */ + init_pkt = &net_dev->channel_init_packet; + + memset(init_pkt, 0, sizeof(nvsp_msg)); + + init_pkt->hdr.msg_type = nvsp_msg_1_type_send_rx_buf; + init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = + net_dev->rx_buf_gpadl_handle; + init_pkt->msgs.vers_1_msgs.send_rx_buf.id = + NETVSC_RECEIVE_BUFFER_ID; + + /* Send the gpadl notification request */ + + ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, + sizeof(nvsp_msg), (uint64_t)init_pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, + HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret != 0) { + goto cleanup; + } + + sema_wait(&net_dev->channel_init_sema); + + /* Check the response */ + if (init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.status + != nvsp_status_success) { + ret = EINVAL; + goto cleanup; + } + + net_dev->rx_section_count = + init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.num_sections; + + net_dev->rx_sections = malloc(net_dev->rx_section_count * + sizeof(nvsp_1_rx_buf_section), M_DEVBUF, M_NOWAIT); + if (net_dev->rx_sections == NULL) { + ret = EINVAL; + goto cleanup; + } + memcpy(net_dev->rx_sections, + init_pkt->msgs.vers_1_msgs.send_rx_buf_complete.sections, + net_dev->rx_section_count * sizeof(nvsp_1_rx_buf_section)); + + + /* + * For first release, there should only be 1 section that represents + * the entire receive buffer + */ + if (net_dev->rx_section_count != 1 + || net_dev->rx_sections->offset != 0) { + ret = EINVAL; + goto cleanup; + } + + goto exit; + +cleanup: + hv_nv_destroy_rx_buffer(net_dev); + +exit: + return (ret); +} + +/* + * Net VSC initialize send buffer with net VSP + */ +static int +hv_nv_init_send_buffer_with_net_vsp(struct hv_device *device) +{ + netvsc_dev *net_dev; + nvsp_msg *init_pkt; + int ret = 0; + + net_dev = hv_nv_get_outbound_net_device(device); + if (!net_dev) { + return (ENODEV); + } + + net_dev->send_buf = contigmalloc(net_dev->send_buf_size, M_DEVBUF, + M_ZERO, 0UL, BUS_SPACE_MAXADDR, PAGE_SIZE, 0); + if (net_dev->send_buf == NULL) { + ret = ENOMEM; + goto cleanup; + } + + /* + * Establish the gpadl handle for this buffer on this channel. + * Note: This call uses the vmbus connection rather than the + * channel to establish the gpadl handle. + */ + ret = hv_vmbus_channel_establish_gpadl(device->channel, + net_dev->send_buf, net_dev->send_buf_size, + &net_dev->send_buf_gpadl_handle); + if (ret != 0) { + goto cleanup; + } + + /* Notify the NetVsp of the gpadl handle */ + + init_pkt = &net_dev->channel_init_packet; + + memset(init_pkt, 0, sizeof(nvsp_msg)); + + init_pkt->hdr.msg_type = nvsp_msg_1_type_send_send_buf; + init_pkt->msgs.vers_1_msgs.send_rx_buf.gpadl_handle = + net_dev->send_buf_gpadl_handle; + init_pkt->msgs.vers_1_msgs.send_rx_buf.id = + NETVSC_SEND_BUFFER_ID; + + /* Send the gpadl notification request */ + + ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, + sizeof(nvsp_msg), (uint64_t)init_pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, + HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret != 0) { + goto cleanup; + } + + sema_wait(&net_dev->channel_init_sema); + + /* Check the response */ + if (init_pkt->msgs.vers_1_msgs.send_send_buf_complete.status + != nvsp_status_success) { + ret = EINVAL; + goto cleanup; + } + + net_dev->send_section_size = + init_pkt->msgs.vers_1_msgs.send_send_buf_complete.section_size; + + goto exit; + +cleanup: + hv_nv_destroy_send_buffer(net_dev); + +exit: + return (ret); +} + +/* + * Net VSC destroy receive buffer + */ +static int +hv_nv_destroy_rx_buffer(netvsc_dev *net_dev) +{ + nvsp_msg *revoke_pkt; + int ret = 0; + + /* + * If we got a section count, it means we received a + * send_rx_buf_complete msg + * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, + * we need to send a revoke msg here + */ + if (net_dev->rx_section_count) { + /* Send the revoke receive buffer */ + revoke_pkt = &net_dev->revoke_packet; + memset(revoke_pkt, 0, sizeof(nvsp_msg)); + + revoke_pkt->hdr.msg_type = nvsp_msg_1_type_revoke_rx_buf; + revoke_pkt->msgs.vers_1_msgs.revoke_rx_buf.id = + NETVSC_RECEIVE_BUFFER_ID; + + ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, + revoke_pkt, sizeof(nvsp_msg), + (uint64_t)revoke_pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); + + /* + * If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + return (ret); + } + } + + /* Tear down the gpadl on the vsp end */ + if (net_dev->rx_buf_gpadl_handle) { + ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, + net_dev->rx_buf_gpadl_handle); + /* + * If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + return (ret); + } + net_dev->rx_buf_gpadl_handle = 0; + } + + if (net_dev->rx_buf) { + /* Free up the receive buffer */ + contigfree(net_dev->rx_buf, net_dev->rx_buf_size, M_DEVBUF); + net_dev->rx_buf = NULL; + } + + if (net_dev->rx_sections) { + free(net_dev->rx_sections, M_DEVBUF); + net_dev->rx_sections = NULL; + net_dev->rx_section_count = 0; + } + + return (ret); +} + +/* + * Net VSC destroy send buffer + */ +static int +hv_nv_destroy_send_buffer(netvsc_dev *net_dev) +{ + nvsp_msg *revoke_pkt; + int ret = 0; + + /* + * If we got a section count, it means we received a + * send_rx_buf_complete msg + * (ie sent nvsp_msg_1_type_send_rx_buf msg) therefore, + * we need to send a revoke msg here + */ + if (net_dev->send_section_size) { + /* Send the revoke send buffer */ + revoke_pkt = &net_dev->revoke_packet; + memset(revoke_pkt, 0, sizeof(nvsp_msg)); + + revoke_pkt->hdr.msg_type = + nvsp_msg_1_type_revoke_send_buf; + revoke_pkt->msgs.vers_1_msgs.revoke_send_buf.id = + NETVSC_SEND_BUFFER_ID; + + ret = hv_vmbus_channel_send_packet(net_dev->dev->channel, + revoke_pkt, sizeof(nvsp_msg), + (uint64_t)revoke_pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); + /* + * If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + return (ret); + } + } + + /* Tear down the gpadl on the vsp end */ + if (net_dev->send_buf_gpadl_handle) { + ret = hv_vmbus_channel_teardown_gpdal(net_dev->dev->channel, + net_dev->send_buf_gpadl_handle); + + /* + * If we failed here, we might as well return and have a leak + * rather than continue and a bugchk + */ + if (ret != 0) { + return (ret); + } + net_dev->send_buf_gpadl_handle = 0; + } + + if (net_dev->send_buf) { + /* Free up the receive buffer */ + contigfree(net_dev->send_buf, net_dev->send_buf_size, M_DEVBUF); + net_dev->send_buf = NULL; + } + + return (ret); +} + + +/* + * Attempt to negotiate the caller-specified NVSP version + * + * For NVSP v2, Server 2008 R2 does not set + * init_pkt->msgs.init_msgs.init_compl.negotiated_prot_vers + * to the negotiated version, so we cannot rely on that. + */ +static int +hv_nv_negotiate_nvsp_protocol(struct hv_device *device, netvsc_dev *net_dev, + uint32_t nvsp_ver) +{ + nvsp_msg *init_pkt; + int ret; + + init_pkt = &net_dev->channel_init_packet; + memset(init_pkt, 0, sizeof(nvsp_msg)); + init_pkt->hdr.msg_type = nvsp_msg_type_init; + + /* + * Specify parameter as the only acceptable protocol version + */ + init_pkt->msgs.init_msgs.init.p1.protocol_version = nvsp_ver; + init_pkt->msgs.init_msgs.init.protocol_version_2 = nvsp_ver; + + /* Send the init request */ + ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, + sizeof(nvsp_msg), (uint64_t)init_pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, + HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + if (ret != 0) + return (-1); + + sema_wait(&net_dev->channel_init_sema); + + if (init_pkt->msgs.init_msgs.init_compl.status != nvsp_status_success) + return (EINVAL); + + return (0); +} + +/* + * Send NDIS version 2 config packet containing MTU. + * + * Not valid for NDIS version 1. + */ +static int +hv_nv_send_ndis_config(struct hv_device *device, uint32_t mtu) +{ + netvsc_dev *net_dev; + nvsp_msg *init_pkt; + int ret; + + net_dev = hv_nv_get_outbound_net_device(device); + if (!net_dev) + return (-ENODEV); + + /* + * Set up configuration packet, write MTU + * Indicate we are capable of handling VLAN tags + */ + init_pkt = &net_dev->channel_init_packet; + memset(init_pkt, 0, sizeof(nvsp_msg)); + init_pkt->hdr.msg_type = nvsp_msg_2_type_send_ndis_config; + init_pkt->msgs.vers_2_msgs.send_ndis_config.mtu = mtu; + init_pkt-> + msgs.vers_2_msgs.send_ndis_config.capabilities.u1.u2.ieee8021q + = 1; + + /* Send the configuration packet */ + ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, + sizeof(nvsp_msg), (uint64_t)init_pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); + if (ret != 0) + return (-EINVAL); + + return (0); +} + +/* + * Net VSC connect to VSP + */ +static int +hv_nv_connect_to_vsp(struct hv_device *device) +{ + netvsc_dev *net_dev; + nvsp_msg *init_pkt; + uint32_t nvsp_vers; + uint32_t ndis_version; + int ret = 0; + device_t dev = device->device; + hn_softc_t *sc = device_get_softc(dev); + struct ifnet *ifp = sc->arpcom.ac_ifp; + + net_dev = hv_nv_get_outbound_net_device(device); + if (!net_dev) { + return (ENODEV); + } + + /* + * Negotiate the NVSP version. Try NVSP v2 first. + */ + nvsp_vers = NVSP_PROTOCOL_VERSION_2; + ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers); + if (ret != 0) { + /* NVSP v2 failed, try NVSP v1 */ + nvsp_vers = NVSP_PROTOCOL_VERSION_1; + ret = hv_nv_negotiate_nvsp_protocol(device, net_dev, nvsp_vers); + if (ret != 0) { + /* NVSP v1 failed, return bad status */ + return (ret); + } + } + net_dev->nvsp_version = nvsp_vers; + + /* + * Set the MTU if supported by this NVSP protocol version + * This needs to be right after the NVSP init message per Haiyang + */ + if (nvsp_vers >= NVSP_PROTOCOL_VERSION_2) + ret = hv_nv_send_ndis_config(device, ifp->if_mtu); + + /* + * Send the NDIS version + */ + init_pkt = &net_dev->channel_init_packet; + + memset(init_pkt, 0, sizeof(nvsp_msg)); + + /* + * Updated to version 5.1, minimum, for VLAN per Haiyang + */ + ndis_version = NDIS_VERSION; + + init_pkt->hdr.msg_type = nvsp_msg_1_type_send_ndis_vers; + init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_major_vers = + (ndis_version & 0xFFFF0000) >> 16; + init_pkt->msgs.vers_1_msgs.send_ndis_vers.ndis_minor_vers = + ndis_version & 0xFFFF; + + /* Send the init request */ + + ret = hv_vmbus_channel_send_packet(device->channel, init_pkt, + sizeof(nvsp_msg), (uint64_t)init_pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, 0); + if (ret != 0) { + goto cleanup; + } + /* + * TODO: BUGBUG - We have to wait for the above msg since the netvsp + * uses KMCL which acknowledges packet (completion packet) + * since our Vmbus always set the + * HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED flag + */ + /* sema_wait(&NetVscChannel->channel_init_sema); */ + + /* Post the big receive buffer to NetVSP */ + ret = hv_nv_init_rx_buffer_with_net_vsp(device); + if (ret == 0) + ret = hv_nv_init_send_buffer_with_net_vsp(device); + +cleanup: + return (ret); +} + +/* + * Net VSC disconnect from VSP + */ +static void +hv_nv_disconnect_from_vsp(netvsc_dev *net_dev) +{ + hv_nv_destroy_rx_buffer(net_dev); + hv_nv_destroy_send_buffer(net_dev); +} + +/* + * Net VSC on device add + * + * Callback when the device belonging to this driver is added + */ +netvsc_dev * +hv_nv_on_device_add(struct hv_device *device, void *additional_info) +{ + netvsc_dev *net_dev; + netvsc_packet *packet; + netvsc_packet *next_packet; + int i, ret = 0; + + net_dev = hv_nv_alloc_net_device(device); + if (!net_dev) + goto cleanup; + + /* Initialize the NetVSC channel extension */ + net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; + mtx_init(&net_dev->rx_pkt_list_lock, "HV-RPL", NULL, + MTX_SPIN | MTX_RECURSE); + + net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; + + /* Same effect as STAILQ_HEAD_INITIALIZER() static initializer */ + STAILQ_INIT(&net_dev->myrx_packet_list); + + /* + * malloc a sufficient number of netvsc_packet buffers to hold + * a packet list. Add them to the netvsc device packet queue. + */ + for (i=0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) { + packet = malloc(sizeof(netvsc_packet) + + (NETVSC_RECEIVE_SG_COUNT * sizeof(hv_vmbus_page_buffer)), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (!packet) { + break; + } + STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, + mylist_entry); + } + + sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); + + /* + * Open the channel + */ + ret = hv_vmbus_channel_open(device->channel, + NETVSC_DEVICE_RING_BUFFER_SIZE, NETVSC_DEVICE_RING_BUFFER_SIZE, + NULL, 0, hv_nv_on_channel_callback, device); + if (ret != 0) + goto cleanup; + + /* + * Connect with the NetVsp + */ + ret = hv_nv_connect_to_vsp(device); + if (ret != 0) + goto close; + + return (net_dev); + +close: + /* Now, we can close the channel safely */ + + hv_vmbus_channel_close(device->channel); + +cleanup: + /* + * Free the packet buffers on the netvsc device packet queue. + * Release other resources. + */ + if (net_dev) { + sema_destroy(&net_dev->channel_init_sema); + + packet = STAILQ_FIRST(&net_dev->myrx_packet_list); + while (packet != NULL) { + next_packet = STAILQ_NEXT(packet, mylist_entry); + free(packet, M_DEVBUF); + packet = next_packet; + } + /* Reset the list to initial state */ + STAILQ_INIT(&net_dev->myrx_packet_list); + + mtx_destroy(&net_dev->rx_pkt_list_lock); + + free(net_dev, M_DEVBUF); + } + + return (NULL); +} + +/* + * Net VSC on device remove + */ +int +hv_nv_on_device_remove(struct hv_device *device, boolean_t destroy_channel) +{ + netvsc_packet *net_vsc_pkt; + netvsc_packet *next_net_vsc_pkt; + hn_softc_t *sc = device_get_softc(device->device); + netvsc_dev *net_dev = sc->net_dev;; + + /* Stop outbound traffic ie sends and receives completions */ + mtx_lock(&device->channel->inbound_lock); + net_dev->destroy = TRUE; + mtx_unlock(&device->channel->inbound_lock); + + /* Wait for all send completions */ + while (net_dev->num_outstanding_sends) { + DELAY(100); + } + + hv_nv_disconnect_from_vsp(net_dev); + + /* At this point, no one should be accessing net_dev except in here */ + + /* Now, we can close the channel safely */ + + if (!destroy_channel) { + device->channel->state = + HV_CHANNEL_CLOSING_NONDESTRUCTIVE_STATE; + } + + hv_vmbus_channel_close(device->channel); + + /* Release all resources */ + net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list); + while (net_vsc_pkt != NULL) { + next_net_vsc_pkt = STAILQ_NEXT(net_vsc_pkt, mylist_entry); + free(net_vsc_pkt, M_DEVBUF); + net_vsc_pkt = next_net_vsc_pkt; + } + + /* Reset the list to initial state */ + STAILQ_INIT(&net_dev->myrx_packet_list); + + mtx_destroy(&net_dev->rx_pkt_list_lock); + sema_destroy(&net_dev->channel_init_sema); + free(net_dev, M_DEVBUF); + + return (0); +} + +/* + * Net VSC on send completion + */ +static void +hv_nv_on_send_completion(struct hv_device *device, hv_vm_packet_descriptor *pkt) +{ + netvsc_dev *net_dev; + nvsp_msg *nvsp_msg_pkt; + netvsc_packet *net_vsc_pkt; + + net_dev = hv_nv_get_inbound_net_device(device); + if (!net_dev) { + return; + } + + nvsp_msg_pkt = + (nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3)); + + if (nvsp_msg_pkt->hdr.msg_type == nvsp_msg_type_init_complete + || nvsp_msg_pkt->hdr.msg_type + == nvsp_msg_1_type_send_rx_buf_complete + || nvsp_msg_pkt->hdr.msg_type + == nvsp_msg_1_type_send_send_buf_complete) { + /* Copy the response back */ + memcpy(&net_dev->channel_init_packet, nvsp_msg_pkt, + sizeof(nvsp_msg)); + sema_post(&net_dev->channel_init_sema); + } else if (nvsp_msg_pkt->hdr.msg_type == + nvsp_msg_1_type_send_rndis_pkt_complete) { + /* Get the send context */ + net_vsc_pkt = + (netvsc_packet *)(unsigned long)pkt->transaction_id; + + /* Notify the layer above us */ + net_vsc_pkt->compl.send.on_send_completion( + net_vsc_pkt->compl.send.send_completion_context); + + atomic_subtract_int(&net_dev->num_outstanding_sends, 1); + } +} + +/* + * Net VSC on send + * Sends a packet on the specified Hyper-V device. + * Returns 0 on success, non-zero on failure. + */ +int +hv_nv_on_send(struct hv_device *device, netvsc_packet *pkt) +{ + netvsc_dev *net_dev; + nvsp_msg send_msg; + int ret; + + net_dev = hv_nv_get_outbound_net_device(device); + if (!net_dev) + return (ENODEV); + + send_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt; + if (pkt->is_data_pkt) { + /* 0 is RMC_DATA */ + send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 0; + } else { + /* 1 is RMC_CONTROL */ + send_msg.msgs.vers_1_msgs.send_rndis_pkt.chan_type = 1; + } + + /* Not using send buffer section */ + send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_idx = + 0xFFFFFFFF; + send_msg.msgs.vers_1_msgs.send_rndis_pkt.send_buf_section_size = 0; + + if (pkt->page_buf_count) { + ret = hv_vmbus_channel_send_packet_pagebuffer(device->channel, + pkt->page_buffers, pkt->page_buf_count, + &send_msg, sizeof(nvsp_msg), (uint64_t)pkt); + } else { + ret = hv_vmbus_channel_send_packet(device->channel, + &send_msg, sizeof(nvsp_msg), (uint64_t)pkt, + HV_VMBUS_PACKET_TYPE_DATA_IN_BAND, + HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + } + + /* Record outstanding send only if send_packet() succeeded */ + if (ret == 0) + atomic_add_int(&net_dev->num_outstanding_sends, 1); + + return (ret); +} + +/* + * Net VSC on receive + * + * In the FreeBSD Hyper-V virtual world, this function deals exclusively + * with virtual addresses. + */ +static void +hv_nv_on_receive(struct hv_device *device, hv_vm_packet_descriptor *pkt) +{ + netvsc_dev *net_dev; + hv_vm_transfer_page_packet_header *vm_xfer_page_pkt; + nvsp_msg *nvsp_msg_pkt; + netvsc_packet *net_vsc_pkt = NULL; + unsigned long start; + xfer_page_packet *xfer_page_pkt = NULL; + STAILQ_HEAD(PKT_LIST, netvsc_packet_) mylist_head = + STAILQ_HEAD_INITIALIZER(mylist_head); + int count = 0; + int i = 0; + + net_dev = hv_nv_get_inbound_net_device(device); + if (!net_dev) + return; + + /* + * All inbound packets other than send completion should be + * xfer page packet. + */ + if (pkt->type != HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES) + return; + + nvsp_msg_pkt = (nvsp_msg *)((unsigned long)pkt + + (pkt->data_offset8 << 3)); + + /* Make sure this is a valid nvsp packet */ + if (nvsp_msg_pkt->hdr.msg_type != nvsp_msg_1_type_send_rndis_pkt) + return; + + vm_xfer_page_pkt = (hv_vm_transfer_page_packet_header *)pkt; + + if (vm_xfer_page_pkt->transfer_page_set_id + != NETVSC_RECEIVE_BUFFER_ID) { + return; + } + + STAILQ_INIT(&mylist_head); + + /* + * Grab free packets (range count + 1) to represent this xfer page + * packet. +1 to represent the xfer page packet itself. We grab it + * here so that we know exactly how many we can fulfill. + */ + mtx_lock_spin(&net_dev->rx_pkt_list_lock); + while (!STAILQ_EMPTY(&net_dev->myrx_packet_list)) { + net_vsc_pkt = STAILQ_FIRST(&net_dev->myrx_packet_list); + STAILQ_REMOVE_HEAD(&net_dev->myrx_packet_list, mylist_entry); + + STAILQ_INSERT_TAIL(&mylist_head, net_vsc_pkt, mylist_entry); + + if (++count == vm_xfer_page_pkt->range_count + 1) + break; + } + + mtx_unlock_spin(&net_dev->rx_pkt_list_lock); + + /* + * We need at least 2 netvsc pkts (1 to represent the xfer page + * and at least 1 for the range) i.e. we can handle some of the + * xfer page packet ranges... + */ + if (count < 2) { + /* Return netvsc packet to the freelist */ + mtx_lock_spin(&net_dev->rx_pkt_list_lock); + for (i=count; i != 0; i--) { + net_vsc_pkt = STAILQ_FIRST(&mylist_head); + STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry); + + STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, + net_vsc_pkt, mylist_entry); + } + mtx_unlock_spin(&net_dev->rx_pkt_list_lock); + + hv_nv_send_receive_completion(device, + vm_xfer_page_pkt->d.transaction_id); + + return; + } + + /* Take the first packet in the list */ + xfer_page_pkt = (xfer_page_packet *)STAILQ_FIRST(&mylist_head); + STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry); + + /* This is how many data packets we can supply */ + xfer_page_pkt->count = count - 1; + + /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ + for (i=0; i < (count - 1); i++) { + net_vsc_pkt = STAILQ_FIRST(&mylist_head); + STAILQ_REMOVE_HEAD(&mylist_head, mylist_entry); + + /* + * Initialize the netvsc packet + */ + net_vsc_pkt->xfer_page_pkt = xfer_page_pkt; + net_vsc_pkt->compl.rx.rx_completion_context = net_vsc_pkt; + net_vsc_pkt->device = device; + /* Save this so that we can send it back */ + net_vsc_pkt->compl.rx.rx_completion_tid = + vm_xfer_page_pkt->d.transaction_id; + + net_vsc_pkt->tot_data_buf_len = + vm_xfer_page_pkt->ranges[i].byte_count; + net_vsc_pkt->page_buf_count = 1; + + net_vsc_pkt->page_buffers[0].length = + vm_xfer_page_pkt->ranges[i].byte_count; + + /* The virtual address of the packet in the receive buffer */ + start = ((unsigned long)net_dev->rx_buf + + vm_xfer_page_pkt->ranges[i].byte_offset); + start = ((unsigned long)start) & ~(PAGE_SIZE - 1); + + /* Page number of the virtual page containing packet start */ + net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT; + + /* Calculate the page relative offset */ + net_vsc_pkt->page_buffers[0].offset = + vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1); + + /* + * In this implementation, we are dealing with virtual + * addresses exclusively. Since we aren't using physical + * addresses at all, we don't care if a packet crosses a + * page boundary. For this reason, the original code to + * check for and handle page crossings has been removed. + */ + + /* + * Pass it to the upper layer. The receive completion call + * has been moved into this function. + */ + hv_rf_on_receive(device, net_vsc_pkt); + + /* + * Moved completion call back here so that all received + * messages (not just data messages) will trigger a response + * message back to the host. + */ + hv_nv_on_receive_completion(net_vsc_pkt); + } +} + +/* + * Net VSC send receive completion + */ +static void +hv_nv_send_receive_completion(struct hv_device *device, uint64_t tid) +{ + nvsp_msg rx_comp_msg; + int retries = 0; + int ret = 0; + + rx_comp_msg.hdr.msg_type = nvsp_msg_1_type_send_rndis_pkt_complete; + + /* Pass in the status */ + rx_comp_msg.msgs.vers_1_msgs.send_rndis_pkt_complete.status = + nvsp_status_success; + +retry_send_cmplt: + /* Send the completion */ + ret = hv_vmbus_channel_send_packet(device->channel, &rx_comp_msg, + sizeof(nvsp_msg), tid, HV_VMBUS_PACKET_TYPE_COMPLETION, 0); + if (ret == 0) { + /* success */ + /* no-op */ + } else if (ret == EAGAIN) { + /* no more room... wait a bit and attempt to retry 3 times */ + retries++; + + if (retries < 4) { + DELAY(100); + goto retry_send_cmplt; + } + } +} + +/* + * Net VSC on receive completion + * + * Send a receive completion packet to RNDIS device (ie NetVsp) + */ +void +hv_nv_on_receive_completion(void *context) +{ + netvsc_packet *packet = (netvsc_packet *)context; + struct hv_device *device = (struct hv_device *)packet->device; + netvsc_dev *net_dev; + uint64_t tid = 0; + boolean_t send_rx_completion = FALSE; + + /* + * Even though it seems logical to do a hv_nv_get_outbound_net_device() + * here to send out receive completion, we are using + * hv_nv_get_inbound_net_device() since we may have disabled + * outbound traffic already. + */ + net_dev = hv_nv_get_inbound_net_device(device); + if (net_dev == NULL) + return; + + /* Overloading use of the lock. */ + mtx_lock_spin(&net_dev->rx_pkt_list_lock); + + packet->xfer_page_pkt->count--; + + /* + * Last one in the line that represent 1 xfer page packet. + * Return the xfer page packet itself to the free list. + */ + if (packet->xfer_page_pkt->count == 0) { + send_rx_completion = TRUE; + tid = packet->compl.rx.rx_completion_tid; + STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, + (netvsc_packet *)(packet->xfer_page_pkt), mylist_entry); + } + + /* Put the packet back on the free list */ + STAILQ_INSERT_TAIL(&net_dev->myrx_packet_list, packet, mylist_entry); + mtx_unlock_spin(&net_dev->rx_pkt_list_lock); + + /* Send a receive completion for the xfer page packet */ + if (send_rx_completion) + hv_nv_send_receive_completion(device, tid); +} + +/* + * Net VSC on channel callback + */ +static void +hv_nv_on_channel_callback(void *context) +{ + /* Fixme: Magic number */ + const int net_pkt_size = 2048; + struct hv_device *device = (struct hv_device *)context; + netvsc_dev *net_dev; + uint32_t bytes_rxed; + uint64_t request_id; + uint8_t *packet; + hv_vm_packet_descriptor *desc; + uint8_t *buffer; + int bufferlen = net_pkt_size; + int ret = 0; + + packet = malloc(net_pkt_size * sizeof(uint8_t), M_DEVBUF, M_NOWAIT); + if (!packet) + return; + + buffer = packet; + + net_dev = hv_nv_get_inbound_net_device(device); + if (net_dev == NULL) + goto out; + + do { + ret = hv_vmbus_channel_recv_packet_raw(device->channel, + buffer, bufferlen, &bytes_rxed, &request_id); + if (ret == 0) { + if (bytes_rxed > 0) { + desc = (hv_vm_packet_descriptor *)buffer; + switch (desc->type) { + case HV_VMBUS_PACKET_TYPE_COMPLETION: + hv_nv_on_send_completion(device, desc); + break; + case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES: + hv_nv_on_receive(device, desc); + break; + default: + break; + } + } else { + break; + } + } else if (ret == ENOBUFS) { + /* Handle large packet */ + free(buffer, M_DEVBUF); + buffer = malloc(bytes_rxed, M_DEVBUF, M_NOWAIT); + if (buffer == NULL) { + break; + } + bufferlen = bytes_rxed; + } + } while (1); + +out: + free(buffer, M_DEVBUF); +} + |