diff options
-rw-r--r-- | share/man/man4/nvd.4 | 13 | ||||
-rw-r--r-- | sys/conf/files.amd64 | 1 | ||||
-rw-r--r-- | sys/conf/files.i386 | 1 | ||||
-rw-r--r-- | sys/dev/hyperv/include/hyperv.h | 5 | ||||
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_net_vsc.c | 10 | ||||
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_net_vsc.h | 21 | ||||
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c | 390 | ||||
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_rndis.h | 1 | ||||
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_rndis_filter.c | 11 | ||||
-rw-r--r-- | sys/dev/hyperv/netvsc/hv_rndis_filter.h | 1 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_connection.c | 19 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_et.c | 131 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_hv.c | 61 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c | 78 | ||||
-rw-r--r-- | sys/dev/hyperv/vmbus/hv_vmbus_priv.h | 57 | ||||
-rw-r--r-- | sys/dev/nvd/nvd.c | 18 | ||||
-rw-r--r-- | sys/dev/nvme/nvme_ctrlr.c | 18 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_vnops.c | 13 | ||||
-rw-r--r-- | sys/modules/hyperv/vmbus/Makefile | 3 | ||||
-rw-r--r-- | sys/x86/x86/identcpu.c | 2 |
20 files changed, 649 insertions, 205 deletions
diff --git a/share/man/man4/nvd.4 b/share/man/man4/nvd.4 index 15200a4..4018dd8 100644 --- a/share/man/man4/nvd.4 +++ b/share/man/man4/nvd.4 @@ -1,5 +1,5 @@ .\" -.\" Copyright (c) 2012-2014 Intel Corporation +.\" Copyright (c) 2012-2016 Intel Corporation .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 18, 2014 +.Dd January 28, 2016 .Dt NVD 4 .Os .Sh NAME @@ -74,6 +74,15 @@ Note that device nodes from the driver are not .Xr GEOM 4 disks and cannot be partitioned. +.Sh CONFIGURATION +The +.Nm +driver defines a system-wide maximum delete size for NVMe devices. The +default is 1GB. To select a different value, set the following tunable in +.Xr loader.conf 5 : +.Bd -literal -offset indent +hw.nvd.delete_max=<delete size in bytes> +.Ed .Sh SEE ALSO .Xr GEOM 4 , .Xr nvme 4 , diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 445912a..f9080fd 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -275,6 +275,7 @@ dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv dev/hyperv/vmbus/hv_hv.c optional hyperv +dev/hyperv/vmbus/hv_et.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv dev/kbd/kbd.c optional atkbd | sc | ukbd | vt diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index fb6e4b2..f9816ae 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -253,6 +253,7 @@ dev/hyperv/vmbus/hv_channel.c optional hyperv dev/hyperv/vmbus/hv_channel_mgmt.c optional hyperv dev/hyperv/vmbus/hv_connection.c optional hyperv dev/hyperv/vmbus/hv_hv.c optional hyperv +dev/hyperv/vmbus/hv_et.c optional hyperv dev/hyperv/vmbus/hv_ring_buffer.c optional hyperv dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c optional hyperv dev/ichwd/ichwd.c optional ichwd diff --git a/sys/dev/hyperv/include/hyperv.h 
b/sys/dev/hyperv/include/hyperv.h index b5600ba..1a45b7b 100644 --- a/sys/dev/hyperv/include/hyperv.h +++ b/sys/dev/hyperv/include/hyperv.h @@ -335,11 +335,6 @@ typedef enum { HV_CHANNEL_MESSAGE_INITIATED_CONTACT = 14, HV_CHANNEL_MESSAGE_VERSION_RESPONSE = 15, HV_CHANNEL_MESSAGE_UNLOAD = 16, - -#ifdef HV_VMBUS_FEATURE_PARENT_OR_PEER_MEMORY_MAPPED_INTO_A_CHILD - HV_CHANNEL_MESSAGE_VIEW_RANGE_ADD = 17, - HV_CHANNEL_MESSAGE_VIEW_RANGE_REMOVE = 18, -#endif HV_CHANNEL_MESSAGE_COUNT } hv_vmbus_channel_msg_type; diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.c b/sys/dev/hyperv/netvsc/hv_net_vsc.c index d7cb747..a44c30d 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.c +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.c @@ -641,6 +641,12 @@ hv_nv_connect_to_vsp(struct hv_device *device) /* sema_wait(&NetVscChannel->channel_init_sema); */ /* Post the big receive buffer to NetVSP */ + if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2) + net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY; + else + net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; + net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; + ret = hv_nv_init_rx_buffer_with_net_vsp(device); if (ret == 0) ret = hv_nv_init_send_buffer_with_net_vsp(device); @@ -675,9 +681,6 @@ hv_nv_on_device_add(struct hv_device *device, void *additional_info) goto cleanup; /* Initialize the NetVSC channel extension */ - net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE; - - net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE; sema_init(&net_dev->channel_init_sema, 0, "netdev_sema"); @@ -918,6 +921,7 @@ hv_nv_on_receive(netvsc_dev *net_dev, struct hv_device *device, */ hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id, status); + hv_rf_receive_rollup(net_dev); } /* diff --git a/sys/dev/hyperv/netvsc/hv_net_vsc.h b/sys/dev/hyperv/netvsc/hv_net_vsc.h index 477a296..4e63b94 100644 --- a/sys/dev/hyperv/netvsc/hv_net_vsc.h +++ b/sys/dev/hyperv/netvsc/hv_net_vsc.h @@ -44,6 +44,12 @@ #include <sys/malloc.h> #include <sys/sx.h> 
+#include <netinet/in.h> +#include <netinet/tcp_lro.h> + +#include <net/if.h> +#include <net/if_media.h> + #include <dev/hyperv/include/hyperv.h> MALLOC_DECLARE(M_NETVSC); @@ -851,7 +857,7 @@ typedef struct nvsp_msg_ { #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */ #define NETVSC_SEND_BUFFER_ID 0xface - +#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */ #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */ #define NETVSC_RECEIVE_BUFFER_ID 0xcafe @@ -984,6 +990,7 @@ typedef struct { typedef struct hn_softc { struct ifnet *hn_ifp; struct arpcom arpcom; + struct ifmedia hn_media; device_t hn_dev; uint8_t hn_unit; int hn_carrier; @@ -994,6 +1001,18 @@ typedef struct hn_softc { int temp_unusable; struct hv_device *hn_dev_obj; netvsc_dev *net_dev; + + struct lro_ctrl hn_lro; + int hn_lro_hiwat; + + /* Trust tcp segments verification on host side */ + int hn_trust_hosttcp; + + u_long hn_csum_ip; + u_long hn_csum_tcp; + u_long hn_csum_trusted; + u_long hn_lro_tried; + u_long hn_small_pkts; } hn_softc_t; diff --git a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c index 33718a9..f8ebd38 100644 --- a/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c +++ b/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include <sys/queue.h> #include <sys/lock.h> #include <sys/sx.h> +#include <sys/sysctl.h> #include <net/if.h> #include <net/if_arp.h> @@ -138,13 +139,14 @@ __FBSDID("$FreeBSD$"); CSUM_IP_ISCSI|CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP| \ CSUM_IP6_TSO|CSUM_IP6_ISCSI) -/* - * Data types - */ - -struct hv_netvsc_driver_context { - uint32_t drv_inited; -}; +/* XXX move to netinet/tcp_lro.h */ +#define HN_LRO_HIWAT_MAX 65535 +#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX +/* YYY 2*MTU is a bit rough, but should be good enough. 
*/ +#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu) +#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \ + ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \ + (hiwat) <= HN_LRO_HIWAT_MAX) /* * Be aware that this sleepable mutex will exhibit WITNESS errors when @@ -168,9 +170,9 @@ struct hv_netvsc_driver_context { int hv_promisc_mode = 0; /* normal mode by default */ -/* The one and only one */ -static struct hv_netvsc_driver_context g_netvsc_drv; - +/* Trust tcp segements verification on host side. */ +static int hn_trust_hosttcp = 0; +TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp); /* * Forward declarations @@ -181,6 +183,21 @@ static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct ifnet *ifp); static void hn_start(struct ifnet *ifp); +static int hn_ifmedia_upd(struct ifnet *ifp); +static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); +#ifdef HN_LRO_HIWAT +static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS); +#endif +static int hn_check_iplen(const struct mbuf *, int); + +static __inline void +hn_set_lro_hiwat(struct hn_softc *sc, int hiwat) +{ + sc->hn_lro_hiwat = hiwat; +#ifdef HN_LRO_HIWAT + sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; +#endif +} /* * NetVsc get message transport protocol type @@ -238,35 +255,27 @@ static uint32_t get_transport_proto_type(struct mbuf *m_head) return (ret_val); } -/* - * NetVsc driver initialization - * Note: Filter init is no longer required - */ static int -netvsc_drv_init(void) +hn_ifmedia_upd(struct ifnet *ifp __unused) { - return (0); + + return EOPNOTSUPP; } -/* - * NetVsc global initialization entry point - */ static void -netvsc_init(void) +hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { - if (bootverbose) - printf("Netvsc initializing... 
"); + struct hn_softc *sc = ifp->if_softc; - /* - * XXXKYS: cleanup initialization - */ - if (!cold && !g_netvsc_drv.drv_inited) { - g_netvsc_drv.drv_inited = 1; - netvsc_drv_init(); - if (bootverbose) - printf("done!\n"); - } else if (bootverbose) - printf("Already initialized!\n"); + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if (!sc->hn_carrier) { + ifmr->ifm_active |= IFM_NONE; + return; + } + ifmr->ifm_status |= IFM_ACTIVE; + ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ @@ -310,10 +319,10 @@ netvsc_attach(device_t dev) hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp; + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; int ret; - netvsc_init(); - sc = device_get_softc(dev); if (sc == NULL) { return (ENOMEM); @@ -322,6 +331,8 @@ netvsc_attach(device_t dev) bzero(sc, sizeof(hn_softc_t)); sc->hn_unit = unit; sc->hn_dev = dev; + sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF; + sc->hn_trust_hosttcp = hn_trust_hosttcp; NV_LOCK_INIT(sc, "NetVSCLock"); @@ -344,14 +355,22 @@ netvsc_attach(device_t dev) ifp->if_snd.ifq_drv_maxlen = 511; IFQ_SET_READY(&ifp->if_snd); + ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); + ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); + /* XXX ifmedia_set really should do this for us */ + sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; + /* * Tell upper layers that we support full VLAN capability. 
*/ ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= - IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO; + IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | + IFCAP_LRO; ifp->if_capenable |= - IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO; + IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | + IFCAP_LRO; /* * Only enable UDP checksum offloading when it is on 2012R2 or * later. UDP checksum offloading doesn't work on earlier @@ -372,8 +391,63 @@ netvsc_attach(device_t dev) sc->hn_carrier = 1; } +#if defined(INET) || defined(INET6) + tcp_lro_init(&sc->hn_lro); + /* Driver private LRO settings */ + sc->hn_lro.ifp = ifp; +#ifdef HN_LRO_HIWAT + sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; +#endif +#endif /* INET || INET6 */ + ether_ifattach(ifp, device_info.mac_addr); + ctx = device_get_sysctl_ctx(dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued", + CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed", + CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried", + CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries"); +#ifdef HN_LRO_HIWAT + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat", + CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl, + "I", "LRO high watermark"); +#endif + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp", + CTLFLAG_RW, &sc->hn_trust_hosttcp, 0, + "Trust tcp segement verification on host side, " + "when csum info is missing"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip", + CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp", + CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP"); + SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted", + CTLFLAG_RW, &sc->hn_csum_trusted, + "# of TCP segements that we trust host's csum verification"); + 
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts", + CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received"); + + if (unit == 0) { + struct sysctl_ctx_list *dc_ctx; + struct sysctl_oid_list *dc_child; + devclass_t dc; + + /* + * Add sysctl nodes for devclass + */ + dc = device_get_devclass(dev); + dc_ctx = devclass_get_sysctl_ctx(dc); + dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc)); + + SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp", + CTLFLAG_RD, &hn_trust_hosttcp, 0, + "Trust tcp segement verification on host side, " + "when csum info is missing (global setting)"); + } + return (0); } @@ -383,6 +457,7 @@ netvsc_attach(device_t dev) static int netvsc_detach(device_t dev) { + struct hn_softc *sc = device_get_softc(dev); struct hv_device *hv_device = vmbus_get_devctx(dev); if (bootverbose) @@ -401,6 +476,11 @@ netvsc_detach(device_t dev) hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); + ifmedia_removeall(&sc->hn_media); +#if defined(INET) || defined(INET6) + tcp_lro_free(&sc->hn_lro); +#endif + return (0); } @@ -887,7 +967,7 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, struct mbuf *m_new; struct ifnet *ifp; device_t dev = device_ctx->device; - int size; + int size, do_lro = 0; if (sc == NULL) { return (0); /* TODO: KYS how can this be! */ @@ -906,40 +986,44 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, */ if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); - } - - /* - * Get an mbuf with a cluster. For packets 2K or less, - * get a standard 2K cluster. For anything larger, get a - * 4K cluster. Any buffers larger than 4K can cause problems - * if looped around to the Hyper-V TX channel, so avoid them. 
- */ - size = MCLBYTES; - - if (packet->tot_data_buf_len > MCLBYTES) { - /* 4096 */ - size = MJUMPAGESIZE; - } + } else if (packet->tot_data_buf_len <= MHLEN) { + m_new = m_gethdr(M_NOWAIT, MT_DATA); + if (m_new == NULL) + return (0); + memcpy(mtod(m_new, void *), packet->data, + packet->tot_data_buf_len); + m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len; + sc->hn_small_pkts++; + } else { + /* + * Get an mbuf with a cluster. For packets 2K or less, + * get a standard 2K cluster. For anything larger, get a + * 4K cluster. Any buffers larger than 4K can cause problems + * if looped around to the Hyper-V TX channel, so avoid them. + */ + size = MCLBYTES; + if (packet->tot_data_buf_len > MCLBYTES) { + /* 4096 */ + size = MJUMPAGESIZE; + } - m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size); + m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); + if (m_new == NULL) { + device_printf(dev, "alloc mbuf failed.\n"); + return (0); + } - if (m_new == NULL) { - device_printf(dev, "alloc mbuf failed.\n"); - return (0); + hv_m_append(m_new, packet->tot_data_buf_len, packet->data); } - - hv_m_append(m_new, packet->tot_data_buf_len, - packet->data); - m_new->m_pkthdr.rcvif = ifp; /* receive side checksum offload */ - m_new->m_pkthdr.csum_flags = 0; if (NULL != csum_info) { /* IP csum offload */ if (csum_info->receive.ip_csum_succeeded) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); + sc->hn_csum_ip++; } /* TCP csum offload */ @@ -947,9 +1031,50 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; + sc->hn_csum_tcp++; + } + + if (csum_info->receive.ip_csum_succeeded && + csum_info->receive.tcp_csum_succeeded) + do_lro = 1; + } else { + const struct ether_header *eh; + uint16_t etype; + int hoff; + + hoff = sizeof(*eh); + if (m_new->m_len < hoff) + goto skip; + eh = mtod(m_new, struct ether_header *); + etype = 
ntohs(eh->ether_type); + if (etype == ETHERTYPE_VLAN) { + const struct ether_vlan_header *evl; + + hoff = sizeof(*evl); + if (m_new->m_len < hoff) + goto skip; + evl = mtod(m_new, struct ether_vlan_header *); + etype = ntohs(evl->evl_proto); } - } + if (etype == ETHERTYPE_IP) { + int pr; + + pr = hn_check_iplen(m_new, hoff); + if (pr == IPPROTO_TCP) { + if (sc->hn_trust_hosttcp) { + sc->hn_csum_trusted++; + m_new->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | + CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m_new->m_pkthdr.csum_data = 0xffff; + } + /* Rely on SW csum verification though... */ + do_lro = 1; + } + } + } +skip: if ((packet->vlan_tci != 0) && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { m_new->m_pkthdr.ether_vtag = packet->vlan_tci; @@ -963,12 +1088,41 @@ netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, ifp->if_ipackets++; + if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { +#if defined(INET) || defined(INET6) + struct lro_ctrl *lro = &sc->hn_lro; + + if (lro->lro_cnt) { + sc->hn_lro_tried++; + if (tcp_lro_rx(lro, m_new, 0) == 0) { + /* DONE! */ + return 0; + } + } +#endif + } + /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } +void +netvsc_recv_rollup(struct hv_device *device_ctx) +{ +#if defined(INET) || defined(INET6) + hn_softc_t *sc = device_get_softc(device_ctx->device); + struct lro_ctrl *lro = &sc->hn_lro; + struct lro_entry *queued; + + while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) { + SLIST_REMOVE_HEAD(&lro->lro_active, next); + tcp_lro_flush(lro, queued); + } +#endif +} + /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() @@ -1024,7 +1178,13 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; - + /* + * Make sure that LRO high watermark is still valid, + * after MTU change (the 2*MTU limit). 
+ */ + if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat)) + hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp)); + do { NV_LOCK(sc); if (!sc->temp_unusable) { @@ -1149,6 +1309,8 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifp->if_capenable |= IFCAP_RXCSUM; } } + if (mask & IFCAP_LRO) + ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; @@ -1173,10 +1335,11 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = 0; } #endif - /* FALLTHROUGH */ + error = EINVAL; + break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: - error = EINVAL; + error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); @@ -1294,6 +1457,102 @@ hn_watchdog(struct ifnet *ifp) } #endif +#ifdef HN_LRO_HIWAT +static int +hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int hiwat, error; + + hiwat = sc->hn_lro_hiwat; + error = sysctl_handle_int(oidp, &hiwat, 0, req); + if (error || req->newptr == NULL) + return error; + + if (!HN_LRO_HIWAT_ISVALID(sc, hiwat)) + return EINVAL; + + if (sc->hn_lro_hiwat != hiwat) + hn_set_lro_hiwat(sc, hiwat); + return 0; +} +#endif /* HN_LRO_HIWAT */ + +static int +hn_check_iplen(const struct mbuf *m, int hoff) +{ + const struct ip *ip; + int len, iphlen, iplen; + const struct tcphdr *th; + int thoff; /* TCP data offset */ + + len = hoff + sizeof(struct ip); + + /* The packet must be at least the size of an IP header. */ + if (m->m_pkthdr.len < len) + return IPPROTO_DONE; + + /* The fixed IP header must reside completely in the first mbuf. */ + if (m->m_len < len) + return IPPROTO_DONE; + + ip = mtodo(m, hoff); + + /* Bound check the packet's stated IP header length. */ + iphlen = ip->ip_hl << 2; + if (iphlen < sizeof(struct ip)) /* minimum header length */ + return IPPROTO_DONE; + + /* The full IP header must reside completely in the one mbuf. 
*/ + if (m->m_len < hoff + iphlen) + return IPPROTO_DONE; + + iplen = ntohs(ip->ip_len); + + /* + * Check that the amount of data in the buffers is as + * at least much as the IP header would have us expect. + */ + if (m->m_pkthdr.len < hoff + iplen) + return IPPROTO_DONE; + + /* + * Ignore IP fragments. + */ + if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) + return IPPROTO_DONE; + + /* + * The TCP/IP or UDP/IP header must be entirely contained within + * the first fragment of a packet. + */ + switch (ip->ip_p) { + case IPPROTO_TCP: + if (iplen < iphlen + sizeof(struct tcphdr)) + return IPPROTO_DONE; + if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) + return IPPROTO_DONE; + th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); + thoff = th->th_off << 2; + if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) + return IPPROTO_DONE; + if (m->m_len < hoff + iphlen + thoff) + return IPPROTO_DONE; + break; + case IPPROTO_UDP: + if (iplen < iphlen + sizeof(struct udphdr)) + return IPPROTO_DONE; + if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) + return IPPROTO_DONE; + break; + default: + if (iplen < iphlen) + return IPPROTO_DONE; + break; + } + return ip->ip_p; +} + static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), @@ -1315,6 +1574,3 @@ static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); -SYSINIT(netvsc_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, netvsc_init, - NULL); - diff --git a/sys/dev/hyperv/netvsc/hv_rndis.h b/sys/dev/hyperv/netvsc/hv_rndis.h index 64fd578..fd032de 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis.h +++ b/sys/dev/hyperv/netvsc/hv_rndis.h @@ -1049,6 +1049,7 @@ typedef struct rndismp_rx_bufs_info_ { int netvsc_recv(struct hv_device *device_ctx, netvsc_packet *packet, rndis_tcp_ip_csum_info *csum_info); +void netvsc_recv_rollup(struct hv_device *device_ctx); void* 
hv_set_rppi_data(rndis_msg *rndis_mesg, uint32_t rppi_size, diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.c b/sys/dev/hyperv/netvsc/hv_rndis_filter.c index 691badd..3e95024 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.c +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.c @@ -963,3 +963,14 @@ hv_rf_on_send_request_halt_completion(void *context) request->halt_complete_flag = 1; } +/* + * RNDIS filter when "all" reception is done + */ +void +hv_rf_receive_rollup(netvsc_dev *net_dev) +{ + rndis_device *rndis_dev; + + rndis_dev = (rndis_device *)net_dev->extension; + netvsc_recv_rollup(rndis_dev->net_dev->dev); +} diff --git a/sys/dev/hyperv/netvsc/hv_rndis_filter.h b/sys/dev/hyperv/netvsc/hv_rndis_filter.h index 8355c6a..2f3ebd8 100644 --- a/sys/dev/hyperv/netvsc/hv_rndis_filter.h +++ b/sys/dev/hyperv/netvsc/hv_rndis_filter.h @@ -98,6 +98,7 @@ typedef struct rndis_device_ { int hv_rf_on_receive(netvsc_dev *net_dev, struct hv_device *device, netvsc_packet *pkt); +void hv_rf_receive_rollup(netvsc_dev *net_dev); int hv_rf_on_device_add(struct hv_device *device, void *additl_info); int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel); int hv_rf_on_open(struct hv_device *device); diff --git a/sys/dev/hyperv/vmbus/hv_connection.c b/sys/dev/hyperv/vmbus/hv_connection.c index cc83037..7496288 100644 --- a/sys/dev/hyperv/vmbus/hv_connection.c +++ b/sys/dev/hyperv/vmbus/hv_connection.c @@ -254,7 +254,7 @@ hv_vmbus_connect(void) { hv_vmbus_protocal_version = version; if (bootverbose) - printf("VMBUS: Portocal Version: %d.%d\n", + printf("VMBUS: Protocol Version: %d.%d\n", version >> 16, version & 0xFFFF); sema_destroy(&msg_info->wait_sema); @@ -426,12 +426,6 @@ VmbusProcessChannelEvent(uint32_t relid) // mtx_unlock(&channel->inbound_lock); } -#ifdef HV_DEBUG_INTR -extern uint32_t hv_intr_count; -extern uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; -extern uint32_t hv_vmbus_intr_cpu[MAXCPU]; -#endif - /** * Handler for events */ @@ -452,17 +446,6 @@ 
hv_vmbus_on_events(void *arg) KASSERT(cpu <= mp_maxid, ("VMBUS: hv_vmbus_on_events: " "cpu out of range!")); -#ifdef HV_DEBUG_INTR - int i; - hv_vmbus_swintr_event_cpu[cpu]++; - if (hv_intr_count % 10000 == 0) { - printf("VMBUS: Total interrupt %d\n", hv_intr_count); - for (i = 0; i < mp_ncpus; i++) - printf("VMBUS: hw cpu[%d]: %d, event sw intr cpu[%d]: %d\n", - i, hv_vmbus_intr_cpu[i], i, hv_vmbus_swintr_event_cpu[i]); - } -#endif - if ((hv_vmbus_protocal_version == HV_VMBUS_VERSION_WS2008) || (hv_vmbus_protocal_version == HV_VMBUS_VERSION_WIN7)) { maxdword = HV_MAX_NUM_CHANNELS_SUPPORTED >> 5; diff --git a/sys/dev/hyperv/vmbus/hv_et.c b/sys/dev/hyperv/vmbus/hv_et.c new file mode 100644 index 0000000..d961486 --- /dev/null +++ b/sys/dev/hyperv/vmbus/hv_et.c @@ -0,0 +1,131 @@ +/*- + * Copyright (c) 2015 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/smp.h> +#include <sys/time.h> +#include <sys/timeet.h> + +#include "hv_vmbus_priv.h" + +#define HV_TIMER_FREQUENCY (10 * 1000 * 1000LL) /* 100ns period */ +#define HV_MAX_DELTA_TICKS 0xffffffffLL +#define HV_MIN_DELTA_TICKS 1LL + +static struct eventtimer et; +static uint64_t periodticks[MAXCPU]; + +static inline uint64_t +sbintime2tick(sbintime_t time) +{ + struct timespec val; + + val = sbttots(time); + return val.tv_sec * HV_TIMER_FREQUENCY + val.tv_nsec / 100; +} + +static int +hv_et_start(struct eventtimer *et, sbintime_t firsttime, sbintime_t periodtime) +{ + union hv_timer_config timer_cfg; + uint64_t current; + + timer_cfg.as_uint64 = 0; + timer_cfg.auto_enable = 1; + timer_cfg.sintx = HV_VMBUS_MESSAGE_SINT; + + periodticks[curcpu] = sbintime2tick(periodtime); + if (firsttime == 0) + firsttime = periodtime; + + current = rdmsr(HV_X64_MSR_TIME_REF_COUNT); + current += sbintime2tick(firsttime); + + wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + wrmsr(HV_X64_MSR_STIMER0_COUNT, current); + + return (0); +} + +static int +hv_et_stop(struct eventtimer *et) +{ + wrmsr(HV_X64_MSR_STIMER0_CONFIG, 0); + wrmsr(HV_X64_MSR_STIMER0_COUNT, 0); + + return (0); +} + +void +hv_et_intr(struct trapframe *frame) +{ + union hv_timer_config timer_cfg; + struct trapframe *oldframe; + struct thread *td; + + 
if (periodticks[curcpu] != 0) { + uint64_t tick = sbintime2tick(periodticks[curcpu]); + timer_cfg.as_uint64 = rdmsr(HV_X64_MSR_STIMER0_CONFIG); + timer_cfg.enable = 0; + timer_cfg.auto_enable = 1; + timer_cfg.periodic = 1; + periodticks[curcpu] = 0; + + wrmsr(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + wrmsr(HV_X64_MSR_STIMER0_COUNT, tick); + } + + if (et.et_active) { + td = curthread; + td->td_intr_nesting_level++; + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; + et.et_event_cb(&et, et.et_arg); + td->td_intr_frame = oldframe; + td->td_intr_nesting_level--; + } +} + +void +hv_et_init(void) +{ + et.et_name = "HyperV"; + et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU | ET_FLAGS_PERIODIC; + et.et_quality = 1000; + et.et_frequency = HV_TIMER_FREQUENCY; + et.et_min_period = (1LL << 32) / HV_TIMER_FREQUENCY; + et.et_max_period = HV_MAX_DELTA_TICKS * ((1LL << 32) / HV_TIMER_FREQUENCY); + et.et_start = hv_et_start; + et.et_stop = hv_et_stop; + et.et_priv = &et; + et_register(&et); +} + diff --git a/sys/dev/hyperv/vmbus/hv_hv.c b/sys/dev/hyperv/vmbus/hv_hv.c index 84e2a5e..ca5641f 100644 --- a/sys/dev/hyperv/vmbus/hv_hv.c +++ b/sys/dev/hyperv/vmbus/hv_hv.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <sys/pcpu.h> #include <sys/timetc.h> #include <machine/bus.h> +#include <machine/md_var.h> #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> @@ -44,23 +45,11 @@ __FBSDID("$FreeBSD$"); #include "hv_vmbus_priv.h" -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - -#define HV_X64_CPUID_MIN 0x40000005 -#define HV_X64_CPUID_MAX 0x4000ffff -#define HV_X64_MSR_TIME_REF_COUNT 0x40000020 - #define HV_NANOSECONDS_PER_SEC 1000000000L static u_int hv_get_timecount(struct timecounter *tc); -static inline void do_cpuid_inline(unsigned int op, unsigned int *eax, - unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), - "=d" (*edx) : "0" (op), "c" (ecx)); -} - /** * Globals */ @@ 
-86,27 +75,10 @@ hv_get_timecount(struct timecounter *tc) int hv_vmbus_query_hypervisor_presence(void) { - u_int regs[4]; - int hyper_v_detected = 0; - - /* - * When Xen is detected and native Xen PV support is enabled, - * ignore Xen's HyperV emulation. - */ - if (vm_guest == VM_GUEST_XEN) + if (vm_guest != VM_GUEST_HV) return (0); - do_cpuid(1, regs); - if (regs[2] & 0x80000000) { /* if(a hypervisor is detected) */ - /* make sure this really is Hyper-V */ - /* we look at the CPUID info */ - do_cpuid(HV_X64_MSR_GUEST_OS_ID, regs); - hyper_v_detected = - regs[0] >= HV_X64_CPUID_MIN && - regs[0] <= HV_X64_CPUID_MAX && - !memcmp("Microsoft Hv", &regs[1], 12); - } - return (hyper_v_detected); + return (hv_high >= HV_X64_CPUID_MIN && hv_high <= HV_X64_CPUID_MAX); } /** @@ -115,10 +87,7 @@ hv_vmbus_query_hypervisor_presence(void) static int hv_vmbus_get_hypervisor_version(void) { - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; + u_int regs[4]; unsigned int maxLeaf; unsigned int op; @@ -127,28 +96,16 @@ hv_vmbus_get_hypervisor_version(void) * Viridian is present * Query id and revision. 
*/ - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; op = HV_CPU_ID_FUNCTION_HV_VENDOR_AND_MAX_FUNCTION; - do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + do_cpuid(op, regs); - maxLeaf = eax; - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; + maxLeaf = regs[0]; op = HV_CPU_ID_FUNCTION_HV_INTERFACE; - do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + do_cpuid(op, regs); if (maxLeaf >= HV_CPU_ID_FUNCTION_MS_HV_VERSION) { - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; op = HV_CPU_ID_FUNCTION_MS_HV_VERSION; - do_cpuid_inline(op, &eax, &ebx, &ecx, &edx); + do_cpuid(op, regs); } return (maxLeaf); } @@ -255,6 +212,8 @@ hv_vmbus_init(void) hv_vmbus_g_context.hypercall_page = virt_addr; tc_init(&hv_timecounter); /* register virtual timecount */ + + hv_et_init(); return (0); diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c index f7eae26..66a3f39 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c +++ b/sys/dev/hyperv/vmbus/hv_vmbus_drv_freebsd.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <sys/lock.h> #include <sys/malloc.h> #include <sys/module.h> +#include <sys/proc.h> #include <sys/sysctl.h> #include <sys/syslog.h> #include <sys/systm.h> @@ -60,13 +61,15 @@ __FBSDID("$FreeBSD$"); #include "hv_vmbus_priv.h" - -#define VMBUS_IRQ 0x5 +#include <contrib/dev/acpica/include/acpi.h> +#include "acpi_if.h" static device_t vmbus_devp; static int vmbus_inited; static hv_setup_args setup_args; /* only CPU 0 supported at this time */ +static char *vmbus_ids[] = { "VMBUS", NULL }; + /** * @brief Software interrupt thread routine to handle channel messages from * the hypervisor. @@ -151,7 +154,7 @@ handled: * message to process - an event or a channel message. 
*/ static inline int -hv_vmbus_isr(void *unused) +hv_vmbus_isr(struct trapframe *frame) { int cpu; hv_vmbus_message* msg; @@ -191,41 +194,57 @@ hv_vmbus_isr(void *unused) page_addr = hv_vmbus_g_context.syn_ic_msg_page[cpu]; msg = (hv_vmbus_message*) page_addr + HV_VMBUS_MESSAGE_SINT; + /* we call eventtimer process the message */ + if (msg->header.message_type == HV_MESSAGE_TIMER_EXPIRED) { + msg->header.message_type = HV_MESSAGE_TYPE_NONE; + + /* + * Make sure the write to message_type (ie set to + * HV_MESSAGE_TYPE_NONE) happens before we read the + * message_pending and EOMing. Otherwise, the EOMing will + * not deliver any more messages + * since there is no empty slot + */ + wmb(); + + if (msg->header.message_flags.u.message_pending) { + /* + * This will cause message queue rescan to possibly + * deliver another msg from the hypervisor + */ + wrmsr(HV_X64_MSR_EOM, 0); + } + hv_et_intr(frame); + return (FILTER_HANDLED); + } + if (msg->header.message_type != HV_MESSAGE_TYPE_NONE) { swi_sched(hv_vmbus_g_context.msg_swintr[cpu], 0); } - return FILTER_HANDLED; + return (FILTER_HANDLED); } -#ifdef HV_DEBUG_INTR -uint32_t hv_intr_count = 0; -#endif uint32_t hv_vmbus_swintr_event_cpu[MAXCPU]; -uint32_t hv_vmbus_intr_cpu[MAXCPU]; +u_long *hv_vmbus_intr_cpu[MAXCPU]; void hv_vector_handler(struct trapframe *trap_frame) { -#ifdef HV_DEBUG_INTR int cpu; -#endif /* * Disable preemption. */ critical_enter(); -#ifdef HV_DEBUG_INTR /* * Do a little interrupt counting. */ cpu = PCPU_GET(cpuid); - hv_vmbus_intr_cpu[cpu]++; - hv_intr_count++; -#endif + (*hv_vmbus_intr_cpu[cpu])++; - hv_vmbus_isr(NULL); + hv_vmbus_isr(trap_frame); /* * Enable preemption. 
@@ -350,25 +369,15 @@ hv_vmbus_child_device_unregister(struct hv_device *child_dev) return(ret); } -static void -vmbus_identify(driver_t *driver, device_t parent) -{ - if (!hv_vmbus_query_hypervisor_presence()) - return; - - vm_guest = VM_GUEST_HV; - - BUS_ADD_CHILD(parent, 0, "vmbus", 0); -} - static int vmbus_probe(device_t dev) { - if(bootverbose) - device_printf(dev, "VMBUS: probe\n"); + if (ACPI_ID_PROBE(device_get_parent(dev), dev, vmbus_ids) == NULL || + device_get_unit(dev) != 0) + return (ENXIO); device_set_desc(dev, "Vmbus Devices"); - return (BUS_PROBE_NOWILDCARD); + return (BUS_PROBE_DEFAULT); } #ifdef HYPERV @@ -462,6 +471,7 @@ static int vmbus_bus_init(void) { int i, j, n, ret; + char buf[MAXCOMLEN + 1]; if (vmbus_inited) return (0); @@ -498,13 +508,15 @@ vmbus_bus_init(void) setup_args.vector = hv_vmbus_g_context.hv_cb_vector; CPU_FOREACH(j) { - hv_vmbus_intr_cpu[j] = 0; hv_vmbus_swintr_event_cpu[j] = 0; hv_vmbus_g_context.hv_event_intr_event[j] = NULL; hv_vmbus_g_context.hv_msg_intr_event[j] = NULL; hv_vmbus_g_context.event_swintr[j] = NULL; hv_vmbus_g_context.msg_swintr[j] = NULL; + snprintf(buf, sizeof(buf), "cpu%d:hyperv", j); + intrcnt_add(buf, &hv_vmbus_intr_cpu[j]); + for (i = 0; i < 2; i++) setup_args.page_buffers[2 * j + i] = NULL; } @@ -723,7 +735,6 @@ vmbus_modevent(module_t mod, int what, void *arg) static device_method_t vmbus_methods[] = { /** Device interface */ - DEVMETHOD(device_identify, vmbus_identify), DEVMETHOD(device_probe, vmbus_probe), DEVMETHOD(device_attach, vmbus_attach), DEVMETHOD(device_detach, vmbus_detach), @@ -745,8 +756,9 @@ static driver_t vmbus_driver = { driver_name, vmbus_methods,0, }; devclass_t vmbus_devclass; -DRIVER_MODULE(vmbus, nexus, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); -MODULE_VERSION(vmbus,1); +DRIVER_MODULE(vmbus, acpi, vmbus_driver, vmbus_devclass, vmbus_modevent, 0); +MODULE_DEPEND(vmbus, acpi, 1, 1, 1); +MODULE_VERSION(vmbus, 1); /* We want to be started after SMP is initialized */ 
SYSINIT(vmb_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, vmbus_init, NULL); diff --git a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h index 0503d06..74fe824 100644 --- a/sys/dev/hyperv/vmbus/hv_vmbus_priv.h +++ b/sys/dev/hyperv/vmbus/hv_vmbus_priv.h @@ -359,11 +359,6 @@ typedef struct { struct sema control_sema; } hv_vmbus_connection; -/* - * Declare the MSR used to identify the guest OS - */ -#define HV_X64_MSR_GUEST_OS_ID 0x40000000 - typedef union { uint64_t as_uint64_t; struct { @@ -380,10 +375,6 @@ typedef union { } u; } hv_vmbus_x64_msr_guest_os_id_contents; -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor - */ -#define HV_X64_MSR_HYPERCALL 0x40000001 typedef union { uint64_t as_uint64_t; @@ -513,6 +504,22 @@ typedef union { } hv_vmbus_synic_sint; /* + * Timer configuration register. + */ +union hv_timer_config { + uint64_t as_uint64; + struct { + uint64_t enable:1; + uint64_t periodic:1; + uint64_t lazy:1; + uint64_t auto_enable:1; + uint64_t reserved_z0:12; + uint64_t sintx:4; + uint64_t reserved_z1:44; + }; +}; + +/* * Define syn_ic control register */ typedef union _hv_vmbus_synic_scontrol { @@ -542,8 +549,21 @@ typedef union { uint32_t flags32[HV_EVENT_FLAGS_DWORD_COUNT]; } hv_vmbus_synic_event_flags; +#define HV_X64_CPUID_MIN (0x40000005) +#define HV_X64_CPUID_MAX (0x4000ffff) + +/* + * Declare the MSR used to identify the guest OS + */ +#define HV_X64_MSR_GUEST_OS_ID (0x40000000) +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor + */ +#define HV_X64_MSR_HYPERCALL (0x40000001) /* MSR used to provide vcpu index */ -#define HV_X64_MSR_VP_INDEX (0x40000002) +#define HV_X64_MSR_VP_INDEX (0x40000002) + +#define HV_X64_MSR_TIME_REF_COUNT (0x40000020) /* * Define synthetic interrupt controller model specific registers @@ -572,6 +592,18 @@ typedef union { #define HV_X64_MSR_SINT15 (0x4000009F) /* + * Synthetic Timer MSRs. Four timers per vcpu. 
+ */ +#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 + +/* * Declare the various hypercall operations */ typedef enum { @@ -678,6 +710,11 @@ int hv_vmbus_post_message(void *buffer, size_t buf_size); int hv_vmbus_set_event(hv_vmbus_channel *channel); void hv_vmbus_on_events(void *); +/** + * Event Timer interfaces + */ +void hv_et_init(void); +void hv_et_intr(struct trapframe*); /* * The guest OS needs to register the guest ID with the hypervisor. diff --git a/sys/dev/nvd/nvd.c b/sys/dev/nvd/nvd.c index 24ee075..e062f57 100644 --- a/sys/dev/nvd/nvd.c +++ b/sys/dev/nvd/nvd.c @@ -1,5 +1,5 @@ /*- - * Copyright (C) 2012-2013 Intel Corporation + * Copyright (C) 2012-2016 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/module.h> +#include <sys/sysctl.h> #include <sys/systm.h> #include <sys/taskqueue.h> @@ -88,6 +89,19 @@ struct nvd_controller { static TAILQ_HEAD(, nvd_controller) ctrlr_head; static TAILQ_HEAD(disk_list, nvd_disk) disk_head; +static SYSCTL_NODE(_hw, OID_AUTO, nvd, CTLFLAG_RD, 0, "nvd driver parameters"); +/* + * The NVMe specification does not define a maximum or optimal delete size, so + * technically max delete size is min(full size of the namespace, 2^32 - 1 + * LBAs). A single delete for a multi-TB NVMe namespace though may take much + * longer to complete than the nvme(4) I/O timeout period. So choose a sensible + * default here that is still suitably large to minimize the number of overall + * delete operations. 
+ */ +static uint64_t nvd_delete_max = (1024 * 1024 * 1024); /* 1GB */ +SYSCTL_UQUAD(_hw_nvd, OID_AUTO, delete_max, CTLFLAG_RDTUN, &nvd_delete_max, 0, + "nvd maximum BIO_DELETE size in bytes"); + static int nvd_modevent(module_t mod, int type, void *arg) { int error = 0; @@ -295,6 +309,8 @@ nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg) disk->d_sectorsize = nvme_ns_get_sector_size(ns); disk->d_mediasize = (off_t)nvme_ns_get_size(ns); disk->d_delmaxsize = (off_t)nvme_ns_get_size(ns); + if (disk->d_delmaxsize > nvd_delete_max) + disk->d_delmaxsize = nvd_delete_max; disk->d_stripesize = nvme_ns_get_optimal_sector_size(ns); if (TAILQ_EMPTY(&disk_head)) diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index 86f2a54..991c52f 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -725,15 +725,17 @@ nvme_ctrlr_start(void *ctrlr_arg) * explicit specify how many queues it will use. This value should * never change between resets, so panic if somehow that does happen. */ - old_num_io_queues = ctrlr->num_io_queues; - if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) { - nvme_ctrlr_fail(ctrlr); - return; - } + if (ctrlr->is_resetting) { + old_num_io_queues = ctrlr->num_io_queues; + if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) { + nvme_ctrlr_fail(ctrlr); + return; + } - if (old_num_io_queues != ctrlr->num_io_queues) { - panic("num_io_queues changed from %u to %u", old_num_io_queues, - ctrlr->num_io_queues); + if (old_num_io_queues != ctrlr->num_io_queues) { + panic("num_io_queues changed from %u to %u", + old_num_io_queues, ctrlr->num_io_queues); + } } if (nvme_ctrlr_create_qpairs(ctrlr) != 0) { diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c index f5dc3b2..eea65d2 100644 --- a/sys/fs/ext2fs/ext2_vnops.c +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -985,10 +985,10 @@ abortit: dp = VTOI(fdvp); } else { /* - * From name has disappeared. + * From name has disappeared. 
IN_RENAME is not sufficient + * to protect against directory races due to timing windows, + * so we can't panic here. */ - if (doingdirectory) - panic("ext2_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } @@ -1003,8 +1003,11 @@ abortit: * rename. */ if (xp != ip) { - if (doingdirectory) - panic("ext2_rename: lost dir entry"); + /* + * From name resolves to a different inode. IN_RENAME is + * not sufficient protection against timing window races + * so we can't panic here. + */ } else { /* * If the source is a directory with a diff --git a/sys/modules/hyperv/vmbus/Makefile b/sys/modules/hyperv/vmbus/Makefile index 11228cd..637157b 100644 --- a/sys/modules/hyperv/vmbus/Makefile +++ b/sys/modules/hyperv/vmbus/Makefile @@ -7,11 +7,12 @@ KMOD= hv_vmbus SRCS= hv_channel.c \ hv_channel_mgmt.c \ hv_connection.c \ + hv_et.c \ hv_hv.c \ hv_ring_buffer.c \ hv_vmbus_drv_freebsd.c \ hv_vmbus_priv.h -SRCS+= bus_if.h device_if.h +SRCS+= acpi_if.h bus_if.h device_if.h opt_acpi.h CFLAGS+= -I${.CURDIR}/../../../dev/hyperv/include \ -I${.CURDIR}/../../../dev/hyperv/vmbus \ diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index 1544509..59c8783 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -1251,6 +1251,8 @@ identify_hypervisor(void) hv_vendor[12] = '\0'; if (strcmp(hv_vendor, "VMwareVMware") == 0) vm_guest = VM_GUEST_VMWARE; + else if (strcmp(hv_vendor, "Microsoft Hv") == 0) + vm_guest = VM_GUEST_HV; } return; } |