Diffstat (limited to 'sys/dev/hyperv/netvsc/if_hn.c')
-rw-r--r--   sys/dev/hyperv/netvsc/if_hn.c   1238
1 file changed, 986 insertions, 252 deletions
diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c
index 1731c97..8750ab0 100644
--- a/sys/dev/hyperv/netvsc/if_hn.c
+++ b/sys/dev/hyperv/netvsc/if_hn.c
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet6.h"
#include "opt_inet.h"
+#include "opt_hn.h"
#include <sys/param.h>
#include <sys/bus.h>
@@ -152,7 +153,11 @@ __FBSDID("$FreeBSD$");
sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED)
-#define HN_LOCK(sc) sx_xlock(&(sc)->hn_lock)
+#define HN_LOCK(sc) \
+do { \
+ while (sx_try_xlock(&(sc)->hn_lock) == 0) \
+ DELAY(1000); \
+} while (0)
#define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock)
#define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
@@ -162,10 +167,24 @@ __FBSDID("$FreeBSD$");
#define HN_CSUM_IP6_HWASSIST(sc) \
((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
+#define HN_PKTSIZE_MIN(align) \
+ roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
+ HN_RNDIS_PKT_LEN, (align))
+#define HN_PKTSIZE(m, align) \
+ roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
+
+#define HN_RING_IDX2CPU(sc, idx) (((sc)->hn_cpu + (idx)) % mp_ncpus)
+
struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
SLIST_ENTRY(hn_txdesc) link;
#endif
+ STAILQ_ENTRY(hn_txdesc) agg_link;
+
+ /* Aggregated txdescs, in sending order. */
+ STAILQ_HEAD(, hn_txdesc) agg_list;
+
+ /* The oldest packet, if transmission aggregation happens. */
struct mbuf *m;
struct hn_tx_ring *txr;
int refs;
@@ -183,6 +202,7 @@ struct hn_txdesc {
#define HN_TXD_FLAG_ONLIST 0x0001
#define HN_TXD_FLAG_DMAMAP 0x0002
+#define HN_TXD_FLAG_ONAGG 0x0004
struct hn_rxinfo {
uint32_t vlan_info;
@@ -262,6 +282,11 @@ static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static void hn_stop(struct hn_softc *);
static void hn_init_locked(struct hn_softc *);
@@ -278,6 +303,7 @@ static int hn_synth_attach(struct hn_softc *, int);
static void hn_synth_detach(struct hn_softc *);
static int hn_synth_alloc_subchans(struct hn_softc *,
int *);
+static bool hn_synth_attachable(const struct hn_softc *);
static void hn_suspend(struct hn_softc *);
static void hn_suspend_data(struct hn_softc *);
static void hn_suspend_mgmt(struct hn_softc *);
@@ -285,7 +311,10 @@ static void hn_resume(struct hn_softc *);
static void hn_resume_data(struct hn_softc *);
static void hn_resume_mgmt(struct hn_softc *);
static void hn_suspend_mgmt_taskfunc(void *, int);
-static void hn_chan_drain(struct vmbus_channel *);
+static void hn_chan_drain(struct hn_softc *,
+ struct vmbus_channel *);
+static void hn_polling(struct hn_softc *, u_int);
+static void hn_chan_polling(struct vmbus_channel *, u_int);
static void hn_update_link_status(struct hn_softc *);
static void hn_change_network(struct hn_softc *);
@@ -297,9 +326,10 @@ static void hn_link_status(struct hn_softc *);
static int hn_create_rx_data(struct hn_softc *, int);
static void hn_destroy_rx_data(struct hn_softc *);
static int hn_check_iplen(const struct mbuf *, int);
-static int hn_set_rxfilter(struct hn_softc *);
+static int hn_set_rxfilter(struct hn_softc *, uint32_t);
+static int hn_rxfilter_config(struct hn_softc *);
static int hn_rss_reconfig(struct hn_softc *);
-static void hn_rss_ind_fixup(struct hn_softc *, int);
+static void hn_rss_ind_fixup(struct hn_softc *);
static int hn_rxpkt(struct hn_rx_ring *, const void *,
int, const struct hn_rxinfo *);
@@ -309,7 +339,9 @@ static int hn_create_tx_data(struct hn_softc *, int);
static void hn_fixup_tx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
-static int hn_encap(struct hn_tx_ring *,
+static void hn_txdesc_gc(struct hn_tx_ring *,
+ struct hn_txdesc *);
+static int hn_encap(struct ifnet *, struct hn_tx_ring *,
struct hn_txdesc *, struct mbuf **);
static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
struct hn_txdesc *);
@@ -318,6 +350,10 @@ static void hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_resume_tx(struct hn_softc *, int);
+static void hn_set_txagg(struct hn_softc *);
+static void *hn_try_txagg(struct ifnet *,
+ struct hn_tx_ring *, struct hn_txdesc *,
+ int);
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *,
struct hn_softc *, struct vmbus_channel *,
@@ -385,10 +421,18 @@ SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
#endif
#endif
-/* Use shared TX taskqueue */
-static int hn_share_tx_taskq = 0;
-SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN,
- &hn_share_tx_taskq, 0, "Enable shared TX taskqueue");
+static int hn_tx_taskq_cnt = 1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
+ &hn_tx_taskq_cnt, 0, "# of TX taskqueues");
+
+#define HN_TX_TASKQ_M_INDEP 0
+#define HN_TX_TASKQ_M_GLOBAL 1
+#define HN_TX_TASKQ_M_EVTTQ 2
+
+static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
+ &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
+ "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");
#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
@@ -398,11 +442,6 @@ static int hn_use_txdesc_bufring = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
&hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
-/* Bind TX taskqueue to the target CPU */
-static int hn_bind_tx_taskq = -1;
-SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
- &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");
-
#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
@@ -433,8 +472,18 @@ SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
&hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif
+/* Packet transmission aggregation size limit */
+static int hn_tx_agg_size = -1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
+ &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");
+
+/* Packet transmission aggregation count limit */
+static int hn_tx_agg_pkts = -1;
+SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
+ &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
+
static u_int hn_cpu_index; /* next CPU for channel */
-static struct taskqueue *hn_tx_taskq; /* shared TX taskqueue */
+static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
@@ -472,7 +521,7 @@ hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
int i;
- for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif
@@ -625,11 +674,25 @@ do { \
#endif /* INET6 || INET */
static int
-hn_set_rxfilter(struct hn_softc *sc)
+hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
+{
+ int error = 0;
+
+ HN_LOCK_ASSERT(sc);
+
+ if (sc->hn_rx_filter != filter) {
+ error = hn_rndis_set_rxfilter(sc, filter);
+ if (!error)
+ sc->hn_rx_filter = filter;
+ }
+ return (error);
+}
+
+static int
+hn_rxfilter_config(struct hn_softc *sc)
{
struct ifnet *ifp = sc->hn_ifp;
uint32_t filter;
- int error = 0;
HN_LOCK_ASSERT(sc);
@@ -639,26 +702,90 @@ hn_set_rxfilter(struct hn_softc *sc)
filter = NDIS_PACKET_TYPE_DIRECTED;
if (ifp->if_flags & IFF_BROADCAST)
filter |= NDIS_PACKET_TYPE_BROADCAST;
-#ifdef notyet
- /*
- * See the comment in SIOCADDMULTI/SIOCDELMULTI.
- */
/* TODO: support multicast list */
if ((ifp->if_flags & IFF_ALLMULTI) ||
!TAILQ_EMPTY(&ifp->if_multiaddrs))
filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
-#else
- /* Always enable ALLMULTI */
- filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
-#endif
}
+ return (hn_set_rxfilter(sc, filter));
+}
- if (sc->hn_rx_filter != filter) {
- error = hn_rndis_set_rxfilter(sc, filter);
- if (!error)
- sc->hn_rx_filter = filter;
+static void
+hn_set_txagg(struct hn_softc *sc)
+{
+ uint32_t size, pkts;
+ int i;
+
+ /*
+ * Setup aggregation size.
+ */
+ if (sc->hn_agg_size < 0)
+ size = UINT32_MAX;
+ else
+ size = sc->hn_agg_size;
+
+ if (sc->hn_rndis_agg_size < size)
+ size = sc->hn_rndis_agg_size;
+
+ /* NOTE: We only aggregate packets using chimney sending buffers. */
+ if (size > (uint32_t)sc->hn_chim_szmax)
+ size = sc->hn_chim_szmax;
+
+ if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
+ /* Disable */
+ size = 0;
+ pkts = 0;
+ goto done;
+ }
+
+ /* NOTE: Type of the per TX ring setting is 'int'. */
+ if (size > INT_MAX)
+ size = INT_MAX;
+
+ /*
+ * Setup aggregation packet count.
+ */
+ if (sc->hn_agg_pkts < 0)
+ pkts = UINT32_MAX;
+ else
+ pkts = sc->hn_agg_pkts;
+
+ if (sc->hn_rndis_agg_pkts < pkts)
+ pkts = sc->hn_rndis_agg_pkts;
+
+ if (pkts <= 1) {
+ /* Disable */
+ size = 0;
+ pkts = 0;
+ goto done;
+ }
+
+ /* NOTE: Type of the per TX ring setting is 'short'. */
+ if (pkts > SHRT_MAX)
+ pkts = SHRT_MAX;
+
+done:
+ /* NOTE: Type of the per TX ring setting is 'short'. */
+ if (sc->hn_rndis_agg_align > SHRT_MAX) {
+ /* Disable */
+ size = 0;
+ pkts = 0;
+ }
+
+ if (bootverbose) {
+ if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
+ size, pkts, sc->hn_rndis_agg_align);
+ }
+
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+ struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+
+ mtx_lock(&txr->hn_tx_lock);
+ txr->hn_agg_szmax = size;
+ txr->hn_agg_pktmax = pkts;
+ txr->hn_agg_align = sc->hn_rndis_agg_align;
+ mtx_unlock(&txr->hn_tx_lock);
}
- return (error);
}
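
A compact user-space sketch of the size-clamping half of hn_set_txagg() above; the numeric inputs are assumed stand-ins (a tunable of -1, an offered RNDIS limit of 16384 and a chimney slot of 8192), not values a real host offers:

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed stand-ins; the driver derives these from tunables and the host. */
#define AGG_SIZE_TUNABLE	(-1)		/* hw.hn.tx_agg_size: -1 == auto */
#define RNDIS_AGG_SIZE		16384u		/* host-offered aggregation limit */
#define CHIM_SZMAX		8192u		/* chimney sending buffer slot size */
#define PKTSIZE_MIN		256u		/* stand-in for HN_PKTSIZE_MIN(align) */

int
main(void)
{
	uint32_t size;

	size = (AGG_SIZE_TUNABLE < 0) ? UINT32_MAX : (uint32_t)AGG_SIZE_TUNABLE;
	if (size > RNDIS_AGG_SIZE)	/* honor the host-offered limit */
		size = RNDIS_AGG_SIZE;
	if (size > CHIM_SZMAX)		/* only chimney buffers are aggregated */
		size = CHIM_SZMAX;
	if (size <= 2 * PKTSIZE_MIN)	/* cannot fit two packets: disable */
		size = 0;
	else if (size > INT_MAX)	/* the per-ring field is an int */
		size = INT_MAX;
	printf("applied TX aggregation size: %u\n", (unsigned)size);	/* 8192 here */
	return (0);
}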
static int
@@ -711,11 +838,12 @@ hn_rss_reconfig(struct hn_softc *sc)
}
static void
-hn_rss_ind_fixup(struct hn_softc *sc, int nchan)
+hn_rss_ind_fixup(struct hn_softc *sc)
{
struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
- int i;
+ int i, nchan;
+ nchan = sc->hn_rx_ring_inuse;
KASSERT(nchan > 1, ("invalid # of channels %d", nchan));
/*
@@ -773,19 +901,6 @@ hn_probe(device_t dev)
return ENXIO;
}
-static void
-hn_cpuset_setthread_task(void *xmask, int pending __unused)
-{
- cpuset_t *mask = xmask;
- int error;
-
- error = cpuset_setthread(curthread->td_tid, mask);
- if (error) {
- panic("curthread=%ju: can't pin; error=%d",
- (uintmax_t)curthread->td_tid, error);
- }
-}
-
static int
hn_attach(device_t dev)
{
@@ -801,28 +916,29 @@ hn_attach(device_t dev)
HN_LOCK_INIT(sc);
/*
+ * Initialize these tunables once.
+ */
+ sc->hn_agg_size = hn_tx_agg_size;
+ sc->hn_agg_pkts = hn_tx_agg_pkts;
+
+ /*
* Setup taskqueue for transmission.
*/
- if (hn_tx_taskq == NULL) {
- sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
- taskqueue_thread_enqueue, &sc->hn_tx_taskq);
- taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx",
- device_get_nameunit(dev));
- if (hn_bind_tx_taskq >= 0) {
- int cpu = hn_bind_tx_taskq;
- struct task cpuset_task;
- cpuset_t cpu_set;
-
- if (cpu > mp_ncpus - 1)
- cpu = mp_ncpus - 1;
- CPU_SETOF(cpu, &cpu_set);
- TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task,
- &cpu_set);
- taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task);
- taskqueue_drain(sc->hn_tx_taskq, &cpuset_task);
+ if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
+ int i;
+
+ sc->hn_tx_taskqs =
+ malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
+ M_DEVBUF, M_WAITOK);
+ for (i = 0; i < hn_tx_taskq_cnt; ++i) {
+ sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
+ M_WAITOK, taskqueue_thread_enqueue,
+ &sc->hn_tx_taskqs[i]);
+ taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
+ "%s tx%d", device_get_nameunit(dev), i);
}
- } else {
- sc->hn_tx_taskq = hn_tx_taskq;
+ } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
+ sc->hn_tx_taskqs = hn_tx_taskque;
}
/*
@@ -900,8 +1016,25 @@ hn_attach(device_t dev)
*/
sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
- if (sc->hn_xact == NULL)
+ if (sc->hn_xact == NULL) {
+ error = ENXIO;
+ goto failed;
+ }
+
+ /*
+ * Install orphan handler for the revocation of this device's
+ * primary channel.
+ *
+ * NOTE:
+ * The processing order is critical here:
+ * Install the orphan handler, _before_ testing whether this
+ * device's primary channel has been revoked or not.
+ */
+ vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
+ if (vmbus_chan_is_revoked(sc->hn_prichan)) {
+ error = ENXIO;
goto failed;
+ }
/*
* Attach the synthetic parts, i.e. NVS and RNDIS.
@@ -956,6 +1089,28 @@ hn_attach(device_t dev)
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_rss_ind_sysctl, "IU", "RSS indirect table");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
+ CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
+ "RNDIS offered packet transmission aggregation size limit");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
+ CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
+ "RNDIS offered packet transmission aggregation count limit");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
+ CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
+ "RNDIS packet transmission aggregation alignment");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_size_sysctl, "I",
+ "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_pkts_sysctl, "I",
+ "Packet transmission aggregation packets, "
+ "0 -- disable, -1 -- auto");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
+ CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_polling_sysctl, "I",
+ "Polling frequency: [100,1000000], 0 disable polling");
/*
* Setup the ifmedia, which has been initialized earlier.
@@ -1020,6 +1175,13 @@ hn_attach(device_t dev)
/* Enable all available capabilities by default. */
ifp->if_capenable = ifp->if_capabilities;
+ /*
+ * Disable IPv6 TSO and TXCSUM by default, they still can
+ * be enabled through SIOCSIFCAP.
+ */
+ ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
+ ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);
+
if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
@@ -1056,6 +1218,14 @@ hn_detach(device_t dev)
struct hn_softc *sc = device_get_softc(dev);
struct ifnet *ifp = sc->hn_ifp;
+ if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
+ /*
+ * In case that the vmbus missed the orphan handler
+ * installation.
+ */
+ vmbus_xact_ctx_orphan(sc->hn_xact);
+ }
+
if (device_is_attached(dev)) {
HN_LOCK(sc);
if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
@@ -1077,12 +1247,23 @@ hn_detach(device_t dev)
hn_destroy_rx_data(sc);
hn_destroy_tx_data(sc);
- if (sc->hn_tx_taskq != hn_tx_taskq)
- taskqueue_free(sc->hn_tx_taskq);
+ if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
+ int i;
+
+ for (i = 0; i < hn_tx_taskq_cnt; ++i)
+ taskqueue_free(sc->hn_tx_taskqs[i]);
+ free(sc->hn_tx_taskqs, M_DEVBUF);
+ }
taskqueue_free(sc->hn_mgmt_taskq0);
- if (sc->hn_xact != NULL)
+ if (sc->hn_xact != NULL) {
+ /*
+ * Uninstall the orphan handler _before_ the xact is
+ * destructed.
+ */
+ vmbus_chan_unset_orphan(sc->hn_prichan);
vmbus_xact_ctx_destroy(sc->hn_xact);
+ }
if_free(ifp);
@@ -1211,16 +1392,45 @@ hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
("put an onlist txd %#x", txd->flags));
+ KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+ ("put an onagg txd %#x", txd->flags));
KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
if (atomic_fetchadd_int(&txd->refs, -1) != 1)
return 0;
+ if (!STAILQ_EMPTY(&txd->agg_list)) {
+ struct hn_txdesc *tmp_txd;
+
+ while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
+ int freed;
+
+ KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
+ ("recursive aggregation on aggregated txdesc"));
+ KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
+ ("not aggregated txdesc"));
+ KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
+ ("aggregated txdesc uses dmamap"));
+ KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
+ ("aggregated txdesc consumes "
+ "chimney sending buffer"));
+ KASSERT(tmp_txd->chim_size == 0,
+ ("aggregated txdesc has non-zero "
+ "chimney sending size"));
+
+ STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
+ tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
+ freed = hn_txdesc_put(txr, tmp_txd);
+ KASSERT(freed, ("failed to free aggregated txdesc"));
+ }
+ }
+
if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
("chim txd uses dmamap"));
hn_chim_free(txr->hn_sc, txd->chim_index);
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
+ txd->chim_size = 0;
} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
bus_dmamap_sync(txr->hn_tx_data_dtag,
txd->data_dmap, BUS_DMASYNC_POSTWRITE);
@@ -1243,10 +1453,12 @@ hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
txr->hn_txdesc_avail++;
SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
mtx_unlock_spin(&txr->hn_txlist_spin);
-#else
+#else /* HN_USE_TXDESC_BUFRING */
+#ifdef HN_DEBUG
atomic_add_int(&txr->hn_txdesc_avail, 1);
- buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif
+ buf_ring_enqueue(txr->hn_txdesc_br, txd);
+#endif /* !HN_USE_TXDESC_BUFRING */
return 1;
}
@@ -1272,11 +1484,16 @@ hn_txdesc_get(struct hn_tx_ring *txr)
if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
+#ifdef HN_DEBUG
atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
+#endif /* HN_USE_TXDESC_BUFRING */
KASSERT(txd->m == NULL && txd->refs == 0 &&
+ STAILQ_EMPTY(&txd->agg_list) &&
txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
+ txd->chim_size == 0 &&
(txd->flags & HN_TXD_FLAG_ONLIST) &&
+ (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
(txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
txd->flags &= ~HN_TXD_FLAG_ONLIST;
txd->refs = 1;
@@ -1289,10 +1506,26 @@ hn_txdesc_hold(struct hn_txdesc *txd)
{
/* 0->1 transition will never work */
- KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs));
+ KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
atomic_add_int(&txd->refs, 1);
}
+static __inline void
+hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
+{
+
+ KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+ ("recursive aggregation on aggregating txdesc"));
+
+ KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
+ ("already aggregated"));
+ KASSERT(STAILQ_EMPTY(&txd->agg_list),
+ ("recursive aggregation on to-be-aggregated txdesc"));
+
+ txd->flags |= HN_TXD_FLAG_ONAGG;
+ STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
+}
+
static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
@@ -1327,7 +1560,7 @@ hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
txr = txd->txr;
KASSERT(txr->hn_chan == chan,
("channel mismatch, on chan%u, should be chan%u",
- vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan)));
+ vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));
txr->hn_has_txeof = 1;
hn_txdesc_put(txr, txd);
@@ -1410,12 +1643,123 @@ hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
return (pi->rm_data);
}
+static __inline int
+hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
+{
+ struct hn_txdesc *txd;
+ struct mbuf *m;
+ int error, pkts;
+
+ txd = txr->hn_agg_txd;
+ KASSERT(txd != NULL, ("no aggregate txdesc"));
+
+ /*
+ * Since hn_txpkt() will reset this temporary stat, save
+ * it now, so that oerrors can be updated properly, if
+ * hn_txpkt() ever fails.
+ */
+ pkts = txr->hn_stat_pkts;
+
+ /*
+ * Since txd's mbuf will _not_ be freed upon hn_txpkt()
+ * failure, save it for later freeing, if hn_txpkt() ever
+ * fails.
+ */
+ m = txd->m;
+ error = hn_txpkt(ifp, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m is not. */
+ m_freem(m);
+
+ txr->hn_flush_failed++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
+ }
+
+ /* Reset all aggregation states. */
+ txr->hn_agg_txd = NULL;
+ txr->hn_agg_szleft = 0;
+ txr->hn_agg_pktleft = 0;
+ txr->hn_agg_prevpkt = NULL;
+
+ return (error);
+}
+
+static void *
+hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
+ int pktsize)
+{
+ void *chim;
+
+ if (txr->hn_agg_txd != NULL) {
+ if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
+ struct hn_txdesc *agg_txd = txr->hn_agg_txd;
+ struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
+ int olen;
+
+ /*
+ * Update the previous RNDIS packet's total length,
+ * it can be increased due to the mandatory alignment
+ * padding for this RNDIS packet. And update the
+ * aggregating txdesc's chimney sending buffer size
+ * accordingly.
+ *
+ * XXX
+ * Zero-out the padding, as required by the RNDIS spec.
+ */
+ olen = pkt->rm_len;
+ pkt->rm_len = roundup2(olen, txr->hn_agg_align);
+ agg_txd->chim_size += pkt->rm_len - olen;
+
+ /* Link this txdesc to the parent. */
+ hn_txdesc_agg(agg_txd, txd);
+
+ chim = (uint8_t *)pkt + pkt->rm_len;
+ /* Save the current packet for later fixup. */
+ txr->hn_agg_prevpkt = chim;
+
+ txr->hn_agg_pktleft--;
+ txr->hn_agg_szleft -= pktsize;
+ if (txr->hn_agg_szleft <=
+ HN_PKTSIZE_MIN(txr->hn_agg_align)) {
+ /*
+ * Probably can't aggregate more packets,
+ * flush this aggregating txdesc proactively.
+ */
+ txr->hn_agg_pktleft = 0;
+ }
+ /* Done! */
+ return (chim);
+ }
+ hn_flush_txagg(ifp, txr);
+ }
+ KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
+
+ txr->hn_tx_chimney_tried++;
+ txd->chim_index = hn_chim_alloc(txr->hn_sc);
+ if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
+ return (NULL);
+ txr->hn_tx_chimney++;
+
+ chim = txr->hn_sc->hn_chim +
+ (txd->chim_index * txr->hn_sc->hn_chim_szmax);
+
+ if (txr->hn_agg_pktmax > 1 &&
+ txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
+ txr->hn_agg_txd = txd;
+ txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
+ txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
+ txr->hn_agg_prevpkt = chim;
+ }
+ return (chim);
+}
+
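
To make the alignment fix-up in hn_try_txagg() above concrete, here is a tiny stand-alone sketch of the same roundup2() arithmetic; the previous packet length, the chimney usage and the 32-byte alignment are assumed numbers, not negotiated values:

#include <stdint.h>
#include <stdio.h>

#define ROUNDUP2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))	/* y: power of 2 */

int
main(void)
{
	uint32_t rm_len = 1530;		/* assumed length of the previous RNDIS packet */
	const uint32_t align = 32;	/* assumed txr->hn_agg_align */
	uint32_t olen = rm_len, chim_size = 4096;

	rm_len = ROUNDUP2(olen, align);		/* 1536: mandatory alignment padding */
	chim_size += rm_len - olen;		/* aggregating txdesc grows by 6 bytes */
	printf("padded rm_len %u, chim_size %u\n",
	    (unsigned)rm_len, (unsigned)chim_size);
	return (0);
}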
/*
* NOTE:
* If this function fails, then both txd and m_head0 will be freed.
*/
static int
-hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
+hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
+ struct mbuf **m_head0)
{
bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
int error, nsegs, i;
@@ -1423,33 +1767,30 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
struct rndis_packet_msg *pkt;
uint32_t *pi_data;
void *chim = NULL;
- int pktlen;
+ int pkt_hlen, pkt_size;
pkt = txd->rndis_pkt;
- if (m_head->m_pkthdr.len + HN_RNDIS_PKT_LEN < txr->hn_chim_size) {
- /*
- * This packet is small enough to fit into a chimney sending
- * buffer. Try allocating one chimney sending buffer now.
- */
- txr->hn_tx_chimney_tried++;
- txd->chim_index = hn_chim_alloc(txr->hn_sc);
- if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
- chim = txr->hn_sc->hn_chim +
- (txd->chim_index * txr->hn_sc->hn_chim_szmax);
- /*
- * Directly fill the chimney sending buffer w/ the
- * RNDIS packet message.
- */
+ pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
+ if (pkt_size < txr->hn_chim_size) {
+ chim = hn_try_txagg(ifp, txr, txd, pkt_size);
+ if (chim != NULL)
pkt = chim;
- }
+ } else {
+ if (txr->hn_agg_txd != NULL)
+ hn_flush_txagg(ifp, txr);
}
pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
pkt->rm_dataoffset = sizeof(*pkt);
pkt->rm_datalen = m_head->m_pkthdr.len;
+ pkt->rm_oobdataoffset = 0;
+ pkt->rm_oobdatalen = 0;
+ pkt->rm_oobdataelements = 0;
pkt->rm_pktinfooffset = sizeof(*pkt);
pkt->rm_pktinfolen = 0;
+ pkt->rm_vchandle = 0;
+ pkt->rm_reserved = 0;
if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
/*
@@ -1510,7 +1851,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
*pi_data |= NDIS_TXCSUM_INFO_UDPCS;
}
- pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
+ pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
/* Convert RNDIS packet message offsets */
pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
@@ -1519,25 +1860,36 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
* Fast path: Chimney sending.
*/
if (chim != NULL) {
- KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
- ("chimney buffer is not used"));
- KASSERT(pkt == chim, ("RNDIS pkt not in chimney buffer"));
+ struct hn_txdesc *tgt_txd = txd;
+
+ if (txr->hn_agg_txd != NULL) {
+ tgt_txd = txr->hn_agg_txd;
+#ifdef INVARIANTS
+ *m_head0 = NULL;
+#endif
+ }
+
+ KASSERT(pkt == chim,
+ ("RNDIS pkt not in chimney sending buffer"));
+ KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
+ ("chimney sending buffer is not used"));
+ tgt_txd->chim_size += pkt->rm_len;
m_copydata(m_head, 0, m_head->m_pkthdr.len,
- ((uint8_t *)chim) + pktlen);
+ ((uint8_t *)chim) + pkt_hlen);
- txd->chim_size = pkt->rm_len;
txr->hn_gpa_cnt = 0;
- txr->hn_tx_chimney++;
txr->hn_sendpkt = hn_txpkt_chim;
goto done;
}
+
+ KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
("chimney buffer is used"));
KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));
error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
- if (error) {
+ if (__predict_false(error)) {
int freed;
/*
@@ -1551,7 +1903,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
("fail to free txd upon txdma error"));
txr->hn_txdma_failed++;
- if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return error;
}
*m_head0 = m_head;
@@ -1562,7 +1914,7 @@ hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
/* send packet with page buffer */
txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
- txr->hn_gpa[0].gpa_len = pktlen;
+ txr->hn_gpa[0].gpa_len = pkt_hlen;
/*
* Fill the page buffers with mbuf info after the page
@@ -1585,6 +1937,12 @@ done:
/* Set the completion routine */
hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);
+ /* Update temporary stats for later use. */
+ txr->hn_stat_pkts++;
+ txr->hn_stat_size += m_head->m_pkthdr.len;
+ if (m_head->m_flags & M_MCAST)
+ txr->hn_stat_mcasts++;
+
return 0;
}
@@ -1596,29 +1954,44 @@ done:
static int
hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
- int error, send_failed = 0;
+ int error, send_failed = 0, has_bpf;
again:
- /*
- * Make sure that txd is not freed before ETHER_BPF_MTAP.
- */
- hn_txdesc_hold(txd);
+ has_bpf = bpf_peers_present(ifp->if_bpf);
+ if (has_bpf) {
+ /*
+ * Make sure that this txd and any aggregated txds are not
+ * freed before ETHER_BPF_MTAP.
+ */
+ hn_txdesc_hold(txd);
+ }
error = txr->hn_sendpkt(txr, txd);
if (!error) {
- ETHER_BPF_MTAP(ifp, txd->m);
- if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if (has_bpf) {
+ const struct hn_txdesc *tmp_txd;
+
+ ETHER_BPF_MTAP(ifp, txd->m);
+ STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
+ ETHER_BPF_MTAP(ifp, tmp_txd->m);
+ }
+
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
#ifdef HN_IFSTART_SUPPORT
if (!hn_use_if_start)
#endif
{
if_inc_counter(ifp, IFCOUNTER_OBYTES,
- txd->m->m_pkthdr.len);
- if (txd->m->m_flags & M_MCAST)
- if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+ txr->hn_stat_size);
+ if (txr->hn_stat_mcasts != 0) {
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS,
+ txr->hn_stat_mcasts);
+ }
}
- txr->hn_pkts++;
+ txr->hn_pkts += txr->hn_stat_pkts;
+ txr->hn_sends++;
}
- hn_txdesc_put(txr, txd);
+ if (has_bpf)
+ hn_txdesc_put(txr, txd);
if (__predict_false(error)) {
int freed;
@@ -1656,7 +2029,13 @@ again:
txr->hn_send_failed++;
}
- return error;
+
+ /* Reset temporary stats, after this sending is done. */
+ txr->hn_stat_size = 0;
+ txr->hn_stat_pkts = 0;
+ txr->hn_stat_mcasts = 0;
+
+ return (error);
}
/*
@@ -2041,10 +2420,18 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
if (ifp->if_flags & IFF_UP) {
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
- hn_set_rxfilter(sc);
- else
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ /*
+ * Caller might hold mutex, e.g.
+ * bpf; use busy-wait for the RNDIS
+ * reply.
+ */
+ HN_NO_SLEEPING(sc);
+ hn_rxfilter_config(sc);
+ HN_SLEEPING_OK(sc);
+ } else {
hn_init_locked(sc);
+ }
} else {
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_stop(sc);
@@ -2105,27 +2492,23 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCADDMULTI:
case SIOCDELMULTI:
-#ifdef notyet
- /*
- * XXX
- * Multicast uses mutex, while RNDIS RX filter setting
- * sleeps. We workaround this by always enabling
- * ALLMULTI. ALLMULTI would actually always be on, even
- * if we supported the SIOCADDMULTI/SIOCDELMULTI, since
- * we don't support multicast address list configuration
- * for this driver.
- */
HN_LOCK(sc);
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
HN_UNLOCK(sc);
break;
}
- if (ifp->if_drv_flags & IFF_DRV_RUNNING)
- hn_set_rxfilter(sc);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ /*
+ * Multicast uses mutex; use busy-wait for
+ * the RNDIS reply.
+ */
+ HN_NO_SLEEPING(sc);
+ hn_rxfilter_config(sc);
+ HN_SLEEPING_OK(sc);
+ }
HN_UNLOCK(sc);
-#endif
break;
case SIOCSIFMEDIA:
@@ -2151,6 +2534,9 @@ hn_stop(struct hn_softc *sc)
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("synthetic parts were not attached"));
+ /* Disable polling. */
+ hn_polling(sc, 0);
+
/* Clear RUNNING bit _before_ hn_suspend_data() */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
hn_suspend_data(sc);
@@ -2176,7 +2562,7 @@ hn_init_locked(struct hn_softc *sc)
return;
/* Configure RX filter */
- hn_set_rxfilter(sc);
+ hn_rxfilter_config(sc);
/* Clear OACTIVE bit. */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
@@ -2188,6 +2574,10 @@ hn_init_locked(struct hn_softc *sc)
/* Everything is ready; unleash! */
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
+
+ /* Re-enable polling if requested. */
+ if (sc->hn_pollhz > 0)
+ hn_polling(sc, sc->hn_pollhz);
}
static void
@@ -2250,7 +2640,7 @@ hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
*/
--ackcnt;
HN_LOCK(sc);
- for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
HN_UNLOCK(sc);
return 0;
@@ -2274,7 +2664,7 @@ hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
return error;
HN_LOCK(sc);
- for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
if (on)
@@ -2342,7 +2732,7 @@ hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
uint64_t stat;
stat = 0;
- for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((uint64_t *)((uint8_t *)rxr + ofs));
}
@@ -2352,7 +2742,7 @@ hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
return error;
/* Zero out this stat. */
- for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
*((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
}
@@ -2396,7 +2786,7 @@ hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
u_long stat;
stat = 0;
- for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
stat += *((u_long *)((uint8_t *)txr + ofs));
}
@@ -2406,7 +2796,7 @@ hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
return error;
/* Zero out this stat. */
- for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((u_long *)((uint8_t *)txr + ofs)) = 0;
}
@@ -2428,7 +2818,7 @@ hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
return error;
HN_LOCK(sc);
- for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((int *)((uint8_t *)txr + ofs)) = conf;
}
@@ -2438,6 +2828,119 @@ hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
}
static int
+hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int error, size;
+
+ size = sc->hn_agg_size;
+ error = sysctl_handle_int(oidp, &size, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ HN_LOCK(sc);
+ sc->hn_agg_size = size;
+ hn_set_txagg(sc);
+ HN_UNLOCK(sc);
+
+ return (0);
+}
+
+static int
+hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int error, pkts;
+
+ pkts = sc->hn_agg_pkts;
+ error = sysctl_handle_int(oidp, &pkts, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ HN_LOCK(sc);
+ sc->hn_agg_pkts = pkts;
+ hn_set_txagg(sc);
+ HN_UNLOCK(sc);
+
+ return (0);
+}
+
+static int
+hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int pkts;
+
+ pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
+ return (sysctl_handle_int(oidp, &pkts, 0, req));
+}
+
+static int
+hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int align;
+
+ align = sc->hn_tx_ring[0].hn_agg_align;
+ return (sysctl_handle_int(oidp, &align, 0, req));
+}
+
+static void
+hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
+{
+ if (pollhz == 0)
+ vmbus_chan_poll_disable(chan);
+ else
+ vmbus_chan_poll_enable(chan, pollhz);
+}
+
+static void
+hn_polling(struct hn_softc *sc, u_int pollhz)
+{
+ int nsubch = sc->hn_rx_ring_inuse - 1;
+
+ HN_LOCK_ASSERT(sc);
+
+ if (nsubch > 0) {
+ struct vmbus_channel **subch;
+ int i;
+
+ subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
+ for (i = 0; i < nsubch; ++i)
+ hn_chan_polling(subch[i], pollhz);
+ vmbus_subchan_rel(subch, nsubch);
+ }
+ hn_chan_polling(sc->hn_prichan, pollhz);
+}
+
+static int
+hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int pollhz, error;
+
+ pollhz = sc->hn_pollhz;
+ error = sysctl_handle_int(oidp, &pollhz, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ if (pollhz != 0 &&
+ (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
+ return (EINVAL);
+
+ HN_LOCK(sc);
+ if (sc->hn_pollhz != pollhz) {
+ sc->hn_pollhz = pollhz;
+ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
+ (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
+ hn_polling(sc, sc->hn_pollhz);
+ }
+ HN_UNLOCK(sc);
+
+ return (0);
+}
+
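
The handler above accepts either 0 (disable polling and return to interrupt-driven operation) or a frequency inside the vmbus polling window; per the sysctl description added earlier in this diff, that window is [100, 1000000]. A minimal sketch of the acceptance test, with those bounds standing in for VMBUS_CHAN_POLLHZ_MIN/MAX:

#include <stdbool.h>
#include <stdio.h>

#define POLLHZ_MIN	100u		/* stand-in for VMBUS_CHAN_POLLHZ_MIN */
#define POLLHZ_MAX	1000000u	/* stand-in for VMBUS_CHAN_POLLHZ_MAX */

static bool
pollhz_valid(unsigned int pollhz)
{
	/* 0 turns polling off; anything else must fall inside the window. */
	return (pollhz == 0 || (pollhz >= POLLHZ_MIN && pollhz <= POLLHZ_MAX));
}

int
main(void)
{
	printf("%d %d %d\n", pollhz_valid(0), pollhz_valid(50), pollhz_valid(10000));
	return (0);
}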
+static int
hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
{
struct hn_softc *sc = arg1;
@@ -2546,7 +3049,7 @@ hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
goto back;
sc->hn_flags |= HN_FLAG_HAS_RSSIND;
- hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse);
+ hn_rss_ind_fixup(sc);
error = hn_rss_reconfig(sc);
back:
HN_UNLOCK(sc);
@@ -2853,7 +3356,10 @@ hn_destroy_rx_data(struct hn_softc *sc)
int i;
if (sc->hn_rxbuf != NULL) {
- hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
+ if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
+ hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
+ else
+ device_printf(sc->hn_dev, "RXBUF is referenced\n");
sc->hn_rxbuf = NULL;
}
@@ -2865,7 +3371,12 @@ hn_destroy_rx_data(struct hn_softc *sc)
if (rxr->hn_br == NULL)
continue;
- hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
+ if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
+ hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
+ } else {
+ device_printf(sc->hn_dev,
+ "%dth channel bufring is referenced", i);
+ }
rxr->hn_br = NULL;
#if defined(INET) || defined(INET6)
@@ -2906,7 +3417,12 @@ hn_tx_ring_create(struct hn_softc *sc, int id)
M_WAITOK, &txr->hn_tx_lock);
#endif
- txr->hn_tx_taskq = sc->hn_tx_taskq;
+ if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
+ txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
+ device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
+ } else {
+ txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
+ }
#ifdef HN_IFSTART_SUPPORT
if (hn_use_if_start) {
@@ -2980,6 +3496,7 @@ hn_tx_ring_create(struct hn_softc *sc, int id)
txd->txr = txr;
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
+ STAILQ_INIT(&txd->agg_list);
/*
* Allocate and load RNDIS packet message.
@@ -3049,9 +3566,11 @@ hn_tx_ring_create(struct hn_softc *sc, int id)
if (txr->hn_tx_sysctl_tree != NULL) {
child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
+#ifdef HN_DEBUG
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
"# of available TX descs");
+#endif
#ifdef HN_IFSTART_SUPPORT
if (!hn_use_if_start)
#endif
@@ -3063,6 +3582,8 @@ hn_tx_ring_create(struct hn_softc *sc, int id)
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
CTLFLAG_RW, &txr->hn_pkts,
"# of packets transmitted");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
+ CTLFLAG_RW, &txr->hn_sends, "# of sends");
}
}
@@ -3084,24 +3605,43 @@ hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
}
static void
+hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
+{
+
+ KASSERT(txd->refs == 0 || txd->refs == 1,
+ ("invalid txd refs %d", txd->refs));
+
+ /* Aggregated txds will be freed by their aggregating txd. */
+ if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
+ int freed;
+
+ freed = hn_txdesc_put(txr, txd);
+ KASSERT(freed, ("can't free txdesc"));
+ }
+}
+
+static void
hn_tx_ring_destroy(struct hn_tx_ring *txr)
{
- struct hn_txdesc *txd;
+ int i;
if (txr->hn_txdesc == NULL)
return;
-#ifndef HN_USE_TXDESC_BUFRING
- while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
- SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
- hn_txdesc_dmamap_destroy(txd);
- }
-#else
- mtx_lock(&txr->hn_tx_lock);
- while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL)
- hn_txdesc_dmamap_destroy(txd);
- mtx_unlock(&txr->hn_tx_lock);
-#endif
+ /*
+ * NOTE:
+ * Because the freeing of aggregated txds will be deferred
+ * to the aggregating txd, two passes are used here:
+ * - The first pass GCes any pending txds. This GC is necessary,
+ * since if the channels are revoked, hypervisor will not
+ * deliver send-done for all pending txds.
+ * - The second pass frees the busdma stuffs, i.e. after all txds
+ * were freed.
+ */
+ for (i = 0; i < txr->hn_txdesc_cnt; ++i)
+ hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
+ for (i = 0; i < txr->hn_txdesc_cnt; ++i)
+ hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
if (txr->hn_tx_data_dtag != NULL)
bus_dma_tag_destroy(txr->hn_tx_data_dtag);
@@ -3177,6 +3717,11 @@ hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_txdma_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
+ CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
+ __offsetof(struct hn_tx_ring, hn_flush_failed),
+ hn_tx_stat_ulong_sysctl, "LU",
+ "# of packet transmission aggregation flush failure");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_collapsed),
@@ -3213,6 +3758,17 @@ hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
+ CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
+ "Applied packet transmission aggregation size");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_pktmax_sysctl, "I",
+ "Applied packet transmission aggregation packets");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+ hn_txagg_align_sysctl, "I",
+ "Applied packet transmission aggregation alignment");
return 0;
}
@@ -3222,7 +3778,7 @@ hn_set_chim_size(struct hn_softc *sc, int chim_size)
{
int i;
- for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
+ for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_chim_size = chim_size;
}
@@ -3272,12 +3828,10 @@ hn_fixup_tx_data(struct hn_softc *sc)
csum_assist |= CSUM_IP_TCP;
if (sc->hn_caps & HN_CAP_UDP4CS)
csum_assist |= CSUM_IP_UDP;
-#ifdef notyet
if (sc->hn_caps & HN_CAP_TCP6CS)
csum_assist |= CSUM_IP6_TCP;
if (sc->hn_caps & HN_CAP_UDP6CS)
csum_assist |= CSUM_IP6_UDP;
-#endif
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
@@ -3298,7 +3852,12 @@ hn_destroy_tx_data(struct hn_softc *sc)
int i;
if (sc->hn_chim != NULL) {
- hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
+ if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
+ hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
+ } else {
+ device_printf(sc->hn_dev,
+ "chimney sending buffer is referenced");
+ }
sc->hn_chim = NULL;
}
@@ -3332,18 +3891,20 @@ hn_start_locked(struct hn_tx_ring *txr, int len)
{
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
+ int sched = 0;
KASSERT(hn_use_if_start,
("hn_start_locked is called, when if_start is disabled"));
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+ KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
- return 0;
+ return (0);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
IFF_DRV_RUNNING)
- return 0;
+ return (0);
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
struct hn_txdesc *txd;
@@ -3361,7 +3922,8 @@ hn_start_locked(struct hn_tx_ring *txr, int len)
* following up packets) to tx taskqueue.
*/
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- return 1;
+ sched = 1;
+ break;
}
#if defined(INET6) || defined(INET)
@@ -3382,21 +3944,50 @@ hn_start_locked(struct hn_tx_ring *txr, int len)
break;
}
- error = hn_encap(txr, txd, &m_head);
+ error = hn_encap(ifp, txr, txd, &m_head);
if (error) {
/* Both txd and m_head are freed */
+ KASSERT(txr->hn_agg_txd == NULL,
+ ("encap failed w/ pending aggregating txdesc"));
continue;
}
- error = hn_txpkt(ifp, txr, txd);
- if (__predict_false(error)) {
- /* txd is freed, but m_head is not */
- IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
- atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
- break;
+ if (txr->hn_agg_pktleft == 0) {
+ if (txr->hn_agg_txd != NULL) {
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
+ error = hn_flush_txagg(ifp, txr);
+ if (__predict_false(error)) {
+ atomic_set_int(&ifp->if_drv_flags,
+ IFF_DRV_OACTIVE);
+ break;
+ }
+ } else {
+ KASSERT(m_head != NULL, ("mbuf was freed"));
+ error = hn_txpkt(ifp, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m_head is not */
+ IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
+ atomic_set_int(&ifp->if_drv_flags,
+ IFF_DRV_OACTIVE);
+ break;
+ }
+ }
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(txr->hn_agg_txd != NULL,
+ ("no aggregating txdesc"));
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
}
+#endif
}
- return 0;
+
+ /* Flush pending aggregated transmission. */
+ if (txr->hn_agg_txd != NULL)
+ hn_flush_txagg(ifp, txr);
+ return (sched);
}
static void
@@ -3473,18 +4064,20 @@ hn_xmit(struct hn_tx_ring *txr, int len)
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
struct mbuf *m_head;
+ int sched = 0;
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
#ifdef HN_IFSTART_SUPPORT
KASSERT(hn_use_if_start == 0,
("hn_xmit is called, when if_start is enabled"));
#endif
+ KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
- return 0;
+ return (0);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
- return 0;
+ return (0);
while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
struct hn_txdesc *txd;
@@ -3497,7 +4090,8 @@ hn_xmit(struct hn_tx_ring *txr, int len)
* following up packets) to tx taskqueue.
*/
drbr_putback(ifp, txr->hn_mbuf_br, m_head);
- return 1;
+ sched = 1;
+ break;
}
txd = hn_txdesc_get(txr);
@@ -3508,25 +4102,53 @@ hn_xmit(struct hn_tx_ring *txr, int len)
break;
}
- error = hn_encap(txr, txd, &m_head);
+ error = hn_encap(ifp, txr, txd, &m_head);
if (error) {
/* Both txd and m_head are freed; discard */
+ KASSERT(txr->hn_agg_txd == NULL,
+ ("encap failed w/ pending aggregating txdesc"));
drbr_advance(ifp, txr->hn_mbuf_br);
continue;
}
- error = hn_txpkt(ifp, txr, txd);
- if (__predict_false(error)) {
- /* txd is freed, but m_head is not */
- drbr_putback(ifp, txr->hn_mbuf_br, m_head);
- txr->hn_oactive = 1;
- break;
+ if (txr->hn_agg_pktleft == 0) {
+ if (txr->hn_agg_txd != NULL) {
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
+ error = hn_flush_txagg(ifp, txr);
+ if (__predict_false(error)) {
+ txr->hn_oactive = 1;
+ break;
+ }
+ } else {
+ KASSERT(m_head != NULL, ("mbuf was freed"));
+ error = hn_txpkt(ifp, txr, txd);
+ if (__predict_false(error)) {
+ /* txd is freed, but m_head is not */
+ drbr_putback(ifp, txr->hn_mbuf_br,
+ m_head);
+ txr->hn_oactive = 1;
+ break;
+ }
+ }
+ }
+#ifdef INVARIANTS
+ else {
+ KASSERT(txr->hn_agg_txd != NULL,
+ ("no aggregating txdesc"));
+ KASSERT(m_head == NULL,
+ ("pending mbuf for aggregating txdesc"));
}
+#endif
/* Sent */
drbr_advance(ifp, txr->hn_mbuf_br);
}
- return 0;
+
+ /* Flush pending aggregated transmission. */
+ if (txr->hn_agg_txd != NULL)
+ hn_flush_txagg(ifp, txr);
+ return (sched);
}
static int
@@ -3695,7 +4317,7 @@ hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
}
/* Bind this channel to a proper CPU. */
- vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
+ vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
/*
* Open this channel
@@ -3706,11 +4328,14 @@ hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
cbr.cbr_rxsz = HN_RXBR_SIZE;
error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
if (error) {
- if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
- vmbus_chan_id(chan), error);
- rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
- if (txr != NULL)
- txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
+ if (error == EISCONN) {
+ if_printf(sc->hn_ifp, "bufring is connected after "
+ "chan%u open failure\n", vmbus_chan_id(chan));
+ rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
+ } else {
+ if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
+ vmbus_chan_id(chan), error);
+ }
}
return (error);
}
@@ -3719,7 +4344,7 @@ static void
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
{
struct hn_rx_ring *rxr;
- int idx;
+ int idx, error;
idx = vmbus_chan_subidx(chan);
@@ -3748,7 +4373,15 @@ hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
* NOTE:
* Channel closing does _not_ destroy the target channel.
*/
- vmbus_chan_close(chan);
+ error = vmbus_chan_close_direct(chan);
+ if (error == EISCONN) {
+ if_printf(sc->hn_ifp, "chan%u bufring is connected "
+ "after being closed\n", vmbus_chan_id(chan));
+ rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
+ } else if (error) {
+ if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
+ vmbus_chan_id(chan), error);
+ }
}
static int
@@ -3758,15 +4391,18 @@ hn_attach_subchans(struct hn_softc *sc)
int subchan_cnt = sc->hn_rx_ring_inuse - 1;
int i, error = 0;
- if (subchan_cnt == 0)
- return (0);
+ KASSERT(subchan_cnt > 0, ("no sub-channels"));
/* Attach the sub-channels. */
subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
for (i = 0; i < subchan_cnt; ++i) {
- error = hn_chan_attach(sc, subchans[i]);
- if (error)
- break;
+ int error1;
+
+ error1 = hn_chan_attach(sc, subchans[i]);
+ if (error1) {
+ error = error1;
+ /* Move on; all channels will be detached later. */
+ }
}
vmbus_subchan_rel(subchans, subchan_cnt);
@@ -3878,16 +4514,39 @@ hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
return (0);
}
+static bool
+hn_synth_attachable(const struct hn_softc *sc)
+{
+ int i;
+
+ if (sc->hn_flags & HN_FLAG_ERRORS)
+ return (false);
+
+ for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
+ const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
+
+ if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
+ return (false);
+ }
+ return (true);
+}
+
static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
+#define ATTACHED_NVS 0x0002
+#define ATTACHED_RNDIS 0x0004
+
struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
int error, nsubch, nchan, i;
- uint32_t old_caps;
+ uint32_t old_caps, attached = 0;
KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
("synthetic parts were attached"));
+ if (!hn_synth_attachable(sc))
+ return (ENXIO);
+
/* Save capabilities for later verification. */
old_caps = sc->hn_caps;
sc->hn_caps = 0;
@@ -3901,21 +4560,23 @@ hn_synth_attach(struct hn_softc *sc, int mtu)
*/
error = hn_chan_attach(sc, sc->hn_prichan);
if (error)
- return (error);
+ goto failed;
/*
* Attach NVS.
*/
error = hn_nvs_attach(sc, mtu);
if (error)
- return (error);
+ goto failed;
+ attached |= ATTACHED_NVS;
/*
* Attach RNDIS _after_ NVS is attached.
*/
error = hn_rndis_attach(sc, mtu);
if (error)
- return (error);
+ goto failed;
+ attached |= ATTACHED_RNDIS;
/*
* Make sure capabilities are not changed.
@@ -3923,9 +4584,8 @@ hn_synth_attach(struct hn_softc *sc, int mtu)
if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
old_caps, sc->hn_caps);
- /* Restore old capabilities and abort. */
- sc->hn_caps = old_caps;
- return ENXIO;
+ error = ENXIO;
+ goto failed;
}
/*
@@ -3938,19 +4598,34 @@ hn_synth_attach(struct hn_softc *sc, int mtu)
nsubch = sc->hn_rx_ring_cnt - 1;
error = hn_synth_alloc_subchans(sc, &nsubch);
if (error)
- return (error);
+ goto failed;
+ /* NOTE: _Full_ synthetic parts detach is required now. */
+ sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
+ /*
+ * Set the # of TX/RX rings that could be used according to
+ * the # of channels that NVS offered.
+ */
nchan = nsubch + 1;
+ hn_set_ring_inuse(sc, nchan);
if (nchan == 1) {
/* Only the primary channel can be used; done */
goto back;
}
/*
- * Configure RSS key and indirect table _after_ all sub-channels
- * are allocated.
+ * Attach the sub-channels.
+ *
+ * NOTE: hn_set_ring_inuse() _must_ have been called.
*/
+ error = hn_attach_subchans(sc);
+ if (error)
+ goto failed;
+ /*
+ * Configure RSS key and indirect table _after_ all sub-channels
+ * are attached.
+ */
if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
/*
* RSS key is not set yet; set it to the default RSS key.
@@ -3978,34 +4653,38 @@ hn_synth_attach(struct hn_softc *sc, int mtu)
* # of usable channels may be changed, so we have to
* make sure that all entries in RSS indirect table
* are valid.
+ *
+ * NOTE: hn_set_ring_inuse() _must_ have been called.
*/
- hn_rss_ind_fixup(sc, nchan);
+ hn_rss_ind_fixup(sc);
}
error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
- if (error) {
- /*
- * Failed to configure RSS key or indirect table; only
- * the primary channel can be used.
- */
- nchan = 1;
- }
+ if (error)
+ goto failed;
back:
/*
- * Set the # of TX/RX rings that could be used according to
- * the # of channels that NVS offered.
+ * Fixup transmission aggregation setup.
*/
- hn_set_ring_inuse(sc, nchan);
+ hn_set_txagg(sc);
+ return (0);
- /*
- * Attach the sub-channels, if any.
- */
- error = hn_attach_subchans(sc);
- if (error)
- return (error);
+failed:
+ if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
+ hn_synth_detach(sc);
+ } else {
+ if (attached & ATTACHED_RNDIS)
+ hn_rndis_detach(sc);
+ if (attached & ATTACHED_NVS)
+ hn_nvs_detach(sc);
+ hn_chan_detach(sc, sc->hn_prichan);
+ /* Restore old capabilities. */
+ sc->hn_caps = old_caps;
+ }
+ return (error);
- sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
- return (0);
+#undef ATTACHED_RNDIS
+#undef ATTACHED_NVS
}
/*
@@ -4016,7 +4695,6 @@ back:
static void
hn_synth_detach(struct hn_softc *sc)
{
- HN_LOCK_ASSERT(sc);
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("synthetic parts were not attached"));
@@ -4052,10 +4730,17 @@ hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
}
static void
-hn_chan_drain(struct vmbus_channel *chan)
+hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{
- while (!vmbus_chan_rx_empty(chan) || !vmbus_chan_tx_empty(chan))
+ /*
+ * NOTE:
+ * The TX bufring will not be drained by the hypervisor,
+ * if the primary channel is revoked.
+ */
+ while (!vmbus_chan_rx_empty(chan) ||
+ (!vmbus_chan_is_revoked(sc->hn_prichan) &&
+ !vmbus_chan_tx_empty(chan)))
pause("waitch", 1);
vmbus_chan_intr_drain(chan);
}
@@ -4064,6 +4749,7 @@ static void
hn_suspend_data(struct hn_softc *sc)
{
struct vmbus_channel **subch = NULL;
+ struct hn_tx_ring *txr;
int i, nsubch;
HN_LOCK_ASSERT(sc);
@@ -4072,26 +4758,29 @@ hn_suspend_data(struct hn_softc *sc)
* Suspend TX.
*/
for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
- struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+ txr = &sc->hn_tx_ring[i];
mtx_lock(&txr->hn_tx_lock);
txr->hn_suspended = 1;
mtx_unlock(&txr->hn_tx_lock);
/* No one is able send more packets now. */
- /* Wait for all pending sends to finish. */
- while (hn_tx_ring_pending(txr))
+ /*
+ * Wait for all pending sends to finish.
+ *
+ * NOTE:
+ * We will _not_ receive all pending send-done, if the
+ * primary channel is revoked.
+ */
+ while (hn_tx_ring_pending(txr) &&
+ !vmbus_chan_is_revoked(sc->hn_prichan))
pause("hnwtx", 1 /* 1 tick */);
-
- taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
- taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
}
/*
* Disable RX by clearing RX filter.
*/
- sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
- hn_rndis_set_rxfilter(sc, sc->hn_rx_filter);
+ hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
/*
* Give RNDIS enough time to flush all pending data packets.
@@ -4107,12 +4796,27 @@ hn_suspend_data(struct hn_softc *sc)
if (subch != NULL) {
for (i = 0; i < nsubch; ++i)
- hn_chan_drain(subch[i]);
+ hn_chan_drain(sc, subch[i]);
}
- hn_chan_drain(sc->hn_prichan);
+ hn_chan_drain(sc, sc->hn_prichan);
if (subch != NULL)
vmbus_subchan_rel(subch, nsubch);
+
+ /*
+ * Drain any pending TX tasks.
+ *
+ * NOTE:
+ * The above hn_chan_drain() can dispatch TX tasks, so the TX
+ * tasks will have to be drained _after_ the above hn_chan_drain()
+ * calls.
+ */
+ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
+ txr = &sc->hn_tx_ring[i];
+
+ taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
+ taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
+ }
}
static void
@@ -4148,6 +4852,9 @@ static void
hn_suspend(struct hn_softc *sc)
{
+ /* Disable polling. */
+ hn_polling(sc, 0);
+
if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_suspend_data(sc);
hn_suspend_mgmt(sc);
@@ -4180,7 +4887,7 @@ hn_resume_data(struct hn_softc *sc)
/*
* Re-enable RX.
*/
- hn_set_rxfilter(sc);
+ hn_rxfilter_config(sc);
/*
* Make sure to clear suspend status on "all" TX rings,
@@ -4240,6 +4947,13 @@ hn_resume(struct hn_softc *sc)
if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
hn_resume_data(sc);
hn_resume_mgmt(sc);
+
+ /*
+ * Re-enable polling if this interface is running and
+ * the polling is requested.
+ */
+ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
+ hn_polling(sc, sc->hn_pollhz);
}
static void
@@ -4758,27 +5472,42 @@ hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
static void
hn_tx_taskq_create(void *arg __unused)
{
+ int i;
+
+ /*
+ * Fix the # of TX taskqueues.
+ */
+ if (hn_tx_taskq_cnt <= 0)
+ hn_tx_taskq_cnt = 1;
+ else if (hn_tx_taskq_cnt > mp_ncpus)
+ hn_tx_taskq_cnt = mp_ncpus;
+
+ /*
+ * Fix the TX taskqueue mode.
+ */
+ switch (hn_tx_taskq_mode) {
+ case HN_TX_TASKQ_M_INDEP:
+ case HN_TX_TASKQ_M_GLOBAL:
+ case HN_TX_TASKQ_M_EVTTQ:
+ break;
+ default:
+ hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
+ break;
+ }
if (vm_guest != VM_GUEST_HV)
return;
- if (!hn_share_tx_taskq)
+ if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
return;
- hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
- taskqueue_thread_enqueue, &hn_tx_taskq);
- taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx");
- if (hn_bind_tx_taskq >= 0) {
- int cpu = hn_bind_tx_taskq;
- struct task cpuset_task;
- cpuset_t cpu_set;
-
- if (cpu > mp_ncpus - 1)
- cpu = mp_ncpus - 1;
- CPU_SETOF(cpu, &cpu_set);
- TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set);
- taskqueue_enqueue(hn_tx_taskq, &cpuset_task);
- taskqueue_drain(hn_tx_taskq, &cpuset_task);
+ hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
+ M_DEVBUF, M_WAITOK);
+ for (i = 0; i < hn_tx_taskq_cnt; ++i) {
+ hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
+ taskqueue_thread_enqueue, &hn_tx_taskque[i]);
+ taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
+ "hn tx%d", i);
}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
@@ -4788,8 +5517,13 @@ static void
hn_tx_taskq_destroy(void *arg __unused)
{
- if (hn_tx_taskq != NULL)
- taskqueue_free(hn_tx_taskq);
+ if (hn_tx_taskque != NULL) {
+ int i;
+
+ for (i = 0; i < hn_tx_taskq_cnt; ++i)
+ taskqueue_free(hn_tx_taskque[i]);
+ free(hn_tx_taskque, M_DEVBUF);
+ }
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
hn_tx_taskq_destroy, NULL);