diff options
author | gnn <gnn@FreeBSD.org> | 2014-05-11 17:18:09 +0000 |
---|---|---|
committer | gnn <gnn@FreeBSD.org> | 2014-05-11 17:18:09 +0000 |
commit | 59b23c23135d2b5631f4bad81adb0c86ac213b7a (patch) | |
tree | 99be57e58ee7334664d3dca78c9b4de48f6462dd /sys/dev | |
parent | b6c03d4ad70a056fb956f7ab6f41ffbbc8e01aa7 (diff) | |
download | FreeBSD-src-59b23c23135d2b5631f4bad81adb0c86ac213b7a.zip FreeBSD-src-59b23c23135d2b5631f4bad81adb0c86ac213b7a.tar.gz |
MFC: 263302, 264461, 264772
263302:
fix mbuf leak if it does not fit in software queue
264461:
Commit various fixes for the SolarFlare drivers, in particular
this set of patches fixes support for systems with > 32 cores.
Details include
sfxge: RXQ index (not label) comes from FW in flush done/failed events
Change the second argument name of the efx_rxq_flush_done_ev_t and
efx_rxq_flush_failed_ev_t prototypes to highlight that RXQ index (not label)
comes from FW in flush done and failed events.
sfxge: TXQ index (not label) comes from FW in flush done event
Change the second argument name of the efx_txq_flush_done_ev_t prototype to
highlight that TXQ index (not label) comes from FW in flush done event.
sfxge: use TXQ type as label to support more than 32 TXQs
There are 3 TXQs in event queue 0 and 1 TXQ (with TCP/UDP checksum offload)
in all other event queues.
264772:
Check that port is started when MAC filter is set
The MAC filter set may be called without softc_lock held in the case of
SIOCADDMULTI and SIOCDELMULTI ioctls. The ioctl handler checks IFF_DRV_RUNNING
flag which implies port started, but it is not guaranteed to remain.
softc_lock shared lock can't be held in the case of these ioctls processing,
since it results in failure where kernel complains that non-sleepable
lock is held in sleeping thread.
Both problems are repeatable on LAG with LACP proto bring up.
Submitted by: Andrew Rybchenko <Andrew.Rybchenko at oktetlabs.ru>
Sponsored by: Solarflare Communications, Inc.
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/sfxge/common/efx.h | 6 | ||||
-rw-r--r-- | sys/dev/sfxge/common/efx_ev.c | 20 | ||||
-rw-r--r-- | sys/dev/sfxge/common/efx_tx.c | 2 | ||||
-rw-r--r-- | sys/dev/sfxge/sfxge_ev.c | 35 | ||||
-rw-r--r-- | sys/dev/sfxge/sfxge_port.c | 17 | ||||
-rw-r--r-- | sys/dev/sfxge/sfxge_tx.c | 18 |
6 files changed, 70 insertions, 28 deletions
diff --git a/sys/dev/sfxge/common/efx.h b/sys/dev/sfxge/common/efx.h index a0f2025..7693de4 100644 --- a/sys/dev/sfxge/common/efx.h +++ b/sys/dev/sfxge/common/efx.h @@ -1360,17 +1360,17 @@ typedef __checkReturn boolean_t typedef __checkReturn boolean_t (*efx_rxq_flush_done_ev_t)( __in_opt void *arg, - __in uint32_t label); + __in uint32_t rxq_index); typedef __checkReturn boolean_t (*efx_rxq_flush_failed_ev_t)( __in_opt void *arg, - __in uint32_t label); + __in uint32_t rxq_index); typedef __checkReturn boolean_t (*efx_txq_flush_done_ev_t)( __in_opt void *arg, - __in uint32_t label); + __in uint32_t txq_index); typedef __checkReturn boolean_t (*efx_software_ev_t)( diff --git a/sys/dev/sfxge/common/efx_ev.c b/sys/dev/sfxge/common/efx_ev.c index 1ce8218..f8b9aae 100644 --- a/sys/dev/sfxge/common/efx_ev.c +++ b/sys/dev/sfxge/common/efx_ev.c @@ -407,24 +407,24 @@ efx_ev_driver( switch (EFX_QWORD_FIELD(*eqp, FSF_AZ_DRIVER_EV_SUBCODE)) { case FSE_AZ_TX_DESCQ_FLS_DONE_EV: { - uint32_t label; + uint32_t txq_index; EFX_EV_QSTAT_INCR(eep, EV_DRIVER_TX_DESCQ_FLS_DONE); - label = EFX_QWORD_FIELD(*eqp, FSF_AZ_DRIVER_EV_SUBDATA); + txq_index = EFX_QWORD_FIELD(*eqp, FSF_AZ_DRIVER_EV_SUBDATA); - EFSYS_PROBE1(tx_descq_fls_done, uint32_t, label); + EFSYS_PROBE1(tx_descq_fls_done, uint32_t, txq_index); EFSYS_ASSERT(eecp->eec_txq_flush_done != NULL); - should_abort = eecp->eec_txq_flush_done(arg, label); + should_abort = eecp->eec_txq_flush_done(arg, txq_index); break; } case FSE_AZ_RX_DESCQ_FLS_DONE_EV: { - uint32_t label; + uint32_t rxq_index; uint32_t failed; - label = EFX_QWORD_FIELD(*eqp, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); + rxq_index = EFX_QWORD_FIELD(*eqp, FSF_AZ_DRIVER_EV_RX_DESCQ_ID); failed = EFX_QWORD_FIELD(*eqp, FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL); EFSYS_ASSERT(eecp->eec_rxq_flush_done != NULL); @@ -433,15 +433,15 @@ efx_ev_driver( if (failed) { EFX_EV_QSTAT_INCR(eep, EV_DRIVER_RX_DESCQ_FLS_FAILED); - EFSYS_PROBE1(rx_descq_fls_failed, uint32_t, label); + EFSYS_PROBE1(rx_descq_fls_failed, uint32_t, rxq_index); - should_abort = eecp->eec_rxq_flush_failed(arg, label); + should_abort = eecp->eec_rxq_flush_failed(arg, rxq_index); } else { EFX_EV_QSTAT_INCR(eep, EV_DRIVER_RX_DESCQ_FLS_DONE); - EFSYS_PROBE1(rx_descq_fls_done, uint32_t, label); + EFSYS_PROBE1(rx_descq_fls_done, uint32_t, rxq_index); - should_abort = eecp->eec_rxq_flush_done(arg, label); + should_abort = eecp->eec_rxq_flush_done(arg, rxq_index); } break; diff --git a/sys/dev/sfxge/common/efx_tx.c b/sys/dev/sfxge/common/efx_tx.c index 56a47fa..a78b761 100644 --- a/sys/dev/sfxge/common/efx_tx.c +++ b/sys/dev/sfxge/common/efx_tx.c @@ -290,7 +290,7 @@ efx_tx_qcreate( EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_TX); EFX_STATIC_ASSERT(EFX_EV_TX_NLABELS == (1 << FRF_AZ_TX_DESCQ_LABEL_WIDTH)); - EFSYS_ASSERT3U(label, <, EFX_EV_TX_NLABELS); + /* EFSYS_ASSERT3U(label, <, EFX_EV_TX_NLABELS);*/ EFSYS_ASSERT3U(enp->en_tx_qcount + 1, <, encp->enc_txq_limit); if (!ISP2(n) || !(n & EFX_TXQ_NDESCS_MASK)) { diff --git a/sys/dev/sfxge/sfxge_ev.c b/sys/dev/sfxge/sfxge_ev.c index b506b27..49e8799 100644 --- a/sys/dev/sfxge/sfxge_ev.c +++ b/sys/dev/sfxge/sfxge_ev.c @@ -150,17 +150,18 @@ sfxge_ev_exception(void *arg, uint32_t code, uint32_t data) } static boolean_t -sfxge_ev_rxq_flush_done(void *arg, uint32_t label) +sfxge_ev_rxq_flush_done(void *arg, uint32_t rxq_index) { struct sfxge_evq *evq; struct sfxge_softc *sc; struct sfxge_rxq *rxq; unsigned int index; + unsigned int label; uint16_t magic; evq = (struct sfxge_evq *)arg; sc = evq->sc; - rxq = sc->rxq[label]; + rxq = sc->rxq[rxq_index]; KASSERT(rxq != NULL, ("rxq == NULL")); @@ -168,6 +169,7 @@ sfxge_ev_rxq_flush_done(void *arg, uint32_t label) index = rxq->index; evq = sc->evq[index]; + label = rxq_index; KASSERT((label & SFXGE_MAGIC_DMAQ_LABEL_MASK) == label, ("(label & SFXGE_MAGIC_DMAQ_LABEL_MASK) != level")); magic = SFXGE_MAGIC_RX_QFLUSH_DONE | label; @@ -180,17 +182,18 @@ sfxge_ev_rxq_flush_done(void *arg, uint32_t label) } static boolean_t -sfxge_ev_rxq_flush_failed(void *arg, uint32_t label) +sfxge_ev_rxq_flush_failed(void *arg, uint32_t rxq_index) { struct sfxge_evq *evq; struct sfxge_softc *sc; struct sfxge_rxq *rxq; unsigned int index; + unsigned int label; uint16_t magic; evq = (struct sfxge_evq *)arg; sc = evq->sc; - rxq = sc->rxq[label]; + rxq = sc->rxq[rxq_index]; KASSERT(rxq != NULL, ("rxq == NULL")); @@ -198,6 +201,7 @@ sfxge_ev_rxq_flush_failed(void *arg, uint32_t label) index = rxq->index; evq = sc->evq[index]; + label = rxq_index; KASSERT((label & SFXGE_MAGIC_DMAQ_LABEL_MASK) == label, ("(label & SFXGE_MAGIC_DMAQ_LABEL_MASK) != label")); magic = SFXGE_MAGIC_RX_QFLUSH_FAILED | label; @@ -209,18 +213,27 @@ sfxge_ev_rxq_flush_failed(void *arg, uint32_t label) return (B_FALSE); } +static struct sfxge_txq * +sfxge_get_txq_by_label(struct sfxge_evq *evq, enum sfxge_txq_type label) +{ + unsigned int index; + + KASSERT((evq->index == 0 && label < SFXGE_TXQ_NTYPES) || + (label == SFXGE_TXQ_IP_TCP_UDP_CKSUM), ("unexpected txq label")); + index = (evq->index == 0) ? label : (evq->index - 1 + SFXGE_TXQ_NTYPES); + return evq->sc->txq[index]; +} + static boolean_t sfxge_ev_tx(void *arg, uint32_t label, uint32_t id) { struct sfxge_evq *evq; - struct sfxge_softc *sc; struct sfxge_txq *txq; unsigned int stop; unsigned int delta; evq = (struct sfxge_evq *)arg; - sc = evq->sc; - txq = sc->txq[label]; + txq = sfxge_get_txq_by_label(evq, label); KASSERT(txq != NULL, ("txq == NULL")); KASSERT(evq->index == txq->evq_index, @@ -251,16 +264,17 @@ done: } static boolean_t -sfxge_ev_txq_flush_done(void *arg, uint32_t label) +sfxge_ev_txq_flush_done(void *arg, uint32_t txq_index) { struct sfxge_evq *evq; struct sfxge_softc *sc; struct sfxge_txq *txq; + unsigned int label; uint16_t magic; evq = (struct sfxge_evq *)arg; sc = evq->sc; - txq = sc->txq[label]; + txq = sc->txq[txq_index]; KASSERT(txq != NULL, ("txq == NULL")); KASSERT(txq->init_state == SFXGE_TXQ_INITIALIZED, @@ -269,6 +283,7 @@ sfxge_ev_txq_flush_done(void *arg, uint32_t label) /* Resend a software event on the correct queue */ evq = sc->evq[txq->evq_index]; + label = txq->type; KASSERT((label & SFXGE_MAGIC_DMAQ_LABEL_MASK) == label, ("(label & SFXGE_MAGIC_DMAQ_LABEL_MASK) != label")); magic = SFXGE_MAGIC_TX_QFLUSH_DONE | label; @@ -325,7 +340,7 @@ sfxge_ev_software(void *arg, uint16_t magic) break; } case SFXGE_MAGIC_TX_QFLUSH_DONE: { - struct sfxge_txq *txq = sc->txq[label]; + struct sfxge_txq *txq = sfxge_get_txq_by_label(evq, label); KASSERT(txq != NULL, ("txq == NULL")); KASSERT(evq->index == txq->evq_index, diff --git a/sys/dev/sfxge/sfxge_port.c b/sys/dev/sfxge/sfxge_port.c index 2c7f772..abd2924 100644 --- a/sys/dev/sfxge/sfxge_port.c +++ b/sys/dev/sfxge/sfxge_port.c @@ -320,10 +320,21 @@ sfxge_mac_filter_set(struct sfxge_softc *sc) struct sfxge_port *port = &sc->port; int rc; - KASSERT(port->init_state == SFXGE_PORT_STARTED, ("port not started")); - mtx_lock(&port->lock); - rc = sfxge_mac_filter_set_locked(sc); + /* + * The function may be called without softc_lock held in the + * case of SIOCADDMULTI and SIOCDELMULTI ioctls. ioctl handler + * checks IFF_DRV_RUNNING flag which implies port started, but + * it is not guaranteed to remain. softc_lock shared lock can't + * be held in the case of these ioctls processing, since it + * results in failure where kernel complains that non-sleepable + * lock is held in sleeping thread. Both problems are repeatable + * on LAG with LACP proto bring up. + */ + if (port->init_state == SFXGE_PORT_STARTED) + rc = sfxge_mac_filter_set_locked(sc); + else + rc = 0; mtx_unlock(&port->lock); return rc; } diff --git a/sys/dev/sfxge/sfxge_tx.c b/sys/dev/sfxge/sfxge_tx.c index 33f26d9..6c20720 100644 --- a/sys/dev/sfxge/sfxge_tx.c +++ b/sys/dev/sfxge/sfxge_tx.c @@ -27,6 +27,21 @@ * SUCH DAMAGE. */ +/* Theory of operation: + * + * Tx queues allocation and mapping + * + * One Tx queue with enabled checksum offload is allocated per Rx channel + * (event queue). Also 2 Tx queues (one without checksum offload and one + * with IP checksum offload only) are allocated and bound to event queue 0. + * sfxge_txq_type is used as Tx queue label. + * + * So, event queue plus label mapping to Tx queue index is: + * if event queue index is 0, TxQ-index = TxQ-label * [0..SFXGE_TXQ_NTYPES) + * else TxQ-index = SFXGE_TXQ_NTYPES + EvQ-index - 1 + * See sfxge_get_txq_by_label() sfxge_ev.c + */ + #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -536,6 +551,7 @@ sfxge_tx_packet_add(struct sfxge_txq *txq, struct mbuf *m) return (0); fail: + m_freem(m); return (rc); } @@ -1177,7 +1193,7 @@ sfxge_tx_qstart(struct sfxge_softc *sc, unsigned int index) } /* Create the common code transmit queue. */ - if ((rc = efx_tx_qcreate(sc->enp, index, index, esmp, + if ((rc = efx_tx_qcreate(sc->enp, index, txq->type, esmp, SFXGE_NDESCS, txq->buf_base_id, flags, evq->common, &txq->common)) != 0) goto fail; |