diff options
author | np <np@FreeBSD.org> | 2015-04-08 01:43:29 +0000 |
---|---|---|
committer | np <np@FreeBSD.org> | 2015-04-08 01:43:29 +0000 |
commit | 3713ef6b03e4017a5ba9e902268968038c9c14de (patch) | |
tree | ecb62836844d65b4fedb1160523763c6f025944f /sys/dev | |
parent | 406b6dc8fbf65c592dac719ca5937c1dcd684008 (diff) | |
download | FreeBSD-src-3713ef6b03e4017a5ba9e902268968038c9c14de.zip FreeBSD-src-3713ef6b03e4017a5ba9e902268968038c9c14de.tar.gz |
MFC r279243-r279246, r279251, r279691, r279700, and r279701.
r279243:
cxgbe(4): request an automatic tx update when a netmap txq idles.
r279244:
cxgbe(4): wait for the hardware to catch up before destroying a netmap txq.
r279245:
cxgbe(4): do not set the netmap rxq interrupts on a hair-trigger.
r279246:
cxgbe(4): set up congestion management for netmap rx queues.
The hw.cxgbe.cong_drop knob controls the response of the chip when
netmap queues are congested.
r279251:
cxgbe(4): allow tx hardware checksumming on the netmap interface.
It is disabled by default but users can set IFCAP_TXCSUM on the
netmap ifnet (ifconfig ncxl0 txcsum) to override netmap and force
the hardware to calculate and insert proper IP and L4 checksums in
outbound frames.
r279691:
cxgbe(4): provide the correct size of freelists associated with netmap
rx queues to the chip. This will fix many problems with native netmap
rx on ncxl/ncxgbe interfaces.
r279700:
cxgbe(4): knobs to experiment with the interrupt coalescing timer for
netmap rx queues, and the "batchiness" of rx updates sent to the chip.
These knobs will probably become per-rxq in the near future and will be
documented only after their final form is decided.
r279701:
cxgbe(4): experimental rx packet sink for netmap queues. This is not
intended for general use.
Diffstat (limited to 'sys/dev')
-rw-r--r-- | sys/dev/cxgbe/adapter.h | 1 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_netmap.c | 156 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_sge.c | 2 |
3 files changed, 126 insertions, 33 deletions
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 0e72fb3..93905e5 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -1022,6 +1022,7 @@ void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *); void t4_update_fl_bufsize(struct ifnet *); int can_resume_tx(struct sge_eq *); +int tnl_cong(struct port_info *); /* t4_tracer.c */ struct t4_tracer; diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c index a0742d0..7d48603 100644 --- a/sys/dev/cxgbe/t4_netmap.c +++ b/sys/dev/cxgbe/t4_netmap.c @@ -58,6 +58,25 @@ extern int fl_pad; /* XXXNM */ extern int spg_len; /* XXXNM */ extern int fl_pktshift; /* XXXNM */ +SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe netmap parameters"); + +/* + * 0 = normal netmap rx + * 1 = black hole + * 2 = supermassive black hole (buffer packing enabled) + */ +int black_hole = 0; +SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RDTUN, &black_hole, 0, + "Sink incoming packets."); + +int rx_ndesc = 256; +SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN, + &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated."); + +int holdoff_tmr_idx = 2; +SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN, + &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues."); + /* netmap ifnet routines */ static void cxgbe_nm_init(void *); static int cxgbe_nm_ioctl(struct ifnet *, unsigned long, caddr_t); @@ -226,9 +245,9 @@ cxgbe_nm_qflush(struct ifnet *ifp) } static int -alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq) +alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int cong) { - int rc, cntxt_id; + int rc, cntxt_id, i; __be32 v; struct adapter *sc = pi->adapter; struct netmap_adapter *na = NA(pi->nm_ifp); @@ -267,14 +286,20 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq) V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4)); c.iqsize = htobe16(pi->qsize_rxq); c.iqaddr = htobe64(nm_rxq->iq_ba); + if (cong >= 0) { + c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN | + V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF | + F_FW_IQ_CMD_FL0CONGEN); + } c.iqns_to_fl0congen |= htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) | F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO | - (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0)); + (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) | + (black_hole == 2 ? F_FW_IQ_CMD_FL0PACKEN : 0)); c.fl0dcaen_to_fl0cidxfthresh = htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) | V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B)); - c.fl0size = htobe16(na->num_rx_desc + spg_len / EQ_ESIZE); + c.fl0size = htobe16(na->num_rx_desc / 8 + spg_len / EQ_ESIZE); c.fl0addr = htobe64(nm_rxq->fl_ba); rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c); @@ -310,8 +335,37 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq) if (is_t5(sc)) nm_rxq->fl_db_val |= F_DBTYPE; - t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(F_QINTR_CNT_EN) | - V_INGRESSQID(nm_rxq->iq_cntxt_id)); + if (is_t5(sc) && cong >= 0) { + uint32_t param, val; + + param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | + V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id); + param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) | + V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id); + if (cong == 0) + val = 1 << 19; + else { + val = 2 << 19; + for (i = 0; i < 4; i++) { + if (cong & (1 << i)) + val |= 1 << (i << 2); + } + } + + rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); + if (rc != 0) { + /* report error but carry on */ + device_printf(sc->dev, + "failed to set congestion manager context for " + "ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc); + } + } + + t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), + V_INGRESSQID(nm_rxq->iq_cntxt_id) | + V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); return (rc); } @@ -351,7 +405,8 @@ alloc_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq) V_FW_EQ_ETH_CMD_VFN(0)); c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC | F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c)); - c.autoequiqe_to_viid = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->nm_viid)); + c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUEQE | + V_FW_EQ_ETH_CMD_VIID(pi->nm_viid)); c.fetchszm_to_iqid = htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) | V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO | @@ -448,7 +503,7 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, nm_set_native_flags(na); for_each_nm_rxq(pi, i, nm_rxq) { - alloc_nm_rxq_hwq(pi, nm_rxq); + alloc_nm_rxq_hwq(pi, nm_rxq, tnl_cong(pi)); nm_rxq->fl_hwidx = hwidx; slot = netmap_reset(na, NR_RX, i, 0); MPASS(slot != NULL); /* XXXNM: error check, not assert */ @@ -456,13 +511,14 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, /* We deal with 8 bufs at a time */ MPASS((na->num_rx_desc & 7) == 0); MPASS(na->num_rx_desc == nm_rxq->fl_sidx); - for (j = 0; j < nm_rxq->fl_sidx - 8; j++) { + for (j = 0; j < nm_rxq->fl_sidx; j++) { uint64_t ba; PNMB(na, &slot[j], &ba); + MPASS(ba != 0); nm_rxq->fl_desc[j] = htobe64(ba | hwidx); } - nm_rxq->fl_pidx = j; + j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8; MPASS((j & 7) == 0); j /= 8; /* driver pidx to hardware pidx */ wmb(); @@ -513,12 +569,17 @@ cxgbe_netmap_off(struct adapter *sc, struct port_info *pi, struct ifnet *ifp, if_printf(ifp, "netmap disable_vi failed: %d\n", rc); nm_clear_native_flags(na); - /* - * XXXNM: We need to make sure that the tx queues are quiet and won't - * request any more SGE_EGR_UPDATEs. - */ - for_each_nm_txq(pi, i, nm_txq) { + struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx]; + + /* Wait for hw pidx to catch up ... */ + while (be16toh(nm_txq->pidx) != spg->pidx) + pause("nmpidx", 1); + + /* ... and then for the cidx. */ + while (spg->pidx != spg->cidx) + pause("nmcidx", 1); + free_nm_txq_hwq(pi, nm_txq); } for_each_nm_rxq(pi, i, nm_rxq) { @@ -641,7 +702,7 @@ int lazy_tx_credit_flush = 1; */ static void cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq, - struct netmap_kring *kring, int npkt, int npkt_remaining) + struct netmap_kring *kring, int npkt, int npkt_remaining, int txcsum) { struct netmap_ring *ring = kring->ring; struct netmap_slot *slot; @@ -668,6 +729,7 @@ cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq, for (i = 0; i < n; i++) { slot = &ring->slot[kring->nr_hwcur]; PNMB(kring->na, slot, &ba); + MPASS(ba != 0); cpl->ctrl0 = nm_txq->cpl_ctrl0; cpl->pack = 0; @@ -677,10 +739,12 @@ cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq, * checksum offloading, TCP segmentation offloading, * encryption, VLAN encapsulation/decapsulation, etc." * - * XXXNM: it makes sense to enable checksum offload. + * So the ncxl interfaces have tx hardware checksumming + * disabled by default. But you can override netmap by + * enabling IFCAP_TXCSUM on the interface manully. */ - cpl->ctrl1 = htobe64(F_TXPKT_IPCSUM_DIS | - F_TXPKT_L4CSUM_DIS); + cpl->ctrl1 = txcsum ? 0 : + htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS); usgl = (void *)(cpl + 1); usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | @@ -793,7 +857,7 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[pi->first_nm_txq + kring->ring_id]; const u_int head = kring->rhead; u_int reclaimed = 0; - int n, d, npkt_remaining, ndesc_remaining; + int n, d, npkt_remaining, ndesc_remaining, txcsum; /* * Tx was at kring->nr_hwcur last time around and now we need to advance @@ -804,6 +868,7 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur : kring->nkr_num_slots - kring->nr_hwcur + head; + txcsum = ifp->if_capenable & (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); while (npkt_remaining) { reclaimed += reclaim_nm_tx_desc(nm_txq); ndesc_remaining = contiguous_ndesc_available(nm_txq); @@ -827,7 +892,7 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags) /* Send n packets and update nm_txq->pidx and kring->nr_hwcur */ npkt_remaining -= n; - cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining); + cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining, txcsum); } MPASS(npkt_remaining == 0); MPASS(kring->nr_hwcur == head); @@ -861,6 +926,9 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) u_int n; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; + if (black_hole) + return (0); /* No updates ever. */ + if (netmap_no_pendintr || force_update) { kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx); kring->nr_kflags &= ~NKR_PENDINTR; @@ -890,6 +958,7 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags) while (n > 0) { for (i = 0; i < 8; i++, fl_pidx++, slot++) { PNMB(na, slot, &ba); + MPASS(ba != 0); nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx); slot->flags &= ~NS_BUF_CHANGED; MPASS(fl_pidx <= nm_rxq->fl_sidx); @@ -1064,10 +1133,10 @@ t4_nm_intr(void *arg) struct netmap_ring *ring = kring->ring; struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx]; uint32_t lq; - u_int n = 0; - int processed = 0; + u_int n = 0, work = 0; uint8_t opcode; uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx); + u_int fl_credits = fl_cidx & 7; while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) { @@ -1078,8 +1147,10 @@ t4_nm_intr(void *arg) switch (G_RSPD_TYPE(d->rsp.u.type_gen)) { case X_RSPD_TYPE_FLBUF: - /* No buffer packing so new buf every time */ - MPASS(lq & F_RSPD_NEWBUF); + if (black_hole != 2) { + /* No buffer packing so new buf every time */ + MPASS(lq & F_RSPD_NEWBUF); + } /* fall through */ @@ -1095,7 +1166,9 @@ t4_nm_intr(void *arg) case CPL_RX_PKT: ring->slot[fl_cidx].len = G_RSPD_LEN(lq) - fl_pktshift; ring->slot[fl_cidx].flags = kring->nkr_slot_flags; - if (__predict_false(++fl_cidx == nm_rxq->fl_sidx)) + fl_cidx += (lq & F_RSPD_NEWBUF) ? 1 : 0; + fl_credits += (lq & F_RSPD_NEWBUF) ? 1 : 0; + if (__predict_false(fl_cidx == nm_rxq->fl_sidx)) fl_cidx = 0; break; default: @@ -1121,18 +1194,37 @@ t4_nm_intr(void *arg) nm_rxq->iq_gen ^= F_RSPD_GEN; } - if (__predict_false(++n == 64)) { /* XXXNM: tune */ + if (__predict_false(++n == rx_ndesc)) { + atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); + if (black_hole && fl_credits >= 8) { + fl_credits /= 8; + IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, + nm_rxq->fl_sidx); + t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), + nm_rxq->fl_db_val | V_PIDX(fl_credits)); + fl_credits = fl_cidx & 7; + } else if (!black_hole) { + netmap_rx_irq(ifp, nm_rxq->nid, &work); + MPASS(work != 0); + } t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(n) | V_INGRESSQID(nm_rxq->iq_cntxt_id) | V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX))); n = 0; } } - if (fl_cidx != nm_rxq->fl_cidx) { - atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); - netmap_rx_irq(ifp, nm_rxq->nid, &processed); - } + + atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx); + if (black_hole) { + fl_credits /= 8; + IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx); + t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), + nm_rxq->fl_db_val | V_PIDX(fl_credits)); + } else + netmap_rx_irq(ifp, nm_rxq->nid, &work); + t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(n) | - V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | V_SEINTARM(F_QINTR_CNT_EN)); + V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | + V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx))); } #endif diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index e806793..d35c77e 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -2563,7 +2563,7 @@ free_mgmtq(struct adapter *sc) return free_wrq(sc, &sc->sge.mgmtq); } -static inline int +int tnl_cong(struct port_info *pi) { |