-rw-r--r--   sys/conf/files                          |    2
-rw-r--r--   sys/dev/cxgbe/adapter.h                 |  116
-rw-r--r--   sys/dev/cxgbe/t4_l2t.c                  |    9
-rw-r--r--   sys/dev/cxgbe/t4_main.c                 |  264
-rw-r--r--   sys/dev/cxgbe/t4_mp_ring.c              |  364
-rw-r--r--   sys/dev/cxgbe/t4_mp_ring.h              |   68
-rw-r--r--   sys/dev/cxgbe/t4_sge.c                  | 1994
-rw-r--r--   sys/modules/cxgbe/if_cxgbe/Makefile     |    1
8 files changed, 1686 insertions(+), 1132 deletions(-)
diff --git a/sys/conf/files b/sys/conf/files
index 3884c11..9e55f42 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1142,6 +1142,8 @@ dev/cxgb/sys/uipc_mvec.c optional cxgb pci \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw \
compile-with "${NORMAL_C} -I$S/dev/cxgb"
+dev/cxgbe/t4_mp_ring.c optional cxgbe pci \
+ compile-with "${NORMAL_C} -I$S/dev/cxgbe"
dev/cxgbe/t4_main.c optional cxgbe pci \
compile-with "${NORMAL_C} -I$S/dev/cxgbe"
dev/cxgbe/t4_netmap.c optional cxgbe pci \
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index ec84bb4..62ff9af 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -152,7 +152,8 @@ enum {
CL_METADATA_SIZE = CACHE_LINE_SIZE,
SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
- TX_SGL_SEGS = 36,
+ TX_SGL_SEGS = 39,
+ TX_SGL_SEGS_TSO = 38,
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
};
@@ -273,6 +274,7 @@ struct port_info {
struct timeval last_refreshed;
struct port_stats stats;
u_int tnl_cong_drops;
+ u_int tx_parse_error;
eventhandler_tag vlan_c;
@@ -308,23 +310,9 @@ struct tx_desc {
__be64 flit[8];
};
-struct tx_map {
- struct mbuf *m;
- bus_dmamap_t map;
-};
-
-/* DMA maps used for tx */
-struct tx_maps {
- struct tx_map *maps;
- uint32_t map_total; /* # of DMA maps */
- uint32_t map_pidx; /* next map to be used */
- uint32_t map_cidx; /* reclaimed up to this index */
- uint32_t map_avail; /* # of available maps */
-};
-
struct tx_sdesc {
+ struct mbuf *m; /* m_nextpkt linked chain of frames */
uint8_t desc_used; /* # of hardware descriptors used by the WR */
- uint8_t credits; /* NIC txq: # of frames sent out in the WR */
};
@@ -378,16 +366,12 @@ struct sge_iq {
enum {
EQ_CTRL = 1,
EQ_ETH = 2,
-#ifdef TCP_OFFLOAD
EQ_OFLD = 3,
-#endif
/* eq flags */
- EQ_TYPEMASK = 7, /* 3 lsbits hold the type */
- EQ_ALLOCATED = (1 << 3), /* firmware resources allocated */
- EQ_DOOMED = (1 << 4), /* about to be destroyed */
- EQ_CRFLUSHED = (1 << 5), /* expecting an update from SGE */
- EQ_STALLED = (1 << 6), /* out of hw descriptors or dmamaps */
+ EQ_TYPEMASK = 0x3, /* 2 lsbits hold the type (see above) */
+ EQ_ALLOCATED = (1 << 2), /* firmware resources allocated */
+ EQ_ENABLED = (1 << 3), /* open for business */
};
/* Listed in order of preference. Update t4_sysctls too if you change these */
@@ -402,32 +386,25 @@ enum {DOORBELL_UDB, DOORBELL_WCWR, DOORBELL_UDBWC, DOORBELL_KDB};
struct sge_eq {
unsigned int flags; /* MUST be first */
unsigned int cntxt_id; /* SGE context id for the eq */
- bus_dma_tag_t desc_tag;
- bus_dmamap_t desc_map;
- char lockname[16];
struct mtx eq_lock;
struct tx_desc *desc; /* KVA of descriptor ring */
- bus_addr_t ba; /* bus address of descriptor ring */
- struct sge_qstat *spg; /* status page, for convenience */
uint16_t doorbells;
volatile uint32_t *udb; /* KVA of doorbell (lies within BAR2) */
u_int udb_qid; /* relative qid within the doorbell page */
- uint16_t cap; /* max # of desc, for convenience */
- uint16_t avail; /* available descriptors, for convenience */
- uint16_t qsize; /* size (# of entries) of the queue */
+ uint16_t sidx; /* index of the entry with the status page */
uint16_t cidx; /* consumer idx (desc idx) */
uint16_t pidx; /* producer idx (desc idx) */
- uint16_t pending; /* # of descriptors used since last doorbell */
+ uint16_t equeqidx; /* EQUEQ last requested at this pidx */
+ uint16_t dbidx; /* pidx of the most recent doorbell */
uint16_t iqid; /* iq that gets egr_update for the eq */
uint8_t tx_chan; /* tx channel used by the eq */
- struct task tx_task;
- struct callout tx_callout;
-
- /* stats */
+ volatile u_int equiq; /* EQUIQ outstanding */
- uint32_t egr_update; /* # of SGE_EGR_UPDATE notifications for eq */
- uint32_t unstalled; /* recovered from stall */
+ bus_dma_tag_t desc_tag;
+ bus_dmamap_t desc_map;
+ bus_addr_t ba; /* bus address of descriptor ring */
+ char lockname[16];
};
struct sw_zone_info {
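
The reworked sge_eq above drops cap/avail/pending in favor of ring indices (cidx, pidx, dbidx, equeqidx) that wrap at sidx, the slot holding the status page. The rest of the diff manipulates these with the driver's IDXDIFF/IDXINCR macros, which are defined in adapter.h outside this diff; the helpers below are only a minimal sketch of the same wrap-around convention, with hypothetical names, and are not part of the change.

/*
 * Illustrative equivalents of the wrap-around index arithmetic used by the
 * new sge_eq fields.  The driver's own IDXDIFF/IDXINCR macros are the real
 * thing; these names are hypothetical.
 */
static inline u_int
eq_idx_diff(u_int head, u_int tail, u_int wrap)
{
	/* # of ring entries from tail up to, but not including, head. */
	return (head >= tail ? head - tail : wrap - tail + head);
}

static inline u_int
eq_idx_incr(u_int idx, u_int incr, u_int wrap)
{
	/* Advance idx by incr entries, wrapping at 'wrap' (the eq's sidx). */
	return (wrap - idx > incr ? idx + incr : incr - (wrap - idx));
}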
@@ -499,18 +476,19 @@ struct sge_fl {
struct cluster_layout cll_alt; /* alternate refill zone, layout */
};
+struct mp_ring;
+
/* txq: SGE egress queue + what's needed for Ethernet NIC */
struct sge_txq {
struct sge_eq eq; /* MUST be first */
struct ifnet *ifp; /* the interface this txq belongs to */
- bus_dma_tag_t tx_tag; /* tag for transmit buffers */
- struct buf_ring *br; /* tx buffer ring */
+ struct mp_ring *r; /* tx software ring */
struct tx_sdesc *sdesc; /* KVA of software descriptor ring */
- struct mbuf *m; /* held up due to temporary resource shortage */
-
- struct tx_maps txmaps;
+ struct sglist *gl;
+ __be32 cpl_ctrl0; /* for convenience */
+ struct task tx_reclaim_task;
/* stats for common events first */
uint64_t txcsum; /* # of times hardware assisted with checksum */
@@ -519,13 +497,12 @@ struct sge_txq {
uint64_t imm_wrs; /* # of work requests with immediate data */
uint64_t sgl_wrs; /* # of work requests with direct SGL */
uint64_t txpkt_wrs; /* # of txpkt work requests (not coalesced) */
- uint64_t txpkts_wrs; /* # of coalesced tx work requests */
- uint64_t txpkts_pkts; /* # of frames in coalesced tx work requests */
+ uint64_t txpkts0_wrs; /* # of type0 coalesced tx work requests */
+ uint64_t txpkts1_wrs; /* # of type1 coalesced tx work requests */
+ uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */
+ uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */
/* stats for not-that-common events */
-
- uint32_t no_dmamap; /* no DMA map to load the mbuf */
- uint32_t no_desc; /* out of hardware descriptors */
} __aligned(CACHE_LINE_SIZE);
/* rxq: SGE ingress queue + SGE free list + miscellaneous items */
@@ -574,7 +551,13 @@ struct wrqe {
STAILQ_ENTRY(wrqe) link;
struct sge_wrq *wrq;
int wr_len;
- uint64_t wr[] __aligned(16);
+ char wr[] __aligned(16);
+};
+
+struct wrq_cookie {
+ TAILQ_ENTRY(wrq_cookie) link;
+ int ndesc;
+ int pidx;
};
/*
@@ -585,17 +568,32 @@ struct sge_wrq {
struct sge_eq eq; /* MUST be first */
struct adapter *adapter;
+ struct task wrq_tx_task;
+
+ /* Tx desc reserved but WR not "committed" yet. */
+ TAILQ_HEAD(wrq_incomplete_wrs , wrq_cookie) incomplete_wrs;
- /* List of WRs held up due to lack of tx descriptors */
+ /* List of WRs ready to go out as soon as descriptors are available. */
STAILQ_HEAD(, wrqe) wr_list;
+ u_int nwr_pending;
+ u_int ndesc_needed;
/* stats for common events first */
- uint64_t tx_wrs; /* # of tx work requests */
+ uint64_t tx_wrs_direct; /* # of WRs written directly to desc ring. */
+ uint64_t tx_wrs_ss; /* # of WRs copied from scratch space. */
+ uint64_t tx_wrs_copied; /* # of WRs queued and copied to desc ring. */
/* stats for not-that-common events */
- uint32_t no_desc; /* out of hardware descriptors */
+ /*
+ * Scratch space for work requests that wrap around after reaching the
+	 * status page, and some information about the last WR that used it.
+ */
+ uint16_t ss_pidx;
+ uint16_t ss_len;
+ uint8_t ss[SGE_MAX_WR_LEN];
+
} __aligned(CACHE_LINE_SIZE);
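
The members added above (incomplete_wrs, nwr_pending, ndesc_needed, and the ss[] scratch space) support the two-phase work-request interface introduced later in this diff: a caller reserves descriptors with start_wrq_wr, builds the WR directly in the descriptor ring (or in the scratch space when the WR would wrap past the status page), and publishes it with commit_wrq_wr. A hedged sketch of the calling pattern follows; struct my_cpl is a placeholder, and the callers converted below (t4_write_l2e, set_filter_wr, del_filter_wr) have this shape.

static int
send_my_cpl(struct sge_wrq *wrq)
{
	struct wrq_cookie cookie;
	struct my_cpl *req;	/* placeholder for a real CPL/firmware WR */

	/* Reserve enough 16-byte units; may fall back to an allocated wrqe. */
	req = start_wrq_wr(wrq, howmany(sizeof(*req), 16), &cookie);
	if (req == NULL)
		return (ENOMEM);

	/* ... fill in *req in place ... */

	/* Publish the WR; copies out of scratch space if it wrapped, and
	 * rings the doorbell when it is this WR's turn. */
	commit_wrq_wr(wrq, req, &cookie);
	return (0);
}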
@@ -744,7 +742,7 @@ struct adapter {
struct sge sge;
int lro_timeout;
- struct taskqueue *tq[NCHAN]; /* taskqueues that flush data out */
+ struct taskqueue *tq[NCHAN]; /* General purpose taskqueues */
struct port_info *port[MAX_NPORTS];
uint8_t chan_map[NCHAN];
@@ -978,12 +976,11 @@ static inline int
tx_resume_threshold(struct sge_eq *eq)
{
- return (eq->qsize / 4);
+ /* not quite the same as qsize / 4, but this will do. */
+ return (eq->sidx / 4);
}
/* t4_main.c */
-void t4_tx_task(void *, int);
-void t4_tx_callout(void *);
int t4_os_find_pci_capability(struct adapter *, int);
int t4_os_pci_save_state(struct adapter *);
int t4_os_pci_restore_state(struct adapter *);
@@ -1024,16 +1021,15 @@ int t4_setup_adapter_queues(struct adapter *);
int t4_teardown_adapter_queues(struct adapter *);
int t4_setup_port_queues(struct port_info *);
int t4_teardown_port_queues(struct port_info *);
-int t4_alloc_tx_maps(struct tx_maps *, bus_dma_tag_t, int, int);
-void t4_free_tx_maps(struct tx_maps *, bus_dma_tag_t);
void t4_intr_all(void *);
void t4_intr(void *);
void t4_intr_err(void *);
void t4_intr_evt(void *);
void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *);
-int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *);
void t4_update_fl_bufsize(struct ifnet *);
-int can_resume_tx(struct sge_eq *);
+int parse_pkt(struct mbuf **);
+void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *);
+void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *);
/* t4_tracer.c */
struct t4_tracer;
diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c
index 6f7378a..cca1bf3 100644
--- a/sys/dev/cxgbe/t4_l2t.c
+++ b/sys/dev/cxgbe/t4_l2t.c
@@ -113,16 +113,15 @@ found:
int
t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
{
- struct wrqe *wr;
+ struct wrq_cookie cookie;
struct cpl_l2t_write_req *req;
int idx = e->idx + sc->vres.l2t.start;
mtx_assert(&e->lock, MA_OWNED);
- wr = alloc_wrqe(sizeof(*req), &sc->sge.mgmtq);
- if (wr == NULL)
+ req = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*req), 16), &cookie);
+ if (req == NULL)
return (ENOMEM);
- req = wrtod(wr);
INIT_TP_WR(req, 0);
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx |
@@ -132,7 +131,7 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
req->vlan = htons(e->vlan);
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
- t4_wrq_tx(sc, wr);
+ commit_wrq_wr(&sc->sge.mgmtq, req, &cookie);
if (sync && e->state != L2T_STATE_SWITCHING)
e->state = L2T_STATE_SYNC_WRITE;
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 2c384fd..39dc816 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$");
#include "common/t4_regs_values.h"
#include "t4_ioctl.h"
#include "t4_l2t.h"
+#include "t4_mp_ring.h"
/* T4 bus driver interface */
static int t4_probe(device_t);
@@ -378,7 +379,8 @@ static void build_medialist(struct port_info *, struct ifmedia *);
static int cxgbe_init_synchronized(struct port_info *);
static int cxgbe_uninit_synchronized(struct port_info *);
static int setup_intr_handlers(struct adapter *);
-static void quiesce_eq(struct adapter *, struct sge_eq *);
+static void quiesce_txq(struct adapter *, struct sge_txq *);
+static void quiesce_wrq(struct adapter *, struct sge_wrq *);
static void quiesce_iq(struct adapter *, struct sge_iq *);
static void quiesce_fl(struct adapter *, struct sge_fl *);
static int t4_alloc_irq(struct adapter *, struct irq *, int rid,
@@ -434,7 +436,6 @@ static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS);
static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS);
static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS);
#endif
-static inline void txq_start(struct ifnet *, struct sge_txq *);
static uint32_t fconf_to_mode(uint32_t);
static uint32_t mode_to_fconf(uint32_t);
static uint32_t fspec_to_fconf(struct t4_filter_specification *);
@@ -1429,67 +1430,36 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct port_info *pi = ifp->if_softc;
struct adapter *sc = pi->adapter;
- struct sge_txq *txq = &sc->sge.txq[pi->first_txq];
- struct buf_ring *br;
+ struct sge_txq *txq;
+ void *items[1];
int rc;
M_ASSERTPKTHDR(m);
+ MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */
if (__predict_false(pi->link_cfg.link_ok == 0)) {
m_freem(m);
return (ENETDOWN);
}
- /* check if flowid is set */
- if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
- txq += ((m->m_pkthdr.flowid % (pi->ntxq - pi->rsrv_noflowq))
- + pi->rsrv_noflowq);
- br = txq->br;
-
- if (TXQ_TRYLOCK(txq) == 0) {
- struct sge_eq *eq = &txq->eq;
-
- /*
- * It is possible that t4_eth_tx finishes up and releases the
- * lock between the TRYLOCK above and the drbr_enqueue here. We
- * need to make sure that this mbuf doesn't just sit there in
- * the drbr.
- */
-
- rc = drbr_enqueue(ifp, br, m);
- if (rc == 0 && callout_pending(&eq->tx_callout) == 0 &&
- !(eq->flags & EQ_DOOMED))
- callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
+ rc = parse_pkt(&m);
+ if (__predict_false(rc != 0)) {
+ MPASS(m == NULL); /* was freed already */
+ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
return (rc);
}
- /*
- * txq->m is the mbuf that is held up due to a temporary shortage of
- * resources and it should be put on the wire first. Then what's in
- * drbr and finally the mbuf that was just passed in to us.
- *
- * Return code should indicate the fate of the mbuf that was passed in
- * this time.
- */
-
- TXQ_LOCK_ASSERT_OWNED(txq);
- if (drbr_needs_enqueue(ifp, br) || txq->m) {
-
- /* Queued for transmission. */
-
- rc = drbr_enqueue(ifp, br, m);
- m = txq->m ? txq->m : drbr_dequeue(ifp, br);
- (void) t4_eth_tx(ifp, txq, m);
- TXQ_UNLOCK(txq);
- return (rc);
- }
+ /* Select a txq. */
+ txq = &sc->sge.txq[pi->first_txq];
+ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+ txq += ((m->m_pkthdr.flowid % (pi->ntxq - pi->rsrv_noflowq)) +
+ pi->rsrv_noflowq);
- /* Direct transmission. */
- rc = t4_eth_tx(ifp, txq, m);
- if (rc != 0 && txq->m)
- rc = 0; /* held, will be transmitted soon (hopefully) */
+ items[0] = m;
+ rc = mp_ring_enqueue(txq->r, items, 1, 4096);
+ if (__predict_false(rc != 0))
+ m_freem(m);
- TXQ_UNLOCK(txq);
return (rc);
}
@@ -1499,17 +1469,17 @@ cxgbe_qflush(struct ifnet *ifp)
struct port_info *pi = ifp->if_softc;
struct sge_txq *txq;
int i;
- struct mbuf *m;
/* queues do not exist if !PORT_INIT_DONE. */
if (pi->flags & PORT_INIT_DONE) {
for_each_txq(pi, i, txq) {
TXQ_LOCK(txq);
- m_freem(txq->m);
- txq->m = NULL;
- while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
- m_freem(m);
+ txq->eq.flags &= ~EQ_ENABLED;
TXQ_UNLOCK(txq);
+ while (!mp_ring_is_idle(txq->r)) {
+ mp_ring_check_drainage(txq->r, 0);
+ pause("qflush", 1);
+ }
}
}
if_qflush(ifp);
@@ -1564,7 +1534,7 @@ cxgbe_get_counter(struct ifnet *ifp, ift_counter c)
struct sge_txq *txq;
for_each_txq(pi, i, txq)
- drops += txq->br->br_drops;
+ drops += counter_u64_fetch(txq->r->drops);
}
return (drops);
@@ -3236,7 +3206,8 @@ cxgbe_init_synchronized(struct port_info *pi)
{
struct adapter *sc = pi->adapter;
struct ifnet *ifp = pi->ifp;
- int rc = 0;
+ int rc = 0, i;
+ struct sge_txq *txq;
ASSERT_SYNCHRONIZED_OP(sc);
@@ -3265,6 +3236,17 @@ cxgbe_init_synchronized(struct port_info *pi)
}
/*
+ * Can't fail from this point onwards. Review cxgbe_uninit_synchronized
+ * if this changes.
+ */
+
+ for_each_txq(pi, i, txq) {
+ TXQ_LOCK(txq);
+ txq->eq.flags |= EQ_ENABLED;
+ TXQ_UNLOCK(txq);
+ }
+
+ /*
* The first iq of the first port to come up is used for tracing.
*/
if (sc->traceq < 0) {
@@ -3297,7 +3279,8 @@ cxgbe_uninit_synchronized(struct port_info *pi)
{
struct adapter *sc = pi->adapter;
struct ifnet *ifp = pi->ifp;
- int rc;
+ int rc, i;
+ struct sge_txq *txq;
ASSERT_SYNCHRONIZED_OP(sc);
@@ -3314,6 +3297,12 @@ cxgbe_uninit_synchronized(struct port_info *pi)
return (rc);
}
+ for_each_txq(pi, i, txq) {
+ TXQ_LOCK(txq);
+ txq->eq.flags &= ~EQ_ENABLED;
+ TXQ_UNLOCK(txq);
+ }
+
clrbit(&sc->open_device_map, pi->port_id);
PORT_LOCK(pi);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
@@ -3543,15 +3532,17 @@ port_full_uninit(struct port_info *pi)
if (pi->flags & PORT_INIT_DONE) {
- /* Need to quiesce queues. XXX: ctrl queues? */
+ /* Need to quiesce queues. */
+
+ quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]);
for_each_txq(pi, i, txq) {
- quiesce_eq(sc, &txq->eq);
+ quiesce_txq(sc, txq);
}
#ifdef TCP_OFFLOAD
for_each_ofld_txq(pi, i, ofld_txq) {
- quiesce_eq(sc, &ofld_txq->eq);
+ quiesce_wrq(sc, ofld_txq);
}
#endif
@@ -3576,23 +3567,39 @@ port_full_uninit(struct port_info *pi)
}
static void
-quiesce_eq(struct adapter *sc, struct sge_eq *eq)
+quiesce_txq(struct adapter *sc, struct sge_txq *txq)
{
- EQ_LOCK(eq);
- eq->flags |= EQ_DOOMED;
+ struct sge_eq *eq = &txq->eq;
+ struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
- /*
- * Wait for the response to a credit flush if one's
- * pending.
- */
- while (eq->flags & EQ_CRFLUSHED)
- mtx_sleep(eq, &eq->eq_lock, 0, "crflush", 0);
- EQ_UNLOCK(eq);
+ (void) sc; /* unused */
+
+#ifdef INVARIANTS
+ TXQ_LOCK(txq);
+ MPASS((eq->flags & EQ_ENABLED) == 0);
+ TXQ_UNLOCK(txq);
+#endif
+
+ /* Wait for the mp_ring to empty. */
+ while (!mp_ring_is_idle(txq->r)) {
+ mp_ring_check_drainage(txq->r, 0);
+ pause("rquiesce", 1);
+ }
- callout_drain(&eq->tx_callout); /* XXX: iffy */
- pause("callout", 10); /* Still iffy */
+ /* Then wait for the hardware to finish. */
+ while (spg->cidx != htobe16(eq->pidx))
+ pause("equiesce", 1);
- taskqueue_drain(sc->tq[eq->tx_chan], &eq->tx_task);
+ /* Finally, wait for the driver to reclaim all descriptors. */
+ while (eq->cidx != eq->pidx)
+ pause("dquiesce", 1);
+}
+
+static void
+quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq)
+{
+
+ /* XXXTX */
}
static void
@@ -4892,6 +4899,9 @@ cxgbe_sysctls(struct port_info *pi)
oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD,
NULL, "port statistics");
children = SYSCTL_CHILDREN(oid);
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD,
+ &pi->tx_parse_error, 0,
+ "# of tx packets with invalid length or # of segments");
#define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \
SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \
@@ -6947,74 +6957,6 @@ sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS)
}
#endif
-static inline void
-txq_start(struct ifnet *ifp, struct sge_txq *txq)
-{
- struct buf_ring *br;
- struct mbuf *m;
-
- TXQ_LOCK_ASSERT_OWNED(txq);
-
- br = txq->br;
- m = txq->m ? txq->m : drbr_dequeue(ifp, br);
- if (m)
- t4_eth_tx(ifp, txq, m);
-}
-
-void
-t4_tx_callout(void *arg)
-{
- struct sge_eq *eq = arg;
- struct adapter *sc;
-
- if (EQ_TRYLOCK(eq) == 0)
- goto reschedule;
-
- if (eq->flags & EQ_STALLED && !can_resume_tx(eq)) {
- EQ_UNLOCK(eq);
-reschedule:
- if (__predict_true(!(eq->flags && EQ_DOOMED)))
- callout_schedule(&eq->tx_callout, 1);
- return;
- }
-
- EQ_LOCK_ASSERT_OWNED(eq);
-
- if (__predict_true((eq->flags & EQ_DOOMED) == 0)) {
-
- if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) {
- struct sge_txq *txq = arg;
- struct port_info *pi = txq->ifp->if_softc;
-
- sc = pi->adapter;
- } else {
- struct sge_wrq *wrq = arg;
-
- sc = wrq->adapter;
- }
-
- taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
- }
-
- EQ_UNLOCK(eq);
-}
-
-void
-t4_tx_task(void *arg, int count)
-{
- struct sge_eq *eq = arg;
-
- EQ_LOCK(eq);
- if ((eq->flags & EQ_TYPEMASK) == EQ_ETH) {
- struct sge_txq *txq = arg;
- txq_start(txq->ifp, txq);
- } else {
- struct sge_wrq *wrq = arg;
- t4_wrq_tx_locked(wrq->adapter, wrq, NULL);
- }
- EQ_UNLOCK(eq);
-}
-
static uint32_t
fconf_to_mode(uint32_t fconf)
{
@@ -7452,9 +7394,9 @@ static int
set_filter_wr(struct adapter *sc, int fidx)
{
struct filter_entry *f = &sc->tids.ftid_tab[fidx];
- struct wrqe *wr;
struct fw_filter_wr *fwr;
unsigned int ftid;
+ struct wrq_cookie cookie;
ASSERT_SYNCHRONIZED_OP(sc);
@@ -7473,12 +7415,10 @@ set_filter_wr(struct adapter *sc, int fidx)
ftid = sc->tids.ftid_base + fidx;
- wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq);
- if (wr == NULL)
+ fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
+ if (fwr == NULL)
return (ENOMEM);
-
- fwr = wrtod(wr);
- bzero(fwr, sizeof (*fwr));
+ bzero(fwr, sizeof(*fwr));
fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR));
fwr->len16_pkd = htobe32(FW_LEN16(*fwr));
@@ -7547,7 +7487,7 @@ set_filter_wr(struct adapter *sc, int fidx)
f->pending = 1;
sc->tids.ftids_in_use++;
- t4_wrq_tx(sc, wr);
+ commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
return (0);
}
@@ -7555,22 +7495,21 @@ static int
del_filter_wr(struct adapter *sc, int fidx)
{
struct filter_entry *f = &sc->tids.ftid_tab[fidx];
- struct wrqe *wr;
struct fw_filter_wr *fwr;
unsigned int ftid;
+ struct wrq_cookie cookie;
ftid = sc->tids.ftid_base + fidx;
- wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq);
- if (wr == NULL)
+ fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie);
+ if (fwr == NULL)
return (ENOMEM);
- fwr = wrtod(wr);
bzero(fwr, sizeof (*fwr));
t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id);
f->pending = 1;
- t4_wrq_tx(sc, wr);
+ commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie);
return (0);
}
@@ -8170,6 +8109,7 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
/* MAC stats */
t4_clr_port_stats(sc, pi->tx_chan);
+ pi->tx_parse_error = 0;
if (pi->flags & PORT_INIT_DONE) {
struct sge_rxq *rxq;
@@ -8192,24 +8132,24 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
txq->imm_wrs = 0;
txq->sgl_wrs = 0;
txq->txpkt_wrs = 0;
- txq->txpkts_wrs = 0;
- txq->txpkts_pkts = 0;
- txq->br->br_drops = 0;
- txq->no_dmamap = 0;
- txq->no_desc = 0;
+ txq->txpkts0_wrs = 0;
+ txq->txpkts1_wrs = 0;
+ txq->txpkts0_pkts = 0;
+ txq->txpkts1_pkts = 0;
+ mp_ring_reset_stats(txq->r);
}
#ifdef TCP_OFFLOAD
/* nothing to clear for each ofld_rxq */
for_each_ofld_txq(pi, i, wrq) {
- wrq->tx_wrs = 0;
- wrq->no_desc = 0;
+ wrq->tx_wrs_direct = 0;
+ wrq->tx_wrs_copied = 0;
}
#endif
wrq = &sc->sge.ctrlq[pi->port_id];
- wrq->tx_wrs = 0;
- wrq->no_desc = 0;
+ wrq->tx_wrs_direct = 0;
+ wrq->tx_wrs_copied = 0;
}
break;
}
diff --git a/sys/dev/cxgbe/t4_mp_ring.c b/sys/dev/cxgbe/t4_mp_ring.c
new file mode 100644
index 0000000..ef09f01
--- /dev/null
+++ b/sys/dev/cxgbe/t4_mp_ring.c
@@ -0,0 +1,364 @@
+/*-
+ * Copyright (c) 2014 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <machine/cpu.h>
+
+#include "t4_mp_ring.h"
+
+union ring_state {
+ struct {
+ uint16_t pidx_head;
+ uint16_t pidx_tail;
+ uint16_t cidx;
+ uint16_t flags;
+ };
+ uint64_t state;
+};
+
+enum {
+ IDLE = 0, /* consumer ran to completion, nothing more to do. */
+ BUSY, /* consumer is running already, or will be shortly. */
+ STALLED, /* consumer stopped due to lack of resources. */
+ ABDICATED, /* consumer stopped even though there was work to be
+ done because it wants another thread to take over. */
+};
+
+static inline uint16_t
+space_available(struct mp_ring *r, union ring_state s)
+{
+ uint16_t x = r->size - 1;
+
+ if (s.cidx == s.pidx_head)
+ return (x);
+ else if (s.cidx > s.pidx_head)
+ return (s.cidx - s.pidx_head - 1);
+ else
+ return (x - s.pidx_head + s.cidx);
+}
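
A quick worked example of the arithmetic above (illustrative values only, not part of the change):

/*
 * With size = 8, cidx = 2 and pidx_head = 6, space_available() returns
 * 8 - 1 - (6 - 2) = 3.  One slot is always left unused, so a full ring
 * (cidx == pidx_head + 1, modulo size) is distinguishable from an empty
 * one (cidx == pidx_head, which reports size - 1 free slots).
 */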
+
+static inline uint16_t
+increment_idx(struct mp_ring *r, uint16_t idx, uint16_t n)
+{
+ int x = r->size - idx;
+
+ MPASS(x > 0);
+ return (x > n ? idx + n : n - x);
+}
+
+/* Consumer is about to update the ring's state to s */
+static inline uint16_t
+state_to_flags(union ring_state s, int abdicate)
+{
+
+ if (s.cidx == s.pidx_tail)
+ return (IDLE);
+ else if (abdicate && s.pidx_tail != s.pidx_head)
+ return (ABDICATED);
+
+ return (BUSY);
+}
+
+/*
+ * Caller passes in a state, with a guarantee that there is work to do and that
+ * all items up to the pidx_tail in the state are visible.
+ */
+static void
+drain_ring(struct mp_ring *r, union ring_state os, uint16_t prev, int budget)
+{
+ union ring_state ns;
+ int n, pending, total;
+ uint16_t cidx = os.cidx;
+ uint16_t pidx = os.pidx_tail;
+
+ MPASS(os.flags == BUSY);
+ MPASS(cidx != pidx);
+
+ if (prev == IDLE)
+ counter_u64_add(r->starts, 1);
+ pending = 0;
+ total = 0;
+
+ while (cidx != pidx) {
+
+ /* Items from cidx to pidx are available for consumption. */
+ n = r->drain(r, cidx, pidx);
+ if (n == 0) {
+ critical_enter();
+ do {
+ os.state = ns.state = r->state;
+ ns.cidx = cidx;
+ ns.flags = STALLED;
+ } while (atomic_cmpset_64(&r->state, os.state,
+ ns.state) == 0);
+ critical_exit();
+ if (prev != STALLED)
+ counter_u64_add(r->stalls, 1);
+ else if (total > 0) {
+ counter_u64_add(r->restarts, 1);
+ counter_u64_add(r->stalls, 1);
+ }
+ break;
+ }
+ cidx = increment_idx(r, cidx, n);
+ pending += n;
+ total += n;
+
+ /*
+ * We update the cidx only if we've caught up with the pidx, the
+ * real cidx is getting too far ahead of the one visible to
+ * everyone else, or we have exceeded our budget.
+ */
+ if (cidx != pidx && pending < 64 && total < budget)
+ continue;
+ critical_enter();
+ do {
+ os.state = ns.state = r->state;
+ ns.cidx = cidx;
+ ns.flags = state_to_flags(ns, total >= budget);
+ } while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0);
+ critical_exit();
+
+ if (ns.flags == ABDICATED)
+ counter_u64_add(r->abdications, 1);
+ if (ns.flags != BUSY) {
+ /* Wrong loop exit if we're going to stall. */
+ MPASS(ns.flags != STALLED);
+ if (prev == STALLED) {
+ MPASS(total > 0);
+ counter_u64_add(r->restarts, 1);
+ }
+ break;
+ }
+
+ /*
+ * The acquire style atomic above guarantees visibility of items
+ * associated with any pidx change that we notice here.
+ */
+ pidx = ns.pidx_tail;
+ pending = 0;
+ }
+}
+
+int
+mp_ring_alloc(struct mp_ring **pr, int size, void *cookie, ring_drain_t drain,
+ ring_can_drain_t can_drain, struct malloc_type *mt, int flags)
+{
+ struct mp_ring *r;
+
+ /* All idx are 16b so size can be 65536 at most */
+ if (pr == NULL || size < 2 || size > 65536 || drain == NULL ||
+ can_drain == NULL)
+ return (EINVAL);
+ *pr = NULL;
+ flags &= M_NOWAIT | M_WAITOK;
+ MPASS(flags != 0);
+
+ r = malloc(__offsetof(struct mp_ring, items[size]), mt, flags | M_ZERO);
+ if (r == NULL)
+ return (ENOMEM);
+ r->size = size;
+ r->cookie = cookie;
+ r->mt = mt;
+ r->drain = drain;
+ r->can_drain = can_drain;
+ r->enqueues = counter_u64_alloc(flags);
+ r->drops = counter_u64_alloc(flags);
+ r->starts = counter_u64_alloc(flags);
+ r->stalls = counter_u64_alloc(flags);
+ r->restarts = counter_u64_alloc(flags);
+ r->abdications = counter_u64_alloc(flags);
+ if (r->enqueues == NULL || r->drops == NULL || r->starts == NULL ||
+ r->stalls == NULL || r->restarts == NULL ||
+ r->abdications == NULL) {
+ mp_ring_free(r);
+ return (ENOMEM);
+ }
+
+ *pr = r;
+ return (0);
+}
+
+void
+mp_ring_free(struct mp_ring *r)
+{
+
+ if (r == NULL)
+ return;
+
+ if (r->enqueues != NULL)
+ counter_u64_free(r->enqueues);
+ if (r->drops != NULL)
+ counter_u64_free(r->drops);
+ if (r->starts != NULL)
+ counter_u64_free(r->starts);
+ if (r->stalls != NULL)
+ counter_u64_free(r->stalls);
+ if (r->restarts != NULL)
+ counter_u64_free(r->restarts);
+ if (r->abdications != NULL)
+ counter_u64_free(r->abdications);
+
+ free(r, r->mt);
+}
+
+/*
+ * Enqueue n items and maybe drain the ring for some time.
+ *
+ * Returns an errno.
+ */
+int
+mp_ring_enqueue(struct mp_ring *r, void **items, int n, int budget)
+{
+ union ring_state os, ns;
+ uint16_t pidx_start, pidx_stop;
+ int i;
+
+ MPASS(items != NULL);
+ MPASS(n > 0);
+
+ /*
+ * Reserve room for the new items. Our reservation, if successful, is
+ * from 'pidx_start' to 'pidx_stop'.
+ */
+ for (;;) {
+ os.state = r->state;
+ if (n >= space_available(r, os)) {
+ counter_u64_add(r->drops, n);
+ MPASS(os.flags != IDLE);
+ if (os.flags == STALLED)
+ mp_ring_check_drainage(r, 0);
+ return (ENOBUFS);
+ }
+ ns.state = os.state;
+ ns.pidx_head = increment_idx(r, os.pidx_head, n);
+ critical_enter();
+ if (atomic_cmpset_64(&r->state, os.state, ns.state))
+ break;
+ critical_exit();
+ cpu_spinwait();
+ }
+ pidx_start = os.pidx_head;
+ pidx_stop = ns.pidx_head;
+
+ /*
+ * Wait for other producers who got in ahead of us to enqueue their
+ * items, one producer at a time. It is our turn when the ring's
+	 * pidx_tail reaches the beginning of our reservation (pidx_start).
+ */
+ while (ns.pidx_tail != pidx_start) {
+ cpu_spinwait();
+ ns.state = r->state;
+ }
+
+ /* Now it is our turn to fill up the area we reserved earlier. */
+ i = pidx_start;
+ do {
+ r->items[i] = *items++;
+ if (__predict_false(++i == r->size))
+ i = 0;
+ } while (i != pidx_stop);
+
+ /*
+ * Update the ring's pidx_tail. The release style atomic guarantees
+ * that the items are visible to any thread that sees the updated pidx.
+ */
+ do {
+ os.state = ns.state = r->state;
+ ns.pidx_tail = pidx_stop;
+ ns.flags = BUSY;
+ } while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0);
+ critical_exit();
+ counter_u64_add(r->enqueues, n);
+
+ /*
+ * Turn into a consumer if some other thread isn't active as a consumer
+ * already.
+ */
+ if (os.flags != BUSY)
+ drain_ring(r, ns, os.flags, budget);
+
+ return (0);
+}
+
+void
+mp_ring_check_drainage(struct mp_ring *r, int budget)
+{
+ union ring_state os, ns;
+
+ os.state = r->state;
+ if (os.flags != STALLED || os.pidx_head != os.pidx_tail ||
+ r->can_drain(r) == 0)
+ return;
+
+ MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */
+ ns.state = os.state;
+ ns.flags = BUSY;
+
+ /*
+ * The acquire style atomic guarantees visibility of items associated
+ * with the pidx that we read here.
+ */
+ if (!atomic_cmpset_acq_64(&r->state, os.state, ns.state))
+ return;
+
+ drain_ring(r, ns, os.flags, budget);
+}
+
+void
+mp_ring_reset_stats(struct mp_ring *r)
+{
+
+ counter_u64_zero(r->enqueues);
+ counter_u64_zero(r->drops);
+ counter_u64_zero(r->starts);
+ counter_u64_zero(r->stalls);
+ counter_u64_zero(r->restarts);
+ counter_u64_zero(r->abdications);
+}
+
+int
+mp_ring_is_idle(struct mp_ring *r)
+{
+ union ring_state s;
+
+ s.state = r->state;
+ if (s.pidx_head == s.pidx_tail && s.pidx_tail == s.cidx &&
+ s.flags == IDLE)
+ return (1);
+
+ return (0);
+}
diff --git a/sys/dev/cxgbe/t4_mp_ring.h b/sys/dev/cxgbe/t4_mp_ring.h
new file mode 100644
index 0000000..c9ee346
--- /dev/null
+++ b/sys/dev/cxgbe/t4_mp_ring.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 2014 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+#ifndef __CXGBE_MP_RING_H
+#define __CXGBE_MP_RING_H
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+struct mp_ring;
+typedef u_int (*ring_drain_t)(struct mp_ring *, u_int, u_int);
+typedef u_int (*ring_can_drain_t)(struct mp_ring *);
+
+struct mp_ring {
+ volatile uint64_t state __aligned(CACHE_LINE_SIZE);
+
+ int size __aligned(CACHE_LINE_SIZE);
+ void * cookie;
+ struct malloc_type * mt;
+ ring_drain_t drain;
+ ring_can_drain_t can_drain; /* cheap, may be unreliable */
+ counter_u64_t enqueues;
+ counter_u64_t drops;
+ counter_u64_t starts;
+ counter_u64_t stalls;
+ counter_u64_t restarts; /* recovered after stalling */
+ counter_u64_t abdications;
+
+ void * volatile items[] __aligned(CACHE_LINE_SIZE);
+};
+
+int mp_ring_alloc(struct mp_ring **, int, void *, ring_drain_t,
+ ring_can_drain_t, struct malloc_type *, int);
+void mp_ring_free(struct mp_ring *);
+int mp_ring_enqueue(struct mp_ring *, void **, int, int);
+void mp_ring_check_drainage(struct mp_ring *, int);
+void mp_ring_reset_stats(struct mp_ring *);
+int mp_ring_is_idle(struct mp_ring *);
+
+#endif
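
As a usage note, each NIC txq gets one of these rings; the actual hookup happens in t4_sge.c's queue allocation, which is not part of this excerpt, so the sketch below is hedged: the ring size and malloc type are plausible choices, and eth_tx/can_resume_eth_tx are the drain callbacks this diff adds to t4_sge.c. The producer side is visible above in cxgbe_transmit (parse_pkt followed by mp_ring_enqueue with a budget of 4096).

/*
 * Hedged sketch: wiring an mp_ring to a tx queue.  Details (ring size,
 * malloc type) are assumptions, not taken verbatim from the change.
 */
static int
txq_ring_setup(struct sge_txq *txq)
{
	return (mp_ring_alloc(&txq->r, txq->eq.sidx, txq, eth_tx,
	    can_resume_eth_tx, M_CXGBE, M_WAITOK));
}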
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 96e22cb..026b4ce 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -36,12 +36,12 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
-#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
+#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
#include <sys/counter.h>
@@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
+#include "t4_mp_ring.h"
#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
@@ -147,19 +148,17 @@ TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster);
static int safest_rx_cluster = PAGE_SIZE;
TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);
-/* Used to track coalesced tx work request */
struct txpkts {
- uint64_t *flitp; /* ptr to flit where next pkt should start */
- uint8_t npkt; /* # of packets in this work request */
- uint8_t nflits; /* # of flits used by this work request */
- uint16_t plen; /* total payload (sum of all packets) */
+ u_int wr_type; /* type 0 or type 1 */
+ u_int npkt; /* # of packets in this work request */
+ u_int plen; /* total payload (sum of all packets) */
+ u_int len16; /* # of 16B pieces used by this work request */
};
/* A packet's SGL. This + m_pkthdr has all info needed for tx */
struct sgl {
- int nsegs; /* # of segments in the SGL, 0 means imm. tx */
- int nflits; /* # of flits needed for the SGL */
- bus_dma_segment_t seg[TX_SGL_SEGS];
+ struct sglist sg;
+ struct sglist_seg seg[TX_SGL_SEGS];
};
static int service_iq(struct sge_iq *, int);
@@ -221,26 +220,31 @@ static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
static void find_safe_refill_source(struct adapter *, struct sge_fl *);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
-static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
-static int free_pkt_sgl(struct sge_txq *, struct sgl *);
-static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
- struct sgl *);
-static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
- struct mbuf *, struct sgl *);
-static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
-static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
- struct txpkts *, struct mbuf *, struct sgl *);
-static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
+static inline void get_pkt_gl(struct mbuf *, struct sglist *);
+static inline u_int txpkt_len16(u_int, u_int);
+static inline u_int txpkts0_len16(u_int);
+static inline u_int txpkts1_len16(void);
+static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *,
+ struct mbuf *, u_int);
+static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int);
+static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int);
+static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *,
+ struct mbuf *, const struct txpkts *, u_int);
+static void write_gl_to_txd(struct sge_txq *, struct mbuf *, caddr_t *, int);
static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
-static inline void ring_eq_db(struct adapter *, struct sge_eq *);
-static inline int reclaimable(struct sge_eq *);
-static int reclaim_tx_descs(struct sge_txq *, int, int);
-static void write_eqflush_wr(struct sge_eq *);
-static __be64 get_flit(bus_dma_segment_t *, int, int);
+static inline void ring_eq_db(struct adapter *, struct sge_eq *, u_int);
+static inline uint16_t read_hw_cidx(struct sge_eq *);
+static inline u_int reclaimable_tx_desc(struct sge_eq *);
+static inline u_int total_available_tx_desc(struct sge_eq *);
+static u_int reclaim_tx_descs(struct sge_txq *, u_int);
+static void tx_reclaim(void *, int);
+static __be64 get_flit(struct sglist_seg *, int, int);
static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
struct mbuf *);
static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
struct mbuf *);
+static void wrq_tx_drain(void *, int);
+static void drain_wrq_wr_list(struct adapter *, struct sge_wrq *);
static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
@@ -1785,327 +1789,679 @@ t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
}
/*
+ * Must drain the wrq or make sure that someone else will.
+ */
+static void
+wrq_tx_drain(void *arg, int n)
+{
+ struct sge_wrq *wrq = arg;
+ struct sge_eq *eq = &wrq->eq;
+
+ EQ_LOCK(eq);
+ if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
+ drain_wrq_wr_list(wrq->adapter, wrq);
+ EQ_UNLOCK(eq);
+}
+
+static void
+drain_wrq_wr_list(struct adapter *sc, struct sge_wrq *wrq)
+{
+ struct sge_eq *eq = &wrq->eq;
+ u_int available, dbdiff; /* # of hardware descriptors */
+ u_int n;
+ struct wrqe *wr;
+ struct fw_eth_tx_pkt_wr *dst; /* any fw WR struct will do */
+
+ EQ_LOCK_ASSERT_OWNED(eq);
+ MPASS(TAILQ_EMPTY(&wrq->incomplete_wrs));
+ wr = STAILQ_FIRST(&wrq->wr_list);
+ MPASS(wr != NULL); /* Must be called with something useful to do */
+ dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx);
+
+ do {
+ eq->cidx = read_hw_cidx(eq);
+ if (eq->pidx == eq->cidx)
+ available = eq->sidx - 1;
+ else
+ available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
+
+ MPASS(wr->wrq == wrq);
+ n = howmany(wr->wr_len, EQ_ESIZE);
+ if (available < n)
+ return;
+
+ dst = (void *)&eq->desc[eq->pidx];
+ if (__predict_true(eq->sidx - eq->pidx > n)) {
+ /* Won't wrap, won't end exactly at the status page. */
+ bcopy(&wr->wr[0], dst, wr->wr_len);
+ eq->pidx += n;
+ } else {
+ int first_portion = (eq->sidx - eq->pidx) * EQ_ESIZE;
+
+ bcopy(&wr->wr[0], dst, first_portion);
+ if (wr->wr_len > first_portion) {
+ bcopy(&wr->wr[first_portion], &eq->desc[0],
+ wr->wr_len - first_portion);
+ }
+ eq->pidx = n - (eq->sidx - eq->pidx);
+ }
+
+ if (available < eq->sidx / 4 &&
+ atomic_cmpset_int(&eq->equiq, 0, 1)) {
+ dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
+ F_FW_WR_EQUEQ);
+ eq->equeqidx = eq->pidx;
+ } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) {
+ dst->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
+ eq->equeqidx = eq->pidx;
+ }
+
+ dbdiff += n;
+ if (dbdiff >= 16) {
+ ring_eq_db(sc, eq, dbdiff);
+ dbdiff = 0;
+ }
+
+ STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
+ free_wrqe(wr);
+ MPASS(wrq->nwr_pending > 0);
+ wrq->nwr_pending--;
+ MPASS(wrq->ndesc_needed >= n);
+ wrq->ndesc_needed -= n;
+ } while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL);
+
+ if (dbdiff)
+ ring_eq_db(sc, eq, dbdiff);
+}
+
+/*
* Doesn't fail. Holds on to work requests it can't send right away.
*/
void
t4_wrq_tx_locked(struct adapter *sc, struct sge_wrq *wrq, struct wrqe *wr)
{
+#ifdef INVARIANTS
struct sge_eq *eq = &wrq->eq;
- int can_reclaim;
- caddr_t dst;
+#endif
+
+ EQ_LOCK_ASSERT_OWNED(eq);
+ MPASS(wr != NULL);
+ MPASS(wr->wr_len > 0 && wr->wr_len <= SGE_MAX_WR_LEN);
+ MPASS((wr->wr_len & 0x7) == 0);
- TXQ_LOCK_ASSERT_OWNED(wrq);
+ STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
+ wrq->nwr_pending++;
+ wrq->ndesc_needed += howmany(wr->wr_len, EQ_ESIZE);
+
+ if (!TAILQ_EMPTY(&wrq->incomplete_wrs))
+ return; /* commit_wrq_wr will drain wr_list as well. */
+
+ drain_wrq_wr_list(sc, wrq);
+
+ /* Doorbell must have caught up to the pidx. */
+ MPASS(eq->pidx == eq->dbidx);
+}
+
+void
+t4_update_fl_bufsize(struct ifnet *ifp)
+{
+ struct port_info *pi = ifp->if_softc;
+ struct adapter *sc = pi->adapter;
+ struct sge_rxq *rxq;
#ifdef TCP_OFFLOAD
- KASSERT((eq->flags & EQ_TYPEMASK) == EQ_OFLD ||
- (eq->flags & EQ_TYPEMASK) == EQ_CTRL,
- ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
-#else
- KASSERT((eq->flags & EQ_TYPEMASK) == EQ_CTRL,
- ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
+ struct sge_ofld_rxq *ofld_rxq;
#endif
+ struct sge_fl *fl;
+ int i, maxp, mtu = ifp->if_mtu;
- if (__predict_true(wr != NULL))
- STAILQ_INSERT_TAIL(&wrq->wr_list, wr, link);
+ maxp = mtu_to_max_payload(sc, mtu, 0);
+ for_each_rxq(pi, i, rxq) {
+ fl = &rxq->fl;
- can_reclaim = reclaimable(eq);
- if (__predict_false(eq->flags & EQ_STALLED)) {
- if (eq->avail + can_reclaim < tx_resume_threshold(eq))
- return;
- eq->flags &= ~EQ_STALLED;
- eq->unstalled++;
+ FL_LOCK(fl);
+ find_best_refill_source(sc, fl, maxp);
+ FL_UNLOCK(fl);
}
- eq->cidx += can_reclaim;
- eq->avail += can_reclaim;
- if (__predict_false(eq->cidx >= eq->cap))
- eq->cidx -= eq->cap;
+#ifdef TCP_OFFLOAD
+ maxp = mtu_to_max_payload(sc, mtu, 1);
+ for_each_ofld_rxq(pi, i, ofld_rxq) {
+ fl = &ofld_rxq->fl;
- while ((wr = STAILQ_FIRST(&wrq->wr_list)) != NULL) {
- int ndesc;
+ FL_LOCK(fl);
+ find_best_refill_source(sc, fl, maxp);
+ FL_UNLOCK(fl);
+ }
+#endif
+}
- if (__predict_false(wr->wr_len < 0 ||
- wr->wr_len > SGE_MAX_WR_LEN || (wr->wr_len & 0x7))) {
+static inline int
+mbuf_nsegs(struct mbuf *m)
+{
-#ifdef INVARIANTS
- panic("%s: work request with length %d", __func__,
- wr->wr_len);
-#endif
-#ifdef KDB
- kdb_backtrace();
-#endif
- log(LOG_ERR, "%s: %s work request with length %d",
- device_get_nameunit(sc->dev), __func__, wr->wr_len);
- STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
- free_wrqe(wr);
- continue;
- }
+ M_ASSERTPKTHDR(m);
+ KASSERT(m->m_pkthdr.l5hlen > 0,
+ ("%s: mbuf %p missing information on # of segments.", __func__, m));
- ndesc = howmany(wr->wr_len, EQ_ESIZE);
- if (eq->avail < ndesc) {
- wrq->no_desc++;
- break;
- }
+ return (m->m_pkthdr.l5hlen);
+}
- dst = (void *)&eq->desc[eq->pidx];
- copy_to_txd(eq, wrtod(wr), &dst, wr->wr_len);
+static inline void
+set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs)
+{
- eq->pidx += ndesc;
- eq->avail -= ndesc;
- if (__predict_false(eq->pidx >= eq->cap))
- eq->pidx -= eq->cap;
+ M_ASSERTPKTHDR(m);
+ m->m_pkthdr.l5hlen = nsegs;
+}
- eq->pending += ndesc;
- if (eq->pending >= 8)
- ring_eq_db(sc, eq);
+static inline int
+mbuf_len16(struct mbuf *m)
+{
+ int n;
- wrq->tx_wrs++;
- STAILQ_REMOVE_HEAD(&wrq->wr_list, link);
- free_wrqe(wr);
+ M_ASSERTPKTHDR(m);
+ n = m->m_pkthdr.PH_loc.eight[0];
+ MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16);
- if (eq->avail < 8) {
- can_reclaim = reclaimable(eq);
- eq->cidx += can_reclaim;
- eq->avail += can_reclaim;
- if (__predict_false(eq->cidx >= eq->cap))
- eq->cidx -= eq->cap;
- }
- }
+ return (n);
+}
+
+static inline void
+set_mbuf_len16(struct mbuf *m, uint8_t len16)
+{
- if (eq->pending)
- ring_eq_db(sc, eq);
+ M_ASSERTPKTHDR(m);
+ m->m_pkthdr.PH_loc.eight[0] = len16;
+}
+
+static inline int
+needs_tso(struct mbuf *m)
+{
- if (wr != NULL) {
- eq->flags |= EQ_STALLED;
- if (callout_pending(&eq->tx_callout) == 0)
- callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
+ M_ASSERTPKTHDR(m);
+
+ if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+ KASSERT(m->m_pkthdr.tso_segsz > 0,
+ ("%s: TSO requested in mbuf %p but MSS not provided",
+ __func__, m));
+ return (1);
}
+
+ return (0);
}
-/* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
-#define TXPKTS_PKT_HDR ((\
- sizeof(struct ulp_txpkt) + \
- sizeof(struct ulptx_idata) + \
- sizeof(struct cpl_tx_pkt_core) \
- ) / 8)
-
-/* Header of a coalesced tx WR, before SGL of first packet (in flits) */
-#define TXPKTS_WR_HDR (\
- sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
- TXPKTS_PKT_HDR)
-
-/* Header of a tx WR, before SGL of first packet (in flits) */
-#define TXPKT_WR_HDR ((\
- sizeof(struct fw_eth_tx_pkt_wr) + \
- sizeof(struct cpl_tx_pkt_core) \
- ) / 8 )
-
-/* Header of a tx LSO WR, before SGL of first packet (in flits) */
-#define TXPKT_LSO_WR_HDR ((\
- sizeof(struct fw_eth_tx_pkt_wr) + \
- sizeof(struct cpl_tx_pkt_lso_core) + \
- sizeof(struct cpl_tx_pkt_core) \
- ) / 8 )
+static inline int
+needs_l3_csum(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
-int
-t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
+ if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO))
+ return (1);
+ return (0);
+}
+
+static inline int
+needs_l4_csum(struct mbuf *m)
{
- struct port_info *pi = (void *)ifp->if_softc;
- struct adapter *sc = pi->adapter;
- struct sge_eq *eq = &txq->eq;
- struct buf_ring *br = txq->br;
- struct mbuf *next;
- int rc, coalescing, can_reclaim;
- struct txpkts txpkts;
- struct sgl sgl;
- TXQ_LOCK_ASSERT_OWNED(txq);
- KASSERT(m, ("%s: called with nothing to do.", __func__));
- KASSERT((eq->flags & EQ_TYPEMASK) == EQ_ETH,
- ("%s: eq type %d", __func__, eq->flags & EQ_TYPEMASK));
+ M_ASSERTPKTHDR(m);
- prefetch(&eq->desc[eq->pidx]);
- prefetch(&txq->sdesc[eq->pidx]);
+ if (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
+ CSUM_TCP_IPV6 | CSUM_TSO))
+ return (1);
+ return (0);
+}
- txpkts.npkt = 0;/* indicates there's nothing in txpkts */
- coalescing = 0;
+static inline int
+needs_vlan_insertion(struct mbuf *m)
+{
- can_reclaim = reclaimable(eq);
- if (__predict_false(eq->flags & EQ_STALLED)) {
- if (eq->avail + can_reclaim < tx_resume_threshold(eq)) {
- txq->m = m;
- return (0);
- }
- eq->flags &= ~EQ_STALLED;
- eq->unstalled++;
- }
+ M_ASSERTPKTHDR(m);
- if (__predict_false(eq->flags & EQ_DOOMED)) {
- m_freem(m);
- while ((m = buf_ring_dequeue_sc(txq->br)) != NULL)
- m_freem(m);
- return (ENETDOWN);
+ if (m->m_flags & M_VLANTAG) {
+ KASSERT(m->m_pkthdr.ether_vtag != 0,
+ ("%s: HWVLAN requested in mbuf %p but tag not provided",
+ __func__, m));
+ return (1);
}
+ return (0);
+}
- if (eq->avail < 8 && can_reclaim)
- reclaim_tx_descs(txq, can_reclaim, 32);
+static void *
+m_advance(struct mbuf **pm, int *poffset, int len)
+{
+ struct mbuf *m = *pm;
+ int offset = *poffset;
+ uintptr_t p = 0;
- for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
+ MPASS(len > 0);
- if (eq->avail < 8)
+ while (len) {
+ if (offset + len < m->m_len) {
+ offset += len;
+ p = mtod(m, uintptr_t) + offset;
break;
+ }
+ len -= m->m_len - offset;
+ m = m->m_next;
+ offset = 0;
+ MPASS(m != NULL);
+ }
+ *poffset = offset;
+ *pm = m;
+ return ((void *)p);
+}
- next = m->m_nextpkt;
- m->m_nextpkt = NULL;
+static inline int
+same_paddr(char *a, char *b)
+{
- if (next || buf_ring_peek(br))
- coalescing = 1;
+ if (a == b)
+ return (1);
+ else if (a != NULL && b != NULL) {
+ vm_offset_t x = (vm_offset_t)a;
+ vm_offset_t y = (vm_offset_t)b;
- rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
- if (rc != 0) {
- if (rc == ENOMEM) {
+ if ((x & PAGE_MASK) == (y & PAGE_MASK) &&
+ pmap_kextract(x) == pmap_kextract(y))
+ return (1);
+ }
- /* Short of resources, suspend tx */
+ return (0);
+}
- m->m_nextpkt = next;
- break;
- }
+/*
+ * Can deal with empty mbufs in the chain that have m_len = 0, but the chain
+ * must have at least one mbuf that's not empty.
+ */
+static inline int
+count_mbuf_nsegs(struct mbuf *m)
+{
+ char *prev_end, *start;
+ int len, nsegs;
- /*
- * Unrecoverable error for this packet, throw it away
- * and move on to the next. get_pkt_sgl may already
- * have freed m (it will be NULL in that case and the
- * m_freem here is still safe).
- */
+ MPASS(m != NULL);
- m_freem(m);
+ nsegs = 0;
+ prev_end = NULL;
+ for (; m; m = m->m_next) {
+
+ len = m->m_len;
+ if (__predict_false(len == 0))
continue;
- }
+ start = mtod(m, char *);
- if (coalescing &&
- add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
+ nsegs += sglist_count(start, len);
+ if (same_paddr(prev_end, start))
+ nsegs--;
+ prev_end = start + len;
+ }
- /* Successfully absorbed into txpkts */
+ MPASS(nsegs > 0);
+ return (nsegs);
+}
- write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
- goto doorbell;
+/*
+ * Analyze the mbuf to determine its tx needs. The mbuf passed in may change:
+ * a) caller can assume it's been freed if this function returns with an error.
+ * b) it may get defragged up if the gather list is too long for the hardware.
+ */
+int
+parse_pkt(struct mbuf **mp)
+{
+ struct mbuf *m0 = *mp, *m;
+ int rc, nsegs, defragged = 0, offset;
+ struct ether_header *eh;
+ void *l3hdr;
+#if defined(INET) || defined(INET6)
+ struct tcphdr *tcp;
+#endif
+ uint16_t eh_type;
+
+ M_ASSERTPKTHDR(m0);
+ if (__predict_false(m0->m_pkthdr.len < ETHER_HDR_LEN)) {
+ rc = EINVAL;
+fail:
+ m_freem(m0);
+ *mp = NULL;
+ return (rc);
+ }
+restart:
+ /*
+ * First count the number of gather list segments in the payload.
+ * Defrag the mbuf if nsegs exceeds the hardware limit.
+ */
+ M_ASSERTPKTHDR(m0);
+ MPASS(m0->m_pkthdr.len > 0);
+ nsegs = count_mbuf_nsegs(m0);
+ if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) {
+ if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) {
+ rc = EFBIG;
+ goto fail;
}
+ *mp = m0 = m; /* update caller's copy after defrag */
+ goto restart;
+ }
- /*
- * We weren't coalescing to begin with, or current frame could
- * not be coalesced (add_to_txpkts flushes txpkts if a frame
- * given to it can't be coalesced). Either way there should be
- * nothing in txpkts.
- */
- KASSERT(txpkts.npkt == 0,
- ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
+ if (__predict_false(nsegs > 2 && m0->m_pkthdr.len <= MHLEN)) {
+ m0 = m_pullup(m0, m0->m_pkthdr.len);
+ if (m0 == NULL) {
+ /* Should have left well enough alone. */
+ rc = EFBIG;
+ goto fail;
+ }
+ *mp = m0; /* update caller's copy after pullup */
+ goto restart;
+ }
+ set_mbuf_nsegs(m0, nsegs);
+ set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0)));
- /* We're sending out individual packets now */
- coalescing = 0;
+ if (!needs_tso(m0))
+ return (0);
- if (eq->avail < 8)
- reclaim_tx_descs(txq, 0, 8);
- rc = write_txpkt_wr(pi, txq, m, &sgl);
- if (rc != 0) {
+ m = m0;
+ eh = mtod(m, struct ether_header *);
+ eh_type = ntohs(eh->ether_type);
+ if (eh_type == ETHERTYPE_VLAN) {
+ struct ether_vlan_header *evh = (void *)eh;
- /* Short of hardware descriptors, suspend tx */
+ eh_type = ntohs(evh->evl_proto);
+ m0->m_pkthdr.l2hlen = sizeof(*evh);
+ } else
+ m0->m_pkthdr.l2hlen = sizeof(*eh);
- /*
- * This is an unlikely but expensive failure. We've
- * done all the hard work (DMA mappings etc.) and now we
- * can't send out the packet. What's worse, we have to
- * spend even more time freeing up everything in sgl.
- */
- txq->no_desc++;
- free_pkt_sgl(txq, &sgl);
+ offset = 0;
+ l3hdr = m_advance(&m, &offset, m0->m_pkthdr.l2hlen);
- m->m_nextpkt = next;
- break;
- }
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ {
+ struct ip6_hdr *ip6 = l3hdr;
- ETHER_BPF_MTAP(ifp, m);
- if (sgl.nsegs == 0)
- m_freem(m);
-doorbell:
- if (eq->pending >= 8)
- ring_eq_db(sc, eq);
+ MPASS(ip6->ip6_nxt == IPPROTO_TCP);
- can_reclaim = reclaimable(eq);
- if (can_reclaim >= 32)
- reclaim_tx_descs(txq, can_reclaim, 64);
+ m0->m_pkthdr.l3hlen = sizeof(*ip6);
+ break;
}
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip = l3hdr;
- if (txpkts.npkt > 0)
- write_txpkts_wr(txq, &txpkts);
+ m0->m_pkthdr.l3hlen = ip->ip_hl * 4;
+ break;
+ }
+#endif
+ default:
+ panic("%s: ethertype 0x%04x unknown. if_cxgbe must be compiled"
+ " with the same INET/INET6 options as the kernel.",
+ __func__, eh_type);
+ }
- /*
- * m not NULL means there was an error but we haven't thrown it away.
- * This can happen when we're short of tx descriptors (no_desc) or maybe
- * even DMA maps (no_dmamap). Either way, a credit flush and reclaim
- * will get things going again.
- */
- if (m && !(eq->flags & EQ_CRFLUSHED)) {
- struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
+#if defined(INET) || defined(INET6)
+ tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen);
+ m0->m_pkthdr.l4hlen = tcp->th_off * 4;
+#endif
+ MPASS(m0 == *mp);
+ return (0);
+}
- /*
- * If EQ_CRFLUSHED is not set then we know we have at least one
- * available descriptor because any WR that reduces eq->avail to
- * 0 also sets EQ_CRFLUSHED.
- */
- KASSERT(eq->avail > 0, ("%s: no space for eqflush.", __func__));
+void *
+start_wrq_wr(struct sge_wrq *wrq, int len16, struct wrq_cookie *cookie)
+{
+ struct sge_eq *eq = &wrq->eq;
+ struct adapter *sc = wrq->adapter;
+ int ndesc, available;
+ struct wrqe *wr;
+ void *w;
- txsd->desc_used = 1;
- txsd->credits = 0;
- write_eqflush_wr(eq);
- }
- txq->m = m;
+ MPASS(len16 > 0);
+ ndesc = howmany(len16, EQ_ESIZE / 16);
+ MPASS(ndesc > 0 && ndesc <= SGE_MAX_WR_NDESC);
- if (eq->pending)
- ring_eq_db(sc, eq);
+ EQ_LOCK(eq);
- reclaim_tx_descs(txq, 0, 128);
+ if (!STAILQ_EMPTY(&wrq->wr_list))
+ drain_wrq_wr_list(sc, wrq);
- if (eq->flags & EQ_STALLED && callout_pending(&eq->tx_callout) == 0)
- callout_reset(&eq->tx_callout, 1, t4_tx_callout, eq);
+ if (!STAILQ_EMPTY(&wrq->wr_list)) {
+slowpath:
+ EQ_UNLOCK(eq);
+ wr = alloc_wrqe(len16 * 16, wrq);
+ if (__predict_false(wr == NULL))
+ return (NULL);
+ cookie->pidx = -1;
+ cookie->ndesc = ndesc;
+ return (&wr->wr);
+ }
- return (0);
+ eq->cidx = read_hw_cidx(eq);
+ if (eq->pidx == eq->cidx)
+ available = eq->sidx - 1;
+ else
+ available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
+ if (available < ndesc)
+ goto slowpath;
+
+ cookie->pidx = eq->pidx;
+ cookie->ndesc = ndesc;
+ TAILQ_INSERT_TAIL(&wrq->incomplete_wrs, cookie, link);
+
+ w = &eq->desc[eq->pidx];
+ IDXINCR(eq->pidx, ndesc, eq->sidx);
+ if (__predict_false(eq->pidx < ndesc - 1)) {
+ w = &wrq->ss[0];
+ wrq->ss_pidx = cookie->pidx;
+ wrq->ss_len = len16 * 16;
+ }
+
+ EQ_UNLOCK(eq);
+
+ return (w);
}
void
-t4_update_fl_bufsize(struct ifnet *ifp)
+commit_wrq_wr(struct sge_wrq *wrq, void *w, struct wrq_cookie *cookie)
{
- struct port_info *pi = ifp->if_softc;
- struct adapter *sc = pi->adapter;
- struct sge_rxq *rxq;
-#ifdef TCP_OFFLOAD
- struct sge_ofld_rxq *ofld_rxq;
-#endif
- struct sge_fl *fl;
- int i, maxp, mtu = ifp->if_mtu;
+ struct sge_eq *eq = &wrq->eq;
+ struct adapter *sc = wrq->adapter;
+ int ndesc, pidx;
+ struct wrq_cookie *prev, *next;
- maxp = mtu_to_max_payload(sc, mtu, 0);
- for_each_rxq(pi, i, rxq) {
- fl = &rxq->fl;
+ if (cookie->pidx == -1) {
+ struct wrqe *wr = __containerof(w, struct wrqe, wr);
- FL_LOCK(fl);
- find_best_refill_source(sc, fl, maxp);
- FL_UNLOCK(fl);
+ t4_wrq_tx(sc, wr);
+ return;
}
-#ifdef TCP_OFFLOAD
- maxp = mtu_to_max_payload(sc, mtu, 1);
- for_each_ofld_rxq(pi, i, ofld_rxq) {
- fl = &ofld_rxq->fl;
- FL_LOCK(fl);
- find_best_refill_source(sc, fl, maxp);
- FL_UNLOCK(fl);
+ ndesc = cookie->ndesc; /* Can be more than SGE_MAX_WR_NDESC here. */
+ pidx = cookie->pidx;
+ MPASS(pidx >= 0 && pidx < eq->sidx);
+ if (__predict_false(w == &wrq->ss[0])) {
+ int n = (eq->sidx - wrq->ss_pidx) * EQ_ESIZE;
+
+ MPASS(wrq->ss_len > n); /* WR had better wrap around. */
+ bcopy(&wrq->ss[0], &eq->desc[wrq->ss_pidx], n);
+ bcopy(&wrq->ss[n], &eq->desc[0], wrq->ss_len - n);
+ wrq->tx_wrs_ss++;
+ } else
+ wrq->tx_wrs_direct++;
+
+ EQ_LOCK(eq);
+ prev = TAILQ_PREV(cookie, wrq_incomplete_wrs, link);
+ next = TAILQ_NEXT(cookie, link);
+ if (prev == NULL) {
+ MPASS(pidx == eq->dbidx);
+ if (next == NULL || ndesc >= 16)
+ ring_eq_db(wrq->adapter, eq, ndesc);
+ else {
+ MPASS(IDXDIFF(next->pidx, pidx, eq->sidx) == ndesc);
+ next->pidx = pidx;
+ next->ndesc += ndesc;
+ }
+ } else {
+ MPASS(IDXDIFF(pidx, prev->pidx, eq->sidx) == prev->ndesc);
+ prev->ndesc += ndesc;
+ }
+ TAILQ_REMOVE(&wrq->incomplete_wrs, cookie, link);
+
+ if (TAILQ_EMPTY(&wrq->incomplete_wrs) && !STAILQ_EMPTY(&wrq->wr_list))
+ drain_wrq_wr_list(sc, wrq);
+
+#ifdef INVARIANTS
+ if (TAILQ_EMPTY(&wrq->incomplete_wrs)) {
+ /* Doorbell must have caught up to the pidx. */
+ MPASS(wrq->eq.pidx == wrq->eq.dbidx);
}
#endif
+ EQ_UNLOCK(eq);
}
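
For illustration only (not part of this diff): from a caller's point of view the reserve/fill/commit pattern implemented by these two functions looks like the sketch below. fill_my_wr() and its len16 are hypothetical stand-ins for whatever work request the caller actually builds.

	struct wrq_cookie cookie;
	void *w;

	/* Reserve space for a WR that is len16 16-byte units long. */
	w = start_wrq_wr(wrq, len16, &cookie);
	if (w == NULL)
		return (ENOMEM);	/* slowpath allocation failed */
	fill_my_wr(w, len16);		/* write the WR into the space provided */
	commit_wrq_wr(wrq, w, &cookie);	/* doorbell is rung when it is safe */

The pointer handed back may be descriptor-ring space, the spill-over staging area (wrq->ss) when the WR would wrap past the end of the ring, or a freshly allocated wrqe on the slowpath (cookie.pidx == -1); commit_wrq_wr copies staged bytes into place where needed and defers the doorbell until every earlier, still-incomplete WR has also been committed.
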
-int
-can_resume_tx(struct sge_eq *eq)
+static u_int
+can_resume_eth_tx(struct mp_ring *r)
+{
+ struct sge_eq *eq = r->cookie;
+
+ return (total_available_tx_desc(eq) > eq->sidx / 8);
+}
+
+static inline int
+cannot_use_txpkts(struct mbuf *m)
+{
+ /* maybe put a GL limit too, to avoid silliness? */
+
+ return (needs_tso(m));
+}
+
+/*
+ * r->items[cidx] to r->items[pidx], with a wraparound at r->size, are ready to
+ * be consumed. Return the actual number consumed. 0 indicates a stall.
+ */
+static u_int
+eth_tx(struct mp_ring *r, u_int cidx, u_int pidx)
{
+ struct sge_txq *txq = r->cookie;
+ struct sge_eq *eq = &txq->eq;
+ struct ifnet *ifp = txq->ifp;
+ struct port_info *pi = (void *)ifp->if_softc;
+ struct adapter *sc = pi->adapter;
+ u_int total, remaining; /* # of packets */
+ u_int available, dbdiff; /* # of hardware descriptors */
+ u_int n, next_cidx;
+ struct mbuf *m0, *tail;
+ struct txpkts txp;
+ struct fw_eth_tx_pkts_wr *wr; /* any fw WR struct will do */
+
+ remaining = IDXDIFF(pidx, cidx, r->size);
+ MPASS(remaining > 0); /* Must not be called without work to do. */
+ total = 0;
+
+ TXQ_LOCK(txq);
+ if (__predict_false((eq->flags & EQ_ENABLED) == 0)) {
+ while (cidx != pidx) {
+ m0 = r->items[cidx];
+ m_freem(m0);
+ if (++cidx == r->size)
+ cidx = 0;
+ }
+ reclaim_tx_descs(txq, 2048);
+ total = remaining;
+ goto done;
+ }
+
+ /* How many hardware descriptors do we have readily available. */
+ if (eq->pidx == eq->cidx)
+ available = eq->sidx - 1;
+ else
+ available = IDXDIFF(eq->cidx, eq->pidx, eq->sidx) - 1;
+ dbdiff = IDXDIFF(eq->pidx, eq->dbidx, eq->sidx);
+
+ while (remaining > 0) {
+
+ m0 = r->items[cidx];
+ M_ASSERTPKTHDR(m0);
+ MPASS(m0->m_nextpkt == NULL);
+
+ if (available < SGE_MAX_WR_NDESC) {
+ available += reclaim_tx_descs(txq, 64);
+ if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16))
+ break; /* out of descriptors */
+ }
- return (eq->avail + reclaimable(eq) >= tx_resume_threshold(eq));
+ next_cidx = cidx + 1;
+ if (__predict_false(next_cidx == r->size))
+ next_cidx = 0;
+
+ wr = (void *)&eq->desc[eq->pidx];
+ if (remaining > 1 &&
+ try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) {
+
+ /* pkts at cidx, next_cidx should both be in txp. */
+ MPASS(txp.npkt == 2);
+ tail = r->items[next_cidx];
+ MPASS(tail->m_nextpkt == NULL);
+ ETHER_BPF_MTAP(ifp, m0);
+ ETHER_BPF_MTAP(ifp, tail);
+ m0->m_nextpkt = tail;
+
+ if (__predict_false(++next_cidx == r->size))
+ next_cidx = 0;
+
+ while (next_cidx != pidx) {
+ if (add_to_txpkts(r->items[next_cidx], &txp,
+ available) != 0)
+ break;
+ tail->m_nextpkt = r->items[next_cidx];
+ tail = tail->m_nextpkt;
+ ETHER_BPF_MTAP(ifp, tail);
+ if (__predict_false(++next_cidx == r->size))
+ next_cidx = 0;
+ }
+
+ n = write_txpkts_wr(txq, wr, m0, &txp, available);
+ total += txp.npkt;
+ remaining -= txp.npkt;
+ } else {
+ total++;
+ remaining--;
+ n = write_txpkt_wr(txq, (void *)wr, m0, available);
+ ETHER_BPF_MTAP(ifp, m0);
+ }
+ MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC);
+
+ available -= n;
+ dbdiff += n;
+ IDXINCR(eq->pidx, n, eq->sidx);
+
+ if (total_available_tx_desc(eq) < eq->sidx / 4 &&
+ atomic_cmpset_int(&eq->equiq, 0, 1)) {
+ wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUIQ |
+ F_FW_WR_EQUEQ);
+ eq->equeqidx = eq->pidx;
+ } else if (IDXDIFF(eq->pidx, eq->equeqidx, eq->sidx) >= 32) {
+ wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
+ eq->equeqidx = eq->pidx;
+ }
+
+ if (dbdiff >= 16 && remaining >= 4) {
+ ring_eq_db(sc, eq, dbdiff);
+ available += reclaim_tx_descs(txq, 4 * dbdiff);
+ dbdiff = 0;
+ }
+
+ cidx = next_cidx;
+ }
+ if (dbdiff != 0) {
+ ring_eq_db(sc, eq, dbdiff);
+ reclaim_tx_descs(txq, 32);
+ }
+done:
+ TXQ_UNLOCK(txq);
+
+ return (total);
}
static inline void
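
The descriptor accounting in eth_tx (and in start_wrq_wr earlier) leans on the IDXDIFF/IDXINCR ring-index helpers, which are defined elsewhere in the driver (adapter.h) rather than in this file. A self-contained sketch of the behaviour assumed here, not the driver's actual macro text:

	/* Sketch: index arithmetic for a ring with 'size' slots. */
	static inline u_int
	idxdiff(u_int head, u_int tail, u_int size)
	{
		/* # of slots from tail forward to head, with wraparound. */
		return (head >= tail ? head - tail : size - tail + head);
	}

	static inline void
	idxincr(uint16_t *idx, u_int n, u_int size)
	{
		/* Advance idx by n (n <= size), wrapping at size. */
		*idx = (*idx + n < size) ? *idx + n : *idx + n - size;
	}

Because pidx == cidx is reserved to mean the ring is empty, at most sidx - 1 of the sidx entries in front of the status page can be in use at once, which is why both availability calculations subtract one.
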
@@ -2155,11 +2511,8 @@ init_eq(struct sge_eq *eq, int eqtype, int qsize, uint8_t tx_chan,
eq->flags = eqtype & EQ_TYPEMASK;
eq->tx_chan = tx_chan;
eq->iqid = iqid;
- eq->qsize = qsize;
+ eq->sidx = qsize - spg_len / EQ_ESIZE;
strlcpy(eq->lockname, name, sizeof(eq->lockname));
-
- TASK_INIT(&eq->tx_task, 0, t4_tx_task, eq);
- callout_init(&eq->tx_callout, CALLOUT_MPSAFE);
}
static int
@@ -2848,6 +3201,7 @@ ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
{
int rc, cntxt_id;
struct fw_eq_ctrl_cmd c;
+ int qsize = eq->sidx + spg_len / EQ_ESIZE;
bzero(&c, sizeof(c));
@@ -2856,17 +3210,16 @@ ctrl_eq_alloc(struct adapter *sc, struct sge_eq *eq)
V_FW_EQ_CTRL_CMD_VFN(0));
c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
- c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
+ c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid));
c.physeqid_pkd = htobe32(0);
c.fetchszm_to_iqid =
- htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
+ htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
V_FW_EQ_CTRL_CMD_PCIECHN(eq->tx_chan) |
F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
c.dcaen_to_eqsize =
htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
- V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
- V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
+ V_FW_EQ_CTRL_CMD_EQSIZE(qsize));
c.eqaddr = htobe64(eq->ba);
rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
@@ -2892,6 +3245,7 @@ eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
int rc, cntxt_id;
struct fw_eq_eth_cmd c;
+ int qsize = eq->sidx + spg_len / EQ_ESIZE;
bzero(&c, sizeof(c));
@@ -2900,15 +3254,15 @@ eth_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
V_FW_EQ_ETH_CMD_VFN(0));
c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
- c.autoequiqe_to_viid = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
+ c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
+ F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(pi->viid));
c.fetchszm_to_iqid =
- htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
+ htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
V_FW_EQ_ETH_CMD_PCIECHN(eq->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
V_FW_EQ_ETH_CMD_IQID(eq->iqid));
c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
- V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
- V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
- V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
+ V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
+ V_FW_EQ_ETH_CMD_EQSIZE(qsize));
c.eqaddr = htobe64(eq->ba);
rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
@@ -2935,6 +3289,7 @@ ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
int rc, cntxt_id;
struct fw_eq_ofld_cmd c;
+ int qsize = eq->sidx + spg_len / EQ_ESIZE;
bzero(&c, sizeof(c));
@@ -2944,14 +3299,13 @@ ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
c.alloc_to_len16 = htonl(F_FW_EQ_OFLD_CMD_ALLOC |
F_FW_EQ_OFLD_CMD_EQSTART | FW_LEN16(c));
c.fetchszm_to_iqid =
- htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
+ htonl(V_FW_EQ_OFLD_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
V_FW_EQ_OFLD_CMD_PCIECHN(eq->tx_chan) |
F_FW_EQ_OFLD_CMD_FETCHRO | V_FW_EQ_OFLD_CMD_IQID(eq->iqid));
c.dcaen_to_eqsize =
htobe32(V_FW_EQ_OFLD_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
V_FW_EQ_OFLD_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
- V_FW_EQ_OFLD_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
- V_FW_EQ_OFLD_CMD_EQSIZE(eq->qsize));
+ V_FW_EQ_OFLD_CMD_EQSIZE(qsize));
c.eqaddr = htobe64(eq->ba);
rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
@@ -2976,21 +3330,20 @@ ofld_eq_alloc(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
static int
alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
{
- int rc;
+ int rc, qsize;
size_t len;
mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
- len = eq->qsize * EQ_ESIZE;
+ qsize = eq->sidx + spg_len / EQ_ESIZE;
+ len = qsize * EQ_ESIZE;
rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
&eq->ba, (void **)&eq->desc);
if (rc)
return (rc);
- eq->cap = eq->qsize - spg_len / EQ_ESIZE;
- eq->spg = (void *)&eq->desc[eq->cap];
- eq->avail = eq->cap - 1; /* one less to avoid cidx = pidx */
eq->pidx = eq->cidx = 0;
+ eq->equeqidx = eq->dbidx = 0;
eq->doorbells = sc->doorbells;
switch (eq->flags & EQ_TYPEMASK) {
@@ -3018,8 +3371,6 @@ alloc_eq(struct adapter *sc, struct port_info *pi, struct sge_eq *eq)
eq->flags & EQ_TYPEMASK, rc);
}
- eq->tx_callout.c_cpu = eq->cntxt_id % mp_ncpus;
-
if (isset(&eq->doorbells, DOORBELL_UDB) ||
isset(&eq->doorbells, DOORBELL_UDBWC) ||
isset(&eq->doorbells, DOORBELL_WCWR)) {
@@ -3101,7 +3452,11 @@ alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
return (rc);
wrq->adapter = sc;
+ TASK_INIT(&wrq->wrq_tx_task, 0, wrq_tx_drain, wrq);
+ TAILQ_INIT(&wrq->incomplete_wrs);
STAILQ_INIT(&wrq->wr_list);
+ wrq->nwr_pending = 0;
+ wrq->ndesc_needed = 0;
SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
&wrq->eq.cntxt_id, 0, "SGE context id of the queue");
@@ -3111,13 +3466,10 @@ alloc_wrq(struct adapter *sc, struct port_info *pi, struct sge_wrq *wrq,
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pidx",
CTLTYPE_INT | CTLFLAG_RD, &wrq->eq.pidx, 0, sysctl_uint16, "I",
"producer index");
- SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs", CTLFLAG_RD,
- &wrq->tx_wrs, "# of work requests");
- SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
- &wrq->no_desc, 0,
- "# of times queue ran out of hardware descriptors");
- SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
- &wrq->eq.unstalled, 0, "# of times queue recovered after stall");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_direct", CTLFLAG_RD,
+ &wrq->tx_wrs_direct, "# of work requests (direct)");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tx_wrs_copied", CTLFLAG_RD,
+ &wrq->tx_wrs_copied, "# of work requests (copied)");
return (rc);
}
@@ -3145,37 +3497,30 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
char name[16];
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
- rc = alloc_eq(sc, pi, eq);
- if (rc)
- return (rc);
-
- txq->ifp = pi->ifp;
-
- txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
- M_ZERO | M_WAITOK);
- txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
-
- rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
- BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
- BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
+ rc = mp_ring_alloc(&txq->r, eq->sidx, txq, eth_tx, can_resume_eth_tx,
+ M_CXGBE, M_WAITOK);
if (rc != 0) {
- device_printf(sc->dev,
- "failed to create tx DMA tag: %d\n", rc);
+ device_printf(sc->dev, "failed to allocate mp_ring: %d\n", rc);
return (rc);
}
- /*
- * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
- * limit for any WR). txq->no_dmamap events shouldn't occur if maps is
- * sized for the worst case.
- */
- rc = t4_alloc_tx_maps(&txq->txmaps, txq->tx_tag, eq->qsize * 10 / 8,
- M_WAITOK);
+ rc = alloc_eq(sc, pi, eq);
if (rc != 0) {
- device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
+ mp_ring_free(txq->r);
+ txq->r = NULL;
return (rc);
}
+ /* Can't fail after this point. */
+
+ TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq);
+ txq->ifp = pi->ifp;
+ txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK);
+ txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
+ V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf));
+ txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE,
+ M_ZERO | M_WAITOK);
+
snprintf(name, sizeof(name), "%d", idx);
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
NULL, "tx queue");
@@ -3203,23 +3548,39 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
&txq->sgl_wrs, "# of work requests with direct SGL");
SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
&txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
- SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
- &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
- SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
- &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
-
- SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
- &txq->br->br_drops, "# of drops in the buf_ring for this queue");
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
- &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
- &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
- &eq->egr_update, 0, "egress update notifications from the SGE");
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "unstalled", CTLFLAG_RD,
- &eq->unstalled, 0, "# of times txq recovered after stall");
+ SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts0_wrs",
+ CTLFLAG_RD, &txq->txpkts0_wrs,
+ "# of txpkts (type 0) work requests");
+ SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts1_wrs",
+ CTLFLAG_RD, &txq->txpkts1_wrs,
+ "# of txpkts (type 1) work requests");
+ SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts0_pkts",
+ CTLFLAG_RD, &txq->txpkts0_pkts,
+ "# of frames tx'd using type0 txpkts work requests");
+ SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts1_pkts",
+ CTLFLAG_RD, &txq->txpkts1_pkts,
+ "# of frames tx'd using type1 txpkts work requests");
+
+ SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_enqueues",
+ CTLFLAG_RD, &txq->r->enqueues,
+ "# of enqueues to the mp_ring for this queue");
+ SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_drops",
+ CTLFLAG_RD, &txq->r->drops,
+ "# of drops in the mp_ring for this queue");
+ SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_starts",
+ CTLFLAG_RD, &txq->r->starts,
+ "# of normal consumer starts in the mp_ring for this queue");
+ SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_stalls",
+ CTLFLAG_RD, &txq->r->stalls,
+ "# of consumer stalls in the mp_ring for this queue");
+ SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_restarts",
+ CTLFLAG_RD, &txq->r->restarts,
+ "# of consumer restarts in the mp_ring for this queue");
+ SYSCTL_ADD_COUNTER_U64(&pi->ctx, children, OID_AUTO, "r_abdications",
+ CTLFLAG_RD, &txq->r->abdications,
+ "# of consumer abdications in the mp_ring for this queue");
- return (rc);
+ return (0);
}
static int
@@ -3233,15 +3594,9 @@ free_txq(struct port_info *pi, struct sge_txq *txq)
if (rc)
return (rc);
+ sglist_free(txq->gl);
free(txq->sdesc, M_CXGBE);
-
- if (txq->txmaps.maps)
- t4_free_tx_maps(&txq->txmaps, txq->tx_tag);
-
- buf_ring_free(txq->br, M_CXGBE);
-
- if (txq->tx_tag)
- bus_dma_tag_destroy(txq->tx_tag);
+ mp_ring_free(txq->r);
bzero(txq, sizeof(*txq));
return (0);
@@ -3466,293 +3821,159 @@ free_fl_sdesc(struct adapter *sc, struct sge_fl *fl)
fl->sdesc = NULL;
}
-int
-t4_alloc_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag, int count,
- int flags)
+static inline void
+get_pkt_gl(struct mbuf *m, struct sglist *gl)
{
- struct tx_map *txm;
- int i, rc;
-
- txmaps->map_total = txmaps->map_avail = count;
- txmaps->map_cidx = txmaps->map_pidx = 0;
-
- txmaps->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
- M_ZERO | flags);
+ int rc;
- txm = txmaps->maps;
- for (i = 0; i < count; i++, txm++) {
- rc = bus_dmamap_create(tx_tag, 0, &txm->map);
- if (rc != 0)
- goto failed;
- }
+ M_ASSERTPKTHDR(m);
- return (0);
-failed:
- while (--i >= 0) {
- txm--;
- bus_dmamap_destroy(tx_tag, txm->map);
+ sglist_reset(gl);
+ rc = sglist_append_mbuf(gl, m);
+ if (__predict_false(rc != 0)) {
+ panic("%s: mbuf %p (%d segs) was vetted earlier but now fails "
+ "with %d.", __func__, m, mbuf_nsegs(m), rc);
}
- KASSERT(txm == txmaps->maps, ("%s: EDOOFUS", __func__));
- free(txmaps->maps, M_CXGBE);
- txmaps->maps = NULL;
-
- return (rc);
+ KASSERT(gl->sg_nseg == mbuf_nsegs(m),
+ ("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
+ mbuf_nsegs(m), gl->sg_nseg));
+ KASSERT(gl->sg_nseg > 0 &&
+ gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS),
+ ("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
+ gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS));
}
-void
-t4_free_tx_maps(struct tx_maps *txmaps, bus_dma_tag_t tx_tag)
+/*
+ * len16 for a txpkt WR with a GL. Includes the firmware work request header.
+ */
+static inline u_int
+txpkt_len16(u_int nsegs, u_int tso)
{
- struct tx_map *txm;
- int i;
+ u_int n;
- txm = txmaps->maps;
- for (i = 0; i < txmaps->map_total; i++, txm++) {
-
- if (txm->m) {
- bus_dmamap_unload(tx_tag, txm->map);
- m_freem(txm->m);
- txm->m = NULL;
- }
+ MPASS(nsegs > 0);
- bus_dmamap_destroy(tx_tag, txm->map);
- }
+ nsegs--; /* first segment is part of ulptx_sgl */
+ n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) +
+ sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
+ if (tso)
+ n += sizeof(struct cpl_tx_pkt_lso_core);
- free(txmaps->maps, M_CXGBE);
- txmaps->maps = NULL;
+ return (howmany(n, 16));
}
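
A worked example of the arithmetic, assuming the usual 16-byte sizes of fw_eth_tx_pkt_wr, cpl_tx_pkt_core and ulptx_sgl: a non-TSO frame with 4 DMA segments gives n = 16 + 16 + 16 + 8 * ((3 * 3) / 2 + (3 & 1)) = 88 bytes, so txpkt_len16(4, 0) = howmany(88, 16) = 6, which write_txpkt_wr later turns into howmany(6, EQ_ESIZE / 16) = 2 hardware descriptors.
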
/*
- * We'll do immediate data tx for non-TSO, but only when not coalescing. We're
- * willing to use upto 2 hardware descriptors which means a maximum of 96 bytes
- * of immediate data.
- */
-#define IMM_LEN ( \
- 2 * EQ_ESIZE \
- - sizeof(struct fw_eth_tx_pkt_wr) \
- - sizeof(struct cpl_tx_pkt_core))
-
-/*
- * Returns non-zero on failure, no need to cleanup anything in that case.
- *
- * Note 1: We always try to defrag the mbuf if required and return EFBIG only
- * if the resulting chain still won't fit in a tx descriptor.
- *
- * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
- * does not have the TCP header in it.
+ * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work
+ * request header.
*/
-static int
-get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
- int sgl_only)
+static inline u_int
+txpkts0_len16(u_int nsegs)
{
- struct mbuf *m = *fp;
- struct tx_maps *txmaps;
- struct tx_map *txm;
- int rc, defragged = 0, n;
-
- TXQ_LOCK_ASSERT_OWNED(txq);
-
- if (m->m_pkthdr.tso_segsz)
- sgl_only = 1; /* Do not allow immediate data with LSO */
+ u_int n;
-start: sgl->nsegs = 0;
+ MPASS(nsegs > 0);
- if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
- return (0); /* nsegs = 0 tells caller to use imm. tx */
-
- txmaps = &txq->txmaps;
- if (txmaps->map_avail == 0) {
- txq->no_dmamap++;
- return (ENOMEM);
- }
- txm = &txmaps->maps[txmaps->map_pidx];
+ nsegs--; /* first segment is part of ulptx_sgl */
+ n = sizeof(struct ulp_txpkt) + sizeof(struct ulptx_idata) +
+ sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl) +
+ 8 * ((3 * nsegs) / 2 + (nsegs & 1));
- if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
- *fp = m_pullup(m, 50);
- m = *fp;
- if (m == NULL)
- return (ENOBUFS);
- }
-
- rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
- &sgl->nsegs, BUS_DMA_NOWAIT);
- if (rc == EFBIG && defragged == 0) {
- m = m_defrag(m, M_NOWAIT);
- if (m == NULL)
- return (EFBIG);
-
- defragged = 1;
- *fp = m;
- goto start;
- }
- if (rc != 0)
- return (rc);
-
- txm->m = m;
- txmaps->map_avail--;
- if (++txmaps->map_pidx == txmaps->map_total)
- txmaps->map_pidx = 0;
-
- KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
- ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
-
- /*
- * Store the # of flits required to hold this frame's SGL in nflits. An
- * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
- * multiple (len0 + len1, addr0, addr1) tuples. If addr1 is not used
- * then len1 must be set to 0.
- */
- n = sgl->nsegs - 1;
- sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
-
- return (0);
+ return (howmany(n, 16));
}
-
/*
- * Releases all the txq resources used up in the specified sgl.
+ * len16 for a txpkts type 1 WR with a GL. Does not include the firmware work
+ * request header.
*/
-static int
-free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
+static inline u_int
+txpkts1_len16(void)
{
- struct tx_maps *txmaps;
- struct tx_map *txm;
+ u_int n;
- TXQ_LOCK_ASSERT_OWNED(txq);
-
- if (sgl->nsegs == 0)
- return (0); /* didn't use any map */
+ n = sizeof(struct cpl_tx_pkt_core) + sizeof(struct ulptx_sgl);
- txmaps = &txq->txmaps;
-
- /* 1 pkt uses exactly 1 map, back it out */
+ return (howmany(n, 16));
+}
- txmaps->map_avail++;
- if (txmaps->map_pidx > 0)
- txmaps->map_pidx--;
- else
- txmaps->map_pidx = txmaps->map_total - 1;
+static inline u_int
+imm_payload(u_int ndesc)
+{
+ u_int n;
- txm = &txmaps->maps[txmaps->map_pidx];
- bus_dmamap_unload(txq->tx_tag, txm->map);
- txm->m = NULL;
+ n = ndesc * EQ_ESIZE - sizeof(struct fw_eth_tx_pkt_wr) -
+ sizeof(struct cpl_tx_pkt_core);
- return (0);
+ return (n);
}
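
For the 2-descriptor case that write_txpkt_wr uses below, imm_payload(2) = 2 * 64 - 16 - 16 = 96 bytes (EQ_ESIZE = 64, 16-byte WR and CPL headers), matching the 96-byte immediate-data ceiling stated in the comment removed above.
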
-static int
-write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
- struct sgl *sgl)
+/*
+ * Write a txpkt WR for this packet to the hardware descriptors, update the
+ * software descriptor, and advance the pidx. It is guaranteed that enough
+ * descriptors are available.
+ *
+ * The return value is the # of hardware descriptors used.
+ */
+static u_int
+write_txpkt_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_wr *wr,
+ struct mbuf *m0, u_int available)
{
struct sge_eq *eq = &txq->eq;
- struct fw_eth_tx_pkt_wr *wr;
+ struct tx_sdesc *txsd;
struct cpl_tx_pkt_core *cpl;
uint32_t ctrl; /* used in many unrelated places */
uint64_t ctrl1;
- int nflits, ndesc, pktlen;
- struct tx_sdesc *txsd;
+ int len16, ndesc, pktlen, nsegs;
caddr_t dst;
TXQ_LOCK_ASSERT_OWNED(txq);
+ M_ASSERTPKTHDR(m0);
+ MPASS(available > 0 && available < eq->sidx);
- pktlen = m->m_pkthdr.len;
-
- /*
- * Do we have enough flits to send this frame out?
- */
+ len16 = mbuf_len16(m0);
+ nsegs = mbuf_nsegs(m0);
+ pktlen = m0->m_pkthdr.len;
ctrl = sizeof(struct cpl_tx_pkt_core);
- if (m->m_pkthdr.tso_segsz) {
- nflits = TXPKT_LSO_WR_HDR;
+ if (needs_tso(m0))
ctrl += sizeof(struct cpl_tx_pkt_lso_core);
- } else
- nflits = TXPKT_WR_HDR;
- if (sgl->nsegs > 0)
- nflits += sgl->nflits;
- else {
- nflits += howmany(pktlen, 8);
+ else if (pktlen <= imm_payload(2) && available >= 2) {
+ /* Immediate data. Recalculate len16 and set nsegs to 0. */
ctrl += pktlen;
+ len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) +
+ sizeof(struct cpl_tx_pkt_core) + pktlen, 16);
+ nsegs = 0;
}
- ndesc = howmany(nflits, 8);
- if (ndesc > eq->avail)
- return (ENOMEM);
+ ndesc = howmany(len16, EQ_ESIZE / 16);
+ MPASS(ndesc <= available);
/* Firmware work request header */
- wr = (void *)&eq->desc[eq->pidx];
+ MPASS(wr == (void *)&eq->desc[eq->pidx]);
wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
- ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
- if (eq->avail == ndesc) {
- if (!(eq->flags & EQ_CRFLUSHED)) {
- ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
- eq->flags |= EQ_CRFLUSHED;
- }
- eq->flags |= EQ_STALLED;
- }
+ ctrl = V_FW_WR_LEN16(len16);
wr->equiq_to_len16 = htobe32(ctrl);
wr->r3 = 0;
- if (m->m_pkthdr.tso_segsz) {
+ if (needs_tso(m0)) {
struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
- struct ether_header *eh;
- void *l3hdr;
-#if defined(INET) || defined(INET6)
- struct tcphdr *tcp;
-#endif
- uint16_t eh_type;
-
- ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
- F_LSO_LAST_SLICE;
- eh = mtod(m, struct ether_header *);
- eh_type = ntohs(eh->ether_type);
- if (eh_type == ETHERTYPE_VLAN) {
- struct ether_vlan_header *evh = (void *)eh;
+ KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
+ m0->m_pkthdr.l4hlen > 0,
+ ("%s: mbuf %p needs TSO but missing header lengths",
+ __func__, m0));
+ ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
+ F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2)
+ | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
+ if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header))
ctrl |= V_LSO_ETHHDR_LEN(1);
- l3hdr = evh + 1;
- eh_type = ntohs(evh->evl_proto);
- } else
- l3hdr = eh + 1;
-
- switch (eh_type) {
-#ifdef INET6
- case ETHERTYPE_IPV6:
- {
- struct ip6_hdr *ip6 = l3hdr;
-
- /*
- * XXX-BZ For now we do not pretend to support
- * IPv6 extension headers.
- */
- KASSERT(ip6->ip6_nxt == IPPROTO_TCP, ("%s: CSUM_TSO "
- "with ip6_nxt != TCP: %u", __func__, ip6->ip6_nxt));
- tcp = (struct tcphdr *)(ip6 + 1);
+ if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr))
ctrl |= F_LSO_IPV6;
- ctrl |= V_LSO_IPHDR_LEN(sizeof(*ip6) >> 2) |
- V_LSO_TCPHDR_LEN(tcp->th_off);
- break;
- }
-#endif
-#ifdef INET
- case ETHERTYPE_IP:
- {
- struct ip *ip = l3hdr;
-
- tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
- ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
- V_LSO_TCPHDR_LEN(tcp->th_off);
- break;
- }
-#endif
- default:
- panic("%s: CSUM_TSO but no supported IP version "
- "(0x%04x)", __func__, eh_type);
- }
lso->lso_ctrl = htobe32(ctrl);
lso->ipid_ofst = htobe16(0);
- lso->mss = htobe16(m->m_pkthdr.tso_segsz);
+ lso->mss = htobe16(m0->m_pkthdr.tso_segsz);
lso->seqno_offset = htobe32(0);
lso->len = htobe32(pktlen);
@@ -3764,48 +3985,36 @@ write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
/* Checksum offload */
ctrl1 = 0;
- if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
+ if (needs_l3_csum(m0) == 0)
ctrl1 |= F_TXPKT_IPCSUM_DIS;
- if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
- CSUM_TCP_IPV6 | CSUM_TSO)))
+ if (needs_l4_csum(m0) == 0)
ctrl1 |= F_TXPKT_L4CSUM_DIS;
- if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
+ if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
txq->txcsum++; /* some hardware assistance provided */
/* VLAN tag insertion */
- if (m->m_flags & M_VLANTAG) {
- ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
+ if (needs_vlan_insertion(m0)) {
+ ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag);
txq->vlan_insertion++;
}
/* CPL header */
- cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
- V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
+ cpl->ctrl0 = txq->cpl_ctrl0;
cpl->pack = 0;
cpl->len = htobe16(pktlen);
cpl->ctrl1 = htobe64(ctrl1);
- /* Software descriptor */
- txsd = &txq->sdesc[eq->pidx];
- txsd->desc_used = ndesc;
-
- eq->pending += ndesc;
- eq->avail -= ndesc;
- eq->pidx += ndesc;
- if (eq->pidx >= eq->cap)
- eq->pidx -= eq->cap;
-
/* SGL */
dst = (void *)(cpl + 1);
- if (sgl->nsegs > 0) {
- txsd->credits = 1;
+ if (nsegs > 0) {
+
+ write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx);
txq->sgl_wrs++;
- write_sgl_to_txd(eq, sgl, &dst);
} else {
- txsd->credits = 0;
- txq->imm_wrs++;
- for (; m; m = m->m_next) {
+ struct mbuf *m;
+
+ for (m = m0; m != NULL; m = m->m_next) {
copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
#ifdef INVARIANTS
pktlen -= m->m_len;
@@ -3814,245 +4023,225 @@ write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
#ifdef INVARIANTS
KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
#endif
-
+ txq->imm_wrs++;
}
txq->txpkt_wrs++;
- return (0);
+
+ txsd = &txq->sdesc[eq->pidx];
+ txsd->m = m0;
+ txsd->desc_used = ndesc;
+
+ return (ndesc);
}
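
A worked example of the new LSO path, which relies on the header lengths recorded when the packet was parsed earlier in the tx path (the KASSERT above enforces this): a plain TCP/IPv4 frame with no VLAN tag has l2hlen 14, l3hlen 20 and l4hlen 20, so the control word gets V_LSO_IPHDR_LEN(20 >> 2) = 5 and V_LSO_TCPHDR_LEN(20 >> 2) = 5, with neither the V_LSO_ETHHDR_LEN(1) bump (taken only for an ether_vlan_header-sized l2hlen) nor F_LSO_IPV6 set.
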
-/*
- * Returns 0 to indicate that m has been accepted into a coalesced tx work
- * request. It has either been folded into txpkts or txpkts was flushed and m
- * has started a new coalesced work request (as the first frame in a fresh
- * txpkts).
- *
- * Returns non-zero to indicate a failure - caller is responsible for
- * transmitting m, if there was anything in txpkts it has been flushed.
- */
static int
-add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
- struct mbuf *m, struct sgl *sgl)
+try_txpkts(struct mbuf *m, struct mbuf *n, struct txpkts *txp, u_int available)
{
- struct sge_eq *eq = &txq->eq;
- int can_coalesce;
- struct tx_sdesc *txsd;
- int flits;
-
- TXQ_LOCK_ASSERT_OWNED(txq);
+ u_int needed, nsegs1, nsegs2, l1, l2;
- KASSERT(sgl->nsegs, ("%s: can't coalesce imm data", __func__));
+ if (cannot_use_txpkts(m) || cannot_use_txpkts(n))
+ return (1);
- if (txpkts->npkt > 0) {
- flits = TXPKTS_PKT_HDR + sgl->nflits;
- can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
- txpkts->nflits + flits <= TX_WR_FLITS &&
- txpkts->nflits + flits <= eq->avail * 8 &&
- txpkts->plen + m->m_pkthdr.len < 65536;
+ nsegs1 = mbuf_nsegs(m);
+ nsegs2 = mbuf_nsegs(n);
+ if (nsegs1 + nsegs2 == 2) {
+ txp->wr_type = 1;
+ l1 = l2 = txpkts1_len16();
+ } else {
+ txp->wr_type = 0;
+ l1 = txpkts0_len16(nsegs1);
+ l2 = txpkts0_len16(nsegs2);
+ }
+ txp->len16 = howmany(sizeof(struct fw_eth_tx_pkts_wr), 16) + l1 + l2;
+ needed = howmany(txp->len16, EQ_ESIZE / 16);
+ if (needed > SGE_MAX_WR_NDESC || needed > available)
+ return (1);
- if (can_coalesce) {
- txpkts->npkt++;
- txpkts->nflits += flits;
- txpkts->plen += m->m_pkthdr.len;
+ txp->plen = m->m_pkthdr.len + n->m_pkthdr.len;
+ if (txp->plen > 65535)
+ return (1);
- txsd = &txq->sdesc[eq->pidx];
- txsd->credits++;
+ txp->npkt = 2;
+ set_mbuf_len16(m, l1);
+ set_mbuf_len16(n, l2);
- return (0);
- }
-
- /*
- * Couldn't coalesce m into txpkts. The first order of business
- * is to send txpkts on its way. Then we'll revisit m.
- */
- write_txpkts_wr(txq, txpkts);
- }
+ return (0);
+}
- /*
- * Check if we can start a new coalesced tx work request with m as
- * the first packet in it.
- */
+static int
+add_to_txpkts(struct mbuf *m, struct txpkts *txp, u_int available)
+{
+ u_int plen, len16, needed, nsegs;
- KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
+ MPASS(txp->wr_type == 0 || txp->wr_type == 1);
- flits = TXPKTS_WR_HDR + sgl->nflits;
- can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
- flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
+ nsegs = mbuf_nsegs(m);
+ if (needs_tso(m) || (txp->wr_type == 1 && nsegs != 1))
+ return (1);
- if (can_coalesce == 0)
- return (EINVAL);
+ plen = txp->plen + m->m_pkthdr.len;
+ if (plen > 65535)
+ return (1);
- /*
- * Start a fresh coalesced tx WR with m as the first frame in it.
- */
- txpkts->npkt = 1;
- txpkts->nflits = flits;
- txpkts->flitp = &eq->desc[eq->pidx].flit[2];
- txpkts->plen = m->m_pkthdr.len;
+ if (txp->wr_type == 0)
+ len16 = txpkts0_len16(nsegs);
+ else
+ len16 = txpkts1_len16();
+ needed = howmany(txp->len16 + len16, EQ_ESIZE / 16);
+ if (needed > SGE_MAX_WR_NDESC || needed > available)
+ return (1);
- txsd = &txq->sdesc[eq->pidx];
- txsd->credits = 1;
+ txp->npkt++;
+ txp->plen = plen;
+ txp->len16 += len16;
+ set_mbuf_len16(m, len16);
return (0);
}
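
A worked example of the two WR types, using the same 16-byte struct-size assumptions as above plus a 16-byte fw_eth_tx_pkts_wr header: two single-segment, non-TSO frames select wr_type 1, so txp->len16 = 1 + txpkts1_len16() + txpkts1_len16() = 5, and each further single-segment frame folded in by add_to_txpkts costs only 2 more. Eight such frames therefore fit in howmany(1 + 8 * 2, EQ_ESIZE / 16) = 5 hardware descriptors instead of the 8 they would consume as individual txpkt WRs. If either of the first two frames needs more than one segment, the pair falls back to wr_type 0, whose per-frame ulp_txpkt and ulptx_idata wrappers make txpkts0_len16(nsegs) correspondingly larger.
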
/*
- * Note that write_txpkts_wr can never run out of hardware descriptors (but
- * write_txpkt_wr can). add_to_txpkts ensures that a frame is accepted for
- * coalescing only if sufficient hardware descriptors are available.
+ * Write a txpkts WR for the packets in txp to the hardware descriptors, update
+ * the software descriptor, and advance the pidx. It is guaranteed that enough
+ * descriptors are available.
+ *
+ * The return value is the # of hardware descriptors used.
*/
-static void
-write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
+static u_int
+write_txpkts_wr(struct sge_txq *txq, struct fw_eth_tx_pkts_wr *wr,
+ struct mbuf *m0, const struct txpkts *txp, u_int available)
{
struct sge_eq *eq = &txq->eq;
- struct fw_eth_tx_pkts_wr *wr;
struct tx_sdesc *txsd;
+ struct cpl_tx_pkt_core *cpl;
uint32_t ctrl;
- int ndesc;
+ uint64_t ctrl1;
+ int ndesc, checkwrap;
+ struct mbuf *m;
+ void *flitp;
TXQ_LOCK_ASSERT_OWNED(txq);
+ MPASS(txp->npkt > 0);
+ MPASS(txp->plen < 65536);
+ MPASS(m0 != NULL);
+ MPASS(m0->m_nextpkt != NULL);
+ MPASS(txp->len16 <= howmany(SGE_MAX_WR_LEN, 16));
+ MPASS(available > 0 && available < eq->sidx);
- ndesc = howmany(txpkts->nflits, 8);
+ ndesc = howmany(txp->len16, EQ_ESIZE / 16);
+ MPASS(ndesc <= available);
- wr = (void *)&eq->desc[eq->pidx];
+ MPASS(wr == (void *)&eq->desc[eq->pidx]);
wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
- ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
- if (eq->avail == ndesc) {
- if (!(eq->flags & EQ_CRFLUSHED)) {
- ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
- eq->flags |= EQ_CRFLUSHED;
- }
- eq->flags |= EQ_STALLED;
- }
+ ctrl = V_FW_WR_LEN16(txp->len16);
wr->equiq_to_len16 = htobe32(ctrl);
- wr->plen = htobe16(txpkts->plen);
- wr->npkt = txpkts->npkt;
- wr->r3 = wr->type = 0;
-
- /* Everything else already written */
-
- txsd = &txq->sdesc[eq->pidx];
- txsd->desc_used = ndesc;
-
- KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
-
- eq->pending += ndesc;
- eq->avail -= ndesc;
- eq->pidx += ndesc;
- if (eq->pidx >= eq->cap)
- eq->pidx -= eq->cap;
+ wr->plen = htobe16(txp->plen);
+ wr->npkt = txp->npkt;
+ wr->r3 = 0;
+ wr->type = txp->wr_type;
+ flitp = wr + 1;
- txq->txpkts_pkts += txpkts->npkt;
- txq->txpkts_wrs++;
- txpkts->npkt = 0; /* emptied */
-}
+ /*
+ * At this point we are 16B into a hardware descriptor. If checkwrap is
+ * set then we know the WR is going to wrap around somewhere. We'll
+ * check for that at appropriate points.
+ */
+ checkwrap = eq->sidx - ndesc < eq->pidx;
+ for (m = m0; m != NULL; m = m->m_nextpkt) {
+ if (txp->wr_type == 0) {
+ struct ulp_txpkt *ulpmc;
+ struct ulptx_idata *ulpsc;
+
+ /* ULP master command */
+ ulpmc = flitp;
+ ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
+ V_ULP_TXPKT_DEST(0) | V_ULP_TXPKT_FID(eq->iqid));
+ ulpmc->len = htobe32(mbuf_len16(m));
+
+ /* ULP subcommand */
+ ulpsc = (void *)(ulpmc + 1);
+ ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
+ F_ULP_TX_SC_MORE);
+ ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
+
+ cpl = (void *)(ulpsc + 1);
+ if (checkwrap &&
+ (uintptr_t)cpl == (uintptr_t)&eq->desc[eq->sidx])
+ cpl = (void *)&eq->desc[0];
+ txq->txpkts0_pkts += txp->npkt;
+ txq->txpkts0_wrs++;
+ } else {
+ cpl = flitp;
+ txq->txpkts1_pkts += txp->npkt;
+ txq->txpkts1_wrs++;
+ }
-static inline void
-write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
- struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
-{
- struct ulp_txpkt *ulpmc;
- struct ulptx_idata *ulpsc;
- struct cpl_tx_pkt_core *cpl;
- struct sge_eq *eq = &txq->eq;
- uintptr_t flitp, start, end;
- uint64_t ctrl;
- caddr_t dst;
+ /* Checksum offload */
+ ctrl1 = 0;
+ if (needs_l3_csum(m) == 0)
+ ctrl1 |= F_TXPKT_IPCSUM_DIS;
+ if (needs_l4_csum(m) == 0)
+ ctrl1 |= F_TXPKT_L4CSUM_DIS;
+ if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
+ CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
+ txq->txcsum++; /* some hardware assistance provided */
+
+ /* VLAN tag insertion */
+ if (needs_vlan_insertion(m)) {
+ ctrl1 |= F_TXPKT_VLAN_VLD |
+ V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
+ txq->vlan_insertion++;
+ }
- KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
+ /* CPL header */
+ cpl->ctrl0 = txq->cpl_ctrl0;
+ cpl->pack = 0;
+ cpl->len = htobe16(m->m_pkthdr.len);
+ cpl->ctrl1 = htobe64(ctrl1);
- start = (uintptr_t)eq->desc;
- end = (uintptr_t)eq->spg;
+ flitp = cpl + 1;
+ if (checkwrap &&
+ (uintptr_t)flitp == (uintptr_t)&eq->desc[eq->sidx])
+ flitp = (void *)&eq->desc[0];
- /* Checksum offload */
- ctrl = 0;
- if (!(m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)))
- ctrl |= F_TXPKT_IPCSUM_DIS;
- if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
- CSUM_TCP_IPV6 | CSUM_TSO)))
- ctrl |= F_TXPKT_L4CSUM_DIS;
- if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP |
- CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO))
- txq->txcsum++; /* some hardware assistance provided */
+ write_gl_to_txd(txq, m, (caddr_t *)(&flitp), checkwrap);
- /* VLAN tag insertion */
- if (m->m_flags & M_VLANTAG) {
- ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
- txq->vlan_insertion++;
}
- /*
- * The previous packet's SGL must have ended at a 16 byte boundary (this
- * is required by the firmware/hardware). It follows that flitp cannot
- * wrap around between the ULPTX master command and ULPTX subcommand (8
- * bytes each), and that it can not wrap around in the middle of the
- * cpl_tx_pkt_core either.
- */
- flitp = (uintptr_t)txpkts->flitp;
- KASSERT((flitp & 0xf) == 0,
- ("%s: last SGL did not end at 16 byte boundary: %p",
- __func__, txpkts->flitp));
-
- /* ULP master command */
- ulpmc = (void *)flitp;
- ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
- V_ULP_TXPKT_FID(eq->iqid));
- ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
- sizeof(*cpl) + 8 * sgl->nflits, 16));
-
- /* ULP subcommand */
- ulpsc = (void *)(ulpmc + 1);
- ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
- F_ULP_TX_SC_MORE);
- ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
-
- flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
- if (flitp == end)
- flitp = start;
-
- /* CPL_TX_PKT */
- cpl = (void *)flitp;
- cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
- V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
- cpl->pack = 0;
- cpl->len = htobe16(m->m_pkthdr.len);
- cpl->ctrl1 = htobe64(ctrl);
-
- flitp += sizeof(*cpl);
- if (flitp == end)
- flitp = start;
-
- /* SGL for this frame */
- dst = (caddr_t)flitp;
- txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
- txpkts->flitp = (void *)dst;
+ txsd = &txq->sdesc[eq->pidx];
+ txsd->m = m0;
+ txsd->desc_used = ndesc;
- KASSERT(((uintptr_t)dst & 0xf) == 0,
- ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
+ return (ndesc);
}
/*
* If the SGL ends on an address that is not 16 byte aligned, this function will
- * add a 0 filled flit at the end. It returns 1 in that case.
+ * add a 0 filled flit at the end.
*/
-static int
-write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
+static void
+write_gl_to_txd(struct sge_txq *txq, struct mbuf *m, caddr_t *to, int checkwrap)
{
- __be64 *flitp, *end;
+ struct sge_eq *eq = &txq->eq;
+ struct sglist *gl = txq->gl;
+ struct sglist_seg *seg;
+ __be64 *flitp, *wrap;
struct ulptx_sgl *usgl;
- bus_dma_segment_t *seg;
- int i, padded;
-
- KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
- ("%s: bad SGL - nsegs=%d, nflits=%d",
- __func__, sgl->nsegs, sgl->nflits));
+ int i, nflits, nsegs;
KASSERT(((uintptr_t)(*to) & 0xf) == 0,
("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
+ MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
+ MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
+ get_pkt_gl(m, gl);
+ nsegs = gl->sg_nseg;
+ MPASS(nsegs > 0);
+
+ nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
flitp = (__be64 *)(*to);
- end = flitp + sgl->nflits;
- seg = &sgl->seg[0];
+ wrap = (__be64 *)(&eq->desc[eq->sidx]);
+ seg = &gl->sg_segs[0];
usgl = (void *)flitp;
/*
@@ -4062,58 +4251,60 @@ write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
*/
usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
- V_ULPTX_NSGE(sgl->nsegs));
- usgl->len0 = htobe32(seg->ds_len);
- usgl->addr0 = htobe64(seg->ds_addr);
+ V_ULPTX_NSGE(nsegs));
+ usgl->len0 = htobe32(seg->ss_len);
+ usgl->addr0 = htobe64(seg->ss_paddr);
seg++;
- if ((uintptr_t)end <= (uintptr_t)eq->spg) {
+ if (checkwrap == 0 || (uintptr_t)(flitp + nflits) <= (uintptr_t)wrap) {
/* Won't wrap around at all */
- for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
- usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
- usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
+ for (i = 0; i < nsegs - 1; i++, seg++) {
+ usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
+ usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
}
if (i & 1)
usgl->sge[i / 2].len[1] = htobe32(0);
+ flitp += nflits;
} else {
/* Will wrap somewhere in the rest of the SGL */
/* 2 flits already written, write the rest flit by flit */
flitp = (void *)(usgl + 1);
- for (i = 0; i < sgl->nflits - 2; i++) {
- if ((uintptr_t)flitp == (uintptr_t)eq->spg)
+ for (i = 0; i < nflits - 2; i++) {
+ if (flitp == wrap)
flitp = (void *)eq->desc;
- *flitp++ = get_flit(seg, sgl->nsegs - 1, i);
+ *flitp++ = get_flit(seg, nsegs - 1, i);
}
- end = flitp;
}
- if ((uintptr_t)end & 0xf) {
- *(uint64_t *)end = 0;
- end++;
- padded = 1;
- } else
- padded = 0;
+ if (nflits & 1) {
+ MPASS(((uintptr_t)flitp) & 0xf);
+ *flitp++ = 0;
+ }
- if ((uintptr_t)end == (uintptr_t)eq->spg)
+ MPASS((((uintptr_t)flitp) & 0xf) == 0);
+ if (__predict_false(flitp == wrap))
*to = (void *)eq->desc;
else
- *to = (void *)end;
-
- return (padded);
+ *to = (void *)flitp;
}
static inline void
copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
{
- if (__predict_true((uintptr_t)(*to) + len <= (uintptr_t)eq->spg)) {
+
+ MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
+ MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
+
+ if (__predict_true((uintptr_t)(*to) + len <=
+ (uintptr_t)&eq->desc[eq->sidx])) {
bcopy(from, *to, len);
(*to) += len;
} else {
- int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
+ int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);
bcopy(from, *to, portion);
from += portion;
@@ -4124,21 +4315,21 @@ copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
}
static inline void
-ring_eq_db(struct adapter *sc, struct sge_eq *eq)
+ring_eq_db(struct adapter *sc, struct sge_eq *eq, u_int n)
{
- u_int db, pending;
+ u_int db;
+
+ MPASS(n > 0);
db = eq->doorbells;
- pending = eq->pending;
- if (pending > 1)
+ if (n > 1)
clrbit(&db, DOORBELL_WCWR);
- eq->pending = 0;
wmb();
switch (ffs(db) - 1) {
case DOORBELL_UDB:
- *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
- return;
+ *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
+ break;
case DOORBELL_WCWR: {
volatile uint64_t *dst, *src;
@@ -4149,69 +4340,84 @@ ring_eq_db(struct adapter *sc, struct sge_eq *eq)
* use relative qid (udb_qid is always 0). Only queues with
* doorbell segments can do WCWR.
*/
- KASSERT(eq->udb_qid == 0 && pending == 1,
+ KASSERT(eq->udb_qid == 0 && n == 1,
("%s: inappropriate doorbell (0x%x, %d, %d) for eq %p",
- __func__, eq->doorbells, pending, eq->pidx, eq));
+ __func__, eq->doorbells, n, eq->dbidx, eq));
dst = (volatile void *)((uintptr_t)eq->udb + UDBS_WR_OFFSET -
UDBS_DB_OFFSET);
- i = eq->pidx ? eq->pidx - 1 : eq->cap - 1;
+ i = eq->dbidx;
src = (void *)&eq->desc[i];
while (src != (void *)&eq->desc[i + 1])
*dst++ = *src++;
wmb();
- return;
+ break;
}
case DOORBELL_UDBWC:
- *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(pending));
+ *eq->udb = htole32(V_QID(eq->udb_qid) | V_PIDX(n));
wmb();
- return;
+ break;
case DOORBELL_KDB:
t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
- V_QID(eq->cntxt_id) | V_PIDX(pending));
- return;
+ V_QID(eq->cntxt_id) | V_PIDX(n));
+ break;
}
+
+ IDXINCR(eq->dbidx, n, eq->sidx);
}
-static inline int
-reclaimable(struct sge_eq *eq)
+static inline u_int
+reclaimable_tx_desc(struct sge_eq *eq)
{
- unsigned int cidx;
+ uint16_t hw_cidx;
- cidx = eq->spg->cidx; /* stable snapshot */
- cidx = be16toh(cidx);
+ hw_cidx = read_hw_cidx(eq);
+ return (IDXDIFF(hw_cidx, eq->cidx, eq->sidx));
+}
+
+static inline u_int
+total_available_tx_desc(struct sge_eq *eq)
+{
+ uint16_t hw_cidx, pidx;
+
+ hw_cidx = read_hw_cidx(eq);
+ pidx = eq->pidx;
- if (cidx >= eq->cidx)
- return (cidx - eq->cidx);
+ if (pidx == hw_cidx)
+ return (eq->sidx - 1);
else
- return (cidx + eq->cap - eq->cidx);
+ return (IDXDIFF(hw_cidx, pidx, eq->sidx) - 1);
+}
+
+static inline uint16_t
+read_hw_cidx(struct sge_eq *eq)
+{
+ struct sge_qstat *spg = (void *)&eq->desc[eq->sidx];
+ uint16_t cidx = spg->cidx; /* stable snapshot */
+
+ return (be16toh(cidx));
}
/*
- * There are "can_reclaim" tx descriptors ready to be reclaimed. Reclaim as
- * many as possible but stop when there are around "n" mbufs to free.
- *
- * The actual number reclaimed is provided as the return value.
+ * Reclaim 'n' descriptors approximately.
*/
-static int
-reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
+static u_int
+reclaim_tx_descs(struct sge_txq *txq, u_int n)
{
struct tx_sdesc *txsd;
- struct tx_maps *txmaps;
- struct tx_map *txm;
- unsigned int reclaimed, maps;
struct sge_eq *eq = &txq->eq;
+ u_int can_reclaim, reclaimed;
TXQ_LOCK_ASSERT_OWNED(txq);
+ MPASS(n > 0);
- if (can_reclaim == 0)
- can_reclaim = reclaimable(eq);
-
- maps = reclaimed = 0;
- while (can_reclaim && maps < n) {
+ reclaimed = 0;
+ can_reclaim = reclaimable_tx_desc(eq);
+ while (can_reclaim && reclaimed < n) {
int ndesc;
+ struct mbuf *m, *nextpkt;
txsd = &txq->sdesc[eq->cidx];
ndesc = txsd->desc_used;
@@ -4221,73 +4427,37 @@ reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
("%s: unexpected number of credits: %d, %d",
__func__, can_reclaim, ndesc));
- maps += txsd->credits;
-
+ for (m = txsd->m; m != NULL; m = nextpkt) {
+ nextpkt = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ m_freem(m);
+ }
reclaimed += ndesc;
can_reclaim -= ndesc;
-
- eq->cidx += ndesc;
- if (__predict_false(eq->cidx >= eq->cap))
- eq->cidx -= eq->cap;
- }
-
- txmaps = &txq->txmaps;
- txm = &txmaps->maps[txmaps->map_cidx];
- if (maps)
- prefetch(txm->m);
-
- eq->avail += reclaimed;
- KASSERT(eq->avail < eq->cap, /* avail tops out at (cap - 1) */
- ("%s: too many descriptors available", __func__));
-
- txmaps->map_avail += maps;
- KASSERT(txmaps->map_avail <= txmaps->map_total,
- ("%s: too many maps available", __func__));
-
- while (maps--) {
- struct tx_map *next;
-
- next = txm + 1;
- if (__predict_false(txmaps->map_cidx + 1 == txmaps->map_total))
- next = txmaps->maps;
- prefetch(next->m);
-
- bus_dmamap_unload(txq->tx_tag, txm->map);
- m_freem(txm->m);
- txm->m = NULL;
-
- txm = next;
- if (__predict_false(++txmaps->map_cidx == txmaps->map_total))
- txmaps->map_cidx = 0;
+ IDXINCR(eq->cidx, ndesc, eq->sidx);
}
return (reclaimed);
}
static void
-write_eqflush_wr(struct sge_eq *eq)
+tx_reclaim(void *arg, int n)
{
- struct fw_eq_flush_wr *wr;
+ struct sge_txq *txq = arg;
+ struct sge_eq *eq = &txq->eq;
- EQ_LOCK_ASSERT_OWNED(eq);
- KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
- KASSERT(!(eq->flags & EQ_CRFLUSHED), ("%s: flushed already", __func__));
-
- wr = (void *)&eq->desc[eq->pidx];
- bzero(wr, sizeof(*wr));
- wr->opcode = FW_EQ_FLUSH_WR;
- wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
- F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
-
- eq->flags |= (EQ_CRFLUSHED | EQ_STALLED);
- eq->pending++;
- eq->avail--;
- if (++eq->pidx == eq->cap)
- eq->pidx = 0;
+ do {
+ if (TXQ_TRYLOCK(txq) == 0)
+ break;
+ n = reclaim_tx_descs(txq, 32);
+ if (eq->cidx == eq->pidx)
+ eq->equeqidx = eq->pidx;
+ TXQ_UNLOCK(txq);
+ } while (n > 0);
}
static __be64
-get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
+get_flit(struct sglist_seg *segs, int nsegs, int idx)
{
int i = (idx / 3) * 2;
@@ -4295,16 +4465,16 @@ get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
case 0: {
__be64 rc;
- rc = htobe32(sgl[i].ds_len);
+ rc = htobe32(segs[i].ss_len);
if (i + 1 < nsegs)
- rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
+ rc |= (uint64_t)htobe32(segs[i + 1].ss_len) << 32;
return (rc);
}
case 1:
- return htobe64(sgl[i].ds_addr);
+ return (htobe64(segs[i].ss_paddr));
case 2:
- return htobe64(sgl[i + 1].ds_addr);
+ return (htobe64(segs[i + 1].ss_paddr));
}
return (0);
@@ -4499,6 +4669,27 @@ add_fl_to_sfl(struct adapter *sc, struct sge_fl *fl)
mtx_unlock(&sc->sfl_lock);
}
+static void
+handle_wrq_egr_update(struct adapter *sc, struct sge_eq *eq)
+{
+ struct sge_wrq *wrq = (void *)eq;
+
+ atomic_readandclear_int(&eq->equiq);
+ taskqueue_enqueue(sc->tq[eq->tx_chan], &wrq->wrq_tx_task);
+}
+
+static void
+handle_eth_egr_update(struct adapter *sc, struct sge_eq *eq)
+{
+ struct sge_txq *txq = (void *)eq;
+
+ MPASS((eq->flags & EQ_TYPEMASK) == EQ_ETH);
+
+ atomic_readandclear_int(&eq->equiq);
+ mp_ring_check_drainage(txq->r, 0);
+ taskqueue_enqueue(sc->tq[eq->tx_chan], &txq->tx_reclaim_task);
+}
+
static int
handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m)
@@ -4508,22 +4699,15 @@ handle_sge_egr_update(struct sge_iq *iq, const struct rss_header *rss,
struct adapter *sc = iq->adapter;
struct sge *s = &sc->sge;
struct sge_eq *eq;
+ static void (*h[])(struct adapter *, struct sge_eq *) = {NULL,
+ &handle_wrq_egr_update, &handle_eth_egr_update,
+ &handle_wrq_egr_update};
KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
rss->opcode));
eq = s->eqmap[qid - s->eq_start];
- EQ_LOCK(eq);
- KASSERT(eq->flags & EQ_CRFLUSHED,
- ("%s: unsolicited egress update", __func__));
- eq->flags &= ~EQ_CRFLUSHED;
- eq->egr_update++;
-
- if (__predict_false(eq->flags & EQ_DOOMED))
- wakeup_one(eq);
- else if (eq->flags & EQ_STALLED && can_resume_tx(eq))
- taskqueue_enqueue(sc->tq[eq->tx_chan], &eq->tx_task);
- EQ_UNLOCK(eq);
+ (*h[eq->flags & EQ_TYPEMASK])(sc, eq);
return (0);
}
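
The handler table above is indexed by the EQ type kept in the low bits of eq->flags; spelled out as a switch it is equivalent to the sketch below (illustration only, the table dispatch is what the patch ships):

	switch (eq->flags & EQ_TYPEMASK) {
	case EQ_CTRL:
	case EQ_OFLD:
		/* Both are wrq-based: kick the deferred wrq tx task. */
		handle_wrq_egr_update(sc, eq);
		break;
	case EQ_ETH:
		/* NIC txq: check mp_ring drainage and schedule reclaim. */
		handle_eth_egr_update(sc, eq);
		break;
	}
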
diff --git a/sys/modules/cxgbe/if_cxgbe/Makefile b/sys/modules/cxgbe/if_cxgbe/Makefile
index e4828f7..a66e45a 100644
--- a/sys/modules/cxgbe/if_cxgbe/Makefile
+++ b/sys/modules/cxgbe/if_cxgbe/Makefile
@@ -15,6 +15,7 @@ SRCS+= pci_if.h
SRCS+= t4_hw.c
SRCS+= t4_l2t.c
SRCS+= t4_main.c
+SRCS+= t4_mp_ring.c
SRCS+= t4_netmap.c
SRCS+= t4_sge.c
SRCS+= t4_tracer.c