summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorkmacy <kmacy@FreeBSD.org>2008-01-15 08:08:09 +0000
committerkmacy <kmacy@FreeBSD.org>2008-01-15 08:08:09 +0000
commit90f3f8edb2bda24c666a8a36141ac4c371a0f58a (patch)
treec9b6e1e52defdce1aea0f33fb7f6d611bbe2c8c3
parent409fbbd08fad855cfd3efaab185d2f4091e931f9 (diff)
downloadFreeBSD-src-90f3f8edb2bda24c666a8a36141ac4c371a0f58a.zip
FreeBSD-src-90f3f8edb2bda24c666a8a36141ac4c371a0f58a.tar.gz
- Simplify mb_free_ext_fast
- increase asserts for mbuf accounting - track outstanding mbufs (maps very closely to leaked) - actually only create one thread per port if !multiq Oddly enough this fixes the use after free - move txq_segs to stack in t3_encap - add checks that pidx doesn't move pass cidx - simplify mbuf free logic in collapse mbufs routine
-rw-r--r--sys/dev/cxgb/cxgb_adapter.h3
-rw-r--r--sys/dev/cxgb/cxgb_multiq.c15
-rw-r--r--sys/dev/cxgb/cxgb_sge.c35
-rw-r--r--sys/dev/cxgb/sys/mvec.h13
-rw-r--r--sys/dev/cxgb/sys/uipc_mvec.c65
5 files changed, 91 insertions, 40 deletions
diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h
index 6910ba9..65c3fe7 100644
--- a/sys/dev/cxgb/cxgb_adapter.h
+++ b/sys/dev/cxgb/cxgb_adapter.h
@@ -298,8 +298,7 @@ struct sge_txq {
unsigned long txq_frees;
struct mtx lock;
struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1];
- bus_dma_segment_t txq_segs[TX_MAX_SEGS];
-#define TXQ_NAME_LEN 32
+ #define TXQ_NAME_LEN 32
char lockbuf[TXQ_NAME_LEN];
};
diff --git a/sys/dev/cxgb/cxgb_multiq.c b/sys/dev/cxgb/cxgb_multiq.c
index 537b1db..b1c402c 100644
--- a/sys/dev/cxgb/cxgb_multiq.c
+++ b/sys/dev/cxgb/cxgb_multiq.c
@@ -422,6 +422,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
txq = &qs->txq[TXQ_ETH];
mtx_assert(&txq->lock, MA_OWNED);
+ KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx));
retry:
if (!pi->link_config.link_ok)
@@ -667,13 +668,14 @@ cxgb_pcpu_startup_threads(struct adapter *sc)
#else
nqsets = 1;
#endif
- for (j = 0; j < pi->nqsets; ++j) {
+ for (j = 0; j < nqsets; ++j) {
struct sge_qset *qs;
qs = &sc->sge.qs[pi->first_qset + j];
qs->port = pi;
qs->qs_cpuid = ((pi->first_qset + j) % mp_ncpus);
- device_printf(sc->dev, "starting thread for %d\n", qs->qs_cpuid);
+ device_printf(sc->dev, "starting thread for %d\n",
+ qs->qs_cpuid);
kproc_create(cxgb_pcpu_start_proc, qs, &p,
RFNOWAIT, 0, "cxgbsp");
@@ -686,11 +688,18 @@ void
cxgb_pcpu_shutdown_threads(struct adapter *sc)
{
int i, j;
+ int nqsets;
+
+#ifdef IFNET_MULTIQUEUE
+ nqsets = pi->nqsets;
+#else
+ nqsets = 1;
+#endif
for (i = 0; i < sc->params.nports; i++) {
struct port_info *pi = &sc->port[i];
int first = pi->first_qset;
- for (j = 0; j < pi->nqsets; j++) {
+ for (j = 0; j < nqsets; j++) {
struct sge_qset *qs = &sc->sge.qs[first + j];
qs->qs_flags |= QS_EXITING;
diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c
index c854d9d..06ab98d 100644
--- a/sys/dev/cxgb/cxgb_sge.c
+++ b/sys/dev/cxgb/cxgb_sge.c
@@ -927,7 +927,17 @@ txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
txq->unacked &= 7;
txqs->pidx = txq->pidx;
txq->pidx += ndesc;
-
+#ifdef INVARIANTS
+ if (((txqs->pidx > txq->cidx) &&
+ (txq->pidx < txqs->pidx) &&
+ (txq->pidx >= txq->cidx)) ||
+ ((txqs->pidx < txq->cidx) &&
+ (txq->pidx >= txq-> cidx)) ||
+ ((txqs->pidx < txq->cidx) &&
+ (txq->cidx < txqs->pidx)))
+ panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
+ txqs->pidx, txq->pidx, txq->cidx);
+#endif
if (txq->pidx >= txq->size) {
txq->pidx -= txq->size;
txq->gen ^= 1;
@@ -1205,23 +1215,23 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
struct work_request_hdr *wrp;
struct tx_sw_desc *txsd;
struct sg_ent *sgp, *sgl;
- bus_dma_segment_t *segs;
uint32_t wr_hi, wr_lo, sgl_flits;
+ bus_dma_segment_t segs[TX_MAX_SEGS];
struct tx_desc *txd;
struct mbuf_vec *mv;
struct mbuf_iovec *mi;
DPRINTF("t3_encap cpu=%d ", curcpu);
+ KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx));
mi = NULL;
pi = qs->port;
sc = pi->adapter;
txq = &qs->txq[TXQ_ETH];
- txsd = &txq->sdesc[txq->pidx];
txd = &txq->desc[txq->pidx];
+ txsd = &txq->sdesc[txq->pidx];
sgl = txq->txq_sgl;
- segs = txq->txq_segs;
m0 = *m;
DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
@@ -1240,6 +1250,8 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
#endif
KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p",
txsd->mi.mi_base));
+ if (cxgb_debug)
+ printf("uipc_mvec PIO_LEN=%ld\n", PIO_LEN);
if (count > 1) {
panic("count > 1 not support in CVS\n");
@@ -1253,7 +1265,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
}
KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
- if (m0->m_pkthdr.len > PIO_LEN) {
+ if (!(m0->m_pkthdr.len <= PIO_LEN)) {
mi_collapse_mbuf(&txsd->mi, m0);
mi = &txsd->mi;
}
@@ -1393,8 +1405,11 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
check_ring_tx_db(pi->adapter, txq);
- if ((m0->m_type == MT_DATA) && ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT)) {
+ if ((m0->m_type == MT_DATA) &&
+ ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
+ (m0->m_ext.ext_type != EXT_PACKET)) {
m0->m_flags &= ~M_EXT ;
+ mbufs_outstanding--;
m_free(m0);
}
@@ -1797,6 +1812,7 @@ t3_free_tx_desc(struct sge_txq *q, int reclaimable)
cidx = q->cidx;
txsd = &q->sdesc[cidx];
DPRINTF("reclaiming %d WR\n", reclaimable);
+ mtx_assert(&q->lock, MA_OWNED);
while (reclaimable--) {
DPRINTF("cidx=%d d=%p\n", cidx, txsd);
if (txsd->mi.mi_base != NULL) {
@@ -2208,6 +2224,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
}
init_qset_cntxt(q, id);
+ q->idx = id;
if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
@@ -3203,7 +3220,11 @@ t3_add_attach_sysctls(adapter_t *sc)
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"ext_freed",
CTLFLAG_RD, &cxgb_ext_freed,
- 0, "#times a cluster was freed through ext_free");
+ 0, "#times a cluster was freed through ext_free");
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO,
+ "mbufs_outstanding",
+ CTLFLAG_RD, &mbufs_outstanding,
+ 0, "#mbufs in flight in the driver");
}
diff --git a/sys/dev/cxgb/sys/mvec.h b/sys/dev/cxgb/sys/mvec.h
index 57a6478..8395eae 100644
--- a/sys/dev/cxgb/sys/mvec.h
+++ b/sys/dev/cxgb/sys/mvec.h
@@ -45,6 +45,7 @@ void cxgb_cache_refill(void);
extern int cxgb_cached_allocations;
extern int cxgb_cached;
extern int cxgb_ext_freed;
+extern int mbufs_outstanding;
#define mtomv(m) ((struct mbuf_vec *)((m)->m_pktdat))
#define M_IOVEC 0x100000 /* mbuf immediate data area is used for cluster ptrs */
@@ -162,7 +163,7 @@ m_explode(struct mbuf *m)
static __inline void
busdma_map_mbuf_fast(struct mbuf *m, bus_dma_segment_t *seg)
{
- seg->ds_addr = pmap_kextract((vm_offset_t)m->m_data);
+ seg->ds_addr = pmap_kextract(mtod(m, vm_offset_t));
seg->ds_len = m->m_len;
}
@@ -229,11 +230,17 @@ m_free_iovec(struct mbuf *m, int type)
static __inline void
m_freem_iovec(struct mbuf_iovec *mi)
{
- struct mbuf *m;
+ struct mbuf *m = (struct mbuf *)mi->mi_base;
switch (mi->mi_type) {
case EXT_MBUF:
- m_free_fast((struct mbuf *)mi->mi_base);
+#ifdef PIO_LEN
+ KASSERT(m->m_pkthdr.len > PIO_LEN, ("freeing PIO buf"));
+#endif
+ KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf"));
+ KASSERT(m->m_next == NULL, ("freeing chain"));
+ mbufs_outstanding--;
+ m_free_fast(m);
break;
case EXT_IOVEC:
case EXT_CLIOVEC:
diff --git a/sys/dev/cxgb/sys/uipc_mvec.c b/sys/dev/cxgb/sys/uipc_mvec.c
index 9dc902b..4d4354f 100644
--- a/sys/dev/cxgb/sys/uipc_mvec.c
+++ b/sys/dev/cxgb/sys/uipc_mvec.c
@@ -75,6 +75,7 @@ extern uint32_t mb_free_vec_free;
uma_zone_t zone_miovec;
static int mi_inited = 0;
+int mbufs_outstanding = 0;
void
mi_init(void)
@@ -198,12 +199,18 @@ busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
struct mbuf *marray[TX_MAX_SEGS];
int i, type, seg_count, defragged = 0, err = 0;
struct mbuf_vec *mv;
+ int skipped, freed, outstanding;
+
+
KASSERT(n->m_pkthdr.len, ("packet has zero header len"));
if (n->m_flags & M_PKTHDR && !SLIST_EMPTY(&n->m_pkthdr.tags))
m_tag_delete_chain(n, NULL);
+ if (cxgb_debug)
+ printf("cxgb_sge PIO_LEN=%ld\n", PIO_LEN);
+
if (n->m_pkthdr.len <= PIO_LEN)
return (0);
retry:
@@ -211,16 +218,21 @@ retry:
if (n->m_next == NULL) {
busdma_map_mbuf_fast(n, segs);
*nsegs = 1;
+ if ((n->m_flags & M_NOFREE) == 0)
+ mbufs_outstanding++;
return (0);
}
+ skipped = freed = outstanding = 0;
while (n && seg_count < TX_MAX_SEGS) {
marray[seg_count] = n;
-
+
/*
* firmware doesn't like empty segments
*/
- if (__predict_true(n->m_len != 0))
+ if (__predict_true(n->m_len != 0))
seg_count++;
+ else
+ skipped++;
n = n->m_next;
}
@@ -265,24 +277,35 @@ retry:
}
n = *m;
while (n) {
- if (n->m_ext.ext_type == EXT_PACKET)
+ if (n->m_len == 0)
+ /* do nothing - free if mbuf or cluster */;
+ else if (((n->m_flags & M_EXT) == 0) ||
+ ((n->m_flags & M_EXT) && (n->m_ext.ext_type == EXT_PACKET)) ||
+ (n->m_flags & M_NOFREE))
goto skip;
- else if (n->m_len == 0)
- /* do nothing */;
else if ((n->m_flags & (M_EXT|M_NOFREE)) == M_EXT)
n->m_flags &= ~M_EXT;
- else
- goto skip;
mhead = n->m_next;
m_free(n);
n = mhead;
+ freed++;
continue;
skip:
+ /*
+ * is an immediate mbuf or is from the packet zone
+ */
+ outstanding++;
n = n->m_next;
}
*nsegs = seg_count;
*m = m0;
DPRINTF("pktlen=%d m0=%p *m=%p m=%p\n", m0->m_pkthdr.len, m0, *m, m);
+ mbufs_outstanding += outstanding;
+ KASSERT(outstanding + freed == skipped + seg_count,
+ ("outstanding=%d freed=%d skipped=%d seg_count=%d",
+ outstanding, freed, skipped, seg_count));
+
+
return (0);
err_out:
m_freem(*m);
@@ -325,6 +348,7 @@ busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, bus_dma_segment_t *segs,
(*mp)->m_next = (*mp)->m_nextpkt = NULL;
if (((*mp)->m_flags & (M_EXT|M_NOFREE)) == M_EXT) {
(*mp)->m_flags &= ~M_EXT;
+ mbufs_outstanding--;
m_free(*mp);
}
}
@@ -336,29 +360,17 @@ busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, bus_dma_segment_t *segs,
void
mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
{
- u_int cnt;
int dofree;
caddr_t cl;
/* Account for lazy ref count assign. */
dofree = (mi->mi_refcnt == NULL);
-
- /*
- * This is tricky. We need to make sure to decrement the
- * refcount in a safe way but to also clean up if we're the
- * last reference. This method seems to do it without race.
- */
- while (dofree == 0) {
- cnt = *(mi->mi_refcnt);
- if (cnt == 1) {
- dofree = 1;
- break;
- }
- if (atomic_cmpset_int(mi->mi_refcnt, cnt, cnt - 1)) {
- if (cnt == 1)
- dofree = 1;
- break;
- }
+ if (dofree == 0) {
+ KASSERT(mi->mi_type != EXT_MBUF,
+ ("refcnt must be null for mbuf"));
+ if (*(mi->mi_refcnt) == 1 ||
+ atomic_fetchadd_int(mi->mi_refcnt, -1) == 1)
+ dofree = 1;
}
if (dofree == 0)
return;
@@ -366,6 +378,8 @@ mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
cl = mi->mi_base;
switch (type) {
case EXT_MBUF:
+ KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf"));
+ mbufs_outstanding--;
m_free_fast((struct mbuf *)cl);
break;
case EXT_CLUSTER:
@@ -395,6 +409,7 @@ mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
mi->mi_ext.ext_args);
break;
case EXT_PACKET:
+ mbufs_outstanding--;
if (*(mi->mi_refcnt) == 0)
*(mi->mi_refcnt) = 1;
uma_zfree(zone_pack, mi->mi_mbuf);
OpenPOWER on IntegriCloud