summary refs log tree commit diff stats
path: root/sys
diff options
context:
space:
mode:
author kmacy <kmacy@FreeBSD.org> 2007-08-25 21:07:37 +0000
committer kmacy <kmacy@FreeBSD.org> 2007-08-25 21:07:37 +0000
commit 2e3a8d80169839d59c7e290fa44af4d1f3b691c2 (patch)
tree dbd4c47fdf1dd54fe3f51b7c6d85f5d8d9f7006f /sys
parent ca68d57012f9e4eaac06c4f3791837dac6fe5832 (diff)
download FreeBSD-src-2e3a8d80169839d59c7e290fa44af4d1f3b691c2.zip
FreeBSD-src-2e3a8d80169839d59c7e290fa44af4d1f3b691c2.tar.gz
Fixes for 4 port and small packet optimization
- remove cpl->iff panic - we can't know the port number from the rspq on the 4-port
- pick the ifnet based on the interface in the CPL header
- switch to using qset 0 for egress on the 4-port for now - may change when we start using RSS
- move ether_ifdetach to before the port lock gets deinitialized to avoid a hang in the case where there are BPF peers (cxgb_ioctl is called indirectly when BPF peers are present)
- don't call t3_mac_reset if multiport is set; this was causing tx errors by misconfiguring the MAC on the 4-port
- change V_TXPKT_INTF to use txpkt_intf as the interfaces are not contiguous
- free the mbuf immediately in the case where the payload is small enough to be copied into the rspq
- only update the coalesce timer for a queue if packets were taken off of it
- add in missed 20ms DELAY in initialization of vsc8211
- prompt MFC as this only applies to the 4-port which is currently completely broken - OK'd by kensmith

Supported by: Chelsio
Approved by: re (blanket)
MFC after: 0 days
Diffstat (limited to 'sys')
-rw-r--r-- sys/dev/cxgb/common/cxgb_vsc8211.c 1
-rw-r--r-- sys/dev/cxgb/cxgb_adapter.h 9
-rw-r--r-- sys/dev/cxgb/cxgb_lro.c 13
-rw-r--r-- sys/dev/cxgb/cxgb_main.c 109
-rw-r--r-- sys/dev/cxgb/cxgb_sge.c 69
5 files changed, 125 insertions, 76 deletions
diff --git a/sys/dev/cxgb/common/cxgb_vsc8211.c b/sys/dev/cxgb/common/cxgb_vsc8211.c
index c3b3532..382ecc7 100644
--- a/sys/dev/cxgb/common/cxgb_vsc8211.c
+++ b/sys/dev/cxgb/common/cxgb_vsc8211.c
@@ -248,4 +248,5 @@ void t3_vsc8211_phy_prep(struct cphy *phy, adapter_t *adapter, int phy_addr,
const struct mdio_ops *mdio_ops)
{
cphy_init(phy, adapter, phy_addr, &vsc8211_ops, mdio_ops);
+ t3_os_sleep(20); /* PHY needs ~10ms to start responding to MDIO */
}
diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h
index 6605ea9..9964bab 100644
--- a/sys/dev/cxgb/cxgb_adapter.h
+++ b/sys/dev/cxgb/cxgb_adapter.h
@@ -117,10 +117,13 @@ struct port_info {
#else
struct mtx lock;
#endif
- int port_id;
- uint8_t hw_addr[ETHER_ADDR_LEN];
+ uint8_t port_id;
+ uint8_t tx_chan;
+ uint8_t txpkt_intf;
uint8_t nqsets;
uint8_t first_qset;
+
+ uint8_t hw_addr[ETHER_ADDR_LEN];
struct taskqueue *tq;
struct task start_task;
struct task timer_reclaim_task;
@@ -515,7 +518,7 @@ void t3_sge_deinit_sw(adapter_t *);
void t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro);
-void t3_rx_eth(struct port_info *p, struct sge_rspq *rq, struct mbuf *m, int ethpad);
+void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad);
void t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state);
void t3_add_sysctls(adapter_t *sc);
diff --git a/sys/dev/cxgb/cxgb_lro.c b/sys/dev/cxgb/cxgb_lro.c
index 9f64a0d..a502859 100644
--- a/sys/dev/cxgb/cxgb_lro.c
+++ b/sys/dev/cxgb/cxgb_lro.c
@@ -199,7 +199,7 @@ lro_flush_session(struct sge_qset *qs, struct t3_lro_session *s, struct mbuf *m)
MBUF_HEADER_CHECK(sm);
sm->m_flags |= M_LRO;
- t3_rx_eth(qs->port, &qs->rspq, sm, 2);
+ t3_rx_eth(qs->port->adapter, &qs->rspq, sm, 2);
if (m) {
s->head = m;
@@ -341,7 +341,6 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
struct ip *ih;
struct tcphdr *th;
struct t3_lro_session *s = NULL;
- struct port_info *pi = qs->port;
if (lro == 0)
goto no_lro;
@@ -349,9 +348,6 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
if (!can_lro_packet(cpl, rss_csum))
goto no_lro;
- if (&adap->port[cpl->iff] != pi)
- panic("bad port index %d\n", cpl->iff);
-
ih = (struct ip *)(eh + 1);
th = (struct tcphdr *)(ih + 1);
@@ -366,9 +362,11 @@ t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m,
if (lro_update_session(s, m)) {
lro_flush_session(qs, s, m);
}
+#ifdef notyet
if (__predict_false(s->head->m_pkthdr.len + pi->ifp->if_mtu > 65535)) {
lro_flush_session(qs, s, NULL);
- }
+ }
+#endif
}
qs->port_stats[SGE_PSTATS_LRO_QUEUED]++;
@@ -380,7 +378,8 @@ no_lro:
if (m->m_len == 0 || m->m_pkthdr.len == 0 || (m->m_flags & M_PKTHDR) == 0)
DPRINTF("rx_eth_lro mbuf len=%d pktlen=%d flags=0x%x\n",
m->m_len, m->m_pkthdr.len, m->m_flags);
- t3_rx_eth(pi, rq, m, ethpad);
+
+ t3_rx_eth(adap, rq, m, ethpad);
}
void
diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c
index ef10aab..583214d 100644
--- a/sys/dev/cxgb/cxgb_main.c
+++ b/sys/dev/cxgb/cxgb_main.c
@@ -113,6 +113,8 @@ static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned i
static void cxgb_get_regs(adapter_t *sc, struct ifconf_regs *regs, uint8_t *buf);
static int cxgb_get_regs_len(void);
static int offload_open(struct port_info *pi);
+static void touch_bars(device_t dev);
+
#ifdef notyet
static int offload_close(struct toedev *tdev);
#endif
@@ -412,7 +414,8 @@ cxgb_controller_attach(device_t dev)
"PCIe x%d Link, expect reduced performance\n",
sc->link_width);
}
-
+
+ touch_bars(dev);
pci_enable_busmaster(dev);
/*
* Allocate the registers and make them available to the driver.
@@ -551,17 +554,23 @@ cxgb_controller_attach(device_t dev)
* will be done in these children.
*/
for (i = 0; i < (sc)->params.nports; i++) {
+ struct port_info *pi;
+
if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
device_printf(dev, "failed to add child port\n");
error = EINVAL;
goto out;
}
- sc->port[i].adapter = sc;
- sc->port[i].nqsets = port_qsets;
- sc->port[i].first_qset = i*port_qsets;
- sc->port[i].port_id = i;
+ pi = &sc->port[i];
+ pi->adapter = sc;
+ pi->nqsets = port_qsets;
+ pi->first_qset = i*port_qsets;
+ pi->port_id = i;
+ pi->tx_chan = i >= ai->nports0;
+ pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
+ sc->rxpkt_map[pi->txpkt_intf] = i;
sc->portdev[i] = child;
- device_set_softc(child, &sc->port[i]);
+ device_set_softc(child, pi);
}
if ((error = bus_generic_attach(dev)) != 0)
goto out;
@@ -633,22 +642,25 @@ cxgb_free(struct adapter *sc)
sc->msix_regs_res);
}
- t3_sge_deinit_sw(sc);
-
if (sc->tq != NULL) {
taskqueue_drain(sc->tq, &sc->ext_intr_task);
taskqueue_drain(sc->tq, &sc->tick_task);
- taskqueue_free(sc->tq);
- }
-
- tsleep(&sc, 0, "cxgb unload", hz);
+ }
+ t3_sge_deinit_sw(sc);
+ /*
+ * Wait for last callout
+ */
+ tsleep(&sc, 0, "cxgb unload", 3*hz);
+
for (i = 0; i < (sc)->params.nports; ++i) {
if (sc->portdev[i] != NULL)
device_delete_child(sc->dev, sc->portdev[i]);
}
bus_generic_detach(sc->dev);
+ if (sc->tq != NULL)
+ taskqueue_free(sc->tq);
#ifdef notyet
if (is_offload(sc)) {
cxgb_adapter_unofld(sc);
@@ -804,16 +816,19 @@ setup_sge_qsets(adapter_t *sc)
else
irq_idx = 0;
- for (qset_idx = 0, i = 0; i < (sc)->params.nports; ++i) {
+ for (qset_idx = 0, i = 0; i < (sc)->params.nports; i++) {
struct port_info *pi = &sc->port[i];
- for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
+ for (j = 0; j < pi->nqsets; j++, qset_idx++) {
+ printf("allocating qset_idx=%d for port_id=%d\n",
+ qset_idx, pi->port_id);
err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
(sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
&sc->params.sge.qset[qset_idx], ntxq, pi);
if (err) {
t3_free_sge_resources(sc);
- device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n", err);
+ device_printf(sc->dev, "t3_sge_alloc_qset failed with %d\n",
+ err);
return (err);
}
}
@@ -859,7 +874,7 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
if (bus_setup_intr(sc->dev, sc->irq_res, INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
- NULL,
+ NULL,
#endif
cxgb_async_intr, sc, &sc->intr_tag)) {
device_printf(sc->dev, "Cannot set up interrupt\n");
@@ -881,10 +896,12 @@ cxgb_setup_msix(adapter_t *sc, int msix_count)
return (EINVAL);
}
sc->msix_irq_rid[k] = rid;
+ printf("setting up interrupt for port=%d\n",
+ qs->port->port_id);
if (bus_setup_intr(sc->dev, sc->msix_irq_res[k],
INTR_MPSAFE|INTR_TYPE_NET,
#ifdef INTR_FILTERS
- NULL,
+ NULL,
#endif
t3_intr_msix, qs, &sc->msix_intr_tag[k])) {
device_printf(sc->dev, "Cannot set up "
@@ -1077,8 +1094,11 @@ cxgb_port_detach(device_t dev)
p->tq = NULL;
}
- PORT_LOCK_DEINIT(p);
ether_ifdetach(p->ifp);
+ /*
+ * the lock may be acquired in ifdetach
+ */
+ PORT_LOCK_DEINIT(p);
if_free(p->ifp);
if (p->port_cdev != NULL)
@@ -1251,7 +1271,8 @@ cxgb_link_start(struct port_info *p)
ifp = p->ifp;
t3_init_rx_mode(&rm, p);
- t3_mac_reset(mac);
+ if (!mac->multiport)
+ t3_mac_reset(mac);
t3_mac_set_mtu(mac, ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
t3_mac_set_address(mac, 0, p->hw_addr);
t3_mac_set_rx_mode(mac, &rm);
@@ -1278,13 +1299,16 @@ setup_rss(adapter_t *adap)
uint8_t cpus[SGE_QSETS + 1];
uint16_t rspq_map[RSS_TABLE_SIZE];
- nq[0] = adap->port[0].nqsets;
- nq[1] = max((u_int)adap->port[1].nqsets, 1U);
-
for (i = 0; i < SGE_QSETS; ++i)
cpus[i] = i;
cpus[SGE_QSETS] = 0xff;
+ nq[0] = nq[1] = 0;
+ for_each_port(adap, i) {
+ const struct port_info *pi = adap2pinfo(adap, i);
+
+ nq[pi->tx_chan] += pi->nqsets;
+ }
for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
rspq_map[i] = nq[0] ? i % nq[0] : 0;
rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
@@ -1534,7 +1558,8 @@ cxgb_up(struct adapter *sc)
if ((sc->flags & USING_MSIX) == 0) {
if ((sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ,
&sc->irq_rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) {
- device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n", sc->irq_rid);
+ device_printf(sc->dev, "Cannot allocate interrupt rid=%d\n",
+ sc->irq_rid);
err = EINVAL;
goto out;
}
@@ -1599,11 +1624,15 @@ cxgb_down_locked(struct adapter *sc)
callout_drain(&sc->cxgb_tick_ch);
callout_drain(&sc->sge_timer_ch);
- if (sc->tq != NULL)
+ if (sc->tq != NULL) {
taskqueue_drain(sc->tq, &sc->slow_intr_task);
- for (i = 0; i < sc->params.nports; i++)
+ for (i = 0; i < sc->params.nports; i++)
+ taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task);
+ }
+#ifdef notyet
+
if (sc->port[i].tq != NULL)
- taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
+#endif
}
@@ -1718,7 +1747,8 @@ cxgb_init_locked(struct port_info *p)
cxgb_link_start(p);
t3_link_changed(sc, p->port_id);
ifp->if_baudrate = p->link_config.speed * 1000000;
-
+
+ printf("enabling interrupts on port=%d\n", p->port_id);
t3_port_intr_enable(sc, p->port_id);
callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz,
@@ -1891,7 +1921,7 @@ cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
struct sge_txq *txq;
struct port_info *p = ifp->if_softc;
struct mbuf *m0, *m = NULL;
- int err, in_use_init;
+ int err, in_use_init, qsidx = 0;
if (!p->link_config.link_ok)
return (ENXIO);
@@ -1899,7 +1929,10 @@ cxgb_start_tx(struct ifnet *ifp, uint32_t txmax)
if (IFQ_DRV_IS_EMPTY(&ifp->if_snd))
return (ENOBUFS);
- qs = &p->adapter->sge.qs[p->first_qset];
+ if (p->adapter->params.nports <= 2)
+ qsidx = p->first_qset;
+
+ qs = &p->adapter->sge.qs[qsidx];
txq = &qs->txq[TXQ_ETH];
err = 0;
@@ -2160,6 +2193,24 @@ cxgb_tick_handler(void *arg, int count)
check_t3b2_mac(sc);
}
+static void
+touch_bars(device_t dev)
+{
+ /*
+ * Don't enable yet
+ */
+#if !defined(__LP64__) && 0
+ u32 v;
+
+ pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
+ pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
+ pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
+ pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
+ pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
+ pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
+#endif
+}
+
#if 0
static void *
filter_get_idx(struct seq_file *seq, loff_t pos)
diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c
index 0a1d3ee..eb44d28 100644
--- a/sys/dev/cxgb/cxgb_sge.c
+++ b/sys/dev/cxgb/cxgb_sge.c
@@ -1169,7 +1169,7 @@ t3_encap(struct port_info *p, struct mbuf **m)
struct tx_sw_desc *stx;
struct txq_state txqs;
unsigned int ndesc, flits, cntrl, mlen;
- int err, nsegs, tso_info = 0;
+ int err, nsegs, tso_info = 0, qsidx = 0;
struct work_request_hdr *wrp;
struct tx_sw_desc *txsd;
@@ -1179,11 +1179,16 @@ t3_encap(struct port_info *p, struct mbuf **m)
struct tx_desc *txd;
struct cpl_tx_pkt *cpl;
-
- DPRINTF("t3_encap ");
+
m0 = *m;
sc = p->adapter;
- qs = &sc->sge.qs[p->first_qset];
+ if (sc->params.nports <= 2)
+ qsidx = p->first_qset;
+
+ DPRINTF("t3_encap qsidx=%d", qsidx);
+
+ qs = &sc->sge.qs[qsidx];
+
txq = &qs->txq[TXQ_ETH];
stx = &txq->sdesc[txq->pidx];
txd = &txq->desc[txq->pidx];
@@ -1191,12 +1196,12 @@ t3_encap(struct port_info *p, struct mbuf **m)
mlen = m0->m_pkthdr.len;
cpl->len = htonl(mlen | 0x80000000);
- DPRINTF("mlen=%d\n", mlen);
+ DPRINTF("mlen=%d pktintf=%d\n", mlen, p->txpkt_intf);
/*
* XXX handle checksum, TSO, and VLAN here
*
*/
- cntrl = V_TXPKT_INTF(p->port_id);
+ cntrl = V_TXPKT_INTF(p->txpkt_intf);
/*
* XXX need to add VLAN support for 6.x
@@ -1247,14 +1252,14 @@ t3_encap(struct port_info *p, struct mbuf **m)
if (mlen <= WR_LEN - sizeof(*cpl)) {
txq_prod(txq, 1, &txqs);
- txq->sdesc[txqs.pidx].m = m0;
- m_set_priority(m0, txqs.pidx);
+ txq->sdesc[txqs.pidx].m = NULL;
if (m0->m_len == m0->m_pkthdr.len)
memcpy(&txd->flit[2], mtod(m0, uint8_t *), mlen);
else
m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
+ m_freem(m0);
flits = (mlen + 7) / 8 + 2;
cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
@@ -1792,10 +1797,12 @@ calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
static int
ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
{
- unsigned int pidx, gen, ndesc;
+ int ret, nsegs;
+ unsigned int ndesc;
+ unsigned int pidx, gen;
struct mbuf *m_vec[TX_CLEAN_MAX_DESC];
bus_dma_segment_t segs[TX_MAX_SEGS];
- int i, cleaned, ret, nsegs;
+ int i, cleaned;
struct tx_sw_desc *stx = &q->sdesc[q->pidx];
mtx_lock(&q->lock);
@@ -2111,7 +2118,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
q->fl[1].type = EXT_JUMBOP;
q->lro.enabled = lro_default;
-
+
mtx_lock(&sc->sge.reg_lock);
ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
q->rspq.phys_addr, q->rspq.size,
@@ -2190,14 +2197,13 @@ err:
}
void
-t3_rx_eth(struct port_info *pi, struct sge_rspq *rq, struct mbuf *m, int ethpad)
+t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
{
struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
+ struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
struct ifnet *ifp = pi->ifp;
DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
- if (&pi->adapter->port[cpl->iff] != pi)
- panic("bad port index %d m->m_data=%p\n", cpl->iff, mtod(m, uint8_t *));
if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
cpl->csum_valid && cpl->csum == 0xffff) {
@@ -2506,9 +2512,9 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
printf("next_holdoff=%d\n", rq->next_holdoff);
last_holdoff = rq->next_holdoff;
}
-
- t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
- V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
+ if (work)
+ t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
+ V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
return work;
}
@@ -2523,10 +2529,9 @@ process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
void
t3b_intr(void *data)
{
- uint32_t map;
+ uint32_t i, map;
adapter_t *adap = data;
struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
- struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
t3_write_reg(adap, A_PL_CLI, 0);
map = t3_read_reg(adap, A_SG_DATA_INTR);
@@ -2538,13 +2543,9 @@ t3b_intr(void *data)
taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
mtx_lock(&q0->lock);
-
- if (__predict_true(map & 1))
- process_responses_gts(adap, q0);
-
- if (map & 2)
- process_responses_gts(adap, q1);
-
+ for_each_port(adap, i)
+ if (map & (1 << i))
+ process_responses_gts(adap, &adap->sge.qs[i].rspq);
mtx_unlock(&q0->lock);
}
@@ -2559,19 +2560,13 @@ t3_intr_msi(void *data)
{
adapter_t *adap = data;
struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
- struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
- int new_packets = 0;
-
+ int i, new_packets = 0;
+
mtx_lock(&q0->lock);
- if (process_responses_gts(adap, q0)) {
- new_packets = 1;
- }
- if (adap->params.nports == 2 &&
- process_responses_gts(adap, q1)) {
- new_packets = 1;
- }
-
+ for_each_port(adap, i)
+ if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
+ new_packets = 1;
mtx_unlock(&q0->lock);
if (new_packets == 0)
taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
OpenPOWER on IntegriCloud