diff options
author | adrian <adrian@FreeBSD.org> | 2015-10-21 01:41:18 +0000 |
---|---|---|
committer | adrian <adrian@FreeBSD.org> | 2015-10-21 01:41:18 +0000 |
commit | 3ec63ec821cd2663036e1b5fe134f37875691ae5 (patch) | |
tree | e1903b74d475c974ea7bdfbd218c2772e2fc824e /sys/mips/atheros | |
parent | 7cf294201ef86a814a4ada04c8a82738c156c5bc (diff) | |
download | FreeBSD-src-3ec63ec821cd2663036e1b5fe134f37875691ae5.zip FreeBSD-src-3ec63ec821cd2663036e1b5fe134f37875691ae5.tar.gz |
arge: don't do the rx fixup copy and just offset the mbuf by 2 bytes
The existing code meets the "alignment" requirement for the L3 payload
by offsetting the mbuf by sizeof(uint64_t) and then calling an RX fixup routine
to copy the frame backwards by 2 bytes. This DWORD-aligns the
L3 payload so TCP, etc. don't panic on unaligned access.
This is ... slow.
For arge MACs that support 1-byte TX/RX address alignment, we can do
the "other" hack: offset the RX address of the mbuf by 2 bytes so the
L3 payload is, hopefully, again DWORD aligned.
This is much cheaper - since TX and RX are both 1-byte-alignment ready (thanks
to the previous commit) there's no bounce buffering going on and there
is no RX fixup copying.
This gets bridging performance up from 180 Mbit/sec to 410 Mbit/sec.
There's around 10% of CPU cycles spent in _bus_dmamap_sync(); I'll
investigate that later.
Tested:
* QCA955x SoC (AP135 reference board), bridging arge0/arge1
by programming the switch to have two vlangroups in dot1q mode:
# ifconfig bridge0 inet 192.168.2.20/24
# etherswitchcfg config vlan_mode dot1q
# etherswitchcfg vlangroup0 members 0,1,2,3,4
# etherswitchcfg vlangroup1 vlan 2 members 5,6
# etherswitchcfg port5 pvid 2
# etherswitchcfg port6 pvid 2
# ifconfig arge1 up
# ifconfig bridge0 addm arge1
Diffstat (limited to 'sys/mips/atheros')
-rw-r--r-- | sys/mips/atheros/if_arge.c | 31 |
1 files changed, 29 insertions, 2 deletions
diff --git a/sys/mips/atheros/if_arge.c b/sys/mips/atheros/if_arge.c index 446cd64..7a3efff 100644 --- a/sys/mips/atheros/if_arge.c +++ b/sys/mips/atheros/if_arge.c @@ -2165,6 +2165,7 @@ arge_newbuf(struct arge_softc *sc, int idx) bus_dmamap_t map; int nsegs; + /* XXX TODO: should just allocate an explicit 2KiB buffer */ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return (ENOBUFS); @@ -2174,7 +2175,15 @@ arge_newbuf(struct arge_softc *sc, int idx) * Add extra space to "adjust" (copy) the packet back to be aligned * for purposes of IPv4/IPv6 header contents. */ - m_adj(m, sizeof(uint64_t)); + if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) + m_adj(m, sizeof(uint64_t)); + /* + * If it's a 1-byte aligned buffer, then just offset it two bytes + * and that will give us a hopefully correctly DWORD aligned + * L3 payload - and we won't have to undo it afterwards. + */ + else if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE) + m_adj(m, sizeof(uint16_t)); if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag, sc->arge_cdata.arge_rx_sparemap, m, segs, &nsegs, 0) != 0) { @@ -2186,6 +2195,11 @@ arge_newbuf(struct arge_softc *sc, int idx) rxd = &sc->arge_cdata.arge_rxdesc[idx]; if (rxd->rx_m != NULL) { bus_dmamap_unload(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap); + /* XXX TODO: free rx_m? */ + device_printf(sc->arge_dev, + "%s: ring[%d] rx_m wasn't free?\n", + __func__, + idx); } map = rxd->rx_dmamap; rxd->rx_dmamap = sc->arge_cdata.arge_rx_sparemap; @@ -2205,6 +2219,13 @@ arge_newbuf(struct arge_softc *sc, int idx) return (0); } +/* + * Move the data backwards 16 bits to (hopefully!) ensure the + * IPv4/IPv6 payload is aligned. + * + * This is required for earlier hardware where the RX path + * requires DWORD aligned buffers. 
+ */ static __inline void arge_fixup_rx(struct mbuf *m) { @@ -2344,7 +2365,13 @@ arge_rx_locked(struct arge_softc *sc) BUS_DMASYNC_POSTREAD); m = rxd->rx_m; - arge_fixup_rx(m); + /* + * If the MAC requires 4 byte alignment then the RX setup + * routine will have pre-offset things; so un-offset it here. + */ + if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE) + arge_fixup_rx(m); + m->m_pkthdr.rcvif = ifp; /* Skip 4 bytes of CRC */ m->m_pkthdr.len = m->m_len = packet_len - ETHER_CRC_LEN; |