summary | refs | log | tree | commit | diff | stats
path: root/sys/mips/atheros
diff options
context:
space:
mode:
author adrian <adrian@FreeBSD.org> 2015-10-21 01:41:18 +0000
committer adrian <adrian@FreeBSD.org> 2015-10-21 01:41:18 +0000
commit 3ec63ec821cd2663036e1b5fe134f37875691ae5 (patch)
tree e1903b74d475c974ea7bdfbd218c2772e2fc824e /sys/mips/atheros
parent 7cf294201ef86a814a4ada04c8a82738c156c5bc (diff)
download FreeBSD-src-3ec63ec821cd2663036e1b5fe134f37875691ae5.zip
FreeBSD-src-3ec63ec821cd2663036e1b5fe134f37875691ae5.tar.gz
arge: don't do the rx fixup copy and just offset the mbuf by 2 bytes
The existing code meets the "alignment" requirement for the L3 payload by offsetting the mbuf by sizeof(uint64_t) and then calling an RX fixup routine to copy the frame backwards by 2 bytes. This DWORD-aligns the L3 payload so TCP, etc. don't panic on unaligned access.

This is slow.

For arge MACs that support 1-byte TX/RX address alignment, we can do the "other" hack: offset the RX address of the mbuf by 2 bytes so the L3 payload is again (hopefully) DWORD aligned.

This is much cheaper: since TX and RX are both 1-byte-alignment ready (thanks to the previous commit), there is no bounce buffering going on and there is no RX fixup copying.

This gets bridging performance up from 180 Mbit/sec to 410 Mbit/sec. Around 10% of CPU cycles are spent in _bus_dmamap_sync(); I'll investigate that later.

Tested:

* QCA955x SoC (AP135 reference board), bridging arge0/arge1 by programming the switch to have two vlangroups in dot1q mode:

# ifconfig bridge0 inet 192.168.2.20/24
# etherswitchcfg config vlan_mode dot1q
# etherswitchcfg vlangroup0 members 0,1,2,3,4
# etherswitchcfg vlangroup1 vlan 2 members 5,6
# etherswitchcfg port5 pvid 2
# etherswitchcfg port6 pvid 2
# ifconfig arge1 up
# ifconfig bridge0 addm arge1
Diffstat (limited to 'sys/mips/atheros')
-rw-r--r-- sys/mips/atheros/if_arge.c 31
1 files changed, 29 insertions, 2 deletions
diff --git a/sys/mips/atheros/if_arge.c b/sys/mips/atheros/if_arge.c
index 446cd64..7a3efff 100644
--- a/sys/mips/atheros/if_arge.c
+++ b/sys/mips/atheros/if_arge.c
@@ -2165,6 +2165,7 @@ arge_newbuf(struct arge_softc *sc, int idx)
bus_dmamap_t map;
int nsegs;
+ /* XXX TODO: should just allocate an explicit 2KiB buffer */
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
if (m == NULL)
return (ENOBUFS);
@@ -2174,7 +2175,15 @@ arge_newbuf(struct arge_softc *sc, int idx)
* Add extra space to "adjust" (copy) the packet back to be aligned
* for purposes of IPv4/IPv6 header contents.
*/
- m_adj(m, sizeof(uint64_t));
+ if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE)
+ m_adj(m, sizeof(uint64_t));
+ /*
+ * If it's a 1-byte aligned buffer, then just offset it two bytes
+ * and that will give us a hopefully correctly DWORD aligned
+ * L3 payload - and we won't have to undo it afterwards.
+ */
+ else if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_1BYTE)
+ m_adj(m, sizeof(uint16_t));
if (bus_dmamap_load_mbuf_sg(sc->arge_cdata.arge_rx_tag,
sc->arge_cdata.arge_rx_sparemap, m, segs, &nsegs, 0) != 0) {
@@ -2186,6 +2195,11 @@ arge_newbuf(struct arge_softc *sc, int idx)
rxd = &sc->arge_cdata.arge_rxdesc[idx];
if (rxd->rx_m != NULL) {
bus_dmamap_unload(sc->arge_cdata.arge_rx_tag, rxd->rx_dmamap);
+ /* XXX TODO: free rx_m? */
+ device_printf(sc->arge_dev,
+ "%s: ring[%d] rx_m wasn't free?\n",
+ __func__,
+ idx);
}
map = rxd->rx_dmamap;
rxd->rx_dmamap = sc->arge_cdata.arge_rx_sparemap;
@@ -2205,6 +2219,13 @@ arge_newbuf(struct arge_softc *sc, int idx)
return (0);
}
+/*
+ * Move the data backwards 16 bits to (hopefully!) ensure the
+ * IPv4/IPv6 payload is aligned.
+ *
+ * This is required for earlier hardware where the RX path
+ * requires DWORD aligned buffers.
+ */
static __inline void
arge_fixup_rx(struct mbuf *m)
{
@@ -2344,7 +2365,13 @@ arge_rx_locked(struct arge_softc *sc)
BUS_DMASYNC_POSTREAD);
m = rxd->rx_m;
- arge_fixup_rx(m);
+ /*
+ * If the MAC requires 4 byte alignment then the RX setup
+ * routine will have pre-offset things; so un-offset it here.
+ */
+ if (sc->arge_hw_flags & ARGE_HW_FLG_RX_DESC_ALIGN_4BYTE)
+ arge_fixup_rx(m);
+
m->m_pkthdr.rcvif = ifp;
/* Skip 4 bytes of CRC */
m->m_pkthdr.len = m->m_len = packet_len - ETHER_CRC_LEN;
OpenPOWER on IntegriCloud