diff options
author | luigi <luigi@FreeBSD.org> | 2001-11-29 22:46:48 +0000 |
---|---|---|
committer | luigi <luigi@FreeBSD.org> | 2001-11-29 22:46:48 +0000 |
commit | 21d95a8778ba9e3b0c772562bf186d63935a003a (patch) | |
tree | 797adae1a314e6970fa5583f123d967aa2b1c164 /sys | |
parent | 09990be9983e807bd7d1049320397cc1c984483f (diff) | |
download | FreeBSD-src-21d95a8778ba9e3b0c772562bf186d63935a003a.zip FreeBSD-src-21d95a8778ba9e3b0c772562bf186d63935a003a.tar.gz |
For i386 architecture, remove an expensive m_devget() (and the
underlying unaligned bcopy) on incoming packets that are already
available (albeit unaligned) in a buffer.
The performance improvement varies, depending on CPU and memory
speed, but can be quite large especially on slow CPUs. I have seen
over 50% increase on forwarding speed on the sis driver for the
486/133 (embedded systems), which does exactly the same thing.
The behaviour is controlled by a sysctl variable, hw.dc_quick which
defaults to 1. Set it to 0 to restore the old behaviour.
After running a few experiments (in userland, though) I am convinced
that doing the m_devget() is detrimental to performance in almost
all cases.
Even if your CPU has degraded performance with misaligned data,
the bcopy() in the driver has the same overhead due to misaligment
as the one that you save in the uiomove(), plus you do one extra
copy and pollute the cache.
But more often than not, you do not even have to touch the payload,
e.g. when you are forwarding packets, and even in the often-cited
case of NFS, you often end up passing a pointer to the payload to
the disk controller.
In any case, you can play with the sysctl variable to toggle between
the two behaviours, and see if it makes a difference.
MFC-after: 3 days
Diffstat (limited to 'sys')
-rw-r--r-- | sys/dev/dc/if_dc.c | 71 | ||||
-rw-r--r-- | sys/pci/if_dc.c | 71 |
2 files changed, 80 insertions, 62 deletions
diff --git a/sys/dev/dc/if_dc.c b/sys/dev/dc/if_dc.c index 4592c29..81cb936 100644 --- a/sys/dev/dc/if_dc.c +++ b/sys/dev/dc/if_dc.c @@ -96,6 +96,7 @@ #include <sys/malloc.h> #include <sys/kernel.h> #include <sys/socket.h> +#include <sys/sysctl.h> #include <net/if.h> #include <net/if_arp.h> @@ -297,6 +298,11 @@ static driver_t dc_driver = { }; static devclass_t dc_devclass; +#ifdef __i386__ +static int dc_quick=1; +SYSCTL_INT(_hw, OID_AUTO, dc_quick, CTLFLAG_RW, + &dc_quick,0,"do not mdevget in dc driver"); +#endif DRIVER_MODULE(if_dc, cardbus, dc_driver, dc_devclass, 0, 0); DRIVER_MODULE(if_dc, pci, dc_driver, dc_devclass, 0, 0); @@ -2206,18 +2212,13 @@ static int dc_list_tx_init(sc) { struct dc_chain_data *cd; struct dc_list_data *ld; - int i; + int i, nexti; cd = &sc->dc_cdata; ld = sc->dc_ldata; for (i = 0; i < DC_TX_LIST_CNT; i++) { - if (i == (DC_TX_LIST_CNT - 1)) { - ld->dc_tx_list[i].dc_next = - vtophys(&ld->dc_tx_list[0]); - } else { - ld->dc_tx_list[i].dc_next = - vtophys(&ld->dc_tx_list[i + 1]); - } + nexti = (i == (DC_TX_LIST_CNT - 1)) ? 0 : i+1 ; + ld->dc_tx_list[i].dc_next = vtophys(&ld->dc_tx_list[nexti]); cd->dc_tx_chain[i] = NULL; ld->dc_tx_list[i].dc_data = 0; ld->dc_tx_list[i].dc_ctl = 0; @@ -2239,7 +2240,7 @@ static int dc_list_rx_init(sc) { struct dc_chain_data *cd; struct dc_list_data *ld; - int i; + int i, nexti; cd = &sc->dc_cdata; ld = sc->dc_ldata; @@ -2247,13 +2248,8 @@ static int dc_list_rx_init(sc) for (i = 0; i < DC_RX_LIST_CNT; i++) { if (dc_newbuf(sc, i, NULL) == ENOBUFS) return(ENOBUFS); - if (i == (DC_RX_LIST_CNT - 1)) { - ld->dc_rx_list[i].dc_next = - vtophys(&ld->dc_rx_list[0]); - } else { - ld->dc_rx_list[i].dc_next = - vtophys(&ld->dc_rx_list[i + 1]); - } + nexti = (i == (DC_RX_LIST_CNT - 1)) ? 0 : i+1 ; + ld->dc_rx_list[i].dc_next = vtophys(&ld->dc_rx_list[nexti]); } cd->dc_rx_prod = 0; @@ -2276,16 +2272,11 @@ static int dc_newbuf(sc, i, m) if (m == NULL) { MGETHDR(m_new, M_DONTWAIT, MT_DATA); - if (m_new == NULL) { - printf("dc%d: no memory for rx list " - "-- packet dropped!\n", sc->dc_unit); + if (m_new == NULL) return(ENOBUFS); - } MCLGET(m_new, M_DONTWAIT); if (!(m_new->m_flags & M_EXT)) { - printf("dc%d: no memory for rx list " - "-- packet dropped!\n", sc->dc_unit); m_freem(m_new); return(ENOBUFS); } @@ -2479,7 +2470,6 @@ static void dc_rxeof(sc) i = sc->dc_cdata.dc_rx_prod; while(!(sc->dc_ldata->dc_rx_list[i].dc_status & DC_RXSTAT_OWN)) { - struct mbuf *m0 = NULL; cur_rx = &sc->dc_ldata->dc_rx_list[i]; rxstat = cur_rx->dc_status; @@ -2524,16 +2514,35 @@ static void dc_rxeof(sc) /* No errors; receive the packet. */ total_len -= ETHER_CRC_LEN; +#ifdef __i386__ + /* + * On the x86 we do not have alignment problems, so try to + * allocate a new buffer for the receive ring, and pass up + * the one where the packet is already, saving the expensive + * copy done in m_devget(). + * If we are on an architecture with alignment problems, or + * if the allocation fails, then use m_devget and leave the + * existing buffer in the receive ring. + */ + if (dc_quick && dc_newbuf(sc, i, NULL) == 0) { + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = m->m_len = total_len; + DC_INC(i, DC_RX_LIST_CNT); + } else +#endif + { + struct mbuf *m0; - m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp, - NULL); - dc_newbuf(sc, i, m); - DC_INC(i, DC_RX_LIST_CNT); - if (m0 == NULL) { - ifp->if_ierrors++; - continue; + m0 = m_devget(mtod(m, char *), total_len, + ETHER_ALIGN, ifp, NULL); + dc_newbuf(sc, i, m); + DC_INC(i, DC_RX_LIST_CNT); + if (m0 == NULL) { + ifp->if_ierrors++; + continue; + } + m = m0; } - m = m0; ifp->if_ipackets++; eh = mtod(m, struct ether_header *); diff --git a/sys/pci/if_dc.c b/sys/pci/if_dc.c index 4592c29..81cb936 100644 --- a/sys/pci/if_dc.c +++ b/sys/pci/if_dc.c @@ -96,6 +96,7 @@ #include <sys/malloc.h> #include <sys/kernel.h> #include <sys/socket.h> +#include <sys/sysctl.h> #include <net/if.h> #include <net/if_arp.h> @@ -297,6 +298,11 @@ static driver_t dc_driver = { }; static devclass_t dc_devclass; +#ifdef __i386__ +static int dc_quick=1; +SYSCTL_INT(_hw, OID_AUTO, dc_quick, CTLFLAG_RW, + &dc_quick,0,"do not mdevget in dc driver"); +#endif DRIVER_MODULE(if_dc, cardbus, dc_driver, dc_devclass, 0, 0); DRIVER_MODULE(if_dc, pci, dc_driver, dc_devclass, 0, 0); @@ -2206,18 +2212,13 @@ static int dc_list_tx_init(sc) { struct dc_chain_data *cd; struct dc_list_data *ld; - int i; + int i, nexti; cd = &sc->dc_cdata; ld = sc->dc_ldata; for (i = 0; i < DC_TX_LIST_CNT; i++) { - if (i == (DC_TX_LIST_CNT - 1)) { - ld->dc_tx_list[i].dc_next = - vtophys(&ld->dc_tx_list[0]); - } else { - ld->dc_tx_list[i].dc_next = - vtophys(&ld->dc_tx_list[i + 1]); - } + nexti = (i == (DC_TX_LIST_CNT - 1)) ? 0 : i+1 ; + ld->dc_tx_list[i].dc_next = vtophys(&ld->dc_tx_list[nexti]); cd->dc_tx_chain[i] = NULL; ld->dc_tx_list[i].dc_data = 0; ld->dc_tx_list[i].dc_ctl = 0; @@ -2239,7 +2240,7 @@ static int dc_list_rx_init(sc) { struct dc_chain_data *cd; struct dc_list_data *ld; - int i; + int i, nexti; cd = &sc->dc_cdata; ld = sc->dc_ldata; @@ -2247,13 +2248,8 @@ static int dc_list_rx_init(sc) for (i = 0; i < DC_RX_LIST_CNT; i++) { if (dc_newbuf(sc, i, NULL) == ENOBUFS) return(ENOBUFS); - if (i == (DC_RX_LIST_CNT - 1)) { - ld->dc_rx_list[i].dc_next = - vtophys(&ld->dc_rx_list[0]); - } else { - ld->dc_rx_list[i].dc_next = - vtophys(&ld->dc_rx_list[i + 1]); - } + nexti = (i == (DC_RX_LIST_CNT - 1)) ? 0 : i+1 ; + ld->dc_rx_list[i].dc_next = vtophys(&ld->dc_rx_list[nexti]); } cd->dc_rx_prod = 0; @@ -2276,16 +2272,11 @@ static int dc_newbuf(sc, i, m) if (m == NULL) { MGETHDR(m_new, M_DONTWAIT, MT_DATA); - if (m_new == NULL) { - printf("dc%d: no memory for rx list " - "-- packet dropped!\n", sc->dc_unit); + if (m_new == NULL) return(ENOBUFS); - } MCLGET(m_new, M_DONTWAIT); if (!(m_new->m_flags & M_EXT)) { - printf("dc%d: no memory for rx list " - "-- packet dropped!\n", sc->dc_unit); m_freem(m_new); return(ENOBUFS); } @@ -2479,7 +2470,6 @@ static void dc_rxeof(sc) i = sc->dc_cdata.dc_rx_prod; while(!(sc->dc_ldata->dc_rx_list[i].dc_status & DC_RXSTAT_OWN)) { - struct mbuf *m0 = NULL; cur_rx = &sc->dc_ldata->dc_rx_list[i]; rxstat = cur_rx->dc_status; @@ -2524,16 +2514,35 @@ static void dc_rxeof(sc) /* No errors; receive the packet. */ total_len -= ETHER_CRC_LEN; +#ifdef __i386__ + /* + * On the x86 we do not have alignment problems, so try to + * allocate a new buffer for the receive ring, and pass up + * the one where the packet is already, saving the expensive + * copy done in m_devget(). + * If we are on an architecture with alignment problems, or + * if the allocation fails, then use m_devget and leave the + * existing buffer in the receive ring. + */ + if (dc_quick && dc_newbuf(sc, i, NULL) == 0) { + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.len = m->m_len = total_len; + DC_INC(i, DC_RX_LIST_CNT); + } else +#endif + { + struct mbuf *m0; - m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp, - NULL); - dc_newbuf(sc, i, m); - DC_INC(i, DC_RX_LIST_CNT); - if (m0 == NULL) { - ifp->if_ierrors++; - continue; + m0 = m_devget(mtod(m, char *), total_len, + ETHER_ALIGN, ifp, NULL); + dc_newbuf(sc, i, m); + DC_INC(i, DC_RX_LIST_CNT); + if (m0 == NULL) { + ifp->if_ierrors++; + continue; + } + m = m0; } - m = m0; ifp->if_ipackets++; eh = mtod(m, struct ether_header *); |