summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
author luigi <luigi@FreeBSD.org> 2001-11-29 22:46:48 +0000
committer luigi <luigi@FreeBSD.org> 2001-11-29 22:46:48 +0000
commit 21d95a8778ba9e3b0c772562bf186d63935a003a (patch)
tree 797adae1a314e6970fa5583f123d967aa2b1c164 /sys
parent 09990be9983e807bd7d1049320397cc1c984483f (diff)
downloadFreeBSD-src-21d95a8778ba9e3b0c772562bf186d63935a003a.zip
FreeBSD-src-21d95a8778ba9e3b0c772562bf186d63935a003a.tar.gz
For i386 architecture, remove an expensive m_devget() (and the
underlying unaligned bcopy) on incoming packets that are already available (albeit unaligned) in a buffer. The performance improvement varies, depending on CPU and memory speed, but can be quite large, especially on slow CPUs. I have seen over 50% increase on forwarding speed on the sis driver for the 486/133 (embedded systems), which does exactly the same thing. The behaviour is controlled by a sysctl variable, hw.dc_quick, which defaults to 1. Set it to 0 to restore the old behaviour. After running a few experiments (in userland, though) I am convinced that doing the m_devget() is detrimental to performance in almost all cases. Even if your CPU has degraded performance with misaligned data, the bcopy() in the driver has the same overhead due to misalignment as the one that you save in the uiomove(), plus you do one extra copy and pollute the cache. But more often than not, you do not even have to touch the payload, e.g. when you are forwarding packets, and even in the often-cited case of NFS, you often end up passing a pointer to the payload to the disk controller. In any case, you can play with the sysctl variable to toggle between the two behaviours, and see if it makes a difference. MFC-after: 3 days
Diffstat (limited to 'sys')
-rw-r--r-- sys/dev/dc/if_dc.c 71
-rw-r--r-- sys/pci/if_dc.c 71
2 files changed, 80 insertions, 62 deletions
diff --git a/sys/dev/dc/if_dc.c b/sys/dev/dc/if_dc.c
index 4592c29..81cb936 100644
--- a/sys/dev/dc/if_dc.c
+++ b/sys/dev/dc/if_dc.c
@@ -96,6 +96,7 @@
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
+#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_arp.h>
@@ -297,6 +298,11 @@ static driver_t dc_driver = {
};
static devclass_t dc_devclass;
+#ifdef __i386__
+static int dc_quick=1;
+SYSCTL_INT(_hw, OID_AUTO, dc_quick, CTLFLAG_RW,
+ &dc_quick,0,"do not mdevget in dc driver");
+#endif
DRIVER_MODULE(if_dc, cardbus, dc_driver, dc_devclass, 0, 0);
DRIVER_MODULE(if_dc, pci, dc_driver, dc_devclass, 0, 0);
@@ -2206,18 +2212,13 @@ static int dc_list_tx_init(sc)
{
struct dc_chain_data *cd;
struct dc_list_data *ld;
- int i;
+ int i, nexti;
cd = &sc->dc_cdata;
ld = sc->dc_ldata;
for (i = 0; i < DC_TX_LIST_CNT; i++) {
- if (i == (DC_TX_LIST_CNT - 1)) {
- ld->dc_tx_list[i].dc_next =
- vtophys(&ld->dc_tx_list[0]);
- } else {
- ld->dc_tx_list[i].dc_next =
- vtophys(&ld->dc_tx_list[i + 1]);
- }
+ nexti = (i == (DC_TX_LIST_CNT - 1)) ? 0 : i+1 ;
+ ld->dc_tx_list[i].dc_next = vtophys(&ld->dc_tx_list[nexti]);
cd->dc_tx_chain[i] = NULL;
ld->dc_tx_list[i].dc_data = 0;
ld->dc_tx_list[i].dc_ctl = 0;
@@ -2239,7 +2240,7 @@ static int dc_list_rx_init(sc)
{
struct dc_chain_data *cd;
struct dc_list_data *ld;
- int i;
+ int i, nexti;
cd = &sc->dc_cdata;
ld = sc->dc_ldata;
@@ -2247,13 +2248,8 @@ static int dc_list_rx_init(sc)
for (i = 0; i < DC_RX_LIST_CNT; i++) {
if (dc_newbuf(sc, i, NULL) == ENOBUFS)
return(ENOBUFS);
- if (i == (DC_RX_LIST_CNT - 1)) {
- ld->dc_rx_list[i].dc_next =
- vtophys(&ld->dc_rx_list[0]);
- } else {
- ld->dc_rx_list[i].dc_next =
- vtophys(&ld->dc_rx_list[i + 1]);
- }
+ nexti = (i == (DC_RX_LIST_CNT - 1)) ? 0 : i+1 ;
+ ld->dc_rx_list[i].dc_next = vtophys(&ld->dc_rx_list[nexti]);
}
cd->dc_rx_prod = 0;
@@ -2276,16 +2272,11 @@ static int dc_newbuf(sc, i, m)
if (m == NULL) {
MGETHDR(m_new, M_DONTWAIT, MT_DATA);
- if (m_new == NULL) {
- printf("dc%d: no memory for rx list "
- "-- packet dropped!\n", sc->dc_unit);
+ if (m_new == NULL)
return(ENOBUFS);
- }
MCLGET(m_new, M_DONTWAIT);
if (!(m_new->m_flags & M_EXT)) {
- printf("dc%d: no memory for rx list "
- "-- packet dropped!\n", sc->dc_unit);
m_freem(m_new);
return(ENOBUFS);
}
@@ -2479,7 +2470,6 @@ static void dc_rxeof(sc)
i = sc->dc_cdata.dc_rx_prod;
while(!(sc->dc_ldata->dc_rx_list[i].dc_status & DC_RXSTAT_OWN)) {
- struct mbuf *m0 = NULL;
cur_rx = &sc->dc_ldata->dc_rx_list[i];
rxstat = cur_rx->dc_status;
@@ -2524,16 +2514,35 @@ static void dc_rxeof(sc)
/* No errors; receive the packet. */
total_len -= ETHER_CRC_LEN;
+#ifdef __i386__
+ /*
+ * On the x86 we do not have alignment problems, so try to
+ * allocate a new buffer for the receive ring, and pass up
+ * the one where the packet is already, saving the expensive
+ * copy done in m_devget().
+ * If we are on an architecture with alignment problems, or
+ * if the allocation fails, then use m_devget and leave the
+ * existing buffer in the receive ring.
+ */
+ if (dc_quick && dc_newbuf(sc, i, NULL) == 0) {
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.len = m->m_len = total_len;
+ DC_INC(i, DC_RX_LIST_CNT);
+ } else
+#endif
+ {
+ struct mbuf *m0;
- m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp,
- NULL);
- dc_newbuf(sc, i, m);
- DC_INC(i, DC_RX_LIST_CNT);
- if (m0 == NULL) {
- ifp->if_ierrors++;
- continue;
+ m0 = m_devget(mtod(m, char *), total_len,
+ ETHER_ALIGN, ifp, NULL);
+ dc_newbuf(sc, i, m);
+ DC_INC(i, DC_RX_LIST_CNT);
+ if (m0 == NULL) {
+ ifp->if_ierrors++;
+ continue;
+ }
+ m = m0;
}
- m = m0;
ifp->if_ipackets++;
eh = mtod(m, struct ether_header *);
diff --git a/sys/pci/if_dc.c b/sys/pci/if_dc.c
index 4592c29..81cb936 100644
--- a/sys/pci/if_dc.c
+++ b/sys/pci/if_dc.c
@@ -96,6 +96,7 @@
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
+#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_arp.h>
@@ -297,6 +298,11 @@ static driver_t dc_driver = {
};
static devclass_t dc_devclass;
+#ifdef __i386__
+static int dc_quick=1;
+SYSCTL_INT(_hw, OID_AUTO, dc_quick, CTLFLAG_RW,
+ &dc_quick,0,"do not mdevget in dc driver");
+#endif
DRIVER_MODULE(if_dc, cardbus, dc_driver, dc_devclass, 0, 0);
DRIVER_MODULE(if_dc, pci, dc_driver, dc_devclass, 0, 0);
@@ -2206,18 +2212,13 @@ static int dc_list_tx_init(sc)
{
struct dc_chain_data *cd;
struct dc_list_data *ld;
- int i;
+ int i, nexti;
cd = &sc->dc_cdata;
ld = sc->dc_ldata;
for (i = 0; i < DC_TX_LIST_CNT; i++) {
- if (i == (DC_TX_LIST_CNT - 1)) {
- ld->dc_tx_list[i].dc_next =
- vtophys(&ld->dc_tx_list[0]);
- } else {
- ld->dc_tx_list[i].dc_next =
- vtophys(&ld->dc_tx_list[i + 1]);
- }
+ nexti = (i == (DC_TX_LIST_CNT - 1)) ? 0 : i+1 ;
+ ld->dc_tx_list[i].dc_next = vtophys(&ld->dc_tx_list[nexti]);
cd->dc_tx_chain[i] = NULL;
ld->dc_tx_list[i].dc_data = 0;
ld->dc_tx_list[i].dc_ctl = 0;
@@ -2239,7 +2240,7 @@ static int dc_list_rx_init(sc)
{
struct dc_chain_data *cd;
struct dc_list_data *ld;
- int i;
+ int i, nexti;
cd = &sc->dc_cdata;
ld = sc->dc_ldata;
@@ -2247,13 +2248,8 @@ static int dc_list_rx_init(sc)
for (i = 0; i < DC_RX_LIST_CNT; i++) {
if (dc_newbuf(sc, i, NULL) == ENOBUFS)
return(ENOBUFS);
- if (i == (DC_RX_LIST_CNT - 1)) {
- ld->dc_rx_list[i].dc_next =
- vtophys(&ld->dc_rx_list[0]);
- } else {
- ld->dc_rx_list[i].dc_next =
- vtophys(&ld->dc_rx_list[i + 1]);
- }
+ nexti = (i == (DC_RX_LIST_CNT - 1)) ? 0 : i+1 ;
+ ld->dc_rx_list[i].dc_next = vtophys(&ld->dc_rx_list[nexti]);
}
cd->dc_rx_prod = 0;
@@ -2276,16 +2272,11 @@ static int dc_newbuf(sc, i, m)
if (m == NULL) {
MGETHDR(m_new, M_DONTWAIT, MT_DATA);
- if (m_new == NULL) {
- printf("dc%d: no memory for rx list "
- "-- packet dropped!\n", sc->dc_unit);
+ if (m_new == NULL)
return(ENOBUFS);
- }
MCLGET(m_new, M_DONTWAIT);
if (!(m_new->m_flags & M_EXT)) {
- printf("dc%d: no memory for rx list "
- "-- packet dropped!\n", sc->dc_unit);
m_freem(m_new);
return(ENOBUFS);
}
@@ -2479,7 +2470,6 @@ static void dc_rxeof(sc)
i = sc->dc_cdata.dc_rx_prod;
while(!(sc->dc_ldata->dc_rx_list[i].dc_status & DC_RXSTAT_OWN)) {
- struct mbuf *m0 = NULL;
cur_rx = &sc->dc_ldata->dc_rx_list[i];
rxstat = cur_rx->dc_status;
@@ -2524,16 +2514,35 @@ static void dc_rxeof(sc)
/* No errors; receive the packet. */
total_len -= ETHER_CRC_LEN;
+#ifdef __i386__
+ /*
+ * On the x86 we do not have alignment problems, so try to
+ * allocate a new buffer for the receive ring, and pass up
+ * the one where the packet is already, saving the expensive
+ * copy done in m_devget().
+ * If we are on an architecture with alignment problems, or
+ * if the allocation fails, then use m_devget and leave the
+ * existing buffer in the receive ring.
+ */
+ if (dc_quick && dc_newbuf(sc, i, NULL) == 0) {
+ m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.len = m->m_len = total_len;
+ DC_INC(i, DC_RX_LIST_CNT);
+ } else
+#endif
+ {
+ struct mbuf *m0;
- m0 = m_devget(mtod(m, char *), total_len, ETHER_ALIGN, ifp,
- NULL);
- dc_newbuf(sc, i, m);
- DC_INC(i, DC_RX_LIST_CNT);
- if (m0 == NULL) {
- ifp->if_ierrors++;
- continue;
+ m0 = m_devget(mtod(m, char *), total_len,
+ ETHER_ALIGN, ifp, NULL);
+ dc_newbuf(sc, i, m);
+ DC_INC(i, DC_RX_LIST_CNT);
+ if (m0 == NULL) {
+ ifp->if_ierrors++;
+ continue;
+ }
+ m = m0;
}
- m = m0;
ifp->if_ipackets++;
eh = mtod(m, struct ether_header *);
OpenPOWER on IntegriCloud