summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Buesch <mb@bu3sch.de>2009-02-19 23:45:43 +0100
committerJohn W. Linville <linville@tuxdriver.com>2009-02-27 14:52:48 -0500
commitbdceeb2dad5c8487ffeb4d0fa949686e4350ec7f (patch)
treea980d4cb770bf47a939206fc9fbb92deb3232938
parent8eccb53f1b858c9bd0b745f839174725b76508ec (diff)
downloadop-kernel-dev-bdceeb2dad5c8487ffeb4d0fa949686e4350ec7f.zip
op-kernel-dev-bdceeb2dad5c8487ffeb4d0fa949686e4350ec7f.tar.gz
b43: Optimize DMA buffers
In the old days we used one slot per frame. But when we changed that to 2, we didn't raise the overall slot count. Which resulted in an effective division of two to the number of slots. Double the number of TX slots, so we have an effective hardware queue of 128 frames per QoS queue. Also optimize the TX header cache handling. We don't need a cached TX header for slots that will never carry an actual header. So we reduce the memory consumption of the cache by 50%. So as a net result we end up with more or less the same memory usage before and after this patch (except a few tiny meta structures), but have twice the number of TX slots available. Signed-off-by: Michael Buesch <mb@bu3sch.de> Signed-off-by: John W. Linville <linville@tuxdriver.com>
-rw-r--r--drivers/net/wireless/b43/dma.c38
-rw-r--r--drivers/net/wireless/b43/dma.h4
2 files changed, 25 insertions, 17 deletions
diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index 3824604..189b2ec 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -41,6 +41,12 @@
#include <asm/div64.h>
+/* Required number of TX DMA slots per TX frame.
+ * This currently is 2, because we put the header and the ieee80211 frame
+ * into separate slots. */
+#define TX_SLOTS_PER_FRAME 2
+
+
/* 32bit DMA ops. */
static
struct b43_dmadesc_generic *op32_idx2desc(struct b43_dmaring *ring,
@@ -574,12 +580,11 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
return -ENOMEM;
dmaaddr = map_descbuffer(ring, skb->data,
ring->rx_buffersize, 0);
- }
-
- if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
- b43err(ring->dev->wl, "RX DMA buffer allocation failed\n");
- dev_kfree_skb_any(skb);
- return -EIO;
+ if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
+ b43err(ring->dev->wl, "RX DMA buffer allocation failed\n");
+ dev_kfree_skb_any(skb);
+ return -EIO;
+ }
}
meta->skb = skb;
@@ -837,7 +842,7 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
#endif
if (for_tx) {
- ring->txhdr_cache = kcalloc(ring->nr_slots,
+ ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
b43_txhdr_size(dev),
GFP_KERNEL);
if (!ring->txhdr_cache)
@@ -853,7 +858,7 @@ struct b43_dmaring *b43_setup_dmaring(struct b43_wldev *dev,
b43_txhdr_size(dev), 1)) {
/* ugh realloc */
kfree(ring->txhdr_cache);
- ring->txhdr_cache = kcalloc(ring->nr_slots,
+ ring->txhdr_cache = kcalloc(ring->nr_slots / TX_SLOTS_PER_FRAME,
b43_txhdr_size(dev),
GFP_KERNEL | GFP_DMA);
if (!ring->txhdr_cache)
@@ -1144,7 +1149,10 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
u16 cookie;
size_t hdrsize = b43_txhdr_size(ring->dev);
-#define SLOTS_PER_PACKET 2
+ /* Important note: If the number of used DMA slots per TX frame
+ * is changed here, the TX_SLOTS_PER_FRAME definition at the top of
+ * the file has to be updated, too!
+ */
old_top_slot = ring->current_slot;
old_used_slots = ring->used_slots;
@@ -1154,7 +1162,7 @@ static int dma_tx_fragment(struct b43_dmaring *ring,
desc = ops->idx2desc(ring, slot, &meta_hdr);
memset(meta_hdr, 0, sizeof(*meta_hdr));
- header = &(ring->txhdr_cache[slot * hdrsize]);
+ header = &(ring->txhdr_cache[(slot / TX_SLOTS_PER_FRAME) * hdrsize]);
cookie = generate_cookie(ring, slot);
err = b43_generate_txhdr(ring->dev, header,
skb->data, skb->len, info, cookie);
@@ -1308,7 +1316,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
* That would be a mac80211 bug. */
B43_WARN_ON(ring->stopped);
- if (unlikely(free_slots(ring) < SLOTS_PER_PACKET)) {
+ if (unlikely(free_slots(ring) < TX_SLOTS_PER_FRAME)) {
b43warn(dev->wl, "DMA queue overflow\n");
err = -ENOSPC;
goto out_unlock;
@@ -1332,7 +1340,7 @@ int b43_dma_tx(struct b43_wldev *dev, struct sk_buff *skb)
goto out_unlock;
}
ring->nr_tx_packets++;
- if ((free_slots(ring) < SLOTS_PER_PACKET) ||
+ if ((free_slots(ring) < TX_SLOTS_PER_FRAME) ||
should_inject_overflow(ring)) {
/* This TX ring is full. */
ieee80211_stop_queue(dev->wl->hw, skb_get_queue_mapping(skb));
@@ -1416,7 +1424,7 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev,
}
dev->stats.last_tx = jiffies;
if (ring->stopped) {
- B43_WARN_ON(free_slots(ring) < SLOTS_PER_PACKET);
+ B43_WARN_ON(free_slots(ring) < TX_SLOTS_PER_FRAME);
ieee80211_wake_queue(dev->wl->hw, ring->queue_prio);
ring->stopped = 0;
if (b43_debug(dev, B43_DBG_DMAVERBOSE)) {
@@ -1439,8 +1447,8 @@ void b43_dma_get_tx_stats(struct b43_wldev *dev,
ring = select_ring_by_priority(dev, i);
spin_lock_irqsave(&ring->lock, flags);
- stats[i].len = ring->used_slots / SLOTS_PER_PACKET;
- stats[i].limit = ring->nr_slots / SLOTS_PER_PACKET;
+ stats[i].len = ring->used_slots / TX_SLOTS_PER_FRAME;
+ stats[i].limit = ring->nr_slots / TX_SLOTS_PER_FRAME;
stats[i].count = ring->nr_tx_packets;
spin_unlock_irqrestore(&ring->lock, flags);
}
diff --git a/drivers/net/wireless/b43/dma.h b/drivers/net/wireless/b43/dma.h
index 4ec24e8..05dde64 100644
--- a/drivers/net/wireless/b43/dma.h
+++ b/drivers/net/wireless/b43/dma.h
@@ -162,7 +162,7 @@ struct b43_dmadesc_generic {
#define B43_DMA0_RX_FRAMEOFFSET 30
/* DMA engine tuning knobs */
-#define B43_TXRING_SLOTS 128
+#define B43_TXRING_SLOTS 256
#define B43_RXRING_SLOTS 64
#define B43_DMA0_RX_BUFFERSIZE IEEE80211_MAX_FRAME_LEN
@@ -212,7 +212,7 @@ struct b43_dmaring {
void *descbase;
/* Meta data about all descriptors. */
struct b43_dmadesc_meta *meta;
- /* Cache of TX headers for each slot.
+ /* Cache of TX headers for each TX frame.
* This is to avoid an allocation on each TX.
* This is NULL for an RX ring.
*/
OpenPOWER on IntegriCloud