path: root/sys/dev
author    np <np@FreeBSD.org>  2014-05-06 06:49:39 +0000
committer np <np@FreeBSD.org>  2014-05-06 06:49:39 +0000
commit    ebd722c9b426d98268fb004c5ec8f676c912c3fa (patch)
tree      2b44b65f9305bd1e879b68d770db42815486cdb2 /sys/dev
parent    c4599dcf4454fdbbe5a70d7dc29c5ee3efa6a415 (diff)
MFC r263317, r263412, and r263451.
r263317: cxgbe(4): significant rx rework.
- More flexible cluster size selection, including the ability to fall back to
  a safe cluster size (PAGE_SIZE from zone_jumbop by default) in case an
  allocation of a larger size fails.
- A single get_fl_payload() function that assembles the payload into an mbuf
  chain for any kind of freelist.  This replaces two variants: one for
  freelists with buffer packing enabled and another for those without.
- Buffer packing with any sized cluster.  It was limited to 4K clusters only
  before this change.
- Enable buffer packing for TOE rx queues as well.
- Statistics and tunables to go with all these changes.  The driver's man page
  will be updated separately.

r263412: cxgbe(4): if_iqdrops statistic should include tunnel congestion drops.

r263451: cxgbe(4): man page updates.
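The r263317 fallback behaviour amounts to: refill the freelist from its
preferred cluster zone, and retry with the safe zone (PAGE_SIZE clusters by
default) only when the larger allocation fails.  The userland sketch below
illustrates that pattern; malloc() stands in for uma_zalloc() and the names
(refill_source, alloc_cluster) are hypothetical, not the driver's code.

/*
 * Illustrative sketch only: preferred-then-safe cluster allocation as
 * described in r263317.  malloc() stands in for uma_zalloc(zone, M_NOWAIT);
 * the driver itself retries with its "safe" UMA zone (zone_jumbop).
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct refill_source {
	size_t cl_size;		/* cluster size to request */
	const char *name;	/* zone name, for the printout */
};

static void *
alloc_cluster(const struct refill_source *def, const struct refill_source *alt,
    const struct refill_source **used)
{
	void *cl;

	if ((cl = malloc(def->cl_size)) != NULL) {	/* preferred, e.g. 16K */
		*used = def;
		return (cl);
	}
	if ((cl = malloc(alt->cl_size)) != NULL) {	/* safe fallback */
		*used = alt;
		return (cl);
	}
	*used = NULL;
	return (NULL);
}

int
main(void)
{
	struct refill_source def = { 16 * 1024, "jumbo16k" };
	struct refill_source alt = { (size_t)sysconf(_SC_PAGESIZE), "jumbop" };
	const struct refill_source *used;
	void *cl;

	if ((cl = alloc_cluster(&def, &alt, &used)) != NULL) {
		printf("refilled from %s (%zu bytes)\n", used->name,
		    used->cl_size);
		free(cl);
	}
	return (0);
}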
Diffstat (limited to 'sys/dev')
-rw-r--r--  sys/dev/cxgbe/adapter.h         74
-rw-r--r--  sys/dev/cxgbe/common/t4_hw.h     1
-rw-r--r--  sys/dev/cxgbe/t4_main.c         18
-rw-r--r--  sys/dev/cxgbe/t4_sge.c        1130
4 files changed, 647 insertions, 576 deletions
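The rework also carves each rx cluster into three regions: region1 for
optional inline mbufs, region2 as the DMA payload buffer programmed into the
SGE, and region3 for the cluster_metadata refcount (see struct cluster_layout
and find_best_refill_source in the diff below).  A small sketch of that
arithmetic, assuming an MSIZE of 256 and a 64-byte CL_METADATA_SIZE;
carve_cluster() is a hypothetical helper, not part of the driver.

#include <stdio.h>

#define MSIZE             256	/* assumed mbuf size */
#define CL_METADATA_SIZE   64	/* assumed cache line size */

/* Split a cl_size cluster around a hw payload buffer of hw_size bytes. */
static void
carve_cluster(int cl_size, int hw_size)
{
	int spare = cl_size - hw_size;			/* not used for DMA */
	int nmbuf = (spare - CL_METADATA_SIZE) / MSIZE;	/* inline mbufs */
	int region1 = nmbuf * MSIZE;
	int region3 = spare - region1;			/* holds metadata */

	printf("cluster %d: region1 %d, payload %d, region3 %d\n",
	    cl_size, region1, hw_size, region3);
}

int
main(void)
{
	/* matches the sge_flbuf_sizes entry MJUMPAGESIZE - 2*MSIZE - CL_METADATA_SIZE */
	carve_cluster(4096, 4096 - 2 * MSIZE - CL_METADATA_SIZE);
	return (0);
}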
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 29e139b..9673834 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -128,10 +128,11 @@ enum {
RX_FL_ESIZE = EQ_ESIZE, /* 8 64bit addresses */
#if MJUMPAGESIZE != MCLBYTES
- FL_BUF_SIZES_MAX = 5, /* cluster, jumbop, jumbo9k, jumbo16k, extra */
+ SW_ZONE_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */
#else
- FL_BUF_SIZES_MAX = 4, /* cluster, jumbo9k, jumbo16k, extra */
+ SW_ZONE_SIZES = 3, /* cluster, jumbo9k, jumbo16k */
#endif
+ CL_METADATA_SIZE = CACHE_LINE_SIZE,
CTRL_EQ_QSIZE = 128,
@@ -235,15 +236,28 @@ struct port_info {
uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */
};
-struct fl_sdesc {
- bus_dmamap_t map;
- caddr_t cl;
- uint8_t tag_idx; /* the fl->tag entry this map comes from */
+/* Where the cluster came from, how it has been carved up. */
+struct cluster_layout {
+ int8_t zidx;
+ int8_t hwidx;
+ uint16_t region1; /* mbufs laid out within this region */
+ /* region2 is the DMA region */
+ uint16_t region3; /* cluster_metadata within this region */
+};
+
+struct cluster_metadata {
+ u_int refcount;
#ifdef INVARIANTS
- __be64 ba_hwtag;
+ struct fl_sdesc *sd; /* For debug only. Could easily be stale */
#endif
};
+struct fl_sdesc {
+ caddr_t cl;
+ uint8_t nmbuf;
+ struct cluster_layout cll;
+};
+
struct tx_desc {
__be64 flit[8];
};
@@ -362,17 +376,19 @@ struct sge_eq {
uint32_t unstalled; /* recovered from stall */
};
-struct fl_buf_info {
- u_int size;
- int type;
- int hwtag:4; /* tag in low 4 bits of the pa. */
- uma_zone_t zone;
+struct sw_zone_info {
+ uma_zone_t zone; /* zone that this cluster comes from */
+ int size; /* size of cluster: 2K, 4K, 9K, 16K, etc. */
+ int type; /* EXT_xxx type of the cluster */
+ int8_t head_hwidx;
+ int8_t tail_hwidx;
+};
+
+struct hw_buf_info {
+ int8_t zidx; /* backpointer to zone; -ve means unused */
+ int8_t next; /* next hwidx for this zone; -1 means no more */
+ int size;
};
-#define FL_BUF_SIZES(sc) (sc->sge.fl_buf_sizes)
-#define FL_BUF_SIZE(sc, x) (sc->sge.fl_buf_info[x].size)
-#define FL_BUF_TYPE(sc, x) (sc->sge.fl_buf_info[x].type)
-#define FL_BUF_HWTAG(sc, x) (sc->sge.fl_buf_info[x].hwtag)
-#define FL_BUF_ZONE(sc, x) (sc->sge.fl_buf_info[x].zone)
enum {
FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */
@@ -386,9 +402,8 @@ enum {
struct sge_fl {
bus_dma_tag_t desc_tag;
bus_dmamap_t desc_map;
- bus_dma_tag_t tag[FL_BUF_SIZES_MAX]; /* only first FL_BUF_SIZES(sc) are
- valid */
- uint8_t tag_idx;
+ struct cluster_layout cll_def; /* default refill zone, layout */
+ struct cluster_layout cll_alt; /* alternate refill zone, layout */
struct mtx fl_lock;
char lockname[16];
int flags;
@@ -405,9 +420,17 @@ struct sge_fl {
uint32_t needed; /* # of buffers needed to fill up fl. */
uint32_t lowat; /* # of buffers <= this means fl needs help */
uint32_t pending; /* # of bufs allocated since last doorbell */
- u_int dmamap_failed;
- struct mbuf *mstash[8];
TAILQ_ENTRY(sge_fl) link; /* All starving freelists */
+
+ struct mbuf *m0;
+ struct mbuf **pnext;
+ u_int remaining;
+
+ uint64_t mbuf_allocated;/* # of mbuf allocated from zone_mbuf */
+ uint64_t mbuf_inlined; /* # of mbuf created within clusters */
+ uint64_t cl_allocated; /* # of clusters allocated */
+ uint64_t cl_recycled; /* # of clusters recycled */
+ uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */
};
/* txq: SGE egress queue + what's needed for Ethernet NIC */
@@ -541,8 +564,11 @@ struct sge {
struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */
struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */
- u_int fl_buf_sizes __aligned(CACHE_LINE_SIZE);
- struct fl_buf_info fl_buf_info[FL_BUF_SIZES_MAX];
+ int pack_boundary;
+ int8_t safe_hwidx1; /* may not have room for metadata */
+ int8_t safe_hwidx2; /* with room for metadata and maybe more */
+ struct sw_zone_info sw_zone_info[SW_ZONE_SIZES];
+ struct hw_buf_info hw_buf_info[SGE_FLBUF_SIZES];
};
struct rss_header;
diff --git a/sys/dev/cxgbe/common/t4_hw.h b/sys/dev/cxgbe/common/t4_hw.h
index b0b82bd..34f462c 100644
--- a/sys/dev/cxgbe/common/t4_hw.h
+++ b/sys/dev/cxgbe/common/t4_hw.h
@@ -87,6 +87,7 @@ enum {
SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */
SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */
SGE_MAX_IQ_SIZE = 65520,
+ SGE_FLBUF_SIZES = 16,
};
struct sge_qstat { /* data written to SGE queue status entries */
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index 3211c5d..4fddd35 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -494,6 +494,8 @@ CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS);
CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES);
+CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE);
+
static int
t4_probe(device_t dev)
{
@@ -4039,6 +4041,7 @@ static void
cxgbe_tick(void *arg)
{
struct port_info *pi = arg;
+ struct adapter *sc = pi->adapter;
struct ifnet *ifp = pi->ifp;
struct sge_txq *txq;
int i, drops;
@@ -4050,7 +4053,7 @@ cxgbe_tick(void *arg)
return; /* without scheduling another callout */
}
- t4_get_port_stats(pi->adapter, pi->tx_chan, s);
+ t4_get_port_stats(sc, pi->tx_chan, s);
ifp->if_opackets = s->tx_frames - s->tx_pause;
ifp->if_ipackets = s->rx_frames - s->rx_pause;
@@ -4061,6 +4064,19 @@ cxgbe_tick(void *arg)
ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
s->rx_trunc3;
+ for (i = 0; i < 4; i++) {
+ if (pi->rx_chan_map & (1 << i)) {
+ uint32_t v;
+
+ /*
+ * XXX: indirect reads from the same ADDR/DATA pair can
+ * race with each other.
+ */
+ t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v,
+ 1, A_TP_MIB_TNL_CNG_DROP_0 + i);
+ ifp->if_iqdrops += v;
+ }
+ }
drops = s->tx_drop;
for_each_txq(pi, i, txq)
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index a9bdb0e..2b2a689 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kdb.h>
#include <sys/malloc.h>
#include <sys/queue.h>
+#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/time.h>
#include <sys/sysctl.h>
@@ -51,6 +52,8 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <machine/md_var.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
#include "common/common.h"
#include "common/t4_regs.h"
@@ -123,6 +126,27 @@ static int t4_fl_pack;
static int t5_fl_pack;
TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);
+/*
+ * Allow the driver to create mbuf(s) in a cluster allocated for rx.
+ * 0: never; always allocate mbufs from the zone_mbuf UMA zone.
+ * 1: ok to create mbuf(s) within a cluster if there is room.
+ */
+static int allow_mbufs_in_cluster = 1;
+TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster);
+
+/*
+ * Largest rx cluster size that the driver is allowed to allocate.
+ */
+static int largest_rx_cluster = MJUM16BYTES;
+TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster);
+
+/*
+ * Size of cluster allocation that's most likely to succeed. The driver will
+ * fall back to this size if it fails to allocate clusters larger than this.
+ */
+static int safest_rx_cluster = PAGE_SIZE;
+TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster);
+
/* Used to track coalesced tx work request */
struct txpkts {
uint64_t *flitp; /* ptr to flit where next pkt should start */
@@ -139,9 +163,7 @@ struct sgl {
};
static int service_iq(struct sge_iq *, int);
-static struct mbuf *get_fl_payload1(struct adapter *, struct sge_fl *, uint32_t,
- int *);
-static struct mbuf *get_fl_payload2(struct adapter *, struct sge_fl *, uint32_t,
+static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
@@ -157,6 +179,8 @@ static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
int, int);
static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
+static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *,
+ struct sge_fl *);
static int alloc_fwq(struct adapter *);
static int free_fwq(struct adapter *);
static int alloc_mgmtq(struct adapter *);
@@ -190,7 +214,8 @@ static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
static void free_fl_sdesc(struct adapter *, struct sge_fl *);
-static void set_fl_tag_idx(struct adapter *, struct sge_fl *, int);
+static void find_best_refill_source(struct adapter *, struct sge_fl *, int);
+static void find_safe_refill_source(struct adapter *, struct sge_fl *);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
@@ -215,6 +240,7 @@ static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
struct mbuf *);
static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
+static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS);
/*
* Called on MOD_LOAD. Validates and calculates the SGE tunables.
@@ -263,7 +289,7 @@ t4_sge_modload(void)
/* T5's pack boundary is independent of the pad boundary. */
if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
!powerof2(fl_pack))
- t5_fl_pack = max(pad, 64);
+ t5_fl_pack = max(pad, CACHE_LINE_SIZE);
else
t5_fl_pack = fl_pack;
@@ -312,14 +338,18 @@ t4_tweak_chip_settings(struct adapter *sc)
int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
- int sw_flbuf_sizes[] = {
+ static int sge_flbuf_sizes[] = {
MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
MJUMPAGESIZE,
+ MJUMPAGESIZE - CL_METADATA_SIZE,
+ MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE,
#endif
MJUM9BYTES,
MJUM16BYTES,
- MJUMPAGESIZE - MSIZE
+ MCLBYTES - MSIZE - CL_METADATA_SIZE,
+ MJUM9BYTES - CL_METADATA_SIZE,
+ MJUM16BYTES - CL_METADATA_SIZE,
};
KASSERT(sc->flags & MASTER_PF,
@@ -357,9 +387,11 @@ t4_tweak_chip_settings(struct adapter *sc)
V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);
- for (i = 0; i < min(nitems(sw_flbuf_sizes), 16); i++) {
+ KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES,
+ ("%s: hw buffer size table too big", __func__));
+ for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) {
t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
- sw_flbuf_sizes[i]);
+ sge_flbuf_sizes[i]);
}
v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
@@ -414,6 +446,18 @@ t4_tweak_chip_settings(struct adapter *sc)
}
/*
+ * SGE wants the buffer to be at least 64B and then a multiple of the pad
+ * boundary or 16, whichever is greater.
+ */
+static inline int
+hwsz_ok(int hwsz)
+{
+ int mask = max(fl_pad, 16) - 1;
+
+ return (hwsz >= 64 && (hwsz & mask) == 0);
+}
+
+/*
* XXX: driver really should be able to deal with unexpected settings.
*/
int
@@ -423,7 +467,7 @@ t4_read_chip_settings(struct adapter *sc)
int i, j, n, rc = 0;
uint32_t m, v, r;
uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
- uint32_t sge_flbuf_sizes[16], sw_flbuf_sizes[] = {
+ static int sw_buf_sizes[] = { /* Sorted by size */
MCLBYTES,
#if MJUMPAGESIZE != MCLBYTES
MJUMPAGESIZE,
@@ -431,6 +475,8 @@ t4_read_chip_settings(struct adapter *sc)
MJUM9BYTES,
MJUM16BYTES
};
+ struct sw_zone_info *swz, *safe_swz;
+ struct hw_buf_info *hwb;
m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
@@ -461,6 +507,7 @@ t4_read_chip_settings(struct adapter *sc)
rc = EINVAL;
}
}
+ s->pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;
v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
@@ -476,45 +523,93 @@ t4_read_chip_settings(struct adapter *sc)
rc = EINVAL;
}
- /*
- * Make a list of SGE FL buffer sizes programmed in the chip and tally
- * it with the FL buffer sizes that we'd like to use.
- */
- n = 0;
- for (i = 0; i < nitems(sge_flbuf_sizes); i++) {
+ /* Filter out unusable hw buffer sizes entirely (mark with -2). */
+ hwb = &s->hw_buf_info[0];
+ for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
- sge_flbuf_sizes[i] = r;
- if (r == MJUMPAGESIZE - MSIZE &&
- (sc->flags & BUF_PACKING_OK) == 0) {
- sc->flags |= BUF_PACKING_OK;
- FL_BUF_HWTAG(sc, n) = i;
- FL_BUF_SIZE(sc, n) = MJUMPAGESIZE - MSIZE;
- FL_BUF_TYPE(sc, n) = m_gettype(MJUMPAGESIZE);
- FL_BUF_ZONE(sc, n) = m_getzone(MJUMPAGESIZE);
- n++;
- }
+ hwb->size = r;
+ hwb->zidx = hwsz_ok(r) ? -1 : -2;
+ hwb->next = -1;
}
- for (i = 0; i < nitems(sw_flbuf_sizes); i++) {
- for (j = 0; j < nitems(sge_flbuf_sizes); j++) {
- if (sw_flbuf_sizes[i] != sge_flbuf_sizes[j])
+
+ /*
+ * Create a sorted list in decreasing order of hw buffer sizes (and so
+ * increasing order of spare area) for each software zone.
+ */
+ n = 0; /* no usable buffer size to begin with */
+ swz = &s->sw_zone_info[0];
+ safe_swz = NULL;
+ for (i = 0; i < SW_ZONE_SIZES; i++, swz++) {
+ int8_t head = -1, tail = -1;
+
+ swz->size = sw_buf_sizes[i];
+ swz->zone = m_getzone(swz->size);
+ swz->type = m_gettype(swz->size);
+
+ if (swz->size == safest_rx_cluster)
+ safe_swz = swz;
+
+ hwb = &s->hw_buf_info[0];
+ for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
+ if (hwb->zidx != -1 || hwb->size > swz->size)
continue;
- FL_BUF_HWTAG(sc, n) = j;
- FL_BUF_SIZE(sc, n) = sw_flbuf_sizes[i];
- FL_BUF_TYPE(sc, n) = m_gettype(sw_flbuf_sizes[i]);
- FL_BUF_ZONE(sc, n) = m_getzone(sw_flbuf_sizes[i]);
+ hwb->zidx = i;
+ if (head == -1)
+ head = tail = j;
+ else if (hwb->size < s->hw_buf_info[tail].size) {
+ s->hw_buf_info[tail].next = j;
+ tail = j;
+ } else {
+ int8_t *cur;
+ struct hw_buf_info *t;
+
+ for (cur = &head; *cur != -1; cur = &t->next) {
+ t = &s->hw_buf_info[*cur];
+ if (hwb->size == t->size) {
+ hwb->zidx = -2;
+ break;
+ }
+ if (hwb->size > t->size) {
+ hwb->next = *cur;
+ *cur = j;
+ break;
+ }
+ }
+ }
+ }
+ swz->head_hwidx = head;
+ swz->tail_hwidx = tail;
+
+ if (tail != -1) {
n++;
- break;
+ if (swz->size - s->hw_buf_info[tail].size >=
+ CL_METADATA_SIZE)
+ sc->flags |= BUF_PACKING_OK;
}
}
if (n == 0) {
device_printf(sc->dev, "no usable SGE FL buffer size.\n");
rc = EINVAL;
- } else if (n == 1 && (sc->flags & BUF_PACKING_OK)) {
- device_printf(sc->dev,
- "no usable SGE FL buffer size when not packing buffers.\n");
- rc = EINVAL;
}
- FL_BUF_SIZES(sc) = n;
+
+ s->safe_hwidx1 = -1;
+ s->safe_hwidx2 = -1;
+ if (safe_swz != NULL) {
+ s->safe_hwidx1 = safe_swz->head_hwidx;
+ for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) {
+ int spare;
+
+ hwb = &s->hw_buf_info[i];
+ spare = safe_swz->size - hwb->size;
+ if (spare < CL_METADATA_SIZE)
+ continue;
+ if (s->safe_hwidx2 == -1 ||
+ spare == CL_METADATA_SIZE + MSIZE)
+ s->safe_hwidx2 = i;
+ if (spare >= CL_METADATA_SIZE + MSIZE)
+ break;
+ }
+ }
r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
s->counter_val[0] = G_THRESHOLD_0(r);
@@ -626,6 +721,10 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *children)
{
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes",
+ CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A",
+ "freelist buffer sizes");
+
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD,
NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");
@@ -643,8 +742,7 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
"pack multiple frames in one fl buffer");
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
- NULL, is_t5(sc) ? t5_fl_pack : t4_fl_pack,
- "payload pack boundary (bytes)");
+ NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)");
}
int
@@ -764,7 +862,7 @@ port_intr_iq(struct port_info *pi, int idx)
#ifdef TCP_OFFLOAD
if (sc->flags & INTR_DIRECT) {
idx %= pi->nrxq + pi->nofldrxq;
-
+
if (idx >= pi->nrxq) {
idx -= pi->nrxq;
iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq;
@@ -795,29 +893,28 @@ port_intr_iq(struct port_info *pi, int idx)
return (iq);
}
+/* Maximum payload that can be delivered with a single iq descriptor */
static inline int
-mtu_to_bufsize(int mtu)
+mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
{
- int bufsize;
-
- /* large enough for a frame even when VLAN extraction is disabled */
- bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu;
- bufsize = roundup2(bufsize + fl_pktshift, fl_pad);
-
- return (bufsize);
-}
+ int payload;
#ifdef TCP_OFFLOAD
-static inline int
-mtu_to_bufsize_toe(struct adapter *sc, int mtu)
-{
-
- if (sc->tt.rx_coalesce)
- return (G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)));
+ if (toe) {
+ payload = sc->tt.rx_coalesce ?
+ G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu;
+ } else {
+#endif
+ /* large enough even when hw VLAN extraction is disabled */
+ payload = fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
+ mtu;
+#ifdef TCP_OFFLOAD
+ }
+#endif
+ payload = roundup2(payload, fl_pad);
- return (mtu);
+ return (payload);
}
-#endif
int
t4_setup_port_queues(struct port_info *pi)
@@ -836,7 +933,7 @@ t4_setup_port_queues(struct port_info *pi)
struct ifnet *ifp = pi->ifp;
struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
- int bufsize, pack;
+ int maxp, pack, mtu = ifp->if_mtu;
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
NULL, "rx queues");
@@ -857,7 +954,7 @@ t4_setup_port_queues(struct port_info *pi)
* a) initialize iq and fl
* b) allocate queue iff it will take direct interrupts.
*/
- bufsize = mtu_to_bufsize(ifp->if_mtu);
+ maxp = mtu_to_max_payload(sc, mtu, 0);
pack = enable_buffer_packing(sc);
for_each_rxq(pi, i, rxq) {
@@ -866,7 +963,7 @@ t4_setup_port_queues(struct port_info *pi)
snprintf(name, sizeof(name), "%s rxq%d-fl",
device_get_nameunit(pi->dev), i);
- init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, bufsize, pack, name);
+ init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
@@ -882,8 +979,7 @@ t4_setup_port_queues(struct port_info *pi)
}
#ifdef TCP_OFFLOAD
- bufsize = mtu_to_bufsize_toe(sc, ifp->if_mtu);
- pack = 0; /* XXX: think about this some more */
+ maxp = mtu_to_max_payload(sc, mtu, 1);
for_each_ofld_rxq(pi, i, ofld_rxq) {
init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
@@ -891,8 +987,7 @@ t4_setup_port_queues(struct port_info *pi)
snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
device_get_nameunit(pi->dev), i);
- init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, bufsize, pack,
- name);
+ init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
if (sc->flags & INTR_DIRECT ||
(sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
@@ -1169,10 +1264,7 @@ service_iq(struct sge_iq *iq, int budget)
("%s: data for an iq (%p) with no freelist",
__func__, iq));
- m0 = fl->flags & FL_BUF_PACKING ?
- get_fl_payload1(sc, fl, lq, &fl_bufs_used) :
- get_fl_payload2(sc, fl, lq, &fl_bufs_used);
-
+ m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
if (__predict_false(m0 == NULL))
goto process_iql;
#ifdef T4_PKT_TIMESTAMP
@@ -1245,6 +1337,14 @@ service_iq(struct sge_iq *iq, int budget)
break;
}
+ if (fl_bufs_used >= 16) {
+ FL_LOCK(fl);
+ fl->needed += fl_bufs_used;
+ refill_fl(sc, fl, 32);
+ FL_UNLOCK(fl);
+ fl_bufs_used = 0;
+ }
+
iq_next(iq);
if (++ndescs == limit) {
t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
@@ -1261,14 +1361,6 @@ service_iq(struct sge_iq *iq, int budget)
}
#endif
- if (fl_bufs_used > 0) {
- FL_LOCK(fl);
- fl->needed += fl_bufs_used;
- refill_fl(sc, fl, fl->cap / 8);
- FL_UNLOCK(fl);
- fl_bufs_used = 0;
- }
-
if (budget)
return (EINPROGRESS);
}
@@ -1311,7 +1403,7 @@ process_iql:
FL_LOCK(fl);
fl->needed += fl_bufs_used;
- starved = refill_fl(sc, fl, fl->cap / 4);
+ starved = refill_fl(sc, fl, 64);
FL_UNLOCK(fl);
if (__predict_false(starved != 0))
add_fl_to_sfl(sc, fl);
@@ -1320,74 +1412,28 @@ process_iql:
return (0);
}
-static int
-fill_mbuf_stash(struct sge_fl *fl)
-{
- int i;
-
- for (i = 0; i < nitems(fl->mstash); i++) {
- if (fl->mstash[i] == NULL) {
- struct mbuf *m;
- if ((m = m_get(M_NOWAIT, MT_NOINIT)) == NULL)
- return (ENOBUFS);
- fl->mstash[i] = m;
- }
- }
- return (0);
-}
-
-static struct mbuf *
-get_mbuf_from_stash(struct sge_fl *fl)
+static inline int
+cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll)
{
- int i;
-
- for (i = 0; i < nitems(fl->mstash); i++) {
- if (fl->mstash[i] != NULL) {
- struct mbuf *m;
+ int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0;
- m = fl->mstash[i];
- fl->mstash[i] = NULL;
- return (m);
- } else
- fl->mstash[i] = m_get(M_NOWAIT, MT_NOINIT);
- }
+ if (rc)
+ MPASS(cll->region3 >= CL_METADATA_SIZE);
- return (m_get(M_NOWAIT, MT_NOINIT));
+ return (rc);
}
-static void
-return_mbuf_to_stash(struct sge_fl *fl, struct mbuf *m)
+static inline struct cluster_metadata *
+cl_metadata(struct adapter *sc, struct sge_fl *fl, struct cluster_layout *cll,
+ caddr_t cl)
{
- int i;
- if (m == NULL)
- return;
+ if (cl_has_metadata(fl, cll)) {
+ struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
- for (i = 0; i < nitems(fl->mstash); i++) {
- if (fl->mstash[i] == NULL) {
- fl->mstash[i] = m;
- return;
- }
+ return ((struct cluster_metadata *)(cl + swz->size) - 1);
}
- m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
- m_free(m);
-}
-
-/* buf can be any address within the buffer */
-static inline u_int *
-find_buf_refcnt(caddr_t buf)
-{
- uintptr_t ptr = (uintptr_t)buf;
-
- return ((u_int *)((ptr & ~(MJUMPAGESIZE - 1)) + MSIZE - sizeof(u_int)));
-}
-
-static inline struct mbuf *
-find_buf_mbuf(caddr_t buf)
-{
- uintptr_t ptr = (uintptr_t)buf;
-
- return ((struct mbuf *)(ptr & ~(MJUMPAGESIZE - 1)));
+ return (NULL);
}
static int
@@ -1395,179 +1441,117 @@ rxb_free(struct mbuf *m, void *arg1, void *arg2)
{
uma_zone_t zone = arg1;
caddr_t cl = arg2;
-#ifdef notyet
- u_int refcount;
- refcount = *find_buf_refcnt(cl);
- KASSERT(refcount == 0, ("%s: cl %p refcount is %u", __func__,
- cl - MSIZE, refcount));
-#endif
- cl -= MSIZE;
uma_zfree(zone, cl);
return (EXT_FREE_OK);
}
+/*
+ * The mbuf returned by this function could be allocated from zone_mbuf or
+ * constructed in spare room in the cluster.
+ *
+ * The mbuf carries the payload in one of these ways
+ * a) frame inside the mbuf (mbuf from zone_mbuf)
+ * b) m_cljset (for clusters without metadata) zone_mbuf
+ * c) m_extaddref (cluster with metadata) inline mbuf
+ * d) m_extaddref (cluster with metadata) zone_mbuf
+ */
static struct mbuf *
-get_fl_payload1(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
- int *fl_bufs_used)
+get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
{
- struct mbuf *m0, *m;
+ struct mbuf *m;
struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
- unsigned int nbuf, len;
- int pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;
-
- /*
- * No assertion for the fl lock because we don't need it. This routine
- * is called only from the rx interrupt handler and it only updates
- * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be
- * updated in the rx interrupt handler or the starvation helper routine.
- * That's why code that manipulates fl->pidx/fl->needed needs the fl
- * lock but this routine does not).
- */
+ struct cluster_layout *cll = &sd->cll;
+ struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
+ struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
+ struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
+ int len, padded_len;
+ caddr_t payload;
- KASSERT(fl->flags & FL_BUF_PACKING,
- ("%s: buffer packing disabled for fl %p", __func__, fl));
+ len = min(total, hwb->size - fl->rx_offset);
+ padded_len = roundup2(len, fl_pad);
+ payload = sd->cl + cll->region1 + fl->rx_offset;
- len = G_RSPD_LEN(len_newbuf);
-
- if ((len_newbuf & F_RSPD_NEWBUF) == 0) {
- KASSERT(fl->rx_offset > 0,
- ("%s: packed frame but driver at offset=0", __func__));
+ if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
- /* A packed frame is guaranteed to fit entirely in this buf. */
- KASSERT(FL_BUF_SIZE(sc, sd->tag_idx) - fl->rx_offset >= len,
- ("%s: packing error. bufsz=%u, offset=%u, len=%u",
- __func__, FL_BUF_SIZE(sc, sd->tag_idx), fl->rx_offset,
- len));
+ /*
+ * Copy payload into a freshly allocated mbuf.
+ */
- m0 = get_mbuf_from_stash(fl);
- if (m0 == NULL ||
- m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) {
- return_mbuf_to_stash(fl, m0);
+ m = flags & M_PKTHDR ?
+ m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
return (NULL);
- }
-
- bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
- BUS_DMASYNC_POSTREAD);
- if (sc->sc_do_rxcopy && (len < RX_COPY_THRESHOLD)) {
+ fl->mbuf_allocated++;
#ifdef T4_PKT_TIMESTAMP
- /* Leave room for a timestamp */
- m0->m_data += 8;
+ /* Leave room for a timestamp */
+ m->m_data += 8;
#endif
- bcopy(sd->cl + fl->rx_offset, mtod(m0, caddr_t), len);
- m0->m_pkthdr.len = len;
- m0->m_len = len;
- } else {
- m0->m_pkthdr.len = len;
- m0->m_len = len;
- m_extaddref(m0, sd->cl + fl->rx_offset,
- roundup2(m0->m_len, fl_pad),
- find_buf_refcnt(sd->cl), rxb_free,
- FL_BUF_ZONE(sc, sd->tag_idx), sd->cl);
- }
- fl->rx_offset += len;
- fl->rx_offset = roundup2(fl->rx_offset, fl_pad);
- fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
- if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
- fl->rx_offset = 0;
- (*fl_bufs_used) += 1;
- if (__predict_false(++fl->cidx == fl->cap))
- fl->cidx = 0;
- }
+ /* copy data to mbuf */
+ bcopy(payload, mtod(m, caddr_t), len);
- return (m0);
- }
+ } else if (sd->nmbuf * MSIZE < cll->region1) {
- KASSERT(len_newbuf & F_RSPD_NEWBUF,
- ("%s: only new buffer handled here", __func__));
+ /*
+ * There's spare room in the cluster for an mbuf. Create one
+ * and associate it with the payload that's in the cluster too.
+ */
- nbuf = 0;
+ MPASS(clm != NULL);
+ m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE);
+ /* No bzero required */
+ if (m_init(m, NULL, 0, M_NOWAIT, MT_DATA, flags | M_NOFREE))
+ return (NULL);
+ fl->mbuf_inlined++;
+ m_extaddref(m, payload, padded_len, &clm->refcount, rxb_free,
+ swz->zone, sd->cl);
+ sd->nmbuf++;
- /*
- * Move to the start of the next buffer if we are still in the middle of
- * some buffer. This is the case where there was some room left in the
- * previous buffer but not enough to fit this frame in its entirety.
- */
- if (fl->rx_offset > 0) {
- KASSERT(roundup2(len, fl_pad) > FL_BUF_SIZE(sc, sd->tag_idx) -
- fl->rx_offset, ("%s: frame (%u bytes) should have fit at "
- "cidx %u offset %u bufsize %u", __func__, len, fl->cidx,
- fl->rx_offset, FL_BUF_SIZE(sc, sd->tag_idx)));
- nbuf++;
- fl->rx_offset = 0;
- sd++;
- if (__predict_false(++fl->cidx == fl->cap)) {
- sd = fl->sdesc;
- fl->cidx = 0;
- }
- }
+ } else {
- m0 = find_buf_mbuf(sd->cl);
- if (m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE))
- goto done;
- bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
- m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
- m_extaddref(m0, sd->cl, roundup2(m0->m_len, fl_pad),
- find_buf_refcnt(sd->cl), rxb_free, FL_BUF_ZONE(sc, sd->tag_idx),
- sd->cl);
- m0->m_pkthdr.len = len;
-
- fl->rx_offset = roundup2(m0->m_len, fl_pad);
- fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
- if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
- fl->rx_offset = 0;
- nbuf++;
- sd++;
- if (__predict_false(++fl->cidx == fl->cap)) {
- sd = fl->sdesc;
- fl->cidx = 0;
+ /*
+ * Grab an mbuf from zone_mbuf and associate it with the
+ * payload in the cluster.
+ */
+
+ m = flags & M_PKTHDR ?
+ m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
+ if (m == NULL)
+ return (NULL);
+ fl->mbuf_allocated++;
+ if (clm != NULL)
+ m_extaddref(m, payload, padded_len, &clm->refcount,
+ rxb_free, swz->zone, sd->cl);
+ else {
+ m_cljset(m, sd->cl, swz->type);
+ sd->cl = NULL; /* consumed, not a recycle candidate */
}
}
+ if (flags & M_PKTHDR)
+ m->m_pkthdr.len = total;
+ m->m_len = len;
- m = m0;
- len -= m->m_len;
+ if (fl->flags & FL_BUF_PACKING) {
+ fl->rx_offset += roundup2(padded_len, sc->sge.pack_boundary);
+ MPASS(fl->rx_offset <= hwb->size);
+ if (fl->rx_offset < hwb->size)
+ return (m); /* without advancing the cidx */
+ }
- while (len > 0) {
- m->m_next = find_buf_mbuf(sd->cl);
- m = m->m_next;
-
- bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
- BUS_DMASYNC_POSTREAD);
-
- /* m_init for !M_PKTHDR can't fail so don't bother */
- m_init(m, NULL, 0, M_NOWAIT, MT_DATA, M_NOFREE);
- m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
- m_extaddref(m, sd->cl, roundup2(m->m_len, fl_pad),
- find_buf_refcnt(sd->cl), rxb_free,
- FL_BUF_ZONE(sc, sd->tag_idx), sd->cl);
-
- fl->rx_offset = roundup2(m->m_len, fl_pad);
- fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
- if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
- fl->rx_offset = 0;
- nbuf++;
- sd++;
- if (__predict_false(++fl->cidx == fl->cap)) {
- sd = fl->sdesc;
- fl->cidx = 0;
- }
- }
+ if (__predict_false(++fl->cidx == fl->cap))
+ fl->cidx = 0;
+ fl->rx_offset = 0;
- len -= m->m_len;
- }
-done:
- (*fl_bufs_used) += nbuf;
- return (m0);
+ return (m);
}
static struct mbuf *
-get_fl_payload2(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
+get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
int *fl_bufs_used)
{
- struct mbuf *m0, *m;
- struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
- unsigned int nbuf, len;
+ struct mbuf *m0, *m, **pnext;
+ u_int nbuf, len;
/*
* No assertion for the fl lock because we don't need it. This routine
@@ -1578,87 +1562,54 @@ get_fl_payload2(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
* lock but this routine does not).
*/
- KASSERT((fl->flags & FL_BUF_PACKING) == 0,
- ("%s: buffer packing enabled for fl %p", __func__, fl));
- if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
- panic("%s: cannot handle packed frames", __func__);
+ nbuf = 0;
len = G_RSPD_LEN(len_newbuf);
-
- /*
- * We never want to run out of mbufs in between a frame when a frame
- * spans multiple fl buffers. If the fl's mbuf stash isn't full and
- * can't be filled up to the brim then fail early.
- */
- if (len > FL_BUF_SIZE(sc, sd->tag_idx) && fill_mbuf_stash(fl) != 0)
- return (NULL);
-
- m0 = get_mbuf_from_stash(fl);
- if (m0 == NULL ||
- m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) {
- return_mbuf_to_stash(fl, m0);
- return (NULL);
+ if (__predict_false(fl->m0 != NULL)) {
+ MPASS(len == fl->m0->m_pkthdr.len);
+ MPASS(fl->remaining < len);
+
+ m0 = fl->m0;
+ pnext = fl->pnext;
+ len = fl->remaining;
+ fl->m0 = NULL;
+ goto get_segment;
}
- bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
-
- if (sc->sc_do_rxcopy && (len < RX_COPY_THRESHOLD)) {
-#ifdef T4_PKT_TIMESTAMP
- /* Leave room for a timestamp */
- m0->m_data += 8;
-#endif
- /* copy data to mbuf, buffer will be recycled */
- bcopy(sd->cl, mtod(m0, caddr_t), len);
- m0->m_len = len;
- } else {
- bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
- m_cljset(m0, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx));
- sd->cl = NULL; /* consumed */
- m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
- }
- m0->m_pkthdr.len = len;
-
- sd++;
- if (__predict_false(++fl->cidx == fl->cap)) {
- sd = fl->sdesc;
- fl->cidx = 0;
+ if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) {
+ nbuf++;
+ fl->rx_offset = 0;
+ if (__predict_false(++fl->cidx == fl->cap))
+ fl->cidx = 0;
}
- m = m0;
- len -= m->m_len;
- nbuf = 1; /* # of fl buffers used */
+ /*
+ * Payload starts at rx_offset in the current hw buffer. Its length is
+ * 'len' and it may span multiple hw buffers.
+ */
+ m0 = get_scatter_segment(sc, fl, len, M_PKTHDR);
+ len -= m0->m_len;
+ pnext = &m0->m_next;
while (len > 0) {
- /* Can't fail, we checked earlier that the stash was full. */
- m->m_next = get_mbuf_from_stash(fl);
- m = m->m_next;
-
- bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
- BUS_DMASYNC_POSTREAD);
-
- /* m_init for !M_PKTHDR can't fail so don't bother */
- m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
- if (len <= MLEN) {
- bcopy(sd->cl, mtod(m, caddr_t), len);
- m->m_len = len;
- } else {
- bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
- m_cljset(m, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx));
- sd->cl = NULL; /* consumed */
- m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
- }
-
- sd++;
- if (__predict_false(++fl->cidx == fl->cap)) {
- sd = fl->sdesc;
- fl->cidx = 0;
+ nbuf++;
+get_segment:
+ MPASS(fl->rx_offset == 0);
+ m = get_scatter_segment(sc, fl, len, 0);
+ if (m == NULL) {
+ fl->m0 = m0;
+ fl->pnext = pnext;
+ fl->remaining = len;
+ return (NULL);
}
-
+ *pnext = m;
+ pnext = &m->m_next;
len -= m->m_len;
- nbuf++;
}
+ *pnext = NULL;
+ if (fl->rx_offset == 0)
+ nbuf++;
(*fl_bufs_used) += nbuf;
-
return (m0);
}
@@ -2015,23 +1966,23 @@ t4_update_fl_bufsize(struct ifnet *ifp)
struct sge_ofld_rxq *ofld_rxq;
#endif
struct sge_fl *fl;
- int i, bufsize;
+ int i, maxp, mtu = ifp->if_mtu;
- bufsize = mtu_to_bufsize(ifp->if_mtu);
+ maxp = mtu_to_max_payload(sc, mtu, 0);
for_each_rxq(pi, i, rxq) {
fl = &rxq->fl;
FL_LOCK(fl);
- set_fl_tag_idx(sc, fl, bufsize);
+ find_best_refill_source(sc, fl, maxp);
FL_UNLOCK(fl);
}
#ifdef TCP_OFFLOAD
- bufsize = mtu_to_bufsize_toe(pi->adapter, ifp->if_mtu);
+ maxp = mtu_to_max_payload(sc, mtu, 1);
for_each_ofld_rxq(pi, i, ofld_rxq) {
fl = &ofld_rxq->fl;
FL_LOCK(fl);
- set_fl_tag_idx(sc, fl, bufsize);
+ find_best_refill_source(sc, fl, maxp);
FL_UNLOCK(fl);
}
#endif
@@ -2065,7 +2016,7 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
}
static inline void
-init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int bufsize, int pack,
+init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, int pack,
char *name)
{
@@ -2073,7 +2024,8 @@ init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int bufsize, int pack,
strlcpy(fl->lockname, name, sizeof(fl->lockname));
if (pack)
fl->flags |= FL_BUF_PACKING;
- set_fl_tag_idx(sc, fl, bufsize);
+ find_best_refill_source(sc, fl, maxp);
+ find_safe_refill_source(sc, fl);
}
static inline void
@@ -2202,24 +2154,6 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
if (fl) {
mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
- for (i = 0; i < FL_BUF_SIZES(sc); i++) {
-
- /*
- * A freelist buffer must be 16 byte aligned as the SGE
- * uses the low 4 bits of the bus addr to figure out the
- * buffer size.
- */
- rc = bus_dma_tag_create(sc->dmat, 16, 0,
- BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
- FL_BUF_SIZE(sc, i), 1, FL_BUF_SIZE(sc, i),
- BUS_DMA_ALLOCNOW, NULL, NULL, &fl->tag[i]);
- if (rc != 0) {
- device_printf(sc->dev,
- "failed to create fl DMA tag[%d]: %d\n",
- i, rc);
- return (rc);
- }
- }
len = fl->qsize * RX_FL_ESIZE;
rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
&fl->ba, (void **)&fl->desc);
@@ -2336,7 +2270,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
static int
free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
{
- int i, rc;
+ int rc;
struct adapter *sc = iq->adapter;
device_t dev;
@@ -2368,29 +2302,48 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
if (fl->sdesc)
free_fl_sdesc(sc, fl);
- for (i = 0; i < nitems(fl->mstash); i++) {
- struct mbuf *m = fl->mstash[i];
-
- if (m != NULL) {
- m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
- m_free(m);
- }
- }
-
if (mtx_initialized(&fl->fl_lock))
mtx_destroy(&fl->fl_lock);
- for (i = 0; i < FL_BUF_SIZES(sc); i++) {
- if (fl->tag[i])
- bus_dma_tag_destroy(fl->tag[i]);
- }
-
bzero(fl, sizeof(*fl));
}
return (0);
}
+static void
+add_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
+ struct sge_fl *fl)
+{
+ struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
+
+ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL,
+ "freelist");
+ children = SYSCTL_CHILDREN(oid);
+
+ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
+ CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
+ "SGE context id of the freelist");
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
+ 0, "consumer index");
+ if (fl->flags & FL_BUF_PACKING) {
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset",
+ CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset");
+ }
+ SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx,
+ 0, "producer index");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated",
+ CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined",
+ CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated",
+ CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled",
+ CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled");
+ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled",
+ CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)");
+}
+
static int
alloc_fwq(struct adapter *sc)
{
@@ -2531,22 +2484,7 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
CTLFLAG_RD, &rxq->vlan_extraction,
"# of times hardware extracted 802.1Q tag");
- children = SYSCTL_CHILDREN(oid);
- oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
- NULL, "freelist");
- children = SYSCTL_CHILDREN(oid);
-
- SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
- CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I",
- "SGE context id of the queue");
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
- &rxq->fl.cidx, 0, "consumer index");
- if (rxq->fl.flags & FL_BUF_PACKING) {
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "rx_offset",
- CTLFLAG_RD, &rxq->fl.rx_offset, 0, "packing rx offset");
- }
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
- &rxq->fl.pidx, 0, "producer index");
+ add_fl_sysctls(&pi->ctx, oid, &rxq->fl);
return (rc);
}
@@ -2601,18 +2539,7 @@ alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq,
CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I",
"consumer index");
- children = SYSCTL_CHILDREN(oid);
- oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
- NULL, "freelist");
- children = SYSCTL_CHILDREN(oid);
-
- SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
- CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16,
- "I", "SGE context id of the queue");
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
- &ofld_rxq->fl.cidx, 0, "consumer index");
- SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
- &ofld_rxq->fl.pidx, 0, "producer index");
+ add_fl_sysctls(&pi->ctx, oid, &ofld_rxq->fl);
return (rc);
}
@@ -3100,103 +3027,83 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs)
{
__be64 *d = &fl->desc[fl->pidx];
struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
- bus_dma_tag_t tag;
- bus_addr_t pa;
+ uintptr_t pa;
caddr_t cl;
- int rc;
+ struct cluster_layout *cll = &fl->cll_def; /* default layout */
+ struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
+ struct cluster_metadata *clm;
FL_LOCK_ASSERT_OWNED(fl);
-#ifdef INVARIANTS
- if (fl->flags & FL_BUF_PACKING)
- KASSERT(sd->tag_idx == 0,
- ("%s: expected tag 0 but found tag %d at pidx %u instead",
- __func__, sd->tag_idx, fl->pidx));
-#endif
if (nbufs > fl->needed)
nbufs = fl->needed;
+ nbufs -= (fl->pidx + nbufs) % 8;
while (nbufs--) {
if (sd->cl != NULL) {
- KASSERT(*d == sd->ba_hwtag,
- ("%s: recyling problem at pidx %d",
- __func__, fl->pidx));
-
- if (fl->flags & FL_BUF_PACKING) {
- u_int *refcount = find_buf_refcnt(sd->cl);
-
- if (atomic_fetchadd_int(refcount, -1) == 1) {
- *refcount = 1; /* reinstate */
- d++;
- goto recycled;
- }
- sd->cl = NULL; /* gave up my reference */
- } else {
+ if (sd->nmbuf == 0) {
/*
- * This happens when a frame small enough to fit
- * entirely in an mbuf was received in cl last
- * time. We'd held on to cl and can reuse it
- * now. Note that we reuse a cluster of the old
- * size if fl->tag_idx is no longer the same as
- * sd->tag_idx.
+ * Fast recycle without involving any atomics on
+ * the cluster's metadata (if the cluster has
+ * metadata). This happens when all frames
+ * received in the cluster were small enough to
+ * fit within a single mbuf each.
*/
- d++;
- goto recycled;
+ fl->cl_fast_recycled++;
+ goto recycled_fast;
}
- }
-
- if (__predict_false(fl->tag_idx != sd->tag_idx)) {
- bus_dmamap_t map;
- bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
- bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
/*
- * An MTU change can get us here. Discard the old map
- * which was created with the old tag, but only if
- * we're able to get a new one.
+ * Cluster is guaranteed to have metadata. Clusters
+ * without metadata always take the fast recycle path
+ * when they're recycled.
*/
- rc = bus_dmamap_create(newtag, 0, &map);
- if (rc == 0) {
- bus_dmamap_destroy(oldtag, sd->map);
- sd->map = map;
- sd->tag_idx = fl->tag_idx;
- }
- }
-
- tag = fl->tag[sd->tag_idx];
+ clm = cl_metadata(sc, fl, &sd->cll, sd->cl);
+ MPASS(clm != NULL);
- cl = uma_zalloc(FL_BUF_ZONE(sc, sd->tag_idx), M_NOWAIT);
- if (cl == NULL)
- break;
- if (fl->flags & FL_BUF_PACKING) {
- *find_buf_refcnt(cl) = 1;
- cl += MSIZE;
+ if (atomic_fetchadd_int(&clm->refcount, -1) == 1) {
+ fl->cl_recycled++;
+ goto recycled;
+ }
+ sd->cl = NULL; /* gave up my reference */
}
+ MPASS(sd->cl == NULL);
+alloc:
+ cl = uma_zalloc(swz->zone, M_NOWAIT);
+ if (__predict_false(cl == NULL)) {
+ if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 ||
+ fl->cll_def.zidx == fl->cll_alt.zidx)
+ break;
- rc = bus_dmamap_load(tag, sd->map, cl,
- FL_BUF_SIZE(sc, sd->tag_idx), oneseg_dma_callback, &pa, 0);
- if (rc != 0 || pa == 0) {
- fl->dmamap_failed++;
- if (fl->flags & FL_BUF_PACKING)
- cl -= MSIZE;
- uma_zfree(FL_BUF_ZONE(sc, sd->tag_idx), cl);
- break;
+ /* fall back to the safe zone */
+ cll = &fl->cll_alt;
+ swz = &sc->sge.sw_zone_info[cll->zidx];
+ goto alloc;
}
+ fl->cl_allocated++;
+ pa = pmap_kextract((vm_offset_t)cl);
+ pa += cll->region1;
sd->cl = cl;
- *d++ = htobe64(pa | FL_BUF_HWTAG(sc, sd->tag_idx));
-
+ sd->cll = *cll;
+ *d = htobe64(pa | cll->hwidx);
+ clm = cl_metadata(sc, fl, cll, cl);
+ if (clm != NULL) {
+recycled:
#ifdef INVARIANTS
- sd->ba_hwtag = htobe64(pa | FL_BUF_HWTAG(sc, sd->tag_idx));
+ clm->sd = sd;
#endif
-
-recycled:
+ clm->refcount = 1;
+ }
+ sd->nmbuf = 0;
+recycled_fast:
fl->pending++;
fl->needed--;
+ d++;
sd++;
- if (++fl->pidx == fl->cap) {
+ if (__predict_false(++fl->pidx == fl->cap)) {
fl->pidx = 0;
sd = fl->sdesc;
d = fl->desc;
@@ -3237,53 +3144,33 @@ refill_sfl(void *arg)
static int
alloc_fl_sdesc(struct sge_fl *fl)
{
- struct fl_sdesc *sd;
- bus_dma_tag_t tag;
- int i, rc;
fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
M_ZERO | M_WAITOK);
- tag = fl->tag[fl->tag_idx];
- sd = fl->sdesc;
- for (i = 0; i < fl->cap; i++, sd++) {
-
- sd->tag_idx = fl->tag_idx;
- rc = bus_dmamap_create(tag, 0, &sd->map);
- if (rc != 0)
- goto failed;
- }
-
return (0);
-failed:
- while (--i >= 0) {
- sd--;
- bus_dmamap_destroy(tag, sd->map);
- }
- KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
-
- free(fl->sdesc, M_CXGBE);
- fl->sdesc = NULL;
-
- return (rc);
}
static void
free_fl_sdesc(struct adapter *sc, struct sge_fl *fl)
{
struct fl_sdesc *sd;
+ struct cluster_metadata *clm;
+ struct cluster_layout *cll;
int i;
sd = fl->sdesc;
for (i = 0; i < fl->cap; i++, sd++) {
+ if (sd->cl == NULL)
+ continue;
- if (sd->cl) {
- bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
- uma_zfree(FL_BUF_ZONE(sc, sd->tag_idx), sd->cl);
- sd->cl = NULL;
+ cll = &sd->cll;
+ clm = cl_metadata(sc, fl, cll, sd->cl);
+ if (sd->nmbuf == 0 ||
+ (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1)) {
+ uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl);
}
-
- bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
+ sd->cl = NULL;
}
free(fl->sdesc, M_CXGBE);
@@ -4107,7 +3994,7 @@ write_eqflush_wr(struct sge_eq *eq)
eq->pending++;
eq->avail--;
if (++eq->pidx == eq->cap)
- eq->pidx = 0;
+ eq->pidx = 0;
}
static __be64
@@ -4134,52 +4021,167 @@ get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
return (0);
}
-/*
- * Find an SGE FL buffer size to use for the given bufsize. Look for the the
- * smallest size that is large enough to hold bufsize or pick the largest size
- * if all sizes are less than bufsize.
- */
static void
-set_fl_tag_idx(struct adapter *sc, struct sge_fl *fl, int bufsize)
+find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp)
{
- int i, largest, best, delta, start;
+ int8_t zidx, hwidx, idx;
+ uint16_t region1, region3;
+ int spare, spare_needed, n;
+ struct sw_zone_info *swz;
+ struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0];
- if (fl->flags & FL_BUF_PACKING) {
- fl->tag_idx = 0; /* first tag is the one for packing */
- return;
- }
+ /*
+ * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize
+ * large enough for the max payload and cluster metadata. Otherwise
+ * settle for the largest bufsize that leaves enough room in the cluster
+ * for metadata.
+ *
+ * Without buffer packing: Look for the smallest zone which has a
+ * bufsize large enough for the max payload. Settle for the largest
+ * bufsize available if there's nothing big enough for max payload.
+ */
+ spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0;
+ swz = &sc->sge.sw_zone_info[0];
+ hwidx = -1;
+ for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) {
+ if (swz->size > largest_rx_cluster) {
+ if (__predict_true(hwidx != -1))
+ break;
- start = sc->flags & BUF_PACKING_OK ? 1 : 0;
- delta = FL_BUF_SIZE(sc, start) - bufsize;
- if (delta == 0) {
- fl->tag_idx = start; /* ideal fit, look no further */
- return;
- }
- best = start;
- largest = start;
+ /*
+ * This is a misconfiguration. largest_rx_cluster is
+ * preventing us from finding a refill source. See
+ * dev.t5nex.<n>.buffer_sizes to figure out why.
+ */
+ device_printf(sc->dev, "largest_rx_cluster=%u leaves no"
+ " refill source for fl %p (dma %u). Ignored.\n",
+ largest_rx_cluster, fl, maxp);
+ }
+ for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) {
+ hwb = &hwb_list[idx];
+ spare = swz->size - hwb->size;
+ if (spare < spare_needed)
+ continue;
- for (i = start + 1; i < FL_BUF_SIZES(sc); i++) {
- int d, fl_buf_size;
+ hwidx = idx; /* best option so far */
+ if (hwb->size >= maxp) {
- fl_buf_size = FL_BUF_SIZE(sc, i);
- d = fl_buf_size - bufsize;
+ if ((fl->flags & FL_BUF_PACKING) == 0)
+ goto done; /* stop looking (not packing) */
- if (d == 0) {
- fl->tag_idx = i; /* ideal fit, look no further */
- return;
+ if (swz->size >= safest_rx_cluster)
+ goto done; /* stop looking (packing) */
+ }
+ break; /* keep looking, next zone */
}
- if (fl_buf_size > FL_BUF_SIZE(sc, largest))
- largest = i;
- if (d > 0 && (delta < 0 || delta > d)) {
- delta = d;
- best = i;
+ }
+done:
+ /* A usable hwidx has been located. */
+ MPASS(hwidx != -1);
+ hwb = &hwb_list[hwidx];
+ zidx = hwb->zidx;
+ swz = &sc->sge.sw_zone_info[zidx];
+ region1 = 0;
+ region3 = swz->size - hwb->size;
+
+ /*
+ * Stay within this zone and see if there is a better match when mbuf
+ * inlining is allowed. Remember that the hwidx's are sorted in
+ * decreasing order of size (so in increasing order of spare area).
+ */
+ for (idx = hwidx; idx != -1; idx = hwb->next) {
+ hwb = &hwb_list[idx];
+ spare = swz->size - hwb->size;
+
+ if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
+ break;
+ if (spare < CL_METADATA_SIZE + MSIZE)
+ continue;
+ n = (spare - CL_METADATA_SIZE) / MSIZE;
+ if (n > howmany(hwb->size, maxp))
+ break;
+
+ hwidx = idx;
+ if (fl->flags & FL_BUF_PACKING) {
+ region1 = n * MSIZE;
+ region3 = spare - region1;
+ } else {
+ region1 = MSIZE;
+ region3 = spare - region1;
+ break;
}
}
- if (delta > 0)
- fl->tag_idx = best; /* Found a buf bigger than bufsize */
+ KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES,
+ ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp));
+ KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES,
+ ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp));
+ KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 ==
+ sc->sge.sw_zone_info[zidx].size,
+ ("%s: bad buffer layout for fl %p, maxp %d. "
+ "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
+ sc->sge.sw_zone_info[zidx].size, region1,
+ sc->sge.hw_buf_info[hwidx].size, region3));
+ if (fl->flags & FL_BUF_PACKING || region1 > 0) {
+ KASSERT(region3 >= CL_METADATA_SIZE,
+ ("%s: no room for metadata. fl %p, maxp %d; "
+ "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
+ sc->sge.sw_zone_info[zidx].size, region1,
+ sc->sge.hw_buf_info[hwidx].size, region3));
+ KASSERT(region1 % MSIZE == 0,
+ ("%s: bad mbuf region for fl %p, maxp %d. "
+ "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp,
+ sc->sge.sw_zone_info[zidx].size, region1,
+ sc->sge.hw_buf_info[hwidx].size, region3));
+ }
+
+ fl->cll_def.zidx = zidx;
+ fl->cll_def.hwidx = hwidx;
+ fl->cll_def.region1 = region1;
+ fl->cll_def.region3 = region3;
+}
+
+static void
+find_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
+{
+ struct sge *s = &sc->sge;
+ struct hw_buf_info *hwb;
+ struct sw_zone_info *swz;
+ int spare;
+ int8_t hwidx;
+
+ if (fl->flags & FL_BUF_PACKING)
+ hwidx = s->safe_hwidx2; /* with room for metadata */
+ else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) {
+ hwidx = s->safe_hwidx2;
+ hwb = &s->hw_buf_info[hwidx];
+ swz = &s->sw_zone_info[hwb->zidx];
+ spare = swz->size - hwb->size;
+
+ /* no good if there isn't room for an mbuf as well */
+ if (spare < CL_METADATA_SIZE + MSIZE)
+ hwidx = s->safe_hwidx1;
+ } else
+ hwidx = s->safe_hwidx1;
+
+ if (hwidx == -1) {
+ /* No fallback source */
+ fl->cll_alt.hwidx = -1;
+ fl->cll_alt.zidx = -1;
+
+ return;
+ }
+
+ hwb = &s->hw_buf_info[hwidx];
+ swz = &s->sw_zone_info[hwb->zidx];
+ spare = swz->size - hwb->size;
+ fl->cll_alt.hwidx = hwidx;
+ fl->cll_alt.zidx = hwb->zidx;
+ if (allow_mbufs_in_cluster)
+ fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
else
- fl->tag_idx = largest; /* No buf large enough for bufsize */
+ fl->cll_alt.region1 = 0;
+ fl->cll_alt.region3 = spare - fl->cll_alt.region1;
}
static void
@@ -4256,3 +4258,29 @@ sysctl_uint16(SYSCTL_HANDLER_ARGS)
return sysctl_handle_int(oidp, &i, 0, req);
}
+
+static int
+sysctl_bufsizes(SYSCTL_HANDLER_ARGS)
+{
+ struct sge *s = arg1;
+ struct hw_buf_info *hwb = &s->hw_buf_info[0];
+ struct sw_zone_info *swz = &s->sw_zone_info[0];
+ int i, rc;
+ struct sbuf sb;
+ char c;
+
+ sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND);
+ for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
+ if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster)
+ c = '*';
+ else
+ c = '\0';
+
+ sbuf_printf(&sb, "%u%c ", hwb->size, c);
+ }
+ sbuf_trim(&sb);
+ sbuf_finish(&sb);
+ rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
+ sbuf_delete(&sb);
+ return (rc);
+}