diff options
-rw-r--r-- | share/man/man4/cxgbe.4 | 61 | ||||
-rw-r--r-- | sys/dev/cxgbe/adapter.h | 74 | ||||
-rw-r--r-- | sys/dev/cxgbe/common/t4_hw.h | 1 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_main.c | 18 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_sge.c | 1130 |
5 files changed, 695 insertions, 589 deletions
diff --git a/share/man/man4/cxgbe.4 b/share/man/man4/cxgbe.4 index 68540c3..82966bd 100644 --- a/share/man/man4/cxgbe.4 +++ b/share/man/man4/cxgbe.4 @@ -1,4 +1,4 @@ -.\" Copyright (c) 2011-2013, Chelsio Inc +.\" Copyright (c) 2011-2014, Chelsio Inc .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 18, 2013 +.Dd March 20, 2014 .Dt CXGBE 4 .Os .Sh NAME @@ -141,11 +141,11 @@ prompt before booting the kernel or stored in .Xr loader.conf 5 . .Bl -tag -width indent .It Va hw.cxgbe.ntxq10g -The number of tx queues to use for a 10Gb port. +The number of tx queues to use for a 10Gb or 40Gb port. The default is 16 or the number of CPU cores in the system, whichever is less. .It Va hw.cxgbe.nrxq10g -The number of rx queues to use for a 10Gb port. +The number of rx queues to use for a 10Gb or 40Gb port. The default is 8 or the number of CPU cores in the system, whichever is less. .It Va hw.cxgbe.ntxq1g @@ -157,11 +157,11 @@ The number of rx queues to use for a 1Gb port. The default is 2 or the number of CPU cores in the system, whichever is less. .It Va hw.cxgbe.nofldtxq10g -The number of TOE tx queues to use for a 10Gb port. +The number of TOE tx queues to use for a 10Gb or 40Gb port. The default is 8 or the number of CPU cores in the system, whichever is less. .It Va hw.cxgbe.nofldrxq10g -The number of TOE rx queues to use for a 10Gb port. +The number of TOE rx queues to use for a 10Gb or 40Gb port. The default is 2 or the number of CPU cores in the system, whichever is less. .It Va hw.cxgbe.nofldtxq1g @@ -177,8 +177,7 @@ The timer index value to use to delay interrupts. The holdoff timer list has the values 1, 5, 10, 50, 100, and 200 by default (all values are in microseconds) and the index selects a value from this list. -The default value is 1 for both 10Gb and 1Gb ports, which means the -timer value is 5us. +The default value is 1 which means the timer value is 5us. 
Different interfaces can be assigned different values at any time via the dev.cxgbe.X.holdoff_tmr_idx or dev.cxl.X.holdoff_tmr_idx sysctl. .It Va hw.cxgbe.holdoff_pktc_idx_10G @@ -186,9 +185,8 @@ dev.cxgbe.X.holdoff_tmr_idx or dev.cxl.X.holdoff_tmr_idx sysctl. The packet-count index value to use to delay interrupts. The packet-count list has the values 1, 8, 16, and 32 by default and the index selects a value from this list. -The default value is -1 for both 10Gb and 1Gb ports, which means packet -counting is disabled and interrupts are generated based solely on the -holdoff timer value. +The default value is -1 which means packet counting is disabled and interrupts +are generated based solely on the holdoff timer value. Different interfaces can be assigned different values via the dev.cxgbe.X.holdoff_pktc_idx or dev.cxl.X.holdoff_pktc_idx sysctl. This sysctl works only when the interface has never been marked up (as done by @@ -228,6 +226,43 @@ already on the card. long as it is compatible with the driver and is a different version than the one already on the card. The default is 1. +.It Va hw.cxgbe.fl_pktshift +The number of bytes of padding inserted before the beginning of an Ethernet +frame in the receive buffer. +The default value of 2 ensures that the Ethernet payload (usually the IP header) +is at a 4 byte aligned address. +0-7 are all valid values. +.It Va hw.cxgbe.fl_pad +A non-zero value ensures that writes from the hardware to a receive buffer are +padded up to the specified boundary. +The default is -1 which lets the driver pick a pad boundary. +0 disables trailer padding completely. +.It Va hw.cxgbe.cong_drop +Controls the hardware response to congestion. +-1 disables congestion feedback and is not recommended. +0 instructs the hardware to backpressure its pipeline on congestion. +This usually results in the port emitting pause frames. +1 instructs the hardware to drop frames destined for congested queues. 
+.It Va hw.cxgbe.buffer_packing +Allow the hardware to deliver multiple frames in the same receive buffer +opportunistically. +The default is -1 which lets the driver decide. +0 or 1 explicitly disable or enable this feature. +.It Va hw.cxgbe.allow_mbufs_in_cluster +1 allows the driver to lay down one or more mbufs within the receive buffer +opportunistically. This is the default. +0 prohibits the driver from doing so. +.It Va hw.cxgbe.largest_rx_cluster +.It Va hw.cxgbe.safest_rx_cluster +Sizes of rx clusters. Each of these must be set to one of the sizes available +(usually 2048, 4096, 9216, and 16384) and largest_rx_cluster must be greater +than or equal to safest_rx_cluster. +The defaults are 16384 and 4096 respectively. +The driver will never attempt to allocate a receive buffer larger than +largest_rx_cluster and will fall back to allocating buffers of +safest_rx_cluster size if an allocation larger than safest_rx_cluster fails. +Note that largest_rx_cluster merely establishes a ceiling -- the driver is +allowed to allocate buffers of smaller sizes. .It Va hw.cxgbe.config_file Select a pre-packaged device configuration file. A configuration file contains a recipe for partitioning and configuring the @@ -235,7 +270,7 @@ hardware resources on the card. This tunable is for specialized applications only and should not be used in normal operation. The configuration profile currently in use is available in the dev.t4nex.X.cf -and dev.t4nex.X.cfcsum sysctls. +and dev.t4nex.X.cfcsum (dev.t5nex for T5 cards) sysctls. .It Va hw.cxgbe.linkcaps_allowed .It Va hw.cxgbe.niccaps_allowed .It Va hw.cxgbe.toecaps_allowed @@ -249,7 +284,7 @@ capability. This tunable is for specialized applications only and should not be used in normal operation. The capabilities for which hardware resources have been reserved are listed in -dev.t4nex.X.*caps sysctls. +dev.t4nex.X.*caps or dev.t5nex.X.*caps sysctls. 
.El .Sh SUPPORT For general information and support, diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index 29e139b..9673834 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -128,10 +128,11 @@ enum { RX_FL_ESIZE = EQ_ESIZE, /* 8 64bit addresses */ #if MJUMPAGESIZE != MCLBYTES - FL_BUF_SIZES_MAX = 5, /* cluster, jumbop, jumbo9k, jumbo16k, extra */ + SW_ZONE_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */ #else - FL_BUF_SIZES_MAX = 4, /* cluster, jumbo9k, jumbo16k, extra */ + SW_ZONE_SIZES = 3, /* cluster, jumbo9k, jumbo16k */ #endif + CL_METADATA_SIZE = CACHE_LINE_SIZE, CTRL_EQ_QSIZE = 128, @@ -235,15 +236,28 @@ struct port_info { uint8_t hw_addr[ETHER_ADDR_LEN]; /* factory MAC address, won't change */ }; -struct fl_sdesc { - bus_dmamap_t map; - caddr_t cl; - uint8_t tag_idx; /* the fl->tag entry this map comes from */ +/* Where the cluster came from, how it has been carved up. */ +struct cluster_layout { + int8_t zidx; + int8_t hwidx; + uint16_t region1; /* mbufs laid out within this region */ + /* region2 is the DMA region */ + uint16_t region3; /* cluster_metadata within this region */ +}; + +struct cluster_metadata { + u_int refcount; #ifdef INVARIANTS - __be64 ba_hwtag; + struct fl_sdesc *sd; /* For debug only. Could easily be stale */ #endif }; +struct fl_sdesc { + caddr_t cl; + uint8_t nmbuf; + struct cluster_layout cll; +}; + struct tx_desc { __be64 flit[8]; }; @@ -362,17 +376,19 @@ struct sge_eq { uint32_t unstalled; /* recovered from stall */ }; -struct fl_buf_info { - u_int size; - int type; - int hwtag:4; /* tag in low 4 bits of the pa. */ - uma_zone_t zone; +struct sw_zone_info { + uma_zone_t zone; /* zone that this cluster comes from */ + int size; /* size of cluster: 2K, 4K, 9K, 16K, etc. 
*/ + int type; /* EXT_xxx type of the cluster */ + int8_t head_hwidx; + int8_t tail_hwidx; +}; + +struct hw_buf_info { + int8_t zidx; /* backpointer to zone; -ve means unused */ + int8_t next; /* next hwidx for this zone; -1 means no more */ + int size; }; -#define FL_BUF_SIZES(sc) (sc->sge.fl_buf_sizes) -#define FL_BUF_SIZE(sc, x) (sc->sge.fl_buf_info[x].size) -#define FL_BUF_TYPE(sc, x) (sc->sge.fl_buf_info[x].type) -#define FL_BUF_HWTAG(sc, x) (sc->sge.fl_buf_info[x].hwtag) -#define FL_BUF_ZONE(sc, x) (sc->sge.fl_buf_info[x].zone) enum { FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */ @@ -386,9 +402,8 @@ enum { struct sge_fl { bus_dma_tag_t desc_tag; bus_dmamap_t desc_map; - bus_dma_tag_t tag[FL_BUF_SIZES_MAX]; /* only first FL_BUF_SIZES(sc) are - valid */ - uint8_t tag_idx; + struct cluster_layout cll_def; /* default refill zone, layout */ + struct cluster_layout cll_alt; /* alternate refill zone, layout */ struct mtx fl_lock; char lockname[16]; int flags; @@ -405,9 +420,17 @@ struct sge_fl { uint32_t needed; /* # of buffers needed to fill up fl. 
*/ uint32_t lowat; /* # of buffers <= this means fl needs help */ uint32_t pending; /* # of bufs allocated since last doorbell */ - u_int dmamap_failed; - struct mbuf *mstash[8]; TAILQ_ENTRY(sge_fl) link; /* All starving freelists */ + + struct mbuf *m0; + struct mbuf **pnext; + u_int remaining; + + uint64_t mbuf_allocated;/* # of mbuf allocated from zone_mbuf */ + uint64_t mbuf_inlined; /* # of mbuf created within clusters */ + uint64_t cl_allocated; /* # of clusters allocated */ + uint64_t cl_recycled; /* # of clusters recycled */ + uint64_t cl_fast_recycled; /* # of clusters recycled (fast) */ }; /* txq: SGE egress queue + what's needed for Ethernet NIC */ @@ -541,8 +564,11 @@ struct sge { struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */ struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */ - u_int fl_buf_sizes __aligned(CACHE_LINE_SIZE); - struct fl_buf_info fl_buf_info[FL_BUF_SIZES_MAX]; + int pack_boundary; + int8_t safe_hwidx1; /* may not have room for metadata */ + int8_t safe_hwidx2; /* with room for metadata and maybe more */ + struct sw_zone_info sw_zone_info[SW_ZONE_SIZES]; + struct hw_buf_info hw_buf_info[SGE_FLBUF_SIZES]; }; struct rss_header; diff --git a/sys/dev/cxgbe/common/t4_hw.h b/sys/dev/cxgbe/common/t4_hw.h index b0b82bd..34f462c 100644 --- a/sys/dev/cxgbe/common/t4_hw.h +++ b/sys/dev/cxgbe/common/t4_hw.h @@ -87,6 +87,7 @@ enum { SGE_NTIMERS = 6, /* # of interrupt holdoff timer values */ SGE_NCOUNTERS = 4, /* # of interrupt packet counter values */ SGE_MAX_IQ_SIZE = 65520, + SGE_FLBUF_SIZES = 16, }; struct sge_qstat { /* data written to SGE queue status entries */ diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index 3211c5d..4fddd35 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -494,6 +494,8 @@ CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); CTASSERT(nitems(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS); CTASSERT(nitems(((struct adapter *)0)->fw_msg_handler) == 
NUM_FW6_TYPES); +CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); + static int t4_probe(device_t dev) { @@ -4039,6 +4041,7 @@ static void cxgbe_tick(void *arg) { struct port_info *pi = arg; + struct adapter *sc = pi->adapter; struct ifnet *ifp = pi->ifp; struct sge_txq *txq; int i, drops; @@ -4050,7 +4053,7 @@ cxgbe_tick(void *arg) return; /* without scheduling another callout */ } - t4_get_port_stats(pi->adapter, pi->tx_chan, s); + t4_get_port_stats(sc, pi->tx_chan, s); ifp->if_opackets = s->tx_frames - s->tx_pause; ifp->if_ipackets = s->rx_frames - s->rx_pause; @@ -4061,6 +4064,19 @@ cxgbe_tick(void *arg) ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3; + for (i = 0; i < 4; i++) { + if (pi->rx_chan_map & (1 << i)) { + uint32_t v; + + /* + * XXX: indirect reads from the same ADDR/DATA pair can + * race with each other. + */ + t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, + 1, A_TP_MIB_TNL_CNG_DROP_0 + i); + ifp->if_iqdrops += v; + } + } drops = s->tx_drop; for_each_txq(pi, i, txq) diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index a9bdb0e..2b2a689 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kdb.h> #include <sys/malloc.h> #include <sys/queue.h> +#include <sys/sbuf.h> #include <sys/taskqueue.h> #include <sys/time.h> #include <sys/sysctl.h> @@ -51,6 +52,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip6.h> #include <netinet/tcp.h> #include <machine/md_var.h> +#include <vm/vm.h> +#include <vm/pmap.h> #include "common/common.h" #include "common/t4_regs.h" @@ -123,6 +126,27 @@ static int t4_fl_pack; static int t5_fl_pack; TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack); +/* + * Allow the driver to create mbuf(s) in a cluster allocated for rx. + * 0: never; always allocate mbufs from the zone_mbuf UMA zone. + * 1: ok to create mbuf(s) within a cluster if there is room. 
+ */ +static int allow_mbufs_in_cluster = 1; +TUNABLE_INT("hw.cxgbe.allow_mbufs_in_cluster", &allow_mbufs_in_cluster); + +/* + * Largest rx cluster size that the driver is allowed to allocate. + */ +static int largest_rx_cluster = MJUM16BYTES; +TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx_cluster); + +/* + * Size of cluster allocation that's most likely to succeed. The driver will + * fall back to this size if it fails to allocate clusters larger than this. + */ +static int safest_rx_cluster = PAGE_SIZE; +TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster); + /* Used to track coalesced tx work request */ struct txpkts { uint64_t *flitp; /* ptr to flit where next pkt should start */ @@ -139,9 +163,7 @@ struct sgl { }; static int service_iq(struct sge_iq *, int); -static struct mbuf *get_fl_payload1(struct adapter *, struct sge_fl *, uint32_t, - int *); -static struct mbuf *get_fl_payload2(struct adapter *, struct sge_fl *, uint32_t, +static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t, int *); static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *); static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int, @@ -157,6 +179,8 @@ static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t, static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *, int, int); static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *); +static void add_fl_sysctls(struct sysctl_ctx_list *, struct sysctl_oid *, + struct sge_fl *); static int alloc_fwq(struct adapter *); static int free_fwq(struct adapter *); static int alloc_mgmtq(struct adapter *); @@ -190,7 +214,8 @@ static int refill_fl(struct adapter *, struct sge_fl *, int); static void refill_sfl(void *); static int alloc_fl_sdesc(struct sge_fl *); static void free_fl_sdesc(struct adapter *, struct sge_fl *); -static void set_fl_tag_idx(struct adapter *, struct sge_fl *, int); +static void 
find_best_refill_source(struct adapter *, struct sge_fl *, int); +static void find_safe_refill_source(struct adapter *, struct sge_fl *); static void add_fl_to_sfl(struct adapter *, struct sge_fl *); static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int); @@ -215,6 +240,7 @@ static int handle_fw_msg(struct sge_iq *, const struct rss_header *, struct mbuf *); static int sysctl_uint16(SYSCTL_HANDLER_ARGS); +static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); /* * Called on MOD_LOAD. Validates and calculates the SGE tunables. @@ -263,7 +289,7 @@ t4_sge_modload(void) /* T5's pack boundary is independent of the pad boundary. */ if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 || !powerof2(fl_pack)) - t5_fl_pack = max(pad, 64); + t5_fl_pack = max(pad, CACHE_LINE_SIZE); else t5_fl_pack = fl_pack; @@ -312,14 +338,18 @@ t4_tweak_chip_settings(struct adapter *sc) int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk; int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */ uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); - int sw_flbuf_sizes[] = { + static int sge_flbuf_sizes[] = { MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, + MJUMPAGESIZE - CL_METADATA_SIZE, + MJUMPAGESIZE - 2 * MSIZE - CL_METADATA_SIZE, #endif MJUM9BYTES, MJUM16BYTES, - MJUMPAGESIZE - MSIZE + MCLBYTES - MSIZE - CL_METADATA_SIZE, + MJUM9BYTES - CL_METADATA_SIZE, + MJUM16BYTES - CL_METADATA_SIZE, }; KASSERT(sc->flags & MASTER_PF, @@ -357,9 +387,11 @@ t4_tweak_chip_settings(struct adapter *sc) V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10); t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v); - for (i = 0; i < min(nitems(sw_flbuf_sizes), 16); i++) { + KASSERT(nitems(sge_flbuf_sizes) <= SGE_FLBUF_SIZES, + ("%s: hw buffer size table too big", __func__)); + for (i = 0; i < min(nitems(sge_flbuf_sizes), SGE_FLBUF_SIZES); i++) { t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i), - sw_flbuf_sizes[i]); + sge_flbuf_sizes[i]); } v = V_THRESHOLD_0(intr_pktcount[0]) | 
V_THRESHOLD_1(intr_pktcount[1]) | @@ -414,6 +446,18 @@ t4_tweak_chip_settings(struct adapter *sc) } /* + * SGE wants the buffer to be at least 64B and then a multiple of the pad + * boundary or 16, whichever is greater. + */ +static inline int +hwsz_ok(int hwsz) +{ + int mask = max(fl_pad, 16) - 1; + + return (hwsz >= 64 && (hwsz & mask) == 0); +} + +/* * XXX: driver really should be able to deal with unexpected settings. */ int @@ -423,7 +467,7 @@ t4_read_chip_settings(struct adapter *sc) int i, j, n, rc = 0; uint32_t m, v, r; uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE); - uint32_t sge_flbuf_sizes[16], sw_flbuf_sizes[] = { + static int sw_buf_sizes[] = { /* Sorted by size */ MCLBYTES, #if MJUMPAGESIZE != MCLBYTES MJUMPAGESIZE, @@ -431,6 +475,8 @@ t4_read_chip_settings(struct adapter *sc) MJUM9BYTES, MJUM16BYTES }; + struct sw_zone_info *swz, *safe_swz; + struct hw_buf_info *hwb; m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE; v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE | @@ -461,6 +507,7 @@ t4_read_chip_settings(struct adapter *sc) rc = EINVAL; } } + s->pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack; v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) | V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) | @@ -476,45 +523,93 @@ t4_read_chip_settings(struct adapter *sc) rc = EINVAL; } - /* - * Make a list of SGE FL buffer sizes programmed in the chip and tally - * it with the FL buffer sizes that we'd like to use. - */ - n = 0; - for (i = 0; i < nitems(sge_flbuf_sizes); i++) { + /* Filter out unusable hw buffer sizes entirely (mark with -2). 
*/ + hwb = &s->hw_buf_info[0]; + for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) { r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i)); - sge_flbuf_sizes[i] = r; - if (r == MJUMPAGESIZE - MSIZE && - (sc->flags & BUF_PACKING_OK) == 0) { - sc->flags |= BUF_PACKING_OK; - FL_BUF_HWTAG(sc, n) = i; - FL_BUF_SIZE(sc, n) = MJUMPAGESIZE - MSIZE; - FL_BUF_TYPE(sc, n) = m_gettype(MJUMPAGESIZE); - FL_BUF_ZONE(sc, n) = m_getzone(MJUMPAGESIZE); - n++; - } + hwb->size = r; + hwb->zidx = hwsz_ok(r) ? -1 : -2; + hwb->next = -1; } - for (i = 0; i < nitems(sw_flbuf_sizes); i++) { - for (j = 0; j < nitems(sge_flbuf_sizes); j++) { - if (sw_flbuf_sizes[i] != sge_flbuf_sizes[j]) + + /* + * Create a sorted list in decreasing order of hw buffer sizes (and so + * increasing order of spare area) for each software zone. + */ + n = 0; /* no usable buffer size to begin with */ + swz = &s->sw_zone_info[0]; + safe_swz = NULL; + for (i = 0; i < SW_ZONE_SIZES; i++, swz++) { + int8_t head = -1, tail = -1; + + swz->size = sw_buf_sizes[i]; + swz->zone = m_getzone(swz->size); + swz->type = m_gettype(swz->size); + + if (swz->size == safest_rx_cluster) + safe_swz = swz; + + hwb = &s->hw_buf_info[0]; + for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) { + if (hwb->zidx != -1 || hwb->size > swz->size) continue; - FL_BUF_HWTAG(sc, n) = j; - FL_BUF_SIZE(sc, n) = sw_flbuf_sizes[i]; - FL_BUF_TYPE(sc, n) = m_gettype(sw_flbuf_sizes[i]); - FL_BUF_ZONE(sc, n) = m_getzone(sw_flbuf_sizes[i]); + hwb->zidx = i; + if (head == -1) + head = tail = j; + else if (hwb->size < s->hw_buf_info[tail].size) { + s->hw_buf_info[tail].next = j; + tail = j; + } else { + int8_t *cur; + struct hw_buf_info *t; + + for (cur = &head; *cur != -1; cur = &t->next) { + t = &s->hw_buf_info[*cur]; + if (hwb->size == t->size) { + hwb->zidx = -2; + break; + } + if (hwb->size > t->size) { + hwb->next = *cur; + *cur = j; + break; + } + } + } + } + swz->head_hwidx = head; + swz->tail_hwidx = tail; + + if (tail != -1) { n++; - break; + if (swz->size 
- s->hw_buf_info[tail].size >= + CL_METADATA_SIZE) + sc->flags |= BUF_PACKING_OK; } } if (n == 0) { device_printf(sc->dev, "no usable SGE FL buffer size.\n"); rc = EINVAL; - } else if (n == 1 && (sc->flags & BUF_PACKING_OK)) { - device_printf(sc->dev, - "no usable SGE FL buffer size when not packing buffers.\n"); - rc = EINVAL; } - FL_BUF_SIZES(sc) = n; + + s->safe_hwidx1 = -1; + s->safe_hwidx2 = -1; + if (safe_swz != NULL) { + s->safe_hwidx1 = safe_swz->head_hwidx; + for (i = safe_swz->head_hwidx; i != -1; i = hwb->next) { + int spare; + + hwb = &s->hw_buf_info[i]; + spare = safe_swz->size - hwb->size; + if (spare < CL_METADATA_SIZE) + continue; + if (s->safe_hwidx2 == -1 || + spare == CL_METADATA_SIZE + MSIZE) + s->safe_hwidx2 = i; + if (spare >= CL_METADATA_SIZE + MSIZE) + break; + } + } r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD); s->counter_val[0] = G_THRESHOLD_0(r); @@ -626,6 +721,10 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, struct sysctl_oid_list *children) { + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "buffer_sizes", + CTLTYPE_STRING | CTLFLAG_RD, &sc->sge, 0, sysctl_bufsizes, "A", + "freelist buffer sizes"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pktshift", CTLFLAG_RD, NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)"); @@ -643,8 +742,7 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx, "pack multiple frames in one fl buffer"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, - NULL, is_t5(sc) ? 
t5_fl_pack : t4_fl_pack, - "payload pack boundary (bytes)"); + NULL, sc->sge.pack_boundary, "payload pack boundary (bytes)"); } int @@ -764,7 +862,7 @@ port_intr_iq(struct port_info *pi, int idx) #ifdef TCP_OFFLOAD if (sc->flags & INTR_DIRECT) { idx %= pi->nrxq + pi->nofldrxq; - + if (idx >= pi->nrxq) { idx -= pi->nrxq; iq = &s->ofld_rxq[pi->first_ofld_rxq + idx].iq; @@ -795,29 +893,28 @@ port_intr_iq(struct port_info *pi, int idx) return (iq); } +/* Maximum payload that can be delivered with a single iq descriptor */ static inline int -mtu_to_bufsize(int mtu) +mtu_to_max_payload(struct adapter *sc, int mtu, const int toe) { - int bufsize; - - /* large enough for a frame even when VLAN extraction is disabled */ - bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu; - bufsize = roundup2(bufsize + fl_pktshift, fl_pad); - - return (bufsize); -} + int payload; #ifdef TCP_OFFLOAD -static inline int -mtu_to_bufsize_toe(struct adapter *sc, int mtu) -{ - - if (sc->tt.rx_coalesce) - return (G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2))); + if (toe) { + payload = sc->tt.rx_coalesce ? + G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2)) : mtu; + } else { +#endif + /* large enough even when hw VLAN extraction is disabled */ + payload = fl_pktshift + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + + mtu; +#ifdef TCP_OFFLOAD + } +#endif + payload = roundup2(payload, fl_pad); - return (mtu); + return (payload); } -#endif int t4_setup_port_queues(struct port_info *pi) @@ -836,7 +933,7 @@ t4_setup_port_queues(struct port_info *pi) struct ifnet *ifp = pi->ifp; struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev); struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); - int bufsize, pack; + int maxp, pack, mtu = ifp->if_mtu; oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD, NULL, "rx queues"); @@ -857,7 +954,7 @@ t4_setup_port_queues(struct port_info *pi) * a) initialize iq and fl * b) allocate queue iff it will take direct interrupts. 
*/ - bufsize = mtu_to_bufsize(ifp->if_mtu); + maxp = mtu_to_max_payload(sc, mtu, 0); pack = enable_buffer_packing(sc); for_each_rxq(pi, i, rxq) { @@ -866,7 +963,7 @@ t4_setup_port_queues(struct port_info *pi) snprintf(name, sizeof(name), "%s rxq%d-fl", device_get_nameunit(pi->dev), i); - init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, bufsize, pack, name); + init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, pack, name); if (sc->flags & INTR_DIRECT #ifdef TCP_OFFLOAD @@ -882,8 +979,7 @@ t4_setup_port_queues(struct port_info *pi) } #ifdef TCP_OFFLOAD - bufsize = mtu_to_bufsize_toe(sc, ifp->if_mtu); - pack = 0; /* XXX: think about this some more */ + maxp = mtu_to_max_payload(sc, mtu, 1); for_each_ofld_rxq(pi, i, ofld_rxq) { init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, @@ -891,8 +987,7 @@ t4_setup_port_queues(struct port_info *pi) snprintf(name, sizeof(name), "%s ofld_rxq%d-fl", device_get_nameunit(pi->dev), i); - init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, bufsize, pack, - name); + init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, pack, name); if (sc->flags & INTR_DIRECT || (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) { @@ -1169,10 +1264,7 @@ service_iq(struct sge_iq *iq, int budget) ("%s: data for an iq (%p) with no freelist", __func__, iq)); - m0 = fl->flags & FL_BUF_PACKING ? 
- get_fl_payload1(sc, fl, lq, &fl_bufs_used) : - get_fl_payload2(sc, fl, lq, &fl_bufs_used); - + m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used); if (__predict_false(m0 == NULL)) goto process_iql; #ifdef T4_PKT_TIMESTAMP @@ -1245,6 +1337,14 @@ service_iq(struct sge_iq *iq, int budget) break; } + if (fl_bufs_used >= 16) { + FL_LOCK(fl); + fl->needed += fl_bufs_used; + refill_fl(sc, fl, 32); + FL_UNLOCK(fl); + fl_bufs_used = 0; + } + iq_next(iq); if (++ndescs == limit) { t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), @@ -1261,14 +1361,6 @@ service_iq(struct sge_iq *iq, int budget) } #endif - if (fl_bufs_used > 0) { - FL_LOCK(fl); - fl->needed += fl_bufs_used; - refill_fl(sc, fl, fl->cap / 8); - FL_UNLOCK(fl); - fl_bufs_used = 0; - } - if (budget) return (EINPROGRESS); } @@ -1311,7 +1403,7 @@ process_iql: FL_LOCK(fl); fl->needed += fl_bufs_used; - starved = refill_fl(sc, fl, fl->cap / 4); + starved = refill_fl(sc, fl, 64); FL_UNLOCK(fl); if (__predict_false(starved != 0)) add_fl_to_sfl(sc, fl); @@ -1320,74 +1412,28 @@ process_iql: return (0); } -static int -fill_mbuf_stash(struct sge_fl *fl) -{ - int i; - - for (i = 0; i < nitems(fl->mstash); i++) { - if (fl->mstash[i] == NULL) { - struct mbuf *m; - if ((m = m_get(M_NOWAIT, MT_NOINIT)) == NULL) - return (ENOBUFS); - fl->mstash[i] = m; - } - } - return (0); -} - -static struct mbuf * -get_mbuf_from_stash(struct sge_fl *fl) +static inline int +cl_has_metadata(struct sge_fl *fl, struct cluster_layout *cll) { - int i; - - for (i = 0; i < nitems(fl->mstash); i++) { - if (fl->mstash[i] != NULL) { - struct mbuf *m; + int rc = fl->flags & FL_BUF_PACKING || cll->region1 > 0; - m = fl->mstash[i]; - fl->mstash[i] = NULL; - return (m); - } else - fl->mstash[i] = m_get(M_NOWAIT, MT_NOINIT); - } + if (rc) + MPASS(cll->region3 >= CL_METADATA_SIZE); - return (m_get(M_NOWAIT, MT_NOINIT)); + return (rc); } -static void -return_mbuf_to_stash(struct sge_fl *fl, struct mbuf *m) +static inline struct cluster_metadata * +cl_metadata(struct adapter 
*sc, struct sge_fl *fl, struct cluster_layout *cll, + caddr_t cl) { - int i; - if (m == NULL) - return; + if (cl_has_metadata(fl, cll)) { + struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; - for (i = 0; i < nitems(fl->mstash); i++) { - if (fl->mstash[i] == NULL) { - fl->mstash[i] = m; - return; - } + return ((struct cluster_metadata *)(cl + swz->size) - 1); } - m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); - m_free(m); -} - -/* buf can be any address within the buffer */ -static inline u_int * -find_buf_refcnt(caddr_t buf) -{ - uintptr_t ptr = (uintptr_t)buf; - - return ((u_int *)((ptr & ~(MJUMPAGESIZE - 1)) + MSIZE - sizeof(u_int))); -} - -static inline struct mbuf * -find_buf_mbuf(caddr_t buf) -{ - uintptr_t ptr = (uintptr_t)buf; - - return ((struct mbuf *)(ptr & ~(MJUMPAGESIZE - 1))); + return (NULL); } static int @@ -1395,179 +1441,117 @@ rxb_free(struct mbuf *m, void *arg1, void *arg2) { uma_zone_t zone = arg1; caddr_t cl = arg2; -#ifdef notyet - u_int refcount; - refcount = *find_buf_refcnt(cl); - KASSERT(refcount == 0, ("%s: cl %p refcount is %u", __func__, - cl - MSIZE, refcount)); -#endif - cl -= MSIZE; uma_zfree(zone, cl); return (EXT_FREE_OK); } +/* + * The mbuf returned by this function could be allocated from zone_mbuf or + * constructed in spare room in the cluster. + * + * The mbuf carries the payload in one of these ways + * a) frame inside the mbuf (mbuf from zone_mbuf) + * b) m_cljset (for clusters without metadata) zone_mbuf + * c) m_extaddref (cluster with metadata) inline mbuf + * d) m_extaddref (cluster with metadata) zone_mbuf + */ static struct mbuf * -get_fl_payload1(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf, - int *fl_bufs_used) +get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags) { - struct mbuf *m0, *m; + struct mbuf *m; struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; - unsigned int nbuf, len; - int pack_boundary = is_t4(sc) ? 
t4_fl_pack : t5_fl_pack; - - /* - * No assertion for the fl lock because we don't need it. This routine - * is called only from the rx interrupt handler and it only updates - * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be - * updated in the rx interrupt handler or the starvation helper routine. - * That's why code that manipulates fl->pidx/fl->needed needs the fl - * lock but this routine does not). - */ + struct cluster_layout *cll = &sd->cll; + struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; + struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx]; + struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl); + int len, padded_len; + caddr_t payload; - KASSERT(fl->flags & FL_BUF_PACKING, - ("%s: buffer packing disabled for fl %p", __func__, fl)); + len = min(total, hwb->size - fl->rx_offset); + padded_len = roundup2(len, fl_pad); + payload = sd->cl + cll->region1 + fl->rx_offset; - len = G_RSPD_LEN(len_newbuf); - - if ((len_newbuf & F_RSPD_NEWBUF) == 0) { - KASSERT(fl->rx_offset > 0, - ("%s: packed frame but driver at offset=0", __func__)); + if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) { - /* A packed frame is guaranteed to fit entirely in this buf. */ - KASSERT(FL_BUF_SIZE(sc, sd->tag_idx) - fl->rx_offset >= len, - ("%s: packing error. bufsz=%u, offset=%u, len=%u", - __func__, FL_BUF_SIZE(sc, sd->tag_idx), fl->rx_offset, - len)); + /* + * Copy payload into a freshly allocated mbuf. + */ - m0 = get_mbuf_from_stash(fl); - if (m0 == NULL || - m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) { - return_mbuf_to_stash(fl, m0); + m = flags & M_PKTHDR ? 
+ m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); + if (m == NULL) return (NULL); - } - - bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, - BUS_DMASYNC_POSTREAD); - if (sc->sc_do_rxcopy && (len < RX_COPY_THRESHOLD)) { + fl->mbuf_allocated++; #ifdef T4_PKT_TIMESTAMP - /* Leave room for a timestamp */ - m0->m_data += 8; + /* Leave room for a timestamp */ + m->m_data += 8; #endif - bcopy(sd->cl + fl->rx_offset, mtod(m0, caddr_t), len); - m0->m_pkthdr.len = len; - m0->m_len = len; - } else { - m0->m_pkthdr.len = len; - m0->m_len = len; - m_extaddref(m0, sd->cl + fl->rx_offset, - roundup2(m0->m_len, fl_pad), - find_buf_refcnt(sd->cl), rxb_free, - FL_BUF_ZONE(sc, sd->tag_idx), sd->cl); - } - fl->rx_offset += len; - fl->rx_offset = roundup2(fl->rx_offset, fl_pad); - fl->rx_offset = roundup2(fl->rx_offset, pack_boundary); - if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) { - fl->rx_offset = 0; - (*fl_bufs_used) += 1; - if (__predict_false(++fl->cidx == fl->cap)) - fl->cidx = 0; - } + /* copy data to mbuf */ + bcopy(payload, mtod(m, caddr_t), len); - return (m0); - } + } else if (sd->nmbuf * MSIZE < cll->region1) { - KASSERT(len_newbuf & F_RSPD_NEWBUF, - ("%s: only new buffer handled here", __func__)); + /* + * There's spare room in the cluster for an mbuf. Create one + * and associate it with the payload that's in the cluster too. + */ - nbuf = 0; + MPASS(clm != NULL); + m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE); + /* No bzero required */ + if (m_init(m, NULL, 0, M_NOWAIT, MT_DATA, flags | M_NOFREE)) + return (NULL); + fl->mbuf_inlined++; + m_extaddref(m, payload, padded_len, &clm->refcount, rxb_free, + swz->zone, sd->cl); + sd->nmbuf++; - /* - * Move to the start of the next buffer if we are still in the middle of - * some buffer. This is the case where there was some room left in the - * previous buffer but not enough to fit this frame in its entirety. 
- */ - if (fl->rx_offset > 0) { - KASSERT(roundup2(len, fl_pad) > FL_BUF_SIZE(sc, sd->tag_idx) - - fl->rx_offset, ("%s: frame (%u bytes) should have fit at " - "cidx %u offset %u bufsize %u", __func__, len, fl->cidx, - fl->rx_offset, FL_BUF_SIZE(sc, sd->tag_idx))); - nbuf++; - fl->rx_offset = 0; - sd++; - if (__predict_false(++fl->cidx == fl->cap)) { - sd = fl->sdesc; - fl->cidx = 0; - } - } + } else { - m0 = find_buf_mbuf(sd->cl); - if (m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE)) - goto done; - bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); - m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx)); - m_extaddref(m0, sd->cl, roundup2(m0->m_len, fl_pad), - find_buf_refcnt(sd->cl), rxb_free, FL_BUF_ZONE(sc, sd->tag_idx), - sd->cl); - m0->m_pkthdr.len = len; - - fl->rx_offset = roundup2(m0->m_len, fl_pad); - fl->rx_offset = roundup2(fl->rx_offset, pack_boundary); - if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) { - fl->rx_offset = 0; - nbuf++; - sd++; - if (__predict_false(++fl->cidx == fl->cap)) { - sd = fl->sdesc; - fl->cidx = 0; + /* + * Grab an mbuf from zone_mbuf and associate it with the + * payload in the cluster. + */ + + m = flags & M_PKTHDR ? 
+ m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA); + if (m == NULL) + return (NULL); + fl->mbuf_allocated++; + if (clm != NULL) + m_extaddref(m, payload, padded_len, &clm->refcount, + rxb_free, swz->zone, sd->cl); + else { + m_cljset(m, sd->cl, swz->type); + sd->cl = NULL; /* consumed, not a recycle candidate */ } } + if (flags & M_PKTHDR) + m->m_pkthdr.len = total; + m->m_len = len; - m = m0; - len -= m->m_len; + if (fl->flags & FL_BUF_PACKING) { + fl->rx_offset += roundup2(padded_len, sc->sge.pack_boundary); + MPASS(fl->rx_offset <= hwb->size); + if (fl->rx_offset < hwb->size) + return (m); /* without advancing the cidx */ + } - while (len > 0) { - m->m_next = find_buf_mbuf(sd->cl); - m = m->m_next; - - bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, - BUS_DMASYNC_POSTREAD); - - /* m_init for !M_PKTHDR can't fail so don't bother */ - m_init(m, NULL, 0, M_NOWAIT, MT_DATA, M_NOFREE); - m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx)); - m_extaddref(m, sd->cl, roundup2(m->m_len, fl_pad), - find_buf_refcnt(sd->cl), rxb_free, - FL_BUF_ZONE(sc, sd->tag_idx), sd->cl); - - fl->rx_offset = roundup2(m->m_len, fl_pad); - fl->rx_offset = roundup2(fl->rx_offset, pack_boundary); - if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) { - fl->rx_offset = 0; - nbuf++; - sd++; - if (__predict_false(++fl->cidx == fl->cap)) { - sd = fl->sdesc; - fl->cidx = 0; - } - } + if (__predict_false(++fl->cidx == fl->cap)) + fl->cidx = 0; + fl->rx_offset = 0; - len -= m->m_len; - } -done: - (*fl_bufs_used) += nbuf; - return (m0); + return (m); } static struct mbuf * -get_fl_payload2(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf, +get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf, int *fl_bufs_used) { - struct mbuf *m0, *m; - struct fl_sdesc *sd = &fl->sdesc[fl->cidx]; - unsigned int nbuf, len; + struct mbuf *m0, *m, **pnext; + u_int nbuf, len; /* * No assertion for the fl lock because we don't need it. 
This routine @@ -1578,87 +1562,54 @@ get_fl_payload2(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf, * lock but this routine does not). */ - KASSERT((fl->flags & FL_BUF_PACKING) == 0, - ("%s: buffer packing enabled for fl %p", __func__, fl)); - if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0)) - panic("%s: cannot handle packed frames", __func__); + nbuf = 0; len = G_RSPD_LEN(len_newbuf); - - /* - * We never want to run out of mbufs in between a frame when a frame - * spans multiple fl buffers. If the fl's mbuf stash isn't full and - * can't be filled up to the brim then fail early. - */ - if (len > FL_BUF_SIZE(sc, sd->tag_idx) && fill_mbuf_stash(fl) != 0) - return (NULL); - - m0 = get_mbuf_from_stash(fl); - if (m0 == NULL || - m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) { - return_mbuf_to_stash(fl, m0); - return (NULL); + if (__predict_false(fl->m0 != NULL)) { + MPASS(len == fl->m0->m_pkthdr.len); + MPASS(fl->remaining < len); + + m0 = fl->m0; + pnext = fl->pnext; + len = fl->remaining; + fl->m0 = NULL; + goto get_segment; } - bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD); - - if (sc->sc_do_rxcopy && (len < RX_COPY_THRESHOLD)) { -#ifdef T4_PKT_TIMESTAMP - /* Leave room for a timestamp */ - m0->m_data += 8; -#endif - /* copy data to mbuf, buffer will be recycled */ - bcopy(sd->cl, mtod(m0, caddr_t), len); - m0->m_len = len; - } else { - bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); - m_cljset(m0, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx)); - sd->cl = NULL; /* consumed */ - m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx)); - } - m0->m_pkthdr.len = len; - - sd++; - if (__predict_false(++fl->cidx == fl->cap)) { - sd = fl->sdesc; - fl->cidx = 0; + if (fl->rx_offset > 0 && len_newbuf & F_RSPD_NEWBUF) { + nbuf++; + fl->rx_offset = 0; + if (__predict_false(++fl->cidx == fl->cap)) + fl->cidx = 0; } - m = m0; - len -= m->m_len; - nbuf = 1; /* # of fl buffers used */ + /* + * Payload starts at rx_offset in the current hw 
buffer. Its length is + * 'len' and it may span multiple hw buffers. + */ + m0 = get_scatter_segment(sc, fl, len, M_PKTHDR); + len -= m0->m_len; + pnext = &m0->m_next; while (len > 0) { - /* Can't fail, we checked earlier that the stash was full. */ - m->m_next = get_mbuf_from_stash(fl); - m = m->m_next; - - bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, - BUS_DMASYNC_POSTREAD); - - /* m_init for !M_PKTHDR can't fail so don't bother */ - m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); - if (len <= MLEN) { - bcopy(sd->cl, mtod(m, caddr_t), len); - m->m_len = len; - } else { - bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); - m_cljset(m, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx)); - sd->cl = NULL; /* consumed */ - m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx)); - } - - sd++; - if (__predict_false(++fl->cidx == fl->cap)) { - sd = fl->sdesc; - fl->cidx = 0; + nbuf++; +get_segment: + MPASS(fl->rx_offset == 0); + m = get_scatter_segment(sc, fl, len, 0); + if (m == NULL) { + fl->m0 = m0; + fl->pnext = pnext; + fl->remaining = len; + return (NULL); } - + *pnext = m; + pnext = &m->m_next; len -= m->m_len; - nbuf++; } + *pnext = NULL; + if (fl->rx_offset == 0) + nbuf++; (*fl_bufs_used) += nbuf; - return (m0); } @@ -2015,23 +1966,23 @@ t4_update_fl_bufsize(struct ifnet *ifp) struct sge_ofld_rxq *ofld_rxq; #endif struct sge_fl *fl; - int i, bufsize; + int i, maxp, mtu = ifp->if_mtu; - bufsize = mtu_to_bufsize(ifp->if_mtu); + maxp = mtu_to_max_payload(sc, mtu, 0); for_each_rxq(pi, i, rxq) { fl = &rxq->fl; FL_LOCK(fl); - set_fl_tag_idx(sc, fl, bufsize); + find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #ifdef TCP_OFFLOAD - bufsize = mtu_to_bufsize_toe(pi->adapter, ifp->if_mtu); + maxp = mtu_to_max_payload(sc, mtu, 1); for_each_ofld_rxq(pi, i, ofld_rxq) { fl = &ofld_rxq->fl; FL_LOCK(fl); - set_fl_tag_idx(sc, fl, bufsize); + find_best_refill_source(sc, fl, maxp); FL_UNLOCK(fl); } #endif @@ -2065,7 +2016,7 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int 
pktc_idx, } static inline void -init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int bufsize, int pack, +init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, int pack, char *name) { @@ -2073,7 +2024,8 @@ init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int bufsize, int pack, strlcpy(fl->lockname, name, sizeof(fl->lockname)); if (pack) fl->flags |= FL_BUF_PACKING; - set_fl_tag_idx(sc, fl, bufsize); + find_best_refill_source(sc, fl, maxp); + find_safe_refill_source(sc, fl); } static inline void @@ -2202,24 +2154,6 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, if (fl) { mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF); - for (i = 0; i < FL_BUF_SIZES(sc); i++) { - - /* - * A freelist buffer must be 16 byte aligned as the SGE - * uses the low 4 bits of the bus addr to figure out the - * buffer size. - */ - rc = bus_dma_tag_create(sc->dmat, 16, 0, - BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, - FL_BUF_SIZE(sc, i), 1, FL_BUF_SIZE(sc, i), - BUS_DMA_ALLOCNOW, NULL, NULL, &fl->tag[i]); - if (rc != 0) { - device_printf(sc->dev, - "failed to create fl DMA tag[%d]: %d\n", - i, rc); - return (rc); - } - } len = fl->qsize * RX_FL_ESIZE; rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map, &fl->ba, (void **)&fl->desc); @@ -2336,7 +2270,7 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl, static int free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) { - int i, rc; + int rc; struct adapter *sc = iq->adapter; device_t dev; @@ -2368,29 +2302,48 @@ free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl) if (fl->sdesc) free_fl_sdesc(sc, fl); - for (i = 0; i < nitems(fl->mstash); i++) { - struct mbuf *m = fl->mstash[i]; - - if (m != NULL) { - m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0); - m_free(m); - } - } - if (mtx_initialized(&fl->fl_lock)) mtx_destroy(&fl->fl_lock); - for (i = 0; i < FL_BUF_SIZES(sc); i++) { - if (fl->tag[i]) - 
bus_dma_tag_destroy(fl->tag[i]); - } - bzero(fl, sizeof(*fl)); } return (0); } +static void +add_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid, + struct sge_fl *fl) +{ + struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid); + + oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fl", CTLFLAG_RD, NULL, + "freelist"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id", + CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I", + "SGE context id of the freelist"); + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx, + 0, "consumer index"); + if (fl->flags & FL_BUF_PACKING) { + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_offset", + CTLFLAG_RD, &fl->rx_offset, 0, "packing rx offset"); + } + SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, &fl->pidx, + 0, "producer index"); + SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_allocated", + CTLFLAG_RD, &fl->mbuf_allocated, "# of mbuf allocated"); + SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "mbuf_inlined", + CTLFLAG_RD, &fl->mbuf_inlined, "# of mbuf inlined in clusters"); + SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_allocated", + CTLFLAG_RD, &fl->cl_allocated, "# of clusters allocated"); + SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_recycled", + CTLFLAG_RD, &fl->cl_recycled, "# of clusters recycled"); + SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "cluster_fast_recycled", + CTLFLAG_RD, &fl->cl_fast_recycled, "# of clusters recycled (fast)"); +} + static int alloc_fwq(struct adapter *sc) { @@ -2531,22 +2484,7 @@ alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx, CTLFLAG_RD, &rxq->vlan_extraction, "# of times hardware extracted 802.1Q tag"); - children = SYSCTL_CHILDREN(oid); - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, - NULL, "freelist"); - children = SYSCTL_CHILDREN(oid); - - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", - CTLTYPE_INT | CTLFLAG_RD, 
&rxq->fl.cntxt_id, 0, sysctl_uint16, "I", - "SGE context id of the queue"); - SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, - &rxq->fl.cidx, 0, "consumer index"); - if (rxq->fl.flags & FL_BUF_PACKING) { - SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "rx_offset", - CTLFLAG_RD, &rxq->fl.rx_offset, 0, "packing rx offset"); - } - SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, - &rxq->fl.pidx, 0, "producer index"); + add_fl_sysctls(&pi->ctx, oid, &rxq->fl); return (rc); } @@ -2601,18 +2539,7 @@ alloc_ofld_rxq(struct port_info *pi, struct sge_ofld_rxq *ofld_rxq, CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->iq.cidx, 0, sysctl_uint16, "I", "consumer index"); - children = SYSCTL_CHILDREN(oid); - oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD, - NULL, "freelist"); - children = SYSCTL_CHILDREN(oid); - - SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id", - CTLTYPE_INT | CTLFLAG_RD, &ofld_rxq->fl.cntxt_id, 0, sysctl_uint16, - "I", "SGE context id of the queue"); - SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, - &ofld_rxq->fl.cidx, 0, "consumer index"); - SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD, - &ofld_rxq->fl.pidx, 0, "producer index"); + add_fl_sysctls(&pi->ctx, oid, &ofld_rxq->fl); return (rc); } @@ -3100,103 +3027,83 @@ refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs) { __be64 *d = &fl->desc[fl->pidx]; struct fl_sdesc *sd = &fl->sdesc[fl->pidx]; - bus_dma_tag_t tag; - bus_addr_t pa; + uintptr_t pa; caddr_t cl; - int rc; + struct cluster_layout *cll = &fl->cll_def; /* default layout */ + struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx]; + struct cluster_metadata *clm; FL_LOCK_ASSERT_OWNED(fl); -#ifdef INVARIANTS - if (fl->flags & FL_BUF_PACKING) - KASSERT(sd->tag_idx == 0, - ("%s: expected tag 0 but found tag %d at pidx %u instead", - __func__, sd->tag_idx, fl->pidx)); -#endif if (nbufs > fl->needed) nbufs = fl->needed; + nbufs -= (fl->pidx + nbufs) 
% 8; while (nbufs--) { if (sd->cl != NULL) { - KASSERT(*d == sd->ba_hwtag, - ("%s: recyling problem at pidx %d", - __func__, fl->pidx)); - - if (fl->flags & FL_BUF_PACKING) { - u_int *refcount = find_buf_refcnt(sd->cl); - - if (atomic_fetchadd_int(refcount, -1) == 1) { - *refcount = 1; /* reinstate */ - d++; - goto recycled; - } - sd->cl = NULL; /* gave up my reference */ - } else { + if (sd->nmbuf == 0) { /* - * This happens when a frame small enough to fit - * entirely in an mbuf was received in cl last - * time. We'd held on to cl and can reuse it - * now. Note that we reuse a cluster of the old - * size if fl->tag_idx is no longer the same as - * sd->tag_idx. + * Fast recycle without involving any atomics on + * the cluster's metadata (if the cluster has + * metadata). This happens when all frames + * received in the cluster were small enough to + * fit within a single mbuf each. */ - d++; - goto recycled; + fl->cl_fast_recycled++; + goto recycled_fast; } - } - - if (__predict_false(fl->tag_idx != sd->tag_idx)) { - bus_dmamap_t map; - bus_dma_tag_t newtag = fl->tag[fl->tag_idx]; - bus_dma_tag_t oldtag = fl->tag[sd->tag_idx]; /* - * An MTU change can get us here. Discard the old map - * which was created with the old tag, but only if - * we're able to get a new one. + * Cluster is guaranteed to have metadata. Clusters + * without metadata always take the fast recycle path + * when they're recycled. 
*/ - rc = bus_dmamap_create(newtag, 0, &map); - if (rc == 0) { - bus_dmamap_destroy(oldtag, sd->map); - sd->map = map; - sd->tag_idx = fl->tag_idx; - } - } - - tag = fl->tag[sd->tag_idx]; + clm = cl_metadata(sc, fl, &sd->cll, sd->cl); + MPASS(clm != NULL); - cl = uma_zalloc(FL_BUF_ZONE(sc, sd->tag_idx), M_NOWAIT); - if (cl == NULL) - break; - if (fl->flags & FL_BUF_PACKING) { - *find_buf_refcnt(cl) = 1; - cl += MSIZE; + if (atomic_fetchadd_int(&clm->refcount, -1) == 1) { + fl->cl_recycled++; + goto recycled; + } + sd->cl = NULL; /* gave up my reference */ } + MPASS(sd->cl == NULL); +alloc: + cl = uma_zalloc(swz->zone, M_NOWAIT); + if (__predict_false(cl == NULL)) { + if (cll == &fl->cll_alt || fl->cll_alt.zidx == -1 || + fl->cll_def.zidx == fl->cll_alt.zidx) + break; - rc = bus_dmamap_load(tag, sd->map, cl, - FL_BUF_SIZE(sc, sd->tag_idx), oneseg_dma_callback, &pa, 0); - if (rc != 0 || pa == 0) { - fl->dmamap_failed++; - if (fl->flags & FL_BUF_PACKING) - cl -= MSIZE; - uma_zfree(FL_BUF_ZONE(sc, sd->tag_idx), cl); - break; + /* fall back to the safe zone */ + cll = &fl->cll_alt; + swz = &sc->sge.sw_zone_info[cll->zidx]; + goto alloc; } + fl->cl_allocated++; + pa = pmap_kextract((vm_offset_t)cl); + pa += cll->region1; sd->cl = cl; - *d++ = htobe64(pa | FL_BUF_HWTAG(sc, sd->tag_idx)); - + sd->cll = *cll; + *d = htobe64(pa | cll->hwidx); + clm = cl_metadata(sc, fl, cll, cl); + if (clm != NULL) { +recycled: #ifdef INVARIANTS - sd->ba_hwtag = htobe64(pa | FL_BUF_HWTAG(sc, sd->tag_idx)); + clm->sd = sd; #endif - -recycled: + clm->refcount = 1; + } + sd->nmbuf = 0; +recycled_fast: fl->pending++; fl->needed--; + d++; sd++; - if (++fl->pidx == fl->cap) { + if (__predict_false(++fl->pidx == fl->cap)) { fl->pidx = 0; sd = fl->sdesc; d = fl->desc; @@ -3237,53 +3144,33 @@ refill_sfl(void *arg) static int alloc_fl_sdesc(struct sge_fl *fl) { - struct fl_sdesc *sd; - bus_dma_tag_t tag; - int i, rc; fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE, M_ZERO | M_WAITOK); - 
tag = fl->tag[fl->tag_idx]; - sd = fl->sdesc; - for (i = 0; i < fl->cap; i++, sd++) { - - sd->tag_idx = fl->tag_idx; - rc = bus_dmamap_create(tag, 0, &sd->map); - if (rc != 0) - goto failed; - } - return (0); -failed: - while (--i >= 0) { - sd--; - bus_dmamap_destroy(tag, sd->map); - } - KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__)); - - free(fl->sdesc, M_CXGBE); - fl->sdesc = NULL; - - return (rc); } static void free_fl_sdesc(struct adapter *sc, struct sge_fl *fl) { struct fl_sdesc *sd; + struct cluster_metadata *clm; + struct cluster_layout *cll; int i; sd = fl->sdesc; for (i = 0; i < fl->cap; i++, sd++) { + if (sd->cl == NULL) + continue; - if (sd->cl) { - bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map); - uma_zfree(FL_BUF_ZONE(sc, sd->tag_idx), sd->cl); - sd->cl = NULL; + cll = &sd->cll; + clm = cl_metadata(sc, fl, cll, sd->cl); + if (sd->nmbuf == 0 || + (clm && atomic_fetchadd_int(&clm->refcount, -1) == 1)) { + uma_zfree(sc->sge.sw_zone_info[cll->zidx].zone, sd->cl); } - - bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map); + sd->cl = NULL; } free(fl->sdesc, M_CXGBE); @@ -4107,7 +3994,7 @@ write_eqflush_wr(struct sge_eq *eq) eq->pending++; eq->avail--; if (++eq->pidx == eq->cap) - eq->pidx = 0; + eq->pidx = 0; } static __be64 @@ -4134,52 +4021,167 @@ get_flit(bus_dma_segment_t *sgl, int nsegs, int idx) return (0); } -/* - * Find an SGE FL buffer size to use for the given bufsize. Look for the the - * smallest size that is large enough to hold bufsize or pick the largest size - * if all sizes are less than bufsize. 
- */ static void -set_fl_tag_idx(struct adapter *sc, struct sge_fl *fl, int bufsize) +find_best_refill_source(struct adapter *sc, struct sge_fl *fl, int maxp) { - int i, largest, best, delta, start; + int8_t zidx, hwidx, idx; + uint16_t region1, region3; + int spare, spare_needed, n; + struct sw_zone_info *swz; + struct hw_buf_info *hwb, *hwb_list = &sc->sge.hw_buf_info[0]; - if (fl->flags & FL_BUF_PACKING) { - fl->tag_idx = 0; /* first tag is the one for packing */ - return; - } + /* + * Buffer Packing: Look for PAGE_SIZE or larger zone which has a bufsize + * large enough for the max payload and cluster metadata. Otherwise + * settle for the largest bufsize that leaves enough room in the cluster + * for metadata. + * + * Without buffer packing: Look for the smallest zone which has a + * bufsize large enough for the max payload. Settle for the largest + * bufsize available if there's nothing big enough for max payload. + */ + spare_needed = fl->flags & FL_BUF_PACKING ? CL_METADATA_SIZE : 0; + swz = &sc->sge.sw_zone_info[0]; + hwidx = -1; + for (zidx = 0; zidx < SW_ZONE_SIZES; zidx++, swz++) { + if (swz->size > largest_rx_cluster) { + if (__predict_true(hwidx != -1)) + break; - start = sc->flags & BUF_PACKING_OK ? 1 : 0; - delta = FL_BUF_SIZE(sc, start) - bufsize; - if (delta == 0) { - fl->tag_idx = start; /* ideal fit, look no further */ - return; - } - best = start; - largest = start; + /* + * This is a misconfiguration. largest_rx_cluster is + * preventing us from finding a refill source. See + * dev.t5nex.<n>.buffer_sizes to figure out why. + */ + device_printf(sc->dev, "largest_rx_cluster=%u leaves no" + " refill source for fl %p (dma %u). 
Ignored.\n", + largest_rx_cluster, fl, maxp); + } + for (idx = swz->head_hwidx; idx != -1; idx = hwb->next) { + hwb = &hwb_list[idx]; + spare = swz->size - hwb->size; + if (spare < spare_needed) + continue; - for (i = start + 1; i < FL_BUF_SIZES(sc); i++) { - int d, fl_buf_size; + hwidx = idx; /* best option so far */ + if (hwb->size >= maxp) { - fl_buf_size = FL_BUF_SIZE(sc, i); - d = fl_buf_size - bufsize; + if ((fl->flags & FL_BUF_PACKING) == 0) + goto done; /* stop looking (not packing) */ - if (d == 0) { - fl->tag_idx = i; /* ideal fit, look no further */ - return; + if (swz->size >= safest_rx_cluster) + goto done; /* stop looking (packing) */ + } + break; /* keep looking, next zone */ } - if (fl_buf_size > FL_BUF_SIZE(sc, largest)) - largest = i; - if (d > 0 && (delta < 0 || delta > d)) { - delta = d; - best = i; + } +done: + /* A usable hwidx has been located. */ + MPASS(hwidx != -1); + hwb = &hwb_list[hwidx]; + zidx = hwb->zidx; + swz = &sc->sge.sw_zone_info[zidx]; + region1 = 0; + region3 = swz->size - hwb->size; + + /* + * Stay within this zone and see if there is a better match when mbuf + * inlining is allowed. Remember that the hwidx's are sorted in + * decreasing order of size (so in increasing order of spare area). 
+ */ + for (idx = hwidx; idx != -1; idx = hwb->next) { + hwb = &hwb_list[idx]; + spare = swz->size - hwb->size; + + if (allow_mbufs_in_cluster == 0 || hwb->size < maxp) + break; + if (spare < CL_METADATA_SIZE + MSIZE) + continue; + n = (spare - CL_METADATA_SIZE) / MSIZE; + if (n > howmany(hwb->size, maxp)) + break; + + hwidx = idx; + if (fl->flags & FL_BUF_PACKING) { + region1 = n * MSIZE; + region3 = spare - region1; + } else { + region1 = MSIZE; + region3 = spare - region1; + break; } } - if (delta > 0) - fl->tag_idx = best; /* Found a buf bigger than bufsize */ + KASSERT(zidx >= 0 && zidx < SW_ZONE_SIZES, + ("%s: bad zone %d for fl %p, maxp %d", __func__, zidx, fl, maxp)); + KASSERT(hwidx >= 0 && hwidx <= SGE_FLBUF_SIZES, + ("%s: bad hwidx %d for fl %p, maxp %d", __func__, hwidx, fl, maxp)); + KASSERT(region1 + sc->sge.hw_buf_info[hwidx].size + region3 == + sc->sge.sw_zone_info[zidx].size, + ("%s: bad buffer layout for fl %p, maxp %d. " + "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, + sc->sge.sw_zone_info[zidx].size, region1, + sc->sge.hw_buf_info[hwidx].size, region3)); + if (fl->flags & FL_BUF_PACKING || region1 > 0) { + KASSERT(region3 >= CL_METADATA_SIZE, + ("%s: no room for metadata. fl %p, maxp %d; " + "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, + sc->sge.sw_zone_info[zidx].size, region1, + sc->sge.hw_buf_info[hwidx].size, region3)); + KASSERT(region1 % MSIZE == 0, + ("%s: bad mbuf region for fl %p, maxp %d. 
" + "cl %d; r1 %d, payload %d, r3 %d", __func__, fl, maxp, + sc->sge.sw_zone_info[zidx].size, region1, + sc->sge.hw_buf_info[hwidx].size, region3)); + } + + fl->cll_def.zidx = zidx; + fl->cll_def.hwidx = hwidx; + fl->cll_def.region1 = region1; + fl->cll_def.region3 = region3; +} + +static void +find_safe_refill_source(struct adapter *sc, struct sge_fl *fl) +{ + struct sge *s = &sc->sge; + struct hw_buf_info *hwb; + struct sw_zone_info *swz; + int spare; + int8_t hwidx; + + if (fl->flags & FL_BUF_PACKING) + hwidx = s->safe_hwidx2; /* with room for metadata */ + else if (allow_mbufs_in_cluster && s->safe_hwidx2 != -1) { + hwidx = s->safe_hwidx2; + hwb = &s->hw_buf_info[hwidx]; + swz = &s->sw_zone_info[hwb->zidx]; + spare = swz->size - hwb->size; + + /* no good if there isn't room for an mbuf as well */ + if (spare < CL_METADATA_SIZE + MSIZE) + hwidx = s->safe_hwidx1; + } else + hwidx = s->safe_hwidx1; + + if (hwidx == -1) { + /* No fallback source */ + fl->cll_alt.hwidx = -1; + fl->cll_alt.zidx = -1; + + return; + } + + hwb = &s->hw_buf_info[hwidx]; + swz = &s->sw_zone_info[hwb->zidx]; + spare = swz->size - hwb->size; + fl->cll_alt.hwidx = hwidx; + fl->cll_alt.zidx = hwb->zidx; + if (allow_mbufs_in_cluster) + fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE; else - fl->tag_idx = largest; /* No buf large enough for bufsize */ + fl->cll_alt.region1 = 0; + fl->cll_alt.region3 = spare - fl->cll_alt.region1; } static void @@ -4256,3 +4258,29 @@ sysctl_uint16(SYSCTL_HANDLER_ARGS) return sysctl_handle_int(oidp, &i, 0, req); } + +static int +sysctl_bufsizes(SYSCTL_HANDLER_ARGS) +{ + struct sge *s = arg1; + struct hw_buf_info *hwb = &s->hw_buf_info[0]; + struct sw_zone_info *swz = &s->sw_zone_info[0]; + int i, rc; + struct sbuf sb; + char c; + + sbuf_new(&sb, NULL, 32, SBUF_AUTOEXTEND); + for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) { + if (hwb->zidx >= 0 && swz[hwb->zidx].size <= largest_rx_cluster) + c = '*'; + else + c = '\0'; + + sbuf_printf(&sb, "%u%c 
", hwb->size, c); + } + sbuf_trim(&sb); + sbuf_finish(&sb); + rc = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); + sbuf_delete(&sb); + return (rc); +} |