-rw-r--r--   sys/dev/cxgbe/adapter.h |   1
-rw-r--r--   sys/dev/cxgbe/t4_sge.c  | 321
2 files changed, 176 insertions, 146 deletions
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index 1a2e835..01a17aa9 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -687,6 +687,7 @@ struct sge {
 
         struct sge_iq **iqmap;  /* iq->cntxt_id to iq mapping */
         struct sge_eq **eqmap;  /* eq->cntxt_id to eq mapping */
+        int pad_boundary;
         int pack_boundary;
         int8_t safe_hwidx1;     /* may not have room for metadata */
         int8_t safe_hwidx2;     /* with room for metadata and maybe more */
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 3df1492..e806793 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -119,19 +119,10 @@ TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);
 
 /*
  * Start next frame in a packed buffer at this boundary.
  * -1: driver should figure out a good value.
- * T4:
- * ---
- * if fl_pad != 0
- *     value specified here will be overridden by fl_pad.
- * else
- *     power of 2 from 32 to 4096 (both inclusive) is a valid value here.
- * T5:
- * ---
- * 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
+ * T4: driver will ignore this and use the same value as fl_pad above.
+ * T5: 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
  */
 static int fl_pack = -1;
-static int t4_fl_pack;
-static int t5_fl_pack;
 TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);
 
@@ -174,8 +165,7 @@ static int service_iq(struct sge_iq *, int);
 static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t);
 static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int);
-static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
-    char *);
+static inline void init_fl(struct adapter *, struct sge_fl *, int, int, char *);
 static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t, char *);
 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
@@ -263,15 +253,6 @@ static counter_u64_t extfree_rels;
 void
 t4_sge_modload(void)
 {
-        int pad;
-
-        /* set pad to a reasonable powerof2 between 16 and 4096 (inclusive) */
-#if defined(__i386__) || defined(__amd64__)
-        pad = max(cpu_clflush_line_size, 16);
-#else
-        pad = max(CACHE_LINE_SIZE, 16);
-#endif
-        pad = min(pad, 4096);
 
         if (fl_pktshift < 0 || fl_pktshift > 7) {
                 printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
@@ -279,35 +260,6 @@ t4_sge_modload(void)
                 fl_pktshift = 2;
         }
 
-        if (fl_pad != 0 &&
-            (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad))) {
-
-                if (fl_pad != -1) {
-                        printf("Invalid hw.cxgbe.fl_pad value (%d),"
-                            " using %d instead.\n", fl_pad, max(pad, 32));
-                }
-                fl_pad = max(pad, 32);
-        }
-
-        /*
-         * T4 has the same pad and pack boundary.  If a pad boundary is set,
-         * pack boundary must be set to the same value.  Otherwise take the
-         * specified value or auto-calculate something reasonable.
-         */
-        if (fl_pad)
-                t4_fl_pack = fl_pad;
-        else if (fl_pack < 32 || fl_pack > 4096 || !powerof2(fl_pack))
-                t4_fl_pack = max(pad, 32);
-        else
-                t4_fl_pack = fl_pack;
-
-        /* T5's pack boundary is independent of the pad boundary. */
-        if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
-            !powerof2(fl_pack))
-                t5_fl_pack = max(pad, CACHE_LINE_SIZE);
-        else
-                t5_fl_pack = fl_pack;
-
         if (spg_len != 64 && spg_len != 128) {
                 int len;
@@ -365,6 +317,71 @@ t4_init_sge_cpl_handlers(struct adapter *sc)
         t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
 }
 
+static inline void
+setup_pad_and_pack_boundaries(struct adapter *sc)
+{
+        uint32_t v, m;
+        int pad, pack;
+
+        pad = fl_pad;
+        if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
+                /*
+                 * If there is any chance that we might use buffer packing and
+                 * the chip is a T4, then pick 64 as the pad/pack boundary.  Set
+                 * it to 32 in all other cases.
+                 */
+                pad = is_t4(sc) && buffer_packing ? 64 : 32;
+
+                /*
+                 * For fl_pad = 0 we'll still write a reasonable value to the
+                 * register but all the freelists will opt out of padding.
+                 * We'll complain here only if the user tried to set it to a
+                 * value greater than 0 that was invalid.
+                 */
+                if (fl_pad > 0) {
+                        device_printf(sc->dev, "Invalid hw.cxgbe.fl_pad value"
+                            " (%d), using %d instead.\n", fl_pad, pad);
+                }
+        }
+        m = V_INGPADBOUNDARY(M_INGPADBOUNDARY);
+        v = V_INGPADBOUNDARY(ilog2(pad) - 5);
+        t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
+
+        if (is_t4(sc)) {
+                if (fl_pack != -1 && fl_pack != pad) {
+                        /* Complain but carry on. */
+                        device_printf(sc->dev, "hw.cxgbe.fl_pack (%d) ignored,"
+                            " using %d instead.\n", fl_pack, pad);
+                }
+                return;
+        }
+
+        pack = fl_pack;
+        if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
+            !powerof2(fl_pack)) {
+                pack = max(sc->params.pci.mps, CACHE_LINE_SIZE);
+                MPASS(powerof2(pack));
+                if (pack < 16)
+                        pack = 16;
+                if (pack == 32)
+                        pack = 64;
+                if (pack > 4096)
+                        pack = 4096;
+                if (fl_pack != -1) {
+                        device_printf(sc->dev, "Invalid hw.cxgbe.fl_pack value"
+                            " (%d), using %d instead.\n", fl_pack, pack);
+                }
+        }
+        m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
+        if (pack == 16)
+                v = V_INGPACKBOUNDARY(0);
+        else
+                v = V_INGPACKBOUNDARY(ilog2(pack) - 5);
+
+        MPASS(!is_t4(sc));      /* T4 doesn't have SGE_CONTROL2 */
+        t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
+}
+
 /*
  * adap->params.vpd.cclk must be set up before this is called.
  */
@@ -397,24 +414,9 @@ t4_tweak_chip_settings(struct adapter *sc)
 
         m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
         v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
             V_EGRSTATUSPAGESIZE(spg_len == 128);
-        if (is_t4(sc) && (fl_pad || buffer_packing)) {
-                /* t4_fl_pack has the correct value even when fl_pad = 0 */
-                m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-                v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
-        } else if (is_t5(sc) && fl_pad) {
-                m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-                v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
-        }
         t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
 
-        if (is_t5(sc) && buffer_packing) {
-                m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
-                if (t5_fl_pack == 16)
-                        v = V_INGPACKBOUNDARY(0);
-                else
-                        v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
-                t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
-        }
+        setup_pad_and_pack_boundaries(sc);
 
         v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
             V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
@@ -485,13 +487,16 @@ t4_tweak_chip_settings(struct adapter *sc)
 }
 
 /*
- * SGE wants the buffer to be at least 64B and then a multiple of the pad
- * boundary or 16, whichever is greater.
+ * SGE wants the buffer to be at least 64B and then a multiple of 16.  If
+ * padding is in use the buffer's start and end need to be aligned to the pad
+ * boundary as well.  We'll just make sure that the size is a multiple of the
+ * boundary here, it is up to the buffer allocation code to make sure the start
+ * of the buffer is aligned as well.
  */
 static inline int
-hwsz_ok(int hwsz)
+hwsz_ok(struct adapter *sc, int hwsz)
 {
-        int mask = max(fl_pad, 16) - 1;
+        int mask = fl_pad ? sc->sge.pad_boundary - 1 : 16 - 1;
 
         return (hwsz >= 64 && (hwsz & mask) == 0);
 }
@@ -520,33 +525,22 @@ t4_read_chip_settings(struct adapter *sc)
 
         m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
         v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
             V_EGRSTATUSPAGESIZE(spg_len == 128);
-        if (is_t4(sc) && (fl_pad || buffer_packing)) {
-                m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-                v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
-        } else if (is_t5(sc) && fl_pad) {
-                m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
-                v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
-        }
         r = t4_read_reg(sc, A_SGE_CONTROL);
         if ((r & m) != v) {
                 device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
                 rc = EINVAL;
         }
+        s->pad_boundary = 1 << (G_INGPADBOUNDARY(r) + 5);
 
-        if (is_t5(sc) && buffer_packing) {
-                m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
-                if (t5_fl_pack == 16)
-                        v = V_INGPACKBOUNDARY(0);
-                else
-                        v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
+        if (is_t4(sc))
+                s->pack_boundary = s->pad_boundary;
+        else {
                 r = t4_read_reg(sc, A_SGE_CONTROL2);
-                if ((r & m) != v) {
-                        device_printf(sc->dev,
-                            "invalid SGE_CONTROL2(0x%x)\n", r);
-                        rc = EINVAL;
-                }
+                if (G_INGPACKBOUNDARY(r) == 0)
+                        s->pack_boundary = 16;
+                else
+                        s->pack_boundary = 1 << (G_INGPACKBOUNDARY(r) + 5);
         }
-        s->pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;
 
         v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
             V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
@@ -567,13 +561,22 @@ t4_read_chip_settings(struct adapter *sc)
         for (i = 0; i < nitems(s->hw_buf_info); i++, hwb++) {
                 r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
                 hwb->size = r;
-                hwb->zidx = hwsz_ok(r) ? -1 : -2;
+                hwb->zidx = hwsz_ok(sc, r) ? -1 : -2;
                 hwb->next = -1;
         }
 
         /*
          * Create a sorted list in decreasing order of hw buffer sizes (and so
          * increasing order of spare area) for each software zone.
+         *
+         * If padding is enabled then the start and end of the buffer must align
+         * to the pad boundary; if packing is enabled then they must align with
+         * the pack boundary as well.  Allocations from the cluster zones are
+         * aligned to min(size, 4K), so the buffer starts at that alignment and
+         * ends at hwb->size alignment.  If mbuf inlining is allowed the
+         * starting alignment will be reduced to MSIZE and the driver will
+         * exercise appropriate caution when deciding on the best buffer layout
+         * to use.
          */
         n = 0;  /* no usable buffer size to begin with */
         swz = &s->sw_zone_info[0];
@@ -585,6 +588,12 @@ t4_read_chip_settings(struct adapter *sc)
                 swz->zone = m_getzone(swz->size);
                 swz->type = m_gettype(swz->size);
 
+                if (swz->size < PAGE_SIZE) {
+                        MPASS(powerof2(swz->size));
+                        if (fl_pad && (swz->size % sc->sge.pad_boundary != 0))
+                                continue;
+                }
+
                 if (swz->size == safest_rx_cluster)
                         safe_swz = swz;
 
@@ -592,6 +601,10 @@ t4_read_chip_settings(struct adapter *sc)
                 for (j = 0; j < SGE_FLBUF_SIZES; j++, hwb++) {
                         if (hwb->zidx != -1 || hwb->size > swz->size)
                                 continue;
+#ifdef INVARIANTS
+                        if (fl_pad)
+                                MPASS(hwb->size % sc->sge.pad_boundary == 0);
+#endif
                         hwb->zidx = i;
                         if (head == -1)
                                 head = tail = j;
@@ -639,14 +652,15 @@ t4_read_chip_settings(struct adapter *sc)
                         int spare;
 
                         hwb = &s->hw_buf_info[i];
+#ifdef INVARIANTS
+                        if (fl_pad)
+                                MPASS(hwb->size % sc->sge.pad_boundary == 0);
+#endif
                         spare = safe_swz->size - hwb->size;
-                        if (spare < CL_METADATA_SIZE)
-                                continue;
-                        if (s->safe_hwidx2 == -1 ||
-                            spare == CL_METADATA_SIZE + MSIZE)
+                        if (spare >= CL_METADATA_SIZE) {
                                 s->safe_hwidx2 = i;
-                        if (spare >= CL_METADATA_SIZE + MSIZE)
                                 break;
+                        }
                 }
         }
@@ -744,17 +758,6 @@ t4_create_dma_tag(struct adapter *sc)
         return (rc);
 }
 
-static inline int
-enable_buffer_packing(struct adapter *sc)
-{
-
-        if (sc->flags & BUF_PACKING_OK &&
-            ((is_t5(sc) && buffer_packing) ||   /* 1 or -1 both ok for T5 */
-            (is_t4(sc) && buffer_packing == 1)))
-                return (1);
-        return (0);
-}
-
 void
 t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
     struct sysctl_oid_list *children)
@@ -768,7 +771,7 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
             NULL, fl_pktshift, "payload DMA offset in rx buffer (bytes)");
 
         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pad", CTLFLAG_RD,
-            NULL, fl_pad, "payload pad boundary (bytes)");
+            NULL, sc->sge.pad_boundary, "payload pad boundary (bytes)");
 
         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "spg_len", CTLFLAG_RD,
             NULL, spg_len, "status page size (bytes)");
@@ -776,10 +779,6 @@ t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
             NULL, cong_drop, "congestion drop setting");
 
-        SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD,
-            NULL, enable_buffer_packing(sc),
-            "pack multiple frames in one fl buffer");
-
         SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD, NULL,
             sc->sge.pack_boundary, "payload pack boundary (bytes)");
 }
@@ -957,7 +956,6 @@ mtu_to_max_payload(struct adapter *sc, int mtu, const int toe)
 #ifdef TCP_OFFLOAD
         }
 #endif
-        payload = roundup2(payload, fl_pad);
 
         return (payload);
 }
@@ -982,7 +980,7 @@ t4_setup_port_queues(struct port_info *pi)
         struct ifnet *ifp = pi->ifp;
         struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
         struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
-        int maxp, pack, mtu = ifp->if_mtu;
+        int maxp, mtu = ifp->if_mtu;
 
         /* Interrupt vector to start from (when using multiple vectors) */
         intr_idx = first_vector(pi);
@@ -993,7 +991,6 @@ t4_setup_port_queues(struct port_info *pi)
          * b) allocate queue iff it will take direct interrupts.
          */
         maxp = mtu_to_max_payload(sc, mtu, 0);
-        pack = enable_buffer_packing(sc);
         if (pi->flags & INTR_RXQ) {
                 oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq",
                     CTLFLAG_RD, NULL, "rx queues");
@@ -1004,7 +1001,7 @@ t4_setup_port_queues(struct port_info *pi)
 
                 snprintf(name, sizeof(name), "%s rxq%d-fl",
                     device_get_nameunit(pi->dev), i);
-                init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
+                init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, maxp, name);
 
                 if (pi->flags & INTR_RXQ) {
                         rxq->iq.flags |= IQ_INTR;
@@ -1028,7 +1025,7 @@ t4_setup_port_queues(struct port_info *pi)
 
                 snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
                     device_get_nameunit(pi->dev), i);
-                init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, pack, name);
+                init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, maxp, name);
 
                 if (pi->flags & INTR_OFLD_RXQ) {
                         ofld_rxq->iq.flags |= IQ_INTR;
@@ -1561,7 +1558,8 @@ rxb_free(struct mbuf *m, void *arg1, void *arg2)
  * d) m_extaddref (cluster with metadata) zone_mbuf
  */
 static struct mbuf *
-get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
+get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int fr_offset,
+    int remaining)
 {
         struct mbuf *m;
         struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
@@ -1569,12 +1567,23 @@ get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
         struct sw_zone_info *swz = &sc->sge.sw_zone_info[cll->zidx];
         struct hw_buf_info *hwb = &sc->sge.hw_buf_info[cll->hwidx];
         struct cluster_metadata *clm = cl_metadata(sc, fl, cll, sd->cl);
-        int len, padded_len;
+        int len, blen;
         caddr_t payload;
 
-        len = min(total, hwb->size - fl->rx_offset);
-        padded_len = roundup2(len, fl->buf_boundary);
+        blen = hwb->size - fl->rx_offset;       /* max possible in this buf */
+        len = min(remaining, blen);
         payload = sd->cl + cll->region1 + fl->rx_offset;
+        if (fl->flags & FL_BUF_PACKING) {
+                const u_int l = fr_offset + len;
+                const u_int pad = roundup2(l, fl->buf_boundary) - l;
+
+                if (fl->rx_offset + len + pad < hwb->size)
+                        blen = len + pad;
+                MPASS(fl->rx_offset + blen <= hwb->size);
+        } else {
+                MPASS(fl->rx_offset == 0);      /* not packing */
+        }
 
         if (sc->sc_do_rxcopy && len < RX_COPY_THRESHOLD) {
@@ -1582,7 +1591,7 @@ get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
                 /*
                  * Copy payload into a freshly allocated mbuf.
                  */
-                m = flags & M_PKTHDR ?
+                m = fr_offset == 0 ?
                     m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
                 if (m == NULL)
                         return (NULL);
@@ -1604,10 +1613,11 @@ get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
 
                 MPASS(clm != NULL);
                 m = (struct mbuf *)(sd->cl + sd->nmbuf * MSIZE);
                 /* No bzero required */
-                if (m_init(m, NULL, 0, M_NOWAIT, MT_DATA, flags | M_NOFREE))
+                if (m_init(m, NULL, 0, M_NOWAIT, MT_DATA,
+                    fr_offset == 0 ? M_PKTHDR | M_NOFREE : M_NOFREE))
                         return (NULL);
                 fl->mbuf_inlined++;
-                m_extaddref(m, payload, padded_len, &clm->refcount, rxb_free,
+                m_extaddref(m, payload, blen, &clm->refcount, rxb_free,
                     swz->zone, sd->cl);
                 if (sd->nmbuf++ == 0)
                         counter_u64_add(extfree_refs, 1);
@@ -1619,13 +1629,13 @@ get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
                  * payload in the cluster.
                  */
 
-                m = flags & M_PKTHDR ?
+                m = fr_offset == 0 ?
                     m_gethdr(M_NOWAIT, MT_DATA) : m_get(M_NOWAIT, MT_DATA);
                 if (m == NULL)
                         return (NULL);
                 fl->mbuf_allocated++;
                 if (clm != NULL) {
-                        m_extaddref(m, payload, padded_len, &clm->refcount,
+                        m_extaddref(m, payload, blen, &clm->refcount,
                             rxb_free, swz->zone, sd->cl);
                         if (sd->nmbuf++ == 0)
                                 counter_u64_add(extfree_refs, 1);
@@ -1634,12 +1644,12 @@ get_scatter_segment(struct adapter *sc, struct sge_fl *fl, int total, int flags)
                         sd->cl = NULL;  /* consumed, not a recycle candidate */
                 }
         }
-        if (flags & M_PKTHDR)
-                m->m_pkthdr.len = total;
+        if (fr_offset == 0)
+                m->m_pkthdr.len = remaining;
         m->m_len = len;
         if (fl->flags & FL_BUF_PACKING) {
-                fl->rx_offset += padded_len;
+                fl->rx_offset += blen;
                 MPASS(fl->rx_offset <= hwb->size);
                 if (fl->rx_offset < hwb->size)
                         return (m);     /* without advancing the cidx */
@@ -1661,17 +1671,17 @@ static struct mbuf *
 get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf)
 {
         struct mbuf *m0, *m, **pnext;
-        u_int len;
+        u_int remaining;
+        const u_int total = G_RSPD_LEN(len_newbuf);
 
-        len = G_RSPD_LEN(len_newbuf);
         if (__predict_false(fl->flags & FL_BUF_RESUME)) {
                 M_ASSERTPKTHDR(fl->m0);
-                MPASS(len == fl->m0->m_pkthdr.len);
-                MPASS(fl->remaining < len);
+                MPASS(fl->m0->m_pkthdr.len == total);
+                MPASS(fl->remaining < total);
 
                 m0 = fl->m0;
                 pnext = fl->pnext;
-                len = fl->remaining;
+                remaining = fl->remaining;
                 fl->flags &= ~FL_BUF_RESUME;
                 goto get_segment;
         }
@@ -1692,25 +1702,25 @@ get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf)
          * 'len' and it may span multiple hw buffers.
          */
 
-        m0 = get_scatter_segment(sc, fl, len, M_PKTHDR);
+        m0 = get_scatter_segment(sc, fl, 0, total);
         if (m0 == NULL)
                 return (NULL);
-        len -= m0->m_len;
+        remaining = total - m0->m_len;
         pnext = &m0->m_next;
-        while (len > 0) {
+        while (remaining > 0) {
 get_segment:
                 MPASS(fl->rx_offset == 0);
-                m = get_scatter_segment(sc, fl, len, 0);
+                m = get_scatter_segment(sc, fl, total - remaining, remaining);
                 if (__predict_false(m == NULL)) {
                         fl->m0 = m0;
                         fl->pnext = pnext;
-                        fl->remaining = len;
+                        fl->remaining = remaining;
                         fl->flags |= FL_BUF_RESUME;
                         return (NULL);
                 }
                 *pnext = m;
                 pnext = &m->m_next;
-                len -= m->m_len;
+                remaining -= m->m_len;
         }
         *pnext = NULL;
 
@@ -2122,14 +2132,15 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
 }
 
 static inline void
-init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, int pack,
-    char *name)
+init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int maxp, char *name)
 {
 
         fl->qsize = qsize;
         fl->sidx = qsize - spg_len / EQ_ESIZE;
         strlcpy(fl->lockname, name, sizeof(fl->lockname));
-        if (pack)
+        if (sc->flags & BUF_PACKING_OK &&
+            ((!is_t4(sc) && buffer_packing) ||  /* T5+: enabled unless 0 */
+            (is_t4(sc) && buffer_packing == 1)))/* T4: disabled unless 1 */
                 fl->flags |= FL_BUF_PACKING;
         find_best_refill_source(sc, fl, maxp);
         find_safe_refill_source(sc, fl);
@@ -2278,11 +2289,13 @@ alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
 
                 if (fl->flags & FL_BUF_PACKING) {
                         fl->lowat = roundup2(sc->sge.fl_starve_threshold2, 8);
-                        fl->buf_boundary = max(fl_pad, sc->sge.pack_boundary);
+                        fl->buf_boundary = sc->sge.pack_boundary;
                 } else {
                         fl->lowat = roundup2(sc->sge.fl_starve_threshold, 8);
-                        fl->buf_boundary = fl_pad;
+                        fl->buf_boundary = 16;
                 }
+                if (fl_pad && fl->buf_boundary < sc->sge.pad_boundary)
+                        fl->buf_boundary = sc->sge.pad_boundary;
 
                 c.iqns_to_fl0congen |=
                     htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
@@ -2453,6 +2466,10 @@ add_fl_sysctls(struct sysctl_ctx_list *ctx, struct sysctl_oid *oid,
         SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cntxt_id",
             CTLTYPE_INT | CTLFLAG_RD, &fl->cntxt_id, 0, sysctl_uint16, "I",
             "SGE context id of the freelist");
+        SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "padding", CTLFLAG_RD, NULL,
+            fl_pad ? 1 : 0, "padding enabled");
+        SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "packing", CTLFLAG_RD, NULL,
+            fl->flags & FL_BUF_PACKING ? 1 : 0, "packing enabled");
         SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cidx", CTLFLAG_RD, &fl->cidx,
             0, "consumer index");
         if (fl->flags & FL_BUF_PACKING) {
@@ -4368,6 +4385,17 @@ done:
 
                 if (allow_mbufs_in_cluster == 0 || hwb->size < maxp)
                         break;
+
+                /*
+                 * Do not inline mbufs if doing so would violate the pad/pack
+                 * boundary alignment requirement.
+                 */
+                if (fl_pad && (MSIZE % sc->sge.pad_boundary) != 0)
+                        continue;
+                if (fl->flags & FL_BUF_PACKING &&
+                    (MSIZE % sc->sge.pack_boundary) != 0)
+                        continue;
+
                 if (spare < CL_METADATA_SIZE + MSIZE)
                         continue;
                 n = (spare - CL_METADATA_SIZE) / MSIZE;
@@ -4450,7 +4478,8 @@ find_safe_refill_source(struct adapter *sc, struct sge_fl *fl)
         spare = swz->size - hwb->size;
         fl->cll_alt.hwidx = hwidx;
         fl->cll_alt.zidx = hwb->zidx;
-        if (allow_mbufs_in_cluster)
+        if (allow_mbufs_in_cluster &&
+            (fl_pad == 0 || (MSIZE % sc->sge.pad_boundary) == 0))
                 fl->cll_alt.region1 = ((spare - CL_METADATA_SIZE) / MSIZE) * MSIZE;
         else
                 fl->cll_alt.region1 = 0;
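
For reference, the register encoding this change centralizes in setup_pad_and_pack_boundaries() can be checked in isolation. The sketch below is not driver code: ilog2(), pad_field() and pack_field() are local stand-ins written for this illustration, assuming (as the patch does) that the INGPADBOUNDARY field stores ilog2(boundary) - 5 for boundaries of 32..4096, and that a T5 INGPACKBOUNDARY field of 0 denotes the special 16-byte boundary.

/* Standalone sketch of the pad/pack boundary register encoding. */
#include <assert.h>
#include <stdio.h>

/* Local equivalent of the kernel's ilog2(): log2 of a power of 2. */
static int
ilog2(unsigned int x)
{
        int n = -1;

        while (x != 0) {
                x >>= 1;
                n++;
        }
        return (n);
}

/* Pad boundary: 2^(field + 5) bytes, so field = ilog2(pad) - 5. */
static unsigned int
pad_field(int pad)
{
        assert(pad >= 32 && pad <= 4096 && (pad & (pad - 1)) == 0);
        return (ilog2(pad) - 5);
}

/* T5 pack boundary: field 0 means 16B; otherwise 2^(field + 5) for 64..4096. */
static unsigned int
pack_field(int pack)
{
        if (pack == 16)
                return (0);
        assert(pack >= 64 && pack <= 4096 && (pack & (pack - 1)) == 0);
        return (ilog2(pack) - 5);
}

int
main(void)
{
        printf("pad 32   -> field %u\n", pad_field(32));        /* 0 */
        printf("pad 64   -> field %u\n", pad_field(64));        /* 1 */
        printf("pack 16  -> field %u\n", pack_field(16));       /* 0 */
        printf("pack 4096 -> field %u\n", pack_field(4096));    /* 7 */
        return (0);
}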
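Similarly, the packed-buffer arithmetic that replaces padded_len in get_scatter_segment() can be exercised standalone. This sketch uses a hypothetical consumed() helper (not in the driver) to show how blen is derived: padding is computed against the frame offset (fr_offset), not the buffer offset, and is only added if the padded segment still ends before the end of the hardware buffer.

/* Sketch of the packed-buffer segment-length math (hypothetical helper). */
#include <stdio.h>

/* Round x up to a multiple of y; y must be a power of 2. */
#define roundup2(x, y)  (((x) + ((y) - 1)) & ~((y) - 1))

static int
consumed(int fr_offset, int len, int boundary, int rx_offset, int bufsize)
{
        int l = fr_offset + len;
        int pad = roundup2(l, boundary) - l;
        int blen = bufsize - rx_offset; /* max possible in this buf */

        if (rx_offset + len + pad < bufsize)
                blen = len + pad;       /* next segment starts on boundary */
        return (blen);
}

int
main(void)
{
        /* 60B segment at frame offset 0, 64B boundary: consumes 64B. */
        printf("%d\n", consumed(0, 60, 64, 0, 2048));
        /* Segment ending exactly at the buffer's end: no pad is added. */
        printf("%d\n", consumed(0, 2048, 64, 0, 2048));
        return (0);
}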