diff options
Diffstat (limited to 'sys/dev/cxgbe')
-rw-r--r-- | sys/dev/cxgbe/adapter.h | 60 | ||||
-rw-r--r-- | sys/dev/cxgbe/common/jhash.h | 140 | ||||
-rw-r--r-- | sys/dev/cxgbe/common/t4_msg.h | 2 | ||||
-rw-r--r-- | sys/dev/cxgbe/firmware/t4fw_cfg.txt | 8 | ||||
-rw-r--r-- | sys/dev/cxgbe/offload.h | 58 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_l2t.c | 44 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_l2t.h | 6 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_main.c | 664 | ||||
-rw-r--r-- | sys/dev/cxgbe/t4_sge.c | 2 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_connect.c | 127 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_cpl_io.c | 3 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_listen.c | 409 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_tom.c | 240 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_tom.h | 24 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_tom_l2t.c | 124 |
15 files changed, 1248 insertions, 663 deletions
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h index c1d8a6b..55cfa52 100644 --- a/sys/dev/cxgbe/adapter.h +++ b/sys/dev/cxgbe/adapter.h @@ -158,6 +158,16 @@ enum { }; enum { + /* flags understood by begin_synchronized_op */ + HOLD_LOCK = (1 << 0), + SLEEP_OK = (1 << 1), + INTR_OK = (1 << 2), + + /* flags understood by end_synchronized_op */ + LOCK_HELD = HOLD_LOCK, +}; + +enum { /* adapter flags */ FULL_INIT_DONE = (1 << 0), FW_OK = (1 << 1), @@ -174,11 +184,11 @@ enum { PORT_SYSCTL_CTX = (1 << 2), }; -#define IS_DOOMED(pi) (pi->flags & DOOMED) -#define SET_DOOMED(pi) do {pi->flags |= DOOMED;} while (0) -#define IS_BUSY(sc) (sc->flags & CXGBE_BUSY) -#define SET_BUSY(sc) do {sc->flags |= CXGBE_BUSY;} while (0) -#define CLR_BUSY(sc) do {sc->flags &= ~CXGBE_BUSY;} while (0) +#define IS_DOOMED(pi) ((pi)->flags & DOOMED) +#define SET_DOOMED(pi) do {(pi)->flags |= DOOMED;} while (0) +#define IS_BUSY(sc) ((sc)->flags & CXGBE_BUSY) +#define SET_BUSY(sc) do {(sc)->flags |= CXGBE_BUSY;} while (0) +#define CLR_BUSY(sc) do {(sc)->flags &= ~CXGBE_BUSY;} while (0) struct port_info { device_t dev; @@ -567,7 +577,8 @@ struct adapter { int flags; char fw_version[32]; - unsigned int cfcsum; + char cfg_file[32]; + u_int cfcsum; struct adapter_params params; struct t4_virt_res vres; @@ -591,6 +602,11 @@ struct adapter { an_handler_t an_handler __aligned(CACHE_LINE_SIZE); fw_msg_handler_t fw_msg_handler[4]; /* NUM_FW6_TYPES */ cpl_handler_t cpl_handler[0xef]; /* NUM_CPL_CMDS */ + +#ifdef INVARIANTS + const char *last_op; + const void *last_op_thr; +#endif }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) @@ -598,6 +614,12 @@ struct adapter { #define ADAPTER_LOCK_ASSERT_OWNED(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) #define ADAPTER_LOCK_ASSERT_NOTOWNED(sc) mtx_assert(&(sc)->sc_lock, MA_NOTOWNED) +/* XXX: not bulletproof, but much better than nothing */ +#define ASSERT_SYNCHRONIZED_OP(sc) \ + KASSERT(IS_BUSY(sc) && \ + (mtx_owned(&(sc)->sc_lock) || sc->last_op_thr == 
curthread), \ + ("%s: operation not synchronized.", __func__)) + #define PORT_LOCK(pi) mtx_lock(&(pi)->pi_lock) #define PORT_UNLOCK(pi) mtx_unlock(&(pi)->pi_lock) #define PORT_LOCK_ASSERT_OWNED(pi) mtx_assert(&(pi)->pi_lock, MA_OWNED) @@ -626,18 +648,18 @@ struct adapter { #define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq) #define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq) -#define for_each_txq(pi, iter, txq) \ - txq = &pi->adapter->sge.txq[pi->first_txq]; \ - for (iter = 0; iter < pi->ntxq; ++iter, ++txq) -#define for_each_rxq(pi, iter, rxq) \ - rxq = &pi->adapter->sge.rxq[pi->first_rxq]; \ - for (iter = 0; iter < pi->nrxq; ++iter, ++rxq) -#define for_each_ofld_txq(pi, iter, ofld_txq) \ - ofld_txq = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq]; \ - for (iter = 0; iter < pi->nofldtxq; ++iter, ++ofld_txq) -#define for_each_ofld_rxq(pi, iter, ofld_rxq) \ - ofld_rxq = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq]; \ - for (iter = 0; iter < pi->nofldrxq; ++iter, ++ofld_rxq) +#define for_each_txq(pi, iter, q) \ + for (q = &pi->adapter->sge.txq[pi->first_txq], iter = 0; \ + iter < pi->ntxq; ++iter, ++q) +#define for_each_rxq(pi, iter, q) \ + for (q = &pi->adapter->sge.rxq[pi->first_rxq], iter = 0; \ + iter < pi->nrxq; ++iter, ++q) +#define for_each_ofld_txq(pi, iter, q) \ + for (q = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq], iter = 0; \ + iter < pi->nofldtxq; ++iter, ++q) +#define for_each_ofld_rxq(pi, iter, q) \ + for (q = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq], iter = 0; \ + iter < pi->nofldrxq; ++iter, ++q) /* One for errors, one for firmware events */ #define T4_EXTRA_INTR 2 @@ -751,6 +773,8 @@ int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t); int t4_register_an_handler(struct adapter *, an_handler_t); int t4_register_fw_msg_handler(struct adapter *, int, fw_msg_handler_t); int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); +int begin_synchronized_op(struct adapter *, 
struct port_info *, int, char *); +void end_synchronized_op(struct adapter *, int); /* t4_sge.c */ void t4_sge_modload(void); diff --git a/sys/dev/cxgbe/common/jhash.h b/sys/dev/cxgbe/common/jhash.h deleted file mode 100644 index 4546b7b..0000000 --- a/sys/dev/cxgbe/common/jhash.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef _JHASH_H -#define _JHASH_H - -/* jhash.h: Jenkins hash support. - * - * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) - * - * http://burtleburtle.net/bob/hash/ - * - * These are the credits from Bob's sources: - * - * lookup2.c, by Bob Jenkins, December 1996, Public Domain. - * hash(), hash2(), hash3, and mix() are externally useful functions. - * Routines to test the hash are included if SELF_TEST is defined. - * You can use this free for any purpose. It has no warranty. - * - * $FreeBSD$ - */ - -/* NOTE: Arguments are modified. */ -#define __jhash_mix(a, b, c) \ -{ \ - a -= b; a -= c; a ^= (c>>13); \ - b -= c; b -= a; b ^= (a<<8); \ - c -= a; c -= b; c ^= (b>>13); \ - a -= b; a -= c; a ^= (c>>12); \ - b -= c; b -= a; b ^= (a<<16); \ - c -= a; c -= b; c ^= (b>>5); \ - a -= b; a -= c; a ^= (c>>3); \ - b -= c; b -= a; b ^= (a<<10); \ - c -= a; c -= b; c ^= (b>>15); \ -} - -/* The golden ration: an arbitrary value */ -#define JHASH_GOLDEN_RATIO 0x9e3779b9 - -/* The most generic version, hashes an arbitrary sequence - * of bytes. No alignment or length assumptions are made about - * the input key. 
- */ -static inline u32 jhash(const void *key, u32 length, u32 initval) -{ - u32 a, b, c, len; - const u8 *k = key; - - len = length; - a = b = JHASH_GOLDEN_RATIO; - c = initval; - - while (len >= 12) { - a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24)); - b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24)); - c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24)); - - __jhash_mix(a,b,c); - - k += 12; - len -= 12; - } - - c += length; - switch (len) { - case 11: c += ((u32)k[10]<<24); - case 10: c += ((u32)k[9]<<16); - case 9 : c += ((u32)k[8]<<8); - case 8 : b += ((u32)k[7]<<24); - case 7 : b += ((u32)k[6]<<16); - case 6 : b += ((u32)k[5]<<8); - case 5 : b += k[4]; - case 4 : a += ((u32)k[3]<<24); - case 3 : a += ((u32)k[2]<<16); - case 2 : a += ((u32)k[1]<<8); - case 1 : a += k[0]; - }; - - __jhash_mix(a,b,c); - - return c; -} - -/* A special optimized version that handles 1 or more of u32s. - * The length parameter here is the number of u32s in the key. - */ -static inline u32 jhash2(u32 *k, u32 length, u32 initval) -{ - u32 a, b, c, len; - - a = b = JHASH_GOLDEN_RATIO; - c = initval; - len = length; - - while (len >= 3) { - a += k[0]; - b += k[1]; - c += k[2]; - __jhash_mix(a, b, c); - k += 3; len -= 3; - } - - c += length * 4; - - switch (len) { - case 2 : b += k[1]; - case 1 : a += k[0]; - }; - - __jhash_mix(a,b,c); - - return c; -} - - -/* A special ultra-optimized versions that knows they are hashing exactly - * 3, 2 or 1 word(s). - * - * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally - * done at the end is not done here. 
- */ -static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) -{ - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += initval; - - __jhash_mix(a, b, c); - - return c; -} - -static inline u32 jhash_2words(u32 a, u32 b, u32 initval) -{ - return jhash_3words(a, b, 0, initval); -} - -static inline u32 jhash_1word(u32 a, u32 initval) -{ - return jhash_3words(a, 0, 0, initval); -} - -#endif /* _JHASH_H */ diff --git a/sys/dev/cxgbe/common/t4_msg.h b/sys/dev/cxgbe/common/t4_msg.h index 5bd3cef..92f760b 100644 --- a/sys/dev/cxgbe/common/t4_msg.h +++ b/sys/dev/cxgbe/common/t4_msg.h @@ -159,6 +159,8 @@ enum CPL_error { CPL_ERR_KEEPALIVE_TIMEDOUT = 34, CPL_ERR_RTX_NEG_ADVICE = 35, CPL_ERR_PERSIST_NEG_ADVICE = 36, + CPL_ERR_KEEPALV_NEG_ADVICE = 37, + CPL_ERR_WAIT_ARP_RPL = 41, CPL_ERR_ABORT_FAILED = 42, CPL_ERR_IWARP_FLM = 50, }; diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg.txt b/sys/dev/cxgbe/firmware/t4fw_cfg.txt index bf6a9a1..2a9db62 100644 --- a/sys/dev/cxgbe/firmware/t4fw_cfg.txt +++ b/sys/dev/cxgbe/firmware/t4fw_cfg.txt @@ -56,7 +56,7 @@ [function "4"] wx_caps = all r_caps = all - nvi = 54 + nvi = 32 niqflint = 256 nethctrl = 128 neq = 256 @@ -74,8 +74,8 @@ # Each entry in these categories takes 4 cells each. nhash will use the # TCAM iff there is room left (that is, the rest don't add up to 2048). 
nroute = 32 - nclip = 0 # needed only for IPv6 offload - nfilter = 1488 + nclip = 32 + nfilter = 1456 nserver = 512 nhash = 16384 @@ -137,7 +137,7 @@ [fini] version = 0x1 - checksum = 0x162df193 + checksum = 0xfdebb6ef # # $FreeBSD$ # diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h index ced15a6..55ac71b 100644 --- a/sys/dev/cxgbe/offload.h +++ b/sys/dev/cxgbe/offload.h @@ -54,16 +54,20 @@ OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \ } while (0) +TAILQ_HEAD(stid_head, stid_region); +struct listen_ctx; + +struct stid_region { + TAILQ_ENTRY(stid_region) link; + int used; /* # of stids used by this region */ + int free; /* # of contiguous stids free right after this region */ +}; + /* * Max # of ATIDs. The absolute HW max is 16K but we keep it lower. */ #define MAX_ATIDS 8192U -union serv_entry { - void *data; - union serv_entry *next; -}; - union aopen_entry { void *data; union aopen_entry *next; @@ -75,34 +79,33 @@ union aopen_entry { */ struct tid_info { void **tid_tab; - unsigned int ntids; - - union serv_entry *stid_tab; - unsigned int nstids; - unsigned int stid_base; - + u_int ntids; + u_int tids_in_use; + + struct mtx stid_lock __aligned(CACHE_LINE_SIZE); + struct listen_ctx **stid_tab; + u_int nstids; + u_int stid_base; + u_int stids_in_use; + u_int nstids_free_head; /* # of available stids at the beginning */ + struct stid_head stids; + + struct mtx atid_lock __aligned(CACHE_LINE_SIZE); union aopen_entry *atid_tab; - unsigned int natids; - - struct filter_entry *ftid_tab; - unsigned int nftids; - unsigned int ftid_base; - unsigned int ftids_in_use; - - struct mtx atid_lock; + u_int natids; union aopen_entry *afree; - unsigned int atids_in_use; - - struct mtx stid_lock; - union serv_entry *sfree; - unsigned int stids_in_use; + u_int atids_in_use; - unsigned int tids_in_use; + struct mtx ftid_lock __aligned(CACHE_LINE_SIZE); + struct filter_entry *ftid_tab; + u_int nftids; + u_int ftid_base; + u_int ftids_in_use; }; struct t4_range { - 
unsigned int start; - unsigned int size; + u_int start; + u_int size; }; struct t4_virt_res { /* virtualized HW resources */ @@ -114,6 +117,7 @@ struct t4_virt_res { /* virtualized HW resources */ struct t4_range qp; struct t4_range cq; struct t4_range ocq; + struct t4_range l2t; }; #ifdef TCP_OFFLOAD diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c index dd8748e..dcff5e8 100644 --- a/sys/dev/cxgbe/t4_l2t.c +++ b/sys/dev/cxgbe/t4_l2t.c @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include "common/common.h" -#include "common/jhash.h" #include "common/t4_msg.h" #include "t4_l2t.h" @@ -78,7 +77,7 @@ t4_alloc_l2e(struct l2t_data *d) return (NULL); /* there's definitely a free entry */ - for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e) + for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e) if (atomic_load_acq_int(&e->refcnt) == 0) goto found; @@ -115,6 +114,7 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) { struct wrqe *wr; struct cpl_l2t_write_req *req; + int idx = e->idx + sc->vres.l2t.start; mtx_assert(&e->lock, MA_OWNED); @@ -124,10 +124,10 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync) req = wrtod(wr); INIT_TP_WR(req, 0); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx | + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx | V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id))); req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync)); - req->l2t_idx = htons(e->idx); + req->l2t_idx = htons(idx); req->vlan = htons(e->vlan); memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); @@ -183,18 +183,24 @@ t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan, int t4_init_l2t(struct adapter *sc, int flags) { - int i; + int i, l2t_size; struct l2t_data *d; - d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags); + l2t_size = sc->vres.l2t.size; + if (l2t_size < 2) /* At least 1 bucket for IP and 1 for IPv6 */ + return (EINVAL); + + 
d = malloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), M_CXGBE, + M_ZERO | flags); if (!d) return (ENOMEM); + d->l2t_size = l2t_size; d->rover = d->l2tab; - atomic_store_rel_int(&d->nfree, L2T_SIZE); + atomic_store_rel_int(&d->nfree, l2t_size); rw_init(&d->lock, "L2T"); - for (i = 0; i < L2T_SIZE; i++) { + for (i = 0; i < l2t_size; i++) { struct l2t_entry *e = &d->l2tab[i]; e->idx = i; @@ -215,7 +221,7 @@ t4_free_l2t(struct l2t_data *d) { int i; - for (i = 0; i < L2T_SIZE; i++) + for (i = 0; i < d->l2t_size; i++) mtx_destroy(&d->l2tab[i].lock); rw_destroy(&d->lock); free(d, M_CXGBE); @@ -229,11 +235,11 @@ do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, { const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int tid = GET_TID(rpl); - unsigned int idx = tid & (L2T_SIZE - 1); + unsigned int idx = tid % L2T_SIZE; if (__predict_false(rpl->status != CPL_ERR_NONE)) { log(LOG_ERR, - "Unexpected L2T_WRITE_RPL status %u for entry %u\n", + "Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n", rpl->status, idx); return (EINVAL); } @@ -269,7 +275,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS) struct l2t_entry *e; struct sbuf *sb; int rc, i, header = 0; - char ip[60]; + char ip[INET6_ADDRSTRLEN]; if (l2t == NULL) return (ENXIO); @@ -283,7 +289,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS) return (ENOMEM); e = &l2t->l2tab[0]; - for (i = 0; i < L2T_SIZE; i++, e++) { + for (i = 0; i < l2t->l2t_size; i++, e++) { mtx_lock(&e->lock); if (e->state == L2T_STATE_UNUSED) goto skip; @@ -295,11 +301,15 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS) } if (e->state == L2T_STATE_SWITCHING) ip[0] = 0; - else - snprintf(ip, sizeof(ip), "%s", - inet_ntoa(*(struct in_addr *)&e->addr)); + else { + inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0], + &ip[0], sizeof(ip)); + } - /* XXX: e->ifp may not be around */ + /* + * XXX: e->ifp may not be around. + * XXX: IPv6 addresses may not align properly in the output. 
+ */ sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d" " %u %2u %c %5u %s", e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2], diff --git a/sys/dev/cxgbe/t4_l2t.h b/sys/dev/cxgbe/t4_l2t.h index 6927b81..c60eef1 100644 --- a/sys/dev/cxgbe/t4_l2t.h +++ b/sys/dev/cxgbe/t4_l2t.h @@ -60,7 +60,7 @@ enum { struct l2t_entry { uint16_t state; /* entry state */ uint16_t idx; /* entry index */ - uint32_t addr; /* next hop IP address */ + uint32_t addr[4]; /* next hop IP or IPv6 address */ struct ifnet *ifp; /* outgoing interface */ uint16_t smt_idx; /* SMT index */ uint16_t vlan; /* VLAN TCI (id: 0-11, prio: 13-15) */ @@ -70,15 +70,17 @@ struct l2t_entry { struct mtx lock; volatile int refcnt; /* entry reference count */ uint16_t hash; /* hash bucket the entry is on */ + uint8_t ipv6; /* entry is for an IPv6 address */ uint8_t lport; /* associated offload logical port */ uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */ }; struct l2t_data { struct rwlock lock; + u_int l2t_size; volatile int nfree; /* number of free entries */ struct l2t_entry *rover;/* starting point for next allocation */ - struct l2t_entry l2tab[L2T_SIZE]; + struct l2t_entry l2tab[]; }; diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c index aeaa4d2..c22ec21 100644 --- a/sys/dev/cxgbe/t4_main.c +++ b/sys/dev/cxgbe/t4_main.c @@ -284,9 +284,7 @@ static int get_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *); static int update_mac_settings(struct port_info *, int); -static int cxgbe_init_locked(struct port_info *); static int cxgbe_init_synchronized(struct port_info *); -static int cxgbe_uninit_locked(struct port_info *); static int cxgbe_uninit_synchronized(struct port_info *); static int setup_intr_handlers(struct adapter *); static int adapter_full_init(struct adapter *); @@ -348,6 +346,7 @@ static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static 
int del_filter_wr(struct adapter *, int); static int get_sge_context(struct adapter *, struct t4_sge_context *); +static int load_fw(struct adapter *, struct t4_data *); static int read_card_mem(struct adapter *, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); #ifdef TCP_OFFLOAD @@ -820,6 +819,8 @@ t4_detach(device_t dev) mtx_destroy(&sc->sc_lock); } + if (mtx_initialized(&sc->tids.ftid_lock)) + mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); @@ -874,7 +875,7 @@ cxgbe_attach(device_t dev) ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (is_offload(pi->adapter)) - ifp->if_capabilities |= IFCAP_TOE4; + ifp->if_capabilities |= IFCAP_TOE; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | @@ -918,6 +919,10 @@ cxgbe_detach(device_t dev) while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); +#ifdef INVARIANTS + sc->last_op = "t4detach"; + sc->last_op_thr = curthread; +#endif ADAPTER_UNLOCK(sc); if (pi->vlan_c) @@ -939,7 +944,7 @@ cxgbe_detach(device_t dev) ADAPTER_LOCK(sc); CLR_BUSY(sc); - wakeup_one(&sc->flags); + wakeup(&sc->flags); ADAPTER_UNLOCK(sc); return (0); @@ -951,9 +956,10 @@ cxgbe_init(void *arg) struct port_info *pi = arg; struct adapter *sc = pi->adapter; - ADAPTER_LOCK(sc); - cxgbe_init_locked(pi); /* releases adapter lock */ - ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + if (begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4init") != 0) + return; + cxgbe_init_synchronized(pi); + end_synchronized_op(sc, 0); } static int @@ -967,81 +973,56 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) switch (cmd) { case SIOCSIFMTU: - ADAPTER_LOCK(sc); - rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? 
EBUSY : 0); - if (rc) { -fail: - ADAPTER_UNLOCK(sc); - return (rc); - } - mtu = ifr->ifr_mtu; - if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) { - rc = EINVAL; - } else { - ifp->if_mtu = mtu; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - t4_update_fl_bufsize(ifp); - PORT_LOCK(pi); - rc = update_mac_settings(pi, XGMAC_MTU); - PORT_UNLOCK(pi); - } + if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) + return (EINVAL); + + rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4mtu"); + if (rc) + return (rc); + ifp->if_mtu = mtu; + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + t4_update_fl_bufsize(ifp); + rc = update_mac_settings(pi, XGMAC_MTU); } - ADAPTER_UNLOCK(sc); + end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: - ADAPTER_LOCK(sc); - if (IS_DOOMED(pi)) { - rc = ENXIO; - goto fail; - } + rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4flg"); + if (rc) + return (rc); + if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = pi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { - if (IS_BUSY(sc)) { - rc = EBUSY; - goto fail; - } - PORT_LOCK(pi); rc = update_mac_settings(pi, XGMAC_PROMISC | XGMAC_ALLMULTI); - PORT_UNLOCK(pi); } - ADAPTER_UNLOCK(sc); } else - rc = cxgbe_init_locked(pi); + rc = cxgbe_init_synchronized(pi); pi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) - rc = cxgbe_uninit_locked(pi); - else - ADAPTER_UNLOCK(sc); - - ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + rc = cxgbe_uninit_synchronized(pi); + end_synchronized_op(sc, 0); break; case SIOCADDMULTI: - case SIOCDELMULTI: /* these two can be called with a mutex held :-( */ - ADAPTER_LOCK(sc); - rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? 
EBUSY : 0); + case SIOCDELMULTI: /* these two are called with a mutex held :-( */ + rc = begin_synchronized_op(sc, pi, HOLD_LOCK, "t4multi"); if (rc) - goto fail; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - PORT_LOCK(pi); + return (rc); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(pi, XGMAC_MCADDRS); - PORT_UNLOCK(pi); - } - ADAPTER_UNLOCK(sc); + end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: - ADAPTER_LOCK(sc); - rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); + rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) - goto fail; + return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { @@ -1122,11 +1103,8 @@ fail: #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - PORT_LOCK(pi); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(pi, XGMAC_VLANEX); - PORT_UNLOCK(pi); - } } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; @@ -1141,7 +1119,8 @@ fail: #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif - ADAPTER_UNLOCK(sc); +fail: + end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: @@ -1625,21 +1604,28 @@ prep_firmware(struct adapter *sc) /* Partition adapter resources as specified in the config file. */ if (sc->flags & MASTER_PF) { - if (strncmp(t4_cfg_file, "default", sizeof(t4_cfg_file))) { + snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", + pci_get_device(sc->dev) == 0x440a ? "uwire" : t4_cfg_file); + if (strncmp(sc->cfg_file, "default", sizeof(sc->cfg_file))) { char s[32]; - snprintf(s, sizeof(s), "t4fw_cfg_%s", t4_cfg_file); + snprintf(s, sizeof(s), "t4fw_cfg_%s", sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { device_printf(sc->dev, "unable to locate %s module, " "will use default config file.\n", s); + snprintf(sc->cfg_file, sizeof(sc->cfg_file), + "%s", "default"); } } rc = partition_resources(sc, cfg ? 
cfg : default_cfg); if (rc != 0) goto done; /* error message displayed already */ + } else { + snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", "notme"); + sc->cfcsum = (u_int)-1; } sc->flags |= FW_OK; @@ -1887,7 +1873,9 @@ get_params__post_init(struct adapter *sc) param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); - rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val); + param[4] = FW_PARAM_PFVF(L2T_START); + param[5] = FW_PARAM_PFVF(L2T_END); + rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); @@ -1898,6 +1886,11 @@ get_params__post_init(struct adapter *sc) sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; + sc->vres.l2t.start = val[4]; + sc->vres.l2t.size = val[5] - val[4] + 1; + KASSERT(sc->vres.l2t.size <= L2T_SIZE, + ("%s: L2 table size (%u) larger than expected (%u)", + __func__, sc->vres.l2t.size, L2T_SIZE)); /* get capabilites */ bzero(&caps, sizeof(caps)); @@ -2111,7 +2104,7 @@ update_mac_settings(struct port_info *pi, int flags) struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; - PORT_LOCK_ASSERT_OWNED(pi); + ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) @@ -2213,39 +2206,74 @@ mcfail: return (rc); } -static int -cxgbe_init_locked(struct port_info *pi) +int +begin_synchronized_op(struct adapter *sc, struct port_info *pi, int flags, + char *wmesg) { - struct adapter *sc = pi->adapter; - int rc = 0; + int rc, pri; - ADAPTER_LOCK_ASSERT_OWNED(sc); +#ifdef WITNESS + /* the caller thinks it's ok to sleep, but is it really? 
*/ + if (flags & SLEEP_OK) + pause("t4slptst", 1); +#endif - while (!IS_DOOMED(pi) && IS_BUSY(sc)) { - if (mtx_sleep(&sc->flags, &sc->sc_lock, PCATCH, "t4init", 0)) { + if (INTR_OK) + pri = PCATCH; + else + pri = 0; + + ADAPTER_LOCK(sc); + for (;;) { + + if (pi && IS_DOOMED(pi)) { + rc = ENXIO; + goto done; + } + + if (!IS_BUSY(sc)) { + rc = 0; + break; + } + + if (!(flags & SLEEP_OK)) { + rc = EBUSY; + goto done; + } + + if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } - if (IS_DOOMED(pi)) { - rc = ENXIO; - goto done; - } - KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); - /* Give up the adapter lock, port init code can sleep. */ + KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); - ADAPTER_UNLOCK(sc); - - rc = cxgbe_init_synchronized(pi); +#ifdef INVARIANTS + sc->last_op = wmesg; + sc->last_op_thr = curthread; +#endif done: - ADAPTER_LOCK(sc); + if (!(flags & HOLD_LOCK) || rc) + ADAPTER_UNLOCK(sc); + + return (rc); +} + +void +end_synchronized_op(struct adapter *sc, int flags) +{ + + if (flags & LOCK_HELD) + ADAPTER_LOCK_ASSERT_OWNED(sc); + else + ADAPTER_LOCK(sc); + KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); - wakeup_one(&sc->flags); + wakeup(&sc->flags); ADAPTER_UNLOCK(sc); - return (rc); } static int @@ -2255,7 +2283,7 @@ cxgbe_init_synchronized(struct port_info *pi) struct ifnet *ifp = pi->ifp; int rc = 0; - ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + ASSERT_SYNCHRONIZED_OP(sc); if (isset(&sc->open_device_map, pi->port_id)) { KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING, @@ -2271,9 +2299,7 @@ cxgbe_init_synchronized(struct port_info *pi) ((rc = port_full_init(pi)) != 0)) return (rc); /* error message displayed already */ - PORT_LOCK(pi); rc = update_mac_settings(pi, XGMAC_ALL); - PORT_UNLOCK(pi); if (rc) goto done; /* error message displayed already */ @@ -2291,7 +2317,9 @@ cxgbe_init_synchronized(struct port_info *pi) /* all ok */ setbit(&sc->open_device_map, 
pi->port_id); + PORT_LOCK(pi); ifp->if_drv_flags |= IFF_DRV_RUNNING; + PORT_UNLOCK(pi); callout_reset(&pi->tick, hz, cxgbe_tick, pi); done: @@ -2301,39 +2329,6 @@ done: return (rc); } -static int -cxgbe_uninit_locked(struct port_info *pi) -{ - struct adapter *sc = pi->adapter; - int rc; - - ADAPTER_LOCK_ASSERT_OWNED(sc); - - while (!IS_DOOMED(pi) && IS_BUSY(sc)) { - if (mtx_sleep(&sc->flags, &sc->sc_lock, PCATCH, "t4uninit", 0)) { - rc = EINTR; - goto done; - } - } - if (IS_DOOMED(pi)) { - rc = ENXIO; - goto done; - } - KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); - SET_BUSY(sc); - ADAPTER_UNLOCK(sc); - - rc = cxgbe_uninit_synchronized(pi); - - ADAPTER_LOCK(sc); - KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); - CLR_BUSY(sc); - wakeup_one(&sc->flags); -done: - ADAPTER_UNLOCK(sc); - return (rc); -} - /* * Idempotent. */ @@ -2344,7 +2339,7 @@ cxgbe_uninit_synchronized(struct port_info *pi) struct ifnet *ifp = pi->ifp; int rc; - ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + ASSERT_SYNCHRONIZED_OP(sc); /* * Disable the VI so that all its data in either direction is discarded @@ -2360,7 +2355,9 @@ cxgbe_uninit_synchronized(struct port_info *pi) } clrbit(&sc->open_device_map, pi->port_id); + PORT_LOCK(pi); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; @@ -2539,7 +2536,7 @@ port_full_init(struct port_info *pi) struct sge_rxq *rxq; int rc, i; - ADAPTER_LOCK_ASSERT_NOTOWNED(sc); + ASSERT_SYNCHRONIZED_OP(sc); KASSERT((pi->flags & PORT_INIT_DONE) == 0, ("%s: PORT_INIT_DONE already", __func__)); @@ -3119,7 +3116,7 @@ t4_sysctls(struct adapter *sc) CTLFLAG_RD, &sc->fw_version, 0, "firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", - CTLFLAG_RD, &t4_cfg_file, 0, "configuration file"); + CTLFLAG_RD, &sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, &sc->cfcsum, 0, "config file checksum"); @@ -3524,6 +3521,8 @@ 
sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) struct port_info *pi = arg1; struct adapter *sc = pi->adapter; int idx, rc, i; + struct sge_rxq *rxq; + uint8_t v; idx = pi->tmr_idx; @@ -3534,25 +3533,23 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); - ADAPTER_LOCK(sc); - rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); - if (rc == 0) { - struct sge_rxq *rxq; - uint8_t v; + rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4tmr"); + if (rc) + return (rc); - v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1); - for_each_rxq(pi, i, rxq) { + v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1); + for_each_rxq(pi, i, rxq) { #ifdef atomic_store_rel_8 - atomic_store_rel_8(&rxq->iq.intr_params, v); + atomic_store_rel_8(&rxq->iq.intr_params, v); #else - rxq->iq.intr_params = v; + rxq->iq.intr_params = v; #endif - } - pi->tmr_idx = idx; } + pi->tmr_idx = idx; - ADAPTER_UNLOCK(sc); - return (rc); + end_synchronized_op(sc, LOCK_HELD); + return (0); } static int @@ -3571,15 +3568,17 @@ sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); - ADAPTER_LOCK(sc); - rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); - if (rc == 0 && pi->flags & PORT_INIT_DONE) - rc = EBUSY; /* cannot be changed once the queues are created */ + rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4pktc"); + if (rc) + return (rc); - if (rc == 0) + if (pi->flags & PORT_INIT_DONE) + rc = EBUSY; /* cannot be changed once the queues are created */ + else pi->pktc_idx = idx; - ADAPTER_UNLOCK(sc); + end_synchronized_op(sc, LOCK_HELD); return (rc); } @@ -3599,15 +3598,17 @@ sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) if (qsize < 128 || (qsize & 7)) return (EINVAL); - ADAPTER_LOCK(sc); - rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? 
EBUSY : 0); - if (rc == 0 && pi->flags & PORT_INIT_DONE) - rc = EBUSY; /* cannot be changed once the queues are created */ + rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4rxqs"); + if (rc) + return (rc); - if (rc == 0) + if (pi->flags & PORT_INIT_DONE) + rc = EBUSY; /* cannot be changed once the queues are created */ + else pi->qsize_rxq = qsize; - ADAPTER_UNLOCK(sc); + end_synchronized_op(sc, LOCK_HELD); return (rc); } @@ -3624,18 +3625,21 @@ sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) if (rc != 0 || req->newptr == NULL) return (rc); - if (qsize < 128) + /* bufring size must be powerof2 */ + if (qsize < 128 || !powerof2(qsize)) return (EINVAL); - ADAPTER_LOCK(sc); - rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0); - if (rc == 0 && pi->flags & PORT_INIT_DONE) - rc = EBUSY; /* cannot be changed once the queues are created */ + rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4txqs"); + if (rc) + return (rc); - if (rc == 0) + if (pi->flags & PORT_INIT_DONE) + rc = EBUSY; /* cannot be changed once the queues are created */ + else pi->qsize_txq = qsize; - ADAPTER_UNLOCK(sc); + end_synchronized_op(sc, LOCK_HELD); return (rc); } @@ -4674,8 +4678,14 @@ fspec_to_fconf(struct t4_filter_specification *fs) static int get_filter_mode(struct adapter *sc, uint32_t *mode) { + int rc; uint32_t fconf; + rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4getfm"); + if (rc) + return (rc); + t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1, A_TP_VLAN_PRI_MAP); @@ -4687,6 +4697,7 @@ get_filter_mode(struct adapter *sc, uint32_t *mode) *mode = fconf_to_mode(sc->filter_mode); + end_synchronized_op(sc, LOCK_HELD); return (0); } @@ -4698,11 +4709,10 @@ set_filter_mode(struct adapter *sc, uint32_t mode) fconf = mode_to_fconf(mode); - ADAPTER_LOCK(sc); - if (IS_BUSY(sc)) { - rc = EAGAIN; - goto done; - } + rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4setfm"); + if (rc) + return 
(rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; @@ -4725,7 +4735,7 @@ set_filter_mode(struct adapter *sc, uint32_t mode) #endif done: - ADAPTER_UNLOCK(sc); + end_synchronized_op(sc, LOCK_HELD); return (rc); } @@ -4746,18 +4756,18 @@ get_filter_hits(struct adapter *sc, uint32_t fid) static int get_filter(struct adapter *sc, struct t4_filter *t) { - int i, nfilters = sc->tids.nftids; + int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; - ADAPTER_LOCK_ASSERT_OWNED(sc); - - if (IS_BUSY(sc)) - return (EAGAIN); + rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, + "t4getf"); + if (rc) + return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; - return (0); + goto done; } f = &sc->tids.ftid_tab[t->idx]; @@ -4772,11 +4782,13 @@ get_filter(struct adapter *sc, struct t4_filter *t) t->hits = UINT64_MAX; t->fs = f->fs; - return (0); + goto done; } } t->idx = 0xffffffff; +done: + end_synchronized_op(sc, LOCK_HELD); return (0); } @@ -4785,40 +4797,58 @@ set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; - int i; + int i, rc; - ADAPTER_LOCK_ASSERT_OWNED(sc); + rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); + if (rc) + return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; - if (nfilters == 0) - return (ENOTSUP); + if (nfilters == 0) { + rc = ENOTSUP; + goto done; + } - if (!(sc->flags & FULL_INIT_DONE)) - return (EAGAIN); + if (!(sc->flags & FULL_INIT_DONE)) { + rc = EAGAIN; + goto done; + } - if (t->idx >= nfilters) - return (EINVAL); + if (t->idx >= nfilters) { + rc = EINVAL; + goto done; + } /* Validate against the global filter mode */ - if ((sc->filter_mode | fspec_to_fconf(&t->fs)) != sc->filter_mode) - return (E2BIG); + if ((sc->filter_mode | fspec_to_fconf(&t->fs)) != sc->filter_mode) { + rc = E2BIG; + goto done; + } - if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) - return 
(EINVAL); + if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { + rc = EINVAL; + goto done; + } - if (t->fs.val.iport >= nports) - return (EINVAL); + if (t->fs.val.iport >= nports) { + rc = EINVAL; + goto done; + } /* Can't specify an iq if not steering to it */ - if (!t->fs.dirsteer && t->fs.iq) - return (EINVAL); + if (!t->fs.dirsteer && t->fs.iq) { + rc = EINVAL; + goto done; + } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && - ((t->idx & 0x3) || t->idx + 4 >= nfilters)) - return (EINVAL); + ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { + rc = EINVAL; + goto done; + } if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, @@ -4827,17 +4857,24 @@ set_filter(struct adapter *sc, struct t4_filter *t) sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); - if (sc->tids.ftid_tab == NULL) - return (ENOMEM); + if (sc->tids.ftid_tab == NULL) { + rc = ENOMEM; + goto done; + } + mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; - if (f->pending || f->valid) - return (EBUSY); - if (f->locked) - return (EPERM); + if (f->pending || f->valid) { + rc = EBUSY; + goto done; + } + if (f->locked) { + rc = EPERM; + goto done; + } if (t->fs.type == 0) break; @@ -4846,7 +4883,27 @@ set_filter(struct adapter *sc, struct t4_filter *t) f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; - return set_filter_wr(sc, t->idx); + rc = set_filter_wr(sc, t->idx); +done: + end_synchronized_op(sc, 0); + + if (rc == 0) { + mtx_lock(&sc->tids.ftid_lock); + for (;;) { + if (f->pending == 0) { + rc = f->valid ? 
0 : EIO; + break; + } + + if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, + PCATCH, "t4setfw", 0)) { + rc = EINPROGRESS; + break; + } + } + mtx_unlock(&sc->tids.ftid_lock); + } + return (rc); } static int @@ -4854,37 +4911,67 @@ del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; + int rc; - ADAPTER_LOCK_ASSERT_OWNED(sc); - - if (IS_BUSY(sc)) - return (EAGAIN); + rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); + if (rc) + return (rc); nfilters = sc->tids.nftids; - if (nfilters == 0) - return (ENOTSUP); + if (nfilters == 0) { + rc = ENOTSUP; + goto done; + } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || - t->idx >= nfilters) - return (EINVAL); + t->idx >= nfilters) { + rc = EINVAL; + goto done; + } - if (!(sc->flags & FULL_INIT_DONE)) - return (EAGAIN); + if (!(sc->flags & FULL_INIT_DONE)) { + rc = EAGAIN; + goto done; + } f = &sc->tids.ftid_tab[t->idx]; - if (f->pending) - return (EBUSY); - if (f->locked) - return (EPERM); + if (f->pending) { + rc = EBUSY; + goto done; + } + if (f->locked) { + rc = EPERM; + goto done; + } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ - return del_filter_wr(sc, t->idx); + rc = del_filter_wr(sc, t->idx); } - return (0); +done: + end_synchronized_op(sc, 0); + + if (rc == 0) { + mtx_lock(&sc->tids.ftid_lock); + for (;;) { + if (f->pending == 0) { + rc = f->valid ? EIO : 0; + break; + } + + if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, + PCATCH, "t4delfw", 0)) { + rc = EINPROGRESS; + break; + } + } + mtx_unlock(&sc->tids.ftid_lock); + } + + return (rc); } static void @@ -4904,7 +4991,7 @@ set_filter_wr(struct adapter *sc, int fidx) struct fw_filter_wr *fwr; unsigned int ftid; - ADAPTER_LOCK_ASSERT_OWNED(sc); + ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. 
*/ @@ -5007,8 +5094,6 @@ del_filter_wr(struct adapter *sc, int fidx) struct fw_filter_wr *fwr; unsigned int ftid; - ADAPTER_LOCK_ASSERT_OWNED(sc); - ftid = sc->tids.ftid_base + fidx; wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq); @@ -5039,8 +5124,10 @@ t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) unsigned int rc = G_COOKIE(rpl->cookie); struct filter_entry *f = &sc->tids.ftid_tab[idx]; - ADAPTER_LOCK(sc); + mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { + KASSERT(f->pending, ("%s: filter[%u] isn't pending.", + __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; @@ -5055,7 +5142,8 @@ t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) clear_filter(f); sc->tids.ftids_in_use--; } - ADAPTER_UNLOCK(sc); + wakeup(&sc->tids.ftid_tab); + mtx_unlock(&sc->tids.ftid_lock); } return (0); @@ -5064,29 +5152,63 @@ t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) static int get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) { - int rc = EINVAL; + int rc; if (cntxt->cid > M_CTXTQID) - return (rc); + return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) - return (rc); + return (EINVAL); if (sc->flags & FW_OK) { - ADAPTER_LOCK(sc); /* Avoid parallel t4_wr_mbox */ - rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, - &cntxt->data[0]); - ADAPTER_UNLOCK(sc); + rc = begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4ctxt"); + if (rc == 0) { + rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, + cntxt->mem_id, &cntxt->data[0]); + end_synchronized_op(sc, LOCK_HELD); + if (rc == 0) + return (0); + } } - if (rc != 0) { - /* Read via firmware failed or wasn't even attempted */ + /* + * Read via firmware failed or wasn't even attempted. Read directly via + * the backdoor. 
+ */ + rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, + &cntxt->data[0]); + return (rc); +} + +static int +load_fw(struct adapter *sc, struct t4_data *fw) +{ + int rc; + uint8_t *fw_data; + + rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); + if (rc) + return (rc); + + if (sc->flags & FULL_INIT_DONE) { + rc = EBUSY; + goto done; + } - rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, - &cntxt->data[0]); + fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); + if (fw_data == NULL) { + rc = ENOMEM; + goto done; } + rc = copyin(fw->data, fw_data, fw->len); + if (rc == 0) + rc = -t4_load_fw(sc, fw_data, fw->len); + + free(fw_data, M_CXGBE); +done: + end_synchronized_op(sc, 0); return (rc); } @@ -5173,8 +5295,6 @@ read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; - ADAPTER_LOCK_ASSERT_OWNED(sc); /* for mbox */ - if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); @@ -5183,8 +5303,12 @@ read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) return (ENOTSUP); } + rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); + if (rc) + return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, &i2cd->data[0]); + end_synchronized_op(sc, 0); return (rc); } @@ -5354,56 +5478,78 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: - ADAPTER_LOCK(sc); rc = get_filter(sc, (struct t4_filter *)data); - ADAPTER_UNLOCK(sc); break; case CHELSIO_T4_SET_FILTER: - ADAPTER_LOCK(sc); rc = set_filter(sc, (struct t4_filter *)data); - ADAPTER_UNLOCK(sc); break; case CHELSIO_T4_DEL_FILTER: - ADAPTER_LOCK(sc); rc = del_filter(sc, (struct t4_filter *)data); - ADAPTER_UNLOCK(sc); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; - case CHELSIO_T4_LOAD_FW: { - struct t4_data *fw = (struct t4_data *)data; - uint8_t *fw_data; - - if (sc->flags & 
FULL_INIT_DONE) - return (EBUSY); - - fw_data = malloc(fw->len, M_CXGBE, M_NOWAIT); - if (fw_data == NULL) - return (ENOMEM); - - rc = copyin(fw->data, fw_data, fw->len); - if (rc == 0) - rc = -t4_load_fw(sc, fw_data, fw->len); - - free(fw_data, M_CXGBE); + case CHELSIO_T4_LOAD_FW: + rc = load_fw(sc, (struct t4_data *)data); break; - } case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: - ADAPTER_LOCK(sc); rc = read_i2c(sc, (struct t4_i2c_data *)data); - ADAPTER_UNLOCK(sc); break; case CHELSIO_T4_CLEAR_STATS: { + int i; u_int port_id = *(uint32_t *)data; + struct port_info *pi; if (port_id >= sc->params.nports) return (EINVAL); + /* MAC stats */ t4_clr_port_stats(sc, port_id); + + pi = sc->port[port_id]; + if (pi->flags & PORT_INIT_DONE) { + struct sge_rxq *rxq; + struct sge_txq *txq; + struct sge_wrq *wrq; + + for_each_rxq(pi, i, rxq) { +#if defined(INET) || defined(INET6) + rxq->lro.lro_queued = 0; + rxq->lro.lro_flushed = 0; +#endif + rxq->rxcsum = 0; + rxq->vlan_extraction = 0; + } + + for_each_txq(pi, i, txq) { + txq->txcsum = 0; + txq->tso_wrs = 0; + txq->vlan_insertion = 0; + txq->imm_wrs = 0; + txq->sgl_wrs = 0; + txq->txpkt_wrs = 0; + txq->txpkts_wrs = 0; + txq->txpkts_pkts = 0; + txq->br->br_drops = 0; + txq->no_dmamap = 0; + txq->no_desc = 0; + } + +#ifdef TCP_OFFLOAD + /* nothing to clear for each ofld_rxq */ + + for_each_ofld_txq(pi, i, wrq) { + wrq->tx_wrs = 0; + wrq->no_desc = 0; + } +#endif + wrq = &sc->sge.ctrlq[pi->port_id]; + wrq->tx_wrs = 0; + wrq->no_desc = 0; + } break; } default: @@ -5420,16 +5566,16 @@ toe_capability(struct port_info *pi, int enable) int rc; struct adapter *sc = pi->adapter; - ADAPTER_LOCK_ASSERT_OWNED(sc); + ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if (!(sc->flags & FULL_INIT_DONE)) { - log(LOG_WARNING, - "You must enable a cxgbe interface first\n"); - return (EAGAIN); + rc = cxgbe_init_synchronized(pi); + if (rc) + return (rc); 
} if (isset(&sc->offload_map, pi->port_id)) @@ -5518,6 +5664,8 @@ t4_activate_uld(struct adapter *sc, int id) int rc = EAGAIN; struct uld_info *ui; + ASSERT_SYNCHRONIZED_OP(sc); + mtx_lock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { @@ -5540,6 +5688,8 @@ t4_deactivate_uld(struct adapter *sc, int id) int rc = EINVAL; struct uld_info *ui; + ASSERT_SYNCHRONIZED_OP(sc); + mtx_lock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index 62d9eb3..62ceec4 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -2362,6 +2362,8 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx, SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD, &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests"); + SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD, + &txq->br->br_drops, "# of drops in the buf_ring for this queue"); SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD, &txq->no_dmamap, 0, "# of times txq ran out of DMA maps"); SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD, diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index 8d36b1e..17ed1d3 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #ifdef TCP_OFFLOAD #include <sys/param.h> @@ -195,7 +196,7 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status); /* Ignore negative advice */ - if (status == CPL_ERR_RTX_NEG_ADVICE) + if (negative_advice(status)) return (0); free_atid(sc, atid); @@ -220,10 +221,9 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, * Options2 for active open. 
*/ static uint32_t -calc_opt2a(struct socket *so) +calc_opt2a(struct socket *so, struct toepcb *toep) { struct tcpcb *tp = so_sototcpcb(so); - struct toepcb *toep = tp->t_toe; struct port_info *pi = toep->port; struct adapter *sc = pi->adapter; uint32_t opt2 = 0; @@ -260,6 +260,12 @@ t4_init_connect_cpl_handlers(struct adapter *sc) t4_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl); } +#define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \ + reason = __LINE__; \ + rc = (x); \ + goto failed; \ +} while (0) + /* * active open (soconnect). * @@ -275,20 +281,19 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; + struct tom_data *td = tod_td(tod); struct toepcb *toep = NULL; struct wrqe *wr = NULL; - struct cpl_act_open_req *cpl; - struct l2t_entry *e = NULL; struct ifnet *rt_ifp = rt->rt_ifp; struct port_info *pi; - int atid = -1, mtu_idx, rscale, qid_atid, rc = ENOMEM; + int mtu_idx, rscale, qid_atid, rc, isipv6; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); + int reason; INP_WLOCK_ASSERT(inp); - - if (nam->sa_family != AF_INET) - CXGBE_UNIMPLEMENTED("IPv6 connect"); + KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, + ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (rt_ifp->if_type == IFT_ETHER) pi = rt_ifp->if_softc; @@ -297,30 +302,29 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, pi = ifp->if_softc; } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) - return (ENOSYS); /* XXX: implement lagg support */ + DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else - return (ENOTSUP); + DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); toep = alloc_toepcb(pi, -1, -1, M_NOWAIT); if (toep == NULL) - goto failed; + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - atid = alloc_atid(sc, toep); - if (atid < 0) - goto failed; + toep->tid = alloc_atid(sc, toep); + if (toep->tid < 0) + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - e = 
t4_l2t_get(pi, rt_ifp, + toep->l2te = t4_l2t_get(pi, rt_ifp, rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam); - if (e == NULL) - goto failed; + if (toep->l2te == NULL) + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - wr = alloc_wrqe(sizeof(*cpl), toep->ctrlq); + isipv6 = nam->sa_family == AF_INET6; + wr = alloc_wrqe(isipv6 ? sizeof(struct cpl_act_open_req6) : + sizeof(struct cpl_act_open_req), toep->ctrlq); if (wr == NULL) - goto failed; - cpl = wrtod(wr); + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - toep->tid = atid; - toep->l2te = e; if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) set_tcpddp_ulp_mode(toep); else @@ -330,8 +334,6 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); - offload_socket(so, toep); - /* * The kernel sets request_r_scale based on sb_max whereas we need to * take hardware's MAX_RCV_WND into account too. This is normally a @@ -342,39 +344,78 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, else rscale = 0; mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0); - qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | atid; - - INIT_TP_WR(cpl, 0); - OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); - inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, - &cpl->peer_port); - cpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, toep->rx_credits, - toep->ulp_mode); - cpl->params = select_ntuple(pi, e, sc->filter_mode); - cpl->opt2 = calc_opt2a(so); + qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid; + + if (isipv6) { + struct cpl_act_open_req6 *cpl = wrtod(wr); + + if ((inp->inp_vflag & INP_IPV6) == 0) { + /* XXX think about this a bit more */ + log(LOG_ERR, + "%s: time to think about AF_INET6 + vflag 0x%x.\n", + __func__, inp->inp_vflag); + DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); + } + + toep->ce = hold_lip(td, &inp->in6p_laddr); + if (toep->ce == NULL) + DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); + + 
INIT_TP_WR(cpl, 0); + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + qid_atid)); + + cpl->local_port = inp->inp_lport; + cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; + cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; + cpl->peer_port = inp->inp_fport; + cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; + cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; + cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, + toep->rx_credits, toep->ulp_mode); + cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode); + cpl->opt2 = calc_opt2a(so, toep); + } else { + struct cpl_act_open_req *cpl = wrtod(wr); + + INIT_TP_WR(cpl, 0); + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + qid_atid)); + inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, + &cpl->peer_ip, &cpl->peer_port); + cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, + toep->rx_credits, toep->ulp_mode); + cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode); + cpl->opt2 = calc_opt2a(so, toep); + } CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__, toep->tid, tcpstates[tp->t_state], toep, inp); - rc = t4_l2t_send(sc, wr, e); + offload_socket(so, toep); + rc = t4_l2t_send(sc, wr, toep->l2te); if (rc == 0) { toep->flags |= TPF_CPL_PENDING; return (0); } undo_offload_socket(so); + reason = __LINE__; failed: - CTR5(KTR_CXGBE, "%s: FAILED, atid %d, toep %p, l2te %p, wr %p", - __func__, atid, toep, e, wr); + CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc); - if (e) - t4_l2t_release(e); if (wr) free_wrqe(wr); - if (atid >= 0) - free_atid(sc, atid); - if (toep) + + if (toep) { + if (toep->tid >= 0) + free_atid(sc, toep->tid); + if (toep->l2te) + t4_l2t_release(toep->l2te); + if (toep->ce) + release_lip(td, toep->ce); free_toepcb(toep); + } return (rc); } diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 6ae1ec4..9aead9f 100644 --- 
a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -1018,8 +1018,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); - if (cpl->status == CPL_ERR_RTX_NEG_ADVICE || - cpl->status == CPL_ERR_PERSIST_NEG_ADVICE) { + if (negative_advice(cpl->status)) { CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", __func__, cpl->status, tid, toep->flags); return (0); /* Ignore negative advice */ diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index 523f7f3..b80702d 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #ifdef TCP_OFFLOAD #include <sys/param.h> @@ -50,6 +51,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_pcb.h> #include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet6/scope6_var.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #define TCPSTATES @@ -63,9 +66,9 @@ __FBSDID("$FreeBSD$"); #include "tom/t4_tom.h" /* stid services */ -static int alloc_stid(struct adapter *, void *); -static void *lookup_stid(struct adapter *, int); -static void free_stid(struct adapter *, int); +static int alloc_stid(struct adapter *, struct listen_ctx *, int); +static struct listen_ctx *lookup_stid(struct adapter *, int); +static void free_stid(struct adapter *, struct listen_ctx *); /* lctx services */ static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, @@ -81,45 +84,105 @@ static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); static void send_reset_synqe(struct toedev *, struct synq_entry *); -/* XXX: won't work for IPv6 */ static int -alloc_stid(struct adapter *sc, void *ctx) +alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6) { struct tid_info *t = 
&sc->tids; - int stid = -1; + u_int stid, n, f, mask; + struct stid_region *sr = &lctx->stid_region; + + /* + * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in + * the TCAM. The start of the stid region is properly aligned (the chip + * requires each region to be 128-cell aligned). + */ + n = isipv6 ? 2 : 1; + mask = n - 1; + KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0, + ("%s: stid region (%u, %u) not properly aligned. n = %u", + __func__, t->stid_base, t->nstids, n)); mtx_lock(&t->stid_lock); - if (t->sfree) { - union serv_entry *p = t->sfree; - - stid = p - t->stid_tab; - stid += t->stid_base; - t->sfree = p->next; - p->data = ctx; - t->stids_in_use++; + if (n > t->nstids - t->stids_in_use) { + mtx_unlock(&t->stid_lock); + return (-1); } + + if (t->nstids_free_head >= n) { + /* + * This allocation will definitely succeed because the region + * starts at a good alignment and we just checked we have enough + * stids free. + */ + f = t->nstids_free_head & mask; + t->nstids_free_head -= n + f; + stid = t->nstids_free_head; + TAILQ_INSERT_HEAD(&t->stids, sr, link); + } else { + struct stid_region *s; + + stid = t->nstids_free_head; + TAILQ_FOREACH(s, &t->stids, link) { + stid += s->used + s->free; + f = stid & mask; + if (n <= s->free - f) { + stid -= n + f; + s->free -= n + f; + TAILQ_INSERT_AFTER(&t->stids, s, sr, link); + goto allocated; + } + } + + if (__predict_false(stid != t->nstids)) { + panic("%s: stids TAILQ (%p) corrupt." 
+ " At %d instead of %d at the end of the queue.", + __func__, &t->stids, stid, t->nstids); + } + + mtx_unlock(&t->stid_lock); + return (-1); + } + +allocated: + sr->used = n; + sr->free = f; + t->stids_in_use += n; + t->stid_tab[stid] = lctx; mtx_unlock(&t->stid_lock); - return (stid); + + KASSERT(((stid + t->stid_base) & mask) == 0, + ("%s: EDOOFUS.", __func__)); + return (stid + t->stid_base); } -static void * +static struct listen_ctx * lookup_stid(struct adapter *sc, int stid) { struct tid_info *t = &sc->tids; - return (t->stid_tab[stid - t->stid_base].data); + return (t->stid_tab[stid - t->stid_base]); } static void -free_stid(struct adapter *sc, int stid) +free_stid(struct adapter *sc, struct listen_ctx *lctx) { struct tid_info *t = &sc->tids; - union serv_entry *p = &t->stid_tab[stid - t->stid_base]; + struct stid_region *sr = &lctx->stid_region; + struct stid_region *s; + + KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used)); mtx_lock(&t->stid_lock); - p->next = t->sfree; - t->sfree = p; - t->stids_in_use--; + s = TAILQ_PREV(sr, stid_head, link); + if (s != NULL) + s->free += sr->used + sr->free; + else + t->nstids_free_head += sr->used + sr->free; + KASSERT(t->stids_in_use >= sr->used, + ("%s: stids_in_use (%u) < stids being freed (%u)", __func__, + t->stids_in_use, sr->used)); + t->stids_in_use -= sr->used; + TAILQ_REMOVE(&t->stids, sr, link); mtx_unlock(&t->stid_lock); } @@ -134,7 +197,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) if (lctx == NULL) return (NULL); - lctx->stid = alloc_stid(sc, lctx); + lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6); if (lctx->stid < 0) { free(lctx, M_CXGBE); return (NULL); @@ -167,7 +230,7 @@ free_lctx(struct adapter *sc, struct listen_ctx *lctx) CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", __func__, lctx->stid, lctx, lctx->inp); - free_stid(sc, lctx->stid); + free_stid(sc, lctx); free(lctx, M_CXGBE); return (in_pcbrele_wlocked(inp)); @@ -339,7 +402,7 @@ 
create_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_pass_open_req *req; - struct in_conninfo *inc = &lctx->inp->inp_inc; + struct inpcb *inp = lctx->inp; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { @@ -350,9 +413,9 @@ create_server(struct adapter *sc, struct listen_ctx *lctx) INIT_TP_WR(req, 0); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); - req->local_port = inc->inc_lport; + req->local_port = inp->inp_lport; req->peer_port = 0; - req->local_ip = inc->inc_laddr.s_addr; + req->local_ip = inp->inp_laddr.s_addr; req->peer_ip = 0; req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | @@ -363,6 +426,36 @@ create_server(struct adapter *sc, struct listen_ctx *lctx) } static int +create_server6(struct adapter *sc, struct listen_ctx *lctx) +{ + struct wrqe *wr; + struct cpl_pass_open_req6 *req; + struct inpcb *inp = lctx->inp; + + wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); + if (wr == NULL) { + log(LOG_ERR, "%s: allocation failure", __func__); + return (ENOMEM); + } + req = wrtod(wr); + + INIT_TP_WR(req, 0); + OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid)); + req->local_port = inp->inp_lport; + req->peer_port = 0; + req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; + req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; + req->peer_ip_hi = 0; + req->peer_ip_lo = 0; + req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); + req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | + F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); + + t4_wrq_tx(sc, wr); + return (0); +} + +static int destroy_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; @@ -398,13 +491,10 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) struct port_info *pi; struct inpcb *inp = tp->t_inpcb; struct listen_ctx *lctx; - int i; + int i, rc; INP_WLOCK_ASSERT(inp); - if 
((inp->inp_vflag & INP_IPV4) == 0) - return (0); - #if 0 ADAPTER_LOCK(sc); if (IS_BUSY(sc)) { @@ -421,8 +511,9 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) goto done; /* no port that's UP with IFCAP_TOE enabled */ /* - * Find a running port with IFCAP_TOE4. We'll use the first such port's - * queues to send the passive open and receive the reply to it. + * Find a running port with IFCAP_TOE (4 or 6). We'll use the first + * such port's queues to send the passive open and receive the reply to + * it. * * XXX: need a way to mark a port in use by offload. if_cxgbe should * then reject any attempt to bring down such a port (and maybe reject @@ -430,7 +521,7 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) */ for_each_port(sc, i) { if (isset(&sc->open_device_map, i) && - sc->port[i]->ifp->if_capenable & IFCAP_TOE4) + sc->port[i]->ifp->if_capenable & IFCAP_TOE) break; } KASSERT(i < sc->params.nports, @@ -449,12 +540,17 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) } listen_hash_add(sc, lctx); - CTR5(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p", __func__, - lctx->stid, tcpstates[tp->t_state], lctx, inp); + CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x", + __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp, + inp->inp_vflag); - if (create_server(sc, lctx) != 0) { - log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__, - device_get_nameunit(sc->dev)); + if (inp->inp_vflag & INP_IPV6) + rc = create_server6(sc, lctx); + else + rc = create_server(sc, lctx); + if (rc != 0) { + log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n", + __func__, device_get_nameunit(sc->dev), rc); (void) listen_hash_del(sc, inp); inp = release_lctx(sc, lctx); /* can't be freed, host stack has a reference */ @@ -558,7 +654,7 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) struct l2t_entry *e; struct tcpopt to; struct ip *ip = mtod(m, struct ip *); - struct tcphdr *th = (void *)(ip + 1); + struct tcphdr *th; wr 
= (struct wrqe *)atomic_readandclear_ptr(&synqe->wr); if (wr == NULL) { @@ -566,6 +662,10 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) return (EALREADY); } + if (ip->ip_v == IPVERSION) + th = (void *)(ip + 1); + else + th = (void *)((struct ip6_hdr *)ip + 1); bzero(&to, sizeof(to)); tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th), TO_SYN); @@ -608,7 +708,7 @@ do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, lctx->flags &= ~LCTX_RPL_PENDING; if (status != CPL_ERR_NONE) - log(LOG_ERR, "listener with stid %u failed: %d", stid, status); + log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status); #ifdef INVARIANTS /* @@ -678,7 +778,7 @@ do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss, CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status); if (status != CPL_ERR_NONE) { - log(LOG_ERR, "%s: failed (%u) to close listener for stid %u", + log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n", __func__, status, stid); return (status); } @@ -735,8 +835,7 @@ do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss, CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); - if (cpl->status == CPL_ERR_RTX_NEG_ADVICE || - cpl->status == CPL_ERR_PERSIST_NEG_ADVICE) + if (negative_advice(cpl->status)) return (0); /* Ignore negative advice */ INP_WLOCK(inp); @@ -855,7 +954,7 @@ mbuf_to_synqe(struct mbuf *m) return (NULL); synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE; } else { - synqe = (void *)(m->m_data + m->m_len + tspace - sizeof(*synqe)); + synqe = (void *)(m->m_data + m->m_len + tspace - len); synqe->flags = TPF_SYNQE; } @@ -936,21 +1035,29 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, const struct cpl_pass_accept_req *cpl = mtod(m, const void *); const struct ether_header *eh; unsigned int hlen = be32toh(cpl->hdr_len); - const struct ip *ip; + uintptr_t 
l3hdr; const struct tcphdr *tcp; eh = (const void *)(cpl + 1); - ip = (const void *)((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); - tcp = (const void *)((uintptr_t)ip + G_IP_HDR_LEN(hlen)); + l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); + tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen)); if (inc) { bzero(inc, sizeof(*inc)); - inc->inc_faddr = ip->ip_src; - inc->inc_laddr = ip->ip_dst; inc->inc_fport = tcp->th_sport; inc->inc_lport = tcp->th_dport; - if (ip->ip_v == 6) + if (((struct ip *)l3hdr)->ip_v == IPVERSION) { + const struct ip *ip = (const void *)l3hdr; + + inc->inc_faddr = ip->ip_src; + inc->inc_laddr = ip->ip_dst; + } else { + const struct ip6_hdr *ip6 = (const void *)l3hdr; + inc->inc_flags |= INC_ISIPV6; + inc->inc6_faddr = ip6->ip6_src; + inc->inc6_laddr = ip6->ip6_dst; + } } if (th) { @@ -959,6 +1066,105 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, } } +static int +ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6) +{ + struct ifaddr *ifa; + struct sockaddr_in6 *sin6; + int found = 0; + struct in6_addr in6 = *ip6; + + /* Just as in ip6_input */ + if (in6_clearscope(&in6) || in6_clearscope(&in6)) + return (0); + in6_setscope(&in6, ifp, NULL); + + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + sin6 = (void *)ifa->ifa_addr; + if (sin6->sin6_family != AF_INET6) + continue; + + if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) { + found = 1; + break; + } + } + if_addr_runlock(ifp); + + return (found); +} + +static struct l2t_entry * +get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, + struct in_conninfo *inc) +{ + struct rtentry *rt; + struct l2t_entry *e; + struct sockaddr_in6 sin6; + struct sockaddr *dst = (void *)&sin6; + + if (inc->inc_flags & INC_ISIPV6) { + dst->sa_len = sizeof(struct sockaddr_in6); + dst->sa_family = AF_INET6; + ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr; + + if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) { + /* no need for route lookup */ + e = 
t4_l2t_get(pi, ifp, dst); + return (e); + } + } else { + dst->sa_len = sizeof(struct sockaddr_in); + dst->sa_family = AF_INET; + ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr; + } + + rt = rtalloc1(dst, 0, 0); + if (rt == NULL) + return (NULL); + else { + struct sockaddr *nexthop; + + RT_UNLOCK(rt); + if (rt->rt_ifp != ifp) + e = NULL; + else { + if (rt->rt_flags & RTF_GATEWAY) + nexthop = rt->rt_gateway; + else + nexthop = dst; + e = t4_l2t_get(pi, ifp, nexthop); + } + RTFREE(rt); + } + + return (e); +} + +static int +ifnet_has_ip(struct ifnet *ifp, struct in_addr in) +{ + struct ifaddr *ifa; + struct sockaddr_in *sin; + int found = 0; + + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + sin = (void *)ifa->ifa_addr; + if (sin->sin_family != AF_INET) + continue; + + if (sin->sin_addr.s_addr == in.s_addr) { + found = 1; + break; + } + } + if_addr_runlock(ifp); + + return (found); +} + #define REJECT_PASS_ACCEPT() do { \ reject_reason = __LINE__; \ goto reject; \ @@ -994,10 +1200,8 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, struct tcphdr th; struct tcpopt to; struct port_info *pi; - struct ifnet *ifp, *ifp_vlan = NULL; + struct ifnet *hw_ifp, *ifp; struct l2t_entry *e = NULL; - struct rtentry *rt; - struct sockaddr_in nam; int rscale, mtu_idx, rx_credits, rxqid, ulp_mode; struct synq_entry *synqe = NULL; int reject_reason; @@ -1017,31 +1221,24 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, t4opt_to_tcpopt(&cpl->tcpopt, &to); pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))]; - ifp = pi->ifp; - m->m_pkthdr.rcvif = ifp; - tod = TOEDEV(ifp); + hw_ifp = pi->ifp; /* the cxgbeX ifnet */ + m->m_pkthdr.rcvif = hw_ifp; + tod = TOEDEV(hw_ifp); /* - * Don't offload if the interface that received the SYN doesn't have - * IFCAP_TOE enabled. - */ - if ((ifp->if_capenable & IFCAP_TOE4) == 0) - REJECT_PASS_ACCEPT(); - - /* Don't offload IPv6 connections. 
XXX: add IPv6 support */ - if (inc.inc_flags & INC_ISIPV6) - REJECT_PASS_ACCEPT(); - - /* - * Don't offload if the SYN had a VLAN tag and the vid doesn't match - * anything on this interface. + * Figure out if there is a pseudo interface (vlan, lagg, etc.) + * involved. Don't offload if the SYN had a VLAN tag and the vid + * doesn't match anything on this interface. + * + * XXX: lagg support, lagg + vlan support. */ vid = EVL_VLANOFTAG(be16toh(cpl->vlan)); if (vid != 0xfff) { - ifp_vlan = VLAN_DEVAT(ifp, vid); - if (ifp_vlan == NULL) + ifp = VLAN_DEVAT(hw_ifp, vid); + if (ifp == NULL) REJECT_PASS_ACCEPT(); - } + } else + ifp = hw_ifp; /* * Don't offload if the peer requested a TCP option that's not known to @@ -1050,31 +1247,36 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, if (cpl->tcpopt.unknown) REJECT_PASS_ACCEPT(); - /* - * Don't offload if the outgoing interface for the route back to the - * peer is not the same as the interface that received the SYN. - * XXX: too restrictive. - */ - nam.sin_len = sizeof(nam); - nam.sin_family = AF_INET; - nam.sin_addr = inc.inc_faddr; - rt = rtalloc1((struct sockaddr *)&nam, 0, 0); - if (rt == NULL) - REJECT_PASS_ACCEPT(); - else { - struct sockaddr *nexthop; + if (inc.inc_flags & INC_ISIPV6) { - RT_UNLOCK(rt); - nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : - (struct sockaddr *)&nam; - if (rt->rt_ifp == ifp || - (ifp_vlan != NULL && rt->rt_ifp == ifp_vlan)) - e = t4_l2t_get(pi, rt->rt_ifp, nexthop); - RTFREE(rt); - if (e == NULL) - REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */ + /* Don't offload if the ifcap isn't enabled */ + if ((ifp->if_capenable & IFCAP_TOE6) == 0) + REJECT_PASS_ACCEPT(); + + /* + * SYN must be directed to an IP6 address on this ifnet. This + * is more restrictive than in6_localip. 
+ */ + if (!ifnet_has_ip6(ifp, &inc.inc6_laddr)) + REJECT_PASS_ACCEPT(); + } else { + + /* Don't offload if the ifcap isn't enabled */ + if ((ifp->if_capenable & IFCAP_TOE4) == 0) + REJECT_PASS_ACCEPT(); + + /* + * SYN must be directed to an IP address on this ifnet. This + * is more restrictive than in_localip. + */ + if (!ifnet_has_ip(ifp, inc.inc_laddr)) + REJECT_PASS_ACCEPT(); } + e = get_l2te_for_nexthop(pi, ifp, &inc); + if (e == NULL) + REJECT_PASS_ACCEPT(); + synqe = mbuf_to_synqe(m); if (synqe == NULL) REJECT_PASS_ACCEPT(); @@ -1133,7 +1335,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, synqe->lctx = lctx; synqe->syn = m; m = NULL; - refcount_init(&synqe->refcnt, 0); + refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */ synqe->l2e_idx = e->idx; synqe->rcv_bufsize = rx_credits; atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr); @@ -1166,7 +1368,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, */ m = m_dup(synqe->syn, M_NOWAIT); if (m) - m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.rcvif = hw_ifp; remove_tid(sc, synqe->tid); free(wr, M_CXGBE); @@ -1179,6 +1381,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, if (inp) INP_WUNLOCK(inp); + release_synqe(synqe); /* extra hold */ REJECT_PASS_ACCEPT(); } @@ -1193,15 +1396,19 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, * this tid because there was no L2T entry for the tid at that * time. Abort it now. The reply to the abort will clean up. 
*/ - CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, ABORT", - __func__, stid, tid, lctx, synqe); - send_reset_synqe(tod, synqe); + CTR6(KTR_CXGBE, + "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT", + __func__, stid, tid, lctx, synqe, synqe->flags); + if (!(synqe->flags & TPF_SYNQE_EXPANDED)) + send_reset_synqe(tod, synqe); INP_WUNLOCK(inp); + release_synqe(synqe); /* extra hold */ return (__LINE__); } INP_WUNLOCK(inp); + release_synqe(synqe); /* extra hold */ return (0); reject: CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid, @@ -1216,7 +1423,7 @@ reject: m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; - ifp->if_input(ifp, m); + hw_ifp->if_input(hw_ifp, m); } return (reject_reason); diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 330172d..64e8b26 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> #include <sys/types.h> @@ -40,10 +41,14 @@ __FBSDID("$FreeBSD$"); #include <sys/domain.h> #include <sys/socket.h> #include <sys/socketvar.h> +#include <net/if.h> #include <netinet/in.h> #include <netinet/in_pcb.h> +#include <netinet/in_var.h> #include <netinet/ip.h> +#include <netinet/ip6.h> #include <netinet/tcp_var.h> +#include <netinet6/scope6_var.h> #define TCPSTATES #include <netinet/tcp_fsm.h> #include <netinet/toecore.h> @@ -58,6 +63,9 @@ __FBSDID("$FreeBSD$"); static struct protosw ddp_protosw; static struct pr_usrreqs ddp_usrreqs; +static struct protosw ddp6_protosw; +static struct pr_usrreqs ddp6_usrreqs; + /* Module ops */ static int t4_tom_mod_load(void); static int t4_tom_mod_unload(void); @@ -77,6 +85,11 @@ static void queue_tid_release(struct adapter *, int); static void release_offload_resources(struct toepcb *); static int alloc_tid_tabs(struct tid_info *); static void 
free_tid_tabs(struct tid_info *); +static int add_lip(struct adapter *, struct in6_addr *); +static int delete_lip(struct adapter *, struct in6_addr *); +static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *); +static void init_clip_table(struct adapter *, struct tom_data *); +static void destroy_clip_table(struct adapter *, struct tom_data *); static void free_tom_data(struct adapter *, struct tom_data *); struct toepcb * @@ -170,8 +183,12 @@ offload_socket(struct socket *so, struct toepcb *toep) sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; - if (toep->ulp_mode == ULP_MODE_TCPDDP) - so->so_proto = &ddp_protosw; + if (toep->ulp_mode == ULP_MODE_TCPDDP) { + if (inp->inp_vflag & INP_IPV6) + so->so_proto = &ddp6_protosw; + else + so->so_proto = &ddp_protosw; + } SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ @@ -237,8 +254,8 @@ release_offload_resources(struct toepcb *toep) KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: %p is still attached.", __func__, toep)); - CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)", - __func__, toep, tid, toep->l2te); + CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", + __func__, toep, tid, toep->l2te, toep->ce); if (toep->ulp_mode == ULP_MODE_TCPDDP) release_ddp_resources(toep); @@ -251,6 +268,9 @@ release_offload_resources(struct toepcb *toep) release_tid(sc, tid, toep->ctrlq); } + if (toep->ce) + release_lip(td, toep->ce); + mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); @@ -394,7 +414,7 @@ int find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) { unsigned short *mtus = &sc->params.mtus[0]; - int i = 0, mss; + int i, mss, n; KASSERT(inc != NULL || pmss > 0, ("%s: at least one of inc/pmss must be specified", __func__)); @@ -403,8 +423,13 @@ find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) if (pmss > 0 && mss > pmss) mss = pmss; - while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40) - ++i; + if 
(inc->inc_flags & INC_ISIPV6) + n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + else + n = sizeof(struct ip) + sizeof(struct tcphdr); + + for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++) + continue; return (i); } @@ -513,6 +538,24 @@ select_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode) return (htobe32(ntuple)); } +void +set_tcpddp_ulp_mode(struct toepcb *toep) +{ + + toep->ulp_mode = ULP_MODE_TCPDDP; + toep->ddp_flags = DDP_OK; + toep->ddp_score = DDP_LOW_SCORE; +} + +int +negative_advice(int status) +{ + + return (status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE); +} + static int alloc_tid_tabs(struct tid_info *t) { @@ -536,12 +579,10 @@ alloc_tid_tabs(struct tid_info *t) t->atid_tab[t->natids - 1].next = NULL; mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); - t->stid_tab = (union serv_entry *)&t->atid_tab[t->natids]; - t->sfree = t->stid_tab; + t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids]; t->stids_in_use = 0; - for (i = 1; i < t->nstids; i++) - t->stid_tab[i - 1].next = &t->stid_tab[i]; - t->stid_tab[t->nstids - 1].next = NULL; + TAILQ_INIT(&t->stids); + t->nstids_free_head = t->nstids; atomic_store_rel_int(&t->tids_in_use, 0); @@ -567,9 +608,157 @@ free_tid_tabs(struct tid_info *t) mtx_destroy(&t->stid_lock); } +static int +add_lip(struct adapter *sc, struct in6_addr *lip) +{ + struct fw_clip_cmd c; + + ASSERT_SYNCHRONIZED_OP(sc); + /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ + + memset(&c, 0, sizeof(c)); + c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_WRITE); + c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + + return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); +} + +static int +delete_lip(struct adapter *sc, struct in6_addr *lip) +{ + struct fw_clip_cmd c; + + ASSERT_SYNCHRONIZED_OP(sc); + 
/* mtx_assert(&td->clip_table_lock, MA_OWNED); */ + + memset(&c, 0, sizeof(c)); + c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_READ); + c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + + return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); +} + +static struct clip_entry * +search_lip(struct tom_data *td, struct in6_addr *lip) +{ + struct clip_entry *ce; + + mtx_assert(&td->clip_table_lock, MA_OWNED); + + TAILQ_FOREACH(ce, &td->clip_table, link) { + if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) + return (ce); + } + + return (NULL); +} + +struct clip_entry * +hold_lip(struct tom_data *td, struct in6_addr *lip) +{ + struct clip_entry *ce; + + mtx_lock(&td->clip_table_lock); + ce = search_lip(td, lip); + if (ce != NULL) + ce->refcount++; + mtx_unlock(&td->clip_table_lock); + + return (ce); +} + +void +release_lip(struct tom_data *td, struct clip_entry *ce) +{ + + mtx_lock(&td->clip_table_lock); + KASSERT(search_lip(td, &ce->lip) == ce, + ("%s: CLIP entry %p p not in CLIP table.", __func__, ce)); + KASSERT(ce->refcount > 0, + ("%s: CLIP entry %p has refcount 0", __func__, ce)); + --ce->refcount; + mtx_unlock(&td->clip_table_lock); +} + +static void +init_clip_table(struct adapter *sc, struct tom_data *td) +{ + struct in6_ifaddr *ia; + struct in6_addr *lip, tlip; + struct clip_entry *ce; + + ASSERT_SYNCHRONIZED_OP(sc); + + mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); + TAILQ_INIT(&td->clip_table); + + IN6_IFADDR_RLOCK(); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + lip = &ia->ia_addr.sin6_addr; + + KASSERT(!IN6_IS_ADDR_MULTICAST(lip), + ("%s: mcast address in in6_ifaddr list", __func__)); + + if (IN6_IS_ADDR_LOOPBACK(lip)) + continue; + if (IN6_IS_SCOPE_EMBED(lip)) { + /* Remove the embedded scope */ + tlip = *lip; + lip = &tlip; + in6_clearscope(lip); + } + /* + * XXX: how to weed out the link local address for the 
loopback + * interface? It's fe80::1 usually (always?). + */ + + mtx_lock(&td->clip_table_lock); + if (search_lip(td, lip) == NULL) { + ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); + memcpy(&ce->lip, lip, sizeof(ce->lip)); + ce->refcount = 0; + if (add_lip(sc, lip) == 0) + TAILQ_INSERT_TAIL(&td->clip_table, ce, link); + else + free(ce, M_CXGBE); + } + mtx_unlock(&td->clip_table_lock); + } + IN6_IFADDR_RUNLOCK(); +} + +static void +destroy_clip_table(struct adapter *sc, struct tom_data *td) +{ + struct clip_entry *ce, *ce_temp; + + if (mtx_initialized(&td->clip_table_lock)) { + mtx_lock(&td->clip_table_lock); + TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) { + KASSERT(ce->refcount == 0, + ("%s: CLIP entry %p still in use (%d)", __func__, + ce, ce->refcount)); + TAILQ_REMOVE(&td->clip_table, ce, link); + delete_lip(sc, &ce->lip); + free(ce, M_CXGBE); + } + mtx_unlock(&td->clip_table_lock); + mtx_destroy(&td->clip_table_lock); + } +} + static void free_tom_data(struct adapter *sc, struct tom_data *td) { + + ASSERT_SYNCHRONIZED_OP(sc); + KASSERT(TAILQ_EMPTY(&td->toep_list), ("%s: TOE PCB list is not empty.", __func__)); KASSERT(td->lctx_count == 0, @@ -578,6 +767,7 @@ free_tom_data(struct adapter *sc, struct tom_data *td) t4_uninit_l2t_cpl_handlers(sc); t4_uninit_cpl_io_handlers(sc); t4_uninit_ddp(sc, td); + destroy_clip_table(sc, td); if (td->listen_mask != 0) hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); @@ -602,7 +792,7 @@ t4_tom_activate(struct adapter *sc) struct toedev *tod; int i, rc; - ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */ + ASSERT_SYNCHRONIZED_OP(sc); /* per-adapter softc for TOM */ td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); @@ -623,8 +813,12 @@ t4_tom_activate(struct adapter *sc) if (rc != 0) goto done; + /* DDP page pods and CPL handlers */ t4_init_ddp(sc, td); + /* CLIP table for IPv6 offload */ + init_clip_table(sc, td); + /* CPL handlers */ t4_init_connect_cpl_handlers(sc); t4_init_l2t_cpl_handlers(sc); @@ -668,7 
+862,7 @@ t4_tom_deactivate(struct adapter *sc) int rc = 0; struct tom_data *td = sc->tom_softc; - ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */ + ASSERT_SYNCHRONIZED_OP(sc); if (td == NULL) return (0); /* XXX. KASSERT? */ @@ -700,17 +894,24 @@ static int t4_tom_mod_load(void) { int rc; - struct protosw *tcp_protosw; + struct protosw *tcp_protosw, *tcp6_protosw; tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); if (tcp_protosw == NULL) return (ENOPROTOOPT); - bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw)); bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs)); ddp_usrreqs.pru_soreceive = t4_soreceive_ddp; ddp_protosw.pr_usrreqs = &ddp_usrreqs; + tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM); + if (tcp6_protosw == NULL) + return (ENOPROTOOPT); + bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw)); + bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs)); + ddp6_usrreqs.pru_soreceive = t4_soreceive_ddp; + ddp6_protosw.pr_usrreqs = &ddp6_usrreqs; + rc = t4_register_uld(&tom_uld_info); if (rc != 0) t4_tom_mod_unload(); @@ -721,11 +922,14 @@ t4_tom_mod_load(void) static void tom_uninit(struct adapter *sc, void *arg __unused) { + if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomun")) + return; + /* Try to free resources (works only if no port has IFCAP_TOE) */ - ADAPTER_LOCK(sc); if (sc->flags & TOM_INIT_DONE) t4_deactivate_uld(sc, ULD_TOM); - ADAPTER_UNLOCK(sc); + + end_synchronized_op(sc, LOCK_HELD); } static int diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index 9549b0b..d0fbbd2 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -109,6 +109,7 @@ struct toepcb { struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ctrlq; struct l2t_entry *l2te; /* L2 table entry used by this connection */ + struct clip_entry *ce; /* CLIP table entry used by this tid */ int tid; /* Connection identifier */ unsigned int tx_credits;/* tx WR credits (in 16 byte units) 
remaining */ unsigned int sb_cc; /* last noted value of so_rcv->sb_cc */ @@ -140,15 +141,6 @@ struct flowc_tx_params { #define DDP_LOW_SCORE 1 #define DDP_HIGH_SCORE 3 -static inline void -set_tcpddp_ulp_mode(struct toepcb *toep) -{ - - toep->ulp_mode = ULP_MODE_TCPDDP; - toep->ddp_flags = DDP_OK; - toep->ddp_score = DDP_LOW_SCORE; -} - /* * Compressed state for embryonic connections for a listener. Barely fits in * 64B, try not to grow it further. @@ -174,6 +166,7 @@ struct listen_ctx { LIST_ENTRY(listen_ctx) link; /* listen hash linkage */ volatile int refcount; int stid; + struct stid_region stid_region; int flags; struct inpcb *inp; /* listening socket's inp */ struct sge_wrq *ctrlq; @@ -183,6 +176,12 @@ struct listen_ctx { TAILQ_HEAD(ppod_head, ppod_region); +struct clip_entry { + TAILQ_ENTRY(clip_entry) link; + struct in6_addr lip; /* local IPv6 address */ + u_int refcount; +}; + struct tom_data { struct toedev tod; @@ -200,6 +199,9 @@ struct tom_data { int nppods_free; /* # of available ppods */ int nppods_free_head; /* # of available ppods at the begining */ struct ppod_head ppods; + + struct mtx clip_table_lock; + TAILQ_HEAD(, clip_entry) clip_table; }; static inline struct tom_data * @@ -233,6 +235,10 @@ int select_rcv_wscale(void); uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *, int, int, int, int); uint32_t select_ntuple(struct port_info *, struct l2t_entry *, uint32_t); +void set_tcpddp_ulp_mode(struct toepcb *); +int negative_advice(int); +struct clip_entry *hold_lip(struct tom_data *, struct in6_addr *); +void release_lip(struct tom_data *, struct clip_entry *); /* t4_connect.c */ void t4_init_connect_cpl_handlers(struct adapter *); diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c index ffe64c5..7a75394 100644 --- a/sys/dev/cxgbe/tom/t4_tom_l2t.c +++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c @@ -27,6 +27,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #ifdef TCP_OFFLOAD #include 
<sys/param.h> @@ -34,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/module.h> #include <sys/bus.h> +#include <sys/fnv_hash.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/rwlock.h> @@ -48,28 +50,89 @@ __FBSDID("$FreeBSD$"); #include <netinet/toecore.h> #include "common/common.h" -#include "common/jhash.h" #include "common/t4_msg.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" #define VLAN_NONE 0xfff -#define SA(x) ((struct sockaddr *)(x)) -#define SIN(x) ((struct sockaddr_in *)(x)) -#define SINADDR(x) (SIN(x)->sin_addr.s_addr) - static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) { + if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ atomic_subtract_int(&d->nfree, 1); } -static inline unsigned int -arp_hash(const uint32_t key, int ifindex) +static inline u_int +l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) { - return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1); + u_int hash, half = d->l2t_size / 2, start = 0; + const void *key; + size_t len; + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + key = &sin->sin_addr; + len = sizeof(sin->sin_addr); + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + key = &sin6->sin6_addr; + len = sizeof(sin6->sin6_addr); + start = half; + } + + hash = fnv_32_buf(key, len, FNV1_32_INIT); + hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); + hash %= half; + + return (hash + start); +} + +static inline int +l2_cmp(const struct sockaddr *sa, struct l2t_entry *e) +{ + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + return (e->addr[0] != 
sin->sin_addr.s_addr); + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr))); + } +} + +static inline void +l2_store(const struct sockaddr *sa, struct l2t_entry *e) +{ + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + e->addr[0] = sin->sin_addr.s_addr; + e->ipv6 = 0; + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)); + e->ipv6 = 1; + } } /* @@ -100,7 +163,7 @@ send_pending(struct adapter *sc, struct l2t_entry *e) static void resolution_failed_for_wr(struct wrqe *wr) { - log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr, + log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr, wr->wr_len); /* free(wr, M_CXGBE); */ @@ -175,15 +238,25 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e) struct tom_data *td = sc->tom_softc; struct toedev *tod = &td->tod; struct sockaddr_in sin = {0}; + struct sockaddr_in6 sin6 = {0}; + struct sockaddr *sa; uint8_t dmac[ETHER_ADDR_LEN]; uint16_t vtag = VLAN_NONE; int rc; - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - SINADDR(&sin) = e->addr; + if (e->ipv6 == 0) { + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr.s_addr = e->addr[0]; + sa = (void *)&sin; + } else { + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr)); + sa = (void *)&sin6; + } - rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag); + rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag); if (rc == EWOULDBLOCK) return (rc); @@ -263,7 +336,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, struct adapter *sc = iq->adapter; const struct 
cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int tid = GET_TID(rpl); - unsigned int idx = tid & (L2T_SIZE - 1); + unsigned int idx = tid % L2T_SIZE; int rc; rc = do_l2t_write_rpl(iq, rss, m); @@ -271,7 +344,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, return (rc); if (tid & F_SYNC_WR) { - struct l2t_entry *e = &sc->l2t->l2tab[idx]; + struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start]; mtx_lock(&e->lock); if (e->state != L2T_STATE_SWITCHING) { @@ -310,21 +383,22 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) { struct l2t_entry *e; struct l2t_data *d = pi->adapter->l2t; - uint32_t addr = SINADDR(sa); - int hash = arp_hash(addr, ifp->if_index); - unsigned int smt_idx = pi->port_id; + u_int hash, smt_idx = pi->port_id; - if (sa->sa_family != AF_INET) - return (NULL); /* XXX: no IPv6 support right now */ + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); #ifndef VLAN_TAG if (ifp->if_type == IFT_L2VLAN) return (NULL); #endif + hash = l2_hash(d, sa, ifp->if_index); rw_wlock(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) { - if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) { + if (l2_cmp(sa, e) == 0 && e->ifp == ifp && + e->smt_idx == smt_idx) { l2t_hold(d, e); goto done; } @@ -338,7 +412,7 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) d->l2tab[hash].first = e; e->state = L2T_STATE_RESOLVING; - e->addr = addr; + l2_store(sa, e); e->ifp = ifp; e->smt_idx = smt_idx; e->hash = hash; @@ -368,14 +442,14 @@ t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, struct adapter *sc = tod->tod_softc; struct l2t_entry *e; struct l2t_data *d = sc->l2t; - uint32_t addr = SINADDR(sa); - int hash = arp_hash(addr, ifp->if_index); + u_int hash; KASSERT(d != NULL, ("%s: no L2 table", __func__)); + hash = l2_hash(d, sa, ifp->if_index); 
rw_rlock(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) { - if (e->addr == addr && e->ifp == ifp) { + if (l2_cmp(sa, e) == 0 && e->ifp == ifp) { mtx_lock(&e->lock); if (atomic_load_acq_int(&e->refcnt)) goto found; |