summary refs log tree commit diff stats
path: root/sys/dev/cxgbe
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/cxgbe')
-rw-r--r-- sys/dev/cxgbe/adapter.h 60
-rw-r--r-- sys/dev/cxgbe/common/jhash.h 140
-rw-r--r-- sys/dev/cxgbe/common/t4_msg.h 2
-rw-r--r-- sys/dev/cxgbe/firmware/t4fw_cfg.txt 8
-rw-r--r-- sys/dev/cxgbe/offload.h 58
-rw-r--r-- sys/dev/cxgbe/t4_l2t.c 44
-rw-r--r-- sys/dev/cxgbe/t4_l2t.h 6
-rw-r--r-- sys/dev/cxgbe/t4_main.c 664
-rw-r--r-- sys/dev/cxgbe/t4_sge.c 2
-rw-r--r-- sys/dev/cxgbe/tom/t4_connect.c 127
-rw-r--r-- sys/dev/cxgbe/tom/t4_cpl_io.c 3
-rw-r--r-- sys/dev/cxgbe/tom/t4_listen.c 409
-rw-r--r-- sys/dev/cxgbe/tom/t4_tom.c 240
-rw-r--r-- sys/dev/cxgbe/tom/t4_tom.h 24
-rw-r--r-- sys/dev/cxgbe/tom/t4_tom_l2t.c 124
15 files changed, 1248 insertions, 663 deletions
diff --git a/sys/dev/cxgbe/adapter.h b/sys/dev/cxgbe/adapter.h
index c1d8a6b..55cfa52 100644
--- a/sys/dev/cxgbe/adapter.h
+++ b/sys/dev/cxgbe/adapter.h
@@ -158,6 +158,16 @@ enum {
};
enum {
+ /* flags understood by begin_synchronized_op */
+ HOLD_LOCK = (1 << 0),
+ SLEEP_OK = (1 << 1),
+ INTR_OK = (1 << 2),
+
+ /* flags understood by end_synchronized_op */
+ LOCK_HELD = HOLD_LOCK,
+};
+
+enum {
/* adapter flags */
FULL_INIT_DONE = (1 << 0),
FW_OK = (1 << 1),
@@ -174,11 +184,11 @@ enum {
PORT_SYSCTL_CTX = (1 << 2),
};
-#define IS_DOOMED(pi) (pi->flags & DOOMED)
-#define SET_DOOMED(pi) do {pi->flags |= DOOMED;} while (0)
-#define IS_BUSY(sc) (sc->flags & CXGBE_BUSY)
-#define SET_BUSY(sc) do {sc->flags |= CXGBE_BUSY;} while (0)
-#define CLR_BUSY(sc) do {sc->flags &= ~CXGBE_BUSY;} while (0)
+#define IS_DOOMED(pi) ((pi)->flags & DOOMED)
+#define SET_DOOMED(pi) do {(pi)->flags |= DOOMED;} while (0)
+#define IS_BUSY(sc) ((sc)->flags & CXGBE_BUSY)
+#define SET_BUSY(sc) do {(sc)->flags |= CXGBE_BUSY;} while (0)
+#define CLR_BUSY(sc) do {(sc)->flags &= ~CXGBE_BUSY;} while (0)
struct port_info {
device_t dev;
@@ -567,7 +577,8 @@ struct adapter {
int flags;
char fw_version[32];
- unsigned int cfcsum;
+ char cfg_file[32];
+ u_int cfcsum;
struct adapter_params params;
struct t4_virt_res vres;
@@ -591,6 +602,11 @@ struct adapter {
an_handler_t an_handler __aligned(CACHE_LINE_SIZE);
fw_msg_handler_t fw_msg_handler[4]; /* NUM_FW6_TYPES */
cpl_handler_t cpl_handler[0xef]; /* NUM_CPL_CMDS */
+
+#ifdef INVARIANTS
+ const char *last_op;
+ const void *last_op_thr;
+#endif
};
#define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock)
@@ -598,6 +614,12 @@ struct adapter {
#define ADAPTER_LOCK_ASSERT_OWNED(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED)
#define ADAPTER_LOCK_ASSERT_NOTOWNED(sc) mtx_assert(&(sc)->sc_lock, MA_NOTOWNED)
+/* XXX: not bulletproof, but much better than nothing */
+#define ASSERT_SYNCHRONIZED_OP(sc) \
+ KASSERT(IS_BUSY(sc) && \
+ (mtx_owned(&(sc)->sc_lock) || (sc)->last_op_thr == curthread), \
+ ("%s: operation not synchronized.", __func__))
+
#define PORT_LOCK(pi) mtx_lock(&(pi)->pi_lock)
#define PORT_UNLOCK(pi) mtx_unlock(&(pi)->pi_lock)
#define PORT_LOCK_ASSERT_OWNED(pi) mtx_assert(&(pi)->pi_lock, MA_OWNED)
@@ -626,18 +648,18 @@ struct adapter {
#define TXQ_LOCK_ASSERT_OWNED(txq) EQ_LOCK_ASSERT_OWNED(&(txq)->eq)
#define TXQ_LOCK_ASSERT_NOTOWNED(txq) EQ_LOCK_ASSERT_NOTOWNED(&(txq)->eq)
-#define for_each_txq(pi, iter, txq) \
- txq = &pi->adapter->sge.txq[pi->first_txq]; \
- for (iter = 0; iter < pi->ntxq; ++iter, ++txq)
-#define for_each_rxq(pi, iter, rxq) \
- rxq = &pi->adapter->sge.rxq[pi->first_rxq]; \
- for (iter = 0; iter < pi->nrxq; ++iter, ++rxq)
-#define for_each_ofld_txq(pi, iter, ofld_txq) \
- ofld_txq = &pi->adapter->sge.ofld_txq[pi->first_ofld_txq]; \
- for (iter = 0; iter < pi->nofldtxq; ++iter, ++ofld_txq)
-#define for_each_ofld_rxq(pi, iter, ofld_rxq) \
- ofld_rxq = &pi->adapter->sge.ofld_rxq[pi->first_ofld_rxq]; \
- for (iter = 0; iter < pi->nofldrxq; ++iter, ++ofld_rxq)
+#define for_each_txq(pi, iter, q) \
+ for ((q) = &(pi)->adapter->sge.txq[(pi)->first_txq], (iter) = 0; \
+ (iter) < (pi)->ntxq; ++(iter), ++(q))
+#define for_each_rxq(pi, iter, q) \
+ for ((q) = &(pi)->adapter->sge.rxq[(pi)->first_rxq], (iter) = 0; \
+ (iter) < (pi)->nrxq; ++(iter), ++(q))
+#define for_each_ofld_txq(pi, iter, q) \
+ for ((q) = &(pi)->adapter->sge.ofld_txq[(pi)->first_ofld_txq], (iter) = 0; \
+ (iter) < (pi)->nofldtxq; ++(iter), ++(q))
+#define for_each_ofld_rxq(pi, iter, q) \
+ for ((q) = &(pi)->adapter->sge.ofld_rxq[(pi)->first_ofld_rxq], (iter) = 0; \
+ (iter) < (pi)->nofldrxq; ++(iter), ++(q))
/* One for errors, one for firmware events */
#define T4_EXTRA_INTR 2
@@ -751,6 +773,8 @@ int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t);
int t4_register_an_handler(struct adapter *, an_handler_t);
int t4_register_fw_msg_handler(struct adapter *, int, fw_msg_handler_t);
int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
+int begin_synchronized_op(struct adapter *, struct port_info *, int, char *);
+void end_synchronized_op(struct adapter *, int);
/* t4_sge.c */
void t4_sge_modload(void);
diff --git a/sys/dev/cxgbe/common/jhash.h b/sys/dev/cxgbe/common/jhash.h
deleted file mode 100644
index 4546b7b..0000000
--- a/sys/dev/cxgbe/common/jhash.h
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef _JHASH_H
-#define _JHASH_H
-
-/* jhash.h: Jenkins hash support.
- *
- * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
- *
- * http://burtleburtle.net/bob/hash/
- *
- * These are the credits from Bob's sources:
- *
- * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
- * hash(), hash2(), hash3, and mix() are externally useful functions.
- * Routines to test the hash are included if SELF_TEST is defined.
- * You can use this free for any purpose. It has no warranty.
- *
- * $FreeBSD$
- */
-
-/* NOTE: Arguments are modified. */
-#define __jhash_mix(a, b, c) \
-{ \
- a -= b; a -= c; a ^= (c>>13); \
- b -= c; b -= a; b ^= (a<<8); \
- c -= a; c -= b; c ^= (b>>13); \
- a -= b; a -= c; a ^= (c>>12); \
- b -= c; b -= a; b ^= (a<<16); \
- c -= a; c -= b; c ^= (b>>5); \
- a -= b; a -= c; a ^= (c>>3); \
- b -= c; b -= a; b ^= (a<<10); \
- c -= a; c -= b; c ^= (b>>15); \
-}
-
-/* The golden ration: an arbitrary value */
-#define JHASH_GOLDEN_RATIO 0x9e3779b9
-
-/* The most generic version, hashes an arbitrary sequence
- * of bytes. No alignment or length assumptions are made about
- * the input key.
- */
-static inline u32 jhash(const void *key, u32 length, u32 initval)
-{
- u32 a, b, c, len;
- const u8 *k = key;
-
- len = length;
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
-
- while (len >= 12) {
- a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24));
- b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24));
- c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24));
-
- __jhash_mix(a,b,c);
-
- k += 12;
- len -= 12;
- }
-
- c += length;
- switch (len) {
- case 11: c += ((u32)k[10]<<24);
- case 10: c += ((u32)k[9]<<16);
- case 9 : c += ((u32)k[8]<<8);
- case 8 : b += ((u32)k[7]<<24);
- case 7 : b += ((u32)k[6]<<16);
- case 6 : b += ((u32)k[5]<<8);
- case 5 : b += k[4];
- case 4 : a += ((u32)k[3]<<24);
- case 3 : a += ((u32)k[2]<<16);
- case 2 : a += ((u32)k[1]<<8);
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-/* A special optimized version that handles 1 or more of u32s.
- * The length parameter here is the number of u32s in the key.
- */
-static inline u32 jhash2(u32 *k, u32 length, u32 initval)
-{
- u32 a, b, c, len;
-
- a = b = JHASH_GOLDEN_RATIO;
- c = initval;
- len = length;
-
- while (len >= 3) {
- a += k[0];
- b += k[1];
- c += k[2];
- __jhash_mix(a, b, c);
- k += 3; len -= 3;
- }
-
- c += length * 4;
-
- switch (len) {
- case 2 : b += k[1];
- case 1 : a += k[0];
- };
-
- __jhash_mix(a,b,c);
-
- return c;
-}
-
-
-/* A special ultra-optimized versions that knows they are hashing exactly
- * 3, 2 or 1 word(s).
- *
- * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally
- * done at the end is not done here.
- */
-static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
-{
- a += JHASH_GOLDEN_RATIO;
- b += JHASH_GOLDEN_RATIO;
- c += initval;
-
- __jhash_mix(a, b, c);
-
- return c;
-}
-
-static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
-{
- return jhash_3words(a, b, 0, initval);
-}
-
-static inline u32 jhash_1word(u32 a, u32 initval)
-{
- return jhash_3words(a, 0, 0, initval);
-}
-
-#endif /* _JHASH_H */
diff --git a/sys/dev/cxgbe/common/t4_msg.h b/sys/dev/cxgbe/common/t4_msg.h
index 5bd3cef..92f760b 100644
--- a/sys/dev/cxgbe/common/t4_msg.h
+++ b/sys/dev/cxgbe/common/t4_msg.h
@@ -159,6 +159,8 @@ enum CPL_error {
CPL_ERR_KEEPALIVE_TIMEDOUT = 34,
CPL_ERR_RTX_NEG_ADVICE = 35,
CPL_ERR_PERSIST_NEG_ADVICE = 36,
+ CPL_ERR_KEEPALV_NEG_ADVICE = 37,
+ CPL_ERR_WAIT_ARP_RPL = 41,
CPL_ERR_ABORT_FAILED = 42,
CPL_ERR_IWARP_FLM = 50,
};
diff --git a/sys/dev/cxgbe/firmware/t4fw_cfg.txt b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
index bf6a9a1..2a9db62 100644
--- a/sys/dev/cxgbe/firmware/t4fw_cfg.txt
+++ b/sys/dev/cxgbe/firmware/t4fw_cfg.txt
@@ -56,7 +56,7 @@
[function "4"]
wx_caps = all
r_caps = all
- nvi = 54
+ nvi = 32
niqflint = 256
nethctrl = 128
neq = 256
@@ -74,8 +74,8 @@
# Each entry in these categories takes 4 cells each. nhash will use the
# TCAM iff there is room left (that is, the rest don't add up to 2048).
nroute = 32
- nclip = 0 # needed only for IPv6 offload
- nfilter = 1488
+ nclip = 32
+ nfilter = 1456
nserver = 512
nhash = 16384
@@ -137,7 +137,7 @@
[fini]
version = 0x1
- checksum = 0x162df193
+ checksum = 0xfdebb6ef
#
# $FreeBSD$
#
diff --git a/sys/dev/cxgbe/offload.h b/sys/dev/cxgbe/offload.h
index ced15a6..55ac71b 100644
--- a/sys/dev/cxgbe/offload.h
+++ b/sys/dev/cxgbe/offload.h
@@ -54,16 +54,20 @@
OPCODE_TID(w) = htonl(MK_OPCODE_TID(cpl, tid)); \
} while (0)
+TAILQ_HEAD(stid_head, stid_region);
+struct listen_ctx;
+
+struct stid_region {
+ TAILQ_ENTRY(stid_region) link;
+ int used; /* # of stids used by this region */
+ int free; /* # of contiguous stids free right after this region */
+};
+
/*
* Max # of ATIDs. The absolute HW max is 16K but we keep it lower.
*/
#define MAX_ATIDS 8192U
-union serv_entry {
- void *data;
- union serv_entry *next;
-};
-
union aopen_entry {
void *data;
union aopen_entry *next;
@@ -75,34 +79,33 @@ union aopen_entry {
*/
struct tid_info {
void **tid_tab;
- unsigned int ntids;
-
- union serv_entry *stid_tab;
- unsigned int nstids;
- unsigned int stid_base;
-
+ u_int ntids;
+ u_int tids_in_use;
+
+ struct mtx stid_lock __aligned(CACHE_LINE_SIZE);
+ struct listen_ctx **stid_tab;
+ u_int nstids;
+ u_int stid_base;
+ u_int stids_in_use;
+ u_int nstids_free_head; /* # of available stids at the beginning */
+ struct stid_head stids;
+
+ struct mtx atid_lock __aligned(CACHE_LINE_SIZE);
union aopen_entry *atid_tab;
- unsigned int natids;
-
- struct filter_entry *ftid_tab;
- unsigned int nftids;
- unsigned int ftid_base;
- unsigned int ftids_in_use;
-
- struct mtx atid_lock;
+ u_int natids;
union aopen_entry *afree;
- unsigned int atids_in_use;
-
- struct mtx stid_lock;
- union serv_entry *sfree;
- unsigned int stids_in_use;
+ u_int atids_in_use;
- unsigned int tids_in_use;
+ struct mtx ftid_lock __aligned(CACHE_LINE_SIZE);
+ struct filter_entry *ftid_tab;
+ u_int nftids;
+ u_int ftid_base;
+ u_int ftids_in_use;
};
struct t4_range {
- unsigned int start;
- unsigned int size;
+ u_int start;
+ u_int size;
};
struct t4_virt_res { /* virtualized HW resources */
@@ -114,6 +117,7 @@ struct t4_virt_res { /* virtualized HW resources */
struct t4_range qp;
struct t4_range cq;
struct t4_range ocq;
+ struct t4_range l2t;
};
#ifdef TCP_OFFLOAD
diff --git a/sys/dev/cxgbe/t4_l2t.c b/sys/dev/cxgbe/t4_l2t.c
index dd8748e..dcff5e8 100644
--- a/sys/dev/cxgbe/t4_l2t.c
+++ b/sys/dev/cxgbe/t4_l2t.c
@@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include "common/common.h"
-#include "common/jhash.h"
#include "common/t4_msg.h"
#include "t4_l2t.h"
@@ -78,7 +77,7 @@ t4_alloc_l2e(struct l2t_data *d)
return (NULL);
/* there's definitely a free entry */
- for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
+ for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
if (atomic_load_acq_int(&e->refcnt) == 0)
goto found;
@@ -115,6 +114,7 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
{
struct wrqe *wr;
struct cpl_l2t_write_req *req;
+ int idx = e->idx + sc->vres.l2t.start;
mtx_assert(&e->lock, MA_OWNED);
@@ -124,10 +124,10 @@ t4_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync)
req = wrtod(wr);
INIT_TP_WR(req, 0);
- OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx |
+ OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx |
V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id)));
req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync));
- req->l2t_idx = htons(e->idx);
+ req->l2t_idx = htons(idx);
req->vlan = htons(e->vlan);
memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac));
@@ -183,18 +183,24 @@ t4_l2t_set_switching(struct adapter *sc, struct l2t_entry *e, uint16_t vlan,
int
t4_init_l2t(struct adapter *sc, int flags)
{
- int i;
+ int i, l2t_size;
struct l2t_data *d;
- d = malloc(sizeof(*d), M_CXGBE, M_ZERO | flags);
+ l2t_size = sc->vres.l2t.size;
+ if (l2t_size < 2) /* At least 1 bucket for IP and 1 for IPv6 */
+ return (EINVAL);
+
+ d = malloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), M_CXGBE,
+ M_ZERO | flags);
if (!d)
return (ENOMEM);
+ d->l2t_size = l2t_size;
d->rover = d->l2tab;
- atomic_store_rel_int(&d->nfree, L2T_SIZE);
+ atomic_store_rel_int(&d->nfree, l2t_size);
rw_init(&d->lock, "L2T");
- for (i = 0; i < L2T_SIZE; i++) {
+ for (i = 0; i < l2t_size; i++) {
struct l2t_entry *e = &d->l2tab[i];
e->idx = i;
@@ -215,7 +221,7 @@ t4_free_l2t(struct l2t_data *d)
{
int i;
- for (i = 0; i < L2T_SIZE; i++)
+ for (i = 0; i < d->l2t_size; i++)
mtx_destroy(&d->l2tab[i].lock);
rw_destroy(&d->lock);
free(d, M_CXGBE);
@@ -229,11 +235,11 @@ do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss,
{
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
- unsigned int idx = tid & (L2T_SIZE - 1);
+ unsigned int idx = tid % L2T_SIZE;
if (__predict_false(rpl->status != CPL_ERR_NONE)) {
log(LOG_ERR,
- "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
+ "Unexpected L2T_WRITE_RPL (%u) for entry at hw_idx %u\n",
rpl->status, idx);
return (EINVAL);
}
@@ -269,7 +275,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS)
struct l2t_entry *e;
struct sbuf *sb;
int rc, i, header = 0;
- char ip[60];
+ char ip[INET6_ADDRSTRLEN];
if (l2t == NULL)
return (ENXIO);
@@ -283,7 +289,7 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS)
return (ENOMEM);
e = &l2t->l2tab[0];
- for (i = 0; i < L2T_SIZE; i++, e++) {
+ for (i = 0; i < l2t->l2t_size; i++, e++) {
mtx_lock(&e->lock);
if (e->state == L2T_STATE_UNUSED)
goto skip;
@@ -295,11 +301,15 @@ sysctl_l2t(SYSCTL_HANDLER_ARGS)
}
if (e->state == L2T_STATE_SWITCHING)
ip[0] = 0;
- else
- snprintf(ip, sizeof(ip), "%s",
- inet_ntoa(*(struct in_addr *)&e->addr));
+ else {
+ inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0],
+ &ip[0], sizeof(ip));
+ }
- /* XXX: e->ifp may not be around */
+ /*
+ * XXX: e->ifp may not be around.
+ * XXX: IPv6 addresses may not align properly in the output.
+ */
sbuf_printf(sb, "\n%4u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d"
" %u %2u %c %5u %s",
e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2],
diff --git a/sys/dev/cxgbe/t4_l2t.h b/sys/dev/cxgbe/t4_l2t.h
index 6927b81..c60eef1 100644
--- a/sys/dev/cxgbe/t4_l2t.h
+++ b/sys/dev/cxgbe/t4_l2t.h
@@ -60,7 +60,7 @@ enum {
struct l2t_entry {
uint16_t state; /* entry state */
uint16_t idx; /* entry index */
- uint32_t addr; /* next hop IP address */
+ uint32_t addr[4]; /* next hop IP or IPv6 address */
struct ifnet *ifp; /* outgoing interface */
uint16_t smt_idx; /* SMT index */
uint16_t vlan; /* VLAN TCI (id: 0-11, prio: 13-15) */
@@ -70,15 +70,17 @@ struct l2t_entry {
struct mtx lock;
volatile int refcnt; /* entry reference count */
uint16_t hash; /* hash bucket the entry is on */
+ uint8_t ipv6; /* entry is for an IPv6 address */
uint8_t lport; /* associated offload logical port */
uint8_t dmac[ETHER_ADDR_LEN]; /* next hop's MAC address */
};
struct l2t_data {
struct rwlock lock;
+ u_int l2t_size;
volatile int nfree; /* number of free entries */
struct l2t_entry *rover;/* starting point for next allocation */
- struct l2t_entry l2tab[L2T_SIZE];
+ struct l2t_entry l2tab[];
};
diff --git a/sys/dev/cxgbe/t4_main.c b/sys/dev/cxgbe/t4_main.c
index aeaa4d2..c22ec21 100644
--- a/sys/dev/cxgbe/t4_main.c
+++ b/sys/dev/cxgbe/t4_main.c
@@ -284,9 +284,7 @@ static int get_params__post_init(struct adapter *);
static void t4_set_desc(struct adapter *);
static void build_medialist(struct port_info *);
static int update_mac_settings(struct port_info *, int);
-static int cxgbe_init_locked(struct port_info *);
static int cxgbe_init_synchronized(struct port_info *);
-static int cxgbe_uninit_locked(struct port_info *);
static int cxgbe_uninit_synchronized(struct port_info *);
static int setup_intr_handlers(struct adapter *);
static int adapter_full_init(struct adapter *);
@@ -348,6 +346,7 @@ static void clear_filter(struct filter_entry *);
static int set_filter_wr(struct adapter *, int);
static int del_filter_wr(struct adapter *, int);
static int get_sge_context(struct adapter *, struct t4_sge_context *);
+static int load_fw(struct adapter *, struct t4_data *);
static int read_card_mem(struct adapter *, struct t4_mem_range *);
static int read_i2c(struct adapter *, struct t4_i2c_data *);
#ifdef TCP_OFFLOAD
@@ -820,6 +819,8 @@ t4_detach(device_t dev)
mtx_destroy(&sc->sc_lock);
}
+ if (mtx_initialized(&sc->tids.ftid_lock))
+ mtx_destroy(&sc->tids.ftid_lock);
if (mtx_initialized(&sc->sfl_lock))
mtx_destroy(&sc->sfl_lock);
@@ -874,7 +875,7 @@ cxgbe_attach(device_t dev)
ifp->if_capabilities = T4_CAP;
#ifdef TCP_OFFLOAD
if (is_offload(pi->adapter))
- ifp->if_capabilities |= IFCAP_TOE4;
+ ifp->if_capabilities |= IFCAP_TOE;
#endif
ifp->if_capenable = T4_CAP_ENABLE;
ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
@@ -918,6 +919,10 @@ cxgbe_detach(device_t dev)
while (IS_BUSY(sc))
mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0);
SET_BUSY(sc);
+#ifdef INVARIANTS
+ sc->last_op = "t4detach";
+ sc->last_op_thr = curthread;
+#endif
ADAPTER_UNLOCK(sc);
if (pi->vlan_c)
@@ -939,7 +944,7 @@ cxgbe_detach(device_t dev)
ADAPTER_LOCK(sc);
CLR_BUSY(sc);
- wakeup_one(&sc->flags);
+ wakeup(&sc->flags);
ADAPTER_UNLOCK(sc);
return (0);
@@ -951,9 +956,10 @@ cxgbe_init(void *arg)
struct port_info *pi = arg;
struct adapter *sc = pi->adapter;
- ADAPTER_LOCK(sc);
- cxgbe_init_locked(pi); /* releases adapter lock */
- ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+ if (begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4init") != 0)
+ return;
+ cxgbe_init_synchronized(pi);
+ end_synchronized_op(sc, 0);
}
static int
@@ -967,81 +973,56 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
switch (cmd) {
case SIOCSIFMTU:
- ADAPTER_LOCK(sc);
- rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc) {
-fail:
- ADAPTER_UNLOCK(sc);
- return (rc);
- }
-
mtu = ifr->ifr_mtu;
- if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
- rc = EINVAL;
- } else {
- ifp->if_mtu = mtu;
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- t4_update_fl_bufsize(ifp);
- PORT_LOCK(pi);
- rc = update_mac_settings(pi, XGMAC_MTU);
- PORT_UNLOCK(pi);
- }
+ if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
+ return (EINVAL);
+
+ rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4mtu");
+ if (rc)
+ return (rc);
+ ifp->if_mtu = mtu;
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ t4_update_fl_bufsize(ifp);
+ rc = update_mac_settings(pi, XGMAC_MTU);
}
- ADAPTER_UNLOCK(sc);
+ end_synchronized_op(sc, 0);
break;
case SIOCSIFFLAGS:
- ADAPTER_LOCK(sc);
- if (IS_DOOMED(pi)) {
- rc = ENXIO;
- goto fail;
- }
+ rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4flg");
+ if (rc)
+ return (rc);
+
if (ifp->if_flags & IFF_UP) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
flags = pi->if_flags;
if ((ifp->if_flags ^ flags) &
(IFF_PROMISC | IFF_ALLMULTI)) {
- if (IS_BUSY(sc)) {
- rc = EBUSY;
- goto fail;
- }
- PORT_LOCK(pi);
rc = update_mac_settings(pi,
XGMAC_PROMISC | XGMAC_ALLMULTI);
- PORT_UNLOCK(pi);
}
- ADAPTER_UNLOCK(sc);
} else
- rc = cxgbe_init_locked(pi);
+ rc = cxgbe_init_synchronized(pi);
pi->if_flags = ifp->if_flags;
} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
- rc = cxgbe_uninit_locked(pi);
- else
- ADAPTER_UNLOCK(sc);
-
- ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+ rc = cxgbe_uninit_synchronized(pi);
+ end_synchronized_op(sc, 0);
break;
case SIOCADDMULTI:
- case SIOCDELMULTI: /* these two can be called with a mutex held :-( */
- ADAPTER_LOCK(sc);
- rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
+ case SIOCDELMULTI: /* these two are called with a mutex held :-( */
+ rc = begin_synchronized_op(sc, pi, HOLD_LOCK, "t4multi");
if (rc)
- goto fail;
-
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- PORT_LOCK(pi);
+ return (rc);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
rc = update_mac_settings(pi, XGMAC_MCADDRS);
- PORT_UNLOCK(pi);
- }
- ADAPTER_UNLOCK(sc);
+ end_synchronized_op(sc, LOCK_HELD);
break;
case SIOCSIFCAP:
- ADAPTER_LOCK(sc);
- rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
+ rc = begin_synchronized_op(sc, pi, SLEEP_OK | INTR_OK, "t4cap");
if (rc)
- goto fail;
+ return (rc);
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_TXCSUM) {
@@ -1122,11 +1103,8 @@ fail:
#endif
if (mask & IFCAP_VLAN_HWTAGGING) {
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
- if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
- PORT_LOCK(pi);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING)
rc = update_mac_settings(pi, XGMAC_VLANEX);
- PORT_UNLOCK(pi);
- }
}
if (mask & IFCAP_VLAN_MTU) {
ifp->if_capenable ^= IFCAP_VLAN_MTU;
@@ -1141,7 +1119,8 @@ fail:
#ifdef VLAN_CAPABILITIES
VLAN_CAPABILITIES(ifp);
#endif
- ADAPTER_UNLOCK(sc);
+fail:
+ end_synchronized_op(sc, 0);
break;
case SIOCSIFMEDIA:
@@ -1625,21 +1604,28 @@ prep_firmware(struct adapter *sc)
/* Partition adapter resources as specified in the config file. */
if (sc->flags & MASTER_PF) {
- if (strncmp(t4_cfg_file, "default", sizeof(t4_cfg_file))) {
+ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s",
+ pci_get_device(sc->dev) == 0x440a ? "uwire" : t4_cfg_file);
+ if (strncmp(sc->cfg_file, "default", sizeof(sc->cfg_file))) {
char s[32];
- snprintf(s, sizeof(s), "t4fw_cfg_%s", t4_cfg_file);
+ snprintf(s, sizeof(s), "t4fw_cfg_%s", sc->cfg_file);
cfg = firmware_get(s);
if (cfg == NULL) {
device_printf(sc->dev,
"unable to locate %s module, "
"will use default config file.\n", s);
+ snprintf(sc->cfg_file, sizeof(sc->cfg_file),
+ "%s", "default");
}
}
rc = partition_resources(sc, cfg ? cfg : default_cfg);
if (rc != 0)
goto done; /* error message displayed already */
+ } else {
+ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", "notme");
+ sc->cfcsum = (u_int)-1;
}
sc->flags |= FW_OK;
@@ -1887,7 +1873,9 @@ get_params__post_init(struct adapter *sc)
param[1] = FW_PARAM_PFVF(EQ_START);
param[2] = FW_PARAM_PFVF(FILTER_START);
param[3] = FW_PARAM_PFVF(FILTER_END);
- rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val);
+ param[4] = FW_PARAM_PFVF(L2T_START);
+ param[5] = FW_PARAM_PFVF(L2T_END);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val);
if (rc != 0) {
device_printf(sc->dev,
"failed to query parameters (post_init): %d.\n", rc);
@@ -1898,6 +1886,11 @@ get_params__post_init(struct adapter *sc)
sc->sge.eq_start = val[1];
sc->tids.ftid_base = val[2];
sc->tids.nftids = val[3] - val[2] + 1;
+ sc->vres.l2t.start = val[4];
+ sc->vres.l2t.size = val[5] - val[4] + 1;
+ KASSERT(sc->vres.l2t.size <= L2T_SIZE,
+ ("%s: L2 table size (%u) larger than expected (%u)",
+ __func__, sc->vres.l2t.size, L2T_SIZE));
/* get capabilites */
bzero(&caps, sizeof(caps));
@@ -2111,7 +2104,7 @@ update_mac_settings(struct port_info *pi, int flags)
struct adapter *sc = pi->adapter;
int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
- PORT_LOCK_ASSERT_OWNED(pi);
+ ASSERT_SYNCHRONIZED_OP(sc);
KASSERT(flags, ("%s: not told what to update.", __func__));
if (flags & XGMAC_MTU)
@@ -2213,39 +2206,74 @@ mcfail:
return (rc);
}
-static int
-cxgbe_init_locked(struct port_info *pi)
+int
+begin_synchronized_op(struct adapter *sc, struct port_info *pi, int flags,
+ char *wmesg)
{
- struct adapter *sc = pi->adapter;
- int rc = 0;
+ int rc, pri;
- ADAPTER_LOCK_ASSERT_OWNED(sc);
+#ifdef WITNESS
+ /* the caller thinks it's ok to sleep, but is it really? */
+ if (flags & SLEEP_OK)
+ pause("t4slptst", 1);
+#endif
- while (!IS_DOOMED(pi) && IS_BUSY(sc)) {
- if (mtx_sleep(&sc->flags, &sc->sc_lock, PCATCH, "t4init", 0)) {
+ if (flags & INTR_OK) /* interruptible sleep only if the caller asked */
+ pri = PCATCH;
+ else
+ pri = 0;
+
+ ADAPTER_LOCK(sc);
+ for (;;) {
+
+ if (pi && IS_DOOMED(pi)) {
+ rc = ENXIO;
+ goto done;
+ }
+
+ if (!IS_BUSY(sc)) {
+ rc = 0;
+ break;
+ }
+
+ if (!(flags & SLEEP_OK)) {
+ rc = EBUSY;
+ goto done;
+ }
+
+ if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) {
rc = EINTR;
goto done;
}
}
- if (IS_DOOMED(pi)) {
- rc = ENXIO;
- goto done;
- }
- KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
- /* Give up the adapter lock, port init code can sleep. */
+ KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
SET_BUSY(sc);
- ADAPTER_UNLOCK(sc);
-
- rc = cxgbe_init_synchronized(pi);
+#ifdef INVARIANTS
+ sc->last_op = wmesg;
+ sc->last_op_thr = curthread;
+#endif
done:
- ADAPTER_LOCK(sc);
+ if (!(flags & HOLD_LOCK) || rc)
+ ADAPTER_UNLOCK(sc);
+
+ return (rc);
+}
+
+void
+end_synchronized_op(struct adapter *sc, int flags)
+{
+
+ if (flags & LOCK_HELD)
+ ADAPTER_LOCK_ASSERT_OWNED(sc);
+ else
+ ADAPTER_LOCK(sc);
+
KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
CLR_BUSY(sc);
- wakeup_one(&sc->flags);
+ wakeup(&sc->flags);
ADAPTER_UNLOCK(sc);
- return (rc);
}
static int
@@ -2255,7 +2283,7 @@ cxgbe_init_synchronized(struct port_info *pi)
struct ifnet *ifp = pi->ifp;
int rc = 0;
- ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+ ASSERT_SYNCHRONIZED_OP(sc);
if (isset(&sc->open_device_map, pi->port_id)) {
KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
@@ -2271,9 +2299,7 @@ cxgbe_init_synchronized(struct port_info *pi)
((rc = port_full_init(pi)) != 0))
return (rc); /* error message displayed already */
- PORT_LOCK(pi);
rc = update_mac_settings(pi, XGMAC_ALL);
- PORT_UNLOCK(pi);
if (rc)
goto done; /* error message displayed already */
@@ -2291,7 +2317,9 @@ cxgbe_init_synchronized(struct port_info *pi)
/* all ok */
setbit(&sc->open_device_map, pi->port_id);
+ PORT_LOCK(pi);
ifp->if_drv_flags |= IFF_DRV_RUNNING;
+ PORT_UNLOCK(pi);
callout_reset(&pi->tick, hz, cxgbe_tick, pi);
done:
@@ -2301,39 +2329,6 @@ done:
return (rc);
}
-static int
-cxgbe_uninit_locked(struct port_info *pi)
-{
- struct adapter *sc = pi->adapter;
- int rc;
-
- ADAPTER_LOCK_ASSERT_OWNED(sc);
-
- while (!IS_DOOMED(pi) && IS_BUSY(sc)) {
- if (mtx_sleep(&sc->flags, &sc->sc_lock, PCATCH, "t4uninit", 0)) {
- rc = EINTR;
- goto done;
- }
- }
- if (IS_DOOMED(pi)) {
- rc = ENXIO;
- goto done;
- }
- KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
- SET_BUSY(sc);
- ADAPTER_UNLOCK(sc);
-
- rc = cxgbe_uninit_synchronized(pi);
-
- ADAPTER_LOCK(sc);
- KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
- CLR_BUSY(sc);
- wakeup_one(&sc->flags);
-done:
- ADAPTER_UNLOCK(sc);
- return (rc);
-}
-
/*
* Idempotent.
*/
@@ -2344,7 +2339,7 @@ cxgbe_uninit_synchronized(struct port_info *pi)
struct ifnet *ifp = pi->ifp;
int rc;
- ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+ ASSERT_SYNCHRONIZED_OP(sc);
/*
* Disable the VI so that all its data in either direction is discarded
@@ -2360,7 +2355,9 @@ cxgbe_uninit_synchronized(struct port_info *pi)
}
clrbit(&sc->open_device_map, pi->port_id);
+ PORT_LOCK(pi);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+ PORT_UNLOCK(pi);
pi->link_cfg.link_ok = 0;
pi->link_cfg.speed = 0;
@@ -2539,7 +2536,7 @@ port_full_init(struct port_info *pi)
struct sge_rxq *rxq;
int rc, i;
- ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
+ ASSERT_SYNCHRONIZED_OP(sc);
KASSERT((pi->flags & PORT_INIT_DONE) == 0,
("%s: PORT_INIT_DONE already", __func__));
@@ -3119,7 +3116,7 @@ t4_sysctls(struct adapter *sc)
CTLFLAG_RD, &sc->fw_version, 0, "firmware version");
SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf",
- CTLFLAG_RD, &t4_cfg_file, 0, "configuration file");
+ CTLFLAG_RD, &sc->cfg_file, 0, "configuration file");
SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD,
&sc->cfcsum, 0, "config file checksum");
@@ -3524,6 +3521,8 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
struct port_info *pi = arg1;
struct adapter *sc = pi->adapter;
int idx, rc, i;
+ struct sge_rxq *rxq;
+ uint8_t v;
idx = pi->tmr_idx;
@@ -3534,25 +3533,23 @@ sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
if (idx < 0 || idx >= SGE_NTIMERS)
return (EINVAL);
- ADAPTER_LOCK(sc);
- rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc == 0) {
- struct sge_rxq *rxq;
- uint8_t v;
+ rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
+ "t4tmr");
+ if (rc)
+ return (rc);
- v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1);
- for_each_rxq(pi, i, rxq) {
+ v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(pi->pktc_idx != -1);
+ for_each_rxq(pi, i, rxq) {
#ifdef atomic_store_rel_8
- atomic_store_rel_8(&rxq->iq.intr_params, v);
+ atomic_store_rel_8(&rxq->iq.intr_params, v);
#else
- rxq->iq.intr_params = v;
+ rxq->iq.intr_params = v;
#endif
- }
- pi->tmr_idx = idx;
}
+ pi->tmr_idx = idx;
- ADAPTER_UNLOCK(sc);
- return (rc);
+ end_synchronized_op(sc, LOCK_HELD);
+ return (0);
}
static int
@@ -3571,15 +3568,17 @@ sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS)
if (idx < -1 || idx >= SGE_NCOUNTERS)
return (EINVAL);
- ADAPTER_LOCK(sc);
- rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc == 0 && pi->flags & PORT_INIT_DONE)
- rc = EBUSY; /* cannot be changed once the queues are created */
+ rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
+ "t4pktc");
+ if (rc)
+ return (rc);
- if (rc == 0)
+ if (pi->flags & PORT_INIT_DONE)
+ rc = EBUSY; /* cannot be changed once the queues are created */
+ else
pi->pktc_idx = idx;
- ADAPTER_UNLOCK(sc);
+ end_synchronized_op(sc, LOCK_HELD);
return (rc);
}
@@ -3599,15 +3598,17 @@ sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS)
if (qsize < 128 || (qsize & 7))
return (EINVAL);
- ADAPTER_LOCK(sc);
- rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc == 0 && pi->flags & PORT_INIT_DONE)
- rc = EBUSY; /* cannot be changed once the queues are created */
+ rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
+ "t4rxqs");
+ if (rc)
+ return (rc);
- if (rc == 0)
+ if (pi->flags & PORT_INIT_DONE)
+ rc = EBUSY; /* cannot be changed once the queues are created */
+ else
pi->qsize_rxq = qsize;
- ADAPTER_UNLOCK(sc);
+ end_synchronized_op(sc, LOCK_HELD);
return (rc);
}
@@ -3624,18 +3625,21 @@ sysctl_qsize_txq(SYSCTL_HANDLER_ARGS)
if (rc != 0 || req->newptr == NULL)
return (rc);
- if (qsize < 128)
+ /* bufring size must be powerof2 */
+ if (qsize < 128 || !powerof2(qsize))
return (EINVAL);
- ADAPTER_LOCK(sc);
- rc = IS_DOOMED(pi) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
- if (rc == 0 && pi->flags & PORT_INIT_DONE)
- rc = EBUSY; /* cannot be changed once the queues are created */
+ rc = begin_synchronized_op(sc, pi, HOLD_LOCK | SLEEP_OK | INTR_OK,
+ "t4txqs");
+ if (rc)
+ return (rc);
- if (rc == 0)
+ if (pi->flags & PORT_INIT_DONE)
+ rc = EBUSY; /* cannot be changed once the queues are created */
+ else
pi->qsize_txq = qsize;
- ADAPTER_UNLOCK(sc);
+ end_synchronized_op(sc, LOCK_HELD);
return (rc);
}
@@ -4674,8 +4678,14 @@ fspec_to_fconf(struct t4_filter_specification *fs)
static int
get_filter_mode(struct adapter *sc, uint32_t *mode)
{
+ int rc;
uint32_t fconf;
+ rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
+ "t4getfm");
+ if (rc)
+ return (rc);
+
t4_read_indirect(sc, A_TP_PIO_ADDR, A_TP_PIO_DATA, &fconf, 1,
A_TP_VLAN_PRI_MAP);
@@ -4687,6 +4697,7 @@ get_filter_mode(struct adapter *sc, uint32_t *mode)
*mode = fconf_to_mode(sc->filter_mode);
+ end_synchronized_op(sc, LOCK_HELD);
return (0);
}
@@ -4698,11 +4709,10 @@ set_filter_mode(struct adapter *sc, uint32_t mode)
fconf = mode_to_fconf(mode);
- ADAPTER_LOCK(sc);
- if (IS_BUSY(sc)) {
- rc = EAGAIN;
- goto done;
- }
+ rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
+ "t4setfm");
+ if (rc)
+ return (rc);
if (sc->tids.ftids_in_use > 0) {
rc = EBUSY;
@@ -4725,7 +4735,7 @@ set_filter_mode(struct adapter *sc, uint32_t mode)
#endif
done:
- ADAPTER_UNLOCK(sc);
+ end_synchronized_op(sc, LOCK_HELD);
return (rc);
}
@@ -4746,18 +4756,18 @@ get_filter_hits(struct adapter *sc, uint32_t fid)
static int
get_filter(struct adapter *sc, struct t4_filter *t)
{
- int i, nfilters = sc->tids.nftids;
+ int i, rc, nfilters = sc->tids.nftids;
struct filter_entry *f;
- ADAPTER_LOCK_ASSERT_OWNED(sc);
-
- if (IS_BUSY(sc))
- return (EAGAIN);
+ rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK,
+ "t4getf");
+ if (rc)
+ return (rc);
if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL ||
t->idx >= nfilters) {
t->idx = 0xffffffff;
- return (0);
+ goto done;
}
f = &sc->tids.ftid_tab[t->idx];
@@ -4772,11 +4782,13 @@ get_filter(struct adapter *sc, struct t4_filter *t)
t->hits = UINT64_MAX;
t->fs = f->fs;
- return (0);
+ goto done;
}
}
t->idx = 0xffffffff;
+done:
+ end_synchronized_op(sc, LOCK_HELD);
return (0);
}
@@ -4785,40 +4797,58 @@ set_filter(struct adapter *sc, struct t4_filter *t)
{
unsigned int nfilters, nports;
struct filter_entry *f;
- int i;
+ int i, rc;
- ADAPTER_LOCK_ASSERT_OWNED(sc);
+ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf");
+ if (rc)
+ return (rc);
nfilters = sc->tids.nftids;
nports = sc->params.nports;
- if (nfilters == 0)
- return (ENOTSUP);
+ if (nfilters == 0) {
+ rc = ENOTSUP;
+ goto done;
+ }
- if (!(sc->flags & FULL_INIT_DONE))
- return (EAGAIN);
+ if (!(sc->flags & FULL_INIT_DONE)) {
+ rc = EAGAIN;
+ goto done;
+ }
- if (t->idx >= nfilters)
- return (EINVAL);
+ if (t->idx >= nfilters) {
+ rc = EINVAL;
+ goto done;
+ }
/* Validate against the global filter mode */
- if ((sc->filter_mode | fspec_to_fconf(&t->fs)) != sc->filter_mode)
- return (E2BIG);
+ if ((sc->filter_mode | fspec_to_fconf(&t->fs)) != sc->filter_mode) {
+ rc = E2BIG;
+ goto done;
+ }
- if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports)
- return (EINVAL);
+ if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) {
+ rc = EINVAL;
+ goto done;
+ }
- if (t->fs.val.iport >= nports)
- return (EINVAL);
+ if (t->fs.val.iport >= nports) {
+ rc = EINVAL;
+ goto done;
+ }
/* Can't specify an iq if not steering to it */
- if (!t->fs.dirsteer && t->fs.iq)
- return (EINVAL);
+ if (!t->fs.dirsteer && t->fs.iq) {
+ rc = EINVAL;
+ goto done;
+ }
/* IPv6 filter idx must be 4 aligned */
if (t->fs.type == 1 &&
- ((t->idx & 0x3) || t->idx + 4 >= nfilters))
- return (EINVAL);
+ ((t->idx & 0x3) || t->idx + 4 >= nfilters)) {
+ rc = EINVAL;
+ goto done;
+ }
if (sc->tids.ftid_tab == NULL) {
KASSERT(sc->tids.ftids_in_use == 0,
@@ -4827,17 +4857,24 @@ set_filter(struct adapter *sc, struct t4_filter *t)
sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) *
nfilters, M_CXGBE, M_NOWAIT | M_ZERO);
- if (sc->tids.ftid_tab == NULL)
- return (ENOMEM);
+ if (sc->tids.ftid_tab == NULL) {
+ rc = ENOMEM;
+ goto done;
+ }
+ mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF);
}
for (i = 0; i < 4; i++) {
f = &sc->tids.ftid_tab[t->idx + i];
- if (f->pending || f->valid)
- return (EBUSY);
- if (f->locked)
- return (EPERM);
+ if (f->pending || f->valid) {
+ rc = EBUSY;
+ goto done;
+ }
+ if (f->locked) {
+ rc = EPERM;
+ goto done;
+ }
if (t->fs.type == 0)
break;
@@ -4846,7 +4883,27 @@ set_filter(struct adapter *sc, struct t4_filter *t)
f = &sc->tids.ftid_tab[t->idx];
f->fs = t->fs;
- return set_filter_wr(sc, t->idx);
+ rc = set_filter_wr(sc, t->idx);
+done:
+ end_synchronized_op(sc, 0);
+
+ if (rc == 0) {
+ mtx_lock(&sc->tids.ftid_lock);
+ for (;;) {
+ if (f->pending == 0) {
+ rc = f->valid ? 0 : EIO;
+ break;
+ }
+
+ if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
+ PCATCH, "t4setfw", 0)) {
+ rc = EINPROGRESS;
+ break;
+ }
+ }
+ mtx_unlock(&sc->tids.ftid_lock);
+ }
+ return (rc);
}
static int
@@ -4854,37 +4911,67 @@ del_filter(struct adapter *sc, struct t4_filter *t)
{
unsigned int nfilters;
struct filter_entry *f;
+ int rc;
- ADAPTER_LOCK_ASSERT_OWNED(sc);
-
- if (IS_BUSY(sc))
- return (EAGAIN);
+ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf");
+ if (rc)
+ return (rc);
nfilters = sc->tids.nftids;
- if (nfilters == 0)
- return (ENOTSUP);
+ if (nfilters == 0) {
+ rc = ENOTSUP;
+ goto done;
+ }
if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 ||
- t->idx >= nfilters)
- return (EINVAL);
+ t->idx >= nfilters) {
+ rc = EINVAL;
+ goto done;
+ }
- if (!(sc->flags & FULL_INIT_DONE))
- return (EAGAIN);
+ if (!(sc->flags & FULL_INIT_DONE)) {
+ rc = EAGAIN;
+ goto done;
+ }
f = &sc->tids.ftid_tab[t->idx];
- if (f->pending)
- return (EBUSY);
- if (f->locked)
- return (EPERM);
+ if (f->pending) {
+ rc = EBUSY;
+ goto done;
+ }
+ if (f->locked) {
+ rc = EPERM;
+ goto done;
+ }
if (f->valid) {
t->fs = f->fs; /* extra info for the caller */
- return del_filter_wr(sc, t->idx);
+ rc = del_filter_wr(sc, t->idx);
}
- return (0);
+done:
+ end_synchronized_op(sc, 0);
+
+ if (rc == 0) {
+ mtx_lock(&sc->tids.ftid_lock);
+ for (;;) {
+ if (f->pending == 0) {
+ rc = f->valid ? EIO : 0;
+ break;
+ }
+
+ if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock,
+ PCATCH, "t4delfw", 0)) {
+ rc = EINPROGRESS;
+ break;
+ }
+ }
+ mtx_unlock(&sc->tids.ftid_lock);
+ }
+
+ return (rc);
}
static void
@@ -4904,7 +4991,7 @@ set_filter_wr(struct adapter *sc, int fidx)
struct fw_filter_wr *fwr;
unsigned int ftid;
- ADAPTER_LOCK_ASSERT_OWNED(sc);
+ ASSERT_SYNCHRONIZED_OP(sc);
if (f->fs.newdmac || f->fs.newvlan) {
/* This filter needs an L2T entry; allocate one. */
@@ -5007,8 +5094,6 @@ del_filter_wr(struct adapter *sc, int fidx)
struct fw_filter_wr *fwr;
unsigned int ftid;
- ADAPTER_LOCK_ASSERT_OWNED(sc);
-
ftid = sc->tids.ftid_base + fidx;
wr = alloc_wrqe(sizeof(*fwr), &sc->sge.mgmtq);
@@ -5039,8 +5124,10 @@ t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
unsigned int rc = G_COOKIE(rpl->cookie);
struct filter_entry *f = &sc->tids.ftid_tab[idx];
- ADAPTER_LOCK(sc);
+ mtx_lock(&sc->tids.ftid_lock);
if (rc == FW_FILTER_WR_FLT_ADDED) {
+ KASSERT(f->pending, ("%s: filter[%u] isn't pending.",
+ __func__, idx));
f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff;
f->pending = 0; /* asynchronous setup completed */
f->valid = 1;
@@ -5055,7 +5142,8 @@ t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
clear_filter(f);
sc->tids.ftids_in_use--;
}
- ADAPTER_UNLOCK(sc);
+ wakeup(&sc->tids.ftid_tab);
+ mtx_unlock(&sc->tids.ftid_lock);
}
return (0);
@@ -5064,29 +5152,63 @@ t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
static int
get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt)
{
- int rc = EINVAL;
+ int rc;
if (cntxt->cid > M_CTXTQID)
- return (rc);
+ return (EINVAL);
if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS &&
cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM)
- return (rc);
+ return (EINVAL);
if (sc->flags & FW_OK) {
- ADAPTER_LOCK(sc); /* Avoid parallel t4_wr_mbox */
- rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id,
- &cntxt->data[0]);
- ADAPTER_UNLOCK(sc);
+ rc = begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4ctxt");
+ if (rc == 0) {
+ rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid,
+ cntxt->mem_id, &cntxt->data[0]);
+ end_synchronized_op(sc, LOCK_HELD);
+ if (rc == 0)
+ return (0);
+ }
}
- if (rc != 0) {
- /* Read via firmware failed or wasn't even attempted */
+ /*
+ * Read via firmware failed or wasn't even attempted. Read directly via
+ * the backdoor.
+ */
+ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id,
+ &cntxt->data[0]);
+ return (rc);
+}
+
+static int
+load_fw(struct adapter *sc, struct t4_data *fw)
+{
+ int rc;
+ uint8_t *fw_data;
+
+ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw");
+ if (rc)
+ return (rc);
+
+ if (sc->flags & FULL_INIT_DONE) {
+ rc = EBUSY;
+ goto done;
+ }
- rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id,
- &cntxt->data[0]);
+ fw_data = malloc(fw->len, M_CXGBE, M_WAITOK);
+ if (fw_data == NULL) {
+ rc = ENOMEM;
+ goto done;
}
+ rc = copyin(fw->data, fw_data, fw->len);
+ if (rc == 0)
+ rc = -t4_load_fw(sc, fw_data, fw->len);
+
+ free(fw_data, M_CXGBE);
+done:
+ end_synchronized_op(sc, 0);
return (rc);
}
@@ -5173,8 +5295,6 @@ read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
{
int rc;
- ADAPTER_LOCK_ASSERT_OWNED(sc); /* for mbox */
-
if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports)
return (EINVAL);
@@ -5183,8 +5303,12 @@ read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd)
return (ENOTSUP);
}
+ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd");
+ if (rc)
+ return (rc);
rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr,
i2cd->offset, &i2cd->data[0]);
+ end_synchronized_op(sc, 0);
return (rc);
}
@@ -5354,56 +5478,78 @@ t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
rc = set_filter_mode(sc, *(uint32_t *)data);
break;
case CHELSIO_T4_GET_FILTER:
- ADAPTER_LOCK(sc);
rc = get_filter(sc, (struct t4_filter *)data);
- ADAPTER_UNLOCK(sc);
break;
case CHELSIO_T4_SET_FILTER:
- ADAPTER_LOCK(sc);
rc = set_filter(sc, (struct t4_filter *)data);
- ADAPTER_UNLOCK(sc);
break;
case CHELSIO_T4_DEL_FILTER:
- ADAPTER_LOCK(sc);
rc = del_filter(sc, (struct t4_filter *)data);
- ADAPTER_UNLOCK(sc);
break;
case CHELSIO_T4_GET_SGE_CONTEXT:
rc = get_sge_context(sc, (struct t4_sge_context *)data);
break;
- case CHELSIO_T4_LOAD_FW: {
- struct t4_data *fw = (struct t4_data *)data;
- uint8_t *fw_data;
-
- if (sc->flags & FULL_INIT_DONE)
- return (EBUSY);
-
- fw_data = malloc(fw->len, M_CXGBE, M_NOWAIT);
- if (fw_data == NULL)
- return (ENOMEM);
-
- rc = copyin(fw->data, fw_data, fw->len);
- if (rc == 0)
- rc = -t4_load_fw(sc, fw_data, fw->len);
-
- free(fw_data, M_CXGBE);
+ case CHELSIO_T4_LOAD_FW:
+ rc = load_fw(sc, (struct t4_data *)data);
break;
- }
case CHELSIO_T4_GET_MEM:
rc = read_card_mem(sc, (struct t4_mem_range *)data);
break;
case CHELSIO_T4_GET_I2C:
- ADAPTER_LOCK(sc);
rc = read_i2c(sc, (struct t4_i2c_data *)data);
- ADAPTER_UNLOCK(sc);
break;
case CHELSIO_T4_CLEAR_STATS: {
+ int i;
u_int port_id = *(uint32_t *)data;
+ struct port_info *pi;
if (port_id >= sc->params.nports)
return (EINVAL);
+ /* MAC stats */
t4_clr_port_stats(sc, port_id);
+
+ pi = sc->port[port_id];
+ if (pi->flags & PORT_INIT_DONE) {
+ struct sge_rxq *rxq;
+ struct sge_txq *txq;
+ struct sge_wrq *wrq;
+
+ for_each_rxq(pi, i, rxq) {
+#if defined(INET) || defined(INET6)
+ rxq->lro.lro_queued = 0;
+ rxq->lro.lro_flushed = 0;
+#endif
+ rxq->rxcsum = 0;
+ rxq->vlan_extraction = 0;
+ }
+
+ for_each_txq(pi, i, txq) {
+ txq->txcsum = 0;
+ txq->tso_wrs = 0;
+ txq->vlan_insertion = 0;
+ txq->imm_wrs = 0;
+ txq->sgl_wrs = 0;
+ txq->txpkt_wrs = 0;
+ txq->txpkts_wrs = 0;
+ txq->txpkts_pkts = 0;
+ txq->br->br_drops = 0;
+ txq->no_dmamap = 0;
+ txq->no_desc = 0;
+ }
+
+#ifdef TCP_OFFLOAD
+ /* nothing to clear for each ofld_rxq */
+
+ for_each_ofld_txq(pi, i, wrq) {
+ wrq->tx_wrs = 0;
+ wrq->no_desc = 0;
+ }
+#endif
+ wrq = &sc->sge.ctrlq[pi->port_id];
+ wrq->tx_wrs = 0;
+ wrq->no_desc = 0;
+ }
break;
}
default:
@@ -5420,16 +5566,16 @@ toe_capability(struct port_info *pi, int enable)
int rc;
struct adapter *sc = pi->adapter;
- ADAPTER_LOCK_ASSERT_OWNED(sc);
+ ASSERT_SYNCHRONIZED_OP(sc);
if (!is_offload(sc))
return (ENODEV);
if (enable) {
if (!(sc->flags & FULL_INIT_DONE)) {
- log(LOG_WARNING,
- "You must enable a cxgbe interface first\n");
- return (EAGAIN);
+ rc = cxgbe_init_synchronized(pi);
+ if (rc)
+ return (rc);
}
if (isset(&sc->offload_map, pi->port_id))
@@ -5518,6 +5664,8 @@ t4_activate_uld(struct adapter *sc, int id)
int rc = EAGAIN;
struct uld_info *ui;
+ ASSERT_SYNCHRONIZED_OP(sc);
+
mtx_lock(&t4_uld_list_lock);
SLIST_FOREACH(ui, &t4_uld_list, link) {
@@ -5540,6 +5688,8 @@ t4_deactivate_uld(struct adapter *sc, int id)
int rc = EINVAL;
struct uld_info *ui;
+ ASSERT_SYNCHRONIZED_OP(sc);
+
mtx_lock(&t4_uld_list_lock);
SLIST_FOREACH(ui, &t4_uld_list, link) {
diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c
index 62d9eb3..62ceec4 100644
--- a/sys/dev/cxgbe/t4_sge.c
+++ b/sys/dev/cxgbe/t4_sge.c
@@ -2362,6 +2362,8 @@ alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx,
SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
&txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
+ SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "br_drops", CTLFLAG_RD,
+ &txq->br->br_drops, "# of drops in the buf_ring for this queue");
SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
&txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c
index 8d36b1e..17ed1d3 100644
--- a/sys/dev/cxgbe/tom/t4_connect.c
+++ b/sys/dev/cxgbe/tom/t4_connect.c
@@ -29,6 +29,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
@@ -195,7 +196,7 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status);
/* Ignore negative advice */
- if (status == CPL_ERR_RTX_NEG_ADVICE)
+ if (negative_advice(status))
return (0);
free_atid(sc, atid);
@@ -220,10 +221,9 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
* Options2 for active open.
*/
static uint32_t
-calc_opt2a(struct socket *so)
+calc_opt2a(struct socket *so, struct toepcb *toep)
{
struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
struct port_info *pi = toep->port;
struct adapter *sc = pi->adapter;
uint32_t opt2 = 0;
@@ -260,6 +260,12 @@ t4_init_connect_cpl_handlers(struct adapter *sc)
t4_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
}
+#define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \
+ reason = __LINE__; \
+ rc = (x); \
+ goto failed; \
+} while (0)
+
/*
* active open (soconnect).
*
@@ -275,20 +281,19 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
struct sockaddr *nam)
{
struct adapter *sc = tod->tod_softc;
+ struct tom_data *td = tod_td(tod);
struct toepcb *toep = NULL;
struct wrqe *wr = NULL;
- struct cpl_act_open_req *cpl;
- struct l2t_entry *e = NULL;
struct ifnet *rt_ifp = rt->rt_ifp;
struct port_info *pi;
- int atid = -1, mtu_idx, rscale, qid_atid, rc = ENOMEM;
+ int mtu_idx, rscale, qid_atid, rc, isipv6;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
+ int reason;
INP_WLOCK_ASSERT(inp);
-
- if (nam->sa_family != AF_INET)
- CXGBE_UNIMPLEMENTED("IPv6 connect");
+ KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+ ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family));
if (rt_ifp->if_type == IFT_ETHER)
pi = rt_ifp->if_softc;
@@ -297,30 +302,29 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
pi = ifp->if_softc;
} else if (rt_ifp->if_type == IFT_IEEE8023ADLAG)
- return (ENOSYS); /* XXX: implement lagg support */
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */
else
- return (ENOTSUP);
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
toep = alloc_toepcb(pi, -1, -1, M_NOWAIT);
if (toep == NULL)
- goto failed;
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- atid = alloc_atid(sc, toep);
- if (atid < 0)
- goto failed;
+ toep->tid = alloc_atid(sc, toep);
+ if (toep->tid < 0)
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- e = t4_l2t_get(pi, rt_ifp,
+ toep->l2te = t4_l2t_get(pi, rt_ifp,
rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam);
- if (e == NULL)
- goto failed;
+ if (toep->l2te == NULL)
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- wr = alloc_wrqe(sizeof(*cpl), toep->ctrlq);
+ isipv6 = nam->sa_family == AF_INET6;
+ wr = alloc_wrqe(isipv6 ? sizeof(struct cpl_act_open_req6) :
+ sizeof(struct cpl_act_open_req), toep->ctrlq);
if (wr == NULL)
- goto failed;
- cpl = wrtod(wr);
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- toep->tid = atid;
- toep->l2te = e;
if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
set_tcpddp_ulp_mode(toep);
else
@@ -330,8 +334,6 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
SOCKBUF_UNLOCK(&so->so_rcv);
- offload_socket(so, toep);
-
/*
* The kernel sets request_r_scale based on sb_max whereas we need to
* take hardware's MAX_RCV_WND into account too. This is normally a
@@ -342,39 +344,78 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
else
rscale = 0;
mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
- qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | atid;
-
- INIT_TP_WR(cpl, 0);
- OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid));
- inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
- &cpl->peer_port);
- cpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, toep->rx_credits,
- toep->ulp_mode);
- cpl->params = select_ntuple(pi, e, sc->filter_mode);
- cpl->opt2 = calc_opt2a(so);
+ qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid;
+
+ if (isipv6) {
+ struct cpl_act_open_req6 *cpl = wrtod(wr);
+
+ if ((inp->inp_vflag & INP_IPV6) == 0) {
+ /* XXX think about this a bit more */
+ log(LOG_ERR,
+ "%s: time to think about AF_INET6 + vflag 0x%x.\n",
+ __func__, inp->inp_vflag);
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
+ }
+
+ toep->ce = hold_lip(td, &inp->in6p_laddr);
+ if (toep->ce == NULL)
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOENT);
+
+ INIT_TP_WR(cpl, 0);
+ OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ qid_atid));
+
+ cpl->local_port = inp->inp_lport;
+ cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
+ cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
+ cpl->peer_port = inp->inp_fport;
+ cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
+ cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];
+ cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale,
+ toep->rx_credits, toep->ulp_mode);
+ cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode);
+ cpl->opt2 = calc_opt2a(so, toep);
+ } else {
+ struct cpl_act_open_req *cpl = wrtod(wr);
+
+ INIT_TP_WR(cpl, 0);
+ OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+ qid_atid));
+ inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
+ &cpl->peer_ip, &cpl->peer_port);
+ cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale,
+ toep->rx_credits, toep->ulp_mode);
+ cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode);
+ cpl->opt2 = calc_opt2a(so, toep);
+ }
CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__,
toep->tid, tcpstates[tp->t_state], toep, inp);
- rc = t4_l2t_send(sc, wr, e);
+ offload_socket(so, toep);
+ rc = t4_l2t_send(sc, wr, toep->l2te);
if (rc == 0) {
toep->flags |= TPF_CPL_PENDING;
return (0);
}
undo_offload_socket(so);
+ reason = __LINE__;
failed:
- CTR5(KTR_CXGBE, "%s: FAILED, atid %d, toep %p, l2te %p, wr %p",
- __func__, atid, toep, e, wr);
+ CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc);
- if (e)
- t4_l2t_release(e);
if (wr)
free_wrqe(wr);
- if (atid >= 0)
- free_atid(sc, atid);
- if (toep)
+
+ if (toep) {
+ if (toep->tid >= 0)
+ free_atid(sc, toep->tid);
+ if (toep->l2te)
+ t4_l2t_release(toep->l2te);
+ if (toep->ce)
+ release_lip(td, toep->ce);
free_toepcb(toep);
+ }
return (rc);
}
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 6ae1ec4..9aead9f 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -1018,8 +1018,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
- if (cpl->status == CPL_ERR_RTX_NEG_ADVICE ||
- cpl->status == CPL_ERR_PERSIST_NEG_ADVICE) {
+ if (negative_advice(cpl->status)) {
CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
__func__, cpl->status, tid, toep->flags);
return (0); /* Ignore negative advice */
diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c
index 523f7f3..b80702d 100644
--- a/sys/dev/cxgbe/tom/t4_listen.c
+++ b/sys/dev/cxgbe/tom/t4_listen.c
@@ -29,6 +29,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
@@ -50,6 +51,8 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet6/scope6_var.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
@@ -63,9 +66,9 @@ __FBSDID("$FreeBSD$");
#include "tom/t4_tom.h"
/* stid services */
-static int alloc_stid(struct adapter *, void *);
-static void *lookup_stid(struct adapter *, int);
-static void free_stid(struct adapter *, int);
+static int alloc_stid(struct adapter *, struct listen_ctx *, int);
+static struct listen_ctx *lookup_stid(struct adapter *, int);
+static void free_stid(struct adapter *, struct listen_ctx *);
/* lctx services */
static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
@@ -81,45 +84,105 @@ static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *);
static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
static void send_reset_synqe(struct toedev *, struct synq_entry *);
-/* XXX: won't work for IPv6 */
static int
-alloc_stid(struct adapter *sc, void *ctx)
+alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
{
struct tid_info *t = &sc->tids;
- int stid = -1;
+ u_int stid, n, f, mask;
+ struct stid_region *sr = &lctx->stid_region;
+
+ /*
+ * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
+ * the TCAM. The start of the stid region is properly aligned (the chip
+ * requires each region to be 128-cell aligned).
+ */
+ n = isipv6 ? 2 : 1;
+ mask = n - 1;
+ KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
+ ("%s: stid region (%u, %u) not properly aligned. n = %u",
+ __func__, t->stid_base, t->nstids, n));
mtx_lock(&t->stid_lock);
- if (t->sfree) {
- union serv_entry *p = t->sfree;
-
- stid = p - t->stid_tab;
- stid += t->stid_base;
- t->sfree = p->next;
- p->data = ctx;
- t->stids_in_use++;
+ if (n > t->nstids - t->stids_in_use) {
+ mtx_unlock(&t->stid_lock);
+ return (-1);
}
+
+ if (t->nstids_free_head >= n) {
+ /*
+ * This allocation will definitely succeed because the region
+ * starts at a good alignment and we just checked we have enough
+ * stids free.
+ */
+ f = t->nstids_free_head & mask;
+ t->nstids_free_head -= n + f;
+ stid = t->nstids_free_head;
+ TAILQ_INSERT_HEAD(&t->stids, sr, link);
+ } else {
+ struct stid_region *s;
+
+ stid = t->nstids_free_head;
+ TAILQ_FOREACH(s, &t->stids, link) {
+ stid += s->used + s->free;
+ f = stid & mask;
+ if (n <= s->free - f) {
+ stid -= n + f;
+ s->free -= n + f;
+ TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
+ goto allocated;
+ }
+ }
+
+ if (__predict_false(stid != t->nstids)) {
+ panic("%s: stids TAILQ (%p) corrupt."
+ " At %d instead of %d at the end of the queue.",
+ __func__, &t->stids, stid, t->nstids);
+ }
+
+ mtx_unlock(&t->stid_lock);
+ return (-1);
+ }
+
+allocated:
+ sr->used = n;
+ sr->free = f;
+ t->stids_in_use += n;
+ t->stid_tab[stid] = lctx;
mtx_unlock(&t->stid_lock);
- return (stid);
+
+ KASSERT(((stid + t->stid_base) & mask) == 0,
+ ("%s: EDOOFUS.", __func__));
+ return (stid + t->stid_base);
}
-static void *
+static struct listen_ctx *
lookup_stid(struct adapter *sc, int stid)
{
struct tid_info *t = &sc->tids;
- return (t->stid_tab[stid - t->stid_base].data);
+ return (t->stid_tab[stid - t->stid_base]);
}
static void
-free_stid(struct adapter *sc, int stid)
+free_stid(struct adapter *sc, struct listen_ctx *lctx)
{
struct tid_info *t = &sc->tids;
- union serv_entry *p = &t->stid_tab[stid - t->stid_base];
+ struct stid_region *sr = &lctx->stid_region;
+ struct stid_region *s;
+
+ KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));
mtx_lock(&t->stid_lock);
- p->next = t->sfree;
- t->sfree = p;
- t->stids_in_use--;
+ s = TAILQ_PREV(sr, stid_head, link);
+ if (s != NULL)
+ s->free += sr->used + sr->free;
+ else
+ t->nstids_free_head += sr->used + sr->free;
+ KASSERT(t->stids_in_use >= sr->used,
+ ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
+ t->stids_in_use, sr->used));
+ t->stids_in_use -= sr->used;
+ TAILQ_REMOVE(&t->stids, sr, link);
mtx_unlock(&t->stid_lock);
}
@@ -134,7 +197,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi)
if (lctx == NULL)
return (NULL);
- lctx->stid = alloc_stid(sc, lctx);
+ lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
if (lctx->stid < 0) {
free(lctx, M_CXGBE);
return (NULL);
@@ -167,7 +230,7 @@ free_lctx(struct adapter *sc, struct listen_ctx *lctx)
CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
__func__, lctx->stid, lctx, lctx->inp);
- free_stid(sc, lctx->stid);
+ free_stid(sc, lctx);
free(lctx, M_CXGBE);
return (in_pcbrele_wlocked(inp));
@@ -339,7 +402,7 @@ create_server(struct adapter *sc, struct listen_ctx *lctx)
{
struct wrqe *wr;
struct cpl_pass_open_req *req;
- struct in_conninfo *inc = &lctx->inp->inp_inc;
+ struct inpcb *inp = lctx->inp;
wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
if (wr == NULL) {
@@ -350,9 +413,9 @@ create_server(struct adapter *sc, struct listen_ctx *lctx)
INIT_TP_WR(req, 0);
OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
- req->local_port = inc->inc_lport;
+ req->local_port = inp->inp_lport;
req->peer_port = 0;
- req->local_ip = inc->inc_laddr.s_addr;
+ req->local_ip = inp->inp_laddr.s_addr;
req->peer_ip = 0;
req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
@@ -363,6 +426,36 @@ create_server(struct adapter *sc, struct listen_ctx *lctx)
}
+/*
+ * IPv6 counterpart of create_server: builds a CPL_PASS_OPEN_REQ6 for the
+ * listener described by lctx (local port and 128-bit local address taken from
+ * lctx->inp, peer left as wildcard) and hands it to t4_wrq_tx.
+ *
+ * Returns 0 once the work request has been queued, ENOMEM if the work request
+ * could not be allocated.
+ */
 static int
+create_server6(struct adapter *sc, struct listen_ctx *lctx)
+{
+	struct wrqe *wr;
+	struct cpl_pass_open_req6 *req;
+	struct inpcb *inp = lctx->inp;
+
+	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
+	if (wr == NULL) {
+		/* Terminate the message; log(9) does not add a newline. */
+		log(LOG_ERR, "%s: allocation failure\n", __func__);
+		return (ENOMEM);
+	}
+	req = wrtod(wr);
+
+	INIT_TP_WR(req, 0);
+	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
+	req->local_port = inp->inp_lport;
+	req->peer_port = 0;
+	/* The 16-byte local address is copied as two 64-bit halves. */
+	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
+	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
+	req->peer_ip_hi = 0;
+	req->peer_ip_lo = 0;
+	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
+	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
+	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
+
+	t4_wrq_tx(sc, wr);
+	return (0);
+}
+
+static int
destroy_server(struct adapter *sc, struct listen_ctx *lctx)
{
struct wrqe *wr;
@@ -398,13 +491,10 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
struct port_info *pi;
struct inpcb *inp = tp->t_inpcb;
struct listen_ctx *lctx;
- int i;
+ int i, rc;
INP_WLOCK_ASSERT(inp);
- if ((inp->inp_vflag & INP_IPV4) == 0)
- return (0);
-
#if 0
ADAPTER_LOCK(sc);
if (IS_BUSY(sc)) {
@@ -421,8 +511,9 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
goto done; /* no port that's UP with IFCAP_TOE enabled */
/*
- * Find a running port with IFCAP_TOE4. We'll use the first such port's
- * queues to send the passive open and receive the reply to it.
+ * Find a running port with IFCAP_TOE (4 or 6). We'll use the first
+ * such port's queues to send the passive open and receive the reply to
+ * it.
*
* XXX: need a way to mark a port in use by offload. if_cxgbe should
* then reject any attempt to bring down such a port (and maybe reject
@@ -430,7 +521,7 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
*/
for_each_port(sc, i) {
if (isset(&sc->open_device_map, i) &&
- sc->port[i]->ifp->if_capenable & IFCAP_TOE4)
+ sc->port[i]->ifp->if_capenable & IFCAP_TOE)
break;
}
KASSERT(i < sc->params.nports,
@@ -449,12 +540,17 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
}
listen_hash_add(sc, lctx);
- CTR5(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p", __func__,
- lctx->stid, tcpstates[tp->t_state], lctx, inp);
+ CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
+ __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
+ inp->inp_vflag);
- if (create_server(sc, lctx) != 0) {
- log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__,
- device_get_nameunit(sc->dev));
+ if (inp->inp_vflag & INP_IPV6)
+ rc = create_server6(sc, lctx);
+ else
+ rc = create_server(sc, lctx);
+ if (rc != 0) {
+ log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
+ __func__, device_get_nameunit(sc->dev), rc);
(void) listen_hash_del(sc, inp);
inp = release_lctx(sc, lctx);
/* can't be freed, host stack has a reference */
@@ -558,7 +654,7 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
struct l2t_entry *e;
struct tcpopt to;
struct ip *ip = mtod(m, struct ip *);
- struct tcphdr *th = (void *)(ip + 1);
+ struct tcphdr *th;
wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
if (wr == NULL) {
@@ -566,6 +662,10 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
return (EALREADY);
}
+ if (ip->ip_v == IPVERSION)
+ th = (void *)(ip + 1);
+ else
+ th = (void *)((struct ip6_hdr *)ip + 1);
bzero(&to, sizeof(to));
tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
TO_SYN);
@@ -608,7 +708,7 @@ do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
lctx->flags &= ~LCTX_RPL_PENDING;
if (status != CPL_ERR_NONE)
- log(LOG_ERR, "listener with stid %u failed: %d", stid, status);
+ log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
#ifdef INVARIANTS
/*
@@ -678,7 +778,7 @@ do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
if (status != CPL_ERR_NONE) {
- log(LOG_ERR, "%s: failed (%u) to close listener for stid %u",
+ log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
__func__, status, stid);
return (status);
}
@@ -735,8 +835,7 @@ do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
__func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
- if (cpl->status == CPL_ERR_RTX_NEG_ADVICE ||
- cpl->status == CPL_ERR_PERSIST_NEG_ADVICE)
+ if (negative_advice(cpl->status))
return (0); /* Ignore negative advice */
INP_WLOCK(inp);
@@ -855,7 +954,7 @@ mbuf_to_synqe(struct mbuf *m)
return (NULL);
synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
} else {
- synqe = (void *)(m->m_data + m->m_len + tspace - sizeof(*synqe));
+ synqe = (void *)(m->m_data + m->m_len + tspace - len);
synqe->flags = TPF_SYNQE;
}
@@ -936,21 +1035,29 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
const struct ether_header *eh;
unsigned int hlen = be32toh(cpl->hdr_len);
- const struct ip *ip;
+ uintptr_t l3hdr;
const struct tcphdr *tcp;
eh = (const void *)(cpl + 1);
- ip = (const void *)((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
- tcp = (const void *)((uintptr_t)ip + G_IP_HDR_LEN(hlen));
+ l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
+ tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
if (inc) {
bzero(inc, sizeof(*inc));
- inc->inc_faddr = ip->ip_src;
- inc->inc_laddr = ip->ip_dst;
inc->inc_fport = tcp->th_sport;
inc->inc_lport = tcp->th_dport;
- if (ip->ip_v == 6)
+ if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
+ const struct ip *ip = (const void *)l3hdr;
+
+ inc->inc_faddr = ip->ip_src;
+ inc->inc_laddr = ip->ip_dst;
+ } else {
+ const struct ip6_hdr *ip6 = (const void *)l3hdr;
+
inc->inc_flags |= INC_ISIPV6;
+ inc->inc6_faddr = ip6->ip6_src;
+ inc->inc6_laddr = ip6->ip6_dst;
+ }
}
if (th) {
@@ -959,6 +1066,105 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
}
}
+/*
+ * Returns 1 if the IPv6 address *ip6 is found on ifp's address list and 0
+ * otherwise.
+ *
+ * The comparison is done on a private copy of the address, so *ip6 is never
+ * modified.  The copy is run through in6_clearscope() and then
+ * in6_setscope() for ifp; a nonzero return from either one ends the search
+ * with 0.
+ */
+static int
+ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6)
+{
+	struct ifaddr *ifa;
+	struct sockaddr_in6 *sin6;
+	int found = 0;
+	struct in6_addr in6 = *ip6;
+
+	/* Just as in ip6_input */
+	if (in6_clearscope(&in6))
+		return (0);
+	if (in6_setscope(&in6, ifp, NULL) != 0)
+		return (0);
+
+	/* The address list is walked with the ifnet's address read lock held. */
+	if_addr_rlock(ifp);
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		sin6 = (void *)ifa->ifa_addr;
+		if (sin6->sin6_family != AF_INET6)
+			continue;
+
+		if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) {
+			found = 1;
+			break;
+		}
+	}
+	if_addr_runlock(ifp);
+
+	return (found);
+}
+
+/*
+ * Returns the L2 table entry (from t4_l2t_get) for the next hop towards the
+ * foreign address in inc, or NULL.
+ *
+ * For IPv6 with a link-local local address the peer itself is used and no
+ * route lookup is done.  Otherwise the route to the peer is looked up; NULL is
+ * returned if there is no route or if the route's interface is not ifp.  The
+ * next hop is the route's gateway when RTF_GATEWAY is set and the peer's own
+ * address when it is not.
+ */
+static struct l2t_entry *
+get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
+    struct in_conninfo *inc)
+{
+	struct rtentry *rt;
+	struct l2t_entry *e;
+	struct sockaddr_in6 sin6;
+	struct sockaddr *dst = (void *)&sin6;
+
+	/*
+	 * Start from an all-zero sockaddr so that the fields not assigned
+	 * below (port, flow info, scope id, padding) never carry stack
+	 * garbage into the route lookup or into t4_l2t_get.
+	 */
+	bzero(&sin6, sizeof(sin6));
+
+	if (inc->inc_flags & INC_ISIPV6) {
+		dst->sa_len = sizeof(struct sockaddr_in6);
+		dst->sa_family = AF_INET6;
+		((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
+
+		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
+			/* no need for route lookup */
+			e = t4_l2t_get(pi, ifp, dst);
+			return (e);
+		}
+	} else {
+		dst->sa_len = sizeof(struct sockaddr_in);
+		dst->sa_family = AF_INET;
+		((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
+	}
+
+	rt = rtalloc1(dst, 0, 0);
+	if (rt == NULL)
+		return (NULL);
+	else {
+		struct sockaddr *nexthop;
+
+		RT_UNLOCK(rt);
+		if (rt->rt_ifp != ifp)
+			e = NULL;
+		else {
+			if (rt->rt_flags & RTF_GATEWAY)
+				nexthop = rt->rt_gateway;
+			else
+				nexthop = dst;
+			e = t4_l2t_get(pi, ifp, nexthop);
+		}
+		RTFREE(rt);
+	}
+
+	return (e);
+}
+
+/*
+ * Walks ifp's address list, with the ifnet's address read lock held, looking
+ * for an AF_INET entry whose address equals 'in'.  Returns 1 on a match and 0
+ * otherwise.
+ */
+static int
+ifnet_has_ip(struct ifnet *ifp, struct in_addr in)
+{
+	struct ifaddr *cur;
+	const struct sockaddr_in *sa4;
+	int rc = 0;
+
+	if_addr_rlock(ifp);
+	TAILQ_FOREACH(cur, &ifp->if_addrhead, ifa_link) {
+		sa4 = (const void *)cur->ifa_addr;
+		if (sa4->sin_family == AF_INET &&
+		    sa4->sin_addr.s_addr == in.s_addr) {
+			rc = 1;
+			break;
+		}
+	}
+	if_addr_runlock(ifp);
+
+	return (rc);
+}
+
#define REJECT_PASS_ACCEPT() do { \
reject_reason = __LINE__; \
goto reject; \
@@ -994,10 +1200,8 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
struct tcphdr th;
struct tcpopt to;
struct port_info *pi;
- struct ifnet *ifp, *ifp_vlan = NULL;
+ struct ifnet *hw_ifp, *ifp;
struct l2t_entry *e = NULL;
- struct rtentry *rt;
- struct sockaddr_in nam;
int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
struct synq_entry *synqe = NULL;
int reject_reason;
@@ -1017,31 +1221,24 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
t4opt_to_tcpopt(&cpl->tcpopt, &to);
pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
- ifp = pi->ifp;
- m->m_pkthdr.rcvif = ifp;
- tod = TOEDEV(ifp);
+ hw_ifp = pi->ifp; /* the cxgbeX ifnet */
+ m->m_pkthdr.rcvif = hw_ifp;
+ tod = TOEDEV(hw_ifp);
/*
- * Don't offload if the interface that received the SYN doesn't have
- * IFCAP_TOE enabled.
- */
- if ((ifp->if_capenable & IFCAP_TOE4) == 0)
- REJECT_PASS_ACCEPT();
-
- /* Don't offload IPv6 connections. XXX: add IPv6 support */
- if (inc.inc_flags & INC_ISIPV6)
- REJECT_PASS_ACCEPT();
-
- /*
- * Don't offload if the SYN had a VLAN tag and the vid doesn't match
- * anything on this interface.
+ * Figure out if there is a pseudo interface (vlan, lagg, etc.)
+ * involved. Don't offload if the SYN had a VLAN tag and the vid
+ * doesn't match anything on this interface.
+ *
+ * XXX: lagg support, lagg + vlan support.
*/
vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
if (vid != 0xfff) {
- ifp_vlan = VLAN_DEVAT(ifp, vid);
- if (ifp_vlan == NULL)
+ ifp = VLAN_DEVAT(hw_ifp, vid);
+ if (ifp == NULL)
REJECT_PASS_ACCEPT();
- }
+ } else
+ ifp = hw_ifp;
/*
* Don't offload if the peer requested a TCP option that's not known to
@@ -1050,31 +1247,36 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
if (cpl->tcpopt.unknown)
REJECT_PASS_ACCEPT();
- /*
- * Don't offload if the outgoing interface for the route back to the
- * peer is not the same as the interface that received the SYN.
- * XXX: too restrictive.
- */
- nam.sin_len = sizeof(nam);
- nam.sin_family = AF_INET;
- nam.sin_addr = inc.inc_faddr;
- rt = rtalloc1((struct sockaddr *)&nam, 0, 0);
- if (rt == NULL)
- REJECT_PASS_ACCEPT();
- else {
- struct sockaddr *nexthop;
+ if (inc.inc_flags & INC_ISIPV6) {
- RT_UNLOCK(rt);
- nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway :
- (struct sockaddr *)&nam;
- if (rt->rt_ifp == ifp ||
- (ifp_vlan != NULL && rt->rt_ifp == ifp_vlan))
- e = t4_l2t_get(pi, rt->rt_ifp, nexthop);
- RTFREE(rt);
- if (e == NULL)
- REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */
+ /* Don't offload if the ifcap isn't enabled */
+ if ((ifp->if_capenable & IFCAP_TOE6) == 0)
+ REJECT_PASS_ACCEPT();
+
+ /*
+ * SYN must be directed to an IP6 address on this ifnet. This
+ * is more restrictive than in6_localip.
+ */
+ if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
+ REJECT_PASS_ACCEPT();
+ } else {
+
+ /* Don't offload if the ifcap isn't enabled */
+ if ((ifp->if_capenable & IFCAP_TOE4) == 0)
+ REJECT_PASS_ACCEPT();
+
+ /*
+ * SYN must be directed to an IP address on this ifnet. This
+ * is more restrictive than in_localip.
+ */
+ if (!ifnet_has_ip(ifp, inc.inc_laddr))
+ REJECT_PASS_ACCEPT();
}
+ e = get_l2te_for_nexthop(pi, ifp, &inc);
+ if (e == NULL)
+ REJECT_PASS_ACCEPT();
+
synqe = mbuf_to_synqe(m);
if (synqe == NULL)
REJECT_PASS_ACCEPT();
@@ -1133,7 +1335,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
synqe->lctx = lctx;
synqe->syn = m;
m = NULL;
- refcount_init(&synqe->refcnt, 0);
+ refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */
synqe->l2e_idx = e->idx;
synqe->rcv_bufsize = rx_credits;
atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
@@ -1166,7 +1368,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
*/
m = m_dup(synqe->syn, M_NOWAIT);
if (m)
- m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.rcvif = hw_ifp;
remove_tid(sc, synqe->tid);
free(wr, M_CXGBE);
@@ -1179,6 +1381,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
if (inp)
INP_WUNLOCK(inp);
+ release_synqe(synqe); /* extra hold */
REJECT_PASS_ACCEPT();
}
@@ -1193,15 +1396,19 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
* this tid because there was no L2T entry for the tid at that
* time. Abort it now. The reply to the abort will clean up.
*/
- CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, ABORT",
- __func__, stid, tid, lctx, synqe);
- send_reset_synqe(tod, synqe);
+ CTR6(KTR_CXGBE,
+ "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
+ __func__, stid, tid, lctx, synqe, synqe->flags);
+ if (!(synqe->flags & TPF_SYNQE_EXPANDED))
+ send_reset_synqe(tod, synqe);
INP_WUNLOCK(inp);
+ release_synqe(synqe); /* extra hold */
return (__LINE__);
}
INP_WUNLOCK(inp);
+ release_synqe(synqe); /* extra hold */
return (0);
reject:
CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
@@ -1216,7 +1423,7 @@ reject:
m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m->m_pkthdr.csum_data = 0xffff;
- ifp->if_input(ifp, m);
+ hw_ifp->if_input(hw_ifp, m);
}
return (reject_reason);
diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c
index 330172d..64e8b26 100644
--- a/sys/dev/cxgbe/tom/t4_tom.c
+++ b/sys/dev/cxgbe/tom/t4_tom.c
@@ -29,6 +29,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/types.h>
@@ -40,10 +41,14 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/ip6.h>
#include <netinet/tcp_var.h>
+#include <netinet6/scope6_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>
@@ -58,6 +63,9 @@ __FBSDID("$FreeBSD$");
static struct protosw ddp_protosw;
static struct pr_usrreqs ddp_usrreqs;
+static struct protosw ddp6_protosw;
+static struct pr_usrreqs ddp6_usrreqs;
+
/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
@@ -77,6 +85,11 @@ static void queue_tid_release(struct adapter *, int);
static void release_offload_resources(struct toepcb *);
static int alloc_tid_tabs(struct tid_info *);
static void free_tid_tabs(struct tid_info *);
+static int add_lip(struct adapter *, struct in6_addr *);
+static int delete_lip(struct adapter *, struct in6_addr *);
+static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *);
+static void init_clip_table(struct adapter *, struct tom_data *);
+static void destroy_clip_table(struct adapter *, struct tom_data *);
static void free_tom_data(struct adapter *, struct tom_data *);
struct toepcb *
@@ -170,8 +183,12 @@ offload_socket(struct socket *so, struct toepcb *toep)
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOCOALESCE;
- if (toep->ulp_mode == ULP_MODE_TCPDDP)
- so->so_proto = &ddp_protosw;
+ if (toep->ulp_mode == ULP_MODE_TCPDDP) {
+ if (inp->inp_vflag & INP_IPV6)
+ so->so_proto = &ddp6_protosw;
+ else
+ so->so_proto = &ddp_protosw;
+ }
SOCKBUF_UNLOCK(sb);
/* Update TCP PCB */
@@ -237,8 +254,8 @@ release_offload_resources(struct toepcb *toep)
KASSERT(!(toep->flags & TPF_ATTACHED),
("%s: %p is still attached.", __func__, toep));
- CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)",
- __func__, toep, tid, toep->l2te);
+ CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
+ __func__, toep, tid, toep->l2te, toep->ce);
if (toep->ulp_mode == ULP_MODE_TCPDDP)
release_ddp_resources(toep);
@@ -251,6 +268,9 @@ release_offload_resources(struct toepcb *toep)
release_tid(sc, tid, toep->ctrlq);
}
+ if (toep->ce)
+ release_lip(td, toep->ce);
+
mtx_lock(&td->toep_list_lock);
TAILQ_REMOVE(&td->toep_list, toep, link);
mtx_unlock(&td->toep_list_lock);
@@ -394,7 +414,7 @@ int
find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
{
unsigned short *mtus = &sc->params.mtus[0];
- int i = 0, mss;
+ int i, mss, n;
KASSERT(inc != NULL || pmss > 0,
("%s: at least one of inc/pmss must be specified", __func__));
@@ -403,8 +423,13 @@ find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
if (pmss > 0 && mss > pmss)
mss = pmss;
- while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
- ++i;
+ if (inc->inc_flags & INC_ISIPV6)
+ n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ else
+ n = sizeof(struct ip) + sizeof(struct tcphdr);
+
+ for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++)
+ continue;
return (i);
}
@@ -513,6 +538,24 @@ select_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode)
return (htobe32(ntuple));
}
+/*
+ * Switches toep to ULP_MODE_TCPDDP and resets its DDP bookkeeping: the flags
+ * become DDP_OK and the score becomes DDP_LOW_SCORE.
+ */
+void
+set_tcpddp_ulp_mode(struct toepcb *toep)
+{
+
+	toep->ddp_score = DDP_LOW_SCORE;
+	toep->ddp_flags = DDP_OK;
+	toep->ulp_mode = ULP_MODE_TCPDDP;
+}
+
+/*
+ * Returns 1 if status is one of the "negative advice" CPL error codes
+ * (retransmit, persist, or keepalive) and 0 for anything else.
+ */
+int
+negative_advice(int status)
+{
+
+	switch (status) {
+	case CPL_ERR_RTX_NEG_ADVICE:
+	case CPL_ERR_PERSIST_NEG_ADVICE:
+	case CPL_ERR_KEEPALV_NEG_ADVICE:
+		return (1);
+	default:
+		return (0);
+	}
+}
+
static int
alloc_tid_tabs(struct tid_info *t)
{
@@ -536,12 +579,10 @@ alloc_tid_tabs(struct tid_info *t)
t->atid_tab[t->natids - 1].next = NULL;
mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
- t->stid_tab = (union serv_entry *)&t->atid_tab[t->natids];
- t->sfree = t->stid_tab;
+ t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids];
t->stids_in_use = 0;
- for (i = 1; i < t->nstids; i++)
- t->stid_tab[i - 1].next = &t->stid_tab[i];
- t->stid_tab[t->nstids - 1].next = NULL;
+ TAILQ_INIT(&t->stids);
+ t->nstids_free_head = t->nstids;
atomic_store_rel_int(&t->tids_in_use, 0);
@@ -567,9 +608,157 @@ free_tid_tabs(struct tid_info *t)
mtx_destroy(&t->stid_lock);
}
+/*
+ * Sends a FW_CLIP_CMD (REQUEST | WRITE, with the ALLOC flag) carrying the
+ * local IPv6 address *lip to the firmware over the adapter's mailbox.
+ * Returns whatever t4_wr_mbox_ns returns.
+ *
+ * Must be called from within a synchronized operation on sc.
+ */
+static int
+add_lip(struct adapter *sc, struct in6_addr *lip)
+{
+	struct fw_clip_cmd c;
+
+	ASSERT_SYNCHRONIZED_OP(sc);
+	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
+	    F_FW_CMD_WRITE);
+	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
+
+	/*
+	 * Copy the two halves of the address byte for byte.  This avoids a
+	 * type-punned 64-bit load from an object that is not guaranteed to
+	 * be 8-byte aligned.
+	 */
+	memcpy(&c.ip_hi, &lip->s6_addr[0], sizeof(c.ip_hi));
+	memcpy(&c.ip_lo, &lip->s6_addr[8], sizeof(c.ip_lo));
+
+	return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
+}
+
+/*
+ * Sends a FW_CLIP_CMD (REQUEST | READ, with the FREE flag) carrying the local
+ * IPv6 address *lip to the firmware over the adapter's mailbox.  Returns
+ * whatever t4_wr_mbox_ns returns.
+ *
+ * Must be called from within a synchronized operation on sc.
+ */
+static int
+delete_lip(struct adapter *sc, struct in6_addr *lip)
+{
+	struct fw_clip_cmd c;
+
+	ASSERT_SYNCHRONIZED_OP(sc);
+	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
+	    F_FW_CMD_READ);
+	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
+
+	/*
+	 * Copy the two halves of the address byte for byte.  This avoids a
+	 * type-punned 64-bit load from an object that is not guaranteed to
+	 * be 8-byte aligned.
+	 */
+	memcpy(&c.ip_hi, &lip->s6_addr[0], sizeof(c.ip_hi));
+	memcpy(&c.ip_lo, &lip->s6_addr[8], sizeof(c.ip_lo));
+
+	return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
+}
+
+/*
+ * Looks up *lip in td's CLIP table.  Returns the matching entry, or NULL if
+ * the address is not in the table.  The caller must hold clip_table_lock.
+ */
+static struct clip_entry *
+search_lip(struct tom_data *td, struct in6_addr *lip)
+{
+	struct clip_entry *cur;
+
+	mtx_assert(&td->clip_table_lock, MA_OWNED);
+
+	/* cur is NULL when the walk runs off the end of the list. */
+	TAILQ_FOREACH(cur, &td->clip_table, link) {
+		if (IN6_ARE_ADDR_EQUAL(&cur->lip, lip))
+			break;
+	}
+
+	return (cur);
+}
+
+/*
+ * Finds the CLIP table entry for *lip and takes a reference on it.  Returns
+ * the entry, or NULL (with no reference taken) if the address is not in the
+ * table.  Acquires and releases clip_table_lock internally.
+ */
+struct clip_entry *
+hold_lip(struct tom_data *td, struct in6_addr *lip)
+{
+	struct clip_entry *match;
+
+	mtx_lock(&td->clip_table_lock);
+	if ((match = search_lip(td, lip)) != NULL)
+		match->refcount += 1;
+	mtx_unlock(&td->clip_table_lock);
+
+	return (match);
+}
+
+/*
+ * Drops one reference on the CLIP table entry ce.  The entry stays in the
+ * table even when its refcount reaches 0.  Acquires and releases
+ * clip_table_lock internally.
+ */
+void
+release_lip(struct tom_data *td, struct clip_entry *ce)
+{
+
+	mtx_lock(&td->clip_table_lock);
+	KASSERT(search_lip(td, &ce->lip) == ce,
+	    ("%s: CLIP entry %p not in CLIP table.", __func__, ce));
+	KASSERT(ce->refcount > 0,
+	    ("%s: CLIP entry %p has refcount 0", __func__, ce));
+	--ce->refcount;
+	mtx_unlock(&td->clip_table_lock);
+}
+
+/*
+ * Sets up td's CLIP table.  Initializes the lock and the list, then walks
+ * V_in6_ifaddrhead and, for every address that is not already in the table,
+ * hands it to the firmware via add_lip and records it with a refcount of 0.
+ *
+ * Loopback addresses are skipped.  An address with an embedded scope is
+ * copied and the scope is cleared on the copy before it is used.  This is
+ * best effort: an address is silently left out of the table if its entry
+ * cannot be allocated or if add_lip fails.
+ *
+ * Must be called from within a synchronized operation on sc.
+ */
+static void
+init_clip_table(struct adapter *sc, struct tom_data *td)
+{
+	struct in6_ifaddr *ia;
+	struct in6_addr *lip, tlip;
+	struct clip_entry *ce;
+
+	ASSERT_SYNCHRONIZED_OP(sc);
+
+	mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF);
+	TAILQ_INIT(&td->clip_table);
+
+	IN6_IFADDR_RLOCK();
+	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+		lip = &ia->ia_addr.sin6_addr;
+
+		KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
+		    ("%s: mcast address in in6_ifaddr list", __func__));
+
+		if (IN6_IS_ADDR_LOOPBACK(lip))
+			continue;
+		if (IN6_IS_SCOPE_EMBED(lip)) {
+			/* Remove the embedded scope */
+			tlip = *lip;
+			lip = &tlip;
+			in6_clearscope(lip);
+		}
+		/*
+		 * XXX: how to weed out the link local address for the loopback
+		 * interface?  It's fe80::1 usually (always?).
+		 */
+
+		mtx_lock(&td->clip_table_lock);
+		if (search_lip(td, lip) == NULL) {
+			/* An M_NOWAIT allocation can fail; don't touch NULL. */
+			ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
+			if (ce != NULL) {
+				memcpy(&ce->lip, lip, sizeof(ce->lip));
+				ce->refcount = 0;
+				if (add_lip(sc, lip) == 0) {
+					TAILQ_INSERT_TAIL(&td->clip_table, ce,
+					    link);
+				} else
+					free(ce, M_CXGBE);
+			}
+		}
+		mtx_unlock(&td->clip_table_lock);
+	}
+	IN6_IFADDR_RUNLOCK();
+}
+
+/*
+ * Tears down td's CLIP table.  Every entry is unlinked, handed to delete_lip
+ * and freed; the lock is then destroyed.  Does nothing if the lock was never
+ * initialized.  All entries are expected to be unreferenced by now.
+ */
+static void
+destroy_clip_table(struct adapter *sc, struct tom_data *td)
+{
+	struct clip_entry *head;
+
+	if (!mtx_initialized(&td->clip_table_lock))
+		return;
+
+	mtx_lock(&td->clip_table_lock);
+	/* Keep popping the first entry until the list is empty. */
+	while ((head = TAILQ_FIRST(&td->clip_table)) != NULL) {
+		KASSERT(head->refcount == 0,
+		    ("%s: CLIP entry %p still in use (%d)", __func__,
+		    head, head->refcount));
+		TAILQ_REMOVE(&td->clip_table, head, link);
+		delete_lip(sc, &head->lip);
+		free(head, M_CXGBE);
+	}
+	mtx_unlock(&td->clip_table_lock);
+	mtx_destroy(&td->clip_table_lock);
+}
+
static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{
+
+ ASSERT_SYNCHRONIZED_OP(sc);
+
KASSERT(TAILQ_EMPTY(&td->toep_list),
("%s: TOE PCB list is not empty.", __func__));
KASSERT(td->lctx_count == 0,
@@ -578,6 +767,7 @@ free_tom_data(struct adapter *sc, struct tom_data *td)
t4_uninit_l2t_cpl_handlers(sc);
t4_uninit_cpl_io_handlers(sc);
t4_uninit_ddp(sc, td);
+ destroy_clip_table(sc, td);
if (td->listen_mask != 0)
hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
@@ -602,7 +792,7 @@ t4_tom_activate(struct adapter *sc)
struct toedev *tod;
int i, rc;
- ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
+ ASSERT_SYNCHRONIZED_OP(sc);
/* per-adapter softc for TOM */
td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
@@ -623,8 +813,12 @@ t4_tom_activate(struct adapter *sc)
if (rc != 0)
goto done;
+ /* DDP page pods and CPL handlers */
t4_init_ddp(sc, td);
+ /* CLIP table for IPv6 offload */
+ init_clip_table(sc, td);
+
/* CPL handlers */
t4_init_connect_cpl_handlers(sc);
t4_init_l2t_cpl_handlers(sc);
@@ -668,7 +862,7 @@ t4_tom_deactivate(struct adapter *sc)
int rc = 0;
struct tom_data *td = sc->tom_softc;
- ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
+ ASSERT_SYNCHRONIZED_OP(sc);
if (td == NULL)
return (0); /* XXX. KASSERT? */
@@ -700,17 +894,24 @@ static int
t4_tom_mod_load(void)
{
int rc;
- struct protosw *tcp_protosw;
+ struct protosw *tcp_protosw, *tcp6_protosw;
tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
if (tcp_protosw == NULL)
return (ENOPROTOOPT);
-
bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw));
bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs));
ddp_usrreqs.pru_soreceive = t4_soreceive_ddp;
ddp_protosw.pr_usrreqs = &ddp_usrreqs;
+ tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
+ if (tcp6_protosw == NULL)
+ return (ENOPROTOOPT);
+ bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw));
+ bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs));
+ ddp6_usrreqs.pru_soreceive = t4_soreceive_ddp;
+ ddp6_protosw.pr_usrreqs = &ddp6_usrreqs;
+
rc = t4_register_uld(&tom_uld_info);
if (rc != 0)
t4_tom_mod_unload();
@@ -721,11 +922,14 @@ t4_tom_mod_load(void)
static void
tom_uninit(struct adapter *sc, void *arg __unused)
{
+ if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomun"))
+ return;
+
/* Try to free resources (works only if no port has IFCAP_TOE) */
- ADAPTER_LOCK(sc);
if (sc->flags & TOM_INIT_DONE)
t4_deactivate_uld(sc, ULD_TOM);
- ADAPTER_UNLOCK(sc);
+
+ end_synchronized_op(sc, LOCK_HELD);
}
static int
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index 9549b0b..d0fbbd2 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -109,6 +109,7 @@ struct toepcb {
struct sge_ofld_rxq *ofld_rxq;
struct sge_wrq *ctrlq;
struct l2t_entry *l2te; /* L2 table entry used by this connection */
+ struct clip_entry *ce; /* CLIP table entry used by this tid */
int tid; /* Connection identifier */
unsigned int tx_credits;/* tx WR credits (in 16 byte units) remaining */
unsigned int sb_cc; /* last noted value of so_rcv->sb_cc */
@@ -140,15 +141,6 @@ struct flowc_tx_params {
#define DDP_LOW_SCORE 1
#define DDP_HIGH_SCORE 3
-static inline void
-set_tcpddp_ulp_mode(struct toepcb *toep)
-{
-
- toep->ulp_mode = ULP_MODE_TCPDDP;
- toep->ddp_flags = DDP_OK;
- toep->ddp_score = DDP_LOW_SCORE;
-}
-
/*
* Compressed state for embryonic connections for a listener. Barely fits in
* 64B, try not to grow it further.
@@ -174,6 +166,7 @@ struct listen_ctx {
LIST_ENTRY(listen_ctx) link; /* listen hash linkage */
volatile int refcount;
int stid;
+ struct stid_region stid_region;
int flags;
struct inpcb *inp; /* listening socket's inp */
struct sge_wrq *ctrlq;
@@ -183,6 +176,12 @@ struct listen_ctx {
TAILQ_HEAD(ppod_head, ppod_region);
+struct clip_entry { /* one node of a TAILQ of local IPv6 addresses */
+ TAILQ_ENTRY(clip_entry) link; /* list linkage */
+ struct in6_addr lip; /* local IPv6 address */
+ u_int refcount; /* number of holders of this entry */
+};
+
struct tom_data {
struct toedev tod;
@@ -200,6 +199,9 @@ struct tom_data {
int nppods_free; /* # of available ppods */
int nppods_free_head; /* # of available ppods at the begining */
struct ppod_head ppods;
+
+ struct mtx clip_table_lock;
+ TAILQ_HEAD(, clip_entry) clip_table;
};
static inline struct tom_data *
@@ -233,6 +235,10 @@ int select_rcv_wscale(void);
uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *,
int, int, int, int);
uint32_t select_ntuple(struct port_info *, struct l2t_entry *, uint32_t);
+void set_tcpddp_ulp_mode(struct toepcb *);
+int negative_advice(int);
+struct clip_entry *hold_lip(struct tom_data *, struct in6_addr *);
+void release_lip(struct tom_data *, struct clip_entry *);
/* t4_connect.c */
void t4_init_connect_cpl_handlers(struct adapter *);
diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c
index ffe64c5..7a75394 100644
--- a/sys/dev/cxgbe/tom/t4_tom_l2t.c
+++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c
@@ -27,6 +27,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
@@ -34,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
+#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
@@ -48,28 +50,89 @@ __FBSDID("$FreeBSD$");
#include <netinet/toecore.h>
#include "common/common.h"
-#include "common/jhash.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
#define VLAN_NONE 0xfff
-#define SA(x) ((struct sockaddr *)(x))
-#define SIN(x) ((struct sockaddr_in *)(x))
-#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
-
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
+
if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
atomic_subtract_int(&d->nfree, 1);
}
-static inline unsigned int
-arp_hash(const uint32_t key, int ifindex)
+static inline u_int
+l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex)
{
- return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1);
+ u_int hash, half = d->l2t_size / 2, start = 0;
+ const void *key;
+ size_t len;
+
+ KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+ ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+ sa->sa_family));
+
+ if (sa->sa_family == AF_INET) {
+ const struct sockaddr_in *sin = (const void *)sa;
+
+ key = &sin->sin_addr;
+ len = sizeof(sin->sin_addr);
+ } else {
+ const struct sockaddr_in6 *sin6 = (const void *)sa;
+
+ key = &sin6->sin6_addr;
+ len = sizeof(sin6->sin6_addr);
+ start = half;
+ }
+
+ hash = fnv_32_buf(key, len, FNV1_32_INIT);
+ hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash);
+ hash %= half;
+
+ return (hash + start);
+}
+
+/*
+ * Compares the address in sa with the one stored in L2 table entry e.
+ * Returns 0 if they are equal and nonzero otherwise.  Only the first word of
+ * e->addr is examined for AF_INET; all of e->addr is compared for AF_INET6.
+ */
+static inline int
+l2_cmp(const struct sockaddr *sa, struct l2t_entry *e)
+{
+	const struct sockaddr_in *sa4;
+	const struct sockaddr_in6 *sa6;
+
+	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+	    sa->sa_family));
+
+	if (sa->sa_family != AF_INET) {
+		sa6 = (const void *)sa;
+		return (memcmp(&e->addr[0], &sa6->sin6_addr, sizeof(e->addr)));
+	}
+
+	sa4 = (const void *)sa;
+	return (e->addr[0] != sa4->sin_addr.s_addr);
+}
+
+/*
+ * Records the address in sa in L2 table entry e and sets e->ipv6 to match the
+ * address family: 0 for AF_INET (address kept in e->addr[0]) and 1 for
+ * AF_INET6 (address fills e->addr).
+ */
+static inline void
+l2_store(const struct sockaddr *sa, struct l2t_entry *e)
+{
+	const struct sockaddr_in *sa4;
+	const struct sockaddr_in6 *sa6;
+
+	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+	    sa->sa_family));
+
+	if (sa->sa_family != AF_INET) {
+		sa6 = (const void *)sa;
+		memcpy(&e->addr[0], &sa6->sin6_addr, sizeof(e->addr));
+		e->ipv6 = 1;
+		return;
+	}
+
+	sa4 = (const void *)sa;
+	e->addr[0] = sa4->sin_addr.s_addr;
+	e->ipv6 = 0;
}
/*
@@ -100,7 +163,7 @@ send_pending(struct adapter *sc, struct l2t_entry *e)
static void
resolution_failed_for_wr(struct wrqe *wr)
{
- log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr,
+ log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr,
wr->wr_len);
/* free(wr, M_CXGBE); */
@@ -175,15 +238,25 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e)
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
+ struct sockaddr_in6 sin6 = {0};
+ struct sockaddr *sa;
uint8_t dmac[ETHER_ADDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(struct sockaddr_in);
- SINADDR(&sin) = e->addr;
+ if (e->ipv6 == 0) {
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr.s_addr = e->addr[0];
+ sa = (void *)&sin;
+ } else {
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr));
+ sa = (void *)&sin6;
+ }
- rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
+ rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag);
if (rc == EWOULDBLOCK)
return (rc);
@@ -263,7 +336,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
struct adapter *sc = iq->adapter;
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
- unsigned int idx = tid & (L2T_SIZE - 1);
+ unsigned int idx = tid % L2T_SIZE;
int rc;
rc = do_l2t_write_rpl(iq, rss, m);
@@ -271,7 +344,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
return (rc);
if (tid & F_SYNC_WR) {
- struct l2t_entry *e = &sc->l2t->l2tab[idx];
+ struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start];
mtx_lock(&e->lock);
if (e->state != L2T_STATE_SWITCHING) {
@@ -310,21 +383,22 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
struct l2t_entry *e;
struct l2t_data *d = pi->adapter->l2t;
- uint32_t addr = SINADDR(sa);
- int hash = arp_hash(addr, ifp->if_index);
- unsigned int smt_idx = pi->port_id;
+ u_int hash, smt_idx = pi->port_id;
- if (sa->sa_family != AF_INET)
- return (NULL); /* XXX: no IPv6 support right now */
+ KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+ ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+ sa->sa_family));
#ifndef VLAN_TAG
if (ifp->if_type == IFT_L2VLAN)
return (NULL);
#endif
+ hash = l2_hash(d, sa, ifp->if_index);
rw_wlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
- if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
+ if (l2_cmp(sa, e) == 0 && e->ifp == ifp &&
+ e->smt_idx == smt_idx) {
l2t_hold(d, e);
goto done;
}
@@ -338,7 +412,7 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
d->l2tab[hash].first = e;
e->state = L2T_STATE_RESOLVING;
- e->addr = addr;
+ l2_store(sa, e);
e->ifp = ifp;
e->smt_idx = smt_idx;
e->hash = hash;
@@ -368,14 +442,14 @@ t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
struct adapter *sc = tod->tod_softc;
struct l2t_entry *e;
struct l2t_data *d = sc->l2t;
- uint32_t addr = SINADDR(sa);
- int hash = arp_hash(addr, ifp->if_index);
+ u_int hash;
KASSERT(d != NULL, ("%s: no L2 table", __func__));
+ hash = l2_hash(d, sa, ifp->if_index);
rw_rlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
- if (e->addr == addr && e->ifp == ifp) {
+ if (l2_cmp(sa, e) == 0 && e->ifp == ifp) {
mtx_lock(&e->lock);
if (atomic_load_acq_int(&e->refcnt))
goto found;
OpenPOWER on IntegriCloud