summary | refs | log | tree | commit | diff | stats
path: root/sys/dev/cxgbe/tom
diff options
context:
space:
mode:
author: attilio <attilio@FreeBSD.org>, 2013-02-03 20:13:33 +0000
committer: attilio <attilio@FreeBSD.org>, 2013-02-03 20:13:33 +0000
commit: 0d3b58aee00948d85d75a9d3d222deb454afc98e (patch)
tree: 865d112b57519913a8de64b2d9ca8787633c95a2 /sys/dev/cxgbe/tom
parent: 561dd1163dbb481d204da7a526739ac6e43d08f2 (diff)
parent: 2d2c37fb592dfc24f15e4bf14c2f109b5d4b5a83 (diff)
download: FreeBSD-src-0d3b58aee00948d85d75a9d3d222deb454afc98e.zip
download: FreeBSD-src-0d3b58aee00948d85d75a9d3d222deb454afc98e.tar.gz
MFC
Diffstat (limited to 'sys/dev/cxgbe/tom')
-rw-r--r-- sys/dev/cxgbe/tom/t4_connect.c 127
-rw-r--r-- sys/dev/cxgbe/tom/t4_cpl_io.c 3
-rw-r--r-- sys/dev/cxgbe/tom/t4_listen.c 409
-rw-r--r-- sys/dev/cxgbe/tom/t4_tom.c 240
-rw-r--r-- sys/dev/cxgbe/tom/t4_tom.h 24
-rw-r--r-- sys/dev/cxgbe/tom/t4_tom_l2t.c 124
6 files changed, 729 insertions, 198 deletions
diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c
index 8d36b1e..17ed1d3 100644
--- a/sys/dev/cxgbe/tom/t4_connect.c
+++ b/sys/dev/cxgbe/tom/t4_connect.c
@@ -29,6 +29,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
@@ -195,7 +196,7 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status);
/* Ignore negative advice */
- if (status == CPL_ERR_RTX_NEG_ADVICE)
+ if (negative_advice(status))
return (0);
free_atid(sc, atid);
@@ -220,10 +221,9 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
* Options2 for active open.
*/
static uint32_t
-calc_opt2a(struct socket *so)
+calc_opt2a(struct socket *so, struct toepcb *toep)
{
struct tcpcb *tp = so_sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
struct port_info *pi = toep->port;
struct adapter *sc = pi->adapter;
uint32_t opt2 = 0;
@@ -260,6 +260,12 @@ t4_init_connect_cpl_handlers(struct adapter *sc)
t4_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl);
}
+#define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \
+ reason = __LINE__; \
+ rc = (x); \
+ goto failed; \
+} while (0)
+
/*
* active open (soconnect).
*
@@ -275,20 +281,19 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
struct sockaddr *nam)
{
struct adapter *sc = tod->tod_softc;
+ struct tom_data *td = tod_td(tod);
struct toepcb *toep = NULL;
struct wrqe *wr = NULL;
- struct cpl_act_open_req *cpl;
- struct l2t_entry *e = NULL;
struct ifnet *rt_ifp = rt->rt_ifp;
struct port_info *pi;
- int atid = -1, mtu_idx, rscale, qid_atid, rc = ENOMEM;
+ int mtu_idx, rscale, qid_atid, rc, isipv6;
struct inpcb *inp = sotoinpcb(so);
struct tcpcb *tp = intotcpcb(inp);
+ int reason;
INP_WLOCK_ASSERT(inp);
-
- if (nam->sa_family != AF_INET)
- CXGBE_UNIMPLEMENTED("IPv6 connect");
+ KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+ ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family));
if (rt_ifp->if_type == IFT_ETHER)
pi = rt_ifp->if_softc;
@@ -297,30 +302,29 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
pi = ifp->if_softc;
} else if (rt_ifp->if_type == IFT_IEEE8023ADLAG)
- return (ENOSYS); /* XXX: implement lagg support */
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */
else
- return (ENOTSUP);
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
toep = alloc_toepcb(pi, -1, -1, M_NOWAIT);
if (toep == NULL)
- goto failed;
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- atid = alloc_atid(sc, toep);
- if (atid < 0)
- goto failed;
+ toep->tid = alloc_atid(sc, toep);
+ if (toep->tid < 0)
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- e = t4_l2t_get(pi, rt_ifp,
+ toep->l2te = t4_l2t_get(pi, rt_ifp,
rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam);
- if (e == NULL)
- goto failed;
+ if (toep->l2te == NULL)
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- wr = alloc_wrqe(sizeof(*cpl), toep->ctrlq);
+ isipv6 = nam->sa_family == AF_INET6;
+ wr = alloc_wrqe(isipv6 ? sizeof(struct cpl_act_open_req6) :
+ sizeof(struct cpl_act_open_req), toep->ctrlq);
if (wr == NULL)
- goto failed;
- cpl = wrtod(wr);
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
- toep->tid = atid;
- toep->l2te = e;
if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
set_tcpddp_ulp_mode(toep);
else
@@ -330,8 +334,6 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
SOCKBUF_UNLOCK(&so->so_rcv);
- offload_socket(so, toep);
-
/*
* The kernel sets request_r_scale based on sb_max whereas we need to
* take hardware's MAX_RCV_WND into account too. This is normally a
@@ -342,39 +344,78 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt,
else
rscale = 0;
mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0);
- qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | atid;
-
- INIT_TP_WR(cpl, 0);
- OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid));
- inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip,
- &cpl->peer_port);
- cpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, toep->rx_credits,
- toep->ulp_mode);
- cpl->params = select_ntuple(pi, e, sc->filter_mode);
- cpl->opt2 = calc_opt2a(so);
+ qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid;
+
+ if (isipv6) {
+ struct cpl_act_open_req6 *cpl = wrtod(wr);
+
+ if ((inp->inp_vflag & INP_IPV6) == 0) {
+ /* XXX think about this a bit more */
+ log(LOG_ERR,
+ "%s: time to think about AF_INET6 + vflag 0x%x.\n",
+ __func__, inp->inp_vflag);
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP);
+ }
+
+ toep->ce = hold_lip(td, &inp->in6p_laddr);
+ if (toep->ce == NULL)
+ DONT_OFFLOAD_ACTIVE_OPEN(ENOENT);
+
+ INIT_TP_WR(cpl, 0);
+ OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+ qid_atid));
+
+ cpl->local_port = inp->inp_lport;
+ cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
+ cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
+ cpl->peer_port = inp->inp_fport;
+ cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
+ cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];
+ cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale,
+ toep->rx_credits, toep->ulp_mode);
+ cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode);
+ cpl->opt2 = calc_opt2a(so, toep);
+ } else {
+ struct cpl_act_open_req *cpl = wrtod(wr);
+
+ INIT_TP_WR(cpl, 0);
+ OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+ qid_atid));
+ inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
+ &cpl->peer_ip, &cpl->peer_port);
+ cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale,
+ toep->rx_credits, toep->ulp_mode);
+ cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode);
+ cpl->opt2 = calc_opt2a(so, toep);
+ }
CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__,
toep->tid, tcpstates[tp->t_state], toep, inp);
- rc = t4_l2t_send(sc, wr, e);
+ offload_socket(so, toep);
+ rc = t4_l2t_send(sc, wr, toep->l2te);
if (rc == 0) {
toep->flags |= TPF_CPL_PENDING;
return (0);
}
undo_offload_socket(so);
+ reason = __LINE__;
failed:
- CTR5(KTR_CXGBE, "%s: FAILED, atid %d, toep %p, l2te %p, wr %p",
- __func__, atid, toep, e, wr);
+ CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc);
- if (e)
- t4_l2t_release(e);
if (wr)
free_wrqe(wr);
- if (atid >= 0)
- free_atid(sc, atid);
- if (toep)
+
+ if (toep) {
+ if (toep->tid >= 0)
+ free_atid(sc, toep->tid);
+ if (toep->l2te)
+ t4_l2t_release(toep->l2te);
+ if (toep->ce)
+ release_lip(td, toep->ce);
free_toepcb(toep);
+ }
return (rc);
}
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 6ae1ec4..9aead9f 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -1018,8 +1018,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
- if (cpl->status == CPL_ERR_RTX_NEG_ADVICE ||
- cpl->status == CPL_ERR_PERSIST_NEG_ADVICE) {
+ if (negative_advice(cpl->status)) {
CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)",
__func__, cpl->status, tid, toep->flags);
return (0); /* Ignore negative advice */
diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c
index 523f7f3..b80702d 100644
--- a/sys/dev/cxgbe/tom/t4_listen.c
+++ b/sys/dev/cxgbe/tom/t4_listen.c
@@ -29,6 +29,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
@@ -50,6 +51,8 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet6/scope6_var.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
@@ -63,9 +66,9 @@ __FBSDID("$FreeBSD$");
#include "tom/t4_tom.h"
/* stid services */
-static int alloc_stid(struct adapter *, void *);
-static void *lookup_stid(struct adapter *, int);
-static void free_stid(struct adapter *, int);
+static int alloc_stid(struct adapter *, struct listen_ctx *, int);
+static struct listen_ctx *lookup_stid(struct adapter *, int);
+static void free_stid(struct adapter *, struct listen_ctx *);
/* lctx services */
static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
@@ -81,45 +84,105 @@ static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *);
static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
static void send_reset_synqe(struct toedev *, struct synq_entry *);
-/* XXX: won't work for IPv6 */
static int
-alloc_stid(struct adapter *sc, void *ctx)
+alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6)
{
struct tid_info *t = &sc->tids;
- int stid = -1;
+ u_int stid, n, f, mask;
+ struct stid_region *sr = &lctx->stid_region;
+
+ /*
+ * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in
+ * the TCAM. The start of the stid region is properly aligned (the chip
+ * requires each region to be 128-cell aligned).
+ */
+ n = isipv6 ? 2 : 1;
+ mask = n - 1;
+ KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0,
+ ("%s: stid region (%u, %u) not properly aligned. n = %u",
+ __func__, t->stid_base, t->nstids, n));
mtx_lock(&t->stid_lock);
- if (t->sfree) {
- union serv_entry *p = t->sfree;
-
- stid = p - t->stid_tab;
- stid += t->stid_base;
- t->sfree = p->next;
- p->data = ctx;
- t->stids_in_use++;
+ if (n > t->nstids - t->stids_in_use) {
+ mtx_unlock(&t->stid_lock);
+ return (-1);
}
+
+ if (t->nstids_free_head >= n) {
+ /*
+ * This allocation will definitely succeed because the region
+ * starts at a good alignment and we just checked we have enough
+ * stids free.
+ */
+ f = t->nstids_free_head & mask;
+ t->nstids_free_head -= n + f;
+ stid = t->nstids_free_head;
+ TAILQ_INSERT_HEAD(&t->stids, sr, link);
+ } else {
+ struct stid_region *s;
+
+ stid = t->nstids_free_head;
+ TAILQ_FOREACH(s, &t->stids, link) {
+ stid += s->used + s->free;
+ f = stid & mask;
+ if (n <= s->free - f) {
+ stid -= n + f;
+ s->free -= n + f;
+ TAILQ_INSERT_AFTER(&t->stids, s, sr, link);
+ goto allocated;
+ }
+ }
+
+ if (__predict_false(stid != t->nstids)) {
+ panic("%s: stids TAILQ (%p) corrupt."
+ " At %d instead of %d at the end of the queue.",
+ __func__, &t->stids, stid, t->nstids);
+ }
+
+ mtx_unlock(&t->stid_lock);
+ return (-1);
+ }
+
+allocated:
+ sr->used = n;
+ sr->free = f;
+ t->stids_in_use += n;
+ t->stid_tab[stid] = lctx;
mtx_unlock(&t->stid_lock);
- return (stid);
+
+ KASSERT(((stid + t->stid_base) & mask) == 0,
+ ("%s: EDOOFUS.", __func__));
+ return (stid + t->stid_base);
}
-static void *
+static struct listen_ctx *
lookup_stid(struct adapter *sc, int stid)
{
struct tid_info *t = &sc->tids;
- return (t->stid_tab[stid - t->stid_base].data);
+ return (t->stid_tab[stid - t->stid_base]);
}
static void
-free_stid(struct adapter *sc, int stid)
+free_stid(struct adapter *sc, struct listen_ctx *lctx)
{
struct tid_info *t = &sc->tids;
- union serv_entry *p = &t->stid_tab[stid - t->stid_base];
+ struct stid_region *sr = &lctx->stid_region;
+ struct stid_region *s;
+
+ KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used));
mtx_lock(&t->stid_lock);
- p->next = t->sfree;
- t->sfree = p;
- t->stids_in_use--;
+ s = TAILQ_PREV(sr, stid_head, link);
+ if (s != NULL)
+ s->free += sr->used + sr->free;
+ else
+ t->nstids_free_head += sr->used + sr->free;
+ KASSERT(t->stids_in_use >= sr->used,
+ ("%s: stids_in_use (%u) < stids being freed (%u)", __func__,
+ t->stids_in_use, sr->used));
+ t->stids_in_use -= sr->used;
+ TAILQ_REMOVE(&t->stids, sr, link);
mtx_unlock(&t->stid_lock);
}
@@ -134,7 +197,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi)
if (lctx == NULL)
return (NULL);
- lctx->stid = alloc_stid(sc, lctx);
+ lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6);
if (lctx->stid < 0) {
free(lctx, M_CXGBE);
return (NULL);
@@ -167,7 +230,7 @@ free_lctx(struct adapter *sc, struct listen_ctx *lctx)
CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
__func__, lctx->stid, lctx, lctx->inp);
- free_stid(sc, lctx->stid);
+ free_stid(sc, lctx);
free(lctx, M_CXGBE);
return (in_pcbrele_wlocked(inp));
@@ -339,7 +402,7 @@ create_server(struct adapter *sc, struct listen_ctx *lctx)
{
struct wrqe *wr;
struct cpl_pass_open_req *req;
- struct in_conninfo *inc = &lctx->inp->inp_inc;
+ struct inpcb *inp = lctx->inp;
wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
if (wr == NULL) {
@@ -350,9 +413,9 @@ create_server(struct adapter *sc, struct listen_ctx *lctx)
INIT_TP_WR(req, 0);
OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
- req->local_port = inc->inc_lport;
+ req->local_port = inp->inp_lport;
req->peer_port = 0;
- req->local_ip = inc->inc_laddr.s_addr;
+ req->local_ip = inp->inp_laddr.s_addr;
req->peer_ip = 0;
req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
@@ -363,6 +426,36 @@ create_server(struct adapter *sc, struct listen_ctx *lctx)
}
static int
+create_server6(struct adapter *sc, struct listen_ctx *lctx)
+{
+ struct wrqe *wr;
+ struct cpl_pass_open_req6 *req;
+ struct inpcb *inp = lctx->inp;
+
+ wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
+ if (wr == NULL) {
+ log(LOG_ERR, "%s: allocation failure", __func__);
+ return (ENOMEM);
+ }
+ req = wrtod(wr);
+
+ INIT_TP_WR(req, 0);
+ OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
+ req->local_port = inp->inp_lport;
+ req->peer_port = 0;
+ req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
+ req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
+ req->peer_ip_hi = 0;
+ req->peer_ip_lo = 0;
+ req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
+ req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
+ F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
+
+ t4_wrq_tx(sc, wr);
+ return (0);
+}
+
+static int
destroy_server(struct adapter *sc, struct listen_ctx *lctx)
{
struct wrqe *wr;
@@ -398,13 +491,10 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
struct port_info *pi;
struct inpcb *inp = tp->t_inpcb;
struct listen_ctx *lctx;
- int i;
+ int i, rc;
INP_WLOCK_ASSERT(inp);
- if ((inp->inp_vflag & INP_IPV4) == 0)
- return (0);
-
#if 0
ADAPTER_LOCK(sc);
if (IS_BUSY(sc)) {
@@ -421,8 +511,9 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
goto done; /* no port that's UP with IFCAP_TOE enabled */
/*
- * Find a running port with IFCAP_TOE4. We'll use the first such port's
- * queues to send the passive open and receive the reply to it.
+ * Find a running port with IFCAP_TOE (4 or 6). We'll use the first
+ * such port's queues to send the passive open and receive the reply to
+ * it.
*
* XXX: need a way to mark a port in use by offload. if_cxgbe should
* then reject any attempt to bring down such a port (and maybe reject
@@ -430,7 +521,7 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
*/
for_each_port(sc, i) {
if (isset(&sc->open_device_map, i) &&
- sc->port[i]->ifp->if_capenable & IFCAP_TOE4)
+ sc->port[i]->ifp->if_capenable & IFCAP_TOE)
break;
}
KASSERT(i < sc->params.nports,
@@ -449,12 +540,17 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp)
}
listen_hash_add(sc, lctx);
- CTR5(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p", __func__,
- lctx->stid, tcpstates[tp->t_state], lctx, inp);
+ CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
+ __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
+ inp->inp_vflag);
- if (create_server(sc, lctx) != 0) {
- log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__,
- device_get_nameunit(sc->dev));
+ if (inp->inp_vflag & INP_IPV6)
+ rc = create_server6(sc, lctx);
+ else
+ rc = create_server(sc, lctx);
+ if (rc != 0) {
+ log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
+ __func__, device_get_nameunit(sc->dev), rc);
(void) listen_hash_del(sc, inp);
inp = release_lctx(sc, lctx);
/* can't be freed, host stack has a reference */
@@ -558,7 +654,7 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
struct l2t_entry *e;
struct tcpopt to;
struct ip *ip = mtod(m, struct ip *);
- struct tcphdr *th = (void *)(ip + 1);
+ struct tcphdr *th;
wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
if (wr == NULL) {
@@ -566,6 +662,10 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
return (EALREADY);
}
+ if (ip->ip_v == IPVERSION)
+ th = (void *)(ip + 1);
+ else
+ th = (void *)((struct ip6_hdr *)ip + 1);
bzero(&to, sizeof(to));
tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
TO_SYN);
@@ -608,7 +708,7 @@ do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
lctx->flags &= ~LCTX_RPL_PENDING;
if (status != CPL_ERR_NONE)
- log(LOG_ERR, "listener with stid %u failed: %d", stid, status);
+ log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
#ifdef INVARIANTS
/*
@@ -678,7 +778,7 @@ do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
if (status != CPL_ERR_NONE) {
- log(LOG_ERR, "%s: failed (%u) to close listener for stid %u",
+ log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
__func__, status, stid);
return (status);
}
@@ -735,8 +835,7 @@ do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
__func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
- if (cpl->status == CPL_ERR_RTX_NEG_ADVICE ||
- cpl->status == CPL_ERR_PERSIST_NEG_ADVICE)
+ if (negative_advice(cpl->status))
return (0); /* Ignore negative advice */
INP_WLOCK(inp);
@@ -855,7 +954,7 @@ mbuf_to_synqe(struct mbuf *m)
return (NULL);
synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
} else {
- synqe = (void *)(m->m_data + m->m_len + tspace - sizeof(*synqe));
+ synqe = (void *)(m->m_data + m->m_len + tspace - len);
synqe->flags = TPF_SYNQE;
}
@@ -936,21 +1035,29 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
const struct ether_header *eh;
unsigned int hlen = be32toh(cpl->hdr_len);
- const struct ip *ip;
+ uintptr_t l3hdr;
const struct tcphdr *tcp;
eh = (const void *)(cpl + 1);
- ip = (const void *)((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
- tcp = (const void *)((uintptr_t)ip + G_IP_HDR_LEN(hlen));
+ l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
+ tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
if (inc) {
bzero(inc, sizeof(*inc));
- inc->inc_faddr = ip->ip_src;
- inc->inc_laddr = ip->ip_dst;
inc->inc_fport = tcp->th_sport;
inc->inc_lport = tcp->th_dport;
- if (ip->ip_v == 6)
+ if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
+ const struct ip *ip = (const void *)l3hdr;
+
+ inc->inc_faddr = ip->ip_src;
+ inc->inc_laddr = ip->ip_dst;
+ } else {
+ const struct ip6_hdr *ip6 = (const void *)l3hdr;
+
inc->inc_flags |= INC_ISIPV6;
+ inc->inc6_faddr = ip6->ip6_src;
+ inc->inc6_laddr = ip6->ip6_dst;
+ }
}
if (th) {
@@ -959,6 +1066,105 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc,
}
}
+static int
+ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6)
+{
+ struct ifaddr *ifa;
+ struct sockaddr_in6 *sin6;
+ int found = 0;
+ struct in6_addr in6 = *ip6;
+
+ /* Just as in ip6_input */
+ if (in6_clearscope(&in6) || in6_clearscope(&in6))
+ return (0);
+ in6_setscope(&in6, ifp, NULL);
+
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ sin6 = (void *)ifa->ifa_addr;
+ if (sin6->sin6_family != AF_INET6)
+ continue;
+
+ if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) {
+ found = 1;
+ break;
+ }
+ }
+ if_addr_runlock(ifp);
+
+ return (found);
+}
+
+static struct l2t_entry *
+get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
+ struct in_conninfo *inc)
+{
+ struct rtentry *rt;
+ struct l2t_entry *e;
+ struct sockaddr_in6 sin6;
+ struct sockaddr *dst = (void *)&sin6;
+
+ if (inc->inc_flags & INC_ISIPV6) {
+ dst->sa_len = sizeof(struct sockaddr_in6);
+ dst->sa_family = AF_INET6;
+ ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
+
+ if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
+ /* no need for route lookup */
+ e = t4_l2t_get(pi, ifp, dst);
+ return (e);
+ }
+ } else {
+ dst->sa_len = sizeof(struct sockaddr_in);
+ dst->sa_family = AF_INET;
+ ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
+ }
+
+ rt = rtalloc1(dst, 0, 0);
+ if (rt == NULL)
+ return (NULL);
+ else {
+ struct sockaddr *nexthop;
+
+ RT_UNLOCK(rt);
+ if (rt->rt_ifp != ifp)
+ e = NULL;
+ else {
+ if (rt->rt_flags & RTF_GATEWAY)
+ nexthop = rt->rt_gateway;
+ else
+ nexthop = dst;
+ e = t4_l2t_get(pi, ifp, nexthop);
+ }
+ RTFREE(rt);
+ }
+
+ return (e);
+}
+
+static int
+ifnet_has_ip(struct ifnet *ifp, struct in_addr in)
+{
+ struct ifaddr *ifa;
+ struct sockaddr_in *sin;
+ int found = 0;
+
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ sin = (void *)ifa->ifa_addr;
+ if (sin->sin_family != AF_INET)
+ continue;
+
+ if (sin->sin_addr.s_addr == in.s_addr) {
+ found = 1;
+ break;
+ }
+ }
+ if_addr_runlock(ifp);
+
+ return (found);
+}
+
#define REJECT_PASS_ACCEPT() do { \
reject_reason = __LINE__; \
goto reject; \
@@ -994,10 +1200,8 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
struct tcphdr th;
struct tcpopt to;
struct port_info *pi;
- struct ifnet *ifp, *ifp_vlan = NULL;
+ struct ifnet *hw_ifp, *ifp;
struct l2t_entry *e = NULL;
- struct rtentry *rt;
- struct sockaddr_in nam;
int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
struct synq_entry *synqe = NULL;
int reject_reason;
@@ -1017,31 +1221,24 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
t4opt_to_tcpopt(&cpl->tcpopt, &to);
pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
- ifp = pi->ifp;
- m->m_pkthdr.rcvif = ifp;
- tod = TOEDEV(ifp);
+ hw_ifp = pi->ifp; /* the cxgbeX ifnet */
+ m->m_pkthdr.rcvif = hw_ifp;
+ tod = TOEDEV(hw_ifp);
/*
- * Don't offload if the interface that received the SYN doesn't have
- * IFCAP_TOE enabled.
- */
- if ((ifp->if_capenable & IFCAP_TOE4) == 0)
- REJECT_PASS_ACCEPT();
-
- /* Don't offload IPv6 connections. XXX: add IPv6 support */
- if (inc.inc_flags & INC_ISIPV6)
- REJECT_PASS_ACCEPT();
-
- /*
- * Don't offload if the SYN had a VLAN tag and the vid doesn't match
- * anything on this interface.
+ * Figure out if there is a pseudo interface (vlan, lagg, etc.)
+ * involved. Don't offload if the SYN had a VLAN tag and the vid
+ * doesn't match anything on this interface.
+ *
+ * XXX: lagg support, lagg + vlan support.
*/
vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
if (vid != 0xfff) {
- ifp_vlan = VLAN_DEVAT(ifp, vid);
- if (ifp_vlan == NULL)
+ ifp = VLAN_DEVAT(hw_ifp, vid);
+ if (ifp == NULL)
REJECT_PASS_ACCEPT();
- }
+ } else
+ ifp = hw_ifp;
/*
* Don't offload if the peer requested a TCP option that's not known to
@@ -1050,31 +1247,36 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
if (cpl->tcpopt.unknown)
REJECT_PASS_ACCEPT();
- /*
- * Don't offload if the outgoing interface for the route back to the
- * peer is not the same as the interface that received the SYN.
- * XXX: too restrictive.
- */
- nam.sin_len = sizeof(nam);
- nam.sin_family = AF_INET;
- nam.sin_addr = inc.inc_faddr;
- rt = rtalloc1((struct sockaddr *)&nam, 0, 0);
- if (rt == NULL)
- REJECT_PASS_ACCEPT();
- else {
- struct sockaddr *nexthop;
+ if (inc.inc_flags & INC_ISIPV6) {
- RT_UNLOCK(rt);
- nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway :
- (struct sockaddr *)&nam;
- if (rt->rt_ifp == ifp ||
- (ifp_vlan != NULL && rt->rt_ifp == ifp_vlan))
- e = t4_l2t_get(pi, rt->rt_ifp, nexthop);
- RTFREE(rt);
- if (e == NULL)
- REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */
+ /* Don't offload if the ifcap isn't enabled */
+ if ((ifp->if_capenable & IFCAP_TOE6) == 0)
+ REJECT_PASS_ACCEPT();
+
+ /*
+ * SYN must be directed to an IP6 address on this ifnet. This
+ * is more restrictive than in6_localip.
+ */
+ if (!ifnet_has_ip6(ifp, &inc.inc6_laddr))
+ REJECT_PASS_ACCEPT();
+ } else {
+
+ /* Don't offload if the ifcap isn't enabled */
+ if ((ifp->if_capenable & IFCAP_TOE4) == 0)
+ REJECT_PASS_ACCEPT();
+
+ /*
+ * SYN must be directed to an IP address on this ifnet. This
+ * is more restrictive than in_localip.
+ */
+ if (!ifnet_has_ip(ifp, inc.inc_laddr))
+ REJECT_PASS_ACCEPT();
}
+ e = get_l2te_for_nexthop(pi, ifp, &inc);
+ if (e == NULL)
+ REJECT_PASS_ACCEPT();
+
synqe = mbuf_to_synqe(m);
if (synqe == NULL)
REJECT_PASS_ACCEPT();
@@ -1133,7 +1335,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
synqe->lctx = lctx;
synqe->syn = m;
m = NULL;
- refcount_init(&synqe->refcnt, 0);
+ refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */
synqe->l2e_idx = e->idx;
synqe->rcv_bufsize = rx_credits;
atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
@@ -1166,7 +1368,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
*/
m = m_dup(synqe->syn, M_NOWAIT);
if (m)
- m->m_pkthdr.rcvif = ifp;
+ m->m_pkthdr.rcvif = hw_ifp;
remove_tid(sc, synqe->tid);
free(wr, M_CXGBE);
@@ -1179,6 +1381,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
if (inp)
INP_WUNLOCK(inp);
+ release_synqe(synqe); /* extra hold */
REJECT_PASS_ACCEPT();
}
@@ -1193,15 +1396,19 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
* this tid because there was no L2T entry for the tid at that
* time. Abort it now. The reply to the abort will clean up.
*/
- CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, ABORT",
- __func__, stid, tid, lctx, synqe);
- send_reset_synqe(tod, synqe);
+ CTR6(KTR_CXGBE,
+ "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
+ __func__, stid, tid, lctx, synqe, synqe->flags);
+ if (!(synqe->flags & TPF_SYNQE_EXPANDED))
+ send_reset_synqe(tod, synqe);
INP_WUNLOCK(inp);
+ release_synqe(synqe); /* extra hold */
return (__LINE__);
}
INP_WUNLOCK(inp);
+ release_synqe(synqe); /* extra hold */
return (0);
reject:
CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
@@ -1216,7 +1423,7 @@ reject:
m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m->m_pkthdr.csum_data = 0xffff;
- ifp->if_input(ifp, m);
+ hw_ifp->if_input(hw_ifp, m);
}
return (reject_reason);
diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c
index 330172d..64e8b26 100644
--- a/sys/dev/cxgbe/tom/t4_tom.c
+++ b/sys/dev/cxgbe/tom/t4_tom.c
@@ -29,6 +29,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/types.h>
@@ -40,10 +41,14 @@ __FBSDID("$FreeBSD$");
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
+#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/ip6.h>
#include <netinet/tcp_var.h>
+#include <netinet6/scope6_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>
@@ -58,6 +63,9 @@ __FBSDID("$FreeBSD$");
static struct protosw ddp_protosw;
static struct pr_usrreqs ddp_usrreqs;
+static struct protosw ddp6_protosw;
+static struct pr_usrreqs ddp6_usrreqs;
+
/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
@@ -77,6 +85,11 @@ static void queue_tid_release(struct adapter *, int);
static void release_offload_resources(struct toepcb *);
static int alloc_tid_tabs(struct tid_info *);
static void free_tid_tabs(struct tid_info *);
+static int add_lip(struct adapter *, struct in6_addr *);
+static int delete_lip(struct adapter *, struct in6_addr *);
+static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *);
+static void init_clip_table(struct adapter *, struct tom_data *);
+static void destroy_clip_table(struct adapter *, struct tom_data *);
static void free_tom_data(struct adapter *, struct tom_data *);
struct toepcb *
@@ -170,8 +183,12 @@ offload_socket(struct socket *so, struct toepcb *toep)
sb = &so->so_rcv;
SOCKBUF_LOCK(sb);
sb->sb_flags |= SB_NOCOALESCE;
- if (toep->ulp_mode == ULP_MODE_TCPDDP)
- so->so_proto = &ddp_protosw;
+ if (toep->ulp_mode == ULP_MODE_TCPDDP) {
+ if (inp->inp_vflag & INP_IPV6)
+ so->so_proto = &ddp6_protosw;
+ else
+ so->so_proto = &ddp_protosw;
+ }
SOCKBUF_UNLOCK(sb);
/* Update TCP PCB */
@@ -237,8 +254,8 @@ release_offload_resources(struct toepcb *toep)
KASSERT(!(toep->flags & TPF_ATTACHED),
("%s: %p is still attached.", __func__, toep));
- CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)",
- __func__, toep, tid, toep->l2te);
+ CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
+ __func__, toep, tid, toep->l2te, toep->ce);
if (toep->ulp_mode == ULP_MODE_TCPDDP)
release_ddp_resources(toep);
@@ -251,6 +268,9 @@ release_offload_resources(struct toepcb *toep)
release_tid(sc, tid, toep->ctrlq);
}
+ if (toep->ce)
+ release_lip(td, toep->ce);
+
mtx_lock(&td->toep_list_lock);
TAILQ_REMOVE(&td->toep_list, toep, link);
mtx_unlock(&td->toep_list_lock);
@@ -394,7 +414,7 @@ int
find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
{
unsigned short *mtus = &sc->params.mtus[0];
- int i = 0, mss;
+ int i, mss, n;
KASSERT(inc != NULL || pmss > 0,
("%s: at least one of inc/pmss must be specified", __func__));
@@ -403,8 +423,13 @@ find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
if (pmss > 0 && mss > pmss)
mss = pmss;
- while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40)
- ++i;
+ if (inc->inc_flags & INC_ISIPV6)
+ n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
+ else
+ n = sizeof(struct ip) + sizeof(struct tcphdr);
+
+ for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++)
+ continue;
return (i);
}
@@ -513,6 +538,24 @@ select_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode)
return (htobe32(ntuple));
}
+void
+set_tcpddp_ulp_mode(struct toepcb *toep)
+{
+
+ toep->ulp_mode = ULP_MODE_TCPDDP;
+ toep->ddp_flags = DDP_OK;
+ toep->ddp_score = DDP_LOW_SCORE;
+}
+
+int
+negative_advice(int status)
+{
+
+ return (status == CPL_ERR_RTX_NEG_ADVICE ||
+ status == CPL_ERR_PERSIST_NEG_ADVICE ||
+ status == CPL_ERR_KEEPALV_NEG_ADVICE);
+}
+
static int
alloc_tid_tabs(struct tid_info *t)
{
@@ -536,12 +579,10 @@ alloc_tid_tabs(struct tid_info *t)
t->atid_tab[t->natids - 1].next = NULL;
mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
- t->stid_tab = (union serv_entry *)&t->atid_tab[t->natids];
- t->sfree = t->stid_tab;
+ t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids];
t->stids_in_use = 0;
- for (i = 1; i < t->nstids; i++)
- t->stid_tab[i - 1].next = &t->stid_tab[i];
- t->stid_tab[t->nstids - 1].next = NULL;
+ TAILQ_INIT(&t->stids);
+ t->nstids_free_head = t->nstids;
atomic_store_rel_int(&t->tids_in_use, 0);
@@ -567,9 +608,157 @@ free_tid_tabs(struct tid_info *t)
mtx_destroy(&t->stid_lock);
}
+/*
+ * Issue an FW_CLIP_CMD (ALLOC) over the mailbox so that the firmware adds the
+ * local IPv6 address lip to its CLIP table.
+ *
+ * Must be called from within a synchronized operation on sc.  The callers in
+ * this file also hold td->clip_table_lock around the call.
+ *
+ * Returns whatever t4_wr_mbox_ns() returns; the caller in this file treats 0
+ * as success.
+ */
+static int
+add_lip(struct adapter *sc, struct in6_addr *lip)
+{
+	struct fw_clip_cmd c;
+
+	ASSERT_SYNCHRONIZED_OP(sc);
+	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
+	    F_FW_CMD_WRITE);
+	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
+
+	/*
+	 * Copy the two halves of the address bytewise.  s6_addr is a byte
+	 * array, so loading it through a uint64_t pointer is an aliasing
+	 * violation and may be a misaligned access on strict-alignment
+	 * machines.  The bytes are passed through unchanged (network order).
+	 */
+	memcpy(&c.ip_hi, &lip->s6_addr[0], sizeof(c.ip_hi));
+	memcpy(&c.ip_lo, &lip->s6_addr[8], sizeof(c.ip_lo));
+
+	return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
+}
+
+/*
+ * Issue an FW_CLIP_CMD (FREE) over the mailbox so that the firmware drops the
+ * local IPv6 address lip from its CLIP table.
+ *
+ * Must be called from within a synchronized operation on sc.  The caller in
+ * this file also holds td->clip_table_lock around the call.
+ *
+ * Returns whatever t4_wr_mbox_ns() returns.
+ *
+ * NOTE(review): this request is flagged F_FW_CMD_READ whereas add_lip() uses
+ * F_FW_CMD_WRITE -- confirm against the firmware interface that READ is what
+ * is intended for a FREE.
+ */
+static int
+delete_lip(struct adapter *sc, struct in6_addr *lip)
+{
+	struct fw_clip_cmd c;
+
+	ASSERT_SYNCHRONIZED_OP(sc);
+	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */
+
+	memset(&c, 0, sizeof(c));
+	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
+	    F_FW_CMD_READ);
+	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
+
+	/*
+	 * Copy the two halves of the address bytewise.  s6_addr is a byte
+	 * array, so loading it through a uint64_t pointer is an aliasing
+	 * violation and may be a misaligned access on strict-alignment
+	 * machines.  The bytes are passed through unchanged (network order).
+	 */
+	memcpy(&c.ip_hi, &lip->s6_addr[0], sizeof(c.ip_hi));
+	memcpy(&c.ip_lo, &lip->s6_addr[8], sizeof(c.ip_lo));
+
+	return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
+}
+
+/*
+ * Look up lip in the software CLIP table.  The CLIP table lock must be held
+ * by the caller.  Returns the matching entry, or NULL if there is none.
+ */
+static struct clip_entry *
+search_lip(struct tom_data *td, struct in6_addr *lip)
+{
+	struct clip_entry *entry;
+
+	mtx_assert(&td->clip_table_lock, MA_OWNED);
+
+	for (entry = TAILQ_FIRST(&td->clip_table); entry != NULL;
+	    entry = TAILQ_NEXT(entry, link)) {
+		if (IN6_ARE_ADDR_EQUAL(&entry->lip, lip))
+			break;
+	}
+
+	/* NULL here means the walk ran off the end without a match. */
+	return (entry);
+}
+
+/*
+ * Find the CLIP table entry for lip and take a reference on it.  Returns the
+ * entry with its refcount bumped, or NULL (and no reference) if lip is not in
+ * the table.  Acquires and releases the CLIP table lock internally.
+ */
+struct clip_entry *
+hold_lip(struct tom_data *td, struct in6_addr *lip)
+{
+	struct clip_entry *found;
+
+	mtx_lock(&td->clip_table_lock);
+	if ((found = search_lip(td, lip)) != NULL)
+		found->refcount += 1;
+	mtx_unlock(&td->clip_table_lock);
+
+	return (found);
+}
+
+/*
+ * Drop one reference on a CLIP table entry previously obtained via
+ * hold_lip().  The entry is only unreferenced here, never removed or freed;
+ * removal happens in destroy_clip_table().  Acquires and releases the CLIP
+ * table lock internally.
+ */
+void
+release_lip(struct tom_data *td, struct clip_entry *ce)
+{
+
+	mtx_lock(&td->clip_table_lock);
+	/* Sanity checks: ce must be the table's entry for its address. */
+	KASSERT(search_lip(td, &ce->lip) == ce,
+	    ("%s: CLIP entry %p not in CLIP table.", __func__, ce));
+	KASSERT(ce->refcount > 0,
+	    ("%s: CLIP entry %p has refcount 0", __func__, ce));
+	--ce->refcount;
+	mtx_unlock(&td->clip_table_lock);
+}
+
+/*
+ * Set up the software CLIP table (lock + list) and populate it, and the
+ * firmware's table, with the IPv6 addresses currently found on the system's
+ * in6_ifaddr list.  Loopback addresses are skipped and embedded scope ids are
+ * removed before an address is used.
+ *
+ * An address is left out of the table if memory for its entry cannot be
+ * allocated or if the firmware rejects it; neither is reported to the caller.
+ *
+ * Must be called from within a synchronized operation on sc.
+ */
+static void
+init_clip_table(struct adapter *sc, struct tom_data *td)
+{
+	struct in6_ifaddr *ia;
+	struct in6_addr *lip, tlip;
+	struct clip_entry *ce;
+
+	ASSERT_SYNCHRONIZED_OP(sc);
+
+	mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF);
+	TAILQ_INIT(&td->clip_table);
+
+	IN6_IFADDR_RLOCK();
+	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+		lip = &ia->ia_addr.sin6_addr;
+
+		KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
+		    ("%s: mcast address in in6_ifaddr list", __func__));
+
+		if (IN6_IS_ADDR_LOOPBACK(lip))
+			continue;
+		if (IN6_IS_SCOPE_EMBED(lip)) {
+			/* Remove the embedded scope (work on a private copy) */
+			tlip = *lip;
+			lip = &tlip;
+			in6_clearscope(lip);
+		}
+		/*
+		 * XXX: how to weed out the link local address for the loopback
+		 * interface?  It's fe80::1 usually (always?).
+		 */
+
+		mtx_lock(&td->clip_table_lock);
+		if (search_lip(td, lip) == NULL) {
+			/*
+			 * M_NOWAIT allocations can fail; skip the address
+			 * instead of dereferencing a NULL entry.
+			 */
+			ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
+			if (ce != NULL) {
+				memcpy(&ce->lip, lip, sizeof(ce->lip));
+				ce->refcount = 0;
+				if (add_lip(sc, lip) == 0) {
+					TAILQ_INSERT_TAIL(&td->clip_table, ce,
+					    link);
+				} else
+					free(ce, M_CXGBE);
+			}
+		}
+		mtx_unlock(&td->clip_table_lock);
+	}
+	IN6_IFADDR_RUNLOCK();
+}
+
+/*
+ * Tear down the CLIP table: every entry is unlinked, deleted from the
+ * firmware via delete_lip() and freed, and then the table lock is destroyed.
+ * Does nothing if the lock was never initialized.
+ */
+static void
+destroy_clip_table(struct adapter *sc, struct tom_data *td)
+{
+	struct clip_entry *entry;
+
+	if (!mtx_initialized(&td->clip_table_lock))
+		return;
+
+	mtx_lock(&td->clip_table_lock);
+	/* Drain the list from the head until nothing is left. */
+	while ((entry = TAILQ_FIRST(&td->clip_table)) != NULL) {
+		KASSERT(entry->refcount == 0,
+		    ("%s: CLIP entry %p still in use (%d)", __func__,
+		    entry, entry->refcount));
+		TAILQ_REMOVE(&td->clip_table, entry, link);
+		delete_lip(sc, &entry->lip);
+		free(entry, M_CXGBE);
+	}
+	mtx_unlock(&td->clip_table_lock);
+	mtx_destroy(&td->clip_table_lock);
+}
+
static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{
+
+ ASSERT_SYNCHRONIZED_OP(sc);
+
KASSERT(TAILQ_EMPTY(&td->toep_list),
("%s: TOE PCB list is not empty.", __func__));
KASSERT(td->lctx_count == 0,
@@ -578,6 +767,7 @@ free_tom_data(struct adapter *sc, struct tom_data *td)
t4_uninit_l2t_cpl_handlers(sc);
t4_uninit_cpl_io_handlers(sc);
t4_uninit_ddp(sc, td);
+ destroy_clip_table(sc, td);
if (td->listen_mask != 0)
hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);
@@ -602,7 +792,7 @@ t4_tom_activate(struct adapter *sc)
struct toedev *tod;
int i, rc;
- ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
+ ASSERT_SYNCHRONIZED_OP(sc);
/* per-adapter softc for TOM */
td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
@@ -623,8 +813,12 @@ t4_tom_activate(struct adapter *sc)
if (rc != 0)
goto done;
+ /* DDP page pods and CPL handlers */
t4_init_ddp(sc, td);
+ /* CLIP table for IPv6 offload */
+ init_clip_table(sc, td);
+
/* CPL handlers */
t4_init_connect_cpl_handlers(sc);
t4_init_l2t_cpl_handlers(sc);
@@ -668,7 +862,7 @@ t4_tom_deactivate(struct adapter *sc)
int rc = 0;
struct tom_data *td = sc->tom_softc;
- ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */
+ ASSERT_SYNCHRONIZED_OP(sc);
if (td == NULL)
return (0); /* XXX. KASSERT? */
@@ -700,17 +894,24 @@ static int
t4_tom_mod_load(void)
{
int rc;
- struct protosw *tcp_protosw;
+ struct protosw *tcp_protosw, *tcp6_protosw;
tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
if (tcp_protosw == NULL)
return (ENOPROTOOPT);
-
bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw));
bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs));
ddp_usrreqs.pru_soreceive = t4_soreceive_ddp;
ddp_protosw.pr_usrreqs = &ddp_usrreqs;
+ tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
+ if (tcp6_protosw == NULL)
+ return (ENOPROTOOPT);
+ bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw));
+ bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs));
+ ddp6_usrreqs.pru_soreceive = t4_soreceive_ddp;
+ ddp6_protosw.pr_usrreqs = &ddp6_usrreqs;
+
rc = t4_register_uld(&tom_uld_info);
if (rc != 0)
t4_tom_mod_unload();
@@ -721,11 +922,14 @@ t4_tom_mod_load(void)
static void
tom_uninit(struct adapter *sc, void *arg __unused)
{
+ if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomun")) /* non-zero: could not begin the op */
+ return; /* leave this adapter alone in that case */
+
/* Try to free resources (works only if no port has IFCAP_TOE) */
- ADAPTER_LOCK(sc);
if (sc->flags & TOM_INIT_DONE)
t4_deactivate_uld(sc, ULD_TOM);
- ADAPTER_UNLOCK(sc);
+
+ end_synchronized_op(sc, LOCK_HELD); /* pairs with the HOLD_LOCK begin above */
}
static int
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index 9549b0b..d0fbbd2 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -109,6 +109,7 @@ struct toepcb {
struct sge_ofld_rxq *ofld_rxq;
struct sge_wrq *ctrlq;
struct l2t_entry *l2te; /* L2 table entry used by this connection */
+ struct clip_entry *ce; /* CLIP table entry used by this tid */
int tid; /* Connection identifier */
unsigned int tx_credits;/* tx WR credits (in 16 byte units) remaining */
unsigned int sb_cc; /* last noted value of so_rcv->sb_cc */
@@ -140,15 +141,6 @@ struct flowc_tx_params {
#define DDP_LOW_SCORE 1
#define DDP_HIGH_SCORE 3
-static inline void
-set_tcpddp_ulp_mode(struct toepcb *toep)
-{
-
- toep->ulp_mode = ULP_MODE_TCPDDP;
- toep->ddp_flags = DDP_OK;
- toep->ddp_score = DDP_LOW_SCORE;
-}
-
/*
* Compressed state for embryonic connections for a listener. Barely fits in
* 64B, try not to grow it further.
@@ -174,6 +166,7 @@ struct listen_ctx {
LIST_ENTRY(listen_ctx) link; /* listen hash linkage */
volatile int refcount;
int stid;
+ struct stid_region stid_region;
int flags;
struct inpcb *inp; /* listening socket's inp */
struct sge_wrq *ctrlq;
@@ -183,6 +176,12 @@ struct listen_ctx {
TAILQ_HEAD(ppod_head, ppod_region);
+struct clip_entry { /* one local IPv6 address in the software CLIP table */
+ TAILQ_ENTRY(clip_entry) link; /* linkage on tom_data's clip_table */
+ struct in6_addr lip; /* local IPv6 address */
+ u_int refcount; /* taken by hold_lip(), dropped by release_lip() */
+};
+
struct tom_data {
struct toedev tod;
@@ -200,6 +199,9 @@ struct tom_data {
int nppods_free; /* # of available ppods */
int nppods_free_head; /* # of available ppods at the begining */
struct ppod_head ppods;
+
+ struct mtx clip_table_lock;
+ TAILQ_HEAD(, clip_entry) clip_table;
};
static inline struct tom_data *
@@ -233,6 +235,10 @@ int select_rcv_wscale(void);
uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *,
int, int, int, int);
uint32_t select_ntuple(struct port_info *, struct l2t_entry *, uint32_t);
+void set_tcpddp_ulp_mode(struct toepcb *);
+int negative_advice(int);
+struct clip_entry *hold_lip(struct tom_data *, struct in6_addr *);
+void release_lip(struct tom_data *, struct clip_entry *);
/* t4_connect.c */
void t4_init_connect_cpl_handlers(struct adapter *);
diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c
index ffe64c5..7a75394 100644
--- a/sys/dev/cxgbe/tom/t4_tom_l2t.c
+++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c
@@ -27,6 +27,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
+#include "opt_inet6.h"
#ifdef TCP_OFFLOAD
#include <sys/param.h>
@@ -34,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/bus.h>
+#include <sys/fnv_hash.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
@@ -48,28 +50,89 @@ __FBSDID("$FreeBSD$");
#include <netinet/toecore.h>
#include "common/common.h"
-#include "common/jhash.h"
#include "common/t4_msg.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
#define VLAN_NONE 0xfff
-#define SA(x) ((struct sockaddr *)(x))
-#define SIN(x) ((struct sockaddr_in *)(x))
-#define SINADDR(x) (SIN(x)->sin_addr.s_addr)
-
static inline void
l2t_hold(struct l2t_data *d, struct l2t_entry *e)
{
+
if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */
atomic_subtract_int(&d->nfree, 1);
}
-static inline unsigned int
-arp_hash(const uint32_t key, int ifindex)
+static inline u_int
+l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) /* bucket index for (address in sa, ifindex) */
{
- return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1);
+ u_int hash, half = d->l2t_size / 2, start = 0; /* IPv4 results fall in [0, half) */
+ const void *key;
+ size_t len;
+
+ KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+ ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+ sa->sa_family));
+
+ if (sa->sa_family == AF_INET) {
+ const struct sockaddr_in *sin = (const void *)sa;
+
+ key = &sin->sin_addr;
+ len = sizeof(sin->sin_addr);
+ } else {
+ const struct sockaddr_in6 *sin6 = (const void *)sa;
+
+ key = &sin6->sin6_addr;
+ len = sizeof(sin6->sin6_addr);
+ start = half; /* IPv6 results fall in [half, 2 * half) */
+ }
+
+ hash = fnv_32_buf(key, len, FNV1_32_INIT); /* hash the raw address bytes */
+ hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); /* then fold in the interface index */
+ hash %= half; /* reduce to an offset within one half */
+
+ return (hash + start);
+}
+
+/*
+ * Compare the address carried in sa with the one stored in the L2T entry e.
+ * Returns 0 when they are equal and non-zero otherwise.  For AF_INET only
+ * e->addr[0] is examined; for AF_INET6 all of e->addr is compared.
+ */
+static inline int
+l2_cmp(const struct sockaddr *sa, struct l2t_entry *e)
+{
+	const struct sockaddr_in *sin;
+	const struct sockaddr_in6 *sin6;
+
+	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+	    sa->sa_family));
+
+	if (sa->sa_family != AF_INET) {
+		sin6 = (const void *)sa;
+		return (memcmp(&e->addr[0], &sin6->sin6_addr,
+		    sizeof(e->addr)));
+	}
+
+	sin = (const void *)sa;
+	return (e->addr[0] != sin->sin_addr.s_addr);
+}
+
+/*
+ * Record the address carried in sa in the L2T entry e and set e->ipv6 to
+ * reflect the address family (0 for AF_INET, 1 for AF_INET6).  For AF_INET
+ * only e->addr[0] is written.
+ */
+static inline void
+l2_store(const struct sockaddr *sa, struct l2t_entry *e)
+{
+	const struct sockaddr_in *sin;
+	const struct sockaddr_in6 *sin6;
+
+	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+	    ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+	    sa->sa_family));
+
+	if (sa->sa_family != AF_INET) {
+		sin6 = (const void *)sa;
+		memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr));
+		e->ipv6 = 1;
+	} else {
+		sin = (const void *)sa;
+		e->addr[0] = sin->sin_addr.s_addr;
+		e->ipv6 = 0;
+	}
}
/*
@@ -100,7 +163,7 @@ send_pending(struct adapter *sc, struct l2t_entry *e)
static void
resolution_failed_for_wr(struct wrqe *wr)
{
- log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr,
+ log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr,
wr->wr_len);
/* free(wr, M_CXGBE); */
@@ -175,15 +238,25 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e)
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
+ struct sockaddr_in6 sin6 = {0};
+ struct sockaddr *sa;
uint8_t dmac[ETHER_ADDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;
- sin.sin_family = AF_INET;
- sin.sin_len = sizeof(struct sockaddr_in);
- SINADDR(&sin) = e->addr;
+ if (e->ipv6 == 0) {
+ sin.sin_family = AF_INET;
+ sin.sin_len = sizeof(struct sockaddr_in);
+ sin.sin_addr.s_addr = e->addr[0];
+ sa = (void *)&sin;
+ } else {
+ sin6.sin6_family = AF_INET6;
+ sin6.sin6_len = sizeof(struct sockaddr_in6);
+ memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr));
+ sa = (void *)&sin6;
+ }
- rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag);
+ rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag);
if (rc == EWOULDBLOCK)
return (rc);
@@ -263,7 +336,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
struct adapter *sc = iq->adapter;
const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1);
unsigned int tid = GET_TID(rpl);
- unsigned int idx = tid & (L2T_SIZE - 1);
+ unsigned int idx = tid % L2T_SIZE;
int rc;
rc = do_l2t_write_rpl(iq, rss, m);
@@ -271,7 +344,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss,
return (rc);
if (tid & F_SYNC_WR) {
- struct l2t_entry *e = &sc->l2t->l2tab[idx];
+ struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start];
mtx_lock(&e->lock);
if (e->state != L2T_STATE_SWITCHING) {
@@ -310,21 +383,22 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
{
struct l2t_entry *e;
struct l2t_data *d = pi->adapter->l2t;
- uint32_t addr = SINADDR(sa);
- int hash = arp_hash(addr, ifp->if_index);
- unsigned int smt_idx = pi->port_id;
+ u_int hash, smt_idx = pi->port_id;
- if (sa->sa_family != AF_INET)
- return (NULL); /* XXX: no IPv6 support right now */
+ KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+ ("%s: sa %p has unexpected sa_family %d", __func__, sa,
+ sa->sa_family));
#ifndef VLAN_TAG
if (ifp->if_type == IFT_L2VLAN)
return (NULL);
#endif
+ hash = l2_hash(d, sa, ifp->if_index);
rw_wlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
- if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) {
+ if (l2_cmp(sa, e) == 0 && e->ifp == ifp &&
+ e->smt_idx == smt_idx) {
l2t_hold(d, e);
goto done;
}
@@ -338,7 +412,7 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa)
d->l2tab[hash].first = e;
e->state = L2T_STATE_RESOLVING;
- e->addr = addr;
+ l2_store(sa, e);
e->ifp = ifp;
e->smt_idx = smt_idx;
e->hash = hash;
@@ -368,14 +442,14 @@ t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
struct adapter *sc = tod->tod_softc;
struct l2t_entry *e;
struct l2t_data *d = sc->l2t;
- uint32_t addr = SINADDR(sa);
- int hash = arp_hash(addr, ifp->if_index);
+ u_int hash;
KASSERT(d != NULL, ("%s: no L2 table", __func__));
+ hash = l2_hash(d, sa, ifp->if_index);
rw_rlock(&d->lock);
for (e = d->l2tab[hash].first; e; e = e->next) {
- if (e->addr == addr && e->ifp == ifp) {
+ if (l2_cmp(sa, e) == 0 && e->ifp == ifp) {
mtx_lock(&e->lock);
if (atomic_load_acq_int(&e->refcnt))
goto found;
OpenPOWER on IntegriCloud