diff options
author | attilio <attilio@FreeBSD.org> | 2013-02-03 20:13:33 +0000 |
---|---|---|
committer | attilio <attilio@FreeBSD.org> | 2013-02-03 20:13:33 +0000 |
commit | 0d3b58aee00948d85d75a9d3d222deb454afc98e (patch) | |
tree | 865d112b57519913a8de64b2d9ca8787633c95a2 /sys/dev/cxgbe/tom | |
parent | 561dd1163dbb481d204da7a526739ac6e43d08f2 (diff) | |
parent | 2d2c37fb592dfc24f15e4bf14c2f109b5d4b5a83 (diff) | |
download | FreeBSD-src-0d3b58aee00948d85d75a9d3d222deb454afc98e.zip FreeBSD-src-0d3b58aee00948d85d75a9d3d222deb454afc98e.tar.gz |
MFC
Diffstat (limited to 'sys/dev/cxgbe/tom')
-rw-r--r-- | sys/dev/cxgbe/tom/t4_connect.c | 127 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_cpl_io.c | 3 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_listen.c | 409 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_tom.c | 240 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_tom.h | 24 | ||||
-rw-r--r-- | sys/dev/cxgbe/tom/t4_tom_l2t.c | 124 |
6 files changed, 729 insertions, 198 deletions
diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index 8d36b1e..17ed1d3 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #ifdef TCP_OFFLOAD #include <sys/param.h> @@ -195,7 +196,7 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status); /* Ignore negative advice */ - if (status == CPL_ERR_RTX_NEG_ADVICE) + if (negative_advice(status)) return (0); free_atid(sc, atid); @@ -220,10 +221,9 @@ do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, * Options2 for active open. */ static uint32_t -calc_opt2a(struct socket *so) +calc_opt2a(struct socket *so, struct toepcb *toep) { struct tcpcb *tp = so_sototcpcb(so); - struct toepcb *toep = tp->t_toe; struct port_info *pi = toep->port; struct adapter *sc = pi->adapter; uint32_t opt2 = 0; @@ -260,6 +260,12 @@ t4_init_connect_cpl_handlers(struct adapter *sc) t4_register_cpl_handler(sc, CPL_ACT_OPEN_RPL, do_act_open_rpl); } +#define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \ + reason = __LINE__; \ + rc = (x); \ + goto failed; \ +} while (0) + /* * active open (soconnect). * @@ -275,20 +281,19 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; + struct tom_data *td = tod_td(tod); struct toepcb *toep = NULL; struct wrqe *wr = NULL; - struct cpl_act_open_req *cpl; - struct l2t_entry *e = NULL; struct ifnet *rt_ifp = rt->rt_ifp; struct port_info *pi; - int atid = -1, mtu_idx, rscale, qid_atid, rc = ENOMEM; + int mtu_idx, rscale, qid_atid, rc, isipv6; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); + int reason; INP_WLOCK_ASSERT(inp); - - if (nam->sa_family != AF_INET) - CXGBE_UNIMPLEMENTED("IPv6 connect"); + KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, + ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (rt_ifp->if_type == IFT_ETHER) pi = rt_ifp->if_softc; @@ -297,30 +302,29 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, pi = ifp->if_softc; } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) - return (ENOSYS); /* XXX: implement lagg support */ + DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else - return (ENOTSUP); + DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); toep = alloc_toepcb(pi, -1, -1, M_NOWAIT); if (toep == NULL) - goto failed; + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - atid = alloc_atid(sc, toep); - if (atid < 0) - goto failed; + toep->tid = alloc_atid(sc, toep); + if (toep->tid < 0) + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - e = t4_l2t_get(pi, rt_ifp, + toep->l2te = t4_l2t_get(pi, rt_ifp, rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam); - if (e == NULL) - goto failed; + if (toep->l2te == NULL) + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - wr = alloc_wrqe(sizeof(*cpl), toep->ctrlq); + isipv6 = nam->sa_family == AF_INET6; + wr = alloc_wrqe(isipv6 ? sizeof(struct cpl_act_open_req6) : + sizeof(struct cpl_act_open_req), toep->ctrlq); if (wr == NULL) - goto failed; - cpl = wrtod(wr); + DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); - toep->tid = atid; - toep->l2te = e; if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) set_tcpddp_ulp_mode(toep); else @@ -330,8 +334,6 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ); SOCKBUF_UNLOCK(&so->so_rcv); - offload_socket(so, toep); - /* * The kernel sets request_r_scale based on sb_max whereas we need to * take hardware's MAX_RCV_WND into account too. This is normally a @@ -342,39 +344,78 @@ t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, else rscale = 0; mtu_idx = find_best_mtu_idx(sc, &inp->inp_inc, 0); - qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | atid; - - INIT_TP_WR(cpl, 0); - OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); - inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, - &cpl->peer_port); - cpl->opt0 = calc_opt0(so, pi, e, mtu_idx, rscale, toep->rx_credits, - toep->ulp_mode); - cpl->params = select_ntuple(pi, e, sc->filter_mode); - cpl->opt2 = calc_opt2a(so); + qid_atid = (toep->ofld_rxq->iq.abs_id << 14) | toep->tid; + + if (isipv6) { + struct cpl_act_open_req6 *cpl = wrtod(wr); + + if ((inp->inp_vflag & INP_IPV6) == 0) { + /* XXX think about this a bit more */ + log(LOG_ERR, + "%s: time to think about AF_INET6 + vflag 0x%x.\n", + __func__, inp->inp_vflag); + DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); + } + + toep->ce = hold_lip(td, &inp->in6p_laddr); + if (toep->ce == NULL) + DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); + + INIT_TP_WR(cpl, 0); + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + qid_atid)); + + cpl->local_port = inp->inp_lport; + cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; + cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; + cpl->peer_port = inp->inp_fport; + cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; + cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; + cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, + toep->rx_credits, toep->ulp_mode); + cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode); + cpl->opt2 = calc_opt2a(so, toep); + } else { + struct cpl_act_open_req *cpl = wrtod(wr); + + INIT_TP_WR(cpl, 0); + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + qid_atid)); + inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, + &cpl->peer_ip, &cpl->peer_port); + cpl->opt0 = calc_opt0(so, pi, toep->l2te, mtu_idx, rscale, + toep->rx_credits, toep->ulp_mode); + cpl->params = select_ntuple(pi, toep->l2te, sc->filter_mode); + cpl->opt2 = calc_opt2a(so, toep); + } CTR5(KTR_CXGBE, "%s: atid %u (%s), toep %p, inp %p", __func__, toep->tid, tcpstates[tp->t_state], toep, inp); - rc = t4_l2t_send(sc, wr, e); + offload_socket(so, toep); + rc = t4_l2t_send(sc, wr, toep->l2te); if (rc == 0) { toep->flags |= TPF_CPL_PENDING; return (0); } undo_offload_socket(so); + reason = __LINE__; failed: - CTR5(KTR_CXGBE, "%s: FAILED, atid %d, toep %p, l2te %p, wr %p", - __func__, atid, toep, e, wr); + CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc); - if (e) - t4_l2t_release(e); if (wr) free_wrqe(wr); - if (atid >= 0) - free_atid(sc, atid); - if (toep) + + if (toep) { + if (toep->tid >= 0) + free_atid(sc, toep->tid); + if (toep->l2te) + t4_l2t_release(toep->l2te); + if (toep->ce) + release_lip(td, toep->ce); free_toepcb(toep); + } return (rc); } diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c index 6ae1ec4..9aead9f 100644 --- a/sys/dev/cxgbe/tom/t4_cpl_io.c +++ b/sys/dev/cxgbe/tom/t4_cpl_io.c @@ -1018,8 +1018,7 @@ do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); - if (cpl->status == CPL_ERR_RTX_NEG_ADVICE || - cpl->status == CPL_ERR_PERSIST_NEG_ADVICE) { + if (negative_advice(cpl->status)) { CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", __func__, cpl->status, tid, toep->flags); return (0); /* Ignore negative advice */ diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c index 523f7f3..b80702d 100644 --- a/sys/dev/cxgbe/tom/t4_listen.c +++ b/sys/dev/cxgbe/tom/t4_listen.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #ifdef TCP_OFFLOAD #include <sys/param.h> @@ -50,6 +51,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_pcb.h> #include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet6/scope6_var.h> #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #define TCPSTATES @@ -63,9 +66,9 @@ __FBSDID("$FreeBSD$"); #include "tom/t4_tom.h" /* stid services */ -static int alloc_stid(struct adapter *, void *); -static void *lookup_stid(struct adapter *, int); -static void free_stid(struct adapter *, int); +static int alloc_stid(struct adapter *, struct listen_ctx *, int); +static struct listen_ctx *lookup_stid(struct adapter *, int); +static void free_stid(struct adapter *, struct listen_ctx *); /* lctx services */ static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *, @@ -81,45 +84,105 @@ static inline void save_qids_in_mbuf(struct mbuf *, struct port_info *); static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *); static void send_reset_synqe(struct toedev *, struct synq_entry *); -/* XXX: won't work for IPv6 */ static int -alloc_stid(struct adapter *sc, void *ctx) +alloc_stid(struct adapter *sc, struct listen_ctx *lctx, int isipv6) { struct tid_info *t = &sc->tids; - int stid = -1; + u_int stid, n, f, mask; + struct stid_region *sr = &lctx->stid_region; + + /* + * An IPv6 server needs 2 naturally aligned stids (1 stid = 4 cells) in + * the TCAM. The start of the stid region is properly aligned (the chip + * requires each region to be 128-cell aligned). + */ + n = isipv6 ? 2 : 1; + mask = n - 1; + KASSERT((t->stid_base & mask) == 0 && (t->nstids & mask) == 0, + ("%s: stid region (%u, %u) not properly aligned. n = %u", + __func__, t->stid_base, t->nstids, n)); mtx_lock(&t->stid_lock); - if (t->sfree) { - union serv_entry *p = t->sfree; - - stid = p - t->stid_tab; - stid += t->stid_base; - t->sfree = p->next; - p->data = ctx; - t->stids_in_use++; + if (n > t->nstids - t->stids_in_use) { + mtx_unlock(&t->stid_lock); + return (-1); } + + if (t->nstids_free_head >= n) { + /* + * This allocation will definitely succeed because the region + * starts at a good alignment and we just checked we have enough + * stids free. + */ + f = t->nstids_free_head & mask; + t->nstids_free_head -= n + f; + stid = t->nstids_free_head; + TAILQ_INSERT_HEAD(&t->stids, sr, link); + } else { + struct stid_region *s; + + stid = t->nstids_free_head; + TAILQ_FOREACH(s, &t->stids, link) { + stid += s->used + s->free; + f = stid & mask; + if (n <= s->free - f) { + stid -= n + f; + s->free -= n + f; + TAILQ_INSERT_AFTER(&t->stids, s, sr, link); + goto allocated; + } + } + + if (__predict_false(stid != t->nstids)) { + panic("%s: stids TAILQ (%p) corrupt." + " At %d instead of %d at the end of the queue.", + __func__, &t->stids, stid, t->nstids); + } + + mtx_unlock(&t->stid_lock); + return (-1); + } + +allocated: + sr->used = n; + sr->free = f; + t->stids_in_use += n; + t->stid_tab[stid] = lctx; mtx_unlock(&t->stid_lock); - return (stid); + + KASSERT(((stid + t->stid_base) & mask) == 0, + ("%s: EDOOFUS.", __func__)); + return (stid + t->stid_base); } -static void * +static struct listen_ctx * lookup_stid(struct adapter *sc, int stid) { struct tid_info *t = &sc->tids; - return (t->stid_tab[stid - t->stid_base].data); + return (t->stid_tab[stid - t->stid_base]); } static void -free_stid(struct adapter *sc, int stid) +free_stid(struct adapter *sc, struct listen_ctx *lctx) { struct tid_info *t = &sc->tids; - union serv_entry *p = &t->stid_tab[stid - t->stid_base]; + struct stid_region *sr = &lctx->stid_region; + struct stid_region *s; + + KASSERT(sr->used > 0, ("%s: nonsense free (%d)", __func__, sr->used)); mtx_lock(&t->stid_lock); - p->next = t->sfree; - t->sfree = p; - t->stids_in_use--; + s = TAILQ_PREV(sr, stid_head, link); + if (s != NULL) + s->free += sr->used + sr->free; + else + t->nstids_free_head += sr->used + sr->free; + KASSERT(t->stids_in_use >= sr->used, + ("%s: stids_in_use (%u) < stids being freed (%u)", __func__, + t->stids_in_use, sr->used)); + t->stids_in_use -= sr->used; + TAILQ_REMOVE(&t->stids, sr, link); mtx_unlock(&t->stid_lock); } @@ -134,7 +197,7 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, struct port_info *pi) if (lctx == NULL) return (NULL); - lctx->stid = alloc_stid(sc, lctx); + lctx->stid = alloc_stid(sc, lctx, inp->inp_vflag & INP_IPV6); if (lctx->stid < 0) { free(lctx, M_CXGBE); return (NULL); @@ -167,7 +230,7 @@ free_lctx(struct adapter *sc, struct listen_ctx *lctx) CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p", __func__, lctx->stid, lctx, lctx->inp); - free_stid(sc, lctx->stid); + free_stid(sc, lctx); free(lctx, M_CXGBE); return (in_pcbrele_wlocked(inp)); @@ -339,7 +402,7 @@ create_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; struct cpl_pass_open_req *req; - struct in_conninfo *inc = &lctx->inp->inp_inc; + struct inpcb *inp = lctx->inp; wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); if (wr == NULL) { @@ -350,9 +413,9 @@ create_server(struct adapter *sc, struct listen_ctx *lctx) INIT_TP_WR(req, 0); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid)); - req->local_port = inc->inc_lport; + req->local_port = inp->inp_lport; req->peer_port = 0; - req->local_ip = inc->inc_laddr.s_addr; + req->local_ip = inp->inp_laddr.s_addr; req->peer_ip = 0; req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | @@ -363,6 +426,36 @@ create_server(struct adapter *sc, struct listen_ctx *lctx) } static int +create_server6(struct adapter *sc, struct listen_ctx *lctx) +{ + struct wrqe *wr; + struct cpl_pass_open_req6 *req; + struct inpcb *inp = lctx->inp; + + wr = alloc_wrqe(sizeof(*req), lctx->ctrlq); + if (wr == NULL) { + log(LOG_ERR, "%s: allocation failure", __func__); + return (ENOMEM); + } + req = wrtod(wr); + + INIT_TP_WR(req, 0); + OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid)); + req->local_port = inp->inp_lport; + req->peer_port = 0; + req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; + req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; + req->peer_ip_hi = 0; + req->peer_ip_lo = 0; + req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan)); + req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) | + F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id)); + + t4_wrq_tx(sc, wr); + return (0); +} + +static int destroy_server(struct adapter *sc, struct listen_ctx *lctx) { struct wrqe *wr; @@ -398,13 +491,10 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) struct port_info *pi; struct inpcb *inp = tp->t_inpcb; struct listen_ctx *lctx; - int i; + int i, rc; INP_WLOCK_ASSERT(inp); - if ((inp->inp_vflag & INP_IPV4) == 0) - return (0); - #if 0 ADAPTER_LOCK(sc); if (IS_BUSY(sc)) { @@ -421,8 +511,9 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) goto done; /* no port that's UP with IFCAP_TOE enabled */ /* - * Find a running port with IFCAP_TOE4. We'll use the first such port's - * queues to send the passive open and receive the reply to it. + * Find a running port with IFCAP_TOE (4 or 6). We'll use the first + * such port's queues to send the passive open and receive the reply to + * it. * * XXX: need a way to mark a port in use by offload. if_cxgbe should * then reject any attempt to bring down such a port (and maybe reject @@ -430,7 +521,7 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) */ for_each_port(sc, i) { if (isset(&sc->open_device_map, i) && - sc->port[i]->ifp->if_capenable & IFCAP_TOE4) + sc->port[i]->ifp->if_capenable & IFCAP_TOE) break; } KASSERT(i < sc->params.nports, @@ -449,12 +540,17 @@ t4_listen_start(struct toedev *tod, struct tcpcb *tp) } listen_hash_add(sc, lctx); - CTR5(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p", __func__, - lctx->stid, tcpstates[tp->t_state], lctx, inp); + CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x", + __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp, + inp->inp_vflag); - if (create_server(sc, lctx) != 0) { - log(LOG_ERR, "%s: %s failed to create hw listener.\n", __func__, - device_get_nameunit(sc->dev)); + if (inp->inp_vflag & INP_IPV6) + rc = create_server6(sc, lctx); + else + rc = create_server(sc, lctx); + if (rc != 0) { + log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n", + __func__, device_get_nameunit(sc->dev), rc); (void) listen_hash_del(sc, inp); inp = release_lctx(sc, lctx); /* can't be freed, host stack has a reference */ @@ -558,7 +654,7 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) struct l2t_entry *e; struct tcpopt to; struct ip *ip = mtod(m, struct ip *); - struct tcphdr *th = (void *)(ip + 1); + struct tcphdr *th; wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr); if (wr == NULL) { @@ -566,6 +662,10 @@ t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m) return (EALREADY); } + if (ip->ip_v == IPVERSION) + th = (void *)(ip + 1); + else + th = (void *)((struct ip6_hdr *)ip + 1); bzero(&to, sizeof(to)); tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th), TO_SYN); @@ -608,7 +708,7 @@ do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss, lctx->flags &= ~LCTX_RPL_PENDING; if (status != CPL_ERR_NONE) - log(LOG_ERR, "listener with stid %u failed: %d", stid, status); + log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status); #ifdef INVARIANTS /* @@ -678,7 +778,7 @@ do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss, CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status); if (status != CPL_ERR_NONE) { - log(LOG_ERR, "%s: failed (%u) to close listener for stid %u", + log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n", __func__, status, stid); return (status); } @@ -735,8 +835,7 @@ do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss, CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d", __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status); - if (cpl->status == CPL_ERR_RTX_NEG_ADVICE || - cpl->status == CPL_ERR_PERSIST_NEG_ADVICE) + if (negative_advice(cpl->status)) return (0); /* Ignore negative advice */ INP_WLOCK(inp); @@ -855,7 +954,7 @@ mbuf_to_synqe(struct mbuf *m) return (NULL); synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE; } else { - synqe = (void *)(m->m_data + m->m_len + tspace - sizeof(*synqe)); + synqe = (void *)(m->m_data + m->m_len + tspace - len); synqe->flags = TPF_SYNQE; } @@ -936,21 +1035,29 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, const struct cpl_pass_accept_req *cpl = mtod(m, const void *); const struct ether_header *eh; unsigned int hlen = be32toh(cpl->hdr_len); - const struct ip *ip; + uintptr_t l3hdr; const struct tcphdr *tcp; eh = (const void *)(cpl + 1); - ip = (const void *)((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); - tcp = (const void *)((uintptr_t)ip + G_IP_HDR_LEN(hlen)); + l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen)); + tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen)); if (inc) { bzero(inc, sizeof(*inc)); - inc->inc_faddr = ip->ip_src; - inc->inc_laddr = ip->ip_dst; inc->inc_fport = tcp->th_sport; inc->inc_lport = tcp->th_dport; - if (ip->ip_v == 6) + if (((struct ip *)l3hdr)->ip_v == IPVERSION) { + const struct ip *ip = (const void *)l3hdr; + + inc->inc_faddr = ip->ip_src; + inc->inc_laddr = ip->ip_dst; + } else { + const struct ip6_hdr *ip6 = (const void *)l3hdr; + inc->inc_flags |= INC_ISIPV6; + inc->inc6_faddr = ip6->ip6_src; + inc->inc6_laddr = ip6->ip6_dst; + } } if (th) { @@ -959,6 +1066,105 @@ pass_accept_req_to_protohdrs(const struct mbuf *m, struct in_conninfo *inc, } } +static int +ifnet_has_ip6(struct ifnet *ifp, struct in6_addr *ip6) +{ + struct ifaddr *ifa; + struct sockaddr_in6 *sin6; + int found = 0; + struct in6_addr in6 = *ip6; + + /* Just as in ip6_input */ + if (in6_clearscope(&in6) || in6_clearscope(&in6)) + return (0); + in6_setscope(&in6, ifp, NULL); + + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + sin6 = (void *)ifa->ifa_addr; + if (sin6->sin6_family != AF_INET6) + continue; + + if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &in6)) { + found = 1; + break; + } + } + if_addr_runlock(ifp); + + return (found); +} + +static struct l2t_entry * +get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp, + struct in_conninfo *inc) +{ + struct rtentry *rt; + struct l2t_entry *e; + struct sockaddr_in6 sin6; + struct sockaddr *dst = (void *)&sin6; + + if (inc->inc_flags & INC_ISIPV6) { + dst->sa_len = sizeof(struct sockaddr_in6); + dst->sa_family = AF_INET6; + ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr; + + if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) { + /* no need for route lookup */ + e = t4_l2t_get(pi, ifp, dst); + return (e); + } + } else { + dst->sa_len = sizeof(struct sockaddr_in); + dst->sa_family = AF_INET; + ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr; + } + + rt = rtalloc1(dst, 0, 0); + if (rt == NULL) + return (NULL); + else { + struct sockaddr *nexthop; + + RT_UNLOCK(rt); + if (rt->rt_ifp != ifp) + e = NULL; + else { + if (rt->rt_flags & RTF_GATEWAY) + nexthop = rt->rt_gateway; + else + nexthop = dst; + e = t4_l2t_get(pi, ifp, nexthop); + } + RTFREE(rt); + } + + return (e); +} + +static int +ifnet_has_ip(struct ifnet *ifp, struct in_addr in) +{ + struct ifaddr *ifa; + struct sockaddr_in *sin; + int found = 0; + + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + sin = (void *)ifa->ifa_addr; + if (sin->sin_family != AF_INET) + continue; + + if (sin->sin_addr.s_addr == in.s_addr) { + found = 1; + break; + } + } + if_addr_runlock(ifp); + + return (found); +} + #define REJECT_PASS_ACCEPT() do { \ reject_reason = __LINE__; \ goto reject; \ @@ -994,10 +1200,8 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, struct tcphdr th; struct tcpopt to; struct port_info *pi; - struct ifnet *ifp, *ifp_vlan = NULL; + struct ifnet *hw_ifp, *ifp; struct l2t_entry *e = NULL; - struct rtentry *rt; - struct sockaddr_in nam; int rscale, mtu_idx, rx_credits, rxqid, ulp_mode; struct synq_entry *synqe = NULL; int reject_reason; @@ -1017,31 +1221,24 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, t4opt_to_tcpopt(&cpl->tcpopt, &to); pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))]; - ifp = pi->ifp; - m->m_pkthdr.rcvif = ifp; - tod = TOEDEV(ifp); + hw_ifp = pi->ifp; /* the cxgbeX ifnet */ + m->m_pkthdr.rcvif = hw_ifp; + tod = TOEDEV(hw_ifp); /* - * Don't offload if the interface that received the SYN doesn't have - * IFCAP_TOE enabled. - */ - if ((ifp->if_capenable & IFCAP_TOE4) == 0) - REJECT_PASS_ACCEPT(); - - /* Don't offload IPv6 connections. XXX: add IPv6 support */ - if (inc.inc_flags & INC_ISIPV6) - REJECT_PASS_ACCEPT(); - - /* - * Don't offload if the SYN had a VLAN tag and the vid doesn't match - * anything on this interface. + * Figure out if there is a pseudo interface (vlan, lagg, etc.) + * involved. Don't offload if the SYN had a VLAN tag and the vid + * doesn't match anything on this interface. + * + * XXX: lagg support, lagg + vlan support. */ vid = EVL_VLANOFTAG(be16toh(cpl->vlan)); if (vid != 0xfff) { - ifp_vlan = VLAN_DEVAT(ifp, vid); - if (ifp_vlan == NULL) + ifp = VLAN_DEVAT(hw_ifp, vid); + if (ifp == NULL) REJECT_PASS_ACCEPT(); - } + } else + ifp = hw_ifp; /* * Don't offload if the peer requested a TCP option that's not known to @@ -1050,31 +1247,36 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, if (cpl->tcpopt.unknown) REJECT_PASS_ACCEPT(); - /* - * Don't offload if the outgoing interface for the route back to the - * peer is not the same as the interface that received the SYN. - * XXX: too restrictive. - */ - nam.sin_len = sizeof(nam); - nam.sin_family = AF_INET; - nam.sin_addr = inc.inc_faddr; - rt = rtalloc1((struct sockaddr *)&nam, 0, 0); - if (rt == NULL) - REJECT_PASS_ACCEPT(); - else { - struct sockaddr *nexthop; + if (inc.inc_flags & INC_ISIPV6) { - RT_UNLOCK(rt); - nexthop = rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : - (struct sockaddr *)&nam; - if (rt->rt_ifp == ifp || - (ifp_vlan != NULL && rt->rt_ifp == ifp_vlan)) - e = t4_l2t_get(pi, rt->rt_ifp, nexthop); - RTFREE(rt); - if (e == NULL) - REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */ + /* Don't offload if the ifcap isn't enabled */ + if ((ifp->if_capenable & IFCAP_TOE6) == 0) + REJECT_PASS_ACCEPT(); + + /* + * SYN must be directed to an IP6 address on this ifnet. This + * is more restrictive than in6_localip. + */ + if (!ifnet_has_ip6(ifp, &inc.inc6_laddr)) + REJECT_PASS_ACCEPT(); + } else { + + /* Don't offload if the ifcap isn't enabled */ + if ((ifp->if_capenable & IFCAP_TOE4) == 0) + REJECT_PASS_ACCEPT(); + + /* + * SYN must be directed to an IP address on this ifnet. This + * is more restrictive than in_localip. + */ + if (!ifnet_has_ip(ifp, inc.inc_laddr)) + REJECT_PASS_ACCEPT(); } + e = get_l2te_for_nexthop(pi, ifp, &inc); + if (e == NULL) + REJECT_PASS_ACCEPT(); + synqe = mbuf_to_synqe(m); if (synqe == NULL) REJECT_PASS_ACCEPT(); @@ -1133,7 +1335,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, synqe->lctx = lctx; synqe->syn = m; m = NULL; - refcount_init(&synqe->refcnt, 0); + refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */ synqe->l2e_idx = e->idx; synqe->rcv_bufsize = rx_credits; atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr); @@ -1166,7 +1368,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, */ m = m_dup(synqe->syn, M_NOWAIT); if (m) - m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.rcvif = hw_ifp; remove_tid(sc, synqe->tid); free(wr, M_CXGBE); @@ -1179,6 +1381,7 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, if (inp) INP_WUNLOCK(inp); + release_synqe(synqe); /* extra hold */ REJECT_PASS_ACCEPT(); } @@ -1193,15 +1396,19 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss, * this tid because there was no L2T entry for the tid at that * time. Abort it now. The reply to the abort will clean up. */ - CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, ABORT", - __func__, stid, tid, lctx, synqe); - send_reset_synqe(tod, synqe); + CTR6(KTR_CXGBE, + "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT", + __func__, stid, tid, lctx, synqe, synqe->flags); + if (!(synqe->flags & TPF_SYNQE_EXPANDED)) + send_reset_synqe(tod, synqe); INP_WUNLOCK(inp); + release_synqe(synqe); /* extra hold */ return (__LINE__); } INP_WUNLOCK(inp); + release_synqe(synqe); /* extra hold */ return (0); reject: CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid, @@ -1216,7 +1423,7 @@ reject: m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m->m_pkthdr.csum_data = 0xffff; - ifp->if_input(ifp, m); + hw_ifp->if_input(hw_ifp, m); } return (reject_reason); diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 330172d..64e8b26 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #include <sys/param.h> #include <sys/types.h> @@ -40,10 +41,14 @@ __FBSDID("$FreeBSD$"); #include <sys/domain.h> #include <sys/socket.h> #include <sys/socketvar.h> +#include <net/if.h> #include <netinet/in.h> #include <netinet/in_pcb.h> +#include <netinet/in_var.h> #include <netinet/ip.h> +#include <netinet/ip6.h> #include <netinet/tcp_var.h> +#include <netinet6/scope6_var.h> #define TCPSTATES #include <netinet/tcp_fsm.h> #include <netinet/toecore.h> @@ -58,6 +63,9 @@ __FBSDID("$FreeBSD$"); static struct protosw ddp_protosw; static struct pr_usrreqs ddp_usrreqs; +static struct protosw ddp6_protosw; +static struct pr_usrreqs ddp6_usrreqs; + /* Module ops */ static int t4_tom_mod_load(void); static int t4_tom_mod_unload(void); @@ -77,6 +85,11 @@ static void queue_tid_release(struct adapter *, int); static void release_offload_resources(struct toepcb *); static int alloc_tid_tabs(struct tid_info *); static void free_tid_tabs(struct tid_info *); +static int add_lip(struct adapter *, struct in6_addr *); +static int delete_lip(struct adapter *, struct in6_addr *); +static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *); +static void init_clip_table(struct adapter *, struct tom_data *); +static void destroy_clip_table(struct adapter *, struct tom_data *); static void free_tom_data(struct adapter *, struct tom_data *); struct toepcb * @@ -170,8 +183,12 @@ offload_socket(struct socket *so, struct toepcb *toep) sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; - if (toep->ulp_mode == ULP_MODE_TCPDDP) - so->so_proto = &ddp_protosw; + if (toep->ulp_mode == ULP_MODE_TCPDDP) { + if (inp->inp_vflag & INP_IPV6) + so->so_proto = &ddp6_protosw; + else + so->so_proto = &ddp_protosw; + } SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ @@ -237,8 +254,8 @@ release_offload_resources(struct toepcb *toep) KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: %p is still attached.", __func__, toep)); - CTR4(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p)", - __func__, toep, tid, toep->l2te); + CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", + __func__, toep, tid, toep->l2te, toep->ce); if (toep->ulp_mode == ULP_MODE_TCPDDP) release_ddp_resources(toep); @@ -251,6 +268,9 @@ release_offload_resources(struct toepcb *toep) release_tid(sc, tid, toep->ctrlq); } + if (toep->ce) + release_lip(td, toep->ce); + mtx_lock(&td->toep_list_lock); TAILQ_REMOVE(&td->toep_list, toep, link); mtx_unlock(&td->toep_list_lock); @@ -394,7 +414,7 @@ int find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) { unsigned short *mtus = &sc->params.mtus[0]; - int i = 0, mss; + int i, mss, n; KASSERT(inc != NULL || pmss > 0, ("%s: at least one of inc/pmss must be specified", __func__)); @@ -403,8 +423,13 @@ find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) if (pmss > 0 && mss > pmss) mss = pmss; - while (i < NMTUS - 1 && mtus[i + 1] <= mss + 40) - ++i; + if (inc->inc_flags & INC_ISIPV6) + n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + else + n = sizeof(struct ip) + sizeof(struct tcphdr); + + for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++) + continue; return (i); } @@ -513,6 +538,24 @@ select_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode) return (htobe32(ntuple)); } +void +set_tcpddp_ulp_mode(struct toepcb *toep) +{ + + toep->ulp_mode = ULP_MODE_TCPDDP; + toep->ddp_flags = DDP_OK; + toep->ddp_score = DDP_LOW_SCORE; +} + +int +negative_advice(int status) +{ + + return (status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE); +} + static int alloc_tid_tabs(struct tid_info *t) { @@ -536,12 +579,10 @@ alloc_tid_tabs(struct tid_info *t) t->atid_tab[t->natids - 1].next = NULL; mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); - t->stid_tab = (union serv_entry *)&t->atid_tab[t->natids]; - t->sfree = t->stid_tab; + t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids]; t->stids_in_use = 0; - for (i = 1; i < t->nstids; i++) - t->stid_tab[i - 1].next = &t->stid_tab[i]; - t->stid_tab[t->nstids - 1].next = NULL; + TAILQ_INIT(&t->stids); + t->nstids_free_head = t->nstids; atomic_store_rel_int(&t->tids_in_use, 0); @@ -567,9 +608,157 @@ free_tid_tabs(struct tid_info *t) mtx_destroy(&t->stid_lock); } +static int +add_lip(struct adapter *sc, struct in6_addr *lip) +{ + struct fw_clip_cmd c; + + ASSERT_SYNCHRONIZED_OP(sc); + /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ + + memset(&c, 0, sizeof(c)); + c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_WRITE); + c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + + return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); +} + +static int +delete_lip(struct adapter *sc, struct in6_addr *lip) +{ + struct fw_clip_cmd c; + + ASSERT_SYNCHRONIZED_OP(sc); + /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ + + memset(&c, 0, sizeof(c)); + c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | + F_FW_CMD_READ); + c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); + c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; + c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; + + return (t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); +} + +static struct clip_entry * +search_lip(struct tom_data *td, struct in6_addr *lip) +{ + struct clip_entry *ce; + + mtx_assert(&td->clip_table_lock, MA_OWNED); + + TAILQ_FOREACH(ce, &td->clip_table, link) { + if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) + return (ce); + } + + return (NULL); +} + +struct clip_entry * +hold_lip(struct tom_data *td, struct in6_addr *lip) +{ + struct clip_entry *ce; + + mtx_lock(&td->clip_table_lock); + ce = search_lip(td, lip); + if (ce != NULL) + ce->refcount++; + mtx_unlock(&td->clip_table_lock); + + return (ce); +} + +void +release_lip(struct tom_data *td, struct clip_entry *ce) +{ + + mtx_lock(&td->clip_table_lock); + KASSERT(search_lip(td, &ce->lip) == ce, + ("%s: CLIP entry %p p not in CLIP table.", __func__, ce)); + KASSERT(ce->refcount > 0, + ("%s: CLIP entry %p has refcount 0", __func__, ce)); + --ce->refcount; + mtx_unlock(&td->clip_table_lock); +} + +static void +init_clip_table(struct adapter *sc, struct tom_data *td) +{ + struct in6_ifaddr *ia; + struct in6_addr *lip, tlip; + struct clip_entry *ce; + + ASSERT_SYNCHRONIZED_OP(sc); + + mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); + TAILQ_INIT(&td->clip_table); + + IN6_IFADDR_RLOCK(); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + lip = &ia->ia_addr.sin6_addr; + + KASSERT(!IN6_IS_ADDR_MULTICAST(lip), + ("%s: mcast address in in6_ifaddr list", __func__)); + + if (IN6_IS_ADDR_LOOPBACK(lip)) + continue; + if (IN6_IS_SCOPE_EMBED(lip)) { + /* Remove the embedded scope */ + tlip = *lip; + lip = &tlip; + in6_clearscope(lip); + } + /* + * XXX: how to weed out the link local address for the loopback + * interface? It's fe80::1 usually (always?). + */ + + mtx_lock(&td->clip_table_lock); + if (search_lip(td, lip) == NULL) { + ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); + memcpy(&ce->lip, lip, sizeof(ce->lip)); + ce->refcount = 0; + if (add_lip(sc, lip) == 0) + TAILQ_INSERT_TAIL(&td->clip_table, ce, link); + else + free(ce, M_CXGBE); + } + mtx_unlock(&td->clip_table_lock); + } + IN6_IFADDR_RUNLOCK(); +} + +static void +destroy_clip_table(struct adapter *sc, struct tom_data *td) +{ + struct clip_entry *ce, *ce_temp; + + if (mtx_initialized(&td->clip_table_lock)) { + mtx_lock(&td->clip_table_lock); + TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) { + KASSERT(ce->refcount == 0, + ("%s: CLIP entry %p still in use (%d)", __func__, + ce, ce->refcount)); + TAILQ_REMOVE(&td->clip_table, ce, link); + delete_lip(sc, &ce->lip); + free(ce, M_CXGBE); + } + mtx_unlock(&td->clip_table_lock); + mtx_destroy(&td->clip_table_lock); + } +} + static void free_tom_data(struct adapter *sc, struct tom_data *td) { + + ASSERT_SYNCHRONIZED_OP(sc); + KASSERT(TAILQ_EMPTY(&td->toep_list), ("%s: TOE PCB list is not empty.", __func__)); KASSERT(td->lctx_count == 0, @@ -578,6 +767,7 @@ free_tom_data(struct adapter *sc, struct tom_data *td) t4_uninit_l2t_cpl_handlers(sc); t4_uninit_cpl_io_handlers(sc); t4_uninit_ddp(sc, td); + destroy_clip_table(sc, td); if (td->listen_mask != 0) hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); @@ -602,7 +792,7 @@ t4_tom_activate(struct adapter *sc) struct toedev *tod; int i, rc; - ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */ + ASSERT_SYNCHRONIZED_OP(sc); /* per-adapter softc for TOM */ td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); @@ -623,8 +813,12 @@ t4_tom_activate(struct adapter *sc) if (rc != 0) goto done; + /* DDP page pods and CPL handlers */ t4_init_ddp(sc, td); + /* CLIP table for IPv6 offload */ + init_clip_table(sc, td); + /* CPL handlers */ t4_init_connect_cpl_handlers(sc); t4_init_l2t_cpl_handlers(sc); @@ -668,7 +862,7 @@ t4_tom_deactivate(struct adapter *sc) int rc = 0; struct tom_data *td = sc->tom_softc; - ADAPTER_LOCK_ASSERT_OWNED(sc); /* for sc->flags */ + ASSERT_SYNCHRONIZED_OP(sc); if (td == NULL) return (0); /* XXX. KASSERT? */ @@ -700,17 +894,24 @@ static int t4_tom_mod_load(void) { int rc; - struct protosw *tcp_protosw; + struct protosw *tcp_protosw, *tcp6_protosw; tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); if (tcp_protosw == NULL) return (ENOPROTOOPT); - bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw)); bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs)); ddp_usrreqs.pru_soreceive = t4_soreceive_ddp; ddp_protosw.pr_usrreqs = &ddp_usrreqs; + tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM); + if (tcp6_protosw == NULL) + return (ENOPROTOOPT); + bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw)); + bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs)); + ddp6_usrreqs.pru_soreceive = t4_soreceive_ddp; + ddp6_protosw.pr_usrreqs = &ddp6_usrreqs; + rc = t4_register_uld(&tom_uld_info); if (rc != 0) t4_tom_mod_unload(); @@ -721,11 +922,14 @@ t4_tom_mod_load(void) static void tom_uninit(struct adapter *sc, void *arg __unused) { + if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomun")) + return; + /* Try to free resources (works only if no port has IFCAP_TOE) */ - ADAPTER_LOCK(sc); if (sc->flags & TOM_INIT_DONE) t4_deactivate_uld(sc, ULD_TOM); - ADAPTER_UNLOCK(sc); + + end_synchronized_op(sc, LOCK_HELD); } static int diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h index 9549b0b..d0fbbd2 100644 --- a/sys/dev/cxgbe/tom/t4_tom.h +++ b/sys/dev/cxgbe/tom/t4_tom.h @@ -109,6 +109,7 @@ struct toepcb { struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ctrlq; struct l2t_entry *l2te; /* L2 table entry used by this connection */ + struct clip_entry *ce; /* CLIP table entry used by this tid */ int tid; /* Connection identifier */ unsigned int tx_credits;/* tx WR credits (in 16 byte units) remaining */ unsigned int sb_cc; /* last noted value of so_rcv->sb_cc */ @@ -140,15 +141,6 @@ struct flowc_tx_params { #define DDP_LOW_SCORE 1 #define DDP_HIGH_SCORE 3 -static inline void -set_tcpddp_ulp_mode(struct toepcb *toep) -{ - - toep->ulp_mode = ULP_MODE_TCPDDP; - toep->ddp_flags = DDP_OK; - toep->ddp_score = DDP_LOW_SCORE; -} - /* * Compressed state for embryonic connections for a listener. Barely fits in * 64B, try not to grow it further. @@ -174,6 +166,7 @@ struct listen_ctx { LIST_ENTRY(listen_ctx) link; /* listen hash linkage */ volatile int refcount; int stid; + struct stid_region stid_region; int flags; struct inpcb *inp; /* listening socket's inp */ struct sge_wrq *ctrlq; @@ -183,6 +176,12 @@ struct listen_ctx { TAILQ_HEAD(ppod_head, ppod_region); +struct clip_entry { + TAILQ_ENTRY(clip_entry) link; + struct in6_addr lip; /* local IPv6 address */ + u_int refcount; +}; + struct tom_data { struct toedev tod; @@ -200,6 +199,9 @@ struct tom_data { int nppods_free; /* # of available ppods */ int nppods_free_head; /* # of available ppods at the begining */ struct ppod_head ppods; + + struct mtx clip_table_lock; + TAILQ_HEAD(, clip_entry) clip_table; }; static inline struct tom_data * @@ -233,6 +235,10 @@ int select_rcv_wscale(void); uint64_t calc_opt0(struct socket *, struct port_info *, struct l2t_entry *, int, int, int, int); uint32_t select_ntuple(struct port_info *, struct l2t_entry *, uint32_t); +void set_tcpddp_ulp_mode(struct toepcb *); +int negative_advice(int); +struct clip_entry *hold_lip(struct tom_data *, struct in6_addr *); +void release_lip(struct tom_data *, struct clip_entry *); /* t4_connect.c */ void t4_init_connect_cpl_handlers(struct adapter *); diff --git a/sys/dev/cxgbe/tom/t4_tom_l2t.c b/sys/dev/cxgbe/tom/t4_tom_l2t.c index ffe64c5..7a75394 100644 --- a/sys/dev/cxgbe/tom/t4_tom_l2t.c +++ b/sys/dev/cxgbe/tom/t4_tom_l2t.c @@ -27,6 +27,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_inet6.h" #ifdef TCP_OFFLOAD #include <sys/param.h> @@ -34,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/module.h> #include <sys/bus.h> +#include <sys/fnv_hash.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/rwlock.h> @@ -48,28 +50,89 @@ __FBSDID("$FreeBSD$"); #include <netinet/toecore.h> #include "common/common.h" -#include "common/jhash.h" #include "common/t4_msg.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" #define VLAN_NONE 0xfff -#define SA(x) ((struct sockaddr *)(x)) -#define SIN(x) ((struct sockaddr_in *)(x)) -#define SINADDR(x) (SIN(x)->sin_addr.s_addr) - static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) { + if (atomic_fetchadd_int(&e->refcnt, 1) == 0) /* 0 -> 1 transition */ atomic_subtract_int(&d->nfree, 1); } -static inline unsigned int -arp_hash(const uint32_t key, int ifindex) +static inline u_int +l2_hash(struct l2t_data *d, const struct sockaddr *sa, int ifindex) { - return jhash_2words(key, ifindex, 0) & (L2T_SIZE - 1); + u_int hash, half = d->l2t_size / 2, start = 0; + const void *key; + size_t len; + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + key = &sin->sin_addr; + len = sizeof(sin->sin_addr); + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + key = &sin6->sin6_addr; + len = sizeof(sin6->sin6_addr); + start = half; + } + + hash = fnv_32_buf(key, len, FNV1_32_INIT); + hash = fnv_32_buf(&ifindex, sizeof(ifindex), hash); + hash %= half; + + return (hash + start); +} + +static inline int +l2_cmp(const struct sockaddr *sa, struct l2t_entry *e) +{ + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + return (e->addr[0] != sin->sin_addr.s_addr); + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + return (memcmp(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr))); + } +} + +static inline void +l2_store(const struct sockaddr *sa, struct l2t_entry *e) +{ + + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); + + if (sa->sa_family == AF_INET) { + const struct sockaddr_in *sin = (const void *)sa; + + e->addr[0] = sin->sin_addr.s_addr; + e->ipv6 = 0; + } else { + const struct sockaddr_in6 *sin6 = (const void *)sa; + + memcpy(&e->addr[0], &sin6->sin6_addr, sizeof(e->addr)); + e->ipv6 = 1; + } } /* @@ -100,7 +163,7 @@ send_pending(struct adapter *sc, struct l2t_entry *e) static void resolution_failed_for_wr(struct wrqe *wr) { - log(LOG_ERR, "%s: leaked work request %p, wr_len %d", __func__, wr, + log(LOG_ERR, "%s: leaked work request %p, wr_len %d\n", __func__, wr, wr->wr_len); /* free(wr, M_CXGBE); */ @@ -175,15 +238,25 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e) struct tom_data *td = sc->tom_softc; struct toedev *tod = &td->tod; struct sockaddr_in sin = {0}; + struct sockaddr_in6 sin6 = {0}; + struct sockaddr *sa; uint8_t dmac[ETHER_ADDR_LEN]; uint16_t vtag = VLAN_NONE; int rc; - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - SINADDR(&sin) = e->addr; + if (e->ipv6 == 0) { + sin.sin_family = AF_INET; + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_addr.s_addr = e->addr[0]; + sa = (void *)&sin; + } else { + sin6.sin6_family = AF_INET6; + sin6.sin6_len = sizeof(struct sockaddr_in6); + memcpy(&sin6.sin6_addr, &e->addr[0], sizeof(e->addr)); + sa = (void *)&sin6; + } - rc = toe_l2_resolve(tod, e->ifp, SA(&sin), dmac, &vtag); + rc = toe_l2_resolve(tod, e->ifp, sa, dmac, &vtag); if (rc == EWOULDBLOCK) return (rc); @@ -263,7 +336,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, struct adapter *sc = iq->adapter; const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int tid = GET_TID(rpl); - unsigned int idx = tid & (L2T_SIZE - 1); + unsigned int idx = tid % L2T_SIZE; int rc; rc = do_l2t_write_rpl(iq, rss, m); @@ -271,7 +344,7 @@ do_l2t_write_rpl2(struct sge_iq *iq, const struct rss_header *rss, return (rc); if (tid & F_SYNC_WR) { - struct l2t_entry *e = &sc->l2t->l2tab[idx]; + struct l2t_entry *e = &sc->l2t->l2tab[idx - sc->vres.l2t.start]; mtx_lock(&e->lock); if (e->state != L2T_STATE_SWITCHING) { @@ -310,21 +383,22 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) { struct l2t_entry *e; struct l2t_data *d = pi->adapter->l2t; - uint32_t addr = SINADDR(sa); - int hash = arp_hash(addr, ifp->if_index); - unsigned int smt_idx = pi->port_id; + u_int hash, smt_idx = pi->port_id; - if (sa->sa_family != AF_INET) - return (NULL); /* XXX: no IPv6 support right now */ + KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6, + ("%s: sa %p has unexpected sa_family %d", __func__, sa, + sa->sa_family)); #ifndef VLAN_TAG if (ifp->if_type == IFT_L2VLAN) return (NULL); #endif + hash = l2_hash(d, sa, ifp->if_index); rw_wlock(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) { - if (e->addr == addr && e->ifp == ifp && e->smt_idx == smt_idx) { + if (l2_cmp(sa, e) == 0 && e->ifp == ifp && + e->smt_idx == smt_idx) { l2t_hold(d, e); goto done; } @@ -338,7 +412,7 @@ t4_l2t_get(struct port_info *pi, struct ifnet *ifp, struct sockaddr *sa) d->l2tab[hash].first = e; e->state = L2T_STATE_RESOLVING; - e->addr = addr; + l2_store(sa, e); e->ifp = ifp; e->smt_idx = smt_idx; e->hash = hash; @@ -368,14 +442,14 @@ t4_l2_update(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa, struct adapter *sc = tod->tod_softc; struct l2t_entry *e; struct l2t_data *d = sc->l2t; - uint32_t addr = SINADDR(sa); - int hash = arp_hash(addr, ifp->if_index); + u_int hash; KASSERT(d != NULL, ("%s: no L2 table", __func__)); + hash = l2_hash(d, sa, ifp->if_index); rw_rlock(&d->lock); for (e = d->l2tab[hash].first; e; e = e->next) { - if (e->addr == addr && e->ifp == ifp) { + if (l2_cmp(sa, e) == 0 && e->ifp == ifp) { mtx_lock(&e->lock); if (atomic_load_acq_int(&e->refcnt)) goto found; |