From 49e613fa454dfc8bdee157e4ef30055fe784332b Mon Sep 17 00:00:00 2001 From: kmacy Date: Mon, 5 May 2008 01:41:53 +0000 Subject: MFSVN: - add / remove clients from cxgb_main.c now - change ifdef TOE_ENABLED to TCP_OFFLOAD_DISABLE - update copyrights - fix transmit data mismatch bug caused by not setting SB_NOCOALESCE on tx sockbuf on passive connections - fix receive sequence mismatch bug caused by not setting SB_NOCOALESCE on rx sockbuf on passive connections - don't sleep without checking SBS_CANTRCVMORE first - various ddp ordering fixes Supported by: Chelsio Inc. --- sys/dev/cxgb/cxgb_config.h | 2 +- sys/dev/cxgb/cxgb_main.c | 9 +++- sys/dev/cxgb/cxgb_offload.c | 5 +- sys/dev/cxgb/t3cdev.h | 2 +- sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c | 38 ++++++++------ sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c | 93 +++++++++++++++++++-------------- sys/dev/cxgb/ulp/tom/cxgb_ddp.c | 10 ++-- sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h | 5 +- sys/dev/cxgb/ulp/tom/cxgb_toepcb.h | 4 +- 9 files changed, 99 insertions(+), 69 deletions(-) (limited to 'sys/dev/cxgb') diff --git a/sys/dev/cxgb/cxgb_config.h b/sys/dev/cxgb/cxgb_config.h index 6b072c3..71a1475 100644 --- a/sys/dev/cxgb/cxgb_config.h +++ b/sys/dev/cxgb/cxgb_config.h @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/cxgb/cxgb_main.c b/sys/dev/cxgb/cxgb_main.c index 838ab51..d77996a 100644 --- a/sys/dev/cxgb/cxgb_main.c +++ b/sys/dev/cxgb/cxgb_main.c @@ -1741,6 +1741,10 @@ offload_open(struct port_info *pi) adapter->port[0].ifp->if_mtu : 0xffff); init_smt(adapter); + /* Call back all registered clients */ + cxgb_add_clients(tdev); + + /* restore them in case the offload module has changed them */ if (err) { t3_tp_set_offload_mode(adapter, 0); @@ -1757,7 +1761,10 @@ offload_close(struct t3cdev *tdev) if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) return (0); - + + /* Call back all registered clients */ + cxgb_remove_clients(tdev); + tdev->lldev = NULL; cxgb_set_dummy_ops(tdev); t3_tp_set_offload_mode(adapter, 0); diff --git a/sys/dev/cxgb/cxgb_offload.c b/sys/dev/cxgb/cxgb_offload.c index 1eeafaf..d865e7f 100644 --- a/sys/dev/cxgb/cxgb_offload.c +++ b/sys/dev/cxgb/cxgb_offload.c @@ -1,7 +1,6 @@ - /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -104,7 +103,7 @@ unregister_tdev(struct t3cdev *tdev) mtx_unlock(&cxgb_db_lock); } -#ifdef TOE_ENABLED +#ifndef TCP_OFFLOAD_DISABLE /** * cxgb_register_client - register an offload client * @client: the client diff --git a/sys/dev/cxgb/t3cdev.h b/sys/dev/cxgb/t3cdev.h index 714557b..e0004b7 100644 --- a/sys/dev/cxgb/t3cdev.h +++ b/sys/dev/cxgb/t3cdev.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2007, Chelsio Inc. + * Copyright (c) 2007-2008, Chelsio Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c index e1d78f8..1741c13 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c @@ -173,7 +173,8 @@ SBAPPEND(struct sockbuf *sb, struct mbuf *n) m->m_next, m->m_nextpkt, m->m_flags)); m = m->m_next; } - sbappend_locked(sb, n); + KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set")); + sbappendstream_locked(sb, n); m = sb->sb_mb; while (m) { @@ -1449,7 +1450,10 @@ active_open_failed(struct toepcb *toep, struct mbuf *m) } else #endif { - inp_wlock(inp); + inp_wlock(inp); + /* + * drops the inpcb lock + */ fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status)); } @@ -1502,6 +1506,9 @@ act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) inp_wlock(inp); if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { + /* + * drops the inpcb lock + */ fail_act_open(so, EHOSTUNREACH); printf("freeing %p\n", m); @@ -1647,12 +1654,14 @@ t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) return (EPERM); inp = so_sotoinpcb(so); + inp_wlock(inp); inp_ip_tos_set(inp, optval); #if 0 inp->inp_ip_tos = optval; #endif t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe); - + inp_wunlock(inp); + return (0); } @@ -1715,7 +1724,7 @@ t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) inp_wunlock(inp); - if (oldval != tp->t_flags) + if (oldval != tp->t_flags && (tp->t_toe != NULL)) t3_set_nagle(tp->t_toe); } @@ -2329,13 +2338,14 @@ process_ddp_complete(struct toepcb *toep, struct mbuf *m) #endif inp_wunlock(tp->t_inpcb); - KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); + KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len)); CTR5(KTR_TOM, "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u " "ddp_report 0x%x offset %u, len %u", tp->rcv_nxt, bsp->cur_offset, ddp_report, G_DDP_OFFSET(ddp_report), m->m_len); - + + m->m_cur_offset = bsp->cur_offset; bsp->cur_offset += m->m_len; if (!(bsp->flags & DDP_BF_NOFLIP)) { @@ -2518,7 +2528,10 @@ do_peer_fin(struct toepcb *toep, struct mbuf *m) } } if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { + CTR1(KTR_TOM, + "waking up waiters for cantrcvmore on %p ", so); socantrcvmore(so); + /* * If connection is half-synchronized * (ie NEEDSYN flag on) then delay ACK, @@ -2567,9 +2580,6 @@ do_peer_fin(struct toepcb *toep, struct mbuf *m) } inp_wunlock(tp->t_inpcb); - DPRINTF("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(rcv), rcv->sb_flags); - - if (action == TCP_TIMEWAIT) { enter_timewait(tp); } else if (action == TCP_DROP) { @@ -2577,7 +2587,7 @@ do_peer_fin(struct toepcb *toep, struct mbuf *m) } else if (action == TCP_CLOSE) { tcp_offload_close(tp); } - + #ifdef notyet /* Do not send POLL_HUP for half duplex close. */ if ((sk->sk_shutdown & SEND_SHUTDOWN) || @@ -3641,10 +3651,6 @@ do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) inp_wunlock(tp->t_inpcb); - snd = so_sockbuf_snd(so); - rcv = so_sockbuf_rcv(so); - - so_lock(so); LIST_REMOVE(toep, synq_entry); so_unlock(so); @@ -3665,7 +3671,9 @@ do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) tp = so_sototcpcb(so); inp_wlock(tp->t_inpcb); - + snd = so_sockbuf_snd(so); + rcv = so_sockbuf_rcv(so); + snd->sb_flags |= SB_NOCOALESCE; rcv->sb_flags |= SB_NOCOALESCE; diff --git a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c index e6cfa59..77a3d76 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -58,14 +58,10 @@ __FBSDID("$FreeBSD$"); #include #include - #include #include #include - - #include - #include #include #include @@ -320,11 +316,10 @@ copy_data(const struct mbuf *m, int offset, int len, struct uio *uio) { struct iovec *to = uio->uio_iov; int err; - - if (__predict_true(!is_ddp(m))) { /* RX_DATA */ + if (__predict_true(!is_ddp(m))) /* RX_DATA */ return m_uiomove(m, offset, len, uio); - } if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */ + if (__predict_true(m->m_ddp_flags & DDP_BF_NOCOPY)) { /* user DDP */ to->iov_len -= len; to->iov_base = ((caddr_t)to->iov_base) + len; uio->uio_iov = to; @@ -536,9 +531,7 @@ cxgb_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) { -#ifdef notyet - SOCKBUF_LOCK_ASSERT(sb); -#endif + sockbuf_lock_assert(sb); /* * First, update for the new value of nextrecord. If necessary, make * it the first record. @@ -674,6 +667,9 @@ restart: if (copied >= target) user_ddp_ok = 0; + if (rcv->sb_state & SBS_CANTRCVMORE) + goto done; + CTR0(KTR_TOM, "ddp pending -- waiting"); if ((err = sbwait(rcv)) != 0) goto done; //for timers to work await_ddp_completion(sk, flags, &timeo); @@ -695,18 +691,35 @@ restart: } if (rcv->sb_mb) goto restart; - if ((err = sbwait(rcv)) != 0) - goto done; + + if (rcv->sb_state & SBS_CANTRCVMORE) + goto done; + + CTR0(KTR_TOM, "no buffers -- waiting"); + + if ((err = sbwait(rcv)) != 0) + goto done; } goto restart; got_mbuf: - CTR6(KTR_TOM, "t3_soreceive: ddp=%d m_len=%u resid=%u " - "m_seq=0x%08x copied_seq=0x%08x copied_unacked=%u", - is_ddp(m), m->m_pkthdr.len, len, m->m_seq, toep->tp_copied_seq, - copied_unacked); - KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d m_pktlen=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len, m->m_pkthdr.len)); - KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x m->m_len=%d", - m->m_next, m->m_nextpkt, m->m_flags, m->m_len)); + /* + * Adjust the mbuf seqno if it has already been partially processed by + * soreceive_generic + */ + if (m->m_pkthdr.len != m->m_len) { + m->m_seq += m->m_pkthdr.len - m->m_len; + m->m_pkthdr.len = m->m_len; + } + + CTR6(KTR_TOM, "t3_soreceive: ddp_flags=0x%x m_len=%u resid=%u " + "m_seq=0x%08x c_seq=0x%08x c_unack=%u", + (is_ddp(m) ? m->m_ddp_flags : 0), m->m_pkthdr.len, len, + m->m_seq, toep->tp_copied_seq, copied_unacked); + KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), + ("unexpected type M_EXT=%d ext_type=%d m_len=%d m_pktlen=%d\n", !!(m->m_flags & M_EXT), + m->m_ext.ext_type, m->m_len, m->m_pkthdr.len)); + KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p" + " m_flags=0x%x m->m_len=%d", m->m_next, m->m_nextpkt, m->m_flags, m->m_len)); if (m->m_pkthdr.len == 0) { if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0) panic("empty mbuf and NOCOPY not set\n"); @@ -716,15 +729,10 @@ got_mbuf: goto done; } - - if (is_ddp(m)) { - KASSERT((int32_t)(toep->tp_copied_seq + copied_unacked - m->m_seq) >= 0, - ("offset will go negative: offset=%d copied_seq=0x%08x copied_unacked=%d m_seq=0x%08x", - offset, toep->tp_copied_seq, copied_unacked, m->m_seq)); - - offset = toep->tp_copied_seq + copied_unacked - m->m_seq; - } else - offset = 0; + KASSERT((int32_t)(toep->tp_copied_seq + copied_unacked - m->m_seq) >= 0, + ("offset will go negative: offset=%d copied_seq=0x%08x copied_unacked=%d m_seq=0x%08x", + offset, toep->tp_copied_seq, copied_unacked, m->m_seq)); + offset = toep->tp_copied_seq + copied_unacked - m->m_seq; if (offset >= m->m_pkthdr.len) panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x " @@ -737,8 +745,10 @@ got_mbuf: if (is_ddp(m) && (m->m_ddp_flags & DDP_BF_NOCOPY)) panic("bad state in t3_soreceive len=%d avail=%d offset=%d\n", len, avail, offset); avail = len; - } - + rcv->sb_flags |= SB_IN_TOE; + } else if (p->kbuf_posted == 0 && p->user_ddp_pending == 0) + rcv->sb_flags &= ~SB_IN_TOE; + #ifdef URGENT_DATA_SUPPORTED /* * Check if the data we are preparing to copy contains urgent @@ -800,7 +810,7 @@ got_mbuf: err = EFAULT; goto done_unlocked; } - + sockbuf_lock(rcv); if (avail != (resid - uio->uio_resid)) printf("didn't copy all bytes :-/ avail=%d offset=%d pktlen=%d resid=%d uio_resid=%d copied=%d copied_unacked=%d is_ddp(m)=%d\n", @@ -852,6 +862,7 @@ skip_copy: while (count > 0) { count -= m->m_len; KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); + CTR2(KTR_TOM, "freeing mbuf m_len = %d pktlen = %d", m->m_len, m->m_pkthdr.len); sbfree(rcv, m); rcv->sb_mb = m_free(m); m = rcv->sb_mb; @@ -909,8 +920,11 @@ skip_copy: } else if (so_should_ddp(toep, copied) && uio->uio_iovcnt == 1) { CTR1(KTR_TOM ,"entering ddp on tid=%u", toep->tp_tid); if (!t3_enter_ddp(toep, TOM_TUNABLE(toep->tp_toedev, - ddp_copy_limit), 0, IS_NONBLOCKING(so))) + ddp_copy_limit), 0, IS_NONBLOCKING(so))) { + rcv->sb_flags |= SB_IN_TOE; p->kbuf_posted = 1; + } + } } #ifdef T3_TRACE @@ -939,6 +953,7 @@ cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct toedev *tdev; int rv, zcopy_thres, zcopy_enabled, flags; struct tcpcb *tp = so_sototcpcb(so); + struct sockbuf *rcv = so_sockbuf_rcv(so); flags = flagsp ? *flagsp &~ MSG_EOR : 0; @@ -956,17 +971,17 @@ cxgb_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, * */ if (tp && (tp->t_flags & TF_TOE) && uio && ((flags & (MSG_OOB|MSG_PEEK|MSG_DONTWAIT)) == 0) - && (uio->uio_iovcnt == 1) && (mp0 == NULL)) { + && (uio->uio_iovcnt == 1) && (mp0 == NULL) && + ((rcv->sb_flags & SB_IN_TOE) || (uio->uio_iovcnt == 1))) { struct toepcb *toep = tp->t_toe; tdev = toep->tp_toedev; zcopy_thres = TOM_TUNABLE(tdev, ddp_thres); zcopy_enabled = TOM_TUNABLE(tdev, ddp); - if ((uio->uio_resid > zcopy_thres) && - (uio->uio_iovcnt == 1) - && zcopy_enabled) { - CTR3(KTR_CXGB, "cxgb_soreceive: t_flags=0x%x flags=0x%x uio_resid=%d", - tp->t_flags, flags, uio->uio_resid); + if ((rcv->sb_flags & SB_IN_TOE) ||((uio->uio_resid > zcopy_thres) && + (uio->uio_iovcnt == 1) && zcopy_enabled)) { + CTR4(KTR_TOM, "cxgb_soreceive: sb_flags=0x%x t_flags=0x%x flags=0x%x uio_resid=%d", + rcv->sb_flags, tp->t_flags, flags, uio->uio_resid); rv = t3_soreceive(so, flagsp, uio); if (rv != EAGAIN) return (rv); diff --git a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c index 498b409..86e1e91 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_ddp.c +++ b/sys/dev/cxgb/ulp/tom/cxgb_ddp.c @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -677,7 +677,7 @@ err: int t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len) { - int page_off, resid_init, err; + int resid_init, err; struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl; resid_init = uio->uio_resid; @@ -685,12 +685,14 @@ t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len) if (!gl->dgl_pages) panic("pages not set\n"); + CTR4(KTR_TOM, "t3_ddp_copy: offset=%d dgl_offset=%d cur_offset=%d len=%d", + offset, gl->dgl_offset, m->m_cur_offset, len); offset += gl->dgl_offset + m->m_cur_offset; - page_off = offset & PAGE_MASK; KASSERT(len <= gl->dgl_length, ("len=%d > dgl_length=%d in ddp_copy\n", len, gl->dgl_length)); - err = uiomove_fromphys(gl->dgl_pages, page_off, len, uio); + + err = uiomove_fromphys(gl->dgl_pages, offset, len, uio); return (err); } diff --git a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h index a1a4bf3..66dff93 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h +++ b/sys/dev/cxgb/ulp/tom/cxgb_tcp_offload.h @@ -60,8 +60,8 @@ int sockbuf_sbspace(struct sockbuf *); struct tcphdr; struct tcpopt; -int syncache_offload_expand(struct in_conninfo *, struct tcpopt *, - struct tcphdr *, struct socket **, struct mbuf *); +int syncache_offload_expand(struct in_conninfo *, struct tcpopt *, + struct tcphdr *, struct socket **, struct mbuf *); #ifndef _SYS_SOCKETVAR_H_ #include @@ -82,6 +82,7 @@ int syncache_offload_expand(struct in_conninfo *, struct tcpopt *, #define SB_AIO 0x80 /* AIO operations queued */ #define SB_KNOTE 0x100 /* kernel note attached */ #define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ +#define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */ #define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ diff --git a/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h b/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h index 8a9c498..7c4bd0c 100644 --- a/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h +++ b/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h @@ -1,6 +1,5 @@ - /*- - * Copyright (c) 2007, Chelsio Inc. + * Copyright (c) 2007-2008, Chelsio Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,7 +35,6 @@ struct toepcb { struct toedev *tp_toedev; struct l2t_entry *tp_l2t; - pr_ctloutput_t *tp_ctloutput; unsigned int tp_tid; int tp_wr_max; int tp_wr_avail; -- cgit v1.1