diff options
author | zbb <zbb@FreeBSD.org> | 2016-02-25 14:12:51 +0000 |
---|---|---|
committer | zbb <zbb@FreeBSD.org> | 2016-02-25 14:12:51 +0000 |
commit | 973c78df29863cbe659c9c2a0066c0473393e58d (patch) | |
tree | 8c9218a428e115a07c4f3589d96ae737c9303d05 | |
parent | 3c08d1a5a9079a1d43c8c5eb47594b4fcaad3fff (diff) | |
download | FreeBSD-src-973c78df29863cbe659c9c2a0066c0473393e58d.zip FreeBSD-src-973c78df29863cbe659c9c2a0066c0473393e58d.tar.gz |
Add support for hardware Tx and Rx checksums to VNIC driver
- The network controller verifies Rx TCP/UDP/SCTP checksums by default.
To avoid redundant checksum calculation in the kernel, communicate this
to the stack whenever the packet is not marked as erroneous.
- It is not uncommon to get an mbuf whose m_len is less than
the minimal size of the IP, TCP, UDP, etc. headers when HW checksumming
is enabled. The HW writes the IP and TCP/UDP/SCTP checksums into the
data segment, so to avoid data corruption the mbuf needs to be
pulled up by the required number of bytes.
- Make sure that the mbufs that require checksum calculation can be
modified, rather than checking for a NULL mbuf on each transmission.
Reviewed by: wma
Obtained from: Semihalf
Sponsored by: Cavium
Differential Revision: https://reviews.freebsd.org/D5320
-rw-r--r-- | sys/dev/vnic/nicvf_main.c | 28 | ||||
-rw-r--r-- | sys/dev/vnic/nicvf_queues.c | 134 |
2 files changed, 155 insertions, 7 deletions
diff --git a/sys/dev/vnic/nicvf_main.c b/sys/dev/vnic/nicvf_main.c index 42a1d37..22e4122 100644 --- a/sys/dev/vnic/nicvf_main.c +++ b/sys/dev/vnic/nicvf_main.c @@ -353,6 +353,13 @@ nicvf_setup_ifnet(struct nicvf *nic) if_setmtu(ifp, ETHERMTU); if_setcapabilities(ifp, IFCAP_VLAN_MTU); + /* + * HW offload capabilities + */ + /* IP/TCP/UDP HW checksums */ + if_setcapabilitiesbit(ifp, IFCAP_HWCSUM, 0); + if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP), 0); + #ifdef DEVICE_POLLING #error "DEVICE_POLLING not supported in VNIC driver yet" if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0); @@ -500,6 +507,10 @@ nicvf_if_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) /* No work to do except acknowledge the change took. */ ifp->if_capenable ^= IFCAP_VLAN_MTU; } + if (mask & IFCAP_TXCSUM) + ifp->if_capenable ^= IFCAP_TXCSUM; + if (mask & IFCAP_RXCSUM) + ifp->if_capenable ^= IFCAP_RXCSUM; break; default: @@ -591,6 +602,7 @@ nicvf_if_transmit(struct ifnet *ifp, struct mbuf *mbuf) struct nicvf *nic = if_getsoftc(ifp); struct queue_set *qs = nic->qs; struct snd_queue *sq; + struct mbuf *mtmp; int qidx; int err = 0; @@ -610,16 +622,24 @@ nicvf_if_transmit(struct ifnet *ifp, struct mbuf *mbuf) if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { - if (mbuf != NULL) - err = drbr_enqueue(ifp, sq->br, mbuf); + err = drbr_enqueue(ifp, sq->br, mbuf); return (err); } - if (mbuf != NULL) { + if (mbuf->m_next != NULL && + (mbuf->m_pkthdr.csum_flags & + (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)) != 0) { + if (M_WRITABLE(mbuf) == 0) { + mtmp = m_dup(mbuf, M_NOWAIT); + m_freem(mbuf); + if (mtmp == NULL) + return (ENOBUFS); + mbuf = mtmp; + } + } err = drbr_enqueue(ifp, sq->br, mbuf); if (err != 0) return (err); - } taskqueue_enqueue(sq->snd_taskq, &sq->snd_task); diff --git a/sys/dev/vnic/nicvf_queues.c b/sys/dev/vnic/nicvf_queues.c index 4c5d280..e361e22 100644 --- a/sys/dev/vnic/nicvf_queues.c +++ b/sys/dev/vnic/nicvf_queues.c @@ -29,6 +29,9 @@ 
#include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_inet.h" +#include "opt_inet6.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/bitset.h> @@ -64,6 +67,16 @@ __FBSDID("$FreeBSD$"); #include <net/if_media.h> #include <net/ifq.h> +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/sctp.h> +#include <netinet/tcp.h> +#include <netinet/tcp_lro.h> +#include <netinet/udp.h> + #include <dev/pci/pcireg.h> #include <dev/pci/pcivar.h> @@ -1658,11 +1671,17 @@ nicvf_sq_free_used_descs(struct nicvf *nic, struct snd_queue *sq, int qidx) * Add SQ HEADER subdescriptor. * First subdescriptor for every send descriptor. */ -static __inline void +static __inline int nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, int subdesc_cnt, struct mbuf *mbuf, int len) { struct sq_hdr_subdesc *hdr; + struct ether_vlan_header *eh; +#ifdef INET + struct ip *ip; +#endif + uint16_t etype; + int ehdrlen, iphlen, poff; hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); sq->snd_buff[qentry].mbuf = mbuf; @@ -1675,7 +1694,93 @@ nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, hdr->subdesc_cnt = subdesc_cnt; hdr->tot_len = len; - /* ARM64TODO: Implement HW checksums calculation */ + if (mbuf->m_pkthdr.csum_flags != 0) { + hdr->csum_l3 = 1; /* Enable IP csum calculation */ + + eh = mtod(mbuf, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + etype = ntohs(eh->evl_proto); + } else { + ehdrlen = ETHER_HDR_LEN; + etype = ntohs(eh->evl_encap_proto); + } + + if (mbuf->m_len < ehdrlen + sizeof(struct ip)) { + mbuf = m_pullup(mbuf, ehdrlen + sizeof(struct ip)); + sq->snd_buff[qentry].mbuf = mbuf; + if (mbuf == NULL) + return (ENOBUFS); + } + + switch (etype) { +#ifdef INET6 + case ETHERTYPE_IPV6: + /* ARM64TODO: Add support for IPv6 */ + hdr->csum_l3 = 0; + 
sq->snd_buff[qentry].mbuf = NULL; + return (ENXIO); +#endif +#ifdef INET + case ETHERTYPE_IP: + ip = (struct ip *)(mbuf->m_data + ehdrlen); + ip->ip_sum = 0; + iphlen = ip->ip_hl << 2; + poff = ehdrlen + iphlen; + + switch (ip->ip_p) { + case IPPROTO_TCP: + if ((mbuf->m_pkthdr.csum_flags & CSUM_TCP) == 0) + break; + + if (mbuf->m_len < (poff + sizeof(struct tcphdr))) { + mbuf = m_pullup(mbuf, poff + sizeof(struct tcphdr)); + sq->snd_buff[qentry].mbuf = mbuf; + if (mbuf == NULL) + return (ENOBUFS); + } + hdr->csum_l4 = SEND_L4_CSUM_TCP; + break; + case IPPROTO_UDP: + if ((mbuf->m_pkthdr.csum_flags & CSUM_UDP) == 0) + break; + + if (mbuf->m_len < (poff + sizeof(struct udphdr))) { + mbuf = m_pullup(mbuf, poff + sizeof(struct udphdr)); + sq->snd_buff[qentry].mbuf = mbuf; + if (mbuf == NULL) + return (ENOBUFS); + } + hdr->csum_l4 = SEND_L4_CSUM_UDP; + break; + case IPPROTO_SCTP: + if ((mbuf->m_pkthdr.csum_flags & CSUM_SCTP) == 0) + break; + + if (mbuf->m_len < (poff + sizeof(struct sctphdr))) { + mbuf = m_pullup(mbuf, poff + sizeof(struct sctphdr)); + sq->snd_buff[qentry].mbuf = mbuf; + if (mbuf == NULL) + return (ENOBUFS); + } + hdr->csum_l4 = SEND_L4_CSUM_SCTP; + break; + default: + break; + } + break; +#endif + default: + hdr->csum_l3 = 0; + return (0); + } + + hdr->l3_offset = ehdrlen; + hdr->l4_offset = ehdrlen + iphlen; + } else + hdr->csum_l3 = 0; + + return (0); } /* @@ -1734,8 +1839,12 @@ nicvf_tx_mbuf_locked(struct snd_queue *sq, struct mbuf *mbuf) qentry = nicvf_get_sq_desc(sq, subdesc_cnt); /* Add SQ header subdesc */ - nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, mbuf, + err = nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, mbuf, mbuf->m_pkthdr.len); + if (err != 0) { + bus_dmamap_unload(sq->snd_buff_dmat, snd_buff->dmap); + return (err); + } /* Add SQ gather subdescs */ for (seg = 0; seg < nsegs; seg++) { @@ -1806,6 +1915,25 @@ nicvf_get_rcv_mbuf(struct nicvf *nic, struct cqe_rx_t *cqe_rx) m_fixhdr(mbuf); mbuf->m_pkthdr.flowid = 
cqe_rx->rq_idx; M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE); + if (__predict_true((if_getcapenable(nic->ifp) & IFCAP_RXCSUM) != 0)) { + /* + * HW by default verifies IP & TCP/UDP/SCTP checksums + */ + + /* XXX: Do we need to include IP with options too? */ + if (__predict_true(cqe_rx->l3_type == L3TYPE_IPV4 || + cqe_rx->l3_type == L3TYPE_IPV6)) { + mbuf->m_pkthdr.csum_flags = + (CSUM_IP_CHECKED | CSUM_IP_VALID); + } + if (cqe_rx->l4_type == L4TYPE_TCP || + cqe_rx->l4_type == L4TYPE_UDP || + cqe_rx->l4_type == L4TYPE_SCTP) { + mbuf->m_pkthdr.csum_flags |= + (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + mbuf->m_pkthdr.csum_data = htons(0xffff); + } + } } return (mbuf); |