From 9b4d669bc06c215d64f56f1eb0d4eb96e14d689d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 25 Mar 2013 20:19:55 +0000 Subject: macvtap: set transport header before passing skb to lower device Set the transport header for 1) some drivers (e.g ixgbe) needs l4 header 2) precise packet length estimation (introduced in 1def9238) needs l4 header to compute header length. For the packets with partial checksum, the patch just set the transport header to csum_start. Otherwise tries to use skb_flow_dissect() to get l4 offset, if it fails, just pretend no l4 header. Cc: Eric Dumazet Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a449439..acf6450 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * A macvtap queue is the central object of this driver, it connects @@ -645,6 +646,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, int vnet_hdr_len = 0; int copylen = 0; bool zerocopy = false; + struct flow_keys keys; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = q->vnet_hdr_sz; @@ -725,6 +727,13 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, goto err_kfree; } + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_set_transport_header(skb, skb_checksum_start_offset(skb)); + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_set_transport_header(skb, ETH_HLEN); + rcu_read_lock_bh(); vlan = rcu_dereference_bh(q->vlan); /* copy skb_ubuf_info for callback when skb has no error */ -- cgit v1.1 From 38502af77e07b5d6650b9ff99a0b482d86366592 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 25 Mar 2013 20:19:56 +0000 Subject: tuntap: set transport header before passing it to kernel Currently, for the packets receives from tuntap, before doing header check, kernel just reset the transport header in netif_receive_skb() which pretends no l4 header. This is suboptimal for precise packet length estimation (introduced in 1def9238) which needs correct l4 header for gso packets. So this patch set the transport header to csum_start for partial checksum packets, otherwise it first try skb_flow_dissect(), if it fails, just reset the transport header. Cc: Eric Dumazet Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 95837c1..48cd73a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -70,6 +70,7 @@ #include #include +#include /* Uncomment to enable debugging */ /* #define TUN_DEBUG 1 */ @@ -1049,6 +1050,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, bool zerocopy = false; int err; u32 rxhash; + struct flow_keys keys; if (!(tun->flags & TUN_NO_PI)) { if ((len -= sizeof(pi)) > total_len) @@ -1203,6 +1205,14 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } skb_reset_network_header(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_set_transport_header(skb, skb_checksum_start_offset(skb)); + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_reset_transport_header(skb); + rxhash = skb_get_rxhash(skb); netif_rx_ni(skb); -- cgit v1.1 From c1aad275b0293d2b1905ec95a945422262470684 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 25 Mar 2013 20:19:57 +0000 Subject: packet: set transport header before doing xmit Set the transport header for 1) some drivers (e.g ixgbe needs l4 header to do atr) 2) precise packet length estimation (introduced in 1def9238) needs l4 header to compute header length. So this patch first tries to get l4 header for packet socket through skb_flow_dissect(), and pretend no l4 header if skb_flow_dissect() fails. Cc: Eric Dumazet Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- net/packet/af_packet.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bd0d14c..83fdd0a 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -88,6 +88,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -1412,6 +1413,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, __be16 proto = 0; int err; int extra_len = 0; + struct flow_keys keys; /* * Get and verify the address. @@ -1512,6 +1514,11 @@ retry: if (unlikely(extra_len == 4)) skb->no_fcs = 1; + if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_reset_transport_header(skb); + dev_queue_xmit(skb); rcu_read_unlock(); return len; @@ -1918,6 +1925,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, struct page *page; void *data; int err; + struct flow_keys keys; ph.raw = frame; @@ -1943,6 +1951,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); + if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_reset_transport_header(skb); + if (po->tp_tx_has_off) { int off_min, off_max, off; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -2199,6 +2212,7 @@ static int packet_snd(struct socket *sock, unsigned short gso_type = 0; int hlen, tlen; int extra_len = 0; + struct flow_keys keys; /* * Get and verify the address. @@ -2351,6 +2365,13 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } + if (skb->ip_summed == CHECKSUM_PARTIAL) + skb_set_transport_header(skb, skb_checksum_start_offset(skb)); + else if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_set_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) skb->no_fcs = 1; -- cgit v1.1 From f9ca8f74399f9195fd8e01f67a8424a8d33efa55 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 25 Mar 2013 20:19:58 +0000 Subject: netback: set transport header before passing it to kernel Currently, for the packets receives from netback, before doing header check, kernel just reset the transport header in netif_receive_skb() which pretends non l4 header. This is suboptimal for precise packet length estimation (introduced in 1def9238: net_sched: more precise pkt_len computation) which needs correct l4 header for gso packets. The patch just reuse the header probed by netback for partial checksum packets and tries to use skb_flow_dissect() for other cases, if both fail, just pretend no l4 header. Cc: Eric Dumazet Cc: Ian Campbell Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index aa28550..fc8faa7 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -1184,6 +1185,7 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) if (th >= skb_tail_pointer(skb)) goto out; + skb_set_transport_header(skb, 4 * iph->ihl); skb->csum_start = th - skb->head; switch (iph->protocol) { case IPPROTO_TCP: @@ -1495,6 +1497,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) skb->dev = vif->dev; skb->protocol = eth_type_trans(skb, skb->dev); + skb_reset_network_header(skb); if (checksum_setup(vif, skb)) { netdev_dbg(vif->dev, @@ -1503,6 +1506,15 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) continue; } + if (!skb_transport_header_was_set(skb)) { + struct flow_keys keys; + + if (skb_flow_dissect(skb, &keys)) + skb_set_transport_header(skb, keys.thoff); + else + skb_reset_transport_header(skb); + } + vif->dev->stats.rx_bytes += skb->len; vif->dev->stats.rx_packets++; -- cgit v1.1 From 15e5a030716468dce4032fa0f398d840fa2756f6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 25 Mar 2013 20:19:59 +0000 Subject: net_sched: better precise estimation on packet length for untrusted packets gso_segs were reset to zero when kernel receive packets from untrusted source. But we use this zero value to estimate precise packet len which is wrong. So this patch tries to estimate the correct gso_segs value before using it in qdisc_pkt_len_init(). Cc: Eric Dumazet Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index de930b7..f5ad23b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2588,6 +2588,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) */ if (shinfo->gso_size) { unsigned int hdr_len; + u16 gso_segs = shinfo->gso_segs; /* mac layer + network layer */ hdr_len = skb_transport_header(skb) - skb_mac_header(skb); @@ -2597,7 +2598,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len += tcp_hdrlen(skb); else hdr_len += sizeof(struct udphdr); - qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; + + if (shinfo->gso_type & SKB_GSO_DODGY) + gso_segs = DIV_ROUND_UP(skb->len - hdr_len, + shinfo->gso_size); + + qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } } -- cgit v1.1