diff options
author | Eric Dumazet <edumazet@google.com> | 2013-01-25 20:34:37 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-01-28 00:27:15 -0500 |
commit | cef401de7be8c4e155c6746bfccf721a4fa5fab9 (patch) | |
tree | af082329bf0613ed9de5b9575e3f94f3f03b77ec /net/ipv4 | |
parent | 61550022b9586972082904b80de26a464c558437 (diff) | |
download | op-kernel-dev-cef401de7be8c4e155c6746bfccf721a4fa5fab9.zip op-kernel-dev-cef401de7be8c4e155c6746bfccf721a4fa5fab9.tar.gz |
net: fix possible wrong checksum generation
Pravin Shelar mentioned that GSO could potentially generate
wrong TX checksum if skb has fragments that are overwritten
by the user between the checksum computation and transmit.
He suggested to linearize skbs but this extra copy can be
avoided for normal tcp skbs cooked by tcp_sendmsg().
This patch introduces a new SKB_GSO_SHARED_FRAG flag, set
in skb_shinfo(skb)->gso_type if at least one frag can be
modified by the user.
Typical sources of such possible overwrites are {vm}splice(),
sendfile(), and macvtap/tun/virtio_net drivers.
Tested:
$ netperf -H 7.7.8.84
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to
7.7.8.84 () port 0 AF_INET
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
87380 16384 16384 10.00 3959.52
$ netperf -H 7.7.8.84 -t TCP_SENDFILE
TCP SENDFILE TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 7.7.8.84 ()
port 0 AF_INET
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
87380 16384 16384 10.00 3216.80
Performance of the SENDFILE is impacted by the extra allocation and
copy, and because we use order-0 pages, while the TCP_STREAM uses
bigger pages.
Reported-by: Pravin Shelar <pshelar@nicira.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 4 |
6 files changed, 14 insertions, 6 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 4b70539..49ddca3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1306,6 +1306,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | + SKB_GSO_SHARED_FRAG | 0))) goto out; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 303012a..af6be70 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -738,7 +738,7 @@ drop: static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; const struct iphdr *tiph; struct flowi4 fl4; u8 tos; @@ -756,6 +756,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 191fc24..8f024d4 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -472,7 +472,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph = ip_hdr(skb); + const struct iphdr *old_iph; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; @@ -486,6 +486,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb_checksum_help(skb)) goto tx_error; + old_iph = ip_hdr(skb); + if (tos & 1) tos = old_iph->tos; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5227194..3ec1f69 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -896,6 +896,8 @@ new_segment: skb_fill_page_desc(skb, i, page, offset, copy); } + skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG; + skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -3032,6 +3034,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | + SKB_GSO_SHARED_FRAG | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0905997..492c7cf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1240,13 +1240,13 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, */ if (!skb_shinfo(prev)->gso_size) { skb_shinfo(prev)->gso_size = mss; - skb_shinfo(prev)->gso_type = sk->sk_gso_type; + skb_shinfo(prev)->gso_type |= sk->sk_gso_type; } /* CHECKME: To clear or not to clear? Mimics normal skb currently */ if (skb_shinfo(skb)->gso_segs <= 1) { skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; } /* Difference in this won't matter, both ACKed by the same cumul. ACK */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 667a6ad..367e2ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1133,6 +1133,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + skb_shinfo(skb)->gso_type &= SKB_GSO_SHARED_FRAG; if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1140,11 +1141,10 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, */ skb_shinfo(skb)->gso_segs = 1; skb_shinfo(skb)->gso_size = 0; - skb_shinfo(skb)->gso_type = 0; } else { skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now); skb_shinfo(skb)->gso_size = mss_now; - skb_shinfo(skb)->gso_type = sk->sk_gso_type; + skb_shinfo(skb)->gso_type |= sk->sk_gso_type; } } |