author     David S. Miller <davem@davemloft.net>  2014-10-01 21:35:58 -0400
committer  David S. Miller <davem@davemloft.net>  2014-10-01 21:35:58 -0400
commit     25e379c475121c658a344cfd5eeed9affe272d31
tree       aacc22b440766d6bdbda7b035962a7dcc590263c /net
parent     f44d61cdd3ab4259289ccf314093eb45d83a69e6
parent     996c9fd167c99b382bcb9c1eb833b5b4711140ab
Merge branch 'udp_gso'
Tom Herbert says:
====================
udp: Generalize GSO for UDP tunnels
This patch set generalizes the UDP tunnel segmentation functions so
that they can work with various protocol encapsulations. The primary
change is to set the inner_protocol field in the skbuff when building
the encapsulated packet; skb_udp_tunnel_segment then uses this field
to select the function that segments the encapsulated packet. The
inner_protocol field is overloaded to hold either an Ethertype or an
IP protocol number.
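In skb_udp_tunnel_segment the overloaded field selects the inner GSO
handler; a condensed excerpt of the new dispatch added by this series
(declarations, RCU locking, and error unwinding trimmed):

    switch (skb->inner_protocol_type) {
    case ENCAP_TYPE_ETHER:
            /* inner_protocol carries an Ethertype (e.g. ETH_P_TEB as set by VXLAN). */
            protocol = skb->inner_protocol;
            gso_inner_segment = skb_mac_gso_segment;
            break;
    case ENCAP_TYPE_IPPROTO:
            /* inner_ipproto carries an IP protocol (e.g. IPPROTO_IPIP for fou). */
            offloads = is_ipv6 ? inet6_offloads : inet_offloads;
            ops = rcu_dereference(offloads[skb->inner_ipproto]);
            if (!ops || !ops->callbacks.gso_segment)
                    goto out_unlock;
            gso_inner_segment = ops->callbacks.gso_segment;
            break;
    default:
            goto out_unlock;
    }

    segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
                                    protocol);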
The inner_protocol is set on transmit using the skb_set_inner_ipproto
or skb_set_inner_protocol helper functions. VXLAN and the IP tunnels
(for fou GSO) were modified to call these.
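For example, the ipip and ip_gre changes in this series each boil down
to one extra call right before the packet is handed to the tunnel
output (excerpted from the diff below, minus the surrounding driver
context):

    /* ipip: the inner packet is identified by IP protocol number. */
    skb_set_inner_ipproto(skb, IPPROTO_IPIP);
    ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);

    /* ip_gre: the inner packet is identified by Ethertype (tpi.proto). */
    skb_set_inner_protocol(skb, tpi.proto);
    ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);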
Notes:
- GSO for GRE/UDP where GRE checksum is enabled does not work.
Handling this will require some special case code.
- Software GSO now supports many varieties of encapsulation with
SKB_GSO_UDP_TUNNEL{_CSUM}. We still need a mechanism to query
for device support of particular combinations (I intend to
add ndo_gso_check for that).
- MPLS seems to be the only previous user of inner_protocol. I don't
believe these patches can affect that. For supporting GSO with
MPLS over UDP, the inner_protocol should be set using the
helper functions in this patch (see the sketch after these notes).
- GSO for L2TP/UDP should also be straightforward now.
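As a rough illustration of the last two notes (hypothetical, not part
of this series), an MPLS-over-UDP or L2TP/UDP transmit path would only
need to tag the inner protocol the same way before pushing the UDP
encapsulation:

    /* Hypothetical MPLS-over-UDP xmit: inner payload identified by Ethertype. */
    skb_set_inner_protocol(skb, htons(ETH_P_MPLS_UC));

    /* Hypothetical L2TP/UDP xmit carrying an Ethernet frame: ETH_P_TEB lets
     * the GSO code segment the inner packet via skb_mac_gso_segment.
     */
    skb_set_inner_protocol(skb, htons(ETH_P_TEB));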
v2:
- Respin for Eric's restructuring of skbuff.
Tested GRE, IPIP, and SIT over fou, as well as VXLAN. This was
done using 200 TCP_STREAMs in netperf.
GRE
  IPv4, FOU, UDP checksum enabled
    TCP_STREAM TSO enabled on tun interface
      14.04% TX CPU utilization
      13.17% RX CPU utilization
      9211 Mbps
    TCP_STREAM TSO disabled on tun interface
      27.82% TX CPU utilization
      25.41% RX CPU utilization
      9336 Mbps
  IPv4, FOU, UDP checksum disabled
    TCP_STREAM TSO enabled on tun interface
      13.14% TX CPU utilization
      23.18% RX CPU utilization
      9277 Mbps
    TCP_STREAM TSO disabled on tun interface
      30.00% TX CPU utilization
      31.28% RX CPU utilization
      9327 Mbps

IPIP
  FOU, UDP checksum enabled
    TCP_STREAM TSO enabled on tun interface
      15.28% TX CPU utilization
      13.92% RX CPU utilization
      9342 Mbps
    TCP_STREAM TSO disabled on tun interface
      27.82% TX CPU utilization
      25.41% RX CPU utilization
      9336 Mbps
  FOU, UDP checksum disabled
    TCP_STREAM TSO enabled on tun interface
      15.08% TX CPU utilization
      24.64% RX CPU utilization
      9226 Mbps
    TCP_STREAM TSO disabled on tun interface
      30.00% TX CPU utilization
      31.28% RX CPU utilization
      9327 Mbps

SIT
  FOU, UDP checksum enabled
    TCP_STREAM TSO enabled on tun interface
      14.47% TX CPU utilization
      14.58% RX CPU utilization
      9106 Mbps
    TCP_STREAM TSO disabled on tun interface
      31.82% TX CPU utilization
      30.82% RX CPU utilization
      9204 Mbps
  FOU, UDP checksum disabled
    TCP_STREAM TSO enabled on tun interface
      15.70% TX CPU utilization
      27.93% RX CPU utilization
      9097 Mbps
    TCP_STREAM TSO disabled on tun interface
      33.48% TX CPU utilization
      37.36% RX CPU utilization
      9197 Mbps

VXLAN
  TCP_STREAM TSO enabled on tun interface
    16.42% TX CPU utilization
    23.66% RX CPU utilization
    9081 Mbps
  TCP_STREAM TSO disabled on tun interface
    30.32% TX CPU utilization
    30.55% RX CPU utilization
    9185 Mbps

Baseline (no encap, TSO and LRO enabled)
  TCP_STREAM
    11.85% TX CPU utilization
    15.13% RX CPU utilization
    9452 Mbps
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--   net/ipv4/ip_gre.c      |  2
-rw-r--r--   net/ipv4/ipip.c        |  2
-rw-r--r--   net/ipv4/udp_offload.c | 51
-rw-r--r--   net/ipv6/ip6_gre.c     |  8
-rw-r--r--   net/ipv6/sit.c         |  4
-rw-r--r--   net/ipv6/udp_offload.c |  2
6 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 829aff8b..0485ef1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -241,6 +241,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 	/* Push GRE header. */
 	gre_build_header(skb, &tpi, tunnel->tun_hlen);
 
+	skb_set_inner_protocol(skb, tpi.proto);
+
 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
 
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfec31d..ea88ab3 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (IS_ERR(skb))
 		goto out;
 
+	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
 	ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
 
 	return NETDEV_TX_OK;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 19ebe6a..8c35f2c 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -25,8 +25,11 @@ struct udp_offload_priv {
 	struct udp_offload_priv __rcu *next;
 };
 
-struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
-				       netdev_features_t features)
+static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
+	netdev_features_t features,
+	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+					     netdev_features_t features),
+	__be16 new_protocol)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	u16 mac_offset = skb->mac_header;
@@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, skb_inner_network_offset(skb));
 	skb->mac_len = skb_inner_network_offset(skb);
-	skb->protocol = htons(ETH_P_TEB);
+	skb->protocol = new_protocol;
 
 	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
 	if (need_csum)
@@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
-	segs = skb_mac_gso_segment(skb, enc_features);
+	segs = gso_inner_segment(skb, enc_features);
 	if (IS_ERR_OR_NULL(segs)) {
 		skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
 				     mac_len);
@@ -101,6 +104,44 @@ out:
 	return segs;
 }
 
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+				       netdev_features_t features,
+				       bool is_ipv6)
+{
+	__be16 protocol = skb->protocol;
+	const struct net_offload **offloads;
+	const struct net_offload *ops;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+					     netdev_features_t features);
+
+	rcu_read_lock();
+
+	switch (skb->inner_protocol_type) {
+	case ENCAP_TYPE_ETHER:
+		protocol = skb->inner_protocol;
+		gso_inner_segment = skb_mac_gso_segment;
+		break;
+	case ENCAP_TYPE_IPPROTO:
+		offloads = is_ipv6 ? inet6_offloads : inet_offloads;
+		ops = rcu_dereference(offloads[skb->inner_ipproto]);
+		if (!ops || !ops->callbacks.gso_segment)
+			goto out_unlock;
+		gso_inner_segment = ops->callbacks.gso_segment;
+		break;
+	default:
+		goto out_unlock;
+	}
+
+	segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
+					protocol);
+
+out_unlock:
+	rcu_read_unlock();
+
+	return segs;
+}
+
 static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 					 netdev_features_t features)
 {
@@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	if (skb->encapsulation &&
 	    (skb_shinfo(skb)->gso_type &
 	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
-		segs = skb_udp_tunnel_segment(skb, features);
+		segs = skb_udp_tunnel_segment(skb, features, false);
 		goto out;
 	}
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 5f19dfb..9a0a1aa 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -616,6 +616,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	int err = -1;
 	u8 proto;
 	struct sk_buff *new_skb;
+	__be16 protocol;
 
 	if (dev->type == ARPHRD_ETHER)
 		IPCB(skb)->flags = 0;
@@ -732,8 +733,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	ipv6h->daddr = fl6->daddr;
 
 	((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
-	((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
-				   htons(ETH_P_TEB) : skb->protocol;
+	protocol = (dev->type == ARPHRD_ETHER) ?
+				htons(ETH_P_TEB) : skb->protocol;
+	((__be16 *)(ipv6h + 1))[1] = protocol;
 
 	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
 		__be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
@@ -754,6 +756,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 		}
 	}
 
+	skb_set_inner_protocol(skb, protocol);
+
 	ip6tunnel_xmit(skb, dev);
 	if (ndst)
 		ip6_tnl_dst_store(tunnel, ndst);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index db75809..0d4e274 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -982,6 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 			goto tx_error;
 	}
 
+	skb_set_inner_ipproto(skb, IPPROTO_IPV6);
+
 	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
 			    protocol, tos, ttl, df,
 			    !net_eq(tunnel->net, dev_net(dev)));
@@ -1006,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (IS_ERR(skb))
 		goto out;
 
+	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
 	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
 	return NETDEV_TX_OK;
 out:
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 212ebfc..8f96988 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -58,7 +58,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 
 	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
 	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
-		segs = skb_udp_tunnel_segment(skb, features);
+		segs = skb_udp_tunnel_segment(skb, features, true);
 	else {
 		const struct ipv6hdr *ipv6h;
 		struct udphdr *uh;