From a4c9ea5e8fec680134d22aa99b54d1cd8c226ebd Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 30 Dec 2014 19:10:16 -0800 Subject: geneve: Add Geneve GRO support This results in an approximately 30% increase in throughput when handling encapsulated bulk traffic. Signed-off-by: Joe Stringer Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 2 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 394a200..19e256e 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -149,6 +149,99 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, } EXPORT_SYMBOL_GPL(geneve_xmit_skb); +static int geneve_hlen(struct genevehdr *gh) +{ + return sizeof(*gh) + gh->opt_len * 4; +} + +static struct sk_buff **geneve_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct genevehdr *gh, *gh2; + unsigned int hlen, gh_len, off_gnv; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_gnv = skb_gro_offset(skb); + hlen = off_gnv + sizeof(*gh); + gh = skb_gro_header_fast(skb, off_gnv); + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + if (gh->ver != GENEVE_VER || gh->oam) + goto out; + gh_len = geneve_hlen(gh); + + hlen = off_gnv + gh_len; + if (skb_gro_header_hard(skb, hlen)) { + gh = skb_gro_header_slow(skb, hlen, off_gnv); + if (unlikely(!gh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + gh2 = (struct genevehdr *)(p->data + off_gnv); + if (gh->opt_len != gh2->opt_len || + memcmp(gh, gh2, gh_len)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, gh_len); + skb_gro_postpull_rcsum(skb, gh, gh_len); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int geneve_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct genevehdr *gh; + struct packet_offload *ptype; + __be16 type; + int gh_len; + int err = -ENOSYS; + + udp_tunnel_gro_complete(skb, nhoff); + + gh = (struct genevehdr *)(skb->data + nhoff); + gh_len = geneve_hlen(gh); + type = gh->proto_type; + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); + + rcu_read_unlock(); + return err; +} + static void geneve_notify_add_rx_port(struct geneve_sock *gs) { struct sock *sk = gs->sock->sk; @@ -278,8 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, /* Initialize the geneve udp offloads structure */ gs->udp_offloads.port = port; - gs->udp_offloads.callbacks.gro_receive = NULL; - gs->udp_offloads.callbacks.gro_complete = NULL; + gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; + gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; spin_lock(&gn->sock_lock); hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); -- cgit v1.1 From 61f3cade763dca46127146a52d829e30b8f48921 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:02 -0800 Subject: geneve: Remove workqueue. The work queue is used only to free the UDP socket upon destruction. This is not necessary with Geneve and generally makes the code more difficult to reason about. It also introduces nondeterministic behavior such as when a socket is rapidly deleted and recreated, which could fail as the the deletion happens asynchronously. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 19e256e..136a829 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -61,8 +61,6 @@ struct geneve_net { static int geneve_net_id; -static struct workqueue_struct *geneve_wq; - static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); @@ -307,15 +305,6 @@ error: return 1; } -static void geneve_del_work(struct work_struct *work) -{ - struct geneve_sock *gs = container_of(work, struct geneve_sock, - del_work); - - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); -} - static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port) { @@ -356,8 +345,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, if (!gs) return ERR_PTR(-ENOMEM); - INIT_WORK(&gs->del_work, geneve_del_work); - sock = geneve_create_sock(net, ipv6, port); if (IS_ERR(sock)) { kfree(gs); @@ -430,7 +417,8 @@ void geneve_sock_release(struct geneve_sock *gs) geneve_notify_del_rx_port(gs); spin_unlock(&gn->sock_lock); - queue_work(geneve_wq, &gs->del_work); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -458,10 +446,6 @@ static int __init geneve_init_module(void) { int rc; - geneve_wq = alloc_workqueue("geneve", 0, 0); - if (!geneve_wq) - return -ENOMEM; - rc = register_pernet_subsys(&geneve_net_ops); if (rc) return rc; @@ -474,7 +458,6 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { - destroy_workqueue(geneve_wq); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); -- cgit v1.1 From 829a3ada9cc7d4c30fa61f8033403fb6c8f8092a Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:03 -0800 Subject: geneve: Simplify locking. The existing Geneve locking scheme was pulled over directly from VXLAN. However, VXLAN has a number of built in mechanisms which make the locking more complex and are unlikely to be necessary with Geneve. This simplifies the locking to use a basic scheme of a mutex when doing updates plus RCU on receive. In addition to making the code easier to read, this also avoids the possibility of a race when creating or destroying sockets since UDP sockets and the list of Geneve sockets are protected by different locks. After this change, the entire operation is atomic. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 59 ++++++++++++++++++++++++------------------------------- 1 file changed, 26 insertions(+), 33 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 136a829..ad8dbae 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -50,13 +51,15 @@ #include #endif +/* Protects sock_list and refcounts. */ +static DEFINE_MUTEX(geneve_mutex); + #define PORT_HASH_BITS 8 #define PORT_HASH_SIZE (1<sock->sk)->inet_sport == port) return gs; } @@ -336,7 +339,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -352,7 +354,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, } gs->sock = sock; - atomic_set(&gs->refcnt, 1); + gs->refcnt = 1; gs->rcv = rcv; gs->rcv_data = data; @@ -360,11 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, gs->udp_offloads.port = port; gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; - - spin_lock(&gn->sock_lock); - hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); geneve_notify_add_rx_port(gs); - spin_unlock(&gn->sock_lock); /* Mark socket as an encapsulation socket */ tunnel_cfg.sk_user_data = gs; @@ -373,6 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + hlist_add_head(&gs->hlist, gs_head(net, port)); + return gs; } @@ -380,25 +380,21 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool no_share, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_socket_create(net, port, rcv, data, ipv6); - if (!IS_ERR(gs)) - return gs; - - if (no_share) /* Return error if sharing is not allowed. */ - return ERR_PTR(-EINVAL); + mutex_lock(&geneve_mutex); - spin_lock(&gn->sock_lock); gs = geneve_find_sock(net, port); - if (gs && ((gs->rcv != rcv) || - !atomic_add_unless(&gs->refcnt, 1, 0))) + if (gs) { + if (!no_share && gs->rcv == rcv) + gs->refcnt++; + else gs = ERR_PTR(-EBUSY); - spin_unlock(&gn->sock_lock); + } else { + gs = geneve_socket_create(net, port, rcv, data, ipv6); + } - if (!gs) - gs = ERR_PTR(-EINVAL); + mutex_unlock(&geneve_mutex); return gs; } @@ -406,19 +402,18 @@ EXPORT_SYMBOL_GPL(geneve_sock_add); void geneve_sock_release(struct geneve_sock *gs) { - struct net *net = sock_net(gs->sock->sk); - struct geneve_net *gn = net_generic(net, geneve_net_id); + mutex_lock(&geneve_mutex); - if (!atomic_dec_and_test(&gs->refcnt)) - return; + if (--gs->refcnt) + goto unlock; - spin_lock(&gn->sock_lock); - hlist_del_rcu(&gs->hlist); + hlist_del(&gs->hlist); geneve_notify_del_rx_port(gs); - spin_unlock(&gn->sock_lock); - udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); + +unlock: + mutex_unlock(&geneve_mutex); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -427,8 +422,6 @@ static __net_init int geneve_init_net(struct net *net) struct geneve_net *gn = net_generic(net, geneve_net_id); unsigned int h; - spin_lock_init(&gn->sock_lock); - for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&gn->sock_list[h]); @@ -454,7 +447,7 @@ static int __init geneve_init_module(void) return 0; } -late_initcall(geneve_init_module); +module_init(geneve_init_module); static void __exit geneve_cleanup_module(void) { -- cgit v1.1 From df5dba8e52be50e615e03ef73b34611d82587f42 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:04 -0800 Subject: geneve: Remove socket hash table. The hash table for open Geneve ports is used only on creation and deletion time. It is not performance critical and is not likely to grow to a large number of items. Therefore, this can be changed to use a simple linked list. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index ad8dbae..4fe5a59 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -54,12 +53,9 @@ /* Protects sock_list and refcounts. */ static DEFINE_MUTEX(geneve_mutex); -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; -} - /* Find geneve socket based on network namespace and UDP port */ static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - hlist_for_each_entry(gs, gs_head(net, port), hlist) { + list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == port) return gs; } @@ -339,6 +329,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -371,7 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - hlist_add_head(&gs->hlist, gs_head(net, port)); + list_add(&gs->list, &gn->sock_list); return gs; } @@ -407,7 +398,7 @@ void geneve_sock_release(struct geneve_sock *gs) if (--gs->refcnt) goto unlock; - hlist_del(&gs->hlist); + list_del(&gs->list); geneve_notify_del_rx_port(gs); udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); @@ -420,17 +411,14 @@ EXPORT_SYMBOL_GPL(geneve_sock_release); static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; - for (h = 0; h < PORT_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->sock_list[h]); + INIT_LIST_HEAD(&gn->sock_list); return 0; } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = NULL, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; -- cgit v1.1 From 46b1e4f9115d48d17bb92f612e4fa45a521a0537 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:05 -0800 Subject: geneve: Check family when reusing sockets. When searching for an existing socket to reuse, the address family is not taken into account - only port number. This means that an IPv4 socket could be used for IPv6 traffic and vice versa, which is sure to cause problems when passing packets. It is not possible to trigger this problem currently because the only user of Geneve creates just IPv4 sockets. However, that is likely to change in the near future. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 4fe5a59..5b52046 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -65,14 +65,15 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) return (struct genevehdr *)(udp_hdr(skb) + 1); } -/* Find geneve socket based on network namespace and UDP port */ -static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) +static struct geneve_sock *geneve_find_sock(struct net *net, + sa_family_t family, __be16 port) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; list_for_each_entry(gs, &gn->sock_list, list) { - if (inet_sk(gs->sock->sk)->inet_sport == port) + if (inet_sk(gs->sock->sk)->inet_sport == port && + inet_sk(gs->sock->sk)->sk.sk_family == family) return gs; } @@ -375,7 +376,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, mutex_lock(&geneve_mutex); - gs = geneve_find_sock(net, port); + gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); if (gs) { if (!no_share && gs->rcv == rcv) gs->refcnt++; -- cgit v1.1 From df8a39defad46b83694ea6dd868d332976d62cc0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Jan 2015 17:13:44 +0100 Subject: net: rename vlan_tx_* helpers since "tx" is misleading there The same macros are used for rx as well. So rename it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 5b52046..23744c7 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -119,7 +119,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) { -- cgit v1.1 From a2b12f3c7ac1ea43ae646db74faf0b56c2bba563 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 12 Jan 2015 17:00:37 -0800 Subject: udp: pass udp_offload struct to UDP gro callbacks This patch introduces udp_offload_callbacks which has the same GRO functions (but not a GSO function) as offload_callbacks, except there is an argument to a udp_offload struct passed to gro_receive and gro_complete functions. This additional argument can be used to retrieve the per port structure of the encapsulation for use in gro processing (mostly by doing container_of on the structure). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 23744c7..9568594 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -147,7 +147,8 @@ static int geneve_hlen(struct genevehdr *gh) } static struct sk_buff **geneve_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { struct sk_buff *p, **pp = NULL; struct genevehdr *gh, *gh2; @@ -211,7 +212,8 @@ out: return pp; } -static int geneve_gro_complete(struct sk_buff *skb, int nhoff) +static int geneve_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { struct genevehdr *gh; struct packet_offload *ptype; -- cgit v1.1 From d998f8efa47221405ceae129aa93fa6d4ac8510d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 20 Jan 2015 11:23:04 -0800 Subject: udp: Do not require sock in udp_tunnel_xmit_skb The UDP tunnel transmit functions udp_tunnel_xmit_skb and udp_tunnel6_xmit_skb include a socket argument. The socket being passed to the functions (from VXLAN) is a UDP created for receive side. The only thing that the socket is used for in the transmit functions is to get the setting for checksum (enabled or zero). This patch removes the argument and and adds a nocheck argument for checksum setting. This eliminates the unnecessary dependency on a UDP socket for UDP tunnel transmit. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 9568594..93e5119 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -136,8 +136,9 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, + tos, ttl, df, src_port, dst_port, xnet, + gs->sock->sk->sk_no_check_tx); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); -- cgit v1.1 From b8693877ae016ac525d674d5d7a84ea0ea68ba60 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 28 Jan 2015 16:32:46 -0800 Subject: openvswitch: Add support for checksums on UDP tunnels. Currently, it isn't possible to request checksums on the outer UDP header of tunnels - the TUNNEL_CSUM flag is ignored. This adds support for requesting that UDP checksums be computed on transmit and properly reported if they are present on receive. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- net/ipv4/geneve.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/geneve.c') diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 93e5119..5a4828b 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -107,13 +107,13 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet) + bool csum, bool xnet) { struct genevehdr *gnvh; int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, csum); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -138,7 +138,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, - gs->sock->sk->sk_no_check_tx); + !csum); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); -- cgit v1.1