From f3f121359caa069cefbc48008e94bcd862ca21e2 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Mon, 22 Oct 2012 21:41:48 +0000 Subject: ipv6: tcp: clean up tcp_v6_early_demux() icsk variable Remove an icsk variable, which by convention should refer to an inet_connection_sock rather than an inet_sock. In the process, make the tcp_v6_early_demux() code and formatting a bit more like tcp_v4_early_demux(), to ease comparisons and maintenance. Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 26175bf..bb6782e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1741,11 +1741,11 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { struct dst_entry *dst = sk->sk_rx_dst; - struct inet_sock *icsk = inet_sk(sk); + if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == skb->skb_iif) + inet_sk(sk)->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } -- cgit v1.1 From e6c022a4fa2d2d9ca9d0a7ac3b05ad988f39fc30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Oct 2012 23:16:46 +0000 Subject: tcp: better retrans tracking for defer-accept For passive TCP connections using TCP_DEFER_ACCEPT facility, we incorrectly increment req->retrans each time timeout triggers while no SYNACK is sent. SYNACK are not sent for TCP_DEFER_ACCEPT that were established (for which we received the ACK from client). Only the last SYNACK is sent so that we can receive again an ACK from client, to move the req into accept queue. We plan to change this later to avoid the useless retransmit (and potential problem as this SYNACK could be lost) TCP_INFO later gives wrong information to user, claiming imaginary retransmits. Decouple req->retrans field into two independent fields : num_retrans : number of retransmit num_timeout : number of timeouts num_timeout is the counter that is incremented at each timeout, regardless of actual SYNACK being sent or not, and used to compute the exponential timeout. Introduce inet_rtx_syn_ack() helper to increment num_retrans only if ->rtx_syn_ack() succeeded. Use inet_rtx_syn_ack() from tcp_check_req() to increment num_retrans when we re-send a SYNACK in answer to a (retransmitted) SYN. Prior to this patch, we were not counting these retransmits. Change tcp_v[46]_rtx_synack() to increment TCP_MIB_RETRANSSEGS only if a synack packet was successfully queued. Reported-by: Yuchung Cheng Signed-off-by: Eric Dumazet Cc: Julian Anastasov Cc: Vijay Subramanian Cc: Elliott Hughes Cc: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb6782e..c73d0eb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -495,9 +495,12 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { struct flowi6 fl6; + int res; - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); - return tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + if (!res) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return res; } static void tcp_v6_reqsk_destructor(struct request_sock *req) @@ -1364,7 +1367,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_initialize_rcv_mss(newsk); tcp_synack_rtt_meas(newsk, req); - newtp->total_retrans = req->retrans; + newtp->total_retrans = req->num_retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; @@ -1866,7 +1869,7 @@ static void get_openreq6(struct seq_file *seq, 0,0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), - req->retrans, + req->num_timeout, from_kuid_munged(seq_user_ns(seq), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ -- cgit v1.1 From 8ca896cfdd17f32f5aa2747644733ebf3725360d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:13 +0000 Subject: ipv6: Add new offload registration infrastructure. Create a new data structure for IPv6 protocols that holds GRO/GSO callbacks and a new array to track the protocols that register GRO/GSO. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c73d0eb..6884a95 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2073,6 +2073,13 @@ static const struct inet6_protocol tcpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +static const struct net_offload tcpv6_offload = { + .gso_send_check = tcp_v6_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp6_gro_receive, + .gro_complete = tcp6_gro_complete, +}; + static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, -- cgit v1.1 From 3336288a9feaa809839adbaf05778dc2f16665dc Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:15 +0000 Subject: ipv6: Switch to using new offload infrastructure. Switch IPv6 protocol to using the new GRO/GSO calls and data. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6884a95..635206e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2066,10 +2066,6 @@ static const struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; @@ -2116,10 +2112,14 @@ int __init tcpv6_init(void) { int ret; - ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); + ret = inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); if (ret) goto out; + ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); + if (ret) + goto out_offload; + /* register inet6 protocol */ ret = inet6_register_protosw(&tcpv6_protosw); if (ret) @@ -2131,10 +2131,12 @@ int __init tcpv6_init(void) out: return ret; -out_tcpv6_protocol: - inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); +out_tcpv6_protocol: + inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); +out_offload: + inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); goto out; } @@ -2143,4 +2145,5 @@ void tcpv6_exit(void) unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); + inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); } -- cgit v1.1 From 8663e02aba154e04679c9bb1665af52021d32547 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:17 +0000 Subject: ipv6: Separate tcp offload functionality Pull TCPv6 offload functionality into its won file in preparation for moving it out of the module. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 113 ++-------------------------------------------------- 1 file changed, 4 insertions(+), 109 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 635206e..5bed594 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -71,15 +71,13 @@ #include #include +#include "ip6_offload.h" static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); -static void __tcp_v6_send_check(struct sk_buff *skb, - const struct in6_addr *saddr, - const struct in6_addr *daddr); static const struct inet_connection_sock_af_ops ipv6_mapped; static const struct inet_connection_sock_af_ops ipv6_specific; @@ -119,14 +117,6 @@ static void tcp_v6_hash(struct sock *sk) } } -static __inline__ __sum16 tcp_v6_check(int len, - const struct in6_addr *saddr, - const struct in6_addr *daddr, - __wsum base) -{ - return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); -} - static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, @@ -722,94 +712,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { }; #endif -static void __tcp_v6_send_check(struct sk_buff *skb, - const struct in6_addr *saddr, const struct in6_addr *daddr) -{ - struct tcphdr *th = tcp_hdr(skb); - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - } else { - th->check = tcp_v6_check(skb->len, saddr, daddr, - csum_partial(th, th->doff << 2, - skb->csum)); - } -} - -static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - - __tcp_v6_send_check(skb, &np->saddr, &np->daddr); -} - -static int tcp_v6_gso_send_check(struct sk_buff *skb) -{ - const struct ipv6hdr *ipv6h; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - ipv6h = ipv6_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr); - return 0; -} - -static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, - struct sk_buff *skb) -{ - const struct ipv6hdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -static int tcp6_gro_complete(struct sk_buff *skb) -{ - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), - &iph->saddr, &iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - - return tcp_gro_complete(skb); -} - static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, struct tcp_md5sig_key *key, int rst, u8 tclass) { @@ -2069,13 +1971,6 @@ static const struct inet6_protocol tcpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static const struct net_offload tcpv6_offload = { - .gso_send_check = tcp_v6_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp6_gro_receive, - .gro_complete = tcp6_gro_complete, -}; - static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, @@ -2112,7 +2007,7 @@ int __init tcpv6_init(void) { int ret; - ret = inet6_add_offload(&tcpv6_offload, IPPROTO_TCP); + ret = tcpv6_offload_init(); if (ret) goto out; @@ -2136,7 +2031,7 @@ out_tcpv6_protosw: out_tcpv6_protocol: inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); out_offload: - inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); + tcpv6_offload_cleanup(); goto out; } @@ -2145,5 +2040,5 @@ void tcpv6_exit(void) unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); - inet6_del_offload(&tcpv6_offload, IPPROTO_TCP); + tcpv6_offload_cleanup(); } -- cgit v1.1 From c6b641a4c6b32f39db678c2441cb1ef824110d74 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 15 Nov 2012 08:49:22 +0000 Subject: ipv6: Pull IPv6 GSO registration out of the module Sing GSO support is now separate, pull it out of the module and make it its own init call. Remove the cleanup functions as they are no longer called. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5bed594..6c0f252 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -71,7 +71,6 @@ #include #include -#include "ip6_offload.h" static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, @@ -2007,13 +2006,9 @@ int __init tcpv6_init(void) { int ret; - ret = tcpv6_offload_init(); - if (ret) - goto out; - ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP); if (ret) - goto out_offload; + goto out; /* register inet6 protocol */ ret = inet6_register_protosw(&tcpv6_protosw); @@ -2030,8 +2025,6 @@ out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); out_tcpv6_protocol: inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); -out_offload: - tcpv6_offload_cleanup(); goto out; } @@ -2040,5 +2033,4 @@ void tcpv6_exit(void) unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); - tcpv6_offload_cleanup(); } -- cgit v1.1 From 2b9164771efe191c4ef266ae53c8c05ab92dd115 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Thu, 22 Nov 2012 01:13:58 +0000 Subject: ipv6: adapt connect for repair move This is work the same as for ipv4. All other hacks about tcp repair are in common code for ipv4 and ipv6, so this patch is enough for repairing ipv6 connections. Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pavel Emelyanov Signed-off-by: Andrey Vagin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6c0f252..6565cf5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -295,7 +295,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) goto late_failure; - if (!tp->write_seq) + if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, inet->inet_sport, -- cgit v1.1 From e337e24d6624e74a558aa69071e112a65f7b5758 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Fri, 14 Dec 2012 04:07:58 +0000 Subject: inet: Fix kmemleak in tcp_v4/6_syn_recv_sock and dccp_v4/6_request_recv_sock If in either of the above functions inet_csk_route_child_sock() or __inet_inherit_port() fails, the newsk will not be freed: unreferenced object 0xffff88022e8a92c0 (size 1592): comm "softirq", pid 0, jiffies 4294946244 (age 726.160s) hex dump (first 32 bytes): 0a 01 01 01 0a 01 01 02 00 00 00 00 a7 cc 16 00 ................ 02 00 03 01 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x21/0x3e [] kmem_cache_alloc+0xb5/0xc5 [] sk_prot_alloc.isra.53+0x2b/0xcd [] sk_clone_lock+0x16/0x21e [] inet_csk_clone_lock+0x10/0x7b [] tcp_create_openreq_child+0x21/0x481 [] tcp_v4_syn_recv_sock+0x3a/0x23b [] tcp_check_req+0x29f/0x416 [] tcp_v4_do_rcv+0x161/0x2bc [] tcp_v4_rcv+0x6c9/0x701 [] ip_local_deliver_finish+0x70/0xc4 [] ip_local_deliver+0x4e/0x7f [] ip_rcv_finish+0x1fc/0x233 [] ip_rcv+0x217/0x267 [] __netif_receive_skb+0x49e/0x553 [] netif_receive_skb+0x50/0x82 This happens, because sk_clone_lock initializes sk_refcnt to 2, and thus a single sock_put() is not enough to free the memory. Additionally, things like xfrm, memcg, cookie_values,... may have been initialized. We have to free them properly. This is fixed by forcing a call to tcp_done(), ending up in inet_csk_destroy_sock, doing the final sock_put(). tcp_done() is necessary, because it ends up doing all the cleanup on xfrm, memcg, cookie_values, xfrm,... Before calling tcp_done, we have to set the socket to SOCK_DEAD, to force it entering inet_csk_destroy_sock. To avoid the warning in inet_csk_destroy_sock, inet_num has to be set to 0. As inet_csk_destroy_sock does a dec on orphan_count, we first have to increase it. Calling tcp_done() allows us to remove the calls to tcp_clear_xmit_timer() and tcp_cleanup_congestion_control(). A similar approach is taken for dccp by calling dccp_done(). This is in the kernel since 093d282321 (tproxy: fix hash locking issue when using port redirection in __inet_inherit_port()), thus since version >= 2.6.37. Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6/tcp_ipv6.c') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6565cf5..93825dd 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1288,7 +1288,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #endif if (__inet_inherit_port(sk, newsk) < 0) { - sock_put(newsk); + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); goto out; } __inet6_hash(newsk, NULL); -- cgit v1.1