From 105970f6087ae240b00deaff85773ed9bf381145 Mon Sep 17 00:00:00 2001 From: Kenjiro Nakayama Date: Mon, 20 Oct 2014 18:15:50 +0900 Subject: net: Remove trailing whitespace in tcp.h icmp.c syncookies.c Remove trailing whitespace in tcp.h icmp.c syncookies.c Signed-off-by: Kenjiro Nakayama Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 1 - net/ipv6/syncookies.c | 1 - 2 files changed, 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 97ae700..62c1037 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -1009,4 +1009,3 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) return table; } #endif - diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 2f25cb6..0e26e79 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -269,4 +269,3 @@ out_free: reqsk_free(req); return NULL; } - -- cgit v1.1 From a3c00e46efdb4009369971c97203ea67f7630fe4 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 20 Oct 2014 13:42:43 -0700 Subject: ipv6: Remove BACKTRACK macro It is the prep work to reduce the number of calls to fib6_lookup(). The BACKTRACK macro could be hard-to-read and error-prone due to its side effects (mainly goto). This patch is to: 1. Replace BACKTRACK macro with a function (fib6_backtrack) with the following return values: * If it is backtrack-able, returns next fn for retry. * If it reaches the root, returns NULL. 2. The caller needs to decide if a backtrack is needed (by testing rt == net->ipv6.ip6_null_entry). 3. Rename the goto labels in ip6_pol_route() to make the next few patches easier to read. Cc: David Miller Cc: Hannes Frederic Sowa Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 70 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 30 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a318dd89..f1ab2f4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -772,23 +772,22 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(__net, saddr) \ -do { \ - if (rt == __net->ipv6.ip6_null_entry) { \ - struct fib6_node *pn; \ - while (1) { \ - if (fn->fn_flags & RTN_TL_ROOT) \ - goto out; \ - pn = fn->parent; \ - if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ - fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ - else \ - fn = pn; \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ - } \ - } \ -} while (0) +static struct fib6_node* fib6_backtrack(struct fib6_node *fn, + struct in6_addr *saddr) +{ + struct fib6_node *pn; + while (1) { + if (fn->fn_flags & RTN_TL_ROOT) + return NULL; + pn = fn->parent; + if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) + fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); + else + fn = pn; + if (fn->fn_flags & RTN_RTINFO) + return fn; + } +} static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, @@ -804,8 +803,11 @@ restart: rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); - BACKTRACK(net, &fl6->saddr); -out: + if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto restart; + } dst_use(&rt->dst, jiffies); read_unlock_bh(&table->tb6_lock); return rt; @@ -924,19 +926,25 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, strict |= flags & RT6_LOOKUP_F_IFACE; -relookup: +redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); -restart_2: +redo_fib6_lookup: fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); -restart: +redo_rt6_select: rt = rt6_select(fn, oif, strict | reachable); if (rt->rt6i_nsiblings) rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); - BACKTRACK(net, &fl6->saddr); - if (rt == net->ipv6.ip6_null_entry || - rt->rt6i_flags & RTF_CACHE) + if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto redo_rt6_select; + else + goto out; + } + + if (rt->rt6i_flags & RTF_CACHE) goto out; dst_hold(&rt->dst); @@ -967,12 +975,12 @@ restart: * released someone could insert this route. Relookup. */ ip6_rt_put(rt); - goto relookup; + goto redo_fib6_lookup_lock; out: if (reachable) { reachable = 0; - goto restart_2; + goto redo_fib6_lookup; } dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); @@ -1235,10 +1243,12 @@ restart: rt = net->ipv6.ip6_null_entry; else if (rt->dst.error) { rt = net->ipv6.ip6_null_entry; - goto out; + } else if (rt == net->ipv6.ip6_null_entry) { + fn = fib6_backtrack(fn, &fl6->saddr); + if (fn) + goto restart; } - BACKTRACK(net, &fl6->saddr); -out: + dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); -- cgit v1.1 From 94c77bb41d871deb848e5011aacb5d7c24358ddd Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 20 Oct 2014 13:42:44 -0700 Subject: ipv6: Avoid redoing fib6_lookup() for RTF_CACHE hit case When there is a RTF_CACHE hit, no need to redo fib6_lookup() with reachable=0. Cc: David Miller Cc: Hannes Frederic Sowa Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f1ab2f4..98c523f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -944,12 +944,12 @@ redo_rt6_select: goto out; } - if (rt->rt6i_flags & RTF_CACHE) - goto out; - dst_hold(&rt->dst); read_unlock_bh(&table->tb6_lock); + if (rt->rt6i_flags & RTF_CACHE) + goto out2; + if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); else if (!(rt->dst.flags & DST_HOST)) -- cgit v1.1 From 367efcb932c1cfc134d5b1fd9db8665ae5e6a251 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 20 Oct 2014 13:42:45 -0700 Subject: ipv6: Avoid redoing fib6_lookup() with reachable = 0 by saving fn This patch save the fn before doing rt6_backtrack. Hence, without redo-ing the fib6_lookup(), saved_fn can be used to redo rt6_select() with RT6_LOOKUP_F_REACHABLE off. Some minor changes I think make sense to review as a single patch: * Remove the 'out:' goto label. * Remove the 'reachable' variable. Only use the 'strict' variable instead. After this patch, "failing ip6_ins_rt()" should be the only case that requires a redo of fib6_lookup(). Cc: David Miller Cc: Hannes Frederic Sowa Signed-off-by: Martin KaFai Lau Signed-off-by: David S. Miller --- net/ipv6/route.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 98c523f..c910831 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -917,31 +917,40 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { - struct fib6_node *fn; + struct fib6_node *fn, *saved_fn; struct rt6_info *rt, *nrt; int strict = 0; int attempts = 3; int err; - int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; strict |= flags & RT6_LOOKUP_F_IFACE; + if (net->ipv6.devconf_all->forwarding == 0) + strict |= RT6_LOOKUP_F_REACHABLE; redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); -redo_fib6_lookup: fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + saved_fn = fn; redo_rt6_select: - rt = rt6_select(fn, oif, strict | reachable); + rt = rt6_select(fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); + rt = rt6_multipath_select(rt, fl6, oif, strict); if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; - else - goto out; + else if (strict & RT6_LOOKUP_F_REACHABLE) { + /* also consider unreachable route */ + strict &= ~RT6_LOOKUP_F_REACHABLE; + fn = saved_fn; + goto redo_rt6_select; + } else { + dst_hold(&rt->dst); + read_unlock_bh(&table->tb6_lock); + goto out2; + } } dst_hold(&rt->dst); @@ -977,13 +986,6 @@ redo_rt6_select: ip6_rt_put(rt); goto redo_fib6_lookup_lock; -out: - if (reachable) { - reachable = 0; - goto redo_fib6_lookup; - } - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); out2: rt->dst.lastuse = jiffies; rt->dst.__use++; -- cgit v1.1 From 9451a304ceeb96c45f91f88f2caa7364f5cee087 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 27 Oct 2014 19:11:56 +0100 Subject: ipv6: replace min/casting by min_t Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 725c763..50b95b2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2315,8 +2315,8 @@ ok: else stored_lft = 0; if (!update_lft && !create && stored_lft) { - const u32 minimum_lft = min( - stored_lft, (u32)MIN_VALID_LIFETIME); + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); valid_lft = max(valid_lft, minimum_lft); /* RFC4862 Section 5.5.3e: -- cgit v1.1 From ce256981e5f1be1f0aa6b4b8b16e0c2918468457 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 27 Oct 2014 19:12:58 +0100 Subject: ipv6: include linux/uaccess.h instead of asm/uaccess.h Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- net/ipv6/exthdrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index bfde361..601d896 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -47,7 +47,7 @@ #include #endif -#include +#include /* * Parsing tlv encoded headers. -- cgit v1.1 From 9de920eddb74bf67f1d6af603acc5ed05dcd35e9 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Fri, 17 Oct 2014 12:37:52 +0200 Subject: netfilter: refactor NAT redirect IPv6 code to use it from nf_tables This patch refactors the IPv6 code so it can be usable both from xt and nf_tables. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/Kconfig | 6 +++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nf_nat_redirect_ipv6.c | 75 +++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 net/ipv6/netfilter/nf_nat_redirect_ipv6.c (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6af874f..462eebb 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -82,6 +82,12 @@ config NF_NAT_MASQUERADE_IPV6 This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. +config NF_NAT_REDIRECT_IPV6 + tristate "IPv6 redirect support" + help + This is the kernel functionality to provide NAT in the redirect + flavour (redirect packet to local machine) for IPv6. + config NFT_MASQ_IPV6 tristate "IPv6 masquerade support for nf_tables" depends on NF_TABLES_IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbb25f0..6c2baab 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -19,6 +19,7 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o +obj-$(CONFIG_NF_NAT_REDIRECT_IPV6) += nf_nat_redirect_ipv6.o # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/nf_nat_redirect_ipv6.c b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c new file mode 100644 index 0000000..ea1308a --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c @@ -0,0 +1,75 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +unsigned int +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, + unsigned int hooknum) +{ + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (hooknum == NF_INET_LOCAL_OUT) { + newdst = loopback_addr; + } else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} +EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); -- cgit v1.1 From e9105f1bead4ec3f64904564c7c6268185d6b363 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Fri, 17 Oct 2014 12:39:09 +0200 Subject: netfilter: nf_tables: add new expression nft_redir This new expression provides NAT in the redirect flavour, which is to redirect packets to local machine. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/Kconfig | 9 +++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/nft_redir_ipv6.c | 77 +++++++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 net/ipv6/netfilter/nft_redir_ipv6.c (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 462eebb..0dbe5c7 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -97,6 +97,15 @@ config NFT_MASQ_IPV6 This is the expression that provides IPv4 masquerading support for nf_tables. +config NFT_REDIR_IPV6 + tristate "IPv6 redirect support for nf_tables" + depends on NF_TABLES_IPV6 + depends on NFT_REDIR + select NF_NAT_REDIRECT_IPV6 + help + This is the expression that provides IPv4 redirect support for + nf_tables. + endif # NF_NAT_IPV6 config IP6_NF_IPTABLES diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 6c2baab..d2ac9f5 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o +obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c new file mode 100644 index 0000000..83420ee --- /dev/null +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2014 Arturo Borrero Gonzalez + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void nft_redir_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_redir *priv = nft_expr_priv(expr); + struct nf_nat_range range; + unsigned int verdict; + + memset(&range, 0, sizeof(range)); + if (priv->sreg_proto_min) { + range.min_proto.all = (__force __be16) + data[priv->sreg_proto_min].data[0]; + range.max_proto.all = (__force __be16) + data[priv->sreg_proto_max].data[0]; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + } + + range.flags |= priv->flags; + + verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); + data[NFT_REG_VERDICT].verdict = verdict; +} + +static struct nft_expr_type nft_redir_ipv6_type; +static const struct nft_expr_ops nft_redir_ipv6_ops = { + .type = &nft_redir_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)), + .eval = nft_redir_ipv6_eval, + .init = nft_redir_init, + .dump = nft_redir_dump, + .validate = nft_redir_validate, +}; + +static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "redir", + .ops = &nft_redir_ipv6_ops, + .policy = nft_redir_policy, + .maxattr = NFTA_REDIR_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_redir_ipv6_module_init(void) +{ + return nft_register_expr(&nft_redir_ipv6_type); +} + +static void __exit nft_redir_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_redir_ipv6_type); +} + +module_init(nft_redir_ipv6_module_init); +module_exit(nft_redir_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arturo Borrero Gonzalez "); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); -- cgit v1.1 From 7fd2561e4ebdd070ebba6d3326c4c5b13942323f Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Tue, 28 Oct 2014 18:11:14 +0900 Subject: net: ipv6: Add a sysctl to make optimistic addresses useful candidates Add a sysctl that causes an interface's optimistic addresses to be considered equivalent to other non-deprecated addresses for source address selection purposes. Preferred addresses will still take precedence over optimistic addresses, subject to other ranking in the source address selection algorithm. This is useful where different interfaces are connected to different networks from different ISPs (e.g., a cell network and a home wifi network). The current behaviour complies with RFC 3484/6724, and it makes sense if the host has only one interface, or has multiple interfaces on the same network (same or cooperating administrative domain(s), but not in the multiple distinct networks case. For example, if a mobile device has an IPv6 address on an LTE network and then connects to IPv6-enabled wifi, while the wifi IPv6 address is undergoing DAD, IPv6 connections will try use the wifi default route with the LTE IPv6 address, and will get stuck until they time out. Also, because optimistic nodes can receive frames, issue an RTM_NEWADDR as soon as DAD starts (with the IFA_F_OPTIMSTIC flag appropriately set). A second RTM_NEWADDR is sent if DAD completes (the address flags have changed), otherwise an RTM_DELADDR is sent. Also: add an entry in ip-sysctl.txt for optimistic_dad. Signed-off-by: Erik Kline Acked-by: Lorenzo Colitti Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 50b95b2..8d12b7c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1170,6 +1170,9 @@ enum { IPV6_SADDR_RULE_PRIVACY, IPV6_SADDR_RULE_ORCHID, IPV6_SADDR_RULE_PREFIX, +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + IPV6_SADDR_RULE_NOT_OPTIMISTIC, +#endif IPV6_SADDR_RULE_MAX }; @@ -1197,6 +1200,15 @@ static inline int ipv6_saddr_preferred(int type) return 0; } +static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1257,10 +1269,16 @@ static int ipv6_get_saddr_eval(struct net *net, score->scopedist = ret; break; case IPV6_SADDR_RULE_PREFERRED: + { /* Rule 3: Avoid deprecated and optimistic addresses */ + u8 avoid = IFA_F_DEPRECATED; + + if (!ipv6_use_optimistic_addr(score->ifa->idev)) + avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || - !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)); + !(score->ifa->flags & avoid); break; + } #ifdef CONFIG_IPV6_MIP6 case IPV6_SADDR_RULE_HOA: { @@ -1306,6 +1324,14 @@ static int ipv6_get_saddr_eval(struct net *net, ret = score->ifa->prefix_len; score->matchlen = ret; break; +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + case IPV6_SADDR_RULE_NOT_OPTIMISTIC: + /* Optimistic addresses still have lower precedence than other + * preferred addresses. + */ + ret = !(score->ifa->flags & IFA_F_OPTIMISTIC); + break; +#endif default: ret = 0; } @@ -3222,8 +3248,15 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) * Optimistic nodes can start receiving * Frames right away */ - if (ifp->flags & IFA_F_OPTIMISTIC) + if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); + if (ipv6_use_optimistic_addr(idev)) { + /* Because optimistic nodes can use this address, + * notify listeners. If DAD fails, RTM_DELADDR is sent. + */ + ipv6_ifa_notify(RTM_NEWADDR, ifp); + } + } addrconf_dad_kick(ifp); out: @@ -4330,6 +4363,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; + array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic; #endif #ifdef CONFIG_IPV6_MROUTE array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding; @@ -5155,6 +5189,14 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, + { + .procname = "use_optimistic", + .data = &ipv6_devconf.use_optimistic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, #endif #ifdef CONFIG_IPV6_MROUTE { -- cgit v1.1 From 8ac2bde2a4a05c38e2bd733bea94507cb1461e06 Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Wed, 29 Oct 2014 10:51:13 -0200 Subject: netfilter: log: protect nf_log_register against double registering Currently, despite the comment right before the function, nf_log_register allows registering two loggers on with the same type and end up overwriting the previous register. Not a real issue today as current tree doesn't have two loggers for the same type but it's better to get this protected. Also make sure that all of its callers do error checking. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_log_ipv6.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 7b17a0b..7fc34d1 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,6 +5,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -398,8 +399,17 @@ static int __init nf_log_ipv6_init(void) if (ret < 0) return ret; - nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + ret = nf_log_register(NFPROTO_IPV6, &nf_ip6_logger); + if (ret < 0) { + pr_err("failed to register logger\n"); + goto err1; + } + return 0; + +err1: + unregister_pernet_subsys(&nf_log_ipv6_net_ops); + return ret; } static void __exit nf_log_ipv6_exit(void) -- cgit v1.1 From 40dc2ca3cb22666ecf8715a8c4ca0529148ecac8 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 29 Oct 2014 10:00:26 +0100 Subject: ipv6: spelling s/incomming/incoming Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0171f08..467f310 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2090,7 +2090,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { struct mfc6_cache *cache_proxy; - /* For an (*,G) entry, we only check that the incomming + /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); -- cgit v1.1 From fc08c258191f4acc79df232c0c65c857ee75e59f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 29 Oct 2014 11:38:17 +0100 Subject: ipv6: remove inline on static in c file remove __inline__ / inline and let compiler decide what to do with static functions Inspired-by: "David S. Miller" Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1a157ca..51ab096 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -69,7 +69,7 @@ struct ip6frag_skb_cb { #define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb)) -static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } @@ -178,7 +178,7 @@ static void ip6_frag_expire(unsigned long data) ip6_expire_frag_queue(net, fq, &ip6_frags); } -static __inline__ struct frag_queue * +static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst, u8 ecn) { @@ -684,21 +684,21 @@ static void ip6_frags_sysctl_unregister(void) unregister_net_sysctl_table(ip6_ctl_header); } #else -static inline int ip6_frags_ns_sysctl_register(struct net *net) +static int ip6_frags_ns_sysctl_register(struct net *net) { return 0; } -static inline void ip6_frags_ns_sysctl_unregister(struct net *net) +static void ip6_frags_ns_sysctl_unregister(struct net *net) { } -static inline int ip6_frags_sysctl_register(void) +static int ip6_frags_sysctl_register(void) { return 0; } -static inline void ip6_frags_sysctl_unregister(void) +static void ip6_frags_sysctl_unregister(void) { } #endif -- cgit v1.1 From 43728fa5c5e475e6f0059ec739e715fc49e4a478 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 29 Oct 2014 12:57:51 +0100 Subject: ipv6: remove assignment in if condition Do assignment before if condition and test !skb like in rawv6_recvmsg() Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- net/ipv6/raw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 896af88..075a0fb 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -548,7 +548,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, if (!rp->checksum) goto send; - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (!skb) goto out; offset = rp->offset; -- cgit v1.1 From acf722f73499d85e959ce99cf22d1b827d0b273a Mon Sep 17 00:00:00 2001 From: Alexey Andriyanov Date: Wed, 29 Oct 2014 10:54:52 +0300 Subject: ip6_tunnel: allow to change mode for the ip6tnl0 The fallback device is in ipv6 mode by default. The mode can not be changed in runtime, so there is no way to decapsulate ip4in6 packets coming from various sources without creating the specific tunnel ifaces for each peer. This allows to update the fallback tunnel device, but only the mode could be changed. Usual command should work for the fallback device: `ip -6 tun change ip6tnl0 mode any` The fallback device can not be hidden from the packet receiver as a regular tunnel, but there is no need for synchronization as long as we do single assignment. Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Alexey Andriyanov Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9409887..8c97cd1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -477,6 +477,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, int rel_msg = 0; u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; + u8 tproto; __u32 rel_info = 0; __u16 len; int err = -ENOENT; @@ -490,7 +491,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, &ipv6h->saddr)) == NULL) goto out; - if (t->parms.proto != ipproto && t->parms.proto != 0) + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) goto out; err = 0; @@ -791,6 +793,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 tproto; int err; rcu_read_lock(); @@ -799,7 +802,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, &ipv6h->daddr)) != NULL) { struct pcpu_sw_netstats *tstats; - if (t->parms.proto != ipproto && t->parms.proto != 0) { + tproto = ACCESS_ONCE(t->parms.proto); + if (tproto != ipproto && tproto != 0) { rcu_read_unlock(); goto discard; } @@ -1078,9 +1082,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; - if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) || + tproto = ACCESS_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPIP && tproto != 0) || !ip6_tnl_xmit_ctl(t)) return -1; @@ -1120,9 +1126,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) struct flowi6 fl6; __u8 dsfield; __u32 mtu; + u8 tproto; int err; - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || + tproto = ACCESS_ONCE(t->parms.proto); + if ((tproto != IPPROTO_IPV6 && tproto != 0) || !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; @@ -1285,6 +1293,14 @@ static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) return err; } +static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ + /* for default tnl0 device allow to change only the proto */ + t->parms.proto = p->proto; + netdev_state_change(t->dev); + return 0; +} + static void ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) { @@ -1384,7 +1400,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); - if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { + if (cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { err = -EEXIST; @@ -1392,8 +1408,10 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } else t = netdev_priv(dev); - - err = ip6_tnl_update(t, &p1); + if (dev == ip6n->fb_tnl_dev) + err = ip6_tnl0_update(t, &p1); + else + err = ip6_tnl_update(t, &p1); } if (t) { err = 0; -- cgit v1.1 From 646697b9e3fef913bb6393ebfb6115c442a96be7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 30 Oct 2014 02:55:38 +0100 Subject: syncookies: only increment SYNCOOKIESFAILED on validation error Only count packets that failed cookie-authentication. We can get SYNCOOKIESFAILED > 0 while we never even sent a single cookie. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/syncookies.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0e26e79..be291ba 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -171,8 +171,11 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; - if (tcp_synq_no_recent_overflow(sk) || - (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) { + if (tcp_synq_no_recent_overflow(sk)) + goto out; + + mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); + if (mss == 0) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); goto out; } -- cgit v1.1 From f1673381b1481a409238d4552a0700d490c5b36c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Nov 2014 17:35:02 +0100 Subject: syncookies: split cookie_check_timestamp() into two functions The function cookie_check_timestamp(), both called from IPv4/6 context, is being used to decode the echoed timestamp from the SYN/ACK into TCP options used for follow-up communication with the peer. We can remove ECN handling from that function, split it into a separate one, and simply rename the original function into cookie_decode_options(). cookie_decode_options() just fills in tcp_option struct based on the echoed timestamp received from the peer. Anything that fails in this function will actually discard the request socket. While this is the natural place for decoding options such as ECN which commit 172d69e63c7f ("syncookies: add support for ECN") added, we argue that in particular for ECN handling, it can be checked at a later point in time as the request sock would actually not need to be dropped from this, but just ECN support turned off. Therefore, we split this functionality into cookie_ecn_ok(), which tells us if the timestamp indicates ECN support AND the tcp_ecn sysctl is enabled. This prepares for per-route ECN support: just looking at the tcp_ecn sysctl won't be enough anymore at that point; if the timestamp indicates ECN and sysctl tcp_ecn == 0, we will also need to check the ECN dst metric. This would mean adding a route lookup to cookie_check_timestamp(), which we definitely want to avoid. As we already do a route lookup at a later point in cookie_{v4,v6}_check(), we can simply make use of that as well for the new cookie_ecn_ok() function w/o any additional cost. Joint work with Daniel Borkmann. Acked-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/syncookies.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index be291ba..52cc8cb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -166,7 +166,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int mss; struct dst_entry *dst; __u8 rcv_wscale; - bool ecn_ok = false; if (!sysctl_tcp_syncookies || !th->ack || th->rst) goto out; @@ -186,7 +185,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&tcp_opt, 0, sizeof(tcp_opt)); tcp_parse_options(skb, &tcp_opt, 0, NULL); - if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) + if (!cookie_timestamp_decode(&tcp_opt)) goto out; ret = NULL; @@ -223,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->expires = 0UL; req->num_retrans = 0; - ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; @@ -264,6 +262,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk)); ret = get_cookie_sock(sk, skb, req, dst); out: -- cgit v1.1 From f7b3bec6f5167efaf56b756abfafb924cb1d3050 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 3 Nov 2014 17:35:03 +0100 Subject: net: allow setting ecn via routing table This patch allows to set ECN on a per-route basis in case the sysctl tcp_ecn is not set to 1. In other words, when ECN is set for specific routes, it provides a tcp_ecn=1 behaviour for that route while the rest of the stack acts according to the global settings. One can use 'ip route change dev $dev $net features ecn' to toggle this. Having a more fine-grained per-route setting can be beneficial for various reasons, for example, 1) within data centers, or 2) local ISPs may deploy ECN support for their own video/streaming services [1], etc. There was a recent measurement study/paper [2] which scanned the Alexa's publicly available top million websites list from a vantage point in US, Europe and Asia: Half of the Alexa list will now happily use ECN (tcp_ecn=2, most likely blamed to commit 255cac91c3 ("tcp: extend ECN sysctl to allow server-side only ECN") ;)); the break in connectivity on-path was found is about 1 in 10,000 cases. Timeouts rather than receiving back RSTs were much more common in the negotiation phase (and mostly seen in the Alexa middle band, ranks around 50k-150k): from 12-thousand hosts on which there _may_ be ECN-linked connection failures, only 79 failed with RST when _not_ failing with RST when ECN is not requested. It's unclear though, how much equipment in the wild actually marks CE when buffers start to fill up. We thought about a fallback to non-ECN for retransmitted SYNs as another global option (which could perhaps one day be made default), but as Eric points out, there's much more work needed to detect broken middleboxes. Two examples Eric mentioned are buggy firewalls that accept only a single SYN per flow, and middleboxes that successfully let an ECN flow establish, but later mark CE for all packets (so cwnd converges to 1). [1] http://www.ietf.org/proceedings/89/slides/slides-89-tsvarea-1.pdf, p.15 [2] http://ecn.ethz.ch/ Joint work with Daniel Borkmann. Reference: http://thread.gmane.org/gmane.linux.network/335797 Suggested-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 52cc8cb..7337fc7 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -262,7 +262,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk)); + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); ret = get_cookie_sock(sk, skb, req, dst); out: -- cgit v1.1 From 869ba988fedb0fb9f4b0f18904623dc29775c503 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Tue, 4 Nov 2014 23:01:00 +0100 Subject: ipv6: trivial, add bracket for the if block The "else" block is on several lines and use bracket. Signed-off-by: Florent Fourcot Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 3dd7d4e..c143437 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -769,10 +769,10 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) static int ip6fl_seq_show(struct seq_file *seq, void *v) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); - if (v == SEQ_START_TOKEN) + if (v == SEQ_START_TOKEN) { seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); - else { + } else { struct ip6_flowlabel *fl = v; seq_printf(seq, "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", -- cgit v1.1 From e585f23636370320bc2071ca5ba2744ae37c3e51 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 4 Nov 2014 09:06:54 -0800 Subject: udp: Changes to udp_offload to support remote checksum offload Add a new GSO type, SKB_GSO_TUNNEL_REMCSUM, which indicates remote checksum offload being done (in this case inner checksum must not be offloaded to the NIC). Added logic in __skb_udp_tunnel_segment to handle remote checksum offload case. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/ip6_offload.c | 1 + net/ipv6/udp_offload.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index a071563..e976707 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -78,6 +78,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TUNNEL_REMCSUM | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 6b8f543..637ba2e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -42,6 +42,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | + SKB_GSO_TUNNEL_REMCSUM | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | -- cgit v1.1 From 51f3d02b980a338cd291d2bc7629cdfb2568424b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Nov 2014 16:46:40 -0500 Subject: net: Add and use skb_copy_datagram_msg() helper. This encapsulates all of the skb_copy_datagram_iovec() callers with call argument signature "skb, offset, msghdr->msg_iov, length". When we move to iov_iters in the networking, the iov_iter object will sit in the msghdr. Having a helper like this means there will be less places to touch during that transformation. Based upon descriptions and patch from Al Viro. Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 4 ++-- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2cdc383..5c6996e 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -351,7 +351,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; @@ -445,7 +445,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, msg->msg_flags |= MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 075a0fb..0cbcf98 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -486,11 +486,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, } if (skb_csum_unnecessary(skb)) { - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else if (msg->msg_flags&MSG_TRUNC) { if (__skb_checksum_complete(skb)) goto csum_copy_err; - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); } else { err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); if (err == -EINVAL) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f6ba535..9b68092 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -424,8 +424,8 @@ try_again: } if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), + msg, copied); else { err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); if (err == -EINVAL) -- cgit v1.1 From 1744bea1fa382f67263fdd9fee51d603fddb3da6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 4 Nov 2014 15:37:03 -0800 Subject: net: Convert SEQ_START_TOKEN/seq_printf to seq_puts Using a single fixed string is smaller code size than using a format and many string arguments. Reduces overall code size a little. $ size net/ipv4/igmp.o* net/ipv6/mcast.o* net/ipv6/ip6_flowlabel.o* text data bss dec hex filename 34269 7012 14824 56105 db29 net/ipv4/igmp.o.new 34315 7012 14824 56151 db57 net/ipv4/igmp.o.old 30078 7869 13200 51147 c7cb net/ipv6/mcast.o.new 30105 7869 13200 51174 c7e6 net/ipv6/mcast.o.old 11434 3748 8580 23762 5cd2 net/ipv6/ip6_flowlabel.o.new 11491 3748 8580 23819 5d0b net/ipv6/ip6_flowlabel.o.old Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/ip6_flowlabel.c | 3 +-- net/ipv6/mcast.c | 6 +----- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c143437..7221021 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -770,8 +770,7 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) { struct ip6fl_iter_state *state = ip6fl_seq_private(seq); if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", - "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); + seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n"); } else { struct ip6_flowlabel *fl = v; seq_printf(seq, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9648de2..e04f184 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2823,11 +2823,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); if (v == SEQ_START_TOKEN) { - seq_printf(seq, - "%3s %6s " - "%32s %32s %6s %6s\n", "Idx", - "Device", "Multicast Address", - "Source Address", "INC", "EXC"); + seq_puts(seq, "Idx Device Multicast Address Source Address INC EXC\n"); } else { seq_printf(seq, "%3d %6.6s %pi6 %pi6 %6lu %6lu\n", -- cgit v1.1 From 4c672e4b42bc8046d63a6eb0a2c6a450a501af32 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 5 Nov 2014 20:27:38 +0100 Subject: ipv6: mld: fix add_grhead skb_over_panic for devs with large MTUs It has been reported that generating an MLD listener report on devices with large MTUs (e.g. 9000) and a high number of IPv6 addresses can trigger a skb_over_panic(): skbuff: skb_over_panic: text:ffffffff80612a5d len:3776 put:20 head:ffff88046d751000 data:ffff88046d751010 tail:0xed0 end:0xec0 dev:port1 ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:100! invalid opcode: 0000 [#1] SMP Modules linked in: ixgbe(O) CPU: 3 PID: 0 Comm: swapper/3 Tainted: G O 3.14.23+ #4 [...] Call Trace: [] ? skb_put+0x3a/0x3b [] ? add_grhead+0x45/0x8e [] ? add_grec+0x394/0x3d4 [] ? mld_ifc_timer_expire+0x195/0x20d [] ? mld_dad_timer_expire+0x45/0x45 [] ? call_timer_fn.isra.29+0x12/0x68 [] ? run_timer_softirq+0x163/0x182 [] ? __do_softirq+0xe0/0x21d [] ? irq_exit+0x4e/0xd3 [] ? smp_apic_timer_interrupt+0x3b/0x46 [] ? apic_timer_interrupt+0x6a/0x70 mld_newpack() skb allocations are usually requested with dev->mtu in size, since commit 72e09ad107e7 ("ipv6: avoid high order allocations") we have changed the limit in order to be less likely to fail. However, in MLD/IGMP code, we have some rather ugly AVAILABLE(skb) macros, which determine if we may end up doing an skb_put() for adding another record. To avoid possible fragmentation, we check the skb's tailroom as skb->dev->mtu - skb->len, which is a wrong assumption as the actual max allocation size can be much smaller. The IGMP case doesn't have this issue as commit 57e1ab6eaddc ("igmp: refine skb allocations") stores the allocation size in the cb[]. Set a reserved_tailroom to make it fit into the MTU and use skb_availroom() helper instead. This also allows to get rid of igmp_skb_size(). Reported-by: Wei Liu Fixes: 72e09ad107e7 ("ipv6: avoid high order allocations") Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Hannes Frederic Sowa Cc: David L Stevens Acked-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index e04f184..5ce107c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1550,7 +1550,7 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) { struct net_device *dev = idev->dev; struct net *net = dev_net(dev); @@ -1561,13 +1561,13 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) const struct in6_addr *saddr; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + unsigned int size = mtu + hlen + tlen; int err; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; /* we assume size > sizeof(ra) here */ - size += hlen + tlen; /* limit our allocations to order-0 page */ size = min_t(int, size, SKB_MAX_ORDER(0, 0)); skb = sock_alloc_send_skb(sk, size, 1, &err); @@ -1576,6 +1576,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) return NULL; skb->priority = TC_PRIO_CONTROL; + skb->reserved_tailroom = skb_end_offset(skb) - + min(mtu, skb_end_offset(skb)); skb_reserve(skb, hlen); if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { @@ -1690,8 +1692,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, return skb; } -#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \ - skb_tailroom(skb)) : 0) +#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted, int crsend) -- cgit v1.1 From e1b2cb655060e081e73b384b1fc8b2e978f73467 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 5 Nov 2014 16:49:38 -0800 Subject: fou: Fix typo in returning flags in netlink When filling netlink info, dport is being returned as flags. Fix instances to return correct value. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 58e5b47..45ad924 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1714,7 +1714,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, - tunnel->encap.dport)) + tunnel->encap.flags)) goto nla_put_failure; return 0; -- cgit v1.1 From 59b93b41e7fa71138734a911b11b044340dd16bd Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 5 Nov 2014 15:27:48 -0800 Subject: net: Remove MPLS GSO feature. Device can export MPLS GSO support in dev->mpls_features same way it export vlan features in dev->vlan_features. So it is safe to remove NETIF_F_GSO_MPLS redundant flag. Signed-off-by: Pravin B Shelar --- net/ipv6/ip6_offload.c | 1 - net/ipv6/udp_offload.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e976707..fd76ce9 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -79,7 +79,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 637ba2e..b6aa8ed 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -46,8 +46,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_MPLS) || + SKB_GSO_SIT) || !(type & (SKB_GSO_UDP)))) goto out; -- cgit v1.1 From d50051407f136028108cfda068d55ef053a54fe1 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 5 Nov 2014 08:02:48 +0100 Subject: ipv6: Allow sending packets through tunnels with wildcard endpoints Currently we need the IP6_TNL_F_CAP_XMIT capabiltiy to transmit packets through an ipv6 tunnel. This capability is set when the tunnel gets configured, based on the tunnel endpoint addresses. On tunnels with wildcard tunnel endpoints, we need to do the capabiltiy checking on a per packet basis like it is done in the receive path. This patch extends ip6_tnl_xmit_ctl() to take local and remote addresses as parameters to allow for per packet capabiltiy checking. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 2 +- net/ipv6/ip6_tunnel.c | 23 +++++++++++++++-------- net/ipv6/ip6_vti.c | 10 ++++++++-- 3 files changed, 24 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 12c3c8e..1fcf62e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -902,7 +902,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, struct net_device_stats *stats = &t->dev->stats; int ret; - if (!ip6_tnl_xmit_ctl(t)) + if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr)) goto tx_err; switch (skb->protocol) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8c97cd1..a8f94ff 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -909,24 +909,28 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t, + const struct in6_addr *laddr, + const struct in6_addr *raddr) { struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = t->net; - if (p->flags & IP6_TNL_F_CAP_XMIT) { + if ((p->flags & IP6_TNL_F_CAP_XMIT) || + ((p->flags & IP6_TNL_F_CAP_PER_PACKET) && + (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { struct net_device *ldev = NULL; rcu_read_lock(); if (p->link) ldev = dev_get_by_index_rcu(net, p->link); - if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0))) + if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); - else if (!ipv6_addr_is_multicast(&p->raddr) && - unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0))) + else if (!ipv6_addr_is_multicast(raddr) && + unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); else @@ -977,6 +981,10 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); + + if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) + goto tx_err_link_failure; + if (!dst) { ndst = ip6_route_output(net, NULL, fl6); @@ -1086,8 +1094,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) int err; tproto = ACCESS_ONCE(t->parms.proto); - if ((tproto != IPPROTO_IPIP && tproto != 0) || - !ip6_tnl_xmit_ctl(t)) + if (tproto != IPPROTO_IPIP && tproto != 0) return -1; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) @@ -1131,7 +1138,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) tproto = ACCESS_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || - !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) + ip6_tnl_addr_conflict(t, ipv6h)) return -1; offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d440bb5..0e8e97e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -416,6 +416,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device_stats *stats = &t->dev->stats; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; + struct xfrm_state *x; int err = -1; if (!dst) @@ -429,7 +430,12 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) goto tx_err_link_failure; } - if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr)) + x = dst->xfrm; + if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr)) + goto tx_err_link_failure; + + if (!ip6_tnl_xmit_ctl(t, (const struct in6_addr *)&x->props.saddr, + (const struct in6_addr *)&x->id.daddr)) goto tx_err_link_failure; tdev = dst->dev; @@ -484,7 +490,7 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ipv6h = ipv6_hdr(skb); if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + vti6_addr_conflict(t, ipv6h)) goto tx_err; xfrm_decode_session(skb, &fl, AF_INET6); -- cgit v1.1 From ea3dc9601bda69d8d695b57c4f7a997cd7039781 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 5 Nov 2014 08:03:50 +0100 Subject: ip6_tunnel: Add support for wildcard tunnel endpoints. This patch adds support for tunnels with local or remote wildcard endpoints. With this we get a NBMA tunnel mode like we have it for ipv4 and sit tunnels. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a8f94ff..4550d08 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -183,6 +183,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + struct in6_addr any; for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && @@ -190,6 +191,22 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ (t->dev->flags & IFF_UP)) return t; } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -979,7 +996,29 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, u8 proto; int err = -1; - if (!fl6->flowi6_mark) + /* NBMA tunnel */ + if (ipv6_addr_any(&t->parms.raddr)) { + struct in6_addr *addr6; + struct neighbour *neigh; + int addr_type; + + if (!skb_dst(skb)) + goto tx_err_link_failure; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (!neigh) + goto tx_err_link_failure; + + addr6 = (struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) + addr6 = &ipv6_hdr(skb)->daddr; + + memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr)); + neigh_release(neigh); + } else if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr)) -- cgit v1.1 From cbffccc970394f82e397fccbeb136eeaffb3c552 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 5 Nov 2014 14:39:21 -0800 Subject: net; ipv[46] - Remove 2 unnecessary NETDEBUG OOM messages These messages aren't useful as there's a generic dump_stack() on OOM. Neaten the comment and if test above the OOM by separating the assign in if into an allocation then if test. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e950c2..916d2a1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -747,13 +747,11 @@ slow_path: if (len < left) { len &= ~7; } - /* - * Allocate buffer. - */ - if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + - hroom + troom, GFP_ATOMIC)) == NULL) { - NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); + /* Allocate buffer */ + frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + + hroom + troom, GFP_ATOMIC); + if (!frag) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; -- cgit v1.1 From 450834977796cc74d1265d7dfe69cf6767537dfc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 5 Nov 2014 15:36:08 -0800 Subject: net: esp: Convert NETDEBUG to pr_info Commit 64ce207306de ("[NET]: Make NETDEBUG pure printk wrappers") originally had these NETDEBUG printks as always emitting. Commit a2a316fd068c ("[NET]: Replace CONFIG_NET_DEBUG with sysctl") added a net_msg_warn sysctl to these NETDEBUG uses. Convert these NETDEBUG uses to normal pr_info calls. This changes the output prefix from "ESP: " to include "IPSec: " for the ipv4 case and "IPv6: " for the ipv6 case. These output lines are now like the other messages in the files. Other miscellanea: Neaten the arithmetic spacing to be consistent with other arithmetic spacing in the files. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/esp6.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 83fc3a3..d21d7b2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -544,12 +544,12 @@ static int esp_init_authenc(struct xfrm_state *x) BUG_ON(!aalg_desc); err = -EINVAL; - if (aalg_desc->uinfo.auth.icv_fullbits/8 != + if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_aead_authsize(aead), - aalg_desc->uinfo.auth.icv_fullbits/8); + pr_info("ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_aead_authsize(aead), + aalg_desc->uinfo.auth.icv_fullbits / 8); goto free_key; } -- cgit v1.1 From 36cbb2452cbafca64dcdd3578047433787900cf0 Mon Sep 17 00:00:00 2001 From: Rick Jones Date: Thu, 6 Nov 2014 10:37:54 -0800 Subject: udp: Increment UDP_MIB_IGNOREDMULTI for arriving unmatched multicasts As NIC multicast filtering isn't perfect, and some platforms are quite content to spew broadcasts, we should not trigger an event for skb:kfree_skb when we do not have a match for such an incoming datagram. We do though want to avoid sweeping the matter under the rug entirely, so increment a suitable statistic. This incorporates feedback from David L. Stevens, Karl Neiss and Eric Dumazet. V3 - use bool per David Miller Signed-off-by: Rick Jones Signed-off-by: David S. Miller --- net/ipv6/proc.c | 1 + net/ipv6/udp.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 1752cd0..679253d0 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -136,6 +136,7 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS), + SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI), SNMP_MIB_SENTINEL }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9b68092..b756355 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -771,7 +771,7 @@ static void udp6_csum_zero_error(struct sk_buff *skb) */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, - struct udp_table *udptable) + struct udp_table *udptable, int proto) { struct sock *sk, *stack[256 / sizeof(struct sock *)]; const struct udphdr *uh = udp_hdr(skb); @@ -781,6 +781,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif = inet6_iif(skb); unsigned int count = 0, offset = offsetof(typeof(*sk), sk_nulls_node); unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); + bool inner_flushed = false; if (use_hash2) { hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) & @@ -803,6 +804,7 @@ start_lookup: (uh->check || udp_sk(sk)->no_check6_rx)) { if (unlikely(count == ARRAY_SIZE(stack))) { flush_stack(stack, count, skb, ~0); + inner_flushed = true; count = 0; } stack[count++] = sk; @@ -821,7 +823,10 @@ start_lookup: if (count) { flush_stack(stack, count, skb, count - 1); } else { - kfree_skb(skb); + if (!inner_flushed) + UDP_INC_STATS_BH(net, UDP_MIB_IGNOREDMULTI, + proto == IPPROTO_UDPLITE); + consume_skb(skb); } return 0; } @@ -873,7 +878,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ if (ipv6_addr_is_multicast(daddr)) return __udp6_lib_mcast_deliver(net, skb, - saddr, daddr, udptable); + saddr, daddr, udptable, proto); /* Unicast */ -- cgit v1.1 From 3d97379a67486bc481ab5b8f7aa5b7ceb6154a95 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2014 05:54:27 -0800 Subject: tcp: move sk_mark_napi_id() at the right place sk_mark_napi_id() is used to record for a flow napi id of incoming packets for busypoll sake. We should do this only on established flows, not on listeners. This was 'working' by virtue of the socket cloning, but doing this on SYN packets in unecessary cache line dirtying. Even if we move sk_napi_id in the same cache line than sk_lock, we are working to make SYN processing lockless, so it is desirable to set sk_napi_id only for established flows. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ace29b6..fd8e50b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1293,6 +1293,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; sock_rps_save_rxhash(sk, skb); + sk_mark_napi_id(sk, skb); if (dst) { if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || dst->ops->check(dst, np->rx_dst_cookie) == NULL) { @@ -1322,6 +1323,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) */ if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); + sk_mark_napi_id(sk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1454,7 +1456,6 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; - sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); -- cgit v1.1 From 2c8c56e15df3d4c2af3d656e44feb18789f75837 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Nov 2014 05:54:28 -0800 Subject: net: introduce SO_INCOMING_CPU Alternative to RPS/RFS is to use hardware support for multiple queues. Then split a set of million of sockets into worker threads, each one using epoll() to manage events on its own socket pool. Ideally, we want one thread per RX/TX queue/cpu, but we have no way to know after accept() or connect() on which queue/cpu a socket is managed. We normally use one cpu per RX queue (IRQ smp_affinity being properly set), so remembering on socket structure which cpu delivered last packet is enough to solve the problem. After accept(), connect(), or even file descriptor passing around processes, applications can use : int cpu; socklen_t len = sizeof(cpu); getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len); And use this information to put the socket into the right silo for optimal performance, as all networking stack should run on the appropriate cpu, without need to send IPI (RPS/RFS). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 1 + net/ipv6/udp.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fd8e50b..1985b49 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1456,6 +1456,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_incoming_cpu_update(sk); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b756355..d1fe362 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -577,6 +577,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!ipv6_addr_any(&sk->sk_v6_daddr)) { sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); + sk_incoming_cpu_update(sk); } rc = sock_queue_rcv_skb(sk, skb); -- cgit v1.1 From ba7a46f16dd29f93303daeb1fee8af316c5a07f4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 11 Nov 2014 10:59:17 -0800 Subject: net: Convert LIMIT_NETDEBUG to net_dbg_ratelimited Use the more common dynamic_debug capable net_dbg_ratelimited and remove the LIMIT_NETDEBUG macro. All messages are still ratelimited. Some KERN_ uses are changed to KERN_DEBUG. This may have some negative impact on messages that were emitted at KERN_INFO that are not not enabled at all unless DEBUG is defined or dynamic_debug is enabled. Even so, these messages are now _not_ emitted by default. This also eliminates the use of the net_msg_warn sysctl "/proc/sys/net/core/warnings". For backward compatibility, the sysctl is not removed, but it has no function. The extern declaration of net_msg_warn is removed from sock.h and made static in net/core/sysctl_net_core.c Miscellanea: o Update the sysctl documentation o Remove the embedded uses of pr_fmt o Coalesce format fragments o Realign arguments Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++---- net/ipv6/ah6.c | 7 +++---- net/ipv6/datagram.c | 4 ++-- net/ipv6/esp6.c | 4 ++-- net/ipv6/exthdrs.c | 18 +++++++++--------- net/ipv6/icmp.c | 15 +++++++-------- net/ipv6/mip6.c | 11 ++++++----- net/ipv6/netfilter.c | 2 +- net/ipv6/udp.c | 31 +++++++++++++------------------ 9 files changed, 45 insertions(+), 53 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 06e8978..251fcb4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1411,10 +1411,8 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, if (unlikely(score->addr_type == IPV6_ADDR_ANY || score->addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ADDRCONF: unspecified / multicast address " - "assigned as unicast address on %s", - dev->name); + net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", + dev->name); continue; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 6d16eb0e..8ab1989 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -272,10 +272,9 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) ipv6_rearrange_destopt(iph, exthdr.opth); case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { - LIMIT_NETDEBUG( - KERN_WARNING "overrun %sopts\n", - nexthdr == NEXTHDR_HOP ? - "hop" : "dest"); + net_dbg_ratelimited("overrun %sopts\n", + nexthdr == NEXTHDR_HOP ? + "hop" : "dest"); return -EINVAL; } break; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5c6996e..cc11396 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -893,8 +893,8 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, break; } default: - LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", - cmsg->cmsg_type); + net_dbg_ratelimited("invalid cmsg type: %d\n", + cmsg->cmsg_type); err = -EINVAL; goto exit_f; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d21d7b2..d2c2d74 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -286,8 +286,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) err = -EINVAL; padlen = nexthdr[0]; if (padlen + 2 + alen >= elen) { - LIMIT_NETDEBUG(KERN_WARNING "ipsec esp packet is garbage " - "padlen=%d, elen=%d\n", padlen + 2, elen - alen); + net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", + padlen + 2, elen - alen); goto out; } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 601d896..a7bbbe4 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -184,7 +184,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) int ret; if (opt->dsthao) { - LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); + net_dbg_ratelimited("hao duplicated\n"); goto discard; } opt->dsthao = opt->dst1; @@ -193,14 +193,14 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); if (hao->length != 16) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao invalid option length = %d\n", hao->length); + net_dbg_ratelimited("hao invalid option length = %d\n", + hao->length); goto discard; } if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); + net_dbg_ratelimited("hao is not an unicast addr: %pI6\n", + &hao->addr); goto discard; } @@ -551,8 +551,8 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", - nh[optoff + 1]); + net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n", + nh[optoff + 1]); kfree_skb(skb); return false; } @@ -566,8 +566,8 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { - LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", - nh[optoff+1]); + net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", + nh[optoff+1]); IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 62c1037..0929340 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -338,7 +338,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, * anycast. */ if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n"); + net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } @@ -452,7 +452,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n"); + net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n"); return; } @@ -460,7 +460,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n"); + net_dbg_ratelimited("icmp6_send: no reply to icmp error\n"); return; } @@ -509,7 +509,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { - LIMIT_NETDEBUG(KERN_DEBUG "icmp: len problem\n"); + net_dbg_ratelimited("icmp: len problem\n"); goto out_dst_release; } @@ -706,9 +706,8 @@ static int icmpv6_rcv(struct sk_buff *skb) daddr = &ipv6_hdr(skb)->daddr; if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); + net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", + saddr, daddr); goto csum_error; } @@ -781,7 +780,7 @@ static int icmpv6_rcv(struct sk_buff *skb) if (type & ICMPV6_INFOMSG_MASK) break; - LIMIT_NETDEBUG(KERN_DEBUG "icmpv6: msg of unknown type\n"); + net_dbg_ratelimited("icmpv6: msg of unknown type\n"); /* * error of unknown type. diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f61429d..b9779d4 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -97,16 +97,17 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", - mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); + net_dbg_ratelimited("mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, + mip6_mh_len(mh->ip6mh_type)); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", - mh->ip6mh_proto); + net_dbg_ratelimited("mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + skb_network_header_len(skb)); return -1; @@ -288,7 +289,7 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, * XXX: packet if HAO exists. */ if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { - LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); + net_dbg_ratelimited("mip6: hao exists already, override\n"); return offset; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index d38e6a8..398377a 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -36,7 +36,7 @@ int ip6_route_me_harder(struct sk_buff *skb) err = dst->error; if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); + net_dbg_ratelimited("ip6_route_me_harder: No more route\n"); dst_release(dst); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d1fe362..0ba3de4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -660,15 +660,13 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage" - " %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); + net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); goto drop; } if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d " - "too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); + net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); goto drop; } } @@ -761,9 +759,9 @@ static void udp6_csum_zero_error(struct sk_buff *skb) /* RFC 2460 section 8.1 says that we SHOULD log * this error. Well, it is reasonable. */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", - &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), - &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); + net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); } /* @@ -931,14 +929,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", - proto == IPPROTO_UDPLITE ? "-Lite" : "", - saddr, - ntohs(uh->source), - ulen, - skb->len, - daddr, - ntohs(uh->dest)); + net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + saddr, ntohs(uh->source), + ulen, skb->len, + daddr, ntohs(uh->dest)); goto discard; csum_error: UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); @@ -1290,7 +1285,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + net_dbg_ratelimited("udp cork app bug 2\n"); err = -EINVAL; goto out; } -- cgit v1.1 From d7480fd3b1738a8eae6a76098b17af318cf9b9cc Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 10 Nov 2014 15:59:36 -0800 Subject: neigh: remove dynamic neigh table registration support Currently there are only three neigh tables in the whole kernel: arp table, ndisc table and decnet neigh table. What's more, we don't support registering multiple tables per family. Therefore we can just make these tables statically built-in. Cc: David S. Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 4cb45c1..2c9f6bf 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1763,7 +1763,7 @@ int __init ndisc_init(void) /* * Initialize the neighbour table */ - neigh_table_init(&nd_tbl); + neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, @@ -1796,6 +1796,6 @@ void ndisc_cleanup(void) #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif - neigh_table_clear(&nd_tbl); + neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); } -- cgit v1.1 From 56768644317c7746cb63f61573fcdc2355885707 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 13 Nov 2014 10:04:16 +0100 Subject: netfilter: fix various sparse warnings net/bridge/br_netfilter.c:870:6: symbol 'br_netfilter_enable' was not declared. Should it be static? no; add include net/ipv4/netfilter/nft_reject_ipv4.c:22:6: symbol 'nft_reject_ipv4_eval' was not declared. Should it be static? yes net/ipv6/netfilter/nf_reject_ipv6.c:16:6: symbol 'nf_send_reset6' was not declared. Should it be static? no; add include net/ipv6/netfilter/nft_reject_ipv6.c:22:6: symbol 'nft_reject_ipv6_eval' was not declared. Should it be static? yes net/netfilter/core.c:33:32: symbol 'nf_ipv6_ops' was not declared. Should it be static? no; add include net/netfilter/xt_DSCP.c:40:57: cast truncates bits from constant value (ffffff03 becomes 3) net/netfilter/xt_DSCP.c:57:59: cast truncates bits from constant value (ffffff03 becomes 3) add __force, 3 is what we want. net/ipv4/netfilter/nf_log_arp.c:77:6: symbol 'nf_log_arp_packet' was not declared. Should it be static? yes net/ipv4/netfilter/nf_reject_ipv4.c:17:6: symbol 'nf_send_reset' was not declared. Should it be static? no; add include Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_reject_ipv6.c | 1 + net/ipv6/netfilter/nft_reject_ipv6.c | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 20d9def..87576ff 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -11,6 +11,7 @@ #include #include #include +#include #include void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 0bc19fa..f732859 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -19,9 +19,9 @@ #include #include -void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) +static void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); @@ -38,7 +38,6 @@ void nft_reject_ipv6_eval(const struct nft_expr *expr, data[NFT_REG_VERDICT].verdict = NF_DROP; } -EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); static struct nft_expr_type nft_reject_ipv6_type; static const struct nft_expr_ops nft_reject_ipv6_ops = { -- cgit v1.1 From e3e3217029a35c579bf100998b43976d0b1cb8d7 Mon Sep 17 00:00:00 2001 From: Rick Jones Date: Mon, 17 Nov 2014 14:04:29 -0800 Subject: icmp: Remove some spurious dropped packet profile hits from the ICMP path If icmp_rcv() has successfully processed the incoming ICMP datagram, we should use consume_skb() rather than kfree_skb() because a hit on the likes of perf -e skb:kfree_skb is not called-for. Signed-off-by: Rick Jones Signed-off-by: David S. Miller --- net/ipv6/icmp.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 0929340..39b3ff9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -679,6 +679,7 @@ static int icmpv6_rcv(struct sk_buff *skb) const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; + bool success = false; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -726,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - ping_rcv(skb); + success = ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -790,7 +791,14 @@ static int icmpv6_rcv(struct sk_buff *skb) icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); } - kfree_skb(skb); + /* until the v6 path can be better sorted assume failure and + * preserve the status quo behaviour for the rest of the paths to here + */ + if (success) + consume_skb(skb); + else + kfree_skb(skb); + return 0; csum_error: -- cgit v1.1 From fbe68ee87522f6eaa10f9076c0a7117e1613f2f7 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Thu, 20 Nov 2014 10:01:49 +0100 Subject: vti6: Add a lookup method for tunnels with wildcard endpoints. Currently we can't lookup tunnels with wildcard endpoints. This patch adds a method to lookup these tunnels in the receive path. Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d440bb5..6101919 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -95,6 +95,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, unsigned int hash = HASH(remote, local); struct ip6_tnl *t; struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct in6_addr any; for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && @@ -102,6 +103,22 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, (t->dev->flags & IFF_UP)) return t; } + + memset(&any, 0, sizeof(any)); + hash = HASH(&any, local); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(local, &t->parms.laddr) && + (t->dev->flags & IFF_UP)) + return t; + } + + hash = HASH(remote, &any); + for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + if (ipv6_addr_equal(remote, &t->parms.raddr) && + (t->dev->flags & IFF_UP)) + return t; + } + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; -- cgit v1.1 From e5d08d718a7cd72c6aa79b5f0c309d9f0d7e4a95 Mon Sep 17 00:00:00 2001 From: Ian Morris Date: Sun, 23 Nov 2014 21:28:43 +0000 Subject: ipv6: coding style improvements (remove assignment in if statements) This change has no functional impact and simply addresses some coding style issues detected by checkpatch. Specifically this change adjusts "if" statements which also include the assignment of a variable. No changes to the resultant object files result as determined by objdiff. Signed-off-by: Ian Morris Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 12 ++++++++---- net/ipv6/ah6.c | 7 ++++--- net/ipv6/esp6.c | 3 ++- net/ipv6/icmp.c | 3 ++- net/ipv6/ip6_flowlabel.c | 6 +++++- net/ipv6/ip6_input.c | 3 ++- net/ipv6/ip6_output.c | 12 ++++++++---- net/ipv6/ip6_tunnel.c | 15 ++++++++------- net/ipv6/ip6_vti.c | 4 ++-- net/ipv6/ndisc.c | 7 ++++--- net/ipv6/reassembly.c | 3 ++- net/ipv6/sit.c | 7 ++++--- net/ipv6/udp.c | 6 ++++-- 13 files changed, 55 insertions(+), 33 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 251fcb4..9eac3a7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2543,7 +2543,8 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, if (!dev) return -ENODEV; - if ((idev = __in6_dev_get(dev)) == NULL) + idev = __in6_dev_get(dev); + if (idev == NULL) return -ENXIO; read_lock_bh(&idev->lock); @@ -2690,7 +2691,8 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2813,7 +2815,8 @@ static void addrconf_sit_config(struct net_device *dev) * our v4 addrs in the tunnel */ - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2837,7 +2840,8 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); - if ((idev = ipv6_find_idev(dev)) == NULL) { + idev = ipv6_find_idev(dev); + if (idev == NULL) { pr_debug("%s: add_dev failed\n", __func__); return; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 8ab1989..a6727ad 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -353,7 +353,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ahp = x->data; ahash = ahp->ahash; - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) goto out; nfrags = err; @@ -559,8 +560,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, ah_hlen)) goto out; - - if ((err = skb_cow_data(skb, 0, &trailer)) < 0) + err = skb_cow_data(skb, 0, &trailer); + if (err < 0) goto out; nfrags = err; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d2c2d74..e48f2c7 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -345,7 +345,8 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } - if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) { + nfrags = skb_cow_data(skb, 0, &trailer); + if (nfrags < 0) { ret = -EINVAL; goto out; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 39b3ff9..d674152 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -243,7 +243,8 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *icmp6h; int err = 0; - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (skb == NULL) goto out; icmp6h = icmp6_hdr(skb); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 7221021..2f780cb 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -654,7 +654,11 @@ release: goto done; err = -ENOMEM; - if (sfl1 == NULL || (err = mem_check(sk)) != 0) + if (sfl1 == NULL) + goto done; + + err = mem_check(sk); + if (err != 0) goto done; fl1 = fl_intern(net, fl, freq.flr_label); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index a3084ab..aacdcb4 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -220,7 +220,8 @@ resubmit: nexthdr = skb_network_header(skb)[nhoff]; raw = raw6_local_deliver(skb, nexthdr); - if ((ipprot = rcu_dereference(inet6_protos[nexthdr])) != NULL) { + ipprot = rcu_dereference(inet6_protos[nexthdr]); + if (ipprot != NULL) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 916d2a1..ce69a12 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -898,7 +898,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, if (*dst == NULL) *dst = ip6_route_output(net, sk, fl6); - if ((err = (*dst)->error)) + err = (*dst)->error; + if (err) goto out_err_release; if (ipv6_addr_any(&fl6->saddr)) { @@ -946,7 +947,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); *dst = ip6_route_output(net, sk, &fl_gw6); - if ((err = (*dst)->error)) + err = (*dst)->error; + if (err) goto out_err_release; } } @@ -1054,7 +1056,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, * device, so create one single skb packet containing complete * udp datagram */ - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + skb = skb_peek_tail(&sk->sk_write_queue); + if (skb == NULL) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1534,7 +1537,8 @@ int ip6_push_pending_frames(struct sock *sk) unsigned char proto = fl6->flowi6_proto; int err = 0; - if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) + skb = __skb_dequeue(&sk->sk_write_queue); + if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e2b6cfb..92b3da5 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -501,8 +501,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, processing of the error. */ rcu_read_lock(); - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, - &ipv6h->saddr)) == NULL) + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); + if (t == NULL) goto out; tproto = ACCESS_ONCE(t->parms.proto); @@ -550,7 +550,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, mtu = IPV6_MIN_MTU; t->dev->mtu = mtu; - if ((len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) { + len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); + if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; rel_code = 0; rel_info = mtu; @@ -811,9 +812,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, int err; rcu_read_lock(); - - if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, - &ipv6h->daddr)) != NULL) { + t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + if (t != NULL) { struct pcpu_sw_netstats *tstats; tproto = ACCESS_ONCE(t->parms.proto); @@ -1069,7 +1069,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb; - if (!(new_skb = skb_realloc_headroom(skb, max_headroom))) + new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) goto tx_err_dst_release; if (skb->sk) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ec84d03..8308216 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -287,8 +287,8 @@ static int vti6_rcv(struct sk_buff *skb) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); rcu_read_lock(); - if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, - &ipv6h->daddr)) != NULL) { + t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); + if (t != NULL) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2c9f6bf..6828667 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -162,7 +162,8 @@ static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; - if ((space -= data_len) > 0) + space -= data_len; + if (space > 0) memset(opt, 0, space); } @@ -656,8 +657,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; - - if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) { + probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); + if (probes < 0) { if (!(neigh->nud_state & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 51ab096..d7d70e6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -429,7 +429,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct sk_buff *clone; int i, plen = 0; - if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) + clone = alloc_skb(0, GFP_ATOMIC); + if (clone == NULL) goto out_oom; clone->next = head->next; head->next = clone; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 660496d..213546b 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1241,7 +1241,8 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) goto done; err = -ENOENT; - if ((t = ipip6_tunnel_locate(net, &p, 0)) == NULL) + t = ipip6_tunnel_locate(net, &p, 0); + if (t == NULL) goto done; err = -EPERM; if (t == netdev_priv(sitn->fb_tunnel_dev)) @@ -1836,8 +1837,8 @@ static int __net_init sit_init_net(struct net *net) goto err_dev_free; ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); - - if ((err = register_netdev(sitn->fb_tunnel_dev))) + err = register_netdev(sitn->fb_tunnel_dev); + if (err) goto err_reg_dev; t = netdev_priv(sitn->fb_tunnel_dev); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0ba3de4..dbc0b04 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -357,7 +357,8 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct sock *sk; const struct ipv6hdr *iph = ipv6_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) + sk = skb_steal_sock(skb); + if (unlikely(sk)) return sk; return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), @@ -1026,7 +1027,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) fl6 = &inet->cork.fl.u.ip6; /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + skb = skb_peek(&sk->sk_write_queue); + if (skb == NULL) goto out; /* -- cgit v1.1 From 227158db160449b6513d2e31894a135104b90e90 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Apr 2014 18:47:38 -0400 Subject: new helper: skb_copy_and_csum_datagram_msg() Signed-off-by: Al Viro --- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0cbcf98..8baa53e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -492,7 +492,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, goto csum_copy_err; err = skb_copy_datagram_msg(skb, 0, msg, copied); } else { - err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov); + err = skb_copy_and_csum_datagram_msg(skb, 0, msg); if (err == -EINVAL) goto csum_copy_err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index dbc0b04..7cfb5d7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -428,7 +428,7 @@ try_again: err = skb_copy_datagram_msg(skb, sizeof(struct udphdr), msg, copied); else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg); if (err == -EINVAL) goto csum_copy_err; } -- cgit v1.1 From 73cf0e923d685a6a1b7754c7d29cc14944f271d9 Mon Sep 17 00:00:00 2001 From: zhuyj Date: Wed, 26 Nov 2014 10:25:58 +0800 Subject: ipv6: Remove unnecessary test The "init_net" test in function addrconf_exit_net is introduced in commit 44a6bd29 [Create ipv6 devconf-s for namespaces] to avoid freeing init_net. In commit c900a800 [ipv6: fix bad free of addrconf_init_net], function addrconf_init_net will allocate memory for every net regardless of init_net. In this case, it is unnecessary to make "init_net" test. CC: Hong Zhiguo CC: Octavian Purdila CC: Pavel Emelyanov CC: Cong Wang Suggested-by: David S. Miller Signed-off-by: Zhu Yanjun Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9eac3a7..f7c8bbe 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5380,10 +5380,8 @@ static void __net_exit addrconf_exit_net(struct net *net) __addrconf_sysctl_unregister(net->ipv6.devconf_dflt); __addrconf_sysctl_unregister(net->ipv6.devconf_all); #endif - if (!net_eq(net, &init_net)) { - kfree(net->ipv6.devconf_dflt); - kfree(net->ipv6.devconf_all); - } + kfree(net->ipv6.devconf_dflt); + kfree(net->ipv6.devconf_all); } static struct pernet_operations addrconf_ops = { -- cgit v1.1 From b59eaf9e2871735ea7cc7e3dbf8bf83bddd786b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 26 Nov 2014 12:46:50 +0100 Subject: netfilter: combine IPv4 and IPv6 nf_nat_redirect code in one module This resolves linking problems with CONFIG_IPV6=n: net/built-in.o: In function `redirect_tg6': xt_REDIRECT.c:(.text+0x6d021): undefined reference to `nf_nat_redirect_ipv6' Reported-by: Andreas Ruprecht Reported-by: Or Gerlitz Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/Kconfig | 8 +--- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/nf_nat_redirect_ipv6.c | 75 ------------------------------- net/ipv6/netfilter/nft_redir_ipv6.c | 2 +- 4 files changed, 2 insertions(+), 84 deletions(-) delete mode 100644 net/ipv6/netfilter/nf_nat_redirect_ipv6.c (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 0dbe5c7..a069822 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -82,12 +82,6 @@ config NF_NAT_MASQUERADE_IPV6 This is the kernel functionality to provide NAT in the masquerade flavour (automatic source address selection) for IPv6. -config NF_NAT_REDIRECT_IPV6 - tristate "IPv6 redirect support" - help - This is the kernel functionality to provide NAT in the redirect - flavour (redirect packet to local machine) for IPv6. - config NFT_MASQ_IPV6 tristate "IPv6 masquerade support for nf_tables" depends on NF_TABLES_IPV6 @@ -101,7 +95,7 @@ config NFT_REDIR_IPV6 tristate "IPv6 redirect support for nf_tables" depends on NF_TABLES_IPV6 depends on NFT_REDIR - select NF_NAT_REDIRECT_IPV6 + select NF_NAT_REDIRECT help This is the expression that provides IPv4 redirect support for nf_tables. diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d2ac9f5..c36e0a5 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o -obj-$(CONFIG_NF_NAT_REDIRECT_IPV6) += nf_nat_redirect_ipv6.o # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/nf_nat_redirect_ipv6.c b/net/ipv6/netfilter/nf_nat_redirect_ipv6.c deleted file mode 100644 index ea1308a..0000000 --- a/net/ipv6/netfilter/nf_nat_redirect_ipv6.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; - -unsigned int -nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, - unsigned int hooknum) -{ - struct nf_nat_range newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (hooknum == NF_INET_LOCAL_OUT) { - newdst = loopback_addr; - } else { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - bool addr = false; - - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - if (idev != NULL) { - list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; - addr = true; - break; - } - } - rcu_read_unlock(); - - if (!addr) - return NF_DROP; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} -EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 83420ee..2433a6b 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include static void nft_redir_ipv6_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], -- cgit v1.1 From 4338c5725920be301a02cad7907e98a076bf24b3 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Thu, 27 Nov 2014 22:22:19 -0800 Subject: netfilter: nf_log_ipv6: correct typo in module description It incorrectly identifies itself as "IPv4" packet logging. Signed-off-by: Steven Noonan Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_log_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 7fc34d1..ddf07e6 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -422,6 +422,6 @@ module_init(nf_log_ipv6_init); module_exit(nf_log_ipv6_exit); MODULE_AUTHOR("Netfilter Core Team "); -MODULE_DESCRIPTION("Netfilter IPv4 packet logging"); +MODULE_DESCRIPTION("Netfilter IPv6 packet logging"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NF_LOGGER(AF_INET6, 0); -- cgit v1.1 From 829ae9d611651467fe6cd7be834bd33ca6b28dfe Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 30 Nov 2014 22:22:34 -0500 Subject: net-timestamp: allow reading recv cmsg on errqueue with origin tstamp Allow reading of timestamps and cmsg at the same time on all relevant socket families. One use is to correlate timestamps with egress device, by asking for cmsg IP_PKTINFO. on AF_INET sockets, call the relevant function (ip_cmsg_recv). To avoid changing legacy expectations, only do so if the caller sets a new timestamping flag SOF_TIMESTAMPING_OPT_CMSG. on AF_INET6 sockets, IPV6_PKTINFO and all other recv cmsg are already returned for all origins. only change is to set ifindex, which is not initialized for all error origins. In both cases, only generate the pktinfo message if an ifindex is known. This is not the case for ACK timestamps. The difference between the protocol families is probably a historical accident as a result of the different conditions for generating cmsg in the relevant ip(v6)_recv_error function: ipv4: if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { ipv6: if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { At one time, this was the same test bar for the ICMP/ICMP6 distinction. This is no longer true. Signed-off-by: Willem de Bruijn ---- Changes v1 -> v2 large rewrite - integrate with existing pktinfo cmsg generation code - on ipv4: only send with new flag, to maintain legacy behavior - on ipv6: send at most a single pktinfo cmsg - on ipv6: initialize fields if not yet initialized The recv cmsg interfaces are also relevant to the discussion of whether looping packet headers is problematic. For v6, cmsgs that identify many headers are already returned. This patch expands that to v4. If it sounds reasonable, I will follow with patches 1. request timestamps without payload with SOF_TIMESTAMPING_OPT_TSONLY (http://patchwork.ozlabs.org/patch/366967/) 2. sysctl to conditionally drop all timestamps that have payload or cmsg from users without CAP_NET_RAW. Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc11396..2464a00 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } } -- cgit v1.1 From 60c04aecd8a72a84869308bdf2289a7aabb9a88c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 1 Dec 2014 20:29:06 -0800 Subject: udp: Neaten and reduce size of compute_score functions The compute_score functions are a bit difficult to read. Neaten them a bit to reduce object sizes and make them a bit more intelligible. Return early to avoid indentation and avoid unnecessary initializations. (allyesconfig, but w/ -O2 and no profiling) $ size net/ipv[46]/udp.o.* text data bss dec hex filename 28680 1184 25 29889 74c1 net/ipv4/udp.o.new 28756 1184 25 29965 750d net/ipv4/udp.o.old 17600 1010 2 18612 48b4 net/ipv6/udp.o.new 17632 1010 2 18644 48d4 net/ipv6/udp.o.old Signed-off-by: Joe Perches Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 113 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 63 insertions(+), 50 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7cfb5d7..7f964322 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,72 +148,85 @@ static inline int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, __be16 dport, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + score++; + } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } #define SCORE2_MAX (1 + 1 + 1) static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned short hnum, - int dif) + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } - /* called with read_rcu_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, -- cgit v1.1 From 86fe8f892013228eef324c42e77c6b54b0bbc6dd Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Wed, 3 Dec 2014 10:29:40 +0800 Subject: ipv6: remove useless spin_lock/spin_unlock xchg is atomic, so there is no necessary to use spin_lock/spin_unlock to protect it. At last, remove the redundant opt = xchg(&inet6_sk(sk)->opt, opt); statement. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index e1a9583..66980d8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -110,12 +110,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } - opt = xchg(&inet6_sk(sk)->opt, opt); - } else { - spin_lock(&sk->sk_dst_lock); - opt = xchg(&inet6_sk(sk)->opt, opt); - spin_unlock(&sk->sk_dst_lock); } + opt = xchg(&inet6_sk(sk)->opt, opt); sk_dst_reset(sk); return opt; -- cgit v1.1 From 19e3c66b52caf20a9a1119dc847b6abae4c03f4f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 12:10:46 -0500 Subject: ipv6 equivalent of "ipv4: Avoid reading user iov twice after raw_probe_proto_opt" Signed-off-by: Al Viro --- net/ipv6/raw.c | 112 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8baa53e..942f67b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -672,65 +672,62 @@ error: return err; } -static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg) +struct raw6_frag_vec { + struct msghdr *msg; + int hlen; + char c[4]; +}; + +static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6) { - struct iovec *iov; - u8 __user *type = NULL; - u8 __user *code = NULL; - u8 len = 0; - int probed = 0; - int i; - - if (!msg->msg_iov) - return 0; + int err = 0; + switch (fl6->flowi6_proto) { + case IPPROTO_ICMPV6: + rfv->hlen = 2; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) { + fl6->fl6_icmp_type = rfv->c[0]; + fl6->fl6_icmp_code = rfv->c[1]; + } + break; + case IPPROTO_MH: + rfv->hlen = 4; + err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); + if (!err) + fl6->fl6_mh_type = rfv->c[2]; + } + return err; +} - for (i = 0; i < msg->msg_iovlen; i++) { - iov = &msg->msg_iov[i]; - if (!iov) - continue; +static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, + struct sk_buff *skb) +{ + struct raw6_frag_vec *rfv = from; - switch (fl6->flowi6_proto) { - case IPPROTO_ICMPV6: - /* check if one-byte field is readable or not. */ - if (iov->iov_base && iov->iov_len < 1) - break; - - if (!type) { - type = iov->iov_base; - /* check if code field is readable or not. */ - if (iov->iov_len > 1) - code = type + 1; - } else if (!code) - code = iov->iov_base; - - if (type && code) { - if (get_user(fl6->fl6_icmp_type, type) || - get_user(fl6->fl6_icmp_code, code)) - return -EFAULT; - probed = 1; - } - break; - case IPPROTO_MH: - if (iov->iov_base && iov->iov_len < 1) - break; - /* check if type field is readable or not. */ - if (iov->iov_len > 2 - len) { - u8 __user *p = iov->iov_base; - if (get_user(fl6->fl6_mh_type, &p[2 - len])) - return -EFAULT; - probed = 1; - } else - len += iov->iov_len; + if (offset < rfv->hlen) { + int copy = min(rfv->hlen - offset, len); - break; - default: - probed = 1; - break; - } - if (probed) - break; + if (skb->ip_summed == CHECKSUM_PARTIAL) + memcpy(to, rfv->c + offset, copy); + else + skb->csum = csum_block_add( + skb->csum, + csum_partial_copy_nocheck(rfv->c + offset, + to, copy, 0), + odd); + + odd = 0; + offset += copy; + to += copy; + len -= copy; + + if (!len) + return 0; } - return 0; + + offset -= rfv->hlen; + + return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb); } static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, @@ -745,6 +742,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; + struct raw6_frag_vec rfv; struct flowi6 fl6; int addr_len = msg->msg_namelen; int hlimit = -1; @@ -848,7 +846,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; - err = rawv6_probe_proto_opt(&fl6, msg); + rfv.msg = msg; + rfv.hlen = 0; + err = rawv6_probe_proto_opt(&rfv, &fl6); if (err) goto out; @@ -889,7 +889,7 @@ back_from_confirm: err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, + err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, msg->msg_flags, dontfrag); -- cgit v1.1 From f69e6d131f5dac8278ac79a902cc448364880d8b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 13:23:40 -0500 Subject: ip_generic_getfrag, udplite_getfrag: switch to passing msghdr Signed-off-by: Al Viro --- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 942f67b..11a9283 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -727,7 +727,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, offset -= rfv->hlen; - return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb); + return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7f964322..189dc4a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1312,7 +1312,7 @@ do_append_data: dontfrag = np->dontfrag; up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, + err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl6, (struct rt6_info *)dst, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); -- cgit v1.1 From c0371da6047abd261bc483c744dbc7d81a116172 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 24 Nov 2014 10:42:55 -0500 Subject: put iov_iter into msghdr Note that the code _using_ ->msg_iter at that point will be very unhappy with anything other than unshifted iovec-backed iov_iter. We still need to convert users to proper primitives. Signed-off-by: Al Viro --- net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5b7a1ed..2d31483 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,7 +163,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; - pfh.iov = msg->msg_iov; + /* XXX: stripping const */ + pfh.iov = (struct iovec *)msg->msg_iter.iov; pfh.wcheck = 0; pfh.family = AF_INET6; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 11a9283..ee25631 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -886,7 +886,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, back_from_confirm: if (inet->hdrincl) - err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags); + /* XXX: stripping const */ + err = rawv6_send_hdrinc(sk, (struct iovec *)msg->msg_iter.iov, len, &fl6, &dst, msg->msg_flags); else { lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, -- cgit v1.1 From 0f85feae6b710ced3abad5b2b47d31dfcb956b62 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Dec 2014 09:56:08 -0800 Subject: tcp: fix more NULL deref after prequeue changes When I cooked commit c3658e8d0f1 ("tcp: fix possible NULL dereference in tcp_vX_send_reset()") I missed other spots we could deref a NULL skb_dst(skb) Again, if a socket is provided, we do not need skb_dst() to get a pointer to network namespace : sock_net(sk) is good enough. Reported-by: Dann Frazier Bisected-by: Dann Frazier Tested-by: Dann Frazier Signed-off-by: Eric Dumazet Fixes: ca777eff51f7 ("tcp: remove dst refcount false sharing for prequeue mode") Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index dc495ae..c277951 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -787,16 +787,16 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .queue_hash_add = inet6_csk_reqsk_queue_hash_add, }; -static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, int rst, u8 tclass, - u32 label) +static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack, u32 win, u32 tsval, u32 tsecr, + int oif, struct tcp_md5sig_key *key, int rst, + u8 tclass, u32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; struct sk_buff *buff; struct flowi6 fl6; - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); struct dst_entry *dst; @@ -946,7 +946,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) (th->doff << 2); oif = sk ? sk->sk_bound_dev_if : 0; - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -957,13 +957,13 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 tsval, u32 tsecr, int oif, +static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq, + u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, - label); + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, + tclass, label); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -971,7 +971,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -986,10 +986,10 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, - req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } -- cgit v1.1 From f95b414edb18de59940dcebbefb49cf25c6d505c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 11 Dec 2014 11:22:04 +0800 Subject: net: introduce helper macro for_each_cmsghdr Introduce helper macro for_each_cmsghdr as a wrapper of the enumerating cmsghdr from msghdr, just cleanup. Signed-off-by: Gu Zheng Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2464a00..100c589 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -657,7 +657,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, int len; int err = 0; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { int addr_type; if (!CMSG_OK(msg, cmsg)) { -- cgit v1.1