diff options
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/Kconfig | 11 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 223 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 7 | ||||
-rw-r--r-- | net/ipv6/datagram.c | 8 | ||||
-rw-r--r-- | net/ipv6/exthdrs.c | 2 | ||||
-rw-r--r-- | net/ipv6/inet6_hashtables.c | 9 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 2 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 2 | ||||
-rw-r--r-- | net/ipv6/raw.c | 3 | ||||
-rw-r--r-- | net/ipv6/route.c | 34 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 8 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 3 |
12 files changed, 192 insertions, 120 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 438a73a..643f613 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -5,16 +5,15 @@ # IPv6 as module will cause a CRASH if you try to unload it menuconfig IPV6 tristate "The IPv6 protocol" - default m + default y ---help--- - This is complemental support for the IP version 6. - You will still be able to do traditional IPv4 networking as well. + Support for IP version 6 (IPv6). For general information about IPv6, see <https://en.wikipedia.org/wiki/IPv6>. - For Linux IPv6 development information, see <http://www.linux-ipv6.org>. - For specific information about IPv6 under Linux, read the HOWTO at - <http://www.bieringer.de/linux/IPv6/>. + For specific information about IPv6 under Linux, see + Documentation/networking/ipv6.txt and read the HOWTO at + <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/> To compile this protocol support as a module, choose M here: the module will be called ipv6. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 21c2c81..eb0c6a3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -211,7 +211,8 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_mtu = 1, .stable_secret = { .initialized = false, - } + }, + .use_oif_addrs_only = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -253,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .stable_secret = { .initialized = false, }, + .use_oif_addrs_only = 0, }; /* Check if a valid qdisc is available */ @@ -1358,15 +1360,96 @@ out: return ret; } +static int __ipv6_dev_get_saddr(struct net *net, + struct ipv6_saddr_dst *dst, + struct inet6_dev *idev, + struct ipv6_saddr_score *scores, + int hiscore_idx) +{ + struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx]; + + read_lock_bh(&idev->lock); + list_for_each_entry(score->ifa, &idev->addr_list, if_list) { + int i; + + /* + * - Tentative Address (RFC2462 section 5.4) + * - A tentative address is not considered + * "assigned to an interface" in the traditional + * sense, unless it is also flagged as optimistic. + * - Candidate Source Address (section 4) + * - In any case, anycast addresses, multicast + * addresses, and the unspecified address MUST + * NOT be included in a candidate set. + */ + if ((score->ifa->flags & IFA_F_TENTATIVE) && + (!(score->ifa->flags & IFA_F_OPTIMISTIC))) + continue; + + score->addr_type = __ipv6_addr_type(&score->ifa->addr); + + if (unlikely(score->addr_type == IPV6_ADDR_ANY || + score->addr_type & IPV6_ADDR_MULTICAST)) { + net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", + idev->dev->name); + continue; + } + + score->rule = -1; + bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); + + for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { + int minihiscore, miniscore; + + minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i); + miniscore = ipv6_get_saddr_eval(net, score, dst, i); + + if (minihiscore > miniscore) { + if (i == IPV6_SADDR_RULE_SCOPE && + score->scopedist > 0) { + /* + * special case: + * each remaining entry + * has too small (not enough) + * scope, because ifa entries + * are sorted by their scope + * values. + */ + goto out; + } + break; + } else if (minihiscore < miniscore) { + if (hiscore->ifa) + in6_ifa_put(hiscore->ifa); + + in6_ifa_hold(score->ifa); + + swap(hiscore, score); + hiscore_idx = 1 - hiscore_idx; + + /* restore our iterator */ + score->ifa = hiscore->ifa; + + break; + } + } + } +out: + read_unlock_bh(&idev->lock); + return hiscore_idx; +} + int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { - struct ipv6_saddr_score scores[2], - *score = &scores[0], *hiscore = &scores[1]; + struct ipv6_saddr_score scores[2], *hiscore; struct ipv6_saddr_dst dst; + struct inet6_dev *idev; struct net_device *dev; int dst_type; + bool use_oif_addr = false; + int hiscore_idx = 0; dst_type = __ipv6_addr_type(daddr); dst.addr = daddr; @@ -1375,105 +1458,50 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex); dst.prefs = prefs; - hiscore->rule = -1; - hiscore->ifa = NULL; + scores[hiscore_idx].rule = -1; + scores[hiscore_idx].ifa = NULL; rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - struct inet6_dev *idev; - - /* Candidate Source Address (section 4) - * - multicast and link-local destination address, - * the set of candidate source address MUST only - * include addresses assigned to interfaces - * belonging to the same link as the outgoing - * interface. - * (- For site-local destination addresses, the - * set of candidate source addresses MUST only - * include addresses assigned to interfaces - * belonging to the same site as the outgoing - * interface.) - */ - if (((dst_type & IPV6_ADDR_MULTICAST) || - dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && - dst.ifindex && dev->ifindex != dst.ifindex) - continue; - - idev = __in6_dev_get(dev); - if (!idev) - continue; - - read_lock_bh(&idev->lock); - list_for_each_entry(score->ifa, &idev->addr_list, if_list) { - int i; - - /* - * - Tentative Address (RFC2462 section 5.4) - * - A tentative address is not considered - * "assigned to an interface" in the traditional - * sense, unless it is also flagged as optimistic. - * - Candidate Source Address (section 4) - * - In any case, anycast addresses, multicast - * addresses, and the unspecified address MUST - * NOT be included in a candidate set. - */ - if ((score->ifa->flags & IFA_F_TENTATIVE) && - (!(score->ifa->flags & IFA_F_OPTIMISTIC))) - continue; - - score->addr_type = __ipv6_addr_type(&score->ifa->addr); + /* Candidate Source Address (section 4) + * - multicast and link-local destination address, + * the set of candidate source address MUST only + * include addresses assigned to interfaces + * belonging to the same link as the outgoing + * interface. + * (- For site-local destination addresses, the + * set of candidate source addresses MUST only + * include addresses assigned to interfaces + * belonging to the same site as the outgoing + * interface.) + * - "It is RECOMMENDED that the candidate source addresses + * be the set of unicast addresses assigned to the + * interface that will be used to send to the destination + * (the 'outgoing' interface)." (RFC 6724) + */ + if (dst_dev) { + idev = __in6_dev_get(dst_dev); + if ((dst_type & IPV6_ADDR_MULTICAST) || + dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || + (idev && idev->cnf.use_oif_addrs_only)) { + use_oif_addr = true; + } + } - if (unlikely(score->addr_type == IPV6_ADDR_ANY || - score->addr_type & IPV6_ADDR_MULTICAST)) { - net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s", - dev->name); + if (use_oif_addr) { + if (idev) + hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); + } else { + for_each_netdev_rcu(net, dev) { + idev = __in6_dev_get(dev); + if (!idev) continue; - } - - score->rule = -1; - bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX); - - for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { - int minihiscore, miniscore; - - minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i); - miniscore = ipv6_get_saddr_eval(net, score, &dst, i); - - if (minihiscore > miniscore) { - if (i == IPV6_SADDR_RULE_SCOPE && - score->scopedist > 0) { - /* - * special case: - * each remaining entry - * has too small (not enough) - * scope, because ifa entries - * are sorted by their scope - * values. - */ - goto try_nextdev; - } - break; - } else if (minihiscore < miniscore) { - if (hiscore->ifa) - in6_ifa_put(hiscore->ifa); - - in6_ifa_hold(score->ifa); - - swap(hiscore, score); - - /* restore our iterator */ - score->ifa = hiscore->ifa; - - break; - } - } + hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx); } -try_nextdev: - read_unlock_bh(&idev->lock); } rcu_read_unlock(); + hiscore = &scores[hiscore_idx]; if (!hiscore->ifa) return -EADDRNOTAVAIL; @@ -4586,6 +4614,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; /* we omit DEVCONF_STABLE_SECRET for now */ + array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; } static inline size_t inet6_ifla6_size(void) @@ -5585,6 +5614,14 @@ static struct addrconf_sysctl_table .proc_handler = addrconf_sysctl_stable_secret, }, { + .procname = "use_oif_addrs_only", + .data = &ipv6_devconf.use_oif_addrs_only, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, + { /* sentinel */ } }, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7de52b6..7bc92ea 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -342,7 +342,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { - if (!(inet->freebind || inet->transparent) && + if (!net->ipv6.sysctl.ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && !ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { err = -EADDRNOTAVAIL; @@ -679,8 +680,8 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct ipv6_pinfo *np = inet6_sk(sk); if (np->rxopt.all) { - if ((opt->hop && (np->rxopt.bits.hopopts || - np->rxopt.bits.ohopopts)) || + if (((opt->flags & IP6SKB_HOPBYHOP) && + (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b10a889..2572a32 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -568,8 +568,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } /* HbH is allowed only once */ - if (np->rxopt.bits.hopopts && opt->hop) { - u8 *ptr = nh + opt->hop; + if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) { + u8 *ptr = nh + sizeof(struct ipv6hdr); put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); } @@ -630,8 +630,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, int hlim = ipv6_hdr(skb)->hop_limit; put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } - if (np->rxopt.bits.ohopopts && opt->hop) { - u8 *ptr = nh + opt->hop; + if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) { + u8 *ptr = nh + sizeof(struct ipv6hdr); put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); } if (np->rxopt.bits.odstopts && opt->dst0) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a7bbbe4..ce203b0 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -632,7 +632,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb) return -1; } - opt->hop = sizeof(struct ipv6hdr); + opt->flags |= IP6SKB_HOPBYHOP; if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b4fd96d..6ac8dad 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -207,7 +207,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; - int twrefcnt = 0; spin_lock(lock); @@ -234,21 +233,17 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { - twrefcnt = inet_twsk_unhash(tw); + sk_nulls_del_node_init_rcu((struct sock *)tw); NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); - if (twrefcnt) - inet_twsk_put(tw); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw); - - inet_twsk_put(tw); + inet_twsk_deschedule_put(tw); } return 0; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 55d1986..d715f2e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -32,6 +32,7 @@ #include <net/ipv6.h> #include <net/ndisc.h> #include <net/addrconf.h> +#include <net/lwtunnel.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -177,6 +178,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) static void rt6_release(struct rt6_info *rt) { if (atomic_dec_and_test(&rt->rt6i_ref)) { + lwtunnel_state_put(rt->rt6i_lwtstate); rt6_free_pcpu(rt); dst_free(&rt->dst); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d5f7716..c5fc852 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1023,6 +1023,8 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; + if (!fl6->flowi6_oif) + fl6->flowi6_oif = dst->dev->ifindex; return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ca4700c..fdbada156 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -295,7 +295,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; - if (!(addr_type & IPV6_ADDR_MULTICAST)) { + if (!(addr_type & IPV6_ADDR_MULTICAST) && + !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) { err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6090969..7f2214f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -58,6 +58,7 @@ #include <net/netevent.h> #include <net/netlink.h> #include <net/nexthop.h> +#include <net/lwtunnel.h> #include <asm/uaccess.h> @@ -1770,6 +1771,18 @@ int ip6_route_add(struct fib6_config *cfg) rt->dst.output = ip6_output; + if (cfg->fc_encap) { + struct lwtunnel_state *lwtstate; + + err = lwtunnel_build_state(dev, cfg->fc_encap_type, + cfg->fc_encap, &lwtstate); + if (err) + goto out; + lwtunnel_state_get(lwtstate); + rt->rt6i_lwtstate = lwtstate; + rt->dst.output = lwtunnel_output6; + } + ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) @@ -2595,6 +2608,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_PREF] = { .type = NLA_U8 }, + [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, + [RTA_ENCAP] = { .type = NLA_NESTED }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2689,6 +2704,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_flags |= RTF_PREF(pref); } + if (tb[RTA_ENCAP]) + cfg->fc_encap = tb[RTA_ENCAP]; + + if (tb[RTA_ENCAP_TYPE]) + cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); + err = 0; errout: return err; @@ -2721,6 +2742,10 @@ beginning: r_cfg.fc_gateway = nla_get_in6_addr(nla); r_cfg.fc_flags |= RTF_GATEWAY; } + r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + if (nla) + r_cfg.fc_encap_type = nla_get_u16(nla); } err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg); if (err) { @@ -2783,7 +2808,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) return ip6_route_add(&cfg); } -static inline size_t rt6_nlmsg_size(void) +static inline size_t rt6_nlmsg_size(struct rt6_info *rt) { return NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(16) /* RTA_SRC */ @@ -2797,7 +2822,8 @@ static inline size_t rt6_nlmsg_size(void) + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ - + nla_total_size(1); /* RTA_PREF */ + + nla_total_size(1) /* RTA_PREF */ + + lwtunnel_get_encap_size(rt->rt6i_lwtstate); } static int rt6_fill_node(struct net *net, @@ -2945,6 +2971,8 @@ static int rt6_fill_node(struct net *net, if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) goto nla_put_failure; + lwtunnel_fill_encap(skb, rt->rt6i_lwtstate); + nlmsg_end(skb, nlh); return 0; @@ -3071,7 +3099,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) err = -ENOBUFS; seq = info->nlh ? info->nlh->nlmsg_seq : 0; - skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); + skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (!skb) goto errout; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 4e705ad..db48aeb 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -75,6 +75,13 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ip_nonlocal_bind", + .data = &init_net.ipv6.sysctl.ip_nonlocal_bind, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -117,6 +124,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; + ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6748c42..d540846 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1481,8 +1481,7 @@ do_time_wait: ntohs(th->dest), tcp_v6_iif(skb)); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); - inet_twsk_deschedule(tw); - inet_twsk_put(tw); + inet_twsk_deschedule_put(tw); sk = sk2; tcp_v6_restore_cb(skb); goto process; |